HADOOP-13345 S3Guard: Improved Consistency for S3A.
Contributed by: Chris Nauroth, Aaron Fabbri, Mingliang Liu, Lei (Eddy) Xu, Sean Mackrory, Steve Loughran and others.
commit 621b43e254
parent 7a96033b15
@@ -174,6 +174,19 @@
     <directory>../hadoop-sls/target/hadoop-sls-${project.version}/sls</directory>
     <outputDirectory>/share/hadoop/${hadoop.component}/sls</outputDirectory>
   </fileSet>
+  <fileSet>
+    <directory>../hadoop-aws/src/main/bin</directory>
+    <outputDirectory>/bin</outputDirectory>
+    <fileMode>0755</fileMode>
+  </fileSet>
+  <fileSet>
+    <directory>../hadoop-aws/src/main/shellprofile.d</directory>
+    <includes>
+      <include>*</include>
+    </includes>
+    <outputDirectory>/libexec/shellprofile.d</outputDirectory>
+    <fileMode>0755</fileMode>
+  </fileSet>
 </fileSets>
 <dependencySets>
   <dependencySet>
@@ -171,6 +171,11 @@
     <artifactId>commons-configuration2</artifactId>
     <scope>compile</scope>
   </dependency>
+  <dependency>
+    <groupId>org.apache.commons</groupId>
+    <artifactId>commons-lang3</artifactId>
+    <scope>compile</scope>
+  </dependency>
   <dependency>
     <groupId>org.slf4j</groupId>
     <artifactId>slf4j-api</artifactId>
@@ -20,6 +20,7 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.lang.reflect.Constructor;
+import java.lang.reflect.InvocationTargetException;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
@@ -132,6 +133,13 @@ static <T> T newInstance(Class<T> theClass,
         CONSTRUCTOR_CACHE.put(theClass, meth);
       }
       result = meth.newInstance(uri, conf);
+    } catch (InvocationTargetException e) {
+      Throwable cause = e.getCause();
+      if (cause instanceof RuntimeException) {
+        throw (RuntimeException) cause;
+      } else {
+        throw new RuntimeException(cause);
+      }
     } catch (Exception e) {
       throw new RuntimeException(e);
     }
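For illustration (not part of the patch): the new catch block above exists because Constructor.newInstance wraps whatever the constructor throws in an InvocationTargetException, so without unwrapping, callers would only ever see the reflective wrapper. A minimal, self-contained sketch of that behaviour, with hypothetical class names:

import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;

public class ReflectionUnwrapDemo {
  // Stands in for a filesystem whose constructor rejects its configuration.
  static class Failing {
    Failing() {
      throw new IllegalArgumentException("bad configuration");
    }
  }

  public static void main(String[] args) throws Exception {
    Constructor<Failing> ctor = Failing.class.getDeclaredConstructor();
    try {
      ctor.newInstance();
    } catch (InvocationTargetException e) {
      // The interesting exception is the cause; rethrowing it, as the patch
      // does, preserves the original exception type for callers.
      System.out.println("wrapped cause: " + e.getCause());
    }
  }
}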
@@ -331,6 +331,15 @@ public AbstractFileSystem run() throws UnsupportedFileSystemException {
           return AbstractFileSystem.get(uri, conf);
         }
       });
+    } catch (RuntimeException ex) {
+      // RTEs can wrap other exceptions; if there is an IOException inner,
+      // throw it direct.
+      Throwable cause = ex.getCause();
+      if (cause instanceof IOException) {
+        throw (IOException) cause;
+      } else {
+        throw ex;
+      }
     } catch (InterruptedException ex) {
       LOG.error(ex.toString());
       throw new IOException("Failed to get the AbstractFileSystem for path: "
@@ -1308,12 +1308,120 @@
   </description>
 </property>
 
+<property>
+  <name>fs.s3a.metadatastore.authoritative</name>
+  <value>false</value>
+  <description>
+    When true, allow MetadataStore implementations to act as source of
+    truth for getting file status and directory listings. Even if this
+    is set to true, MetadataStore implementations may choose not to
+    return authoritative results. If the configured MetadataStore does
+    not support being authoritative, this setting will have no effect.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.metadatastore.impl</name>
+  <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+  <description>
+    Fully-qualified name of the class that implements the MetadataStore
+    to be used by s3a. The default class, NullMetadataStore, has no
+    effect: s3a will continue to treat the backing S3 service as the one
+    and only source of truth for file and directory metadata.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.cli.prune.age</name>
+  <value>86400000</value>
+  <description>
+    Default age (in milliseconds) after which to prune metadata from the
+    metadatastore when the prune command is run. Can be overridden on the
+    command-line.
+  </description>
+</property>
+
 <property>
   <name>fs.s3a.impl</name>
   <value>org.apache.hadoop.fs.s3a.S3AFileSystem</value>
   <description>The implementation class of the S3A Filesystem</description>
 </property>
 
+<property>
+  <name>fs.s3a.s3guard.ddb.region</name>
+  <value></value>
+  <description>
+    AWS DynamoDB region to connect to. An up-to-date list is
+    provided in the AWS Documentation: regions and endpoints. Without this
+    property, S3Guard will operate on a table in the associated S3 bucket region.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table</name>
+  <value></value>
+  <description>
+    The DynamoDB table name to operate on. Without this property, the respective
+    S3 bucket name will be used.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table.create</name>
+  <value>false</value>
+  <description>
+    If true, the S3A client will create the table if it does not already exist.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.read</name>
+  <value>500</value>
+  <description>
+    Provisioned throughput requirements for read operations in terms of capacity
+    units for the DynamoDB table. This config value will only be used when
+    creating a new DynamoDB table, though later you can manually provision by
+    increasing or decreasing read capacity as needed for existing tables.
+    See DynamoDB documents for more information.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.table.capacity.write</name>
+  <value>100</value>
+  <description>
+    Provisioned throughput requirements for write operations in terms of
+    capacity units for the DynamoDB table. Refer to related config
+    fs.s3a.s3guard.ddb.table.capacity.read before usage.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.max.retries</name>
+  <value>9</value>
+  <description>
+    Max retries on batched DynamoDB operations before giving up and
+    throwing an IOException. Each retry is delayed with an exponential
+    backoff timer which starts at 100 milliseconds and approximately
+    doubles each time. The minimum wait before throwing an exception is
+    sum(100, 200, 400, 800, .. 100*2^N-1 ) == 100 * ((2^N)-1)
+    So N = 9 yields at least 51.1 seconds (51,100 milliseconds) of blocking
+    before throwing an IOException.
+  </description>
+</property>
+
+<property>
+  <name>fs.s3a.s3guard.ddb.background.sleep</name>
+  <value>25</value>
+  <description>
+    Length (in milliseconds) of pause between each batch of deletes when
+    pruning metadata. Prevents prune operations (which can typically be low
+    priority background operations) from overly interfering with other I/O
+    operations.
+  </description>
+</property>
+
 <property>
   <name>fs.AbstractFileSystem.s3a.impl</name>
   <value>org.apache.hadoop.fs.s3a.S3A</value>
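For illustration (not part of the patch), a minimal sketch of wiring these new options together programmatically rather than in core-site.xml; the property names and class names are taken from the patch above, everything else is an assumption:

import org.apache.hadoop.conf.Configuration;

public final class S3GuardConfigSketch {
  public static Configuration enableDynamoDBMetadataStore() {
    Configuration conf = new Configuration();
    // Route S3A metadata through the DynamoDB MetadataStore instead of the
    // default NullMetadataStore.
    conf.set("fs.s3a.metadatastore.impl",
        "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore");
    // Let the client create the table on demand; table name and region fall
    // back to the bucket name and bucket region when left unset.
    conf.setBoolean("fs.s3a.s3guard.ddb.table.create", true);
    // Leave the store non-authoritative unless the consistency trade-off
    // described above is acceptable.
    conf.setBoolean("fs.s3a.metadatastore.authoritative", false);
    return conf;
  }
}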
@@ -748,13 +748,27 @@ public void testRenameChildDirForbidden() throws Exception {
 
   /**
    * This a sanity check to make sure that any filesystem's handling of
-   * renames doesn't cause any regressions
+   * renames empty dirs doesn't cause any regressions.
+   */
+  public void testRenameEmptyToDirWithSamePrefixAllowed() throws Throwable {
+    assumeTrue(renameSupported());
+    Path parentdir = path("testRenameEmptyToDirWithSamePrefixAllowed");
+    fs.mkdirs(parentdir);
+    Path dest = path("testRenameEmptyToDirWithSamePrefixAllowedDest");
+    rename(parentdir, dest, true, false, true);
+  }
+
+  /**
+   * This a sanity check to make sure that any filesystem's handling of
+   * renames non-empty dirs doesn't cause any regressions.
    */
   @Test
   public void testRenameToDirWithSamePrefixAllowed() throws Throwable {
     assumeTrue(renameSupported());
     final Path parentdir = path("testRenameToDirWithSamePrefixAllowed");
     fs.mkdirs(parentdir);
+    // Before renaming, we create one file under the source parent directory
+    createFile(new Path(parentdir, "mychild"));
     final Path dest = path("testRenameToDirWithSamePrefixAllowedDest");
     rename(parentdir, dest, true, false, true);
   }
@@ -222,4 +222,67 @@ public void testRenameWithNonEmptySubDir() throws Throwable {
     assertPathDoesNotExist("not deleted",
         new Path(srcDir, "source.txt"));
   }
+
+  /**
+   * Test that after renaming, the nested subdirectory is moved along with all
+   * its ancestors.
+   */
+  @Test
+  public void testRenamePopulatesDirectoryAncestors() throws IOException {
+    final FileSystem fs = getFileSystem();
+    final Path src = path("testRenamePopulatesDirectoryAncestors/source");
+    fs.mkdirs(src);
+    final String nestedDir = "/dir1/dir2/dir3/dir4";
+    fs.mkdirs(path(src + nestedDir));
+
+    Path dst = path("testRenamePopulatesDirectoryAncestorsNew");
+
+    fs.rename(src, dst);
+    validateAncestorsMoved(src, dst, nestedDir);
+  }
+
+  /**
+   * Test that after renaming, the nested file is moved along with all its
+   * ancestors. It is similar to {@link #testRenamePopulatesDirectoryAncestors}.
+   */
+  @Test
+  public void testRenamePopulatesFileAncestors() throws IOException {
+    final FileSystem fs = getFileSystem();
+    final Path src = path("testRenamePopulatesFileAncestors/source");
+    fs.mkdirs(src);
+    final String nestedFile = "/dir1/dir2/dir3/file4";
+    byte[] srcDataset = dataset(256, 'a', 'z');
+    writeDataset(fs, path(src + nestedFile), srcDataset, srcDataset.length,
+        1024, false);
+
+    Path dst = path("testRenamePopulatesFileAncestorsNew");
+
+    fs.rename(src, dst);
+    validateAncestorsMoved(src, dst, nestedFile);
+  }
+
+  /**
+   * Validate that the nested path and its ancestors should have been moved.
+   *
+   * @param src the source root to move
+   * @param dst the destination root to move
+   * @param nestedPath the nested path to move
+   */
+  private void validateAncestorsMoved(Path src, Path dst, String nestedPath)
+      throws IOException {
+    assertIsDirectory(dst);
+    assertPathDoesNotExist("src path should not exist", path(src + nestedPath));
+    assertPathExists("dst path should exist", path(dst + nestedPath));
+
+    Path path = new Path(nestedPath).getParent();
+    while (path != null && !path.isRoot()) {
+      final Path parentSrc = path(src + path.toString());
+      assertPathDoesNotExist(parentSrc + " is not deleted", parentSrc);
+      final Path parentDst = path(dst + path.toString());
+      assertPathExists(parentDst + " should exist after rename", parentDst);
+      assertIsDirectory(parentDst);
+      path = path.getParent();
+    }
+  }
+
 }
@@ -248,6 +248,23 @@ public static <T> T eventually(int timeoutMillis,
       throw ex;
     }
   }
+
+  /**
+   * Variant of {@link #eventually(int, Callable, Callable)} method for
+   * void lambda expressions.
+   * @param timeoutMillis timeout in milliseconds.
+   * Can be zero, in which case only one attempt is made before failing.
+   * @param eval expression to evaluate
+   * @param retry retry interval generator
+   * @throws Exception the last exception thrown before timeout was triggered
+   * @throws FailFastException if raised -without any retry attempt.
+   * @throws InterruptedException if interrupted during the sleep operation.
+   */
+  public static void eventually(int timeoutMillis,
+      VoidCallable eval,
+      Callable<Integer> retry) throws Exception {
+    eventually(timeoutMillis, new VoidCaller(eval), retry);
+  }
 
   /**
    * Simplified {@link #eventually(int, Callable, Callable)} method
    * with a fixed interval.
@@ -276,6 +293,25 @@ public static <T> T eventually(int timeoutMillis,
         new FixedRetryInterval(intervalMillis));
   }
 
+  /**
+  /**
+   * Variant of {@link #eventually(int, int, Callable)} method for
+   * void lambda expressions.
+   * @param timeoutMillis timeout in milliseconds.
+   * Can be zero, in which case only one attempt is made before failing.
+   * @param intervalMillis interval in milliseconds
+   * @param eval expression to evaluate
+   * @throws Exception the last exception thrown before timeout was triggered
+   * @throws FailFastException if raised -without any retry attempt.
+   * @throws InterruptedException if interrupted during the sleep operation.
+   */
+  public static void eventually(int timeoutMillis,
+      int intervalMillis,
+      VoidCallable eval) throws Exception {
+    eventually(timeoutMillis, eval,
+        new FixedRetryInterval(intervalMillis));
+  }
+
   /**
    * Intercept an exception; throw an {@code AssertionError} if one not raised.
    * The caught exception is rethrown if it is of the wrong class or
@@ -318,6 +354,32 @@ public static <T, E extends Throwable> E intercept(
       }
     }
+
+  /**
+   * Variant of {@link #intercept(Class, Callable)} to simplify void
+   * invocations.
+   * @param clazz class of exception; the raised exception must be this class
+   * <i>or a subclass</i>.
+   * @param eval expression to eval
+   * @param <E> exception class
+   * @return the caught exception if it was of the expected type
+   * @throws Exception any other exception raised
+   * @throws AssertionError if the evaluation call didn't raise an exception.
+   */
+  public static <E extends Throwable> E intercept(
+      Class<E> clazz,
+      VoidCallable eval)
+      throws Exception {
+    try {
+      eval.call();
+      throw new AssertionError("Expected an exception");
+    } catch (Throwable e) {
+      if (clazz.isAssignableFrom(e.getClass())) {
+        return (E)e;
+      }
+      throw e;
+    }
+  }
+
   /**
    * Intercept an exception; throw an {@code AssertionError} if one not raised.
    * The caught exception is rethrown if it is of the wrong class or
@@ -358,6 +420,29 @@ public static <T, E extends Throwable> E intercept(
     return ex;
   }
+
+  /**
+   * Variant of {@link #intercept(Class, Callable)} to simplify void
+   * invocations.
+   * @param clazz class of exception; the raised exception must be this class
+   * <i>or a subclass</i>.
+   * @param contained string which must be in the {@code toString()} value
+   * of the exception
+   * @param eval expression to eval
+   * @param <E> exception class
+   * @return the caught exception if it was of the expected type
+   * @throws Exception any other exception raised
+   * @throws AssertionError if the evaluation call didn't raise an exception.
+   */
+  public static <E extends Throwable> E intercept(
+      Class<E> clazz,
+      String contained,
+      VoidCallable eval)
+      throws Exception {
+    E ex = intercept(clazz, eval);
+    GenericTestUtils.assertExceptionContains(contained, ex);
+    return ex;
+  }
+
   /**
    * Robust string converter for exception messages; if the {@code toString()}
    * method throws an exception then that exception is caught and logged,
@@ -518,4 +603,31 @@ public static FailFastException newInstance(String format, Object...args) {
       return new FailFastException(String.format(format, args));
     }
   }
+
+  /**
+   * A simple interface for lambdas, which returns nothing; this exists
+   * to simplify lambda tests on operations with no return value.
+   */
+  public interface VoidCallable {
+    void call() throws Exception;
+  }
+
+  /**
+   * Bridge class to make {@link VoidCallable} something to use in anything
+   * which takes an {@link Callable}.
+   */
+  public static class VoidCaller implements Callable<Void> {
+    private final VoidCallable callback;
+
+    public VoidCaller(VoidCallable callback) {
+      this.callback = callback;
+    }
+
+    @Override
+    public Void call() throws Exception {
+      callback.call();
+      return null;
+    }
+  }
+
 }
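For illustration (not part of the patch), a sketch of how the void-returning variants above are intended to be used in tests. eventually(), intercept(), VoidCallable and VoidCaller are the members added here; the org.apache.hadoop.test package for LambdaTestUtils is assumed from its usual location, and the FileSystem, paths and timeouts are assumptions:

import java.io.FileNotFoundException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.test.LambdaTestUtils.eventually;
import static org.apache.hadoop.test.LambdaTestUtils.intercept;

public class VoidLambdaUsageSketch {
  public static void demo(FileSystem fs) throws Exception {
    Path created = new Path("/tmp/created");
    Path missing = new Path("/tmp/missing");

    // Poll for up to 30s at 1s intervals; the lambda returns nothing, so the
    // new VoidCallable overload keeps it free of a dummy return value.
    eventually(30000, 1000, () -> {
      if (!fs.exists(created)) {
        throw new AssertionError("not yet visible: " + created);
      }
    });

    // Expect a FileNotFoundException whose text mentions the missing path.
    intercept(FileNotFoundException.class, missing.toString(), () -> {
      fs.getFileStatus(missing);
    });
  }
}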
@@ -870,6 +870,17 @@
       <groupId>org.apache.commons</groupId>
       <artifactId>commons-configuration2</artifactId>
       <version>2.1</version>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.commons</groupId>
+          <artifactId>commons-lang3</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.commons</groupId>
+      <artifactId>commons-lang3</artifactId>
+      <version>3.4</version>
     </dependency>
     <dependency>
       <groupId>org.slf4j</groupId>
@@ -1734,4 +1745,12 @@
       </build>
     </profile>
   </profiles>
+
+  <repositories>
+    <repository>
+      <id>dynamodb-local-oregon</id>
+      <name>DynamoDB Local Release Repository</name>
+      <url>https://s3-us-west-2.amazonaws.com/dynamodb-local/release</url>
+    </repository>
+  </repositories>
 </project>
@@ -26,4 +26,10 @@
   <Match>
     <Class name="org.apache.hadoop.fs.s3.INode" />
   </Match>
+  <!-- Redundant null check makes code clearer, future-proof here. -->
+  <Match>
+    <Class name="org.apache.hadoop.fs.s3a.S3AFileSystem" />
+    <Method name="s3Exists" />
+    <Bug pattern="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE" />
+  </Match>
 </FindBugsFilter>
@@ -36,6 +36,7 @@
     <downloadSources>true</downloadSources>
     <hadoop.tmp.dir>${project.build.directory}/test</hadoop.tmp.dir>
 
+    <dynamodb.local.version>1.11.86</dynamodb.local.version>
     <!-- are scale tests enabled ? -->
     <fs.s3a.scale.test.enabled>unset</fs.s3a.scale.test.enabled>
     <!-- Size in MB of huge files. -->
@@ -44,6 +45,11 @@
     <fs.s3a.scale.test.huge.partitionsize>unset</fs.s3a.scale.test.huge.partitionsize>
     <!-- Timeout in seconds for scale tests.-->
     <fs.s3a.scale.test.timeout>3600</fs.s3a.scale.test.timeout>
+    <!-- are scale tests enabled ? -->
+    <fs.s3a.s3guard.test.enabled>false</fs.s3a.s3guard.test.enabled>
+    <fs.s3a.s3guard.test.authoritative>false</fs.s3a.s3guard.test.authoritative>
+    <fs.s3a.s3guard.test.implementation>local</fs.s3a.s3guard.test.implementation>
+
   </properties>
 
   <profiles>
@@ -164,6 +170,11 @@
             <fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
             <fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
             <fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
+            <!-- S3Guard -->
+            <fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
+            <fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
+            <fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
+
           </systemPropertyVariables>
           <!-- Some tests cannot run in parallel. Tests that cover -->
           <!-- access to the root directory must run in isolation -->
@@ -205,6 +216,10 @@
             <fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
             <fs.s3a.scale.test.huge.huge.partitionsize>${fs.s3a.scale.test.huge.partitionsize}</fs.s3a.scale.test.huge.huge.partitionsize>
             <fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
+            <!-- S3Guard -->
+            <fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
+            <fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
+            <fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
           </systemPropertyVariables>
           <!-- Do a sequential run for tests that cannot handle -->
           <!-- parallel execution. -->
@@ -247,6 +262,10 @@
             <fs.s3a.scale.test.enabled>${fs.s3a.scale.test.enabled}</fs.s3a.scale.test.enabled>
             <fs.s3a.scale.test.huge.filesize>${fs.s3a.scale.test.huge.filesize}</fs.s3a.scale.test.huge.filesize>
             <fs.s3a.scale.test.timeout>${fs.s3a.scale.test.timeout}</fs.s3a.scale.test.timeout>
+            <!-- S3Guard -->
+            <fs.s3a.s3guard.test.enabled>${fs.s3a.s3guard.test.enabled}</fs.s3a.s3guard.test.enabled>
+            <fs.s3a.s3guard.test.implementation>${fs.s3a.s3guard.test.implementation}</fs.s3a.s3guard.test.implementation>
+            <fs.s3a.s3guard.test.authoritative>${fs.s3a.s3guard.test.authoritative}</fs.s3a.s3guard.test.authoritative>
           </systemPropertyVariables>
           <forkedProcessTimeoutInSeconds>${fs.s3a.scale.test.timeout}</forkedProcessTimeoutInSeconds>
         </configuration>
@@ -269,6 +288,60 @@
         <fs.s3a.scale.test.enabled>true</fs.s3a.scale.test.enabled>
       </properties>
     </profile>
+
+    <!-- Turn on S3Guard tests-->
+    <profile>
+      <id>s3guard</id>
+      <activation>
+        <property>
+          <name>s3guard</name>
+        </property>
+      </activation>
+      <properties >
+        <fs.s3a.s3guard.test.enabled>true</fs.s3a.s3guard.test.enabled>
+      </properties>
+    </profile>
+
+    <!-- Switch to DynamoDB for S3Guard. Has no effect unless S3Guard is enabled -->
+    <profile>
+      <id>dynamo</id>
+      <activation>
+        <property>
+          <name>dynamo</name>
+        </property>
+      </activation>
+      <properties >
+        <fs.s3a.s3guard.test.implementation>dynamo</fs.s3a.s3guard.test.implementation>
+      </properties>
+    </profile>
+
+    <!-- Switch to DynamoDBLocal for S3Guard. Has no effect unless S3Guard is enabled -->
+    <profile>
+      <id>dynamodblocal</id>
+      <activation>
+        <property>
+          <name>dynamodblocal</name>
+        </property>
+      </activation>
+      <properties>
+        <fs.s3a.s3guard.test.implementation>dynamodblocal</fs.s3a.s3guard.test.implementation>
+      </properties>
+    </profile>
+
+    <!-- Switch S3Guard from Authoritative=false to true
+         Has no effect unless S3Guard is enabled -->
+    <profile>
+      <id>non-auth</id>
+      <activation>
+        <property>
+          <name>auth</name>
+        </property>
+      </activation>
+      <properties >
+        <fs.s3a.s3guard.test.authoritative>true</fs.s3a.s3guard.test.authoritative>
+      </properties>
+    </profile>
+
   </profiles>
 
   <build>
@@ -296,16 +369,48 @@
         <artifactId>maven-dependency-plugin</artifactId>
         <executions>
           <execution>
-            <id>deplist</id>
+            <id>deplist1</id>
             <phase>compile</phase>
             <goals>
               <goal>list</goal>
             </goals>
             <configuration>
-              <!-- build a shellprofile -->
+              <!-- build a shellprofile for hadoop-aws optional tools -->
               <outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt</outputFile>
             </configuration>
           </execution>
+          <execution>
+            <id>copy</id>
+            <phase>test-compile</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <includeScope>test</includeScope>
+              <includeTypes>so,dll,dylib</includeTypes>
+              <outputDirectory>${project.build.directory}/native-libs</outputDirectory>
+            </configuration>
+          </execution>
+          <execution>
+            <phase>package</phase>
+            <goals>
+              <goal>copy-dependencies</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/lib</outputDirectory>
+            </configuration>
+          </execution>
+          <execution>
+            <id>deplist2</id>
+            <phase>compile</phase>
+            <goals>
+              <goal>list</goal>
+            </goals>
+            <configuration>
+              <!-- referenced by the s3guard command -->
+              <outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-builtin.txt</outputFile>
+            </configuration>
+          </execution>
         </executions>
       </plugin>
     </plugins>
@@ -333,6 +438,26 @@
       <artifactId>aws-java-sdk-bundle</artifactId>
       <scope>compile</scope>
     </dependency>
+    <dependency>
+      <groupId>com.amazonaws</groupId>
+      <artifactId>DynamoDBLocal</artifactId>
+      <version>${dynamodb.local.version}</version>
+      <scope>test</scope>
+      <exclusions>
+        <exclusion>
+          <groupId>org.hamcrest</groupId>
+          <artifactId>hamcrest-core</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.eclipse.jetty</groupId>
+          <artifactId>jetty-http</artifactId>
+        </exclusion>
+        <exclusion>
+          <groupId>org.apache.commons</groupId>
+          <artifactId>commons-lang3</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>
@@ -267,6 +267,11 @@ private Constants() {
 
   public static final String USER_AGENT_PREFIX = "fs.s3a.user.agent.prefix";
 
+  /** Whether or not to allow MetadataStore to be source of truth. */
+  public static final String METADATASTORE_AUTHORITATIVE =
+      "fs.s3a.metadatastore.authoritative";
+  public static final boolean DEFAULT_METADATASTORE_AUTHORITATIVE = false;
+
   /** read ahead buffer size to prevent connection re-establishments. */
   public static final String READAHEAD_RANGE = "fs.s3a.readahead.range";
   public static final long DEFAULT_READAHEAD_RANGE = 64 * 1024;
@@ -312,7 +317,7 @@ private Constants() {
   @InterfaceStability.Unstable
   public static final Class<? extends S3ClientFactory>
       DEFAULT_S3_CLIENT_FACTORY_IMPL =
-          S3ClientFactory.DefaultS3ClientFactory.class;
+          DefaultS3ClientFactory.class;
 
   /**
    * Maximum number of partitions in a multipart upload: {@value}.
@@ -320,4 +325,130 @@ private Constants() {
   @InterfaceAudience.Private
   public static final int MAX_MULTIPART_COUNT = 10000;
+
+  /**
+   * Classname of the S3A-specific output committer factory. This
+   * is what must be declared when attempting to use
+   */
+  @InterfaceStability.Unstable
+  public static final String S3A_OUTPUT_COMMITTER_FACTORY =
+      "org.apache.hadoop.fs.s3a.commit.S3AOutputCommitterFactory";
+
+  /* Constants. */
+  public static final String S3_METADATA_STORE_IMPL =
+      "fs.s3a.metadatastore.impl";
+
+  /** Minimum period of time (in milliseconds) to keep metadata (may only be
+   * applied when a prune command is manually run).
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_CLI_PRUNE_AGE =
+      "fs.s3a.s3guard.cli.prune.age";
+
+  /**
+   * The region of the DynamoDB service.
+   *
+   * This config has no default value. If the user does not set this, the
+   * S3Guard will operate on a table in the associated S3 bucket region.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_REGION_KEY =
+      "fs.s3a.s3guard.ddb.region";
+
+  /**
+   * The DynamoDB table name to use.
+   *
+   * This config has no default value. If the user does not set this, the
+   * S3Guard implementation will use the respective S3 bucket name.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_TABLE_NAME_KEY =
+      "fs.s3a.s3guard.ddb.table";
+
+  /**
+   * Whether to create the DynamoDB table if the table does not exist.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_TABLE_CREATE_KEY =
+      "fs.s3a.s3guard.ddb.table.create";
+
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_TABLE_CAPACITY_READ_KEY =
+      "fs.s3a.s3guard.ddb.table.capacity.read";
+  public static final long S3GUARD_DDB_TABLE_CAPACITY_READ_DEFAULT = 500;
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY =
+      "fs.s3a.s3guard.ddb.table.capacity.write";
+  public static final long S3GUARD_DDB_TABLE_CAPACITY_WRITE_DEFAULT = 100;
+
+  /**
+   * The maximum put or delete requests per BatchWriteItem request.
+   *
+   * Refer to Amazon API reference for this limit.
+   */
+  public static final int S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT = 25;
+
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_MAX_RETRIES =
+      "fs.s3a.s3guard.ddb.max.retries";
+  /**
+   * Max retries on batched DynamoDB operations before giving up and
+   * throwing an IOException. Default is {@value}. See core-default.xml for
+   * more detail.
+   */
+  public static final int S3GUARD_DDB_MAX_RETRIES_DEFAULT = 9;
+
+  /**
+   * Period of time (in milliseconds) to sleep between batches of writes.
+   * Currently only applies to prune operations, as they are naturally a
+   * lower priority than other operations.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY =
+      "fs.s3a.s3guard.ddb.background.sleep";
+  public static final int S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_DEFAULT = 25;
+
+  /**
+   * V1 committer.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3A_OUTPUT_COMMITTER_MRV1 =
+      "org.apache.hadoop.fs.s3a.commit.S3OutputCommitterMRv1";
+
+  /**
+   * The default "Null" metadata store: {@value}.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_METASTORE_NULL
+      = "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore";
+
+  /**
+   * Use Local memory for the metadata: {@value}.
+   * This is not coherent across processes and must be used for testing only.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_METASTORE_LOCAL
+      = "org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore";
+
+  /**
+   * Use DynamoDB for the metadata: {@value}.
+   */
+  @InterfaceStability.Unstable
+  public static final String S3GUARD_METASTORE_DYNAMO
+      = "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore";
+
+  /**
+   * Inconsistency (visibility delay) injection settings.
+   */
+  @InterfaceStability.Unstable
+  public static final String FAIL_INJECT_INCONSISTENCY_KEY =
+      "fs.s3a.failinject.inconsistency.key.substring";
+
+  @InterfaceStability.Unstable
+  public static final String FAIL_INJECT_INCONSISTENCY_MSEC =
+      "fs.s3a.failinject.inconsistency.msec";
+
+  @InterfaceStability.Unstable
+  public static final String FAIL_INJECT_INCONSISTENCY_PROBABILITY =
+      "fs.s3a.failinject.inconsistency.probability";
+
 }
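For illustration (not part of the patch), a worked check of the exponential-backoff arithmetic quoted for fs.s3a.s3guard.ddb.max.retries; the class and method names are hypothetical, only the constants and the formula come from the text above:

public final class DdbRetryBackoffMath {
  public static void main(String[] args) {
    int retries = 9;        // S3GUARD_DDB_MAX_RETRIES_DEFAULT
    long baseMillis = 100;  // initial delay described in core-default.xml
    long total = 0;
    for (int i = 0; i < retries; i++) {
      total += baseMillis << i;   // 100, 200, 400, ... doubling each retry
    }
    // sum(100 .. 100*2^(N-1)) == 100 * ((2^N) - 1) == 51,100 ms for N == 9.
    System.out.println(total + " ms, closed form "
        + (baseMillis * ((1L << retries) - 1)) + " ms");
  }
}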
@@ -0,0 +1,233 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a;
+
+import com.amazonaws.ClientConfiguration;
+import com.amazonaws.Protocol;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.services.s3.AmazonS3;
+import com.amazonaws.services.s3.AmazonS3Client;
+import com.amazonaws.services.s3.S3ClientOptions;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.util.VersionInfo;
+import org.slf4j.Logger;
+
+import java.io.IOException;
+import java.net.URI;
+
+import static org.apache.hadoop.fs.s3a.Constants.*;
+import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;
+import static org.apache.hadoop.fs.s3a.S3AUtils.intOption;
+
+/**
+ * The default factory implementation, which calls the AWS SDK to configure
+ * and create an {@link AmazonS3Client} that communicates with the S3 service.
+ */
+public class DefaultS3ClientFactory extends Configured implements
+    S3ClientFactory {
+
+  protected static final Logger LOG = S3AFileSystem.LOG;
+
+  @Override
+  public AmazonS3 createS3Client(URI name) throws IOException {
+    Configuration conf = getConf();
+    AWSCredentialsProvider credentials =
+        createAWSCredentialProviderSet(name, conf);
+    final ClientConfiguration awsConf = createAwsConf(getConf());
+    AmazonS3 s3 = newAmazonS3Client(credentials, awsConf);
+    return createAmazonS3Client(s3, conf, credentials, awsConf);
+  }
+
+  /**
+   * Create a new {@link ClientConfiguration}.
+   * @param conf The Hadoop configuration
+   * @return new AWS client configuration
+   */
+  public static ClientConfiguration createAwsConf(Configuration conf) {
+    final ClientConfiguration awsConf = new ClientConfiguration();
+    initConnectionSettings(conf, awsConf);
+    initProxySupport(conf, awsConf);
+    initUserAgent(conf, awsConf);
+    return awsConf;
+  }
+
+  /**
+   * Wrapper around constructor for {@link AmazonS3} client. Override this to
+   * provide an extended version of the client
+   * @param credentials credentials to use
+   * @param awsConf AWS configuration
+   * @return new AmazonS3 client
+   */
+  protected AmazonS3 newAmazonS3Client(
+      AWSCredentialsProvider credentials, ClientConfiguration awsConf) {
+    return new AmazonS3Client(credentials, awsConf);
+  }
+
+  /**
+   * Initializes all AWS SDK settings related to connection management.
+   *
+   * @param conf Hadoop configuration
+   * @param awsConf AWS SDK configuration
+   */
+  private static void initConnectionSettings(Configuration conf,
+      ClientConfiguration awsConf) {
+    awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS,
+        DEFAULT_MAXIMUM_CONNECTIONS, 1));
+    boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
+        DEFAULT_SECURE_CONNECTIONS);
+    awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
+    awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES,
+        DEFAULT_MAX_ERROR_RETRIES, 0));
+    awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT,
+        DEFAULT_ESTABLISH_TIMEOUT, 0));
+    awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT,
+        DEFAULT_SOCKET_TIMEOUT, 0));
+    int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER,
+        DEFAULT_SOCKET_SEND_BUFFER, 2048);
+    int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
+        DEFAULT_SOCKET_RECV_BUFFER, 2048);
+    awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
+    String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
+    if (!signerOverride.isEmpty()) {
+      LOG.debug("Signer override = {}", signerOverride);
+      awsConf.setSignerOverride(signerOverride);
+    }
+  }
+
+  /**
+   * Initializes AWS SDK proxy support if configured.
+   *
+   * @param conf Hadoop configuration
+   * @param awsConf AWS SDK configuration
+   * @throws IllegalArgumentException if misconfigured
+   */
+  private static void initProxySupport(Configuration conf,
+      ClientConfiguration awsConf) throws IllegalArgumentException {
+    String proxyHost = conf.getTrimmed(PROXY_HOST, "");
+    int proxyPort = conf.getInt(PROXY_PORT, -1);
+    if (!proxyHost.isEmpty()) {
+      awsConf.setProxyHost(proxyHost);
+      if (proxyPort >= 0) {
+        awsConf.setProxyPort(proxyPort);
+      } else {
+        if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
+          LOG.warn("Proxy host set without port. Using HTTPS default 443");
+          awsConf.setProxyPort(443);
+        } else {
+          LOG.warn("Proxy host set without port. Using HTTP default 80");
+          awsConf.setProxyPort(80);
+        }
+      }
+      String proxyUsername = conf.getTrimmed(PROXY_USERNAME);
+      String proxyPassword = conf.getTrimmed(PROXY_PASSWORD);
+      if ((proxyUsername == null) != (proxyPassword == null)) {
+        String msg = "Proxy error: " + PROXY_USERNAME + " or " +
+            PROXY_PASSWORD + " set without the other.";
+        LOG.error(msg);
+        throw new IllegalArgumentException(msg);
+      }
+      awsConf.setProxyUsername(proxyUsername);
+      awsConf.setProxyPassword(proxyPassword);
+      awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN));
+      awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Using proxy server {}:{} as user {} with password {} on " +
+                "domain {} as workstation {}", awsConf.getProxyHost(),
+            awsConf.getProxyPort(),
+            String.valueOf(awsConf.getProxyUsername()),
+            awsConf.getProxyPassword(), awsConf.getProxyDomain(),
+            awsConf.getProxyWorkstation());
+      }
+    } else if (proxyPort >= 0) {
+      String msg =
+          "Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
+      LOG.error(msg);
+      throw new IllegalArgumentException(msg);
+    }
+  }
+
+  /**
+   * Initializes the User-Agent header to send in HTTP requests to the S3
+   * back-end. We always include the Hadoop version number. The user also
+   * may set an optional custom prefix to put in front of the Hadoop version
+   * number. The AWS SDK interally appends its own information, which seems
+   * to include the AWS SDK version, OS and JVM version.
+   *
+   * @param conf Hadoop configuration
+   * @param awsConf AWS SDK configuration
+   */
+  private static void initUserAgent(Configuration conf,
+      ClientConfiguration awsConf) {
+    String userAgent = "Hadoop " + VersionInfo.getVersion();
+    String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
+    if (!userAgentPrefix.isEmpty()) {
+      userAgent = userAgentPrefix + ", " + userAgent;
+    }
+    LOG.debug("Using User-Agent: {}", userAgent);
+    awsConf.setUserAgentPrefix(userAgent);
+  }
+
+  /**
+   * Creates an {@link AmazonS3Client} from the established configuration.
+   *
+   * @param conf Hadoop configuration
+   * @param credentials AWS credentials
+   * @param awsConf AWS SDK configuration
+   * @return S3 client
+   * @throws IllegalArgumentException if misconfigured
+   */
+  private static AmazonS3 createAmazonS3Client(AmazonS3 s3, Configuration conf,
+      AWSCredentialsProvider credentials, ClientConfiguration awsConf)
+      throws IllegalArgumentException {
+    String endPoint = conf.getTrimmed(ENDPOINT, "");
+    if (!endPoint.isEmpty()) {
+      try {
+        s3.setEndpoint(endPoint);
+      } catch (IllegalArgumentException e) {
+        String msg = "Incorrect endpoint: " + e.getMessage();
+        LOG.error(msg);
+        throw new IllegalArgumentException(msg, e);
+      }
+    }
+    enablePathStyleAccessIfRequired(s3, conf);
+    return s3;
+  }
+
+  /**
+   * Enables path-style access to S3 buckets if configured. By default, the
+   * behavior is to use virtual hosted-style access with URIs of the form
+   * http://bucketname.s3.amazonaws.com. Enabling path-style access and a
+   * region-specific endpoint switches the behavior to use URIs of the form
+   * http://s3-eu-west-1.amazonaws.com/bucketname.
+   *
+   * @param s3 S3 client
+   * @param conf Hadoop configuration
+   */
+  private static void enablePathStyleAccessIfRequired(AmazonS3 s3,
+      Configuration conf) {
+    final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
+    if (pathStyleAccess) {
+      LOG.debug("Enabling path style access!");
+      s3.setS3ClientOptions(S3ClientOptions.builder()
+          .setPathStyleAccess(true)
+          .build());
+    }
+  }
+}
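For illustration (not part of the patch), a sketch of why newAmazonS3Client is protected: a subclass can keep all of the connection, proxy and user-agent setup above and replace only the client construction step. The subclass name here is hypothetical; DefaultS3ClientFactory, LOG and newAmazonS3Client are taken from the file above:

package org.apache.hadoop.fs.s3a;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;

public class LoggingS3ClientFactory extends DefaultS3ClientFactory {
  @Override
  protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
      ClientConfiguration awsConf) {
    // Reuse the default construction, adding a trace of where it happened.
    LOG.info("Creating AmazonS3 client via {}", getClass().getSimpleName());
    return super.newAmazonS3Client(credentials, awsConf);
  }
}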
@ -0,0 +1,434 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a;
|
||||||
|
|
||||||
|
import com.amazonaws.AmazonClientException;
|
||||||
|
import com.amazonaws.AmazonServiceException;
|
||||||
|
import com.amazonaws.ClientConfiguration;
|
||||||
|
import com.amazonaws.auth.AWSCredentialsProvider;
|
||||||
|
import com.amazonaws.services.s3.AmazonS3;
|
||||||
|
import com.amazonaws.services.s3.AmazonS3Client;
|
||||||
|
import com.amazonaws.services.s3.model.DeleteObjectRequest;
|
||||||
|
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
|
||||||
|
import com.amazonaws.services.s3.model.DeleteObjectsResult;
|
||||||
|
import com.amazonaws.services.s3.model.ListObjectsRequest;
|
||||||
|
import com.amazonaws.services.s3.model.ObjectListing;
|
||||||
|
import com.amazonaws.services.s3.model.PutObjectRequest;
|
||||||
|
import com.amazonaws.services.s3.model.PutObjectResult;
|
||||||
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A wrapper around {@link com.amazonaws.services.s3.AmazonS3} that injects
|
||||||
|
* inconsistency and/or errors. Used for testing S3Guard.
|
||||||
|
* Currently only delays listing visibility, not affecting GET.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public class InconsistentAmazonS3Client extends AmazonS3Client {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Keys containing this substring will be subject to delayed visibility.
|
||||||
|
*/
|
||||||
|
public static final String DEFAULT_DELAY_KEY_SUBSTRING = "DELAY_LISTING_ME";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* How many seconds affected keys will be delayed from appearing in listing.
|
||||||
|
* This should probably be a config value.
|
||||||
|
*/
|
||||||
|
public static final long DEFAULT_DELAY_KEY_MSEC = 5 * 1000;
|
||||||
|
|
||||||
|
public static final float DEFAULT_DELAY_KEY_PROBABILITY = 1.0f;
|
||||||
|
|
||||||
|
/** Special config value since we can't store empty strings in XML. */
|
||||||
|
public static final String MATCH_ALL_KEYS = "*";
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(InconsistentAmazonS3Client.class);
|
||||||
|
|
||||||
|
/** Empty string matches all keys. */
|
||||||
|
private String delayKeySubstring;
|
||||||
|
|
||||||
|
/** Probability to delay visibility of a matching key. */
|
||||||
|
private float delayKeyProbability;
|
||||||
|
|
||||||
|
/** Time in milliseconds to delay visibility of newly modified object. */
|
||||||
|
private long delayKeyMsec;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Composite of data we need to track about recently deleted objects:
|
||||||
|
* when it was deleted (same was with recently put objects) and the object
|
||||||
|
* summary (since we should keep returning it for sometime after its
|
||||||
|
* deletion).
|
||||||
|
*/
|
||||||
|
private static class Delete {
|
||||||
|
private Long time;
|
||||||
|
private S3ObjectSummary summary;
|
||||||
|
|
||||||
|
Delete(Long time, S3ObjectSummary summary) {
|
||||||
|
this.time = time;
|
||||||
|
this.summary = summary;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long time() {
|
||||||
|
return time;
|
||||||
|
}
|
||||||
|
|
||||||
|
public S3ObjectSummary summary() {
|
||||||
|
return summary;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Map of key to delay -> time it was deleted + object summary (object
|
||||||
|
* summary is null for prefixes. */
|
||||||
|
private Map<String, Delete> delayedDeletes = new HashMap<>();
|
||||||
|
|
||||||
|
/** Map of key to delay -> time it was created. */
|
||||||
|
private Map<String, Long> delayedPutKeys = new HashMap<>();
|
||||||
|
|
||||||
|
  public InconsistentAmazonS3Client(AWSCredentialsProvider credentials,
      ClientConfiguration clientConfiguration, Configuration conf) {
    super(credentials, clientConfiguration);
    setupConfig(conf);
  }

  protected void setupConfig(Configuration conf) {

    delayKeySubstring = conf.get(FAIL_INJECT_INCONSISTENCY_KEY,
        DEFAULT_DELAY_KEY_SUBSTRING);
    // "" is a substring of all strings, use it to match all keys.
    if (delayKeySubstring.equals(MATCH_ALL_KEYS)) {
      delayKeySubstring = "";
    }
    delayKeyProbability = conf.getFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY,
        DEFAULT_DELAY_KEY_PROBABILITY);
    delayKeyMsec = conf.getLong(FAIL_INJECT_INCONSISTENCY_MSEC,
        DEFAULT_DELAY_KEY_MSEC);
    LOG.info("Enabled with {} msec delay, substring {}, probability {}",
        delayKeyMsec, delayKeySubstring, delayKeyProbability);
  }

  /**
   * Clear all outstanding inconsistent keys. After calling this function,
   * listings should behave normally (no failure injection), until additional
   * keys are matched for delay, e.g. via putObject(), deleteObject().
   */
  public void clearInconsistency() {
    LOG.info("clearing all delayed puts / deletes");
    delayedDeletes.clear();
    delayedPutKeys.clear();
  }
  /**
   * Convenience function for test code to cast from supertype.
   * @param c supertype to cast from
   * @return subtype, not null
   * @throws Exception on error
   */
  public static InconsistentAmazonS3Client castFrom(AmazonS3 c) throws
      Exception {
    InconsistentAmazonS3Client ic = null;
    if (c instanceof InconsistentAmazonS3Client) {
      ic = (InconsistentAmazonS3Client) c;
    }
    Preconditions.checkNotNull(ic, "Not an instance of " +
        "InconsistentAmazonS3Client");
    return ic;
  }

  @Override
  public DeleteObjectsResult deleteObjects(DeleteObjectsRequest
      deleteObjectsRequest)
      throws AmazonClientException, AmazonServiceException {
    for (DeleteObjectsRequest.KeyVersion keyVersion :
        deleteObjectsRequest.getKeys()) {
      registerDeleteObject(keyVersion.getKey(), deleteObjectsRequest
          .getBucketName());
    }
    return super.deleteObjects(deleteObjectsRequest);
  }

  @Override
  public void deleteObject(DeleteObjectRequest deleteObjectRequest)
      throws AmazonClientException, AmazonServiceException {
    String key = deleteObjectRequest.getKey();
    LOG.debug("key {}", key);
    registerDeleteObject(key, deleteObjectRequest.getBucketName());
    super.deleteObject(deleteObjectRequest);
  }

  /* We should only need to override this version of putObject() */
  @Override
  public PutObjectResult putObject(PutObjectRequest putObjectRequest)
      throws AmazonClientException, AmazonServiceException {
    LOG.debug("key {}", putObjectRequest.getKey());
    registerPutObject(putObjectRequest);
    return super.putObject(putObjectRequest);
  }

  /* We should only need to override this version of listObjects() */
  @Override
  public ObjectListing listObjects(ListObjectsRequest listObjectsRequest)
      throws AmazonClientException, AmazonServiceException {
    LOG.debug("prefix {}", listObjectsRequest.getPrefix());
    ObjectListing listing = super.listObjects(listObjectsRequest);
    listing = filterListObjects(listObjectsRequest, listing);
    listing = restoreListObjects(listObjectsRequest, listing);
    return listing;
  }

  private void addSummaryIfNotPresent(List<S3ObjectSummary> list,
      S3ObjectSummary item) {
    // Compare by key: S3ObjectSummary does not override equals().
    String key = item.getKey();
    for (S3ObjectSummary member : list) {
      if (member.getKey().equals(key)) {
        return;
      }
    }
    list.add(item);
  }
  /**
   * Add prefix of child to given list. The added prefix will be equal to
   * ancestor plus one directory past ancestor. e.g.:
   * if ancestor is "/a/b/c" and child is "/a/b/c/d/e/file" then "/a/b/c/d" is
   * added to list.
   * @param prefixes list to add to
   * @param ancestor path we are listing in
   * @param child full path to get prefix from
   */
  private void addPrefixIfNotPresent(List<String> prefixes, String ancestor,
      String child) {
    Path prefixCandidate = new Path(child).getParent();
    Path ancestorPath = new Path(ancestor);
    Preconditions.checkArgument(child.startsWith(ancestor), "%s does not " +
        "start with %s", child, ancestor);
    while (!prefixCandidate.isRoot()) {
      Path nextParent = prefixCandidate.getParent();
      if (nextParent.equals(ancestorPath)) {
        String prefix = prefixCandidate.toString();
        if (!prefixes.contains(prefix)) {
          prefixes.add(prefix);
        }
        return;
      }
      prefixCandidate = nextParent;
    }
  }

  /**
   * Checks whether the parent key is an ancestor of the child key.
   * @param parent key that may be the parent.
   * @param child key that may be the child.
   * @param recursive if false, only return true for direct children. If
   * true, any descendant will count.
   * @return true if parent is an ancestor of child
   */
  private boolean isDescendant(String parent, String child, boolean recursive) {
    if (recursive) {
      if (!parent.endsWith("/")) {
        parent = parent + "/";
      }
      return child.startsWith(parent);
    } else {
      Path actualParentPath = new Path(child).getParent();
      Path expectedParentPath = new Path(parent);
      return actualParentPath.equals(expectedParentPath);
    }
  }
  /**
   * Simulate eventual consistency of delete for this list operation: Any
   * recently-deleted keys will be added.
   * @param request List request
   * @param rawListing listing returned from underlying S3
   * @return listing with recently-deleted items restored
   */
  private ObjectListing restoreListObjects(ListObjectsRequest request,
      ObjectListing rawListing) {
    List<S3ObjectSummary> outputList = rawListing.getObjectSummaries();
    List<String> outputPrefixes = rawListing.getCommonPrefixes();
    // recursive list has no delimiter, returns everything that matches a
    // prefix.
    boolean recursiveObjectList = !("/".equals(request.getDelimiter()));

    // Go through all deleted keys
    for (String key : new HashSet<>(delayedDeletes.keySet())) {
      Delete delete = delayedDeletes.get(key);
      if (isKeyDelayed(delete.time(), key)) {
        if (isDescendant(request.getPrefix(), key, recursiveObjectList)) {
          if (delete.summary() != null) {
            addSummaryIfNotPresent(outputList, delete.summary());
          }
        }
        // Non-recursive list has delimiter: will return rolled-up prefixes for
        // all keys that are not direct children
        if (!recursiveObjectList) {
          if (isDescendant(request.getPrefix(), key, true)) {
            addPrefixIfNotPresent(outputPrefixes, request.getPrefix(), key);
          }
        }
      } else {
        // Clean up any expired entries
        delayedDeletes.remove(key);
      }
    }

    return new CustomObjectListing(rawListing, outputList, outputPrefixes);
  }

  private ObjectListing filterListObjects(ListObjectsRequest request,
      ObjectListing rawListing) {

    // Filter object listing
    List<S3ObjectSummary> outputList = new ArrayList<>();
    for (S3ObjectSummary s : rawListing.getObjectSummaries()) {
      String key = s.getKey();
      if (!isKeyDelayed(delayedPutKeys.get(key), key)) {
        outputList.add(s);
      }
    }

    // Filter prefixes (directories)
    List<String> outputPrefixes = new ArrayList<>();
    for (String key : rawListing.getCommonPrefixes()) {
      if (!isKeyDelayed(delayedPutKeys.get(key), key)) {
        outputPrefixes.add(key);
      }
    }

    return new CustomObjectListing(rawListing, outputList, outputPrefixes);
  }
  private boolean isKeyDelayed(Long enqueueTime, String key) {
    if (enqueueTime == null) {
      LOG.debug("no delay for key {}", key);
      return false;
    }
    long currentTime = System.currentTimeMillis();
    long deadline = enqueueTime + delayKeyMsec;
    if (currentTime >= deadline) {
      delayedDeletes.remove(key);
      LOG.debug("no longer delaying {}", key);
      return false;
    } else {
      LOG.info("delaying {}", key);
      return true;
    }
  }

  private void registerDeleteObject(String key, String bucket) {
    if (shouldDelay(key)) {
      // Record summary so we can add it back for some time post-deletion
      S3ObjectSummary summary = null;
      ObjectListing list = listObjects(bucket, key);
      for (S3ObjectSummary result : list.getObjectSummaries()) {
        if (result.getKey().equals(key)) {
          summary = result;
          break;
        }
      }
      delayedDeletes.put(key, new Delete(System.currentTimeMillis(), summary));
    }
  }

  private void registerPutObject(PutObjectRequest req) {
    String key = req.getKey();
    if (shouldDelay(key)) {
      enqueueDelayedPut(key);
    }
  }

  /**
   * Should we delay listing visibility for this key?
   * @param key key which is being put
   * @return true if we should delay
   */
  private boolean shouldDelay(String key) {
    boolean delay = key.contains(delayKeySubstring);
    delay = delay && trueWithProbability(delayKeyProbability);
    LOG.debug("{} -> {}", key, delay);
    return delay;
  }
  private boolean trueWithProbability(float p) {
    return Math.random() < p;
  }

  /**
   * Record this key as something that should not become visible in
   * listObject replies for a while, to simulate eventual list consistency.
   * @param key key to delay visibility of
   */
  private void enqueueDelayedPut(String key) {
    LOG.debug("delaying put of {}", key);
    delayedPutKeys.put(key, System.currentTimeMillis());
  }

  /** Since ObjectListing is immutable, we just override it with wrapper. */
  private static class CustomObjectListing extends ObjectListing {

    private final List<S3ObjectSummary> customListing;
    private final List<String> customPrefixes;

    CustomObjectListing(ObjectListing rawListing,
        List<S3ObjectSummary> customListing,
        List<String> customPrefixes) {
      super();
      this.customListing = customListing;
      this.customPrefixes = customPrefixes;

      this.setBucketName(rawListing.getBucketName());
      this.setCommonPrefixes(rawListing.getCommonPrefixes());
      this.setDelimiter(rawListing.getDelimiter());
      this.setEncodingType(rawListing.getEncodingType());
      this.setMarker(rawListing.getMarker());
      this.setMaxKeys(rawListing.getMaxKeys());
      this.setNextMarker(rawListing.getNextMarker());
      this.setPrefix(rawListing.getPrefix());
      this.setTruncated(rawListing.isTruncated());
    }

    @Override
    public List<S3ObjectSummary> getObjectSummaries() {
      return customListing;
    }

    @Override
    public List<String> getCommonPrefixes() {
      return customPrefixes;
    }
  }
}
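To show how the fault injection above is meant to be driven, here is a minimal test-style sketch. It is illustrative only: the bucket name, key, payload file and the pre-built AmazonS3 client are placeholders, not part of this patch.

import java.io.File;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.PutObjectRequest;
import org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client;

public class InconsistencyInjectionSketch {
  // "s3" is assumed to be a client created through the inconsistent factory
  // with default settings; bucket and key names are illustrative values only.
  static void demo(AmazonS3 s3, File payload) throws Exception {
    InconsistentAmazonS3Client client = InconsistentAmazonS3Client.castFrom(s3);
    String key = "test/DELAY_LISTING_ME/file.txt";  // matches the default delay substring
    client.putObject(new PutObjectRequest("example-bucket", key, payload));
    // With the defaults the new key is withheld from listings for
    // DEFAULT_DELAY_KEY_MSEC milliseconds, simulating eventual list consistency.
    ObjectListing listing = client.listObjects("example-bucket", "test/");
    // ... assertions on listing.getObjectSummaries() would go here ...
    client.clearInconsistency();  // reset so later listings behave normally
  }
}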
@ -0,0 +1,40 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.services.s3.AmazonS3;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;

/**
 * S3 Client factory used for testing with eventual consistency fault injection.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InconsistentS3ClientFactory extends DefaultS3ClientFactory {

  @Override
  protected AmazonS3 newAmazonS3Client(AWSCredentialsProvider credentials,
      ClientConfiguration awsConf) {
    LOG.warn("** FAILURE INJECTION ENABLED.  Do not run in production! **");
    return new InconsistentAmazonS3Client(credentials, awsConf, getConf());
  }
}
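A test would typically switch S3A over to this factory through configuration. The sketch below assumes the S3A client-factory option keeps its usual name; treat the key string as an assumption to verify against org.apache.hadoop.fs.s3a.Constants rather than as part of this change.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class InjectionWiringSketch {
  static FileSystem createTestFs() throws Exception {
    Configuration conf = new Configuration();
    // Assumed key name; check Constants before relying on it.
    conf.set("fs.s3a.s3.client.factory.impl",
        "org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory");
    // The bucket URI is a placeholder.
    return FileSystem.newInstance(new URI("s3a://example-bucket/"), conf);
  }
}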
@ -22,18 +22,25 @@
 import com.amazonaws.services.s3.model.ListObjectsRequest;
 import com.amazonaws.services.s3.model.ObjectListing;
 import com.amazonaws.services.s3.model.S3ObjectSummary;
+import com.google.common.annotations.VisibleForTesting;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.LocatedFileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.RemoteIterator;
 
+import com.google.common.base.Preconditions;
 import org.slf4j.Logger;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
 import java.util.ListIterator;
 import java.util.NoSuchElementException;
+import java.util.Set;
 
 import static org.apache.hadoop.fs.s3a.Constants.S3N_FOLDER_SUFFIX;
 import static org.apache.hadoop.fs.s3a.S3AUtils.createFileStatus;
@ -54,8 +61,25 @@ public Listing(S3AFileSystem owner) {
   }
 
   /**
-   * Create a FileStatus iterator against a path, with a given
-   * list object request.
+   * Create a FileStatus iterator against a provided list of file status, with
+   * a given status filter.
+   *
+   * @param fileStatuses the provided list of file status. NO remote calls.
+   * @param filter file path filter on which paths to accept
+   * @param acceptor the file status acceptor
+   * @return the file status iterator
+   */
+  ProvidedFileStatusIterator createProvidedFileStatusIterator(
+      FileStatus[] fileStatuses,
+      PathFilter filter,
+      FileStatusAcceptor acceptor) {
+    return new ProvidedFileStatusIterator(fileStatuses, filter, acceptor);
+  }
+
+  /**
+   * Create a FileStatus iterator against a path, with a given list object
+   * request.
+   *
    * @param listPath path of the listing
    * @param request initial request to make
    * @param filter the filter on which paths to accept
@ -69,10 +93,34 @@ FileStatusListingIterator createFileStatusListingIterator(
       ListObjectsRequest request,
       PathFilter filter,
       Listing.FileStatusAcceptor acceptor) throws IOException {
+    return createFileStatusListingIterator(listPath, request, filter, acceptor,
+        null);
+  }
+
+  /**
+   * Create a FileStatus iterator against a path, with a given
+   * list object request.
+   * @param listPath path of the listing
+   * @param request initial request to make
+   * @param filter the filter on which paths to accept
+   * @param acceptor the class/predicate to decide which entries to accept
+   * in the listing based on the full file status.
+   * @param providedStatus the provided list of file status, which may contain
+   * items that are not listed from source.
+   * @return the iterator
+   * @throws IOException IO Problems
+   */
+  FileStatusListingIterator createFileStatusListingIterator(
+      Path listPath,
+      ListObjectsRequest request,
+      PathFilter filter,
+      Listing.FileStatusAcceptor acceptor,
+      RemoteIterator<FileStatus> providedStatus) throws IOException {
     return new FileStatusListingIterator(
         new ObjectListingIterator(listPath, request),
         filter,
-        acceptor);
+        acceptor,
+        providedStatus);
   }
 
   /**
@ -80,11 +128,26 @@ FileStatusListingIterator createFileStatusListingIterator(
    * @param statusIterator an iterator over the remote status entries
    * @return a new remote iterator
    */
+  @VisibleForTesting
   LocatedFileStatusIterator createLocatedFileStatusIterator(
       RemoteIterator<FileStatus> statusIterator) {
     return new LocatedFileStatusIterator(statusIterator);
   }
 
+  /**
+   * Create a located status iterator that wraps another to filter out a set
+   * of recently deleted items.
+   * @param iterator an iterator over the remote located status entries.
+   * @param tombstones set of paths that are recently deleted and should be
+   * filtered.
+   * @return a new remote iterator.
+   */
+  @VisibleForTesting
+  TombstoneReconcilingIterator createTombstoneReconcilingIterator(
+      RemoteIterator<LocatedFileStatus> iterator, Set<Path> tombstones) {
+    return new TombstoneReconcilingIterator(iterator, tombstones);
+  }
+
   /**
    * Interface to implement by the logic deciding whether to accept a summary
    * entry or path as a valid file or directory.
@ -108,6 +171,13 @@ interface FileStatusAcceptor {
      * should be generated.)
      */
    boolean accept(Path keyPath, String commonPrefix);
+
+    /**
+     * Predicate to decide whether or not to accept a file status.
+     * @param status file status containing file path information
+     * @return true if the status is accepted else false
+     */
+    boolean accept(FileStatus status);
   }
 
   /**
@ -115,9 +185,9 @@ interface FileStatusAcceptor {
    * value.
    *
    * If the status value is null, the iterator declares that it has no data.
-   * This iterator is used to handle {@link listStatus()} calls where the path
-   * handed in refers to a file, not a directory: this is the iterator
-   * returned.
+   * This iterator is used to handle {@link S3AFileSystem#listStatus} calls
+   * where the path handed in refers to a file, not a directory: this is the
+   * iterator returned.
    */
   static final class SingleStatusRemoteIterator
       implements RemoteIterator<LocatedFileStatus> {
@ -168,6 +238,47 @@ public LocatedFileStatus next() throws IOException {
     }
   }
 
+  /**
+   * This wraps up a provided non-null list of file status as a remote iterator.
+   *
+   * It first filters the provided list and later {@link #next} calls will get
+   * entries from the filtered list. This suffers from scalability issues if the
+   * provided list is too large.
+   *
+   * There is no remote data to fetch.
+   */
+  static class ProvidedFileStatusIterator
+      implements RemoteIterator<FileStatus> {
+    private final ArrayList<FileStatus> filteredStatusList;
+    private int index = 0;
+
+    ProvidedFileStatusIterator(FileStatus[] fileStatuses, PathFilter filter,
+        FileStatusAcceptor acceptor) {
+      Preconditions.checkArgument(fileStatuses != null, "Null status list!");
+
+      filteredStatusList = new ArrayList<>(fileStatuses.length);
+      for (FileStatus status : fileStatuses) {
+        if (filter.accept(status.getPath()) && acceptor.accept(status)) {
+          filteredStatusList.add(status);
+        }
+      }
+      filteredStatusList.trimToSize();
+    }
+
+    @Override
+    public boolean hasNext() throws IOException {
+      return index < filteredStatusList.size();
+    }
+
+    @Override
+    public FileStatus next() throws IOException {
+      if (!hasNext()) {
+        throw new NoSuchElementException();
+      }
+      return filteredStatusList.get(index++);
+    }
+  }
+
   /**
    * Wraps up object listing into a remote iterator which will ask for more
    * listing data if needed.
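To make the data flow concrete, here is a rough sketch of how in-package code might wrap a pre-computed status array (for example, entries recovered from the metadata store) as a remote iterator. The Listing instance and the statuses array are assumptions for illustration, not calls made by this patch.

import java.io.IOException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.fs.RemoteIterator;

// Sketch only; it would have to live in org.apache.hadoop.fs.s3a because the
// factory method is package-private.
class ProvidedIterationSketch {
  static void walk(Listing listing, FileStatus[] statuses) throws IOException {
    PathFilter acceptAllPaths = path -> true;
    RemoteIterator<FileStatus> it = listing.createProvidedFileStatusIterator(
        statuses, acceptAllPaths, new Listing.AcceptAllButS3nDirs());
    while (it.hasNext()) {
      FileStatus status = it.next();
      // every status seen here passed both the path filter and the acceptor
    }
  }
}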
@ -179,7 +290,7 @@ public LocatedFileStatus next() throws IOException {
    * iterator can declare that there is more data available.
    *
    * The need to filter the results precludes the iterator from simply
-   * declaring that if the {@link S3AFileSystem.ObjectListingIterator#hasNext()}
+   * declaring that if the {@link ObjectListingIterator#hasNext()}
    * is true then there are more results. Instead the next batch of results must
    * be retrieved and filtered.
    *
@ -208,20 +319,33 @@ class FileStatusListingIterator
     /** Iterator over the current set of results. */
     private ListIterator<FileStatus> statusBatchIterator;
 
+    private final Set<FileStatus> providedStatus;
+    private Iterator<FileStatus> providedStatusIterator;
+
     /**
      * Create an iterator over file status entries.
      * @param source the listing iterator from a listObjects call.
      * @param filter the filter on which paths to accept
      * @param acceptor the class/predicate to decide which entries to accept
      * in the listing based on the full file status.
+     * @param providedStatus the provided list of file status, which may contain
+     * items that are not listed from source.
      * @throws IOException IO Problems
      */
     FileStatusListingIterator(ObjectListingIterator source,
         PathFilter filter,
-        FileStatusAcceptor acceptor) throws IOException {
+        FileStatusAcceptor acceptor,
+        RemoteIterator<FileStatus> providedStatus) throws IOException {
       this.source = source;
       this.filter = filter;
       this.acceptor = acceptor;
+      this.providedStatus = new HashSet<>();
+      for (; providedStatus != null && providedStatus.hasNext();) {
+        final FileStatus status = providedStatus.next();
+        if (filter.accept(status.getPath()) && acceptor.accept(status)) {
+          this.providedStatus.add(status);
+        }
+      }
       // build the first set of results. This will not trigger any
       // remote IO, assuming the source iterator is in its initial
       // iteration
@ -233,26 +357,53 @@ class FileStatusListingIterator
      * If there is data in the local filtered list, return true.
      * Else: request more data util that condition is met, or there
      * is no more remote listing data.
+     * Lastly, return true if the {@code providedStatusIterator}
+     * has items left.
      * @return true if a call to {@link #next()} will succeed.
      * @throws IOException
      */
     @Override
     public boolean hasNext() throws IOException {
-      return statusBatchIterator.hasNext() || requestNextBatch();
+      return sourceHasNext() || providedStatusIterator.hasNext();
+    }
+
+    private boolean sourceHasNext() throws IOException {
+      if (statusBatchIterator.hasNext() || requestNextBatch()) {
+        return true;
+      } else {
+        // turn to file status that are only in provided list
+        if (providedStatusIterator == null) {
+          LOG.debug("Start iterating the provided status.");
+          providedStatusIterator = providedStatus.iterator();
+        }
+        return false;
+      }
     }
 
     @Override
     public FileStatus next() throws IOException {
-      if (!hasNext()) {
-        throw new NoSuchElementException();
+      final FileStatus status;
+      if (sourceHasNext()) {
+        status = statusBatchIterator.next();
+        // We remove from provided list the file status listed by S3 so that
+        // this does not return duplicate items.
+        LOG.debug("Removing the status from provided file status {}", status);
+        providedStatus.remove(status);
+      } else {
+        if (providedStatusIterator.hasNext()) {
+          status = providedStatusIterator.next();
+          LOG.debug("Returning provided file status {}", status);
+        } else {
+          throw new NoSuchElementException();
+        }
       }
-      return statusBatchIterator.next();
+      return status;
     }
 
     /**
      * Try to retrieve another batch.
      * Note that for the initial batch,
-     * {@link S3AFileSystem.ObjectListingIterator} does not generate a request;
+     * {@link ObjectListingIterator} does not generate a request;
      * it simply returns the initial set.
      *
      * @return true if a new batch was created.
@ -312,7 +463,7 @@ private boolean buildNextStatusBatch(ObjectListing objects) {
       for (String prefix : objects.getCommonPrefixes()) {
         Path keyPath = owner.keyToQualifiedPath(prefix);
         if (acceptor.accept(keyPath, prefix) && filter.accept(keyPath)) {
-          FileStatus status = new S3AFileStatus(false, keyPath,
+          FileStatus status = new S3AFileStatus(Tristate.FALSE, keyPath,
               owner.getUsername());
           LOG.debug("Adding directory: {}", status);
           added++;
@ -352,7 +503,7 @@ public int getBatchSize() {
    * instance.
    *
    * 2. Second and later invocations will continue the ongoing listing,
-   * calling {@link #continueListObjects(ObjectListing)} to request the next
+   * calling {@link S3AFileSystem#continueListObjects} to request the next
    * batch of results.
    *
    * 3. The {@link #hasNext()} predicate returns true for the initial call,
@ -504,6 +655,11 @@ public boolean accept(Path keyPath, S3ObjectSummary summary) {
     public boolean accept(Path keyPath, String prefix) {
       return false;
     }
+
+    @Override
+    public boolean accept(FileStatus status) {
+      return (status != null) && status.isFile();
+    }
   }
 
   /**
@ -533,6 +689,80 @@ public LocatedFileStatus next() throws IOException {
     }
   }
 
+  /**
+   * Wraps another iterator and filters out files that appear in the provided
+   * set of tombstones.  Will read ahead in the iterator when necessary to
+   * ensure that emptiness is detected early enough if only deleted objects
+   * remain in the source iterator.
+   */
+  static class TombstoneReconcilingIterator implements
+      RemoteIterator<LocatedFileStatus> {
+    private LocatedFileStatus next = null;
+    private final RemoteIterator<LocatedFileStatus> iterator;
+    private final Set<Path> tombstones;
+
+    /**
+     * @param iterator Source iterator to filter
+     * @param tombstones set of tombstone markers to filter out of results
+     */
+    TombstoneReconcilingIterator(RemoteIterator<LocatedFileStatus>
+        iterator, Set<Path> tombstones) {
+      this.iterator = iterator;
+      if (tombstones != null) {
+        this.tombstones = tombstones;
+      } else {
+        this.tombstones = Collections.EMPTY_SET;
+      }
+    }
+
+    private boolean fetch() throws IOException {
+      while (next == null && iterator.hasNext()) {
+        LocatedFileStatus candidate = iterator.next();
+        if (!tombstones.contains(candidate.getPath())) {
+          next = candidate;
+          return true;
+        }
+      }
+      return false;
+    }
+
+    public boolean hasNext() throws IOException {
+      if (next != null) {
+        return true;
+      }
+      return fetch();
+    }
+
+    public LocatedFileStatus next() throws IOException {
+      if (hasNext()) {
+        LocatedFileStatus result = next;
+        next = null;
+        fetch();
+        return result;
+      }
+      throw new NoSuchElementException();
+    }
+  }
+
+  /**
+   * Accept all entries except those which map to S3N pseudo directory markers.
+   */
+  static class AcceptAllButS3nDirs implements FileStatusAcceptor {
+
+    public boolean accept(Path keyPath, S3ObjectSummary summary) {
+      return !summary.getKey().endsWith(S3N_FOLDER_SUFFIX);
+    }
+
+    public boolean accept(Path keyPath, String prefix) {
+      return !keyPath.toString().endsWith(S3N_FOLDER_SUFFIX);
+    }
+
+    public boolean accept(FileStatus status) {
+      return !status.getPath().toString().endsWith(S3N_FOLDER_SUFFIX);
+    }
+
+  }
+
   /**
    * Accept all entries except the base path and those which map to S3N
    * pseudo directory markers.
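As a rough illustration of how the tombstone filtering is used, the sketch below wraps a raw located-status iterator so that recently deleted paths never surface; it assumes in-package access, and the source iterator and deleted path are placeholders.

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

// Sketch only: "raw" would be a listing iterator obtained elsewhere.
class TombstoneFilterSketch {
  static void print(Listing listing, RemoteIterator<LocatedFileStatus> raw)
      throws IOException {
    Set<Path> tombstones = new HashSet<>();
    tombstones.add(new Path("s3a://example-bucket/dir/recently-deleted.txt"));
    RemoteIterator<LocatedFileStatus> filtered =
        listing.createTombstoneReconcilingIterator(raw, tombstones);
    while (filtered.hasNext()) {
      // entries whose paths are in the tombstone set never appear here
      System.out.println(filtered.next().getPath());
    }
  }
}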
@ -575,6 +805,11 @@ public boolean accept(Path keyPath, S3ObjectSummary summary) {
     public boolean accept(Path keyPath, String prefix) {
       return !keyPath.equals(qualifiedPath);
     }
+
+    @Override
+    public boolean accept(FileStatus status) {
+      return (status != null) && !status.getPath().equals(qualifiedPath);
+    }
   }
 
   /**
@ -79,6 +79,9 @@ class S3ABlockOutputStream extends OutputStream {
   /** Size of all blocks. */
   private final int blockSize;
 
+  /** Total bytes for uploads submitted so far. */
+  private long bytesSubmitted;
+
   /** Callback for progress. */
   private final ProgressListener progressListener;
   private final ListeningExecutorService executorService;
@ -302,6 +305,7 @@ private synchronized void uploadCurrentBlock() throws IOException {
     }
     try {
       multiPartUpload.uploadBlockAsync(getActiveBlock());
+      bytesSubmitted += getActiveBlock().dataSize();
     } finally {
       // set the block to null, so the next write will create a new block.
       clearActiveBlock();
@ -330,13 +334,14 @@ public void close() throws IOException {
         this,
         blockCount,
         hasBlock ? block : "(none)");
+    long bytes = 0;
     try {
       if (multiPartUpload == null) {
         if (hasBlock) {
           // no uploads of data have taken place, put the single block up.
           // This must happen even if there is no data, so that 0 byte files
           // are created.
-          putObject();
+          bytes = putObject();
         }
       } else {
         // there has already been at least one block scheduled for upload;
@ -350,6 +355,7 @@ public void close() throws IOException {
         multiPartUpload.waitForAllPartUploads();
         // then complete the operation
         multiPartUpload.complete(partETags);
+        bytes = bytesSubmitted;
       }
       LOG.debug("Upload complete for {}", writeOperationHelper);
     } catch (IOException ioe) {
@ -362,7 +368,7 @@ public void close() throws IOException {
       clearActiveBlock();
     }
     // All end of write operations, including deleting fake parent directories
-    writeOperationHelper.writeSuccessful();
+    writeOperationHelper.writeSuccessful(bytes);
   }
 
   /**
@ -370,8 +376,11 @@ public void close() throws IOException {
    * is empty a 0-byte PUT will be invoked, as it is needed to create an
    * entry at the far end.
    * @throws IOException any problem.
+   * @return number of bytes uploaded. If the thread was interrupted while
+   * waiting for the upload to complete, returns zero with the interrupted
+   * flag set on this thread.
    */
-  private void putObject() throws IOException {
+  private int putObject() throws IOException {
     LOG.debug("Executing regular upload for {}", writeOperationHelper);
 
     final S3ADataBlocks.DataBlock block = getActiveBlock();
@ -405,9 +414,11 @@ public PutObjectResult call() throws Exception {
     //wait for completion
     try {
       putObjectResult.get();
+      return size;
     } catch (InterruptedException ie) {
       LOG.warn("Interrupted object upload", ie);
       Thread.currentThread().interrupt();
+      return 0;
     } catch (ExecutionException ee) {
       throw extractException("regular upload", key, ee);
     }
@ -31,7 +31,7 @@
 @InterfaceAudience.Private
 @InterfaceStability.Evolving
 public class S3AFileStatus extends FileStatus {
-  private boolean isEmptyDirectory;
+  private Tristate isEmptyDirectory;
 
   /**
    * Create a directory status.
@ -42,6 +42,18 @@ public class S3AFileStatus extends FileStatus {
   public S3AFileStatus(boolean isemptydir,
       Path path,
       String owner) {
+    this(Tristate.fromBool(isemptydir), path, owner);
+  }
+
+  /**
+   * Create a directory status.
+   * @param isemptydir is this an empty directory?
+   * @param path the path
+   * @param owner the owner
+   */
+  public S3AFileStatus(Tristate isemptydir,
+      Path path,
+      String owner) {
     super(0, true, 1, 0, 0, path);
     isEmptyDirectory = isemptydir;
     setOwner(owner);
@ -59,12 +71,37 @@ public S3AFileStatus(boolean isemptydir,
   public S3AFileStatus(long length, long modification_time, Path path,
       long blockSize, String owner) {
     super(length, false, 1, blockSize, modification_time, path);
-    isEmptyDirectory = false;
+    isEmptyDirectory = Tristate.FALSE;
     setOwner(owner);
     setGroup(owner);
   }
 
-  public boolean isEmptyDirectory() {
+  /**
+   * Convenience constructor for creating from a vanilla FileStatus plus
+   * an isEmptyDirectory flag.
+   * @param source FileStatus to convert to S3AFileStatus
+   * @param isEmptyDirectory TRUE/FALSE if known to be / not be an empty
+   * directory, UNKNOWN if that information was not computed.
+   * @return a new S3AFileStatus
+   */
+  public static S3AFileStatus fromFileStatus(FileStatus source,
+      Tristate isEmptyDirectory) {
+    if (source.isDirectory()) {
+      return new S3AFileStatus(isEmptyDirectory, source.getPath(),
+          source.getOwner());
+    } else {
+      return new S3AFileStatus(source.getLen(), source.getModificationTime(),
+          source.getPath(), source.getBlockSize(), source.getOwner());
+    }
+  }
+
+
+  /**
+   * @return FALSE if the status is not a directory, or it is a directory but
+   * known to be non-empty.  TRUE if it is an empty directory.  UNKNOWN if it
+   * is a directory, but we have not computed whether or not it is empty.
+   */
+  public Tristate isEmptyDirectory() {
     return isEmptyDirectory;
   }
 
@ -110,7 +147,7 @@ public long getModificationTime(){
   @Override
   public String toString() {
     return super.toString() +
-        String.format(" isEmptyDirectory=%s", isEmptyDirectory());
+        String.format(" isEmptyDirectory=%s", isEmptyDirectory().name());
   }
 
 }
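Because isEmptyDirectory() now returns a three-valued answer rather than a boolean, callers must treat UNKNOWN explicitly, as the rename path below does with its comparison against Tristate.TRUE. A small sketch of the intended pattern, with the status object as a placeholder:

import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.Tristate;

class EmptyDirCheckSketch {
  // Only an explicit TRUE means "known to be an empty directory";
  // UNKNOWN would require a further probe before acting on emptiness.
  static boolean knownEmptyDir(S3AFileStatus status) {
    return status.isDirectory() && status.isEmptyDirectory() == Tristate.TRUE;
  }
}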
@ -25,12 +25,16 @@
 import java.io.InterruptedIOException;
 import java.net.URI;
 import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
 import java.util.Date;
 import java.util.EnumSet;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.concurrent.ExecutorService;
+import java.util.Set;
 import java.util.Objects;
+import java.util.concurrent.ExecutorService;
 import java.util.concurrent.LinkedBlockingQueue;
 import java.util.concurrent.ThreadPoolExecutor;
 import java.util.concurrent.TimeUnit;
@ -92,6 +96,11 @@
 import org.apache.hadoop.fs.RemoteIterator;
 import org.apache.hadoop.fs.StorageStatistics;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStoreListFilesIterator;
+import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
+import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
+import org.apache.hadoop.fs.s3a.s3guard.S3Guard;
 import org.apache.hadoop.fs.s3native.S3xLoginHelper;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.Progressable;
@ -149,6 +158,8 @@ public class S3AFileSystem extends FileSystem {
   private long readAhead;
   private S3AInputPolicy inputPolicy;
   private final AtomicBoolean closed = new AtomicBoolean(false);
+  private MetadataStore metadataStore;
+  private boolean allowAuthoritative;
 
   // The maximum number of entries that can be deleted in any call to s3
   private static final int MAX_ENTRIES_TO_DELETE = 1000;
@ -277,6 +288,10 @@ public StorageStatistics provide() {
       } else {
         LOG.debug("Using S3AOutputStream");
       }
+
+      metadataStore = S3Guard.getMetadataStore(this);
+      allowAuthoritative = conf.getBoolean(METADATASTORE_AUTHORITATIVE,
+          DEFAULT_METADATASTORE_AUTHORITATIVE);
     } catch (AmazonClientException e) {
       throw translateException("initializing ", new Path(name), e);
     }
@ -388,11 +403,34 @@ public int getDefaultPort() {
    * Returns the S3 client used by this filesystem.
    * @return AmazonS3Client
    */
-  @VisibleForTesting
   AmazonS3 getAmazonS3Client() {
     return s3;
   }
 
+  /**
+   * Get the region of a bucket.
+   * @return the region in which a bucket is located
+   * @throws IOException on any failure.
+   */
+  public String getBucketLocation() throws IOException {
+    return getBucketLocation(bucket);
+  }
+
+  /**
+   * Get the region of a bucket.
+   * @param bucketName the name of the bucket
+   * @return the region in which a bucket is located
+   * @throws IOException on any failure.
+   */
+  public String getBucketLocation(String bucketName) throws IOException {
+    try {
+      return s3.getBucketLocation(bucketName);
+    } catch (AmazonClientException e) {
+      throw translateException("getBucketLocation()",
+          bucketName, e);
+    }
+  }
+
   /**
    * Returns the read ahead range value used by this filesystem
    * @return
@ -457,7 +495,7 @@ public void setInputPolicy(S3AInputPolicy inputPolicy) {
    * @return a key excluding the leading "/", or, if it is the root path, ""
    */
   @VisibleForTesting
-  String pathToKey(Path path) {
+  public String pathToKey(Path path) {
     if (!path.isAbsolute()) {
       path = new Path(workingDir, path);
     }
@ -508,7 +546,7 @@ Path keyToQualifiedPath(String key) {
    * @param path path to qualify
    * @return a qualified path.
    */
-  Path qualify(Path path) {
+  public Path qualify(Path path) {
     return path.makeQualified(uri, workingDir);
   }
 
@ -578,7 +616,7 @@ public FSDataOutputStream create(Path f, FsPermission permission,
       boolean overwrite, int bufferSize, short replication, long blockSize,
       Progressable progress) throws IOException {
     String key = pathToKey(f);
-    S3AFileStatus status = null;
+    FileStatus status = null;
     try {
       // get the status or throw an FNFE
       status = getFileStatus(f);
@ -706,8 +744,8 @@ public boolean rename(Path src, Path dst) throws IOException {
    * the description of the operation.
    * This operation throws an exception on any failure which needs to be
    * reported and downgraded to a failure. That is: if a rename
-   * @param src path to be renamed
-   * @param dst new path after rename
+   * @param source path to be renamed
+   * @param dest new path after rename
    * @throws RenameFailedException if some criteria for a state changing
    * rename was not met. This means work didn't happen; it's not something
    * which is reported upstream to the FileSystem APIs, for which the semantics
@ -716,9 +754,12 @@ public boolean rename(Path src, Path dst) throws IOException {
    * @throws IOException on IO failure.
    * @throws AmazonClientException on failures inside the AWS SDK
    */
-  private boolean innerRename(Path src, Path dst)
+  private boolean innerRename(Path source, Path dest)
       throws RenameFailedException, FileNotFoundException, IOException,
         AmazonClientException {
+    Path src = qualify(source);
+    Path dst = qualify(dest);
+
     LOG.debug("Rename path {} to {}", src, dst);
     incrementStatistic(INVOCATION_RENAME);
 
@ -734,7 +775,7 @@ private boolean innerRename(Path src, Path dst)
 
     // get the source file status; this raises a FNFE if there is no source
    // file.
-    S3AFileStatus srcStatus = getFileStatus(src);
+    S3AFileStatus srcStatus = innerGetFileStatus(src, true);
 
     if (srcKey.equals(dstKey)) {
       LOG.debug("rename: src and dest refer to the same file or directory: {}",
@ -746,7 +787,7 @@ private boolean innerRename(Path src, Path dst)
 
     S3AFileStatus dstStatus = null;
     try {
-      dstStatus = getFileStatus(dst);
+      dstStatus = innerGetFileStatus(dst, true);
       // if there is no destination entry, an exception is raised.
       // hence this code sequence can assume that there is something
       // at the end of the path; the only detail being what it is and
@ -756,7 +797,7 @@ private boolean innerRename(Path src, Path dst)
         throw new RenameFailedException(src, dst,
             "source is a directory and dest is a file")
             .withExitCode(srcStatus.isFile());
-      } else if (!dstStatus.isEmptyDirectory()) {
+      } else if (dstStatus.isEmptyDirectory() != Tristate.TRUE) {
         throw new RenameFailedException(src, dst,
             "Destination is a non-empty directory")
             .withExitCode(false);
@ -778,7 +819,8 @@ private boolean innerRename(Path src, Path dst)
       Path parent = dst.getParent();
       if (!pathToKey(parent).isEmpty()) {
         try {
-          S3AFileStatus dstParentStatus = getFileStatus(dst.getParent());
+          S3AFileStatus dstParentStatus = innerGetFileStatus(dst.getParent(),
+              false);
           if (!dstParentStatus.isDirectory()) {
             throw new RenameFailedException(src, dst,
                 "destination parent is not a directory");
@ -790,9 +832,20 @@ private boolean innerRename(Path src, Path dst)
       }
     }
 
+    // If we have a MetadataStore, track deletions/creations.
+    Collection<Path> srcPaths = null;
+    List<PathMetadata> dstMetas = null;
+    if (hasMetadataStore()) {
+      srcPaths = new HashSet<>(); // srcPaths need fast look up before put
+      dstMetas = new ArrayList<>();
+    }
+    // TODO S3Guard HADOOP-13761: retries when source paths are not visible yet
+    // TODO S3Guard: performance: mark destination dirs as authoritative
+
     // Ok! Time to start
     if (srcStatus.isFile()) {
       LOG.debug("rename: renaming file {} to {}", src, dst);
+      long length = srcStatus.getLen();
       if (dstStatus != null && dstStatus.isDirectory()) {
         String newDstKey = dstKey;
         if (!newDstKey.endsWith("/")) {
@ -801,9 +854,14 @@ private boolean innerRename(Path src, Path dst)
         String filename =
             srcKey.substring(pathToKey(src.getParent()).length()+1);
         newDstKey = newDstKey + filename;
-        copyFile(srcKey, newDstKey, srcStatus.getLen());
+        copyFile(srcKey, newDstKey, length);
+        S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src,
+            keyToQualifiedPath(newDstKey), length, getDefaultBlockSize(dst),
+            username);
       } else {
         copyFile(srcKey, dstKey, srcStatus.getLen());
+        S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, src, dst,
+            length, getDefaultBlockSize(dst), username);
       }
       innerDelete(srcStatus, false);
     } else {
@ -825,42 +883,66 @@ private boolean innerRename(Path src, Path dst)
       }
 
       List<DeleteObjectsRequest.KeyVersion> keysToDelete = new ArrayList<>();
-      if (dstStatus != null && dstStatus.isEmptyDirectory()) {
+      if (dstStatus != null && dstStatus.isEmptyDirectory() == Tristate.TRUE) {
         // delete unnecessary fake directory.
         keysToDelete.add(new DeleteObjectsRequest.KeyVersion(dstKey));
       }
 
-      ListObjectsRequest request = new ListObjectsRequest();
-      request.setBucketName(bucket);
-      request.setPrefix(srcKey);
-      request.setMaxKeys(maxKeys);
-
-      ObjectListing objects = listObjects(request);
-
-      while (true) {
-        for (S3ObjectSummary summary : objects.getObjectSummaries()) {
-          keysToDelete.add(
-              new DeleteObjectsRequest.KeyVersion(summary.getKey()));
-          String newDstKey =
+      Path parentPath = keyToPath(srcKey);
+      RemoteIterator<LocatedFileStatus> iterator = listFilesAndEmptyDirectories(
+          parentPath, true);
+      while (iterator.hasNext()) {
+        LocatedFileStatus status = iterator.next();
+        long length = status.getLen();
+        String key = pathToKey(status.getPath());
+        if (status.isDirectory() && !key.endsWith("/")) {
+          key += "/";
+        }
+        keysToDelete
+            .add(new DeleteObjectsRequest.KeyVersion(key));
+        String newDstKey =
+            dstKey + key.substring(srcKey.length());
+        copyFile(key, newDstKey, length);
+
+        if (hasMetadataStore()) {
+          // with a metadata store, the object entries need to be updated,
+          // including, potentially, the ancestors
+          Path childSrc = keyToQualifiedPath(key);
+          Path childDst = keyToQualifiedPath(newDstKey);
+          if (objectRepresentsDirectory(key, length)) {
+            S3Guard.addMoveDir(metadataStore, srcPaths, dstMetas, childSrc,
|
||||||
dstKey + summary.getKey().substring(srcKey.length());
|
childDst, username);
|
||||||
copyFile(summary.getKey(), newDstKey, summary.getSize());
|
} else {
|
||||||
|
S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas, childSrc,
|
||||||
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
|
childDst, length, getDefaultBlockSize(childDst), username);
|
||||||
removeKeys(keysToDelete, true, false);
|
|
||||||
}
|
}
|
||||||
|
// Ancestor directories may not be listed, so we explicitly add them
|
||||||
|
S3Guard.addMoveAncestors(metadataStore, srcPaths, dstMetas,
|
||||||
|
keyToQualifiedPath(srcKey), childSrc, childDst, username);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (objects.isTruncated()) {
|
if (keysToDelete.size() == MAX_ENTRIES_TO_DELETE) {
|
||||||
objects = continueListObjects(objects);
|
removeKeys(keysToDelete, true, false);
|
||||||
} else {
|
|
||||||
if (!keysToDelete.isEmpty()) {
|
|
||||||
removeKeys(keysToDelete, false, false);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (!keysToDelete.isEmpty()) {
|
||||||
|
removeKeys(keysToDelete, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We moved all the children, now move the top-level dir
|
||||||
|
// Empty directory should have been added as the object summary
|
||||||
|
if (hasMetadataStore()
|
||||||
|
&& srcPaths != null
|
||||||
|
&& !srcPaths.contains(src)) {
|
||||||
|
LOG.debug("To move the non-empty top-level dir src={} and dst={}",
|
||||||
|
src, dst);
|
||||||
|
S3Guard.addMoveDir(metadataStore, srcPaths, dstMetas, src, dst,
|
||||||
|
username);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
metadataStore.move(srcPaths, dstMetas);
|
||||||
|
|
||||||
if (src.getParent() != dst.getParent()) {
|
if (src.getParent() != dst.getParent()) {
|
||||||
deleteUnnecessaryFakeDirectories(dst.getParent());
|
deleteUnnecessaryFakeDirectories(dst.getParent());
|
||||||
createFakeDirectoryIfNecessary(src.getParent());
|
createFakeDirectoryIfNecessary(src.getParent());
|
||||||
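For orientation, a condensed sketch of the move-tracking pattern the rename path above now follows when a MetadataStore is present. The identifiers are the ones used in the hunk; the surrounding values (srcPath, dstPath, length, blockSize) and fields (metadataStore, username) are assumed to exist in the calling context.

    // Sketch only: collect per-entry moves while copying objects, then commit
    // them to the MetadataStore as one batched move() at the end of the rename.
    Collection<Path> srcPaths = new HashSet<>();   // fast lookups before put
    List<PathMetadata> dstMetas = new ArrayList<>();
    S3Guard.addMoveFile(metadataStore, srcPaths, dstMetas,
        srcPath, dstPath, length, blockSize, username);
    metadataStore.move(srcPaths, dstMetas);        // single batched update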
@@ -879,6 +961,31 @@ public ObjectMetadata getObjectMetadata(Path path) throws IOException {
     return getObjectMetadata(pathToKey(path));
   }

+  /**
+   * Does this Filesystem have a metadata store?
+   * @return true iff the FS has been instantiated with a metadata store
+   */
+  public boolean hasMetadataStore() {
+    return !S3Guard.isNullMetadataStore(metadataStore);
+  }
+
+  /**
+   * Get the metadata store.
+   * This will always be non-null, but may be bound to the
+   * {@code NullMetadataStore}.
+   * @return the metadata store of this FS instance
+   */
+  @VisibleForTesting
+  MetadataStore getMetadataStore() {
+    return metadataStore;
+  }
+
+  /** For testing only.  See ITestS3GuardEmptyDirs. */
+  @VisibleForTesting
+  void setMetadataStore(MetadataStore ms) {
+    metadataStore = ms;
+  }
+
   /**
    * Increment a statistic by 1.
    * @param statistic The operation to increment
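The accessors above are mainly test and diagnostics hooks; a hedged sketch of how same-package test code might use them (the fs variable is an assumed S3AFileSystem instance, LOG an assumed logger).

    // Illustrative only: probe whether S3Guard is active on this FS instance.
    if (fs.hasMetadataStore()) {
      MetadataStore ms = fs.getMetadataStore();  // non-null, possibly NullMetadataStore
      LOG.info("S3Guard metadata store in use: {}", ms);
    }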
@@ -1063,8 +1170,9 @@ public PutObjectRequest newPutObjectRequest(String key,
    * @param inputStream source data.
    * @return the request
    */
-  private PutObjectRequest newPutObjectRequest(String key,
-      ObjectMetadata metadata, InputStream inputStream) {
+  PutObjectRequest newPutObjectRequest(String key,
+      ObjectMetadata metadata,
+      InputStream inputStream) {
     Preconditions.checkNotNull(inputStream);
     PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, key,
         inputStream, metadata);
@@ -1115,7 +1223,7 @@ public ObjectMetadata newObjectMetadata(long length) {
    * @param putObjectRequest the request
    * @return the upload initiated
    */
-  public Upload putObject(PutObjectRequest putObjectRequest) {
+  public UploadInfo putObject(PutObjectRequest putObjectRequest) {
     long len;
     if (putObjectRequest.getFile() != null) {
       len = putObjectRequest.getFile().length();
@@ -1126,7 +1234,7 @@ public Upload putObject(PutObjectRequest putObjectRequest) {
     try {
       Upload upload = transfers.upload(putObjectRequest);
       incrementPutCompletedStatistics(true, len);
-      return upload;
+      return new UploadInfo(upload, len);
     } catch (AmazonClientException e) {
       incrementPutCompletedStatistics(false, len);
       throw e;
@@ -1142,14 +1250,10 @@ public Upload putObject(PutObjectRequest putObjectRequest) {
    * @return the upload initiated
    * @throws AmazonClientException on problems
    */
-  public PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest)
+  PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest)
       throws AmazonClientException {
-    long len;
-    if (putObjectRequest.getFile() != null) {
-      len = putObjectRequest.getFile().length();
-    } else {
-      len = putObjectRequest.getMetadata().getContentLength();
-    }
+    long len = getPutRequestLength(putObjectRequest);
+    LOG.debug("PUT {} bytes to {}", len, putObjectRequest.getKey());
     incrementPutStartStatistics(len);
     try {
       PutObjectResult result = s3.putObject(putObjectRequest);
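putObject() now hands back an UploadInfo wrapper rather than the raw Upload, so callers can recover both the transfer handle and the length. A sketch of the caller-side pattern, mirroring the copyFromLocalFile and S3AOutputStream changes later in this diff (fs, key and putObjectRequest are assumed):

    // Sketch of the new calling convention around UploadInfo.
    UploadInfo info = fs.putObject(putObjectRequest);
    info.getUpload().waitForUploadResult();       // block until the PUT completes
    fs.finishedWrite(key, info.getLength());      // length now reaches the metadata store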
@@ -1161,6 +1265,23 @@ public PutObjectResult putObjectDirect(PutObjectRequest putObjectRequest)
     }
   }

+  /**
+   * Get the length of the PUT, verifying that the length is known.
+   * @param putObjectRequest a request bound to a file or a stream.
+   * @return the request length
+   * @throws IllegalArgumentException if the length is negative
+   */
+  private long getPutRequestLength(PutObjectRequest putObjectRequest) {
+    long len;
+    if (putObjectRequest.getFile() != null) {
+      len = putObjectRequest.getFile().length();
+    } else {
+      len = putObjectRequest.getMetadata().getContentLength();
+    }
+    Preconditions.checkState(len >= 0, "Cannot PUT object of unknown length");
+    return len;
+  }
+
   /**
    * Upload part of a multi-partition file.
    * Increments the write and put counters.
@@ -1288,7 +1409,7 @@ void removeKeys(List<DeleteObjectsRequest.KeyVersion> keysToDelete,
    */
   public boolean delete(Path f, boolean recursive) throws IOException {
     try {
-      return innerDelete(getFileStatus(f), recursive);
+      return innerDelete(innerGetFileStatus(f, true), recursive);
     } catch (FileNotFoundException e) {
       LOG.debug("Couldn't delete {} - does not exist", f);
       instrumentation.errorIgnored();
@@ -1318,6 +1439,9 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)

     if (status.isDirectory()) {
       LOG.debug("delete: Path is a directory: {}", f);
+      Preconditions.checkArgument(
+          status.isEmptyDirectory() != Tristate.UNKNOWN,
+          "File status must have directory emptiness computed");

       if (!key.endsWith("/")) {
         key = key + "/";
@@ -1327,13 +1451,15 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
         return rejectRootDirectoryDelete(status, recursive);
       }

-      if (!recursive && !status.isEmptyDirectory()) {
+      if (!recursive && status.isEmptyDirectory() == Tristate.FALSE) {
        throw new PathIsNotEmptyDirectoryException(f.toString());
       }

-      if (status.isEmptyDirectory()) {
+      if (status.isEmptyDirectory() == Tristate.TRUE) {
         LOG.debug("Deleting fake empty directory {}", key);
+        // HADOOP-13761 S3Guard: retries here
         deleteObject(key);
+        metadataStore.delete(f);
         instrumentation.directoryDeleted();
       } else {
         LOG.debug("Getting objects for directory prefix {} to delete", key);
@@ -1349,6 +1475,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
             LOG.debug("Got object to delete {}", summary.getKey());

             if (keys.size() == MAX_ENTRIES_TO_DELETE) {
+              // TODO: HADOOP-13761 S3Guard: retries
               removeKeys(keys, true, false);
             }
           }
@@ -1357,16 +1484,19 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
             objects = continueListObjects(objects);
           } else {
             if (!keys.isEmpty()) {
+              // TODO: HADOOP-13761 S3Guard: retries
               removeKeys(keys, false, false);
             }
             break;
           }
         }
       }
+      metadataStore.deleteSubtree(f);
     } else {
       LOG.debug("delete: Path is a file");
       instrumentation.fileDeleted(1);
       deleteObject(key);
+      metadataStore.delete(f);
     }

     Path parent = f.getParent();
@@ -1390,7 +1520,7 @@ private boolean innerDelete(S3AFileStatus status, boolean recursive)
   private boolean rejectRootDirectoryDelete(S3AFileStatus status,
       boolean recursive) throws IOException {
     LOG.info("s3a delete the {} root directory of {}", bucket, recursive);
-    boolean emptyRoot = status.isEmptyDirectory();
+    boolean emptyRoot = status.isEmptyDirectory() == Tristate.TRUE;
     if (emptyRoot) {
       return true;
     }
@@ -1405,7 +1535,7 @@ private boolean rejectRootDirectoryDelete(S3AFileStatus status,
   private void createFakeDirectoryIfNecessary(Path f)
       throws IOException, AmazonClientException {
     String key = pathToKey(f);
-    if (!key.isEmpty() && !exists(f)) {
+    if (!key.isEmpty() && !s3Exists(f)) {
       LOG.debug("Creating new fake directory at {}", f);
       createFakeDirectory(key);
     }
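isEmptyDirectory() is now a Tristate rather than a boolean, so call sites compare against an explicit state as the hunks above do. A short sketch of the three cases (status is an assumed S3AFileStatus):

    // Tristate replaces the old boolean: TRUE, FALSE, or UNKNOWN when
    // directory emptiness has not been computed yet.
    if (status.isEmptyDirectory() == Tristate.TRUE) {
      // only a fake directory marker to delete
    } else if (status.isEmptyDirectory() == Tristate.FALSE) {
      // non-empty: a recursive delete is required
    } else {
      // Tristate.UNKNOWN: the caller must resolve emptiness first
    }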
@@ -1454,6 +1584,11 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
         key = key + '/';
       }

+      DirListingMetadata dirMeta = metadataStore.listChildren(path);
+      if (allowAuthoritative && dirMeta != null && dirMeta.isAuthoritative()) {
+        return S3Guard.dirMetaToStatuses(dirMeta);
+      }
+
       ListObjectsRequest request = createListObjectsRequest(key, "/");
       LOG.debug("listStatus: doing listObjects for directory {}", key);

@@ -1466,7 +1601,8 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
       while (files.hasNext()) {
         result.add(files.next());
       }
-      return result.toArray(new FileStatus[result.size()]);
+      return S3Guard.dirListingUnion(metadataStore, path, result, dirMeta,
+          allowAuthoritative);
     } else {
       LOG.debug("Adding: rd (not a dir): {}", path);
       FileStatus[] stats = new FileStatus[1];
@@ -1482,7 +1618,8 @@ public FileStatus[] innerListStatus(Path f) throws FileNotFoundException,
    * @param delimiter any delimiter
    * @return the request
    */
-  private ListObjectsRequest createListObjectsRequest(String key,
+  @VisibleForTesting
+  ListObjectsRequest createListObjectsRequest(String key,
       String delimiter) {
     ListObjectsRequest request = new ListObjectsRequest();
     request.setBucketName(bucket);
@@ -1541,23 +1678,30 @@ public boolean mkdirs(Path path, FsPermission permission) throws IOException,
       throw translateException("innerMkdirs", path, e);
     }
   }

   /**
    *
    * Make the given path and all non-existent parents into
    * directories.
    * See {@link #mkdirs(Path, FsPermission)}
-   * @param f path to create
+   * @param p path to create
    * @param permission to apply to f
-   * @return true if a directory was created
+   * @return true if a directory was created or already existed
    * @throws FileAlreadyExistsException there is a file at the path specified
    * @throws IOException other IO problems
    * @throws AmazonClientException on failures inside the AWS SDK
    */
-  private boolean innerMkdirs(Path f, FsPermission permission)
+  private boolean innerMkdirs(Path p, FsPermission permission)
       throws IOException, FileAlreadyExistsException, AmazonClientException {
+    Path f = qualify(p);
     LOG.debug("Making directory: {}", f);
     incrementStatistic(INVOCATION_MKDIRS);
     FileStatus fileStatus;
+    List<Path> metadataStoreDirs = null;
+    if (hasMetadataStore()) {
+      metadataStoreDirs = new ArrayList<>();
+    }
+
     try {
       fileStatus = getFileStatus(f);

@@ -1567,8 +1711,12 @@ private boolean innerMkdirs(Path f, FsPermission permission)
         throw new FileAlreadyExistsException("Path is a file: " + f);
       }
     } catch (FileNotFoundException e) {
+      // Walk path to root, ensuring closest ancestor is a directory, not file
       Path fPart = f.getParent();
-      do {
+      if (metadataStoreDirs != null) {
+        metadataStoreDirs.add(f);
+      }
+      while (fPart != null) {
         try {
           fileStatus = getFileStatus(fPart);
           if (fileStatus.isDirectory()) {
@@ -1581,12 +1729,17 @@ private boolean innerMkdirs(Path f, FsPermission permission)
           }
         } catch (FileNotFoundException fnfe) {
           instrumentation.errorIgnored();
+          // We create all missing directories in MetadataStore; it does not
+          // infer directories exist by prefix like S3.
+          if (metadataStoreDirs != null) {
+            metadataStoreDirs.add(fPart);
+          }
         }
         fPart = fPart.getParent();
-      } while (fPart != null);
+      }

       String key = pathToKey(f);
       createFakeDirectory(key);
+      S3Guard.makeDirsOrdered(metadataStore, metadataStoreDirs, username, true);
       // this is complicated because getParent(a/b/c/) returns a/b/c, but
       // we want a/b. See HADOOP-14428 for more details.
       deleteUnnecessaryFakeDirectories(new Path(f.toString()).getParent());
@@ -1598,21 +1751,93 @@ private boolean innerMkdirs(Path f, FsPermission permission)
    * Return a file status object that represents the path.
    * @param f The path we want information from
    * @return a FileStatus object
-   * @throws java.io.FileNotFoundException when the path does not exist;
+   * @throws FileNotFoundException when the path does not exist
    * @throws IOException on other problems.
    */
-  public S3AFileStatus getFileStatus(final Path f) throws IOException {
+  public FileStatus getFileStatus(final Path f) throws IOException {
+    return innerGetFileStatus(f, false);
+  }
+
+  /**
+   * Internal version of {@link #getFileStatus(Path)}.
+   * @param f The path we want information from
+   * @param needEmptyDirectoryFlag if true, implementation will calculate
+   *        a TRUE or FALSE value for {@link S3AFileStatus#isEmptyDirectory()}
+   * @return a S3AFileStatus object
+   * @throws FileNotFoundException when the path does not exist
+   * @throws IOException on other problems.
+   */
+  @VisibleForTesting
+  S3AFileStatus innerGetFileStatus(final Path f,
+      boolean needEmptyDirectoryFlag) throws IOException {
     incrementStatistic(INVOCATION_GET_FILE_STATUS);
     final Path path = qualify(f);
     String key = pathToKey(path);
-    LOG.debug("Getting path status for {} ({})", path , key);
+    LOG.debug("Getting path status for {} ({})", path, key);

+    // Check MetadataStore, if any.
+    PathMetadata pm = metadataStore.get(path, needEmptyDirectoryFlag);
+    Set<Path> tombstones = Collections.EMPTY_SET;
+    if (pm != null) {
+      if (pm.isDeleted()) {
+        throw new FileNotFoundException("Path " + f + " is recorded as " +
+            "deleted by S3Guard");
+      }
+
+      FileStatus msStatus = pm.getFileStatus();
+      if (needEmptyDirectoryFlag && msStatus.isDirectory()) {
+        if (pm.isEmptyDirectory() != Tristate.UNKNOWN) {
+          // We have a definitive true / false from MetadataStore, we are done.
+          return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
+        } else {
+          DirListingMetadata children = metadataStore.listChildren(path);
+          if (children != null) {
+            tombstones = children.listTombstones();
+          }
+          LOG.debug("MetadataStore doesn't know if dir is empty, using S3.");
+        }
+      } else {
+        // Either this is not a directory, or we don't care if it is empty
+        return S3AFileStatus.fromFileStatus(msStatus, pm.isEmptyDirectory());
+      }
+
+      // If the metadata store has no children for it and it's not listed in
+      // S3 yet, we'll assume the empty directory is true;
+      S3AFileStatus s3FileStatus;
+      try {
+        s3FileStatus = s3GetFileStatus(path, key, tombstones);
+      } catch (FileNotFoundException e) {
+        return S3AFileStatus.fromFileStatus(msStatus, Tristate.TRUE);
+      }
+      // entry was found, save in S3Guard
+      return S3Guard.putAndReturn(metadataStore, s3FileStatus, instrumentation);
+    } else {
+      // there was no entry in S3Guard
+      // retrieve the data and update the metadata store in the process.
+      return S3Guard.putAndReturn(metadataStore,
+          s3GetFileStatus(path, key, tombstones), instrumentation);
+    }
+  }
+
+  /**
+   * Raw {@code getFileStatus} that talks direct to S3.
+   * Used to implement {@link #innerGetFileStatus(Path, boolean)},
+   * and for direct management of empty directory blobs.
+   * @param path Qualified path
+   * @param key  Key string for the path
+   * @return Status
+   * @throws FileNotFoundException when the path does not exist
+   * @throws IOException on other problems.
+   */
+  private S3AFileStatus s3GetFileStatus(final Path path, String key,
+      Set<Path> tombstones) throws IOException {
     if (!key.isEmpty()) {
       try {
         ObjectMetadata meta = getObjectMetadata(key);

         if (objectRepresentsDirectory(key, meta.getContentLength())) {
           LOG.debug("Found exact file: fake directory");
-          return new S3AFileStatus(true, path, username);
+          return new S3AFileStatus(Tristate.TRUE, path, username);
         } else {
           LOG.debug("Found exact file: normal file");
           return new S3AFileStatus(meta.getContentLength(),
@@ -1637,16 +1862,16 @@ public S3AFileStatus getFileStatus(final Path f) throws IOException {

         if (objectRepresentsDirectory(newKey, meta.getContentLength())) {
           LOG.debug("Found file (with /): fake directory");
-          return new S3AFileStatus(true, path, username);
+          return new S3AFileStatus(Tristate.TRUE, path, username);
         } else {
           LOG.warn("Found file (with /): real file? should not happen: {}",
               key);

           return new S3AFileStatus(meta.getContentLength(),
               dateToLong(meta.getLastModified()),
               path,
               getDefaultBlockSize(path),
               username);
         }
       } catch (AmazonServiceException e) {
         if (e.getStatusCode() != 404) {
@@ -1668,25 +1893,26 @@ public S3AFileStatus getFileStatus(final Path f) throws IOException {

         ObjectListing objects = listObjects(request);

-        if (!objects.getCommonPrefixes().isEmpty()
-            || !objects.getObjectSummaries().isEmpty()) {
+        Collection<String> prefixes = objects.getCommonPrefixes();
+        Collection<S3ObjectSummary> summaries = objects.getObjectSummaries();
+        if (!isEmptyOfKeys(prefixes, tombstones) ||
+            !isEmptyOfObjects(summaries, tombstones)) {
           if (LOG.isDebugEnabled()) {
             LOG.debug("Found path as directory (with /): {}/{}",
-                objects.getCommonPrefixes().size() ,
-                objects.getObjectSummaries().size());
+                prefixes.size(), summaries.size());

-            for (S3ObjectSummary summary : objects.getObjectSummaries()) {
+            for (S3ObjectSummary summary : summaries) {
               LOG.debug("Summary: {} {}", summary.getKey(), summary.getSize());
             }
-            for (String prefix : objects.getCommonPrefixes()) {
+            for (String prefix : prefixes) {
               LOG.debug("Prefix: {}", prefix);
             }
           }

-          return new S3AFileStatus(false, path, username);
+          return new S3AFileStatus(Tristate.FALSE, path, username);
         } else if (key.isEmpty()) {
           LOG.debug("Found root directory");
-          return new S3AFileStatus(true, path, username);
+          return new S3AFileStatus(Tristate.TRUE, path, username);
         }
       } catch (AmazonServiceException e) {
         if (e.getStatusCode() != 404) {
@@ -1700,6 +1926,64 @@ public S3AFileStatus getFileStatus(final Path f) throws IOException {
     throw new FileNotFoundException("No such file or directory: " + path);
   }

+  /**
+   * Helper function to determine if a collection of paths is empty
+   * after accounting for tombstone markers (if provided).
+   * @param keys Collection of path (prefixes / directories or keys).
+   * @param tombstones Set of tombstone markers, or null if not applicable.
+   * @return false if summaries contains objects not accounted for by
+   *         tombstones.
+   */
+  private boolean isEmptyOfKeys(Collection<String> keys, Set<Path>
+      tombstones) {
+    if (tombstones == null) {
+      return keys.isEmpty();
+    }
+    for (String key : keys) {
+      Path qualified = keyToQualifiedPath(key);
+      if (!tombstones.contains(qualified)) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Helper function to determine if a collection of object summaries is empty
+   * after accounting for tombstone markers (if provided).
+   * @param summaries Collection of objects as returned by listObjects.
+   * @param tombstones Set of tombstone markers, or null if not applicable.
+   * @return false if summaries contains objects not accounted for by
+   *         tombstones.
+   */
+  private boolean isEmptyOfObjects(Collection<S3ObjectSummary> summaries,
+      Set<Path> tombstones) {
+    if (tombstones == null) {
+      return summaries.isEmpty();
+    }
+    Collection<String> stringCollection = new ArrayList<>(summaries.size());
+    for (S3ObjectSummary summary : summaries) {
+      stringCollection.add(summary.getKey());
+    }
+    return isEmptyOfKeys(stringCollection, tombstones);
+  }
+
+  /**
+   * Raw version of {@link FileSystem#exists(Path)} which uses S3 only:
+   * S3Guard MetadataStore, if any, will be skipped.
+   * @return true if path exists in S3
+   */
+  private boolean s3Exists(final Path f) throws IOException {
+    Path path = qualify(f);
+    String key = pathToKey(path);
+    try {
+      s3GetFileStatus(path, key, null);
+      return true;
+    } catch (FileNotFoundException e) {
+      return false;
+    }
+  }
+
   /**
    * The src file is on the local disk. Add it to FS at
    * the given dst name.
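A sketch of the two lookup modes the new internal method supports, following the javadoc above (path is an assumed qualified Path):

    // Cheap lookup: emptiness may come back as Tristate.UNKNOWN.
    S3AFileStatus quick = innerGetFileStatus(path, false);
    // Delete and rename need a definite answer, so they pass true and may
    // trigger an extra listing to resolve TRUE or FALSE.
    S3AFileStatus full = innerGetFileStatus(path, true);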
@@ -1777,12 +2061,13 @@ private void innerCopyFromLocalFile(boolean delSrc, boolean overwrite,
     final String key = pathToKey(dst);
     final ObjectMetadata om = newObjectMetadata(srcfile.length());
     PutObjectRequest putObjectRequest = newPutObjectRequest(key, om, srcfile);
-    Upload up = putObject(putObjectRequest);
+    UploadInfo info = putObject(putObjectRequest);
+    Upload upload = info.getUpload();
     ProgressableProgressListener listener = new ProgressableProgressListener(
-        this, key, up, null);
-    up.addProgressListener(listener);
+        this, key, upload, null);
+    upload.addProgressListener(listener);
     try {
-      up.waitForUploadResult();
+      upload.waitForUploadResult();
     } catch (InterruptedException e) {
       throw new InterruptedIOException("Interrupted copying " + src
           + " to " + dst + ", cancelling");
@@ -1790,7 +2075,7 @@ private void innerCopyFromLocalFile(boolean delSrc, boolean overwrite,
     listener.uploadCompleted();

     // This will delete unnecessary fake parent directories
-    finishedWrite(key);
+    finishedWrite(key, info.getLength());

     if (delSrc) {
       local.delete(src, false);
@@ -1814,6 +2099,10 @@ public void close() throws IOException {
         transfers.shutdownNow(true);
         transfers = null;
       }
+      if (metadataStore != null) {
+        metadataStore.close();
+        metadataStore = null;
+      }
     }
   }

@@ -1956,11 +2245,38 @@ private SSECustomerKey generateSSECustomerKey() {

   /**
    * Perform post-write actions.
+   * This operation MUST be called after any PUT/multipart PUT completes
+   * successfully.
+   * This includes
+   * <ol>
+   *   <li>Calling {@link #deleteUnnecessaryFakeDirectories(Path)}</li>
+   *   <li>Updating any metadata store with details on the newly created
+   *   object.</li>
+   * </ol>
    * @param key key written to
+   * @param length total length of file written
    */
-  public void finishedWrite(String key) {
-    LOG.debug("Finished write to {}", key);
-    deleteUnnecessaryFakeDirectories(keyToPath(key).getParent());
+  @InterfaceAudience.Private
+  void finishedWrite(String key, long length) {
+    LOG.debug("Finished write to {}, len {}", key, length);
+    Path p = keyToQualifiedPath(key);
+    deleteUnnecessaryFakeDirectories(p.getParent());
+    Preconditions.checkArgument(length >= 0, "content length is negative");
+
+    // See note about failure semantics in S3Guard documentation
+    try {
+      if (hasMetadataStore()) {
+        S3Guard.addAncestors(metadataStore, p, username);
+        S3AFileStatus status = createUploadFileStatus(p,
+            S3AUtils.objectRepresentsDirectory(key, length), length,
+            getDefaultBlockSize(p), username);
+        S3Guard.putAndReturn(metadataStore, status, instrumentation);
+      }
+    } catch (IOException e) {
+      LOG.error("S3Guard: Error updating MetadataStore for write to {}:",
+          key, e);
+      instrumentation.errorIgnored();
+    }
   }

   /**
@@ -2015,9 +2331,9 @@ public int read() throws IOException {
       PutObjectRequest putObjectRequest = newPutObjectRequest(objectName,
           newObjectMetadata(0L),
           im);
-      Upload upload = putObject(putObjectRequest);
+      UploadInfo info = putObject(putObjectRequest);
       try {
-        upload.waitForUploadResult();
+        info.getUpload().waitForUploadResult();
       } catch (InterruptedException e) {
         throw new InterruptedIOException("Interrupted creating " + objectName);
       }
@@ -2123,6 +2439,8 @@ public String toString() {
     if (blockFactory != null) {
       sb.append(", blockFactory=").append(blockFactory);
     }
+    sb.append(", metastore=").append(metadataStore);
+    sb.append(", authoritative=").append(allowAuthoritative);
     sb.append(", boundedExecutor=").append(boundedThreadPool);
     sb.append(", unboundedExecutor=").append(unboundedThreadPool);
     sb.append(", statistics {")
@@ -2241,6 +2559,18 @@ public boolean isFile(Path f) throws IOException {
   @Override
   public RemoteIterator<LocatedFileStatus> listFiles(Path f,
       boolean recursive) throws FileNotFoundException, IOException {
+    return innerListFiles(f, recursive,
+        new Listing.AcceptFilesOnly(qualify(f)));
+  }
+
+  public RemoteIterator<LocatedFileStatus> listFilesAndEmptyDirectories(Path f,
+      boolean recursive) throws IOException {
+    return innerListFiles(f, recursive,
+        new Listing.AcceptAllButS3nDirs());
+  }
+
+  private RemoteIterator<LocatedFileStatus> innerListFiles(Path f, boolean
+      recursive, Listing.FileStatusAcceptor acceptor) throws IOException {
     incrementStatistic(INVOCATION_LIST_FILES);
     Path path = qualify(f);
     LOG.debug("listFiles({}, {})", path, recursive);
@@ -2258,13 +2588,42 @@ public RemoteIterator<LocatedFileStatus> listFiles(Path f,
         String delimiter = recursive ? null : "/";
         LOG.debug("Requesting all entries under {} with delimiter '{}'",
             key, delimiter);
-        return listing.createLocatedFileStatusIterator(
-            listing.createFileStatusListingIterator(path,
-                createListObjectsRequest(key, delimiter),
-                ACCEPT_ALL,
-                new Listing.AcceptFilesOnly(path)));
+        final RemoteIterator<FileStatus> cachedFilesIterator;
+        final Set<Path> tombstones;
+        if (recursive) {
+          final PathMetadata pm = metadataStore.get(path, true);
+          // shouldn't need to check pm.isDeleted() because that will have
+          // been caught by getFileStatus above.
+          MetadataStoreListFilesIterator metadataStoreListFilesIterator =
+              new MetadataStoreListFilesIterator(metadataStore, pm,
+                  allowAuthoritative);
+          tombstones = metadataStoreListFilesIterator.listTombstones();
+          cachedFilesIterator = metadataStoreListFilesIterator;
+        } else {
+          DirListingMetadata meta = metadataStore.listChildren(path);
+          if (meta != null) {
+            tombstones = meta.listTombstones();
+          } else {
+            tombstones = null;
+          }
+          cachedFilesIterator = listing.createProvidedFileStatusIterator(
+              S3Guard.dirMetaToStatuses(meta), ACCEPT_ALL, acceptor);
+          if (allowAuthoritative && meta != null && meta.isAuthoritative()) {
+            // metadata listing is authoritative, so return it directly
+            return listing.createLocatedFileStatusIterator(cachedFilesIterator);
+          }
+        }
+        return listing.createTombstoneReconcilingIterator(
+            listing.createLocatedFileStatusIterator(
+                listing.createFileStatusListingIterator(path,
+                    createListObjectsRequest(key, delimiter),
+                    ACCEPT_ALL,
+                    acceptor,
+                    cachedFilesIterator)),
+            tombstones);
       }
     } catch (AmazonClientException e) {
+      // TODO S3Guard: retry on file not found exception
       throw translateException("listFiles", path, e);
     }
   }
@@ -2309,12 +2668,21 @@ public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f,
             filter.accept(path) ? toLocatedFileStatus(fileStatus) : null);
       } else {
         // directory: trigger a lookup
-        String key = maybeAddTrailingSlash(pathToKey(path));
-        return listing.createLocatedFileStatusIterator(
-            listing.createFileStatusListingIterator(path,
-                createListObjectsRequest(key, "/"),
-                filter,
-                new Listing.AcceptAllButSelfAndS3nDirs(path)));
+        final String key = maybeAddTrailingSlash(pathToKey(path));
+        final Listing.FileStatusAcceptor acceptor =
+            new Listing.AcceptAllButSelfAndS3nDirs(path);
+        DirListingMetadata meta = metadataStore.listChildren(path);
+        final RemoteIterator<FileStatus> cachedFileStatusIterator =
+            listing.createProvidedFileStatusIterator(
+                S3Guard.dirMetaToStatuses(meta), filter, acceptor);
+        return (allowAuthoritative && meta != null && meta.isAuthoritative())
+            ? listing.createLocatedFileStatusIterator(cachedFileStatusIterator)
+            : listing.createLocatedFileStatusIterator(
+                listing.createFileStatusListingIterator(path,
+                    createListObjectsRequest(key, "/"),
+                    filter,
+                    acceptor,
+                    cachedFileStatusIterator));
       }
     } catch (AmazonClientException e) {
       throw translateException("listLocatedStatus", path, e);
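The rename path earlier in this diff relies on the new listing entry point; a sketch of its use (fs and parentPath are assumed):

    // Recursive listing that also surfaces empty directory markers.
    RemoteIterator<LocatedFileStatus> it =
        fs.listFilesAndEmptyDirectories(parentPath, true);
    while (it.hasNext()) {
      LocatedFileStatus st = it.next();
      // files and empty-directory markers both appear here
    }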
@@ -2389,8 +2757,8 @@ PutObjectRequest newPutRequest(File sourceFile) {
     /**
      * Callback on a successful write.
      */
-    void writeSuccessful() {
-      finishedWrite(key);
+    void writeSuccessful(long length) {
+      finishedWrite(key, length);
     }

     /**
@@ -23,6 +23,7 @@

 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.fs.FileSystem.Statistics;
 import org.apache.hadoop.metrics2.MetricStringBuilder;
 import org.apache.hadoop.metrics2.annotation.Metrics;
 import org.apache.hadoop.metrics2.lib.Interns;
@@ -30,6 +31,7 @@
 import org.apache.hadoop.metrics2.lib.MutableCounterLong;
 import org.apache.hadoop.metrics2.lib.MutableGaugeLong;
 import org.apache.hadoop.metrics2.lib.MutableMetric;
+import org.apache.hadoop.metrics2.lib.MutableQuantiles;

 import java.io.Closeable;
 import java.net.URI;
@@ -38,7 +40,6 @@
 import java.util.UUID;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
-import org.apache.hadoop.fs.FileSystem.Statistics;

 import static org.apache.hadoop.fs.s3a.Statistic.*;

@@ -90,6 +91,10 @@ public class S3AInstrumentation {
   private final Map<String, MutableCounterLong> streamMetrics =
       new HashMap<>(30);

+  /** Instantiate this without caring whether or not S3Guard is enabled. */
+  private final S3GuardInstrumentation s3GuardInstrumentation
+      = new S3GuardInstrumentation();
+
   private static final Statistic[] COUNTERS_TO_CREATE = {
       INVOCATION_COPY_FROM_LOCAL_FILE,
       INVOCATION_EXISTS,
@@ -117,6 +122,8 @@ public class S3AInstrumentation {
       STREAM_WRITE_BLOCK_UPLOADS_ABORTED,
       STREAM_WRITE_TOTAL_TIME,
       STREAM_WRITE_TOTAL_DATA,
+      S3GUARD_METADATASTORE_PUT_PATH_REQUEST,
+      S3GUARD_METADATASTORE_INITIALIZATION
   };


@@ -171,6 +178,9 @@ public S3AInstrumentation(URI name) {
     for (Statistic statistic : GAUGES_TO_CREATE) {
       gauge(statistic.getSymbol(), statistic.getDescription());
     }
+    //todo need a config for the quantiles interval?
+    quantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
+        "ops", "latency", 1);
   }

   /**
@@ -226,6 +236,22 @@ protected final MutableGaugeLong gauge(String name, String desc) {
     return registry.newGauge(name, desc, 0L);
   }

+  /**
+   * Create a quantiles in the registry.
+   * @param op statistic to collect
+   * @param sampleName sample name of the quantiles
+   * @param valueName value name of the quantiles
+   * @param interval interval of the quantiles in seconds
+   * @return the created quantiles metric
+   */
+  protected final MutableQuantiles quantiles(Statistic op,
+      String sampleName,
+      String valueName,
+      int interval) {
+    return registry.newQuantiles(op.getSymbol(), op.getDescription(),
+        sampleName, valueName, interval);
+  }
+
   /**
    * Get the metrics registry.
    * @return the registry
@@ -310,6 +336,20 @@ public MutableGaugeLong lookupGauge(String name) {
     return (MutableGaugeLong) metric;
   }

+  /**
+   * Look up a quantiles.
+   * @param name quantiles name
+   * @return the quantiles or null
+   * @throws ClassCastException if the metric is not a Quantiles.
+   */
+  public MutableQuantiles lookupQuantiles(String name) {
+    MutableMetric metric = lookupMetric(name);
+    if (metric == null) {
+      LOG.debug("No quantiles {}", name);
+    }
+    return (MutableQuantiles) metric;
+  }
+
   /**
    * Look up a metric from both the registered set and the lighter weight
    * stream entries.
@@ -391,6 +431,21 @@ public void incrementCounter(Statistic op, long count) {
       counter.incr(count);
     }
   }

+  /**
+   * Add a value to a quantiles statistic. No-op if the quantile
+   * isn't found.
+   * @param op operation to look up.
+   * @param value value to add.
+   * @throws ClassCastException if the metric is not a Quantiles.
+   */
+  public void addValueToQuantiles(Statistic op, long value) {
+    MutableQuantiles quantiles = lookupQuantiles(op.getSymbol());
+    if (quantiles != null) {
+      quantiles.add(value);
+    }
+  }
+
   /**
    * Increment a specific counter.
    * No-op if not defined.
@@ -441,6 +496,15 @@ InputStreamStatistics newInputStreamStatistics() {
     return new InputStreamStatistics();
   }

+  /**
+   * Create a S3Guard instrumentation instance.
+   * There's likely to be at most one instance of this per FS instance.
+   * @return the S3Guard instrumentation point.
+   */
+  public S3GuardInstrumentation getS3GuardInstrumentation() {
+    return s3GuardInstrumentation;
+  }
+
   /**
    * Merge in the statistics of a single input stream into
    * the filesystem-wide statistics.
@@ -840,4 +904,19 @@ public String toString() {
       return sb.toString();
     }
   }
+
+  /**
+   * Instrumentation exported to S3Guard.
+   */
+  public final class S3GuardInstrumentation {
+
+    /** Initialized event. */
+    public void initialized() {
+      incrementCounter(S3GUARD_METADATASTORE_INITIALIZATION, 1);
+    }
+
+    public void storeClosed() {
+
+    }
+  }
 }
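The instrumentation gains a quantiles metric for S3Guard put-path latency; a sketch of how a timed operation might feed it (the timing code and the millisecond unit are assumptions, not part of this commit):

    long start = System.nanoTime();
    // ... perform the metadata store put ...
    long elapsedMs = (System.nanoTime() - start) / 1_000_000;
    instrumentation.addValueToQuantiles(
        Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY, elapsedMs);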
@@ -20,7 +20,6 @@

 import com.amazonaws.AmazonClientException;
 import com.amazonaws.services.s3.model.ObjectMetadata;
-import com.amazonaws.services.s3.transfer.Upload;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
 import org.apache.hadoop.conf.Configuration;
@@ -101,19 +100,20 @@ public void close() throws IOException {

     try {
       final ObjectMetadata om = fs.newObjectMetadata(backupFile.length());
-      Upload upload = fs.putObject(
+      UploadInfo info = fs.putObject(
           fs.newPutObjectRequest(
               key,
               om,
               backupFile));
       ProgressableProgressListener listener =
-          new ProgressableProgressListener(fs, key, upload, progress);
-      upload.addProgressListener(listener);
+          new ProgressableProgressListener(fs, key, info.getUpload(), progress);
+      info.getUpload().addProgressListener(listener);

-      upload.waitForUploadResult();
+      info.getUpload().waitForUploadResult();
       listener.uploadCompleted();
-      // This will delete unnecessary fake parent directories
-      fs.finishedWrite(key);
+      // This will delete unnecessary fake parent directories, update any
+      // MetadataStore
+      fs.finishedWrite(key, info.getLength());
     } catch (InterruptedException e) {
       throw (InterruptedIOException) new InterruptedIOException(e.toString())
           .initCause(e);
@@ -294,12 +294,38 @@ public static S3AFileStatus createFileStatus(Path keyPath,
       S3ObjectSummary summary,
       long blockSize,
       String owner) {
-    if (objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
-      return new S3AFileStatus(true, keyPath, owner);
+    long size = summary.getSize();
+    return createFileStatus(keyPath,
+        objectRepresentsDirectory(summary.getKey(), size),
+        size, summary.getLastModified(), blockSize, owner);
+  }
+
+  /**
+   * Create a file status for object we just uploaded.  For files, we use
+   * current time as modification time, since s3a uses S3's service-based
+   * modification time, which will not be available until we do a
+   * getFileStatus() later on.
+   * @param keyPath path for created object
+   * @param isDir true iff directory
+   * @param size file length
+   * @param blockSize block size for file status
+   * @param owner Hadoop username
+   * @return a status entry
+   */
+  public static S3AFileStatus createUploadFileStatus(Path keyPath,
+      boolean isDir, long size, long blockSize, String owner) {
+    Date date = isDir ? null : new Date();
+    return createFileStatus(keyPath, isDir, size, date, blockSize, owner);
+  }
+
+  /* Date 'modified' is ignored when isDir is true. */
+  private static S3AFileStatus createFileStatus(Path keyPath, boolean isDir,
+      long size, Date modified, long blockSize, String owner) {
+    if (isDir) {
+      return new S3AFileStatus(Tristate.UNKNOWN, keyPath, owner);
     } else {
-      return new S3AFileStatus(summary.getSize(),
-          dateToLong(summary.getLastModified()), keyPath,
-          blockSize, owner);
+      return new S3AFileStatus(size, dateToLong(modified), keyPath, blockSize,
+          owner);
     }
   }

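A sketch of the new helper in use right after an upload completes, as finishedWrite() does earlier in this diff (path, bytesWritten, blockSize, username, metadataStore and instrumentation are assumed):

    // Build an optimistic status for the just-written object and record it.
    S3AFileStatus status = S3AUtils.createUploadFileStatus(path,
        false /* isDir */, bytesWritten, blockSize, username);
    S3Guard.putAndReturn(metadataStore, status, instrumentation);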
@@ -18,33 +18,20 @@

 package org.apache.hadoop.fs.s3a;

-import static org.apache.hadoop.fs.s3a.Constants.*;
-import static org.apache.hadoop.fs.s3a.S3AUtils.*;
-
 import java.io.IOException;
 import java.net.URI;

-import com.amazonaws.ClientConfiguration;
-import com.amazonaws.Protocol;
-import com.amazonaws.auth.AWSCredentialsProvider;
 import com.amazonaws.services.s3.AmazonS3;
-import com.amazonaws.services.s3.AmazonS3Client;
-import com.amazonaws.services.s3.S3ClientOptions;

 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.classification.InterfaceStability;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.conf.Configured;
-import org.apache.hadoop.util.VersionInfo;
-
-import org.slf4j.Logger;

 /**
- * Factory for creation of S3 client instances to be used by {@link S3Store}.
+ * Factory for creation of {@link AmazonS3} client instances.
  */
 @InterfaceAudience.Private
 @InterfaceStability.Unstable
-interface S3ClientFactory {
+public interface S3ClientFactory {

   /**
    * Creates a new {@link AmazonS3} client.  This method accepts the S3A file
@ -57,177 +44,4 @@ interface S3ClientFactory {
|
|||||||
*/
|
*/
|
||||||
AmazonS3 createS3Client(URI name) throws IOException;
|
AmazonS3 createS3Client(URI name) throws IOException;
|
||||||
|
|
||||||
/**
|
|
||||||
* The default factory implementation, which calls the AWS SDK to configure
|
|
||||||
* and create an {@link AmazonS3Client} that communicates with the S3 service.
|
|
||||||
*/
|
|
||||||
static class DefaultS3ClientFactory extends Configured
|
|
||||||
implements S3ClientFactory {
|
|
||||||
|
|
||||||
private static final Logger LOG = S3AFileSystem.LOG;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public AmazonS3 createS3Client(URI name) throws IOException {
|
|
||||||
Configuration conf = getConf();
|
|
||||||
AWSCredentialsProvider credentials =
|
|
||||||
createAWSCredentialProviderSet(name, conf);
|
|
||||||
ClientConfiguration awsConf = new ClientConfiguration();
|
|
||||||
initConnectionSettings(conf, awsConf);
|
|
||||||
initProxySupport(conf, awsConf);
|
|
||||||
initUserAgent(conf, awsConf);
|
|
||||||
return createAmazonS3Client(conf, credentials, awsConf);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initializes all AWS SDK settings related to connection management.
|
|
||||||
*
|
|
||||||
* @param conf Hadoop configuration
|
|
||||||
* @param awsConf AWS SDK configuration
|
|
||||||
*/
|
|
||||||
private static void initConnectionSettings(Configuration conf,
|
|
||||||
ClientConfiguration awsConf) {
|
|
||||||
awsConf.setMaxConnections(intOption(conf, MAXIMUM_CONNECTIONS,
|
|
||||||
DEFAULT_MAXIMUM_CONNECTIONS, 1));
|
|
||||||
boolean secureConnections = conf.getBoolean(SECURE_CONNECTIONS,
|
|
||||||
DEFAULT_SECURE_CONNECTIONS);
|
|
||||||
awsConf.setProtocol(secureConnections ? Protocol.HTTPS : Protocol.HTTP);
|
|
||||||
awsConf.setMaxErrorRetry(intOption(conf, MAX_ERROR_RETRIES,
|
|
||||||
DEFAULT_MAX_ERROR_RETRIES, 0));
|
|
||||||
awsConf.setConnectionTimeout(intOption(conf, ESTABLISH_TIMEOUT,
|
|
||||||
DEFAULT_ESTABLISH_TIMEOUT, 0));
|
|
||||||
awsConf.setSocketTimeout(intOption(conf, SOCKET_TIMEOUT,
|
|
||||||
DEFAULT_SOCKET_TIMEOUT, 0));
|
|
||||||
int sockSendBuffer = intOption(conf, SOCKET_SEND_BUFFER,
|
|
||||||
DEFAULT_SOCKET_SEND_BUFFER, 2048);
|
|
||||||
int sockRecvBuffer = intOption(conf, SOCKET_RECV_BUFFER,
|
|
||||||
DEFAULT_SOCKET_RECV_BUFFER, 2048);
|
|
||||||
awsConf.setSocketBufferSizeHints(sockSendBuffer, sockRecvBuffer);
|
|
||||||
String signerOverride = conf.getTrimmed(SIGNING_ALGORITHM, "");
|
|
||||||
if (!signerOverride.isEmpty()) {
|
|
||||||
LOG.debug("Signer override = {}", signerOverride);
|
|
||||||
awsConf.setSignerOverride(signerOverride);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initializes AWS SDK proxy support if configured.
|
|
||||||
*
|
|
||||||
* @param conf Hadoop configuration
|
|
||||||
* @param awsConf AWS SDK configuration
|
|
||||||
* @throws IllegalArgumentException if misconfigured
|
|
||||||
*/
|
|
||||||
private static void initProxySupport(Configuration conf,
|
|
||||||
ClientConfiguration awsConf) throws IllegalArgumentException {
|
|
||||||
String proxyHost = conf.getTrimmed(PROXY_HOST, "");
|
|
||||||
int proxyPort = conf.getInt(PROXY_PORT, -1);
|
|
||||||
if (!proxyHost.isEmpty()) {
|
|
||||||
awsConf.setProxyHost(proxyHost);
|
|
||||||
if (proxyPort >= 0) {
|
|
||||||
awsConf.setProxyPort(proxyPort);
|
|
||||||
} else {
|
|
||||||
if (conf.getBoolean(SECURE_CONNECTIONS, DEFAULT_SECURE_CONNECTIONS)) {
|
|
||||||
LOG.warn("Proxy host set without port. Using HTTPS default 443");
|
|
||||||
awsConf.setProxyPort(443);
|
|
||||||
} else {
|
|
||||||
LOG.warn("Proxy host set without port. Using HTTP default 80");
|
|
||||||
awsConf.setProxyPort(80);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
String proxyUsername = conf.getTrimmed(PROXY_USERNAME);
|
|
||||||
String proxyPassword = conf.getTrimmed(PROXY_PASSWORD);
|
|
||||||
if ((proxyUsername == null) != (proxyPassword == null)) {
|
|
||||||
String msg = "Proxy error: " + PROXY_USERNAME + " or " +
|
|
||||||
PROXY_PASSWORD + " set without the other.";
|
|
||||||
LOG.error(msg);
|
|
||||||
throw new IllegalArgumentException(msg);
|
|
||||||
}
|
|
||||||
awsConf.setProxyUsername(proxyUsername);
|
|
||||||
awsConf.setProxyPassword(proxyPassword);
|
|
||||||
awsConf.setProxyDomain(conf.getTrimmed(PROXY_DOMAIN));
|
|
||||||
awsConf.setProxyWorkstation(conf.getTrimmed(PROXY_WORKSTATION));
|
|
||||||
if (LOG.isDebugEnabled()) {
|
|
||||||
LOG.debug("Using proxy server {}:{} as user {} with password {} on " +
|
|
||||||
"domain {} as workstation {}", awsConf.getProxyHost(),
|
|
||||||
awsConf.getProxyPort(),
|
|
||||||
String.valueOf(awsConf.getProxyUsername()),
|
|
||||||
awsConf.getProxyPassword(), awsConf.getProxyDomain(),
|
|
||||||
awsConf.getProxyWorkstation());
|
|
||||||
}
|
|
||||||
} else if (proxyPort >= 0) {
|
|
||||||
String msg =
|
|
||||||
"Proxy error: " + PROXY_PORT + " set without " + PROXY_HOST;
|
|
||||||
LOG.error(msg);
|
|
||||||
throw new IllegalArgumentException(msg);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initializes the User-Agent header to send in HTTP requests to the S3
|
|
||||||
* back-end. We always include the Hadoop version number. The user also
|
|
||||||
* may set an optional custom prefix to put in front of the Hadoop version
|
|
||||||
* number. The AWS SDK interally appends its own information, which seems
|
|
||||||
* to include the AWS SDK version, OS and JVM version.
|
|
||||||
*
|
|
||||||
* @param conf Hadoop configuration
|
|
||||||
* @param awsConf AWS SDK configuration
|
|
||||||
*/
|
|
||||||
private static void initUserAgent(Configuration conf,
|
|
||||||
ClientConfiguration awsConf) {
|
|
||||||
String userAgent = "Hadoop " + VersionInfo.getVersion();
|
|
||||||
String userAgentPrefix = conf.getTrimmed(USER_AGENT_PREFIX, "");
|
|
||||||
if (!userAgentPrefix.isEmpty()) {
|
|
||||||
userAgent = userAgentPrefix + ", " + userAgent;
|
|
||||||
}
|
|
||||||
LOG.debug("Using User-Agent: {}", userAgent);
|
|
||||||
awsConf.setUserAgentPrefix(userAgent);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates an {@link AmazonS3Client} from the established configuration.
|
|
||||||
*
|
|
||||||
* @param conf Hadoop configuration
|
|
||||||
* @param credentials AWS credentials
|
|
||||||
* @param awsConf AWS SDK configuration
|
|
||||||
* @return S3 client
|
|
||||||
* @throws IllegalArgumentException if misconfigured
|
|
||||||
*/
|
|
||||||
private static AmazonS3 createAmazonS3Client(Configuration conf,
|
|
||||||
AWSCredentialsProvider credentials, ClientConfiguration awsConf)
|
|
||||||
throws IllegalArgumentException {
|
|
||||||
AmazonS3 s3 = new AmazonS3Client(credentials, awsConf);
|
|
||||||
String endPoint = conf.getTrimmed(ENDPOINT, "");
|
|
||||||
if (!endPoint.isEmpty()) {
|
|
||||||
try {
|
|
||||||
s3.setEndpoint(endPoint);
|
|
||||||
} catch (IllegalArgumentException e) {
|
|
||||||
String msg = "Incorrect endpoint: " + e.getMessage();
|
|
||||||
LOG.error(msg);
|
|
||||||
throw new IllegalArgumentException(msg, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
enablePathStyleAccessIfRequired(s3, conf);
|
|
||||||
return s3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Enables path-style access to S3 buckets if configured. By default, the
|
|
||||||
* behavior is to use virtual hosted-style access with URIs of the form
|
|
||||||
* http://bucketname.s3.amazonaws.com. Enabling path-style access and a
|
|
||||||
* region-specific endpoint switches the behavior to use URIs of the form
|
|
||||||
* http://s3-eu-west-1.amazonaws.com/bucketname.
|
|
||||||
*
|
|
||||||
* @param s3 S3 client
|
|
||||||
* @param conf Hadoop configuration
|
|
||||||
*/
|
|
||||||
private static void enablePathStyleAccessIfRequired(AmazonS3 s3,
|
|
||||||
Configuration conf) {
|
|
||||||
final boolean pathStyleAccess = conf.getBoolean(PATH_STYLE_ACCESS, false);
|
|
||||||
if (pathStyleAccess) {
|
|
||||||
LOG.debug("Enabling path style access!");
|
|
||||||
s3.setS3ClientOptions(S3ClientOptions.builder()
|
|
||||||
.setPathStyleAccess(true)
|
|
||||||
.build());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
@@ -140,7 +140,18 @@ public enum Statistic {
   STREAM_WRITE_TOTAL_DATA("stream_write_total_data",
       "Count of total data uploaded in block output"),
   STREAM_WRITE_QUEUE_DURATION("stream_write_queue_duration",
-      "Total queue duration of all block uploads");
+      "Total queue duration of all block uploads"),
+
+  // S3Guard stats
+  S3GUARD_METADATASTORE_PUT_PATH_REQUEST(
+      "s3guard_metadatastore_put_path_request",
+      "s3guard metadata store put one metadata path request"),
+  S3GUARD_METADATASTORE_PUT_PATH_LATENCY(
+      "s3guard_metadatastore_put_path_latency",
+      "s3guard metadata store put one metadata path latency"),
+  S3GUARD_METADATASTORE_INITIALIZATION("s3guard_metadatastore_initialization",
+      "s3guard metadata store initialization times");
+

   private static final Map<String, Statistic> SYMBOL_MAP =
       new HashMap<>(Statistic.values().length);
@@ -0,0 +1,32 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

/**
 * Simple enum to express {true, false, don't know}.
 */
public enum Tristate {
  // Do not add additional values here.  Logic will assume there are exactly
  // three possibilities.
  TRUE, FALSE, UNKNOWN;

  public static Tristate fromBool(boolean v) {
    return v ? TRUE : FALSE;
  }
}
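Reviewer note: a self-contained sketch of how the three values are meant to be consumed; the directory-emptiness wording is illustrative only. Only a definite TRUE or FALSE should be acted on, UNKNOWN forces the caller to go back to S3.

    import org.apache.hadoop.fs.s3a.Tristate;

    public class TristateDemo {
      // Only act on a definite answer; UNKNOWN means "consult the store".
      static String describe(Tristate empty) {
        switch (empty) {
        case TRUE:  return "known empty";
        case FALSE: return "known non-empty";
        default:    return "unknown - consult S3 directly";
        }
      }

      public static void main(String[] args) {
        System.out.println(describe(Tristate.fromBool(true)));  // known empty
        System.out.println(describe(Tristate.UNKNOWN));         // unknown - consult S3 directly
      }
    }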
@@ -0,0 +1,43 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import com.amazonaws.services.s3.transfer.Upload;

/**
 * Simple struct that contains information about a S3 upload.
 */
public class UploadInfo {
  private final Upload upload;
  private final long length;

  public UploadInfo(Upload upload, long length) {
    this.upload = upload;
    this.length = length;
  }

  public Upload getUpload() {
    return upload;
  }

  public long getLength() {
    return length;
  }

}
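Reviewer note: UploadInfo only pairs the SDK's Upload handle with the byte count being written, so the length is still available when the transfer completes (see the finishedWrite(key, info.getLength()) change above). A sketch of how a caller might build one; the helper name and the content-length handling are assumptions, not code from this patch:

    import com.amazonaws.services.s3.model.PutObjectRequest;
    import com.amazonaws.services.s3.transfer.TransferManager;
    import org.apache.hadoop.fs.s3a.UploadInfo;

    final class UploadInfoSketch {
      // 'tm' must be an initialized TransferManager; the request is expected
      // to carry a known content length in its ObjectMetadata.
      static UploadInfo startUpload(TransferManager tm, PutObjectRequest request) {
        long len = request.getMetadata().getContentLength();
        return new UploadInfo(tm.upload(request), len);
      }
    }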
@@ -0,0 +1,142 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;
import java.util.Collection;
import java.util.LinkedList;
import java.util.NoSuchElementException;
import java.util.Queue;

import com.google.common.base.Preconditions;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * {@code DescendantsIterator} is a {@link RemoteIterator} that implements
 * pre-ordering breadth-first traversal (BFS) of a path and all of its
 * descendants recursively.  After visiting each path, that path's direct
 * children are discovered by calling {@link MetadataStore#listChildren(Path)}.
 * Each iteration returns the next direct child, and if that child is a
 * directory, also pushes it onto a queue to discover its children later.
 *
 * For example, assume the consistent store contains metadata representing this
 * file system structure:
 *
 * <pre>
 * /dir1
 * |-- dir2
 * |   |-- file1
 * |   `-- file2
 * `-- dir3
 *     |-- dir4
 *     |   `-- file3
 *     |-- dir5
 *     |   `-- file4
 *     `-- dir6
 * </pre>
 *
 * Consider this code sample:
 * <pre>
 * final PathMetadata dir1 = get(new Path("/dir1"));
 * for (DescendantsIterator descendants = new DescendantsIterator(dir1);
 *     descendants.hasNext(); ) {
 *   final FileStatus status = descendants.next().getFileStatus();
 *   System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
 *       status.getPath());
 * }
 * </pre>
 *
 * The output is:
 * <pre>
 * D /dir1
 * D /dir1/dir2
 * D /dir1/dir3
 * F /dir1/dir2/file1
 * F /dir1/dir2/file2
 * D /dir1/dir3/dir4
 * D /dir1/dir3/dir5
 * F /dir1/dir3/dir4/file3
 * F /dir1/dir3/dir5/file4
 * D /dir1/dir3/dir6
 * </pre>
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class DescendantsIterator implements RemoteIterator<FileStatus> {

  private final MetadataStore metadataStore;
  private final Queue<PathMetadata> queue = new LinkedList<>();

  /**
   * Creates a new {@code DescendantsIterator}.
   *
   * @param ms the associated {@link MetadataStore}
   * @param meta base path for descendants iteration, which will be the first
   *     returned during iteration (except root). Null makes empty iterator.
   * @throws IOException if errors happen during metadata store listing
   */
  public DescendantsIterator(MetadataStore ms, PathMetadata meta)
      throws IOException {
    Preconditions.checkNotNull(ms);
    this.metadataStore = ms;

    if (meta != null) {
      final Path path = meta.getFileStatus().getPath();
      if (path.isRoot()) {
        DirListingMetadata rootListing = ms.listChildren(path);
        if (rootListing != null) {
          rootListing = rootListing.withoutTombstones();
          queue.addAll(rootListing.getListing());
        }
      } else {
        queue.add(meta);
      }
    }
  }

  @Override
  public boolean hasNext() throws IOException {
    return !queue.isEmpty();
  }

  @Override
  public FileStatus next() throws IOException {
    if (!hasNext()) {
      throw new NoSuchElementException("No more descendants.");
    }
    PathMetadata next;
    next = queue.poll();
    if (next.getFileStatus().isDirectory()) {
      final Path path = next.getFileStatus().getPath();
      DirListingMetadata meta = metadataStore.listChildren(path);
      if (meta != null) {
        Collection<PathMetadata> more = meta.withoutTombstones().getListing();
        if (!more.isEmpty()) {
          queue.addAll(more);
        }
      }
    }
    return next.getFileStatus();
  }
}
@ -0,0 +1,322 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.Tristate;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* {@code DirListingMetadata} models a directory listing stored in a
|
||||||
|
* {@link MetadataStore}. Instances of this class are mutable and thread-safe.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
public class DirListingMetadata {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience parameter for passing into constructor.
|
||||||
|
*/
|
||||||
|
public static final Collection<PathMetadata> EMPTY_DIR =
|
||||||
|
Collections.emptyList();
|
||||||
|
|
||||||
|
private final Path path;
|
||||||
|
|
||||||
|
/** Using a map for fast find / remove with large directories. */
|
||||||
|
private Map<Path, PathMetadata> listMap = new ConcurrentHashMap<>();
|
||||||
|
|
||||||
|
private boolean isAuthoritative;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a directory listing metadata container.
|
||||||
|
*
|
||||||
|
* @param path Path of the directory. If this path has a host component, then
|
||||||
|
* all paths added later via {@link #put(FileStatus)} must also have
|
||||||
|
* the same host.
|
||||||
|
* @param listing Entries in the directory.
|
||||||
|
* @param isAuthoritative true iff listing is the full contents of the
|
||||||
|
* directory, and the calling client reports that this may be cached as
|
||||||
|
* the full and authoritative listing of all files in the directory.
|
||||||
|
*/
|
||||||
|
public DirListingMetadata(Path path, Collection<PathMetadata> listing,
|
||||||
|
boolean isAuthoritative) {
|
||||||
|
|
||||||
|
checkPathAbsolute(path);
|
||||||
|
this.path = path;
|
||||||
|
|
||||||
|
if (listing != null) {
|
||||||
|
for (PathMetadata entry : listing) {
|
||||||
|
Path childPath = entry.getFileStatus().getPath();
|
||||||
|
checkChildPath(childPath);
|
||||||
|
listMap.put(childPath, entry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
this.isAuthoritative = isAuthoritative;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Copy constructor.
|
||||||
|
* @param d the existing {@link DirListingMetadata} object.
|
||||||
|
*/
|
||||||
|
public DirListingMetadata(DirListingMetadata d) {
|
||||||
|
path = d.path;
|
||||||
|
isAuthoritative = d.isAuthoritative;
|
||||||
|
listMap = new ConcurrentHashMap<>(d.listMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return {@code Path} of the directory that contains this listing.
|
||||||
|
*/
|
||||||
|
public Path getPath() {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return entries in the directory
|
||||||
|
*/
|
||||||
|
public Collection<PathMetadata> getListing() {
|
||||||
|
return Collections.unmodifiableCollection(listMap.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
public Set<Path> listTombstones() {
|
||||||
|
Set<Path> tombstones = new HashSet<>();
|
||||||
|
for (PathMetadata meta : listMap.values()) {
|
||||||
|
if (meta.isDeleted()) {
|
||||||
|
tombstones.add(meta.getFileStatus().getPath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tombstones;
|
||||||
|
}
|
||||||
|
|
||||||
|
public DirListingMetadata withoutTombstones() {
|
||||||
|
Collection<PathMetadata> filteredList = new ArrayList<>();
|
||||||
|
for (PathMetadata meta : listMap.values()) {
|
||||||
|
if (!meta.isDeleted()) {
|
||||||
|
filteredList.add(meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new DirListingMetadata(path, filteredList, isAuthoritative);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return number of entries tracked. This is not the same as the number
|
||||||
|
* of entries in the actual directory unless {@link #isAuthoritative()} is
|
||||||
|
* true.
|
||||||
|
*/
|
||||||
|
public int numEntries() {
|
||||||
|
return listMap.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true iff this directory listing is full and authoritative within
|
||||||
|
* the scope of the {@code MetadataStore} that returned it.
|
||||||
|
*/
|
||||||
|
public boolean isAuthoritative() {
|
||||||
|
return isAuthoritative;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is the underlying directory known to be empty?
|
||||||
|
* @return FALSE if directory is known to have a child entry, TRUE if
|
||||||
|
* directory is known to be empty, UNKNOWN otherwise.
|
||||||
|
*/
|
||||||
|
public Tristate isEmpty() {
|
||||||
|
if (getListing().isEmpty()) {
|
||||||
|
if (isAuthoritative()) {
|
||||||
|
return Tristate.TRUE;
|
||||||
|
} else {
|
||||||
|
// This listing is empty, but may not be full list of underlying dir.
|
||||||
|
return Tristate.UNKNOWN;
|
||||||
|
}
|
||||||
|
} else { // not empty listing
|
||||||
|
// There exists at least one child, dir not empty.
|
||||||
|
return Tristate.FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Marks this directory listing as full and authoritative.
|
||||||
|
* @param authoritative see {@link #isAuthoritative()}.
|
||||||
|
*/
|
||||||
|
public void setAuthoritative(boolean authoritative) {
|
||||||
|
this.isAuthoritative = authoritative;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Lookup entry within this directory listing. This may return null if the
|
||||||
|
* {@code MetadataStore} only tracks a partial set of the directory entries.
|
||||||
|
* In the case where {@link #isAuthoritative()} is true, however, this
|
||||||
|
* function returns null iff the directory is known not to contain the listing
|
||||||
|
* at given path (within the scope of the {@code MetadataStore} that returned
|
||||||
|
* it).
|
||||||
|
*
|
||||||
|
* @param childPath path of entry to look for.
|
||||||
|
* @return entry, or null if it is not present or not being tracked.
|
||||||
|
*/
|
||||||
|
public PathMetadata get(Path childPath) {
|
||||||
|
checkChildPath(childPath);
|
||||||
|
return listMap.get(childPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Replace an entry with a tombstone.
|
||||||
|
* @param childPath path of entry to replace.
|
||||||
|
*/
|
||||||
|
public void markDeleted(Path childPath) {
|
||||||
|
checkChildPath(childPath);
|
||||||
|
listMap.put(childPath, PathMetadata.tombstone(childPath));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Remove entry from this directory.
|
||||||
|
*
|
||||||
|
* @param childPath path of entry to remove.
|
||||||
|
*/
|
||||||
|
public void remove(Path childPath) {
|
||||||
|
checkChildPath(childPath);
|
||||||
|
listMap.remove(childPath);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add an entry to the directory listing. If this listing already contains a
|
||||||
|
* {@code FileStatus} with the same path, it will be replaced.
|
||||||
|
*
|
||||||
|
* @param childFileStatus entry to add to this directory listing.
|
||||||
|
* @return true if the status was added or replaced with a new value. False
|
||||||
|
* if the same FileStatus value was already present.
|
||||||
|
*/
|
||||||
|
public boolean put(FileStatus childFileStatus) {
|
||||||
|
Preconditions.checkNotNull(childFileStatus,
|
||||||
|
"childFileStatus must be non-null");
|
||||||
|
Path childPath = childStatusToPathKey(childFileStatus);
|
||||||
|
PathMetadata newValue = new PathMetadata(childFileStatus);
|
||||||
|
PathMetadata oldValue = listMap.put(childPath, newValue);
|
||||||
|
return oldValue == null || !oldValue.equals(newValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return "DirListingMetadata{" +
|
||||||
|
"path=" + path +
|
||||||
|
", listMap=" + listMap +
|
||||||
|
", isAuthoritative=" + isAuthoritative +
|
||||||
|
'}';
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log contents to supplied StringBuilder in a pretty fashion.
|
||||||
|
* @param sb target StringBuilder
|
||||||
|
*/
|
||||||
|
public void prettyPrint(StringBuilder sb) {
|
||||||
|
sb.append(String.format("DirMeta %-20s %-18s",
|
||||||
|
path.toString(),
|
||||||
|
isAuthoritative ? "Authoritative" : "Not Authoritative"));
|
||||||
|
for (Map.Entry<Path, PathMetadata> entry : listMap.entrySet()) {
|
||||||
|
sb.append("\n key: ").append(entry.getKey()).append(": ");
|
||||||
|
entry.getValue().prettyPrint(sb);
|
||||||
|
}
|
||||||
|
sb.append("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
public String prettyPrint() {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
prettyPrint(sb);
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks that child path is valid.
|
||||||
|
* @param childPath path to check.
|
||||||
|
*/
|
||||||
|
private void checkChildPath(Path childPath) {
|
||||||
|
checkPathAbsolute(childPath);
|
||||||
|
|
||||||
|
// If this dir's path has host (and thus scheme), so must its children
|
||||||
|
URI parentUri = path.toUri();
|
||||||
|
if (parentUri.getHost() != null) {
|
||||||
|
URI childUri = childPath.toUri();
|
||||||
|
Preconditions.checkNotNull(childUri.getHost(), "Expected non-null URI " +
|
||||||
|
"host: %s", childUri);
|
||||||
|
Preconditions.checkArgument(
|
||||||
|
childUri.getHost().equals(parentUri.getHost()),
|
||||||
|
"childUri %s and parentUri %s must have the same host",
|
||||||
|
childUri, parentUri);
|
||||||
|
Preconditions.checkNotNull(childUri.getScheme(), "No scheme in path %s",
|
||||||
|
childUri);
|
||||||
|
}
|
||||||
|
Preconditions.checkArgument(!childPath.isRoot(),
|
||||||
|
"childPath cannot be the root path: %s", childPath);
|
||||||
|
Preconditions.checkArgument(childPath.getParent().equals(path),
|
||||||
|
"childPath %s must be a child of %s", childPath, path);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For Paths that are handed in directly, we assert they are in consistent
|
||||||
|
* format with checkPath(). For paths that are supplied embedded in
|
||||||
|
* FileStatus, we attempt to fill in missing scheme and host, when this
|
||||||
|
* DirListingMetadata is associated with one.
|
||||||
|
*
|
||||||
|
* @return Path suitable for consistent hashtable lookups
|
||||||
|
* @throws NullPointerException null status argument
|
||||||
|
* @throws IllegalArgumentException bad status values or failure to
|
||||||
|
* create a URI.
|
||||||
|
*/
|
||||||
|
private Path childStatusToPathKey(FileStatus status) {
|
||||||
|
Path p = status.getPath();
|
||||||
|
Preconditions.checkNotNull(p, "Child status' path cannot be null");
|
||||||
|
Preconditions.checkArgument(!p.isRoot(),
|
||||||
|
"childPath cannot be the root path: %s", p);
|
||||||
|
Preconditions.checkArgument(p.getParent().equals(path),
|
||||||
|
"childPath %s must be a child of %s", p, path);
|
||||||
|
URI uri = p.toUri();
|
||||||
|
URI parentUri = path.toUri();
|
||||||
|
// If FileStatus' path is missing host, but should have one, add it.
|
||||||
|
if (uri.getHost() == null && parentUri.getHost() != null) {
|
||||||
|
try {
|
||||||
|
return new Path(new URI(parentUri.getScheme(), parentUri.getHost(),
|
||||||
|
uri.getPath(), uri.getFragment()));
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new IllegalArgumentException("FileStatus path invalid with" +
|
||||||
|
" added " + parentUri.getScheme() + "://" + parentUri.getHost() +
|
||||||
|
" added", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkPathAbsolute(Path p) {
|
||||||
|
Preconditions.checkNotNull(p, "path must be non-null");
|
||||||
|
Preconditions.checkArgument(p.isAbsolute(), "path must be absolute: %s", p);
|
||||||
|
}
|
||||||
|
}
|
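Reviewer note: a minimal, self-contained sketch of the DirListingMetadata semantics defined above, showing the three-valued emptiness answer and tombstone handling. The paths, sizes and timestamps are illustrative only.

    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;

    public class DirListingMetadataDemo {
      public static void main(String[] args) {
        Path dir = new Path("/dir1");
        // An empty, non-authoritative listing: emptiness is UNKNOWN because the
        // store cannot rule out children it has never been told about.
        DirListingMetadata listing =
            new DirListingMetadata(dir, DirListingMetadata.EMPTY_DIR, false);
        System.out.println(listing.isEmpty());                       // UNKNOWN

        // Record one child file, then delete it again via a tombstone.
        FileStatus child = new FileStatus(1024, false, 1, 4096,
            System.currentTimeMillis(), new Path("/dir1/file1"));
        listing.put(child);
        System.out.println(listing.isEmpty());                       // FALSE
        listing.markDeleted(child.getPath());
        System.out.println(listing.withoutTombstones().numEntries()); // 0
      }
    }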
@@ -0,0 +1,132 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;

import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
import com.google.common.base.Preconditions;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.s3a.DefaultS3ClientFactory;

import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;
import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;

/**
 * Interface to create a DynamoDB client.
 *
 * Implementation should be configured for setting and getting configuration.
 */
@InterfaceAudience.Private
public interface DynamoDBClientFactory extends Configurable {
  Logger LOG = LoggerFactory.getLogger(DynamoDBClientFactory.class);

  /**
   * Create a DynamoDB client object from configuration.
   *
   * The DynamoDB client to create does not have to relate to any S3 buckets.
   * All information needed to create a DynamoDB client is from the hadoop
   * configuration. Specially, if the region is not configured, it will use the
   * provided region parameter. If region is neither configured nor provided,
   * it will indicate an error.
   *
   * @param defaultRegion the default region of the AmazonDynamoDB client
   * @return a new DynamoDB client
   * @throws IOException if any IO error happens
   */
  AmazonDynamoDB createDynamoDBClient(String defaultRegion) throws IOException;

  /**
   * The default implementation for creating an AmazonDynamoDB.
   */
  class DefaultDynamoDBClientFactory extends Configured
      implements DynamoDBClientFactory {
    @Override
    public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
        throws IOException {
      Preconditions.checkNotNull(getConf(),
          "Should have been configured before usage");

      final Configuration conf = getConf();
      final AWSCredentialsProvider credentials =
          createAWSCredentialProviderSet(null, conf);
      final ClientConfiguration awsConf =
          DefaultS3ClientFactory.createAwsConf(conf);

      final String region = getRegion(conf, defaultRegion);
      LOG.debug("Creating DynamoDB client in region {}", region);

      return AmazonDynamoDBClientBuilder.standard()
          .withCredentials(credentials)
          .withClientConfiguration(awsConf)
          .withRegion(region)
          .build();
    }

    /**
     * Helper method to get and validate the AWS region for DynamoDBClient.
     *
     * @param conf configuration
     * @param defaultRegion the default region
     * @return configured region or else the provided default region
     * @throws IOException if the region is not valid
     */
    static String getRegion(Configuration conf, String defaultRegion)
        throws IOException {
      String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
      if (StringUtils.isEmpty(region)) {
        region = defaultRegion;
      }
      try {
        Regions.fromName(region);
      } catch (IllegalArgumentException | NullPointerException e) {
        throw new IOException("Invalid region specified: " + region + "; " +
            "Region can be configured with " + S3GUARD_DDB_REGION_KEY + ": " +
            validRegionsString());
      }
      return region;
    }

    private static String validRegionsString() {
      final String delimiter = ", ";
      Regions[] regions = Regions.values();
      StringBuilder sb = new StringBuilder();
      for (int i = 0; i < regions.length; i++) {
        if (i > 0) {
          sb.append(delimiter);
        }
        sb.append(regions[i].getName());
      }
      return sb.toString();

    }
  }

}
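Reviewer note: a sketch of the region-resolution behaviour of the factory above: the configured S3GUARD_DDB_REGION_KEY wins, the defaultRegion argument is only a fallback, and an unknown or missing region surfaces as an IOException. It assumes AWS credentials are resolvable through the usual provider chain; the region strings are illustrative.

    import java.io.IOException;

    import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory;

    import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;

    public class DynamoDBClientDemo {
      static AmazonDynamoDB createClient(Configuration conf) throws IOException {
        DynamoDBClientFactory factory =
            new DynamoDBClientFactory.DefaultDynamoDBClientFactory();
        factory.setConf(conf);
        // "us-east-1" here is only used if no region is configured.
        return factory.createDynamoDBClient("us-east-1");
      }

      public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set(S3GUARD_DDB_REGION_KEY, "eu-west-1");   // configured region wins
        AmazonDynamoDB ddb = createClient(conf);
        System.out.println("DynamoDB client created: " + ddb);
      }
    }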
File diff suppressed because it is too large
@ -0,0 +1,435 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.Tristate;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.LinkedList;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a local, in-memory, implementation of MetadataStore.
|
||||||
|
* This is <i>not</i> a coherent cache across processes. It is only
|
||||||
|
* locally-coherent.
|
||||||
|
*
|
||||||
|
* The purpose of this is for unit and integration testing.
|
||||||
|
* It could also be used to accelerate local-only operations where only one
|
||||||
|
* process is operating on a given object store, or multiple processes are
|
||||||
|
* accessing a read-only storage bucket.
|
||||||
|
*
|
||||||
|
* This MetadataStore does not enforce filesystem rules such as disallowing
|
||||||
|
* non-recursive removal of non-empty directories. It is assumed the caller
|
||||||
|
* already has to perform these sorts of checks.
|
||||||
|
*/
|
||||||
|
public class LocalMetadataStore implements MetadataStore {
|
||||||
|
|
||||||
|
public static final Logger LOG = LoggerFactory.getLogger(MetadataStore.class);
|
||||||
|
// TODO HADOOP-13649: use time instead of capacity for eviction.
|
||||||
|
public static final int DEFAULT_MAX_RECORDS = 128;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Maximum number of records.
|
||||||
|
*/
|
||||||
|
public static final String CONF_MAX_RECORDS =
|
||||||
|
"fs.metadatastore.local.max_records";
|
||||||
|
|
||||||
|
/** Contains directories and files. */
|
||||||
|
private LruHashMap<Path, PathMetadata> fileHash;
|
||||||
|
|
||||||
|
/** Contains directory listings. */
|
||||||
|
private LruHashMap<Path, DirListingMetadata> dirHash;
|
||||||
|
|
||||||
|
private FileSystem fs;
|
||||||
|
/* Null iff this FS does not have an associated URI host. */
|
||||||
|
private String uriHost;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize(FileSystem fileSystem) throws IOException {
|
||||||
|
Preconditions.checkNotNull(fileSystem);
|
||||||
|
fs = fileSystem;
|
||||||
|
URI fsURI = fs.getUri();
|
||||||
|
uriHost = fsURI.getHost();
|
||||||
|
if (uriHost != null && uriHost.equals("")) {
|
||||||
|
uriHost = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
initialize(fs.getConf());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void initialize(Configuration conf) throws IOException {
|
||||||
|
Preconditions.checkNotNull(conf);
|
||||||
|
int maxRecords = conf.getInt(CONF_MAX_RECORDS, DEFAULT_MAX_RECORDS);
|
||||||
|
if (maxRecords < 4) {
|
||||||
|
maxRecords = 4;
|
||||||
|
}
|
||||||
|
// Start w/ less than max capacity. Space / time trade off.
|
||||||
|
fileHash = new LruHashMap<>(maxRecords/2, maxRecords);
|
||||||
|
dirHash = new LruHashMap<>(maxRecords/4, maxRecords);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
final StringBuilder sb = new StringBuilder(
|
||||||
|
"LocalMetadataStore{");
|
||||||
|
sb.append(", uriHost='").append(uriHost).append('\'');
|
||||||
|
sb.append('}');
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void delete(Path p) throws IOException {
|
||||||
|
doDelete(p, false, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void forgetMetadata(Path p) throws IOException {
|
||||||
|
doDelete(p, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void deleteSubtree(Path path) throws IOException {
|
||||||
|
doDelete(path, true, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private synchronized void doDelete(Path p, boolean recursive, boolean
|
||||||
|
tombstone) {
|
||||||
|
|
||||||
|
Path path = standardize(p);
|
||||||
|
|
||||||
|
// Delete entry from file cache, then from cached parent directory, if any
|
||||||
|
|
||||||
|
deleteHashEntries(path, tombstone);
|
||||||
|
|
||||||
|
if (recursive) {
|
||||||
|
// Remove all entries that have this dir as path prefix.
|
||||||
|
deleteHashByAncestor(path, dirHash, tombstone);
|
||||||
|
deleteHashByAncestor(path, fileHash, tombstone);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized PathMetadata get(Path p) throws IOException {
|
||||||
|
return get(p, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public PathMetadata get(Path p, boolean wantEmptyDirectoryFlag)
|
||||||
|
throws IOException {
|
||||||
|
Path path = standardize(p);
|
||||||
|
synchronized (this) {
|
||||||
|
PathMetadata m = fileHash.mruGet(path);
|
||||||
|
|
||||||
|
if (wantEmptyDirectoryFlag && m != null &&
|
||||||
|
m.getFileStatus().isDirectory()) {
|
||||||
|
m.setIsEmptyDirectory(isEmptyDirectory(p));
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG.debug("get({}) -> {}", path, m == null ? "null" : m.prettyPrint());
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determine if directory is empty.
|
||||||
|
* Call with lock held.
|
||||||
|
* @param p a Path, already filtered through standardize()
|
||||||
|
* @return TRUE / FALSE if known empty / not-empty, UNKNOWN otherwise.
|
||||||
|
*/
|
||||||
|
private Tristate isEmptyDirectory(Path p) {
|
||||||
|
DirListingMetadata dirMeta = dirHash.get(p);
|
||||||
|
return dirMeta.withoutTombstones().isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized DirListingMetadata listChildren(Path p) throws
|
||||||
|
IOException {
|
||||||
|
Path path = standardize(p);
|
||||||
|
DirListingMetadata listing = dirHash.mruGet(path);
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("listChildren({}) -> {}", path,
|
||||||
|
listing == null ? "null" : listing.prettyPrint());
|
||||||
|
}
|
||||||
|
// Make a copy so callers can mutate without affecting our state
|
||||||
|
return listing == null ? null : new DirListingMetadata(listing);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void move(Collection<Path> pathsToDelete,
|
||||||
|
Collection<PathMetadata> pathsToCreate) throws IOException {
|
||||||
|
|
||||||
|
Preconditions.checkNotNull(pathsToDelete, "pathsToDelete is null");
|
||||||
|
Preconditions.checkNotNull(pathsToCreate, "pathsToCreate is null");
|
||||||
|
Preconditions.checkArgument(pathsToDelete.size() == pathsToCreate.size(),
|
||||||
|
"Must supply same number of paths to delete/create.");
|
||||||
|
|
||||||
|
// I feel dirty for using reentrant lock. :-|
|
||||||
|
synchronized (this) {
|
||||||
|
|
||||||
|
// 1. Delete pathsToDelete
|
||||||
|
for (Path meta : pathsToDelete) {
|
||||||
|
LOG.debug("move: deleting metadata {}", meta);
|
||||||
|
delete(meta);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 2. Create new destination path metadata
|
||||||
|
for (PathMetadata meta : pathsToCreate) {
|
||||||
|
LOG.debug("move: adding metadata {}", meta);
|
||||||
|
put(meta);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 3. We now know full contents of all dirs in destination subtree
|
||||||
|
for (PathMetadata meta : pathsToCreate) {
|
||||||
|
FileStatus status = meta.getFileStatus();
|
||||||
|
if (status == null || status.isDirectory()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
DirListingMetadata dir = listChildren(status.getPath());
|
||||||
|
if (dir != null) { // could be evicted already
|
||||||
|
dir.setAuthoritative(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void put(PathMetadata meta) throws IOException {
|
||||||
|
|
||||||
|
Preconditions.checkNotNull(meta);
|
||||||
|
FileStatus status = meta.getFileStatus();
|
||||||
|
Path path = standardize(status.getPath());
|
||||||
|
synchronized (this) {
|
||||||
|
|
||||||
|
/* Add entry for this file. */
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("put {} -> {}", path, meta.prettyPrint());
|
||||||
|
}
|
||||||
|
fileHash.put(path, meta);
|
||||||
|
|
||||||
|
/* Directory case:
|
||||||
|
* We also make sure we have an entry in the dirHash, so subsequent
|
||||||
|
* listStatus(path) at least see the directory.
|
||||||
|
*
|
||||||
|
* If we had a boolean flag argument "isNew", we would know whether this
|
||||||
|
* is an existing directory the client discovered via getFileStatus(),
|
||||||
|
* or if it is a newly-created directory. In the latter case, we would
|
||||||
|
* be able to mark the directory as authoritative (fully-cached),
|
||||||
|
* saving round trips to underlying store for subsequent listStatus()
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (status.isDirectory()) {
|
||||||
|
DirListingMetadata dir = dirHash.mruGet(path);
|
||||||
|
if (dir == null) {
|
||||||
|
dirHash.put(path, new DirListingMetadata(path, DirListingMetadata
|
||||||
|
.EMPTY_DIR, false));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update cached parent dir. */
|
||||||
|
Path parentPath = path.getParent();
|
||||||
|
if (parentPath != null) {
|
||||||
|
DirListingMetadata parent = dirHash.mruGet(parentPath);
|
||||||
|
if (parent == null) {
|
||||||
|
/* Track this new file's listing in parent. Parent is not
|
||||||
|
* authoritative, since there may be other items in it we don't know
|
||||||
|
* about. */
|
||||||
|
parent = new DirListingMetadata(parentPath,
|
||||||
|
DirListingMetadata.EMPTY_DIR, false);
|
||||||
|
dirHash.put(parentPath, parent);
|
||||||
|
}
|
||||||
|
parent.put(status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void put(DirListingMetadata meta) throws IOException {
|
||||||
|
if (LOG.isDebugEnabled()) {
|
||||||
|
LOG.debug("put dirMeta {}", meta.prettyPrint());
|
||||||
|
}
|
||||||
|
dirHash.put(standardize(meta.getPath()), meta);
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized void put(Collection<PathMetadata> metas) throws
|
||||||
|
IOException {
|
||||||
|
for (PathMetadata meta : metas) {
|
||||||
|
put(meta);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void destroy() throws IOException {
|
||||||
|
if (dirHash != null) {
|
||||||
|
dirHash.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void prune(long modTime) throws IOException {
|
||||||
|
Iterator<Map.Entry<Path, PathMetadata>> files =
|
||||||
|
fileHash.entrySet().iterator();
|
||||||
|
while (files.hasNext()) {
|
||||||
|
Map.Entry<Path, PathMetadata> entry = files.next();
|
||||||
|
if (expired(entry.getValue().getFileStatus(), modTime)) {
|
||||||
|
files.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Iterator<Map.Entry<Path, DirListingMetadata>> dirs =
|
||||||
|
dirHash.entrySet().iterator();
|
||||||
|
while (dirs.hasNext()) {
|
||||||
|
Map.Entry<Path, DirListingMetadata> entry = dirs.next();
|
||||||
|
Path path = entry.getKey();
|
||||||
|
DirListingMetadata metadata = entry.getValue();
|
||||||
|
Collection<PathMetadata> oldChildren = metadata.getListing();
|
||||||
|
Collection<PathMetadata> newChildren = new LinkedList<>();
|
||||||
|
|
||||||
|
for (PathMetadata child : oldChildren) {
|
||||||
|
FileStatus status = child.getFileStatus();
|
||||||
|
if (!expired(status, modTime)) {
|
||||||
|
newChildren.add(child);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (newChildren.size() != oldChildren.size()) {
|
||||||
|
dirHash.put(path, new DirListingMetadata(path, newChildren, false));
|
||||||
|
if (!path.isRoot()) {
|
||||||
|
DirListingMetadata parent = dirHash.get(path.getParent());
|
||||||
|
if (parent != null) {
|
||||||
|
parent.setAuthoritative(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean expired(FileStatus status, long expiry) {
|
||||||
|
// Note: S3 doesn't track modification time on directories, so for
|
||||||
|
// consistency with the DynamoDB implementation we ignore that here
|
||||||
|
return status.getModificationTime() < expiry && !status.isDirectory();
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
static <T> void deleteHashByAncestor(Path ancestor, Map<Path, T> hash,
|
||||||
|
boolean tombstone) {
|
||||||
|
for (Iterator<Map.Entry<Path, T>> it = hash.entrySet().iterator();
|
||||||
|
it.hasNext();) {
|
||||||
|
Map.Entry<Path, T> entry = it.next();
|
||||||
|
Path f = entry.getKey();
|
||||||
|
T meta = entry.getValue();
|
||||||
|
if (isAncestorOf(ancestor, f)) {
|
||||||
|
if (tombstone) {
|
||||||
|
if (meta instanceof PathMetadata) {
|
||||||
|
entry.setValue((T) PathMetadata.tombstone(f));
|
||||||
|
} else if (meta instanceof DirListingMetadata) {
|
||||||
|
it.remove();
|
||||||
|
} else {
|
||||||
|
throw new IllegalStateException("Unknown type in hash");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
it.remove();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true iff 'ancestor' is ancestor dir in path 'f'.
|
||||||
|
* All paths here are absolute. Dir does not count as its own ancestor.
|
||||||
|
*/
|
||||||
|
private static boolean isAncestorOf(Path ancestor, Path f) {
|
||||||
|
String aStr = ancestor.toString();
|
||||||
|
if (!ancestor.isRoot()) {
|
||||||
|
aStr += "/";
|
||||||
|
}
|
||||||
|
String fStr = f.toString();
|
||||||
|
return (fStr.startsWith(aStr));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update fileHash and dirHash to reflect deletion of file 'f'. Call with
|
||||||
|
* lock held.
|
||||||
|
*/
|
||||||
|
private void deleteHashEntries(Path path, boolean tombstone) {
|
||||||
|
|
||||||
|
// Remove target file/dir
|
||||||
|
LOG.debug("delete file entry for {}", path);
|
||||||
|
if (tombstone) {
|
||||||
|
fileHash.put(path, PathMetadata.tombstone(path));
|
||||||
|
} else {
|
||||||
|
fileHash.remove(path);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update this and parent dir listing, if any
|
||||||
|
|
||||||
|
/* If this path is a dir, remove its listing */
|
||||||
|
LOG.debug("removing listing of {}", path);
|
||||||
|
|
||||||
|
dirHash.remove(path);
|
||||||
|
|
||||||
|
/* Remove this path from parent's dir listing */
|
||||||
|
Path parent = path.getParent();
|
||||||
|
if (parent != null) {
|
||||||
|
DirListingMetadata dir = dirHash.get(parent);
|
||||||
|
if (dir != null) {
|
||||||
|
LOG.debug("removing parent's entry for {} ", path);
|
||||||
|
if (tombstone) {
|
||||||
|
dir.markDeleted(path);
|
||||||
|
} else {
|
||||||
|
dir.remove(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return a "standardized" version of a path so we always have a consistent
|
||||||
|
* hash value. Also asserts the path is absolute, and contains host
|
||||||
|
* component.
|
||||||
|
* @param p input Path
|
||||||
|
* @return standardized version of Path, suitable for hash key
|
||||||
|
*/
|
||||||
|
private Path standardize(Path p) {
|
||||||
|
Preconditions.checkArgument(p.isAbsolute(), "Path must be absolute");
|
||||||
|
URI uri = p.toUri();
|
||||||
|
if (uriHost != null) {
|
||||||
|
Preconditions.checkArgument(!isEmpty(uri.getHost()));
|
||||||
|
}
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isEmpty(String s) {
|
||||||
|
return (s == null || s.isEmpty());
|
||||||
|
}
|
||||||
|
}
|
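Reviewer note: a short sketch of the LocalMetadataStore put/get/prune cycle defined above. It assumes placement in org.apache.hadoop.fs.s3a.s3guard so that the PathMetadata(FileStatus) constructor used elsewhere in this patch is accessible; paths and timestamps are illustrative.

    package org.apache.hadoop.fs.s3a.s3guard;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.Path;

    public class LocalMetadataStoreDemo {
      public static void main(String[] args) throws Exception {
        LocalMetadataStore ms = new LocalMetadataStore();
        ms.initialize(new Configuration());

        Path file = new Path("/data/part-0000");
        // Entry whose modification time is far in the past.
        ms.put(new PathMetadata(
            new FileStatus(1024, false, 1, 4096, 1000L, file)));
        System.out.println(ms.get(file) != null);   // true

        // prune() drops file entries older than the given time; directory
        // entries are kept, since S3 has no directory modification times.
        ms.prune(System.currentTimeMillis());
        System.out.println(ms.get(file));           // null
      }
    }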
@ -0,0 +1,50 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.fs.s3a.s3guard;

import java.util.LinkedHashMap;
import java.util.Map;

/**
 * LinkedHashMap that implements a maximum size and LRU eviction policy.
 */
public class LruHashMap<K, V> extends LinkedHashMap<K, V> {
  private final int maxSize;

  public LruHashMap(int initialCapacity, int maxSize) {
    super(initialCapacity);
    this.maxSize = maxSize;
  }

  @Override
  protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
    return size() > maxSize;
  }

  /**
   * get() plus side-effect of making the element Most Recently Used.
   * @param key lookup key
   * @return value
   */
  public V mruGet(K key) {
    V val = remove(key);
    if (val != null) {
      put(key, val);
    }
    return val;
  }
}
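
A minimal usage sketch of the LRU map above; the capacity of 2 and the string keys are arbitrary, chosen only to show how mruGet() interacts with eviction:

    LruHashMap<String, Integer> cache = new LruHashMap<>(2, 2);
    cache.put("a", 1);
    cache.put("b", 2);
    cache.mruGet("a");   // re-inserts "a", so it is now the most recently used entry
    cache.put("c", 3);   // size() > maxSize, so the eldest entry "b" is evicted
    // cache now holds {a=1, c=3}
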
@ -0,0 +1,221 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.Closeable;
import java.io.IOException;
import java.util.Collection;

import com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * {@code MetadataStore} defines the set of operations that any metadata store
 * implementation must provide. Note that all {@link Path} objects provided
 * to methods must be absolute, not relative paths.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public interface MetadataStore extends Closeable {

  /**
   * Performs one-time initialization of the metadata store.
   *
   * @param fs {@code FileSystem} associated with the MetadataStore
   * @throws IOException if there is an error
   */
  void initialize(FileSystem fs) throws IOException;

  /**
   * Performs one-time initialization of the metadata store via configuration.
   * @see #initialize(FileSystem)
   * @param conf Configuration.
   * @throws IOException if there is an error
   */
  void initialize(Configuration conf) throws IOException;

  /**
   * Deletes exactly one path, leaving a tombstone to prevent lingering,
   * inconsistent copies of it from being listed.
   *
   * @param path the path to delete
   * @throws IOException if there is an error
   */
  void delete(Path path) throws IOException;

  /**
   * Removes the record of exactly one path. Does not leave a tombstone (see
   * {@link MetadataStore#delete(Path)}). It is currently intended for testing
   * only, and a need to use it as part of normal FileSystem usage is not
   * anticipated.
   *
   * @param path the path to delete
   * @throws IOException if there is an error
   */
  @VisibleForTesting
  void forgetMetadata(Path path) throws IOException;

  /**
   * Deletes the entire sub-tree rooted at the given path, leaving tombstones
   * to prevent lingering, inconsistent copies of it from being listed.
   *
   * In addition to affecting future calls to {@link #get(Path)},
   * implementations must also update any stored {@code DirListingMetadata}
   * objects which track the parent of this file.
   *
   * @param path the root of the sub-tree to delete
   * @throws IOException if there is an error
   */
  void deleteSubtree(Path path) throws IOException;

  /**
   * Gets metadata for a path.
   *
   * @param path the path to get
   * @return metadata for {@code path}, {@code null} if not found
   * @throws IOException if there is an error
   */
  PathMetadata get(Path path) throws IOException;

  /**
   * Gets metadata for a path. Alternate method that includes a hint
   * whether or not the MetadataStore should do work to compute the value for
   * {@link PathMetadata#isEmptyDirectory()}. Since determining emptiness
   * may be an expensive operation, this can save wasted work.
   *
   * @param path the path to get
   * @param wantEmptyDirectoryFlag Set to true to give a hint to the
   *     MetadataStore that it should try to compute the empty directory flag.
   * @return metadata for {@code path}, {@code null} if not found
   * @throws IOException if there is an error
   */
  PathMetadata get(Path path, boolean wantEmptyDirectoryFlag)
      throws IOException;

  /**
   * Lists metadata for all direct children of a path.
   *
   * @param path the path to list
   * @return metadata for all direct children of {@code path} which are being
   *     tracked by the MetadataStore, or {@code null} if the path was not
   *     found in the MetadataStore.
   * @throws IOException if there is an error
   */
  DirListingMetadata listChildren(Path path) throws IOException;

  /**
   * Record the effects of a {@link FileSystem#rename(Path, Path)} in the
   * MetadataStore. Clients provide explicit enumeration of the affected
   * paths (recursively), before and after the rename.
   *
   * This operation is not atomic, unless specific implementations claim
   * otherwise.
   *
   * On the need to provide an enumeration of directory trees instead of just
   * source and destination paths:
   * Since a MetadataStore does not have to track all metadata for the
   * underlying storage system, and a new MetadataStore may be created on an
   * existing underlying filesystem, this move() may be the first time the
   * MetadataStore sees the affected paths. Therefore, simply providing src
   * and destination paths may not be enough to record the deletions (under
   * src path) and creations (at destination) that are happening during the
   * rename().
   *
   * @param pathsToDelete Collection of all paths that were removed from the
   *                      source directory tree of the move.
   * @param pathsToCreate Collection of all PathMetadata for the new paths
   *                      that were created at the destination of the rename().
   * @throws IOException if there is an error
   */
  void move(Collection<Path> pathsToDelete,
      Collection<PathMetadata> pathsToCreate) throws IOException;

  /**
   * Saves metadata for exactly one path.
   *
   * Implementations may pre-create all the path's ancestors automatically.
   * Implementations must update any {@code DirListingMetadata} objects which
   * track the immediate parent of this file.
   *
   * @param meta the metadata to save
   * @throws IOException if there is an error
   */
  void put(PathMetadata meta) throws IOException;

  /**
   * Saves metadata for any number of paths.
   *
   * Semantics are otherwise the same as single-path puts.
   *
   * @param metas the metadata to save
   * @throws IOException if there is an error
   */
  void put(Collection<PathMetadata> metas) throws IOException;

  /**
   * Save directory listing metadata. Callers may save a partial directory
   * listing for a given path, or may store a complete and authoritative copy
   * of the directory listing. {@code MetadataStore} implementations may
   * subsequently keep track of all modifications to the directory contents at
   * this path, and return authoritative results from subsequent calls to
   * {@link #listChildren(Path)}. See {@link DirListingMetadata}.
   *
   * Any authoritative results returned are only authoritative for the scope
   * of the {@code MetadataStore}: a per-process {@code MetadataStore}, for
   * example, would only show results visible to that process, potentially
   * missing metadata updates (create, delete) made to the same path by
   * another process.
   *
   * @param meta Directory listing metadata.
   * @throws IOException if there is an error
   */
  void put(DirListingMetadata meta) throws IOException;

  /**
   * Destroy all resources associated with the metadata store.
   *
   * The destroyed resources can be DynamoDB tables, MySQL databases/tables, or
   * HDFS directories. Any operations after calling this method may possibly
   * fail.
   *
   * This operation is idempotent.
   *
   * @throws IOException if there is an error
   */
  void destroy() throws IOException;

  /**
   * Clear any metadata older than a specified time from the repository.
   * Implementations MUST clear file metadata, and MAY clear directory metadata
   * (s3a itself does not track modification time for directories).
   * Implementations may also choose to throw UnsupportedOperationException
   * instead. Note that modification times should be in UTC, as returned by
   * System.currentTimeMillis at the time of modification.
   *
   * @param modTime Oldest modification time to allow
   * @throws IOException if there is an error
   * @throws UnsupportedOperationException if not implemented
   */
  void prune(long modTime) throws IOException, UnsupportedOperationException;
}
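
A hedged sketch of how a client is expected to drive this contract; the bucket and file names are illustrative, and the store is obtained through S3Guard.getMetadataStore(fs) (defined later in this patch), which initializes it before returning:

    // Sketch only: "fs" is assumed to be an already-created S3A FileSystem.
    MetadataStore ms = S3Guard.getMetadataStore(fs);
    try {
      Path file = new Path("s3a://bucket/dir/file.txt");            // illustrative path
      ms.put(new PathMetadata(new FileStatus(1024, false, 1, 0, 0, file)));
      PathMetadata pm = ms.get(file);                                // null if the path is unknown
      DirListingMetadata children = ms.listChildren(file.getParent());
      ms.delete(file);                                               // records a tombstone
    } finally {
      ms.close();                                                    // callers own the store's lifecycle
    }
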
@ -0,0 +1,169 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;

import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;

/**
 * {@code MetadataStoreListFilesIterator} is a {@link RemoteIterator} that
 * is similar to {@code DescendantsIterator} but does not return directories
 * that have (or may have) children, and will also provide access to the set of
 * tombstones to allow recently deleted S3 objects to be filtered out from a
 * corresponding request. In other words, it returns tombstones and the same
 * set of objects that should exist in S3: empty directories and files, but not
 * other directories whose existence is inferred therefrom.
 *
 * For example, assume the consistent store contains metadata representing this
 * file system structure:
 *
 * <pre>
 * /dir1
 * |-- dir2
 * |   |-- file1
 * |   `-- file2
 * `-- dir3
 *     |-- dir4
 *     |   `-- file3
 *     |-- dir5
 *     |   `-- file4
 *     `-- dir6
 * </pre>
 *
 * Consider this code sample:
 * <pre>
 * final PathMetadata dir1 = ms.get(new Path("/dir1"));
 * for (MetadataStoreListFilesIterator files =
 *     new MetadataStoreListFilesIterator(ms, dir1, false); files.hasNext(); ) {
 *   final FileStatus status = files.next().getFileStatus();
 *   System.out.printf("%s %s%n", status.isDirectory() ? 'D' : 'F',
 *       status.getPath());
 * }
 * </pre>
 *
 * The output is:
 * <pre>
 * F /dir1/dir2/file1
 * F /dir1/dir2/file2
 * F /dir1/dir3/dir4/file3
 * F /dir1/dir3/dir5/file4
 * D /dir1/dir3/dir6
 * </pre>
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class MetadataStoreListFilesIterator implements
    RemoteIterator<FileStatus> {
  public static final Logger LOG = LoggerFactory.getLogger(
      MetadataStoreListFilesIterator.class);

  private final boolean allowAuthoritative;
  private final MetadataStore metadataStore;
  private final Set<Path> tombstones = new HashSet<>();
  private Iterator<FileStatus> leafNodesIterator = null;

  public MetadataStoreListFilesIterator(MetadataStore ms, PathMetadata meta,
      boolean allowAuthoritative) throws IOException {
    Preconditions.checkNotNull(ms);
    this.metadataStore = ms;
    this.allowAuthoritative = allowAuthoritative;
    prefetch(meta);
  }

  private void prefetch(PathMetadata meta) throws IOException {
    final Queue<PathMetadata> queue = new LinkedList<>();
    final Collection<FileStatus> leafNodes = new ArrayList<>();

    if (meta != null) {
      final Path path = meta.getFileStatus().getPath();
      if (path.isRoot()) {
        DirListingMetadata rootListing = metadataStore.listChildren(path);
        if (rootListing != null) {
          tombstones.addAll(rootListing.listTombstones());
          queue.addAll(rootListing.withoutTombstones().getListing());
        }
      } else {
        queue.add(meta);
      }
    }

    while (!queue.isEmpty()) {
      PathMetadata nextMetadata = queue.poll();
      FileStatus nextStatus = nextMetadata.getFileStatus();
      if (nextStatus.isFile()) {
        // All files are leaf nodes by definition
        leafNodes.add(nextStatus);
        continue;
      }
      if (nextStatus.isDirectory()) {
        final Path path = nextStatus.getPath();
        DirListingMetadata children = metadataStore.listChildren(path);
        if (children != null) {
          tombstones.addAll(children.listTombstones());
          Collection<PathMetadata> liveChildren =
              children.withoutTombstones().getListing();
          if (!liveChildren.isEmpty()) {
            // If it's a directory with children that are not all deleted, we
            // add the children to the queue and move on to the next node
            queue.addAll(liveChildren);
            continue;
          } else if (allowAuthoritative && children.isAuthoritative()) {
            leafNodes.add(nextStatus);
          }
        }
      }
      // Directories that *might* be empty are ignored for now, since we
      // cannot confirm that they are empty without incurring other costs.
      // Users of this class can still discover empty directories via S3's
      // fake directories, subject to the same consistency semantics as before.
      // The only other possibility is a symlink, which is unsupported on S3A.
    }
    leafNodesIterator = leafNodes.iterator();
  }

  @Override
  public boolean hasNext() {
    return leafNodesIterator.hasNext();
  }

  @Override
  public FileStatus next() {
    return leafNodesIterator.next();
  }

  public Set<Path> listTombstones() {
    return tombstones;
  }
}
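
Beyond the javadoc sample above, the tombstone set is the piece callers typically combine with a raw S3 listing; a hedged sketch with illustrative names:

    // Sketch only: "ms" is a MetadataStore, "dir" an absolute s3a:// path.
    MetadataStoreListFilesIterator files =
        new MetadataStoreListFilesIterator(ms, ms.get(dir), false);
    while (files.hasNext()) {
      FileStatus status = files.next();          // files plus known-empty directories
      // ... merge into the listing being built ...
    }
    Set<Path> tombstones = files.listTombstones();
    // Entries of the eventually-consistent S3 listing whose path is in
    // "tombstones" should be dropped before being returned to the caller.
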
@ -0,0 +1,104 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.IOException;
import java.util.Collection;

/**
 * A no-op implementation of MetadataStore. Clients that use this
 * implementation should behave the same as they would without any
 * MetadataStore.
 */
public class NullMetadataStore implements MetadataStore {

  @Override
  public void initialize(FileSystem fs) throws IOException {
  }

  @Override
  public void initialize(Configuration conf) throws IOException {
  }

  @Override
  public void close() throws IOException {
  }

  @Override
  public void delete(Path path) throws IOException {
  }

  @Override
  public void forgetMetadata(Path path) throws IOException {
  }

  @Override
  public void deleteSubtree(Path path) throws IOException {
  }

  @Override
  public PathMetadata get(Path path) throws IOException {
    return null;
  }

  @Override
  public PathMetadata get(Path path, boolean wantEmptyDirectoryFlag)
      throws IOException {
    return null;
  }

  @Override
  public DirListingMetadata listChildren(Path path) throws IOException {
    return null;
  }

  @Override
  public void move(Collection<Path> pathsToDelete,
      Collection<PathMetadata> pathsToCreate) throws IOException {
  }

  @Override
  public void put(PathMetadata meta) throws IOException {
  }

  @Override
  public void put(Collection<PathMetadata> meta) throws IOException {
  }

  @Override
  public void put(DirListingMetadata meta) throws IOException {
  }

  @Override
  public void destroy() throws IOException {
  }

  @Override
  public void prune(long modTime) {
  }

  @Override
  public String toString() {
    return "NullMetadataStore";
  }
}
@ -0,0 +1,143 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import com.google.common.base.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Tristate;

/**
 * {@code PathMetadata} models path metadata stored in the
 * {@link MetadataStore}.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class PathMetadata {

  private final FileStatus fileStatus;
  private Tristate isEmptyDirectory;
  private boolean isDeleted;

  /**
   * Create a tombstone from the current time.
   * @param path path to tombstone
   * @return the entry.
   */
  public static PathMetadata tombstone(Path path) {
    long now = System.currentTimeMillis();
    FileStatus status = new FileStatus(0, false, 0, 0, now, path);
    return new PathMetadata(status, Tristate.UNKNOWN, true);
  }

  /**
   * Creates a new {@code PathMetadata} containing given {@code FileStatus}.
   * @param fileStatus file status containing an absolute path.
   */
  public PathMetadata(FileStatus fileStatus) {
    this(fileStatus, Tristate.UNKNOWN);
  }

  public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir) {
    this(fileStatus, isEmptyDir, false);
  }

  public PathMetadata(FileStatus fileStatus, Tristate isEmptyDir, boolean
      isDeleted) {
    Preconditions.checkNotNull(fileStatus, "fileStatus must be non-null");
    Preconditions.checkNotNull(fileStatus.getPath(), "fileStatus path must be" +
        " non-null");
    Preconditions.checkArgument(fileStatus.getPath().isAbsolute(), "path must" +
        " be absolute");
    this.fileStatus = fileStatus;
    this.isEmptyDirectory = isEmptyDir;
    this.isDeleted = isDeleted;
  }

  /**
   * @return {@code FileStatus} contained in this {@code PathMetadata}.
   */
  public final FileStatus getFileStatus() {
    return fileStatus;
  }

  /**
   * Query if a directory is empty.
   * @return Tristate.TRUE if this is known to be an empty directory,
   * Tristate.FALSE if known to not be empty, and Tristate.UNKNOWN if the
   * MetadataStore does not have enough information to determine either way.
   */
  public Tristate isEmptyDirectory() {
    return isEmptyDirectory;
  }

  void setIsEmptyDirectory(Tristate isEmptyDirectory) {
    this.isEmptyDirectory = isEmptyDirectory;
  }

  public boolean isDeleted() {
    return isDeleted;
  }

  void setIsDeleted(boolean isDeleted) {
    this.isDeleted = isDeleted;
  }

  @Override
  public boolean equals(Object o) {
    if (!(o instanceof PathMetadata)) {
      return false;
    }
    return this.fileStatus.equals(((PathMetadata)o).fileStatus);
  }

  @Override
  public int hashCode() {
    return fileStatus.hashCode();
  }

  @Override
  public String toString() {
    return "PathMetadata{" +
        "fileStatus=" + fileStatus +
        "; isEmptyDirectory=" + isEmptyDirectory +
        "; isDeleted=" + isDeleted +
        '}';
  }

  /**
   * Log contents to supplied StringBuilder in a pretty fashion.
   * @param sb target StringBuilder
   */
  public void prettyPrint(StringBuilder sb) {
    sb.append(String.format("%-5s %-20s %-7d %-8s %-6s",
        fileStatus.isDirectory() ? "dir" : "file",
        fileStatus.getPath().toString(), fileStatus.getLen(),
        isEmptyDirectory.name(), isDeleted));
    sb.append(fileStatus);
  }

  public String prettyPrint() {
    StringBuilder sb = new StringBuilder();
    prettyPrint(sb);
    return sb.toString();
  }
}
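
A short sketch of how these entries behave; the length and path below are arbitrary, illustrative values:

    Path p = new Path("s3a://bucket/dir/file.txt");
    PathMetadata live = new PathMetadata(new FileStatus(1024, false, 1, 0, 0, p));
    live.isDeleted();           // false
    live.isEmptyDirectory();    // Tristate.UNKNOWN unless the store can tell

    PathMetadata gone = PathMetadata.tombstone(p);
    gone.isDeleted();           // true: listings treat this path as recently deleted
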
@ -0,0 +1,304 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Collection;

import com.amazonaws.services.dynamodbv2.document.Item;
import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
import com.amazonaws.services.dynamodbv2.model.KeyType;
import com.amazonaws.services.dynamodbv2.model.ScalarAttributeType;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.Tristate;

/**
 * Defines methods for translating between domain model objects and their
 * representations in the DynamoDB schema.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
final class PathMetadataDynamoDBTranslation {

  /** The HASH key name of each item. */
  @VisibleForTesting
  static final String PARENT = "parent";
  /** The RANGE key name of each item. */
  @VisibleForTesting
  static final String CHILD = "child";
  @VisibleForTesting
  static final String IS_DIR = "is_dir";
  @VisibleForTesting
  static final String MOD_TIME = "mod_time";
  @VisibleForTesting
  static final String FILE_LENGTH = "file_length";
  @VisibleForTesting
  static final String BLOCK_SIZE = "block_size";
  static final String IS_DELETED = "is_deleted";

  /** Table version field {@value} in version marker item. */
  @VisibleForTesting
  static final String TABLE_VERSION = "table_version";

  /** Table creation timestamp field {@value} in version marker item. */
  @VisibleForTesting
  static final String TABLE_CREATED = "table_created";

  /** The version marker field is invalid. */
  static final String E_NOT_VERSION_MARKER = "Not a version marker: ";

  /**
   * Returns the key schema for the DynamoDB table.
   *
   * @return DynamoDB key schema
   */
  static Collection<KeySchemaElement> keySchema() {
    return Arrays.asList(
        new KeySchemaElement(PARENT, KeyType.HASH),
        new KeySchemaElement(CHILD, KeyType.RANGE));
  }

  /**
   * Returns the attribute definitions for the DynamoDB table.
   *
   * @return DynamoDB attribute definitions
   */
  static Collection<AttributeDefinition> attributeDefinitions() {
    return Arrays.asList(
        new AttributeDefinition(PARENT, ScalarAttributeType.S),
        new AttributeDefinition(CHILD, ScalarAttributeType.S));
  }

  /**
   * Converts a DynamoDB item to a {@link PathMetadata}.
   *
   * @param item DynamoDB item to convert
   * @return {@code item} converted to a {@link PathMetadata}
   */
  static PathMetadata itemToPathMetadata(Item item, String username)
      throws IOException {
    if (item == null) {
      return null;
    }

    String parentStr = item.getString(PARENT);
    Preconditions.checkNotNull(parentStr, "No parent entry in item %s", item);
    String childStr = item.getString(CHILD);
    Preconditions.checkNotNull(childStr, "No child entry in item %s", item);

    // Skip table version markers, which are the only non-absolute paths stored.
    Path rawPath = new Path(parentStr, childStr);
    if (!rawPath.isAbsoluteAndSchemeAuthorityNull()) {
      return null;
    }

    Path parent = new Path(Constants.FS_S3A + ":/" + parentStr + "/");
    Path path = new Path(parent, childStr);

    boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
    final FileStatus fileStatus;
    if (isDir) {
      fileStatus = DynamoDBMetadataStore.makeDirStatus(path, username);
    } else {
      long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
      long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
      long block = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
      fileStatus = new FileStatus(len, false, 1, block, modTime, 0, null,
          username, username, path);
    }
    boolean isDeleted =
        item.hasAttribute(IS_DELETED) && item.getBoolean(IS_DELETED);

    return new PathMetadata(fileStatus, Tristate.UNKNOWN, isDeleted);
  }

  /**
   * Converts a {@link PathMetadata} to a DynamoDB item.
   *
   * @param meta {@link PathMetadata} to convert
   * @return {@code meta} converted to DynamoDB item
   */
  static Item pathMetadataToItem(PathMetadata meta) {
    Preconditions.checkNotNull(meta);
    final FileStatus status = meta.getFileStatus();
    final Item item = new Item().withPrimaryKey(pathToKey(status.getPath()));
    if (status.isDirectory()) {
      item.withBoolean(IS_DIR, true);
    } else {
      item.withLong(FILE_LENGTH, status.getLen())
          .withLong(MOD_TIME, status.getModificationTime())
          .withLong(BLOCK_SIZE, status.getBlockSize());
    }
    item.withBoolean(IS_DELETED, meta.isDeleted());
    return item;
  }

  /**
   * The version marker has a primary key whose PARENT is {@code name};
   * this MUST NOT be a value which represents an absolute path.
   * @param name name of the version marker
   * @param version version number
   * @param timestamp creation timestamp
   * @return an item representing a version marker.
   */
  static Item createVersionMarker(String name, int version, long timestamp) {
    return new Item().withPrimaryKey(createVersionMarkerPrimaryKey(name))
        .withInt(TABLE_VERSION, version)
        .withLong(TABLE_CREATED, timestamp);
  }

  /**
   * Create the primary key of the version marker.
   * @param name key name
   * @return the key to use when registering or resolving version markers
   */
  static PrimaryKey createVersionMarkerPrimaryKey(String name) {
    return new PrimaryKey(PARENT, name, CHILD, name);
  }

  /**
   * Extract the version from a version marker item.
   * @param marker version marker item
   * @return the extracted version field
   * @throws IOException if the item is not a version marker
   */
  static int extractVersionFromMarker(Item marker) throws IOException {
    if (marker.hasAttribute(TABLE_VERSION)) {
      return marker.getInt(TABLE_VERSION);
    } else {
      throw new IOException(E_NOT_VERSION_MARKER + marker);
    }
  }

  /**
   * Extract the creation time, if present.
   * @param marker version marker item
   * @return the creation time, or null
   * @throws IOException if the item is not a version marker
   */
  static Long extractCreationTimeFromMarker(Item marker) throws IOException {
    if (marker.hasAttribute(TABLE_CREATED)) {
      return marker.getLong(TABLE_CREATED);
    } else {
      return null;
    }
  }

  /**
   * Converts a collection of {@link PathMetadata} to a collection of DynamoDB
   * items.
   *
   * @see #pathMetadataToItem(PathMetadata)
   */
  static Item[] pathMetadataToItem(Collection<PathMetadata> metas) {
    if (metas == null) {
      return null;
    }

    final Item[] items = new Item[metas.size()];
    int i = 0;
    for (PathMetadata meta : metas) {
      items[i++] = pathMetadataToItem(meta);
    }
    return items;
  }

  /**
   * Converts a {@link Path} to a DynamoDB equality condition on that path as
   * parent, suitable for querying all direct children of the path.
   *
   * @param path the path; can not be null
   * @return DynamoDB equality condition on {@code path} as parent
   */
  static KeyAttribute pathToParentKeyAttribute(Path path) {
    return new KeyAttribute(PARENT, pathToParentKey(path));
  }

  /**
   * e.g. {@code pathToParentKey(s3a://bucket/path/a) -> /bucket/path/a}
   * @param path path to convert
   * @return string for parent key
   */
  static String pathToParentKey(Path path) {
    Preconditions.checkNotNull(path);
    Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
    URI uri = path.toUri();
    String bucket = uri.getHost();
    Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
        "Path missing bucket");
    String pKey = "/" + bucket + uri.getPath();

    // Strip trailing slash
    if (pKey.endsWith("/")) {
      pKey = pKey.substring(0, pKey.length() - 1);
    }
    return pKey;
  }

  /**
   * Converts a {@link Path} to a DynamoDB key, suitable for getting the item
   * matching the path.
   *
   * @param path the path; can not be null
   * @return DynamoDB key for item matching {@code path}
   */
  static PrimaryKey pathToKey(Path path) {
    Preconditions.checkArgument(!path.isRoot(),
        "Root path is not mapped to any PrimaryKey");
    return new PrimaryKey(PARENT, pathToParentKey(path.getParent()), CHILD,
        path.getName());
  }

  /**
   * Converts a collection of {@link Path} to a collection of DynamoDB keys.
   *
   * @see #pathToKey(Path)
   */
  static PrimaryKey[] pathToKey(Collection<Path> paths) {
    if (paths == null) {
      return null;
    }

    final PrimaryKey[] keys = new PrimaryKey[paths.size()];
    int i = 0;
    for (Path p : paths) {
      keys[i++] = pathToKey(p);
    }
    return keys;
  }

  /**
   * There is no need to instantiate this class.
   */
  private PathMetadataDynamoDBTranslation() {
  }

}
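
To make the key layout concrete, a worked example of the mapping (bucket and object names are illustrative):

    // pathToParentKey(new Path("s3a://bucket/dir"))     -> "/bucket/dir"   (HASH attribute "parent")
    // pathToKey(new Path("s3a://bucket/dir/data.csv"))  -> parent="/bucket/dir", child="data.csv"
    // itemToPathMetadata() reverses this, prefixing the parent key with the
    // s3a scheme and re-joining the child name to rebuild s3a://bucket/dir/data.csv.
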
@ -0,0 +1,463 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileStatus;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AInstrumentation;
|
||||||
|
import org.apache.hadoop.fs.s3a.Tristate;
|
||||||
|
import org.apache.hadoop.util.ReflectionUtils;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.Constants.S3_METADATA_STORE_IMPL;
|
||||||
|
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_LATENCY;
|
||||||
|
import static org.apache.hadoop.fs.s3a.Statistic.S3GUARD_METADATASTORE_PUT_PATH_REQUEST;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3AUtils.createUploadFileStatus;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Logic for integrating MetadataStore with S3A.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public final class S3Guard {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(S3Guard.class);
|
||||||
|
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
@VisibleForTesting
|
||||||
|
public static final String S3GUARD_DDB_CLIENT_FACTORY_IMPL =
|
||||||
|
"fs.s3a.s3guard.ddb.client.factory.impl";
|
||||||
|
|
||||||
|
static final Class<? extends DynamoDBClientFactory>
|
||||||
|
S3GUARD_DDB_CLIENT_FACTORY_IMPL_DEFAULT =
|
||||||
|
DynamoDBClientFactory.DefaultDynamoDBClientFactory.class;
|
||||||
|
private static final FileStatus[] EMPTY_LISTING = new FileStatus[0];
|
||||||
|
|
||||||
|
// Utility class. All static functions.
|
||||||
|
private S3Guard() { }
|
||||||
|
|
||||||
|
/* Utility functions. */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new instance of the configured MetadataStore.
|
||||||
|
* The returned MetadataStore will have been initialized via
|
||||||
|
* {@link MetadataStore#initialize(FileSystem)} by this function before
|
||||||
|
* returning it. Callers must clean up by calling
|
||||||
|
* {@link MetadataStore#close()} when done using the MetadataStore.
|
||||||
|
*
|
||||||
|
* @param fs FileSystem whose Configuration specifies which
|
||||||
|
* implementation to use.
|
||||||
|
* @return Reference to new MetadataStore.
|
||||||
|
* @throws IOException if the metadata store cannot be instantiated
|
||||||
|
*/
|
||||||
|
public static MetadataStore getMetadataStore(FileSystem fs)
|
||||||
|
throws IOException {
|
||||||
|
Preconditions.checkNotNull(fs);
|
||||||
|
Configuration conf = fs.getConf();
|
||||||
|
Preconditions.checkNotNull(conf);
|
||||||
|
MetadataStore msInstance;
|
||||||
|
try {
|
||||||
|
Class<? extends MetadataStore> msClass = getMetadataStoreClass(conf);
|
||||||
|
msInstance = ReflectionUtils.newInstance(msClass, conf);
|
||||||
|
LOG.debug("Using {} metadata store for {} filesystem",
|
||||||
|
msClass.getSimpleName(), fs.getScheme());
|
||||||
|
msInstance.initialize(fs);
|
||||||
|
return msInstance;
|
||||||
|
} catch (RuntimeException | IOException e) {
|
||||||
|
String message = "Failed to instantiate metadata store " +
|
||||||
|
conf.get(S3_METADATA_STORE_IMPL)
|
||||||
|
+ " defined in " + S3_METADATA_STORE_IMPL
|
||||||
|
+ ": " + e;
|
||||||
|
LOG.error(message, e);
|
||||||
|
if (e instanceof IOException) {
|
||||||
|
throw e;
|
||||||
|
} else {
|
||||||
|
throw new IOException(message, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Class<? extends MetadataStore> getMetadataStoreClass(
|
||||||
|
Configuration conf) {
|
||||||
|
if (conf == null) {
|
||||||
|
return NullMetadataStore.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
return conf.getClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
|
||||||
|
MetadataStore.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function which puts a given S3AFileStatus into the MetadataStore and
|
||||||
|
* returns the same S3AFileStatus. Instrumentation monitors the put operation.
|
||||||
|
* @param ms MetadataStore to {@code put()} into.
|
||||||
|
* @param status status to store
|
||||||
|
* @param instrumentation instrumentation of the s3a file system
|
||||||
|
* @return The same status as passed in
|
||||||
|
* @throws IOException if metadata store update failed
|
||||||
|
*/
|
||||||
|
public static S3AFileStatus putAndReturn(MetadataStore ms,
|
||||||
|
S3AFileStatus status,
|
||||||
|
S3AInstrumentation instrumentation) throws IOException {
|
||||||
|
long startTimeNano = System.nanoTime();
|
||||||
|
ms.put(new PathMetadata(status));
|
||||||
|
instrumentation.addValueToQuantiles(S3GUARD_METADATASTORE_PUT_PATH_LATENCY,
|
||||||
|
(System.nanoTime() - startTimeNano));
|
||||||
|
instrumentation.incrementCounter(S3GUARD_METADATASTORE_PUT_PATH_REQUEST, 1);
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert the data of a directory listing to an array of {@link FileStatus}
|
||||||
|
* entries. Tombstones are filtered out at this point. If the listing is null
|
||||||
|
* an empty array is returned.
|
||||||
|
* @param dirMeta directory listing -may be null
|
||||||
|
* @return a possibly-empty array of file status entries
|
||||||
|
*/
|
||||||
|
public static FileStatus[] dirMetaToStatuses(DirListingMetadata dirMeta) {
|
||||||
|
if (dirMeta == null) {
|
||||||
|
return EMPTY_LISTING;
|
||||||
|
}
|
||||||
|
|
||||||
|
Collection<PathMetadata> listing = dirMeta.getListing();
|
||||||
|
List<FileStatus> statuses = new ArrayList<>();
|
||||||
|
|
||||||
|
for (PathMetadata pm : listing) {
|
||||||
|
if (!pm.isDeleted()) {
|
||||||
|
statuses.add(pm.getFileStatus());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return statuses.toArray(new FileStatus[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given directory listing metadata from both the backing store and the
|
||||||
|
* MetadataStore, merge the two sources of truth to create a consistent
|
||||||
|
* view of the current directory contents, which can be returned to clients.
|
||||||
|
*
|
||||||
|
* Also update the MetadataStore to reflect the resulting directory listing.
|
||||||
|
*
|
||||||
|
* @param ms MetadataStore to use.
|
||||||
|
* @param path path to directory
|
||||||
|
* @param backingStatuses Directory listing from the backing store.
|
||||||
|
* @param dirMeta Directory listing from MetadataStore. May be null.
|
||||||
|
* @param isAuthoritative State of authoritative mode
|
||||||
|
* @return Final result of directory listing.
|
||||||
|
* @throws IOException if metadata store update failed
|
||||||
|
*/
|
||||||
|
public static FileStatus[] dirListingUnion(MetadataStore ms, Path path,
|
||||||
|
List<FileStatus> backingStatuses, DirListingMetadata dirMeta,
|
||||||
|
boolean isAuthoritative) throws IOException {
|
||||||
|
|
||||||
|
// Fast-path for NullMetadataStore
|
||||||
|
if (isNullMetadataStore(ms)) {
|
||||||
|
return backingStatuses.toArray(new FileStatus[backingStatuses.size()]);
|
||||||
|
}
|
||||||
|
|
||||||
|
assertQualified(path);
|
||||||
|
|
||||||
|
if (dirMeta == null) {
|
||||||
|
// The metadataStore had zero state for this directory
|
||||||
|
dirMeta = new DirListingMetadata(path, DirListingMetadata.EMPTY_DIR,
|
||||||
|
false);
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<Path> deleted = dirMeta.listTombstones();
|
||||||
|
|
||||||
|
// Since we treat the MetadataStore as a "fresher" or "consistent" view
|
||||||
|
// of metadata, we always use its metadata first.
|
||||||
|
|
||||||
|
// Since the authoritative case is already handled outside this function,
|
||||||
|
// we will basically start with the set of directory entries in the
|
||||||
|
// DirListingMetadata, and add any that only exist in the backingStatuses.
|
||||||
|
|
||||||
|
boolean changed = false;
|
||||||
|
for (FileStatus s : backingStatuses) {
|
||||||
|
if (deleted.contains(s.getPath())) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Minor race condition here. Multiple threads could add to this
|
||||||
|
// mutable DirListingMetadata. Since it is backed by a
|
||||||
|
// ConcurrentHashMap, the last put() wins.
|
||||||
|
// More concerning is two threads racing on listStatus() and delete().
|
||||||
|
// Any FileSystem has similar race conditions, but we could persist
|
||||||
|
// a stale entry longer. We could expose an atomic
|
||||||
|
// DirListingMetadata#putIfNotPresent()
|
||||||
|
boolean updated = dirMeta.put(s);
|
||||||
|
changed = changed || updated;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (changed && isAuthoritative) {
|
||||||
|
dirMeta.setAuthoritative(true); // This is the full directory contents
|
||||||
|
ms.put(dirMeta);
|
||||||
|
}
|
||||||
|
|
||||||
|
return dirMetaToStatuses(dirMeta);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Although NullMetadataStore does nothing, callers may wish to avoid work
|
||||||
|
* (fast path) when the NullMetadataStore is in use.
|
||||||
|
* @param ms The MetadataStore to test
|
||||||
|
* @return true iff the MetadataStore is the null, or no-op, implementation.
|
||||||
|
*/
|
||||||
|
public static boolean isNullMetadataStore(MetadataStore ms) {
|
||||||
|
return (ms instanceof NullMetadataStore);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Update MetadataStore to reflect creation of the given directories.
|
||||||
|
*
|
||||||
|
* If an IOException is raised while trying to update the entry, this
|
||||||
|
* operation catches the exception and returns.
|
||||||
|
* @param ms MetadataStore to update.
|
||||||
|
* @param dirs null, or an ordered list of directories from leaf to root.
|
||||||
|
* E.g. if /a/ exists, and mkdirs(/a/b/c/d) is called, this
|
||||||
|
* list will contain [/a/b/c/d, /a/b/c, /a/b]. /a/b/c/d is
|
||||||
|
* an empty, dir, and the other dirs only contain their child
|
||||||
|
* dir.
|
||||||
|
* @param owner Hadoop user name.
|
||||||
|
* @param authoritative Whether to mark new directories as authoritative.
|
||||||
|
*/
|
||||||
|
public static void makeDirsOrdered(MetadataStore ms, List<Path> dirs,
|
||||||
|
String owner, boolean authoritative) {
|
||||||
|
if (dirs == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We discussed atomicity of this implementation.
|
||||||
|
* The concern is that multiple clients could race to write different
|
||||||
|
* cached directories to the MetadataStore. Two solutions are proposed:
|
||||||
|
* 1. Move mkdirs() into MetadataStore interface and let implementations
|
||||||
|
* ensure they are atomic.
|
||||||
|
* 2. Specify that the semantics of MetadataStore#putListStatus() is
|
||||||
|
* always additive, That is, if MetadataStore has listStatus() state
|
||||||
|
* for /a/b that contains [/a/b/file0, /a/b/file1], and we then call
|
||||||
|
* putListStatus(/a/b -> [/a/b/file2, /a/b/file3], isAuthoritative=true),
|
||||||
|
* then we will end up with final state of
|
||||||
|
* [/a/b/file0, /a/b/file1, /a/b/file2, /a/b/file3], isAuthoritative =
|
||||||
|
* true
|
||||||
|
*/
|
||||||
|
FileStatus prevStatus = null;
|
||||||
|
|
||||||
|
// Use new batched put to reduce round trips.
|
||||||
|
List<PathMetadata> pathMetas = new ArrayList<>(dirs.size());
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Iterate from leaf to root
|
||||||
|
for (int i = 0; i < dirs.size(); i++) {
|
||||||
|
boolean isLeaf = (prevStatus == null);
|
||||||
|
Path f = dirs.get(i);
|
||||||
|
assertQualified(f);
|
||||||
|
FileStatus status =
|
||||||
|
createUploadFileStatus(f, true, 0, 0, owner);
|
||||||
|
|
||||||
|
// We only need to put a DirListingMetadata if we are setting
|
||||||
|
// authoritative bit
|
||||||
|
DirListingMetadata dirMeta = null;
|
||||||
|
if (authoritative) {
|
||||||
|
Collection<PathMetadata> children;
|
||||||
|
if (isLeaf) {
|
||||||
|
children = DirListingMetadata.EMPTY_DIR;
|
||||||
|
} else {
|
||||||
|
children = new ArrayList<>(1);
|
||||||
|
children.add(new PathMetadata(prevStatus));
|
||||||
|
}
|
||||||
|
dirMeta = new DirListingMetadata(f, children, authoritative);
|
||||||
|
ms.put(dirMeta);
|
||||||
|
}
|
||||||
|
|
||||||
|
pathMetas.add(new PathMetadata(status));
|
||||||
|
prevStatus = status;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batched put
|
||||||
|
ms.put(pathMetas);
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
LOG.error("MetadataStore#put() failure:", ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function that records the move of directory paths, adding
|
||||||
|
* resulting metadata to the supplied lists.
|
||||||
|
* Does not store in MetadataStore.
|
||||||
|
* @param ms MetadataStore, used to make this a no-op, when it is
|
||||||
|
* NullMetadataStore.
|
||||||
|
* @param srcPaths stores the source path here
|
||||||
|
* @param dstMetas stores destination metadata here
|
||||||
|
* @param srcPath source path to store
|
||||||
|
* @param dstPath destination path to store
|
||||||
|
* @param owner file owner to use in created records
|
||||||
|
*/
|
||||||
|
public static void addMoveDir(MetadataStore ms, Collection<Path> srcPaths,
|
||||||
|
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
|
||||||
|
String owner) {
|
||||||
|
if (isNullMetadataStore(ms)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assertQualified(srcPath, dstPath);
|
||||||
|
|
||||||
|
FileStatus dstStatus = createUploadFileStatus(dstPath, true, 0, 0, owner);
|
||||||
|
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Like {@link #addMoveDir(MetadataStore, Collection, Collection, Path,
|
||||||
|
* Path, String)} (), but for files.
|
||||||
|
* @param ms MetadataStore, used to make this a no-op, when it is
|
||||||
|
* NullMetadataStore.
|
||||||
|
* @param srcPaths stores the source path here
|
||||||
|
* @param dstMetas stores destination metadata here
|
||||||
|
* @param srcPath source path to store
|
||||||
|
* @param dstPath destination path to store
|
||||||
|
* @param size length of file moved
|
||||||
|
* @param blockSize blocksize to associate with destination file
|
||||||
|
* @param owner file owner to use in created records
|
||||||
|
*/
|
||||||
|
public static void addMoveFile(MetadataStore ms, Collection<Path> srcPaths,
|
||||||
|
Collection<PathMetadata> dstMetas, Path srcPath, Path dstPath,
|
||||||
|
long size, long blockSize, String owner) {
|
||||||
|
if (isNullMetadataStore(ms)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
assertQualified(srcPath, dstPath);
|
||||||
|
FileStatus dstStatus = createUploadFileStatus(dstPath, false,
|
||||||
|
size, blockSize, owner);
|
||||||
|
addMoveStatus(srcPaths, dstMetas, srcPath, dstStatus);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method that records the move of all ancestors of a path.
|
||||||
|
*
|
||||||
|
* In S3A, an optimization is to delete unnecessary fake directory objects if
|
||||||
|
* the directory is non-empty. In that case, for a nested child to move, S3A
|
||||||
|
* does not list, and thus does not move, all of its ancestors (up to the source root). So we
|
||||||
|
* take care of those inferred directories of this path explicitly.
|
||||||
|
*
|
||||||
|
* As {@link #addMoveFile} and {@link #addMoveDir}, this method adds resulting
|
||||||
|
* metadata to the supplied lists. It does not store in MetadataStore.
|
||||||
|
*
|
||||||
|
* @param ms MetadataStore, no-op if it is NullMetadataStore
|
||||||
|
* @param srcPaths stores the source path here
|
||||||
|
* @param dstMetas stores destination metadata here
|
||||||
|
* @param srcRoot source root up to which (exclusive) should we add ancestors
|
||||||
|
* @param srcPath source path of the child to add ancestors
|
||||||
|
* @param dstPath destination path of the child to add ancestors
|
||||||
|
* @param owner Hadoop user name
|
||||||
|
*/
|
||||||
|
public static void addMoveAncestors(MetadataStore ms,
|
||||||
|
Collection<Path> srcPaths, Collection<PathMetadata> dstMetas,
|
||||||
|
Path srcRoot, Path srcPath, Path dstPath, String owner) {
|
||||||
|
if (isNullMetadataStore(ms)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
assertQualified(srcRoot, srcPath, dstPath);
|
||||||
|
|
||||||
|
if (srcPath.equals(srcRoot)) {
|
||||||
|
LOG.debug("Skip moving ancestors of source root directory {}", srcRoot);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
Path parentSrc = srcPath.getParent();
|
||||||
|
Path parentDst = dstPath.getParent();
|
||||||
|
while (parentSrc != null
|
||||||
|
&& !parentSrc.isRoot()
|
||||||
|
&& !parentSrc.equals(srcRoot)
|
||||||
|
&& !srcPaths.contains(parentSrc)) {
|
||||||
|
LOG.debug("Renaming non-listed parent {} to {}", parentSrc, parentDst);
|
||||||
|
S3Guard.addMoveDir(ms, srcPaths, dstMetas, parentSrc, parentDst, owner);
|
||||||
|
parentSrc = parentSrc.getParent();
|
||||||
|
parentDst = parentDst.getParent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void addAncestors(MetadataStore metadataStore,
|
||||||
|
Path qualifiedPath, String username) throws IOException {
|
||||||
|
Collection<PathMetadata> newDirs = new ArrayList<>();
|
||||||
|
Path parent = qualifiedPath.getParent();
|
||||||
|
while (!parent.isRoot()) {
|
||||||
|
PathMetadata directory = metadataStore.get(parent);
|
||||||
|
if (directory == null || directory.isDeleted()) {
|
||||||
|
FileStatus status = new FileStatus(0, true, 1, 0, 0, 0, null, username,
|
||||||
|
null, parent);
|
||||||
|
PathMetadata meta = new PathMetadata(status, Tristate.FALSE, false);
|
||||||
|
newDirs.add(meta);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
parent = parent.getParent();
|
||||||
|
}
|
||||||
|
metadataStore.put(newDirs);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void addMoveStatus(Collection<Path> srcPaths,
|
||||||
|
Collection<PathMetadata> dstMetas,
|
||||||
|
Path srcPath,
|
||||||
|
FileStatus dstStatus) {
|
||||||
|
srcPaths.add(srcPath);
|
||||||
|
dstMetas.add(new PathMetadata(dstStatus));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that the path is qualified with a host and scheme.
|
||||||
|
* @param p path to check
|
||||||
|
* @throws NullPointerException if the path is missing a host or scheme
|
||||||
|
*/
|
||||||
|
public static void assertQualified(Path p) {
|
||||||
|
URI uri = p.toUri();
|
||||||
|
// Paths must include bucket in case MetadataStore is shared between
|
||||||
|
// multiple S3AFileSystem instances
|
||||||
|
Preconditions.checkNotNull(uri.getHost(), "Null host in " + uri);
|
||||||
|
|
||||||
|
// This should never fail, but is retained for completeness.
|
||||||
|
Preconditions.checkNotNull(uri.getScheme(), "Null scheme in " + uri);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that all paths are valid.
|
||||||
|
* @param paths paths to check
|
||||||
|
* @throws NullPointerException if any path is not qualified
|
||||||
|
*/
|
||||||
|
public static void assertQualified(Path...paths) {
|
||||||
|
for (Path path : paths) {
|
||||||
|
assertQualified(path);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,924 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
* <p>
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
* <p>
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintStream;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Locale;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.conf.Configured;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
|
import org.apache.hadoop.fs.s3a.Constants;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileStatus;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
import org.apache.hadoop.fs.shell.CommandFormat;
|
||||||
|
import org.apache.hadoop.util.GenericOptionsParser;
|
||||||
|
import org.apache.hadoop.util.Tool;
|
||||||
|
import org.apache.hadoop.util.ToolRunner;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* CLI to manage S3Guard Metadata Store.
|
||||||
|
*/
|
||||||
|
public abstract class S3GuardTool extends Configured implements Tool {
|
||||||
|
private static final Logger LOG = LoggerFactory.getLogger(S3GuardTool.class);
|
||||||
|
|
||||||
|
private static final String NAME = "s3guard";
|
||||||
|
private static final String COMMON_USAGE =
|
||||||
|
"When possible and not overridden by more specific options, metadata\n" +
|
||||||
|
"repository information will be inferred from the S3A URL (if provided)" +
|
||||||
|
"\n\n" +
|
||||||
|
"Generic options supported are:\n" +
|
||||||
|
" -conf <config file> - specify an application configuration file\n" +
|
||||||
|
" -D <property=value> - define a value for a given property\n";
|
||||||
|
|
||||||
|
private static final String USAGE = NAME +
|
||||||
|
" [command] [OPTIONS] [s3a://BUCKET]\n\n" +
|
||||||
|
"Commands: \n" +
|
||||||
|
"\t" + Init.NAME + " - " + Init.PURPOSE + "\n" +
|
||||||
|
"\t" + Destroy.NAME + " - " + Destroy.PURPOSE + "\n" +
|
||||||
|
"\t" + Import.NAME + " - " + Import.PURPOSE + "\n" +
|
||||||
|
"\t" + Diff.NAME + " - " + Diff.PURPOSE + "\n" +
|
||||||
|
"\t" + Prune.NAME + " - " + Prune.PURPOSE + "\n";
|
||||||
|
private static final String DATA_IN_S3_IS_PRESERVED
|
||||||
|
= "(all data in S3 is preserved";
|
||||||
|
|
||||||
|
abstract public String getUsage();
|
||||||
|
|
||||||
|
// Exit codes
|
||||||
|
static final int SUCCESS = 0;
|
||||||
|
static final int INVALID_ARGUMENT = 1;
|
||||||
|
static final int ERROR = 99;
|
||||||
|
|
||||||
|
private S3AFileSystem filesystem;
|
||||||
|
private MetadataStore store;
|
||||||
|
private final CommandFormat commandFormat;
|
||||||
|
|
||||||
|
private static final String META_FLAG = "meta";
|
||||||
|
private static final String DAYS_FLAG = "days";
|
||||||
|
private static final String HOURS_FLAG = "hours";
|
||||||
|
private static final String MINUTES_FLAG = "minutes";
|
||||||
|
private static final String SECONDS_FLAG = "seconds";
|
||||||
|
|
||||||
|
private static final String REGION_FLAG = "region";
|
||||||
|
private static final String READ_FLAG = "read";
|
||||||
|
private static final String WRITE_FLAG = "write";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct an S3Guard tool with the given Hadoop configuration.
|
||||||
|
* @param conf Configuration.
|
||||||
|
*/
|
||||||
|
protected S3GuardTool(Configuration conf) {
|
||||||
|
super(conf);
|
||||||
|
|
||||||
|
commandFormat = new CommandFormat(0, Integer.MAX_VALUE);
|
||||||
|
// For metadata store URI
|
||||||
|
commandFormat.addOptionWithValue(META_FLAG);
|
||||||
|
// DDB region.
|
||||||
|
commandFormat.addOptionWithValue(REGION_FLAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return sub-command name.
|
||||||
|
*/
|
||||||
|
abstract String getName();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse the DynamoDB region from either the -region option or an S3 path.
|
||||||
|
*
|
||||||
|
* This function should only be called from {@link Init} or
|
||||||
|
* {@link Destroy}.
|
||||||
|
*
|
||||||
|
* @param paths remaining parameters from CLI.
|
||||||
|
* @return false for invalid parameters.
|
||||||
|
* @throws IOException on I/O errors.
|
||||||
|
*/
|
||||||
|
boolean parseDynamoDBRegion(List<String> paths) throws IOException {
|
||||||
|
Configuration conf = getConf();
|
||||||
|
String fromCli = getCommandFormat().getOptValue(REGION_FLAG);
|
||||||
|
String fromConf = conf.get(S3GUARD_DDB_REGION_KEY);
|
||||||
|
boolean hasS3Path = !paths.isEmpty();
|
||||||
|
|
||||||
|
if (fromCli != null) {
|
||||||
|
if (fromCli.isEmpty()) {
|
||||||
|
System.err.println("No region provided with -" + REGION_FLAG + " flag");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (hasS3Path) {
|
||||||
|
System.err.println("Providing both an S3 path and the -" + REGION_FLAG
|
||||||
|
+ " flag is not supported. If you need to specify a different "
|
||||||
|
+ "region than the S3 bucket, configure " + S3GUARD_DDB_REGION_KEY);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
conf.set(S3GUARD_DDB_REGION_KEY, fromCli);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fromConf != null) {
|
||||||
|
if (fromConf.isEmpty()) {
|
||||||
|
System.err.printf("No region provided with config %s, %n",
|
||||||
|
S3GUARD_DDB_REGION_KEY);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hasS3Path) {
|
||||||
|
String s3Path = paths.get(0);
|
||||||
|
initS3AFileSystem(s3Path);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
System.err.println("No region found from -" + REGION_FLAG + " flag, " +
|
||||||
|
"config, or S3 bucket");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize the metadata store from the command line option or the Hadoop configuration.
|
||||||
|
*
|
||||||
|
* @param forceCreate override the auto-creation setting to true.
|
||||||
|
* @return an initialized metadata store.
|
||||||
|
*/
|
||||||
|
MetadataStore initMetadataStore(boolean forceCreate) throws IOException {
|
||||||
|
if (getStore() != null) {
|
||||||
|
return getStore();
|
||||||
|
}
|
||||||
|
Configuration conf;
|
||||||
|
if (filesystem == null) {
|
||||||
|
conf = getConf();
|
||||||
|
} else {
|
||||||
|
conf = filesystem.getConf();
|
||||||
|
}
|
||||||
|
String metaURI = getCommandFormat().getOptValue(META_FLAG);
|
||||||
|
if (metaURI != null && !metaURI.isEmpty()) {
|
||||||
|
URI uri = URI.create(metaURI);
|
||||||
|
LOG.info("create metadata store: {}", uri + " scheme: "
|
||||||
|
+ uri.getScheme());
|
||||||
|
switch (uri.getScheme().toLowerCase(Locale.ENGLISH)) {
|
||||||
|
case "local":
|
||||||
|
setStore(new LocalMetadataStore());
|
||||||
|
break;
|
||||||
|
case "dynamodb":
|
||||||
|
setStore(new DynamoDBMetadataStore());
|
||||||
|
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, uri.getAuthority());
|
||||||
|
if (forceCreate) {
|
||||||
|
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new IOException(
|
||||||
|
String.format("Metadata store %s is not supported", uri));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// The CLI did not specify a metadata store URI; use the default store,
|
||||||
|
// DynamoDB instead.
|
||||||
|
setStore(new DynamoDBMetadataStore());
|
||||||
|
if (forceCreate) {
|
||||||
|
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (filesystem == null) {
|
||||||
|
getStore().initialize(conf);
|
||||||
|
} else {
|
||||||
|
getStore().initialize(filesystem);
|
||||||
|
}
|
||||||
|
LOG.info("Metadata store {} is initialized.", getStore());
|
||||||
|
return getStore();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Initialize S3A FileSystem instance.
|
||||||
|
*
|
||||||
|
* @param path s3a URI
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
void initS3AFileSystem(String path) throws IOException {
|
||||||
|
URI uri;
|
||||||
|
try {
|
||||||
|
uri = new URI(path);
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
// Make sure that S3AFileSystem does not hold an actual MetadataStore
|
||||||
|
// implementation.
|
||||||
|
Configuration conf = getConf();
|
||||||
|
conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
|
||||||
|
MetadataStore.class);
|
||||||
|
FileSystem fs = FileSystem.get(uri, getConf());
|
||||||
|
if (!(fs instanceof S3AFileSystem)) {
|
||||||
|
throw new IOException(
|
||||||
|
String.format("URI %s is not a S3A file system: %s", uri,
|
||||||
|
fs.getClass().getName()));
|
||||||
|
}
|
||||||
|
filesystem = (S3AFileSystem) fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parse CLI arguments and return the positional arguments.
|
||||||
|
* The options are stored in {@link #commandFormat}
|
||||||
|
*
|
||||||
|
* @param args command line arguments.
|
||||||
|
* @return the positional arguments from the CLI.
|
||||||
|
*/
|
||||||
|
List<String> parseArgs(String[] args) {
|
||||||
|
return getCommandFormat().parse(args, 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected S3AFileSystem getFilesystem() {
|
||||||
|
return filesystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setFilesystem(S3AFileSystem filesystem) {
|
||||||
|
this.filesystem = filesystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public MetadataStore getStore() {
|
||||||
|
return store;
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
protected void setStore(MetadataStore store) {
|
||||||
|
Preconditions.checkNotNull(store);
|
||||||
|
this.store = store;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected CommandFormat getCommandFormat() {
|
||||||
|
return commandFormat;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create the metadata store.
|
||||||
|
*/
|
||||||
|
static class Init extends S3GuardTool {
|
||||||
|
private static final String NAME = "init";
|
||||||
|
public static final String PURPOSE = "initialize metadata repository";
|
||||||
|
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
||||||
|
"\t" + PURPOSE + "\n\n" +
|
||||||
|
"Common options:\n" +
|
||||||
|
" -" + META_FLAG + " URL - Metadata repository details " +
|
||||||
|
"(implementation-specific)\n" +
|
||||||
|
"\n" +
|
||||||
|
"Amazon DynamoDB-specific options:\n" +
|
||||||
|
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
|
||||||
|
" -" + READ_FLAG + " UNIT - Provisioned read throughput units\n" +
|
||||||
|
" -" + WRITE_FLAG + " UNIT - Provisioned write through put units\n" +
|
||||||
|
"\n" +
|
||||||
|
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
|
||||||
|
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
|
||||||
|
" is not supported.";
|
||||||
|
|
||||||
|
Init(Configuration conf) {
|
||||||
|
super(conf);
|
||||||
|
// read capacity.
|
||||||
|
getCommandFormat().addOptionWithValue(READ_FLAG);
|
||||||
|
// write capacity.
|
||||||
|
getCommandFormat().addOptionWithValue(WRITE_FLAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
String getName() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUsage() {
|
||||||
|
return USAGE;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int run(String[] args) throws IOException {
|
||||||
|
List<String> paths = parseArgs(args);
|
||||||
|
|
||||||
|
String readCap = getCommandFormat().getOptValue(READ_FLAG);
|
||||||
|
if (readCap != null && !readCap.isEmpty()) {
|
||||||
|
int readCapacity = Integer.parseInt(readCap);
|
||||||
|
getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, readCapacity);
|
||||||
|
}
|
||||||
|
String writeCap = getCommandFormat().getOptValue(WRITE_FLAG);
|
||||||
|
if (writeCap != null && !writeCap.isEmpty()) {
|
||||||
|
int writeCapacity = Integer.parseInt(writeCap);
|
||||||
|
getConf().setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, writeCapacity);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate parameters.
|
||||||
|
if (!parseDynamoDBRegion(paths)) {
|
||||||
|
System.err.println(USAGE);
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
initMetadataStore(true);
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Destroy a metadata store.
|
||||||
|
*/
|
||||||
|
static class Destroy extends S3GuardTool {
|
||||||
|
private static final String NAME = "destroy";
|
||||||
|
public static final String PURPOSE = "destroy Metadata Store data "
|
||||||
|
+ DATA_IN_S3_IS_PRESERVED;
|
||||||
|
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
||||||
|
"\t" + PURPOSE + "\n\n" +
|
||||||
|
"Common options:\n" +
|
||||||
|
" -" + META_FLAG + " URL - Metadata repository details " +
|
||||||
|
"(implementation-specific)\n" +
|
||||||
|
"\n" +
|
||||||
|
"Amazon DynamoDB-specific options:\n" +
|
||||||
|
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
|
||||||
|
"\n" +
|
||||||
|
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
|
||||||
|
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
|
||||||
|
" is not supported.";
|
||||||
|
|
||||||
|
Destroy(Configuration conf) {
|
||||||
|
super(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
String getName() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUsage() {
|
||||||
|
return USAGE;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int run(String[] args) throws IOException {
|
||||||
|
List<String> paths = parseArgs(args);
|
||||||
|
if (!parseDynamoDBRegion(paths)) {
|
||||||
|
System.err.println(USAGE);
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
initMetadataStore(false);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
// indication that the table was not found
|
||||||
|
LOG.debug("Failed to bind to store to be destroyed", e);
|
||||||
|
LOG.info("Metadata Store does not exist.");
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
Preconditions.checkState(getStore() != null,
|
||||||
|
"Metadata Store is not initialized");
|
||||||
|
|
||||||
|
getStore().destroy();
|
||||||
|
LOG.info("Metadata store is deleted.");
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Import s3 metadata to the metadata store.
|
||||||
|
*/
|
||||||
|
static class Import extends S3GuardTool {
|
||||||
|
private static final String NAME = "import";
|
||||||
|
public static final String PURPOSE = "import metadata from existing S3 " +
|
||||||
|
"data";
|
||||||
|
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
||||||
|
"\t" + PURPOSE + "\n\n" +
|
||||||
|
"Common options:\n" +
|
||||||
|
" -" + META_FLAG + " URL - Metadata repository details " +
|
||||||
|
"(implementation-specific)\n" +
|
||||||
|
"\n" +
|
||||||
|
"Amazon DynamoDB-specific options:\n" +
|
||||||
|
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
|
||||||
|
"\n" +
|
||||||
|
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
|
||||||
|
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
|
||||||
|
" is not supported.";
|
||||||
|
|
||||||
|
private final Set<Path> dirCache = new HashSet<>();
|
||||||
|
|
||||||
|
Import(Configuration conf) {
|
||||||
|
super(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
String getName() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUsage() {
|
||||||
|
return USAGE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Put parents into the MetadataStore and cache if they are not already present.
|
||||||
|
*
|
||||||
|
* @param f the file or an empty directory.
|
||||||
|
* @throws IOException on I/O errors.
|
||||||
|
*/
|
||||||
|
private void putParentsIfNotPresent(FileStatus f) throws IOException {
|
||||||
|
Preconditions.checkNotNull(f);
|
||||||
|
Path parent = f.getPath().getParent();
|
||||||
|
while (parent != null) {
|
||||||
|
if (dirCache.contains(parent)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
FileStatus dir = DynamoDBMetadataStore.makeDirStatus(parent,
|
||||||
|
f.getOwner());
|
||||||
|
getStore().put(new PathMetadata(dir));
|
||||||
|
dirCache.add(parent);
|
||||||
|
parent = parent.getParent();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursively import every path under the given directory.
|
||||||
|
* @return number of items inserted into MetadataStore
|
||||||
|
* @throws IOException on I/O errors.
|
||||||
|
*/
|
||||||
|
private long importDir(FileStatus status) throws IOException {
|
||||||
|
Preconditions.checkArgument(status.isDirectory());
|
||||||
|
RemoteIterator<LocatedFileStatus> it = getFilesystem()
|
||||||
|
.listFilesAndEmptyDirectories(status.getPath(), true);
|
||||||
|
long items = 0;
|
||||||
|
|
||||||
|
while (it.hasNext()) {
|
||||||
|
LocatedFileStatus located = it.next();
|
||||||
|
FileStatus child;
|
||||||
|
if (located.isDirectory()) {
|
||||||
|
child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
|
||||||
|
located.getOwner());
|
||||||
|
dirCache.add(child.getPath());
|
||||||
|
} else {
|
||||||
|
child = new S3AFileStatus(located.getLen(),
|
||||||
|
located.getModificationTime(),
|
||||||
|
located.getPath(),
|
||||||
|
located.getBlockSize(),
|
||||||
|
located.getOwner());
|
||||||
|
}
|
||||||
|
putParentsIfNotPresent(child);
|
||||||
|
getStore().put(new PathMetadata(child));
|
||||||
|
items++;
|
||||||
|
}
|
||||||
|
return items;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int run(String[] args) throws IOException {
|
||||||
|
List<String> paths = parseArgs(args);
|
||||||
|
if (paths.isEmpty()) {
|
||||||
|
System.err.println(getUsage());
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
String s3Path = paths.get(0);
|
||||||
|
initS3AFileSystem(s3Path);
|
||||||
|
|
||||||
|
URI uri;
|
||||||
|
try {
|
||||||
|
uri = new URI(s3Path);
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
String filePath = uri.getPath();
|
||||||
|
if (filePath.isEmpty()) {
|
||||||
|
// If they specify a naked S3 URI (e.g. s3a://bucket), we'll consider
|
||||||
|
// root to be the path
|
||||||
|
filePath = "/";
|
||||||
|
}
|
||||||
|
Path path = new Path(filePath);
|
||||||
|
FileStatus status = getFilesystem().getFileStatus(path);
|
||||||
|
|
||||||
|
initMetadataStore(false);
|
||||||
|
|
||||||
|
long items = 1;
|
||||||
|
if (status.isFile()) {
|
||||||
|
PathMetadata meta = new PathMetadata(status);
|
||||||
|
getStore().put(meta);
|
||||||
|
} else {
|
||||||
|
items = importDir(status);
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.printf("Inserted %d items into Metadata Store%n", items);
|
||||||
|
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Show diffs between the s3 and metadata store.
|
||||||
|
*/
|
||||||
|
static class Diff extends S3GuardTool {
|
||||||
|
private static final String NAME = "diff";
|
||||||
|
public static final String PURPOSE = "report on delta between S3 and " +
|
||||||
|
"repository";
|
||||||
|
private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" +
|
||||||
|
"\t" + PURPOSE + "\n\n" +
|
||||||
|
"Common options:\n" +
|
||||||
|
" -" + META_FLAG + " URL - Metadata repository details " +
|
||||||
|
"(implementation-specific)\n" +
|
||||||
|
"\n" +
|
||||||
|
"Amazon DynamoDB-specific options:\n" +
|
||||||
|
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
|
||||||
|
"\n" +
|
||||||
|
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
|
||||||
|
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
|
||||||
|
" is not supported.";
|
||||||
|
|
||||||
|
private static final String SEP = "\t";
|
||||||
|
static final String S3_PREFIX = "S3";
|
||||||
|
static final String MS_PREFIX = "MS";
|
||||||
|
|
||||||
|
Diff(Configuration conf) {
|
||||||
|
super(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
String getName() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUsage() {
|
||||||
|
return USAGE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Formats the output of printing a FileStatus in the S3Guard diff tool.
|
||||||
|
* @param status the status to print.
|
||||||
|
* @return the string of output.
|
||||||
|
*/
|
||||||
|
private static String formatFileStatus(FileStatus status) {
|
||||||
|
return String.format("%s%s%d%s%s",
|
||||||
|
status.isDirectory() ? "D" : "F",
|
||||||
|
SEP,
|
||||||
|
status.getLen(),
|
||||||
|
SEP,
|
||||||
|
status.getPath().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compares metadata from two FileStatus instances to see if they differ.
|
||||||
|
* @param thisOne the first status to compare.
|
||||||
|
* @param thatOne the second status to compare.
|
||||||
|
* @return true if the metadata is not identical
|
||||||
|
*/
|
||||||
|
private static boolean differ(FileStatus thisOne, FileStatus thatOne) {
|
||||||
|
Preconditions.checkArgument(!(thisOne == null && thatOne == null));
|
||||||
|
return (thisOne == null || thatOne == null) ||
|
||||||
|
(thisOne.getLen() != thatOne.getLen()) ||
|
||||||
|
(thisOne.isDirectory() != thatOne.isDirectory()) ||
|
||||||
|
(!thisOne.isDirectory() &&
|
||||||
|
thisOne.getModificationTime() != thatOne.getModificationTime());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print difference, if any, between two file statuses to the output stream.
|
||||||
|
*
|
||||||
|
* @param msStatus file status from metadata store.
|
||||||
|
* @param s3Status file status from S3.
|
||||||
|
* @param out output stream.
|
||||||
|
*/
|
||||||
|
private static void printDiff(FileStatus msStatus,
|
||||||
|
FileStatus s3Status,
|
||||||
|
PrintStream out) {
|
||||||
|
Preconditions.checkArgument(!(msStatus == null && s3Status == null));
|
||||||
|
if (msStatus != null && s3Status != null) {
|
||||||
|
Preconditions.checkArgument(
|
||||||
|
msStatus.getPath().equals(s3Status.getPath()),
|
||||||
|
String.format("The path from metadata store and s3 are different:" +
|
||||||
|
" ms=%s s3=%s", msStatus.getPath(), s3Status.getPath()));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (differ(msStatus, s3Status)) {
|
||||||
|
if (s3Status != null) {
|
||||||
|
out.printf("%s%s%s%n", S3_PREFIX, SEP, formatFileStatus(s3Status));
|
||||||
|
}
|
||||||
|
if (msStatus != null) {
|
||||||
|
out.printf("%s%s%s%n", MS_PREFIX, SEP, formatFileStatus(msStatus));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare the metadata of the directory with the same path, on S3 and
|
||||||
|
* the metadata store, respectively. If one of them is null, consider the
|
||||||
|
* metadata of the directory and all its subdirectories to be missing from
|
||||||
|
* the source.
|
||||||
|
*
|
||||||
|
* Pass the FileStatus obtained from s3 and metadata store to avoid one
|
||||||
|
* round trip to fetch the same metadata twice, because the FileStatus
|
||||||
|
* have already been obtained from listStatus() / listChildren operations.
|
||||||
|
*
|
||||||
|
* @param msDir the directory FileStatus obtained from the metadata store.
|
||||||
|
* @param s3Dir the directory FileStatus obtained from S3.
|
||||||
|
* @param out the output stream to generate diff results.
|
||||||
|
* @throws IOException on I/O errors.
|
||||||
|
*/
|
||||||
|
private void compareDir(FileStatus msDir, FileStatus s3Dir,
|
||||||
|
PrintStream out) throws IOException {
|
||||||
|
Preconditions.checkArgument(!(msDir == null && s3Dir == null));
|
||||||
|
if (msDir != null && s3Dir != null) {
|
||||||
|
Preconditions.checkArgument(msDir.getPath().equals(s3Dir.getPath()),
|
||||||
|
String.format("The path from metadata store and s3 are different:" +
|
||||||
|
" ms=%s s3=%s", msDir.getPath(), s3Dir.getPath()));
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Path, FileStatus> s3Children = new HashMap<>();
|
||||||
|
if (s3Dir != null && s3Dir.isDirectory()) {
|
||||||
|
for (FileStatus status : getFilesystem().listStatus(s3Dir.getPath())) {
|
||||||
|
s3Children.put(status.getPath(), status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<Path, FileStatus> msChildren = new HashMap<>();
|
||||||
|
if (msDir != null && msDir.isDirectory()) {
|
||||||
|
DirListingMetadata dirMeta =
|
||||||
|
getStore().listChildren(msDir.getPath());
|
||||||
|
|
||||||
|
if (dirMeta != null) {
|
||||||
|
for (PathMetadata meta : dirMeta.getListing()) {
|
||||||
|
FileStatus status = meta.getFileStatus();
|
||||||
|
msChildren.put(status.getPath(), status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<Path> allPaths = new HashSet<>(s3Children.keySet());
|
||||||
|
allPaths.addAll(msChildren.keySet());
|
||||||
|
|
||||||
|
for (Path path : allPaths) {
|
||||||
|
FileStatus s3Status = s3Children.get(path);
|
||||||
|
FileStatus msStatus = msChildren.get(path);
|
||||||
|
printDiff(msStatus, s3Status, out);
|
||||||
|
if ((s3Status != null && s3Status.isDirectory()) ||
|
||||||
|
(msStatus != null && msStatus.isDirectory())) {
|
||||||
|
compareDir(msStatus, s3Status, out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.flush();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compare both metadata store and S3 on the same path.
|
||||||
|
*
|
||||||
|
* @param path the path to be compared.
|
||||||
|
* @param out the output stream to display results.
|
||||||
|
* @throws IOException on I/O errors.
|
||||||
|
*/
|
||||||
|
private void compareRoot(Path path, PrintStream out) throws IOException {
|
||||||
|
Path qualified = getFilesystem().qualify(path);
|
||||||
|
FileStatus s3Status = null;
|
||||||
|
try {
|
||||||
|
s3Status = getFilesystem().getFileStatus(qualified);
|
||||||
|
} catch (FileNotFoundException e) {
// the path does not exist in S3; leave s3Status as null
|
||||||
|
}
|
||||||
|
PathMetadata meta = getStore().get(qualified);
|
||||||
|
FileStatus msStatus = (meta != null && !meta.isDeleted()) ?
|
||||||
|
meta.getFileStatus() : null;
|
||||||
|
compareDir(msStatus, s3Status, out);
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public int run(String[] args, PrintStream out) throws IOException {
|
||||||
|
List<String> paths = parseArgs(args);
|
||||||
|
if (paths.isEmpty()) {
|
||||||
|
out.println(USAGE);
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
String s3Path = paths.get(0);
|
||||||
|
initS3AFileSystem(s3Path);
|
||||||
|
initMetadataStore(true);
|
||||||
|
|
||||||
|
URI uri;
|
||||||
|
try {
|
||||||
|
uri = new URI(s3Path);
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new IOException(e);
|
||||||
|
}
|
||||||
|
Path root;
|
||||||
|
if (uri.getPath().isEmpty()) {
|
||||||
|
root = new Path("/");
|
||||||
|
} else {
|
||||||
|
root = new Path(uri.getPath());
|
||||||
|
}
|
||||||
|
root = getFilesystem().qualify(root);
|
||||||
|
compareRoot(root, out);
|
||||||
|
out.flush();
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int run(String[] args) throws IOException {
|
||||||
|
return run(args, System.out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Prune metadata that has not been modified recently.
|
||||||
|
*/
|
||||||
|
static class Prune extends S3GuardTool {
|
||||||
|
private static final String NAME = "prune";
|
||||||
|
public static final String PURPOSE = "truncate older metadata from " +
|
||||||
|
"repository "
|
||||||
|
+ DATA_IN_S3_IS_PRESERVED;
|
||||||
|
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
||||||
|
"\t" + PURPOSE + "\n\n" +
|
||||||
|
"Common options:\n" +
|
||||||
|
" -" + META_FLAG + " URL - Metadata repository details " +
|
||||||
|
"(implementation-specific)\n" +
|
||||||
|
"\n" +
|
||||||
|
"Amazon DynamoDB-specific options:\n" +
|
||||||
|
" -" + REGION_FLAG + " REGION - Service region for connections\n" +
|
||||||
|
"\n" +
|
||||||
|
" URLs for Amazon DynamoDB are of the form dynamodb://TABLE_NAME.\n" +
|
||||||
|
" Specifying both the -" + REGION_FLAG + " option and an S3A path\n" +
|
||||||
|
" is not supported.";
|
||||||
|
|
||||||
|
Prune(Configuration conf) {
|
||||||
|
super(conf);
|
||||||
|
|
||||||
|
CommandFormat format = getCommandFormat();
|
||||||
|
format.addOptionWithValue(DAYS_FLAG);
|
||||||
|
format.addOptionWithValue(HOURS_FLAG);
|
||||||
|
format.addOptionWithValue(MINUTES_FLAG);
|
||||||
|
format.addOptionWithValue(SECONDS_FLAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
void setMetadataStore(MetadataStore ms) {
|
||||||
|
Preconditions.checkNotNull(ms);
|
||||||
|
this.setStore(ms);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
String getName() {
|
||||||
|
return NAME;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getUsage() {
|
||||||
|
return USAGE;
|
||||||
|
}
|
||||||
|
|
||||||
|
private long getDeltaComponent(TimeUnit unit, String arg) {
|
||||||
|
String raw = getCommandFormat().getOptValue(arg);
|
||||||
|
if (raw == null || raw.isEmpty()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
Long parsed = Long.parseLong(raw);
|
||||||
|
return unit.toMillis(parsed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@VisibleForTesting
|
||||||
|
public int run(String[] args, PrintStream out) throws
|
||||||
|
InterruptedException, IOException {
|
||||||
|
List<String> paths = parseArgs(args);
|
||||||
|
if (!parseDynamoDBRegion(paths)) {
|
||||||
|
System.err.println(USAGE);
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
initMetadataStore(false);
|
||||||
|
|
||||||
|
Configuration conf = getConf();
|
||||||
|
long confDelta = conf.getLong(Constants.S3GUARD_CLI_PRUNE_AGE, 0);
|
||||||
|
|
||||||
|
long cliDelta = 0;
|
||||||
|
cliDelta += getDeltaComponent(TimeUnit.DAYS, "days");
|
||||||
|
cliDelta += getDeltaComponent(TimeUnit.HOURS, "hours");
|
||||||
|
cliDelta += getDeltaComponent(TimeUnit.MINUTES, "minutes");
|
||||||
|
cliDelta += getDeltaComponent(TimeUnit.SECONDS, "seconds");
|
||||||
|
|
||||||
|
if (confDelta <= 0 && cliDelta <= 0) {
|
||||||
|
System.err.println(
|
||||||
|
"You must specify a positive age for metadata to prune.");
|
||||||
|
}
|
||||||
|
|
||||||
|
// A delta provided on the CLI overrides if one is configured
|
||||||
|
long delta = confDelta;
|
||||||
|
if (cliDelta > 0) {
|
||||||
|
delta = cliDelta;
|
||||||
|
}
|
||||||
|
|
||||||
|
long now = System.currentTimeMillis();
|
||||||
|
long divide = now - delta;
|
||||||
|
|
||||||
|
getStore().prune(divide);
|
||||||
|
|
||||||
|
out.flush();
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int run(String[] args) throws InterruptedException, IOException {
|
||||||
|
return run(args, System.out);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static S3GuardTool command;
|
||||||
|
|
||||||
|
private static void printHelp() {
|
||||||
|
if (command == null) {
|
||||||
|
System.err.println("Usage: hadoop " + USAGE);
|
||||||
|
System.err.println("\tperform S3Guard metadata store " +
|
||||||
|
"administrative commands.");
|
||||||
|
} else {
|
||||||
|
System.err.println("Usage: hadoop " + command.getUsage());
|
||||||
|
}
|
||||||
|
System.err.println();
|
||||||
|
System.err.println(COMMON_USAGE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute the command with the given arguments.
|
||||||
|
*
|
||||||
|
* @param args command specific arguments.
|
||||||
|
* @param conf Hadoop configuration.
|
||||||
|
* @return exit code.
|
||||||
|
* @throws Exception on I/O errors.
|
||||||
|
*/
|
||||||
|
public static int run(String[] args, Configuration conf) throws
|
||||||
|
Exception {
|
||||||
|
/* ToolRunner.run does this too, but we must do it before looking at
|
||||||
|
subCommand or instantiating the cmd object below */
|
||||||
|
String[] otherArgs = new GenericOptionsParser(conf, args)
|
||||||
|
.getRemainingArgs();
|
||||||
|
if (otherArgs.length == 0) {
|
||||||
|
printHelp();
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
final String subCommand = otherArgs[0];
|
||||||
|
switch (subCommand) {
|
||||||
|
case Init.NAME:
|
||||||
|
command = new Init(conf);
|
||||||
|
break;
|
||||||
|
case Destroy.NAME:
|
||||||
|
command = new Destroy(conf);
|
||||||
|
break;
|
||||||
|
case Import.NAME:
|
||||||
|
command = new Import(conf);
|
||||||
|
break;
|
||||||
|
case Diff.NAME:
|
||||||
|
command = new Diff(conf);
|
||||||
|
break;
|
||||||
|
case Prune.NAME:
|
||||||
|
command = new Prune(conf);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
printHelp();
|
||||||
|
return INVALID_ARGUMENT;
|
||||||
|
}
|
||||||
|
return ToolRunner.run(conf, command, otherArgs);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main entry point. Calls {@code System.exit()} on all execution paths.
|
||||||
|
* @param args argument list
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) {
|
||||||
|
try {
|
||||||
|
int ret = run(args, new Configuration());
|
||||||
|
System.exit(ret);
|
||||||
|
} catch (CommandFormat.UnknownOptionException e) {
|
||||||
|
System.err.println(e.getMessage());
|
||||||
|
printHelp();
|
||||||
|
System.exit(INVALID_ARGUMENT);
|
||||||
|
} catch (Throwable e) {
|
||||||
|
e.printStackTrace(System.err);
|
||||||
|
System.exit(ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This package contains classes related to S3Guard: a feature of S3A to mask
|
||||||
|
* the eventual consistency behavior of S3 and optimize access patterns by
|
||||||
|
* coordinating with a strongly consistent external store for file system
|
||||||
|
* metadata.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
@ -105,6 +105,10 @@ public static Login extractLoginDetailsWithWarnings(URI name) {
|
|||||||
* @return a login tuple, possibly empty.
|
* @return a login tuple, possibly empty.
|
||||||
*/
|
*/
|
||||||
public static Login extractLoginDetails(URI name) {
|
public static Login extractLoginDetails(URI name) {
|
||||||
|
if (name == null) {
|
||||||
|
return Login.EMPTY;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
String authority = name.getAuthority();
|
String authority = name.getAuthority();
|
||||||
if (authority == null) {
|
if (authority == null) {
|
||||||
|
@ -0,0 +1,37 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
|
# this work for additional information regarding copyright ownership.
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
if ! declare -f hadoop_subcommand_s3guard >/dev/null 2>/dev/null; then
|
||||||
|
|
||||||
|
if [[ "${HADOOP_SHELL_EXECNAME}" = hadoop ]]; then
|
||||||
|
hadoop_add_subcommand "s3guard" client "manage metadata on S3"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# this can't be indented otherwise shelldocs won't get it
|
||||||
|
|
||||||
|
## @description s3guard command for hadoop
|
||||||
|
## @audience public
|
||||||
|
## @stability stable
|
||||||
|
## @replaceable yes
|
||||||
|
function hadoop_subcommand_s3guard
|
||||||
|
{
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
HADOOP_CLASSNAME=org.apache.hadoop.fs.s3a.s3guard.S3GuardTool
|
||||||
|
hadoop_add_to_classpath_tools hadoop-aws
|
||||||
|
}
|
||||||
|
|
||||||
|
fi
|
@ -46,6 +46,7 @@ See also:
|
|||||||
|
|
||||||
* [Testing](testing.html)
|
* [Testing](testing.html)
|
||||||
* [Troubleshooting S3a](troubleshooting_s3a.html)
|
* [Troubleshooting S3a](troubleshooting_s3a.html)
|
||||||
|
* [S3Guard](s3guard.html)
|
||||||
|
|
||||||
### Warning #1: Object Stores are not filesystems
|
### Warning #1: Object Stores are not filesystems
|
||||||
|
|
||||||
@ -1552,7 +1553,7 @@ for `fs.s3a.server-side-encryption-algorithm` is `AES256`.
|
|||||||
|
|
||||||
SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to
|
SSE-KMS is where the user specifies a Customer Master Key(CMK) that is used to
|
||||||
encrypt the objects. The user may specify a specific CMK or leave the
|
encrypt the objects. The user may specify a specific CMK or leave the
|
||||||
`fs.s3a.server-side-encryption-key` empty to use the default auto-generated key
|
`fs.s3a.server-side-encryption.key` empty to use the default auto-generated key
|
||||||
in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be
|
in AWS IAM. Each CMK configured in AWS IAM is region specific, and cannot be
|
||||||
used in an S3 bucket in a different region. There can also be policies
|
used in an S3 bucket in a different region. There can also be policies
|
||||||
assigned to the CMK that prohibit or restrict its use for users causing S3A
|
assigned to the CMK that prohibit or restrict its use for users causing S3A
|
||||||
|
@ -0,0 +1,610 @@
|
|||||||
|
<!---
|
||||||
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
you may not use this file except in compliance with the License.
|
||||||
|
You may obtain a copy of the License at
|
||||||
|
|
||||||
|
http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
|
||||||
|
Unless required by applicable law or agreed to in writing, software
|
||||||
|
distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
See the License for the specific language governing permissions and
|
||||||
|
limitations under the License. See accompanying LICENSE file.
|
||||||
|
-->
|
||||||
|
|
||||||
|
# S3Guard: Consistency and Metadata Caching for S3A
|
||||||
|
|
||||||
|
**Experimental Feature**
|
||||||
|
|
||||||
|
<!-- MACRO{toc|fromDepth=0|toDepth=5} -->
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
*S3Guard* is an experimental feature for the S3A client of the S3 object store,
|
||||||
|
which can use a (consistent) database as the store of metadata about objects
|
||||||
|
in an S3 bucket.
|
||||||
|
|
||||||
|
S3Guard
|
||||||
|
|
||||||
|
1. May improve performance on directory listing/scanning operations,
|
||||||
|
including those which take place during the partitioning period of query
|
||||||
|
execution, the process where files are listed and the work divided up amongst
|
||||||
|
processes.
|
||||||
|
|
||||||
|
1. Permits a consistent view of the object store. Without this, changes in
|
||||||
|
objects may not be immediately visible, especially in listing operations.
|
||||||
|
|
||||||
|
1. Offers a platform for future performance improvements for running Hadoop
|
||||||
|
workloads on top of object stores.
|
||||||
|
|
||||||
|
The basic idea is that, for each operation in the Hadoop S3 client (s3a) that
|
||||||
|
reads or modifies metadata, a shadow copy of that metadata is stored in a
|
||||||
|
separate MetadataStore implementation. Each MetadataStore implementation
|
||||||
|
offers HDFS-like consistency for the metadata, and may also provide faster
|
||||||
|
lookups for things like file status or directory listings.
|
||||||
|
|
||||||
|
For links to early design documents and related patches, see
|
||||||
|
[HADOOP-13345](https://issues.apache.org/jira/browse/HADOOP-13345).
|
||||||
|
|
||||||
|
*Important*
|
||||||
|
|
||||||
|
* S3Guard is experimental and should be considered unstable.
|
||||||
|
|
||||||
|
* While all underlying data is persisted in S3, if, for some reason,
|
||||||
|
the S3Guard-cached metadata becomes inconsistent with that in S3,
|
||||||
|
queries on the data may become incorrect.
|
||||||
|
For example, new datasets may be omitted, objects may be overwritten,
|
||||||
|
or clients may not be aware that some data has been deleted.
|
||||||
|
It is essential for all clients writing to an S3Guard-enabled
|
||||||
|
S3 Repository to use the feature. Clients reading the data may work directly
|
||||||
|
with the S3A data, in which case the normal S3 consistency guarantees apply.
|
||||||
|
|
||||||
|
|
||||||
|
## Setting up S3Guard
|
||||||
|
|
||||||
|
The latest configuration parameters are defined in `core-default.xml`. You
|
||||||
|
should consult that file for full information, but a summary is provided here.
|
||||||
|
|
||||||
|
|
||||||
|
### 1. Choose the Database
|
||||||
|
|
||||||
|
A core concept of S3Guard is that the directory listing data of the object
|
||||||
|
store, *the metadata*, is replicated in a higher-performance, consistent
|
||||||
|
database. In S3Guard, this database is called *The Metadata Store*.
|
||||||
|
|
||||||
|
By default, S3Guard is not enabled.
|
||||||
|
|
||||||
|
The Metadata Store to use in production is backed by Amazon's DynamoDB
|
||||||
|
database service. The following setting will enable this Metadata Store:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.metadatastore.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
Note that the `NullMetadataStore` store can be explicitly requested if desired.
|
||||||
|
This offers no metadata storage, and effectively disables S3Guard.
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.metadatastore.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Configure S3Guard Settings
|
||||||
|
|
||||||
|
More settings may be added in the future.
|
||||||
|
Currently the only Metadata Store-independent setting, besides the
|
||||||
|
implementation class above, is the *allow authoritative* flag.
|
||||||
|
|
||||||
|
It is recommended that you leave the default setting here:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.metadatastore.authoritative</name>
|
||||||
|
<value>false</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
Setting this to `true` is currently an experimental feature. When true, the
|
||||||
|
S3A client will avoid round-trips to S3 when getting directory listings, if
|
||||||
|
there is a fully-cached version of the directory stored in the Metadata Store.
|
||||||
|
|
||||||
|
Note that if this is set to true, it may exacerbate or persist existing race
|
||||||
|
conditions around multiple concurrent modifications and listings of a given
|
||||||
|
directory tree.
|
||||||
|
|
||||||
|
In particular: **If the Metadata Store is declared as authoritative,
|
||||||
|
all interactions with the S3 bucket(s) must be through S3A clients sharing
|
||||||
|
the same Metadata Store**
|
||||||
|
|
||||||
|
|
||||||
|
### 3. Configure the Metadata Store.
|
||||||
|
|
||||||
|
Here are the `DynamoDBMetadataStore` settings. Other Metadata Store
|
||||||
|
implementations will have their own configuration parameters.
|
||||||
|
|
||||||
|
|
||||||
|
### 4. Name Your Table
|
||||||
|
|
||||||
|
First, choose the name of the table you wish to use for the S3Guard metadata
|
||||||
|
storage in your DynamoDB instance. If you leave it unset/empty, a
|
||||||
|
separate table will be created for each S3 bucket you access, and that
|
||||||
|
bucket's name will be used for the name of the DynamoDB table. For example,
|
||||||
|
this sets the table name to `my-ddb-table-name`
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table</name>
|
||||||
|
<value>my-ddb-table-name</value>
|
||||||
|
<description>
|
||||||
|
The DynamoDB table name to operate on. Without this property, the respective
|
||||||
|
S3 bucket names will be used.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
It is good to share a table across multiple buckets for multiple reasons.
|
||||||
|
|
||||||
|
1. You are billed for the I/O capacity allocated to the table,
|
||||||
|
*even when the table is not used*. Sharing capacity can reduce costs.
|
||||||
|
|
||||||
|
1. You can share the "provision burden" across the buckets. That is, rather
|
||||||
|
than allocating for the peak load on a single bucket, you can allocate for
|
||||||
|
the peak load *across all the buckets*, which is likely to be significantly
|
||||||
|
lower.
|
||||||
|
|
||||||
|
1. It's easier to measure and tune the load requirements and cost of
|
||||||
|
S3Guard, because there is only one table to review and configure in the
|
||||||
|
AWS management console.
|
||||||
|
|
||||||
|
When wouldn't you want to share a table?
|
||||||
|
|
||||||
|
1. When you do explicitly want to provision I/O capacity to a specific bucket
|
||||||
|
and table, isolated from others.
|
||||||
|
|
||||||
|
1. When you are using separate billing for specific buckets allocated
|
||||||
|
to specific projects.
|
||||||
|
|
||||||
|
1. When different users/roles have different access rights to different buckets.
|
||||||
|
As S3Guard requires all users to have R/W access to the table, all users will
|
||||||
|
be able to list the metadata in all buckets, even those to which they lack
|
||||||
|
read access.
|
||||||
|
|
||||||
|
### 5. Locate your Table
|
||||||
|
|
||||||
|
You may also wish to specify the region to use for DynamoDB. If a region
|
||||||
|
is not configured, S3A will assume that the table is in the same region as the S3
|
||||||
|
bucket. A list of regions for the DynamoDB service can be found in
|
||||||
|
[Amazon's documentation](http://docs.aws.amazon.com/general/latest/gr/rande.html#ddb_region).
|
||||||
|
In this example, to use the US West 2 region:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.region</name>
|
||||||
|
<value>us-west-2</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
When working with S3Guard-managed buckets from EC2 VMs running in AWS
|
||||||
|
infrastructure, using a local DynamoDB region ensures the lowest latency
|
||||||
|
and highest reliability, as well as avoiding all long-haul network charges.
|
||||||
|
The S3Guard tables, and indeed, the S3 buckets, should all be in the same
|
||||||
|
region as the VMs.
|
||||||
|
|
||||||
|
### 6. Optional: Create your Table
|
||||||
|
|
||||||
|
Next, you can choose whether or not the table will be automatically created
|
||||||
|
(if it doesn't already exist). If you want this feature, set the
|
||||||
|
`fs.s3a.s3guard.ddb.table.create` option to `true`.
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table.create</name>
|
||||||
|
<value>true</value>
|
||||||
|
<description>
|
||||||
|
If true, the S3A client will create the table if it does not already exist.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
```
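
If you would rather not enable automatic creation, the table can instead be
created explicitly with the `hadoop s3guard` CLI described later in this
document. A minimal sketch, reusing the (hypothetical) table name and region
from the earlier examples:

```bash
# Create the DynamoDB table up front instead of relying on
# fs.s3a.s3guard.ddb.table.create; names and region are examples only.
hadoop s3guard init -meta dynamodb://my-ddb-table-name -region us-west-2
```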
|
||||||
|
|
||||||
|
### 7. If creating a table: Set your DynamoDB IO Capacity
|
||||||
|
|
||||||
|
Next, you need to set the DynamoDB read and write throughput requirements you
|
||||||
|
expect to need for your cluster. Setting higher values will cost you more
|
||||||
|
money. *Note* that these settings only affect table creation when
|
||||||
|
`fs.s3a.s3guard.ddb.table.create` is enabled. To change the throughput for
|
||||||
|
an existing table, use the AWS console or CLI tool.
|
||||||
|
|
||||||
|
For more details on DynamoDB capacity units, see the AWS page on [Capacity
|
||||||
|
Unit Calculations](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/WorkingWithTables.html#CapacityUnitCalculations).
|
||||||
|
|
||||||
|
The charges are incurred per hour for the life of the table, *even when the
|
||||||
|
table and the underlying S3 buckets are not being used*.
|
||||||
|
|
||||||
|
There are also charges incurred for data storage and for data IO outside of the
|
||||||
|
region of the DynamoDB instance. S3Guard only stores metadata in DynamoDB: path names
|
||||||
|
and summary details of objects; the actual data is stored in S3 and billed at S3
|
||||||
|
rates.
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
|
||||||
|
<value>500</value>
|
||||||
|
<description>
|
||||||
|
Provisioned throughput requirements for read operations in terms of capacity
|
||||||
|
units for the DynamoDB table. This config value will only be used when
|
||||||
|
creating a new DynamoDB table, though later you can manually provision by
|
||||||
|
increasing or decreasing read capacity as needed for existing tables.
|
||||||
|
See DynamoDB documents for more information.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
|
||||||
|
<value>100</value>
|
||||||
|
<description>
|
||||||
|
Provisioned throughput requirements for write operations in terms of
|
||||||
|
capacity units for the DynamoDB table. Refer to related config
|
||||||
|
fs.s3a.s3guard.ddb.table.capacity.read before usage.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
Attempting to perform more IO than the capacity requested simply throttles the
|
||||||
|
IO; small capacity numbers are recommended when initially experimenting
|
||||||
|
with S3Guard.
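
When the table is created through the `hadoop s3guard init` command rather
than automatically, the provisioned capacity can also be requested on the
command line; a sketch, with illustrative capacity numbers matching the
properties above:

```bash
# -read/-write correspond to fs.s3a.s3guard.ddb.table.capacity.read/write
# and only take effect when the table is being created.
hadoop s3guard init -read 500 -write 100 \
  -meta dynamodb://my-ddb-table-name -region us-west-2
```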
|
||||||
|
|
||||||
|
## Authenticating with S3Guard
|
||||||
|
|
||||||
|
The DynamoDB metadata store takes advantage of the fact that the DynamoDB
|
||||||
|
service uses the same authentication mechanisms as S3. S3Guard
|
||||||
|
gets all its credentials from the S3A client that is using it.
|
||||||
|
|
||||||
|
All existing S3 authentication mechanisms can be used, with one exception:
for security reasons, credentials placed in URIs are not supported by S3Guard.
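
For example, when the S3A client is given long-lived access keys, those same
keys are reused for the DynamoDB requests; no DynamoDB-specific secrets are
needed. The fragment below is only a sketch of one such mechanism, using the
standard S3A credential properties with placeholder values; any other
supported S3A credential provider works the same way.

```xml
<property>
  <name>fs.s3a.access.key</name>
  <value>YOUR-ACCESS-KEY-ID</value>
</property>

<property>
  <name>fs.s3a.secret.key</name>
  <value>YOUR-SECRET-ACCESS-KEY</value>
</property>
```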
|
||||||
|
|
||||||
|
## Per-bucket S3Guard configuration
|
||||||
|
|
||||||
|
In production, it is likely that only some buckets will have S3Guard enabled;
those which are read-only may have it disabled, for example. Equally importantly,
buckets in different regions should have different tables, each
in the relevant region.
|
||||||
|
|
||||||
|
These options can be managed through S3A's [per-bucket configuration
|
||||||
|
mechanism](./index.html#Configuring_different_S3_buckets).
|
||||||
|
All options set under the prefix `fs.s3a.bucket.BUCKETNAME.KEY` are propagated
to the option `fs.s3a.KEY` *for that bucket only*.
|
||||||
|
|
||||||
|
As an example, here is a configuration to use different metadata stores
and tables for different buckets.

First, we define shortcuts for the metadata store classnames:
|
||||||
|
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>s3guard.null</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>s3guard.dynamo</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, Amazon's public landsat database is configured with no
metadata store:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
|
||||||
|
<value>${s3guard.null}</value>
|
||||||
|
<description>The read-only landsat-pds repository isn't
|
||||||
|
managed by S3Guard</description>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
Next, the `ireland-2` and `ireland-offline` buckets are configured with
DynamoDB as the store, and a shared table `production-table`:
|
||||||
|
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.bucket.ireland-2.metadatastore.impl</name>
|
||||||
|
<value>${s3guard.dynamo}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.bucket.ireland-offline.metadatastore.impl</name>
|
||||||
|
<value>${s3guard.dynamo}</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.bucket.ireland-2.s3guard.ddb.table</name>
|
||||||
|
<value>production-table</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
The region of this table is automatically set to be that of the buckets,
|
||||||
|
here `eu-west-1`; the same table name may actually be used in different
|
||||||
|
regions.
|
||||||
|
|
||||||
|
Together then, this configuration enables the DynamoDB Metadata Store
|
||||||
|
for two buckets with a shared table, while disabling it for the public
|
||||||
|
bucket.
|
||||||
|
|
||||||
|
|
||||||
|
## S3Guard Command Line Interface (CLI)
|
||||||
|
|
||||||
|
Note that in some cases an AWS region or `s3a://` URI can be provided.
|
||||||
|
|
||||||
|
Metadata store URIs include a scheme that designates the backing store, for
example `dynamodb://table_name`. As documented above, the
AWS region can be inferred if the URI of an existing bucket is provided.
|
||||||
|
|
||||||
|
|
||||||
|
The S3A URI must also be provided for per-bucket configuration options
|
||||||
|
to be picked up. That is: when an s3a URL is provided on the command line,
|
||||||
|
all its "resolved" per-bucket settings are used to connect to, authenticate
|
||||||
|
with and configure the S3Guard table. If no such URL is provided, then
|
||||||
|
the base settings are picked up.
|
||||||
|
|
||||||
|
|
||||||
|
### Create a table: `s3guard init`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard init -meta URI ( -region REGION | s3a://BUCKET )
|
||||||
|
```
|
||||||
|
|
||||||
|
Creates and initializes an empty metadata store.
|
||||||
|
|
||||||
|
A DynamoDB metadata store can be initialized with additional parameters
|
||||||
|
pertaining to [Provisioned Throughput](http://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ProvisionedThroughput.html):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
[-write PROVISIONED_WRITES] [-read PROVISIONED_READS]
|
||||||
|
```
|
||||||
|
|
||||||
|
Example 1
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard init -meta dynamodb://ireland-team -write 5 -read 10 s3a://ireland-1
|
||||||
|
```
|
||||||
|
|
||||||
|
Creates a table "ireland-team" with a capacity of 5 for writes, 10 for reads,
|
||||||
|
in the same location as the bucket "ireland-1".
|
||||||
|
|
||||||
|
|
||||||
|
Example 2
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard init -meta dynamodb://ireland-team -region eu-west-1
|
||||||
|
```
|
||||||
|
|
||||||
|
Creates a table "ireland-team" in the same region "s3-eu-west-1.amazonaws.com"
|
||||||
|
|
||||||
|
|
||||||
|
### Import a bucket: `s3guard import`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard import [-meta URI] s3a://BUCKET
|
||||||
|
```
|
||||||
|
|
||||||
|
Pre-populates a metadata store according to the current contents of an S3
|
||||||
|
bucket. If the `-meta` option is omitted, the binding information is taken
|
||||||
|
from the `core-site.xml` configuration.
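
A sketch of such a binding, reusing the property names shown earlier in this
document (the table name is only an illustration):

```xml
<property>
  <name>fs.s3a.metadatastore.impl</name>
  <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
</property>

<property>
  <name>fs.s3a.s3guard.ddb.table</name>
  <value>ireland-team</value>
</property>
```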
|
||||||
|
|
||||||
|
Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard import s3a://ireland-1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Audit a table: `s3guard diff`
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard diff [-meta URI] s3a://BUCKET
|
||||||
|
```
|
||||||
|
|
||||||
|
Lists discrepancies between a metadata store and bucket. Note that depending on
|
||||||
|
how S3Guard is used, certain discrepancies are to be expected.
|
||||||
|
|
||||||
|
Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard diff s3a://ireland-1
|
||||||
|
```
|
||||||
|
|
||||||
|
### Delete a table: `s3guard destroy`
|
||||||
|
|
||||||
|
|
||||||
|
Deletes a metadata store. With DynamoDB as the store, this means
the specific DynamoDB table used to store the metadata.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard destroy [-meta URI] ( -region REGION | s3a://BUCKET )
|
||||||
|
```
|
||||||
|
|
||||||
|
This *does not* delete the bucket, only the S3Guard table which it is bound
|
||||||
|
to.
|
||||||
|
|
||||||
|
|
||||||
|
Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard destroy s3a://ireland-1
|
||||||
|
```
|
||||||
|
|
||||||
|
Deletes the table which the bucket ireland-1 is configured to use
|
||||||
|
as its MetadataStore.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard destroy -meta dynamodb://ireland-team -region eu-west-1
|
||||||
|
```
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
### Clean up a table: `s3guard prune`
|
||||||
|
|
||||||
|
Deletes all file entries in the MetadataStore table whose object "modification
|
||||||
|
time" is older than the specified age.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
|
||||||
|
[-seconds SECONDS] [-m URI] ( -region REGION | s3a://BUCKET )
|
||||||
|
```
|
||||||
|
|
||||||
|
A time value must be supplied.
|
||||||
|
|
||||||
|
1. This does not delete the entries in the bucket itself.
|
||||||
|
1. The modification time is effectively the creation time of the objects
|
||||||
|
in the S3 Bucket.
|
||||||
|
1. Even when an S3A URI is supplied, all entries in the table older than
|
||||||
|
a specific age are deleted — even those from other buckets.
|
||||||
|
|
||||||
|
Example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard prune -days 7 s3a://ireland-1
|
||||||
|
```
|
||||||
|
|
||||||
|
Deletes all entries in the S3Guard table for files older than seven days from
|
||||||
|
the table associated with `s3a://ireland-1`.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
|
||||||
|
```
|
||||||
|
|
||||||
|
Deletes all entries more than 90 minutes old from the table "ireland-team" in
|
||||||
|
the region "eu-west-1".
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
## Debugging and Error Handling
|
||||||
|
|
||||||
|
If you run into network connectivity issues, or have a machine failure in the
|
||||||
|
middle of an operation, you may end up with your metadata store having state
|
||||||
|
that differs from S3. The S3Guard CLI commands, covered in the CLI section
|
||||||
|
above, can be used to diagnose and repair these issues.
|
||||||
|
|
||||||
|
There are some logs whose log level can be increased to provide more
|
||||||
|
information.
|
||||||
|
|
||||||
|
```properties
|
||||||
|
# Log S3Guard classes
|
||||||
|
log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
|
||||||
|
|
||||||
|
# Log all S3A classes
|
||||||
|
log4j.logger.org.apache.hadoop.fs.s3a=DEBUG
|
||||||
|
|
||||||
|
# Enable debug logging of AWS DynamoDB client
|
||||||
|
log4j.logger.com.amazonaws.services.dynamodbv2.AmazonDynamoDB=DEBUG
|
||||||
|
|
||||||
|
# Log all HTTP requests made; includes S3 interaction. This may
|
||||||
|
# include sensitive information such as account IDs in HTTP headers.
|
||||||
|
log4j.logger.com.amazonaws.request=DEBUG
|
||||||
|
|
||||||
|
```
|
||||||
|
|
||||||
|
If all else fails, S3Guard is designed to allow for easy recovery by deleting
|
||||||
|
the metadata store data. In DynamoDB, this can be accomplished by simply
|
||||||
|
deleting the table, and allowing S3Guard to recreate it from scratch. Note
|
||||||
|
that S3Guard tracks recent changes to file metadata to implement consistency.
|
||||||
|
Deleting the metadata store table will simply result in a period of eventual
|
||||||
|
consistency for any file modifications that were made right before the table
|
||||||
|
was deleted.
|
||||||
|
|
||||||
|
### Failure Semantics
|
||||||
|
|
||||||
|
Operations which modify metadata will make changes to S3 first. If, and only
|
||||||
|
if, those operations succeed, the equivalent changes will be made to the
|
||||||
|
Metadata Store.
|
||||||
|
|
||||||
|
These changes to S3 and Metadata Store are not fully-transactional: If the S3
|
||||||
|
operations succeed, and the subsequent Metadata Store updates fail, the S3
|
||||||
|
changes will *not* be rolled back. In this case, an error message will be
|
||||||
|
logged.
|
||||||
|
|
||||||
|
### Versioning
|
||||||
|
|
||||||
|
S3Guard tables are created with a version marker, an entry with the primary
|
||||||
|
key and child entry of `../VERSION`; the use of a relative path guarantees
|
||||||
|
that it will not be resolved.
|
||||||
|
|
||||||
|
#### Versioning policy
|
||||||
|
|
||||||
|
1. The version number of an S3Guard table will only be incremented when
|
||||||
|
an incompatible change is made to the table structure —that is, the structure
|
||||||
|
has changed so that it is no longer readable by older versions, or because
|
||||||
|
it has added new mandatory fields which older versions do not create.
|
||||||
|
1. The version number of S3Guard tables will only be changed by incrementing
|
||||||
|
the value.
|
||||||
|
1. Updated versions of S3Guard MAY continue to support older version tables.
|
||||||
|
1. If an incompatible change is made such that existing tables are not compatible,
|
||||||
|
then a means shall be provided to update existing tables. For example:
|
||||||
|
an option in the Command Line Interface, or an option to upgrade tables
|
||||||
|
during S3Guard initialization.
|
||||||
|
|
||||||
|
*Note*: this policy does not indicate any intent to upgrade table structures
in an incompatible manner. The version marker in tables exists to support
such an option if it ever becomes necessary, by ensuring that all S3Guard
clients can recognise any version mismatch.
|
||||||
|
|
||||||
|
### Security
|
||||||
|
|
||||||
|
All users of the DynamoDB table must have write access to it. This
|
||||||
|
effectively means they must have write access to the entire object store.
|
||||||
|
|
||||||
|
There has not been much testing of using an S3Guard Metadata Store
with a read-only S3 Bucket. It *should* work, provided all users
have write access to the DynamoDB table, and, as updates to the Metadata Store
are only made after successful file creation, deletion and rename, the
store is *unlikely* to get out of sync. Even so, it is still something which
merits more testing before it could be considered reliable.
|
||||||
|
|
||||||
|
### Troubleshooting
|
||||||
|
|
||||||
|
#### Error: `S3Guard table lacks version marker.`
|
||||||
|
|
||||||
|
The table which was intended to be used as an S3Guard metadata store
does not have any version marker indicating that it is an S3Guard table.

It may be that this is not an S3Guard table.
|
||||||
|
|
||||||
|
* Make sure that this is the correct table name.
|
||||||
|
* Delete the table, so it can be rebuilt.
|
||||||
|
|
||||||
|
#### Error: `Database table is from an incompatible S3Guard version`
|
||||||
|
|
||||||
|
This indicates that the version of S3Guard which created (or possibly updated)
the database table is different from that expected by the S3A
client.
|
||||||
|
|
||||||
|
This error will also include the expected and actual version numbers.
|
||||||
|
|
||||||
|
If the expected version is lower than the actual version, then the version
|
||||||
|
of the S3A client library is too old to interact with this S3Guard-managed
|
||||||
|
bucket. Upgrade the application/library.
|
||||||
|
|
||||||
|
If the expected version is higher than the actual version, then the table
|
||||||
|
itself will need upgrading.
|
||||||
|
|
||||||
|
#### Error `"DynamoDB table TABLE does not exist in region REGION; auto-creation is turned off"`
|
||||||
|
|
||||||
|
S3Guard could not find the DynamoDB table for the Metadata Store,
|
||||||
|
and it was not configured to create it. Either the table was missing,
|
||||||
|
or the configuration is preventing S3Guard from finding the table.
|
||||||
|
|
||||||
|
1. Verify that the value of `fs.s3a.s3guard.ddb.table` is correct.
|
||||||
|
1. If the region for an existing table has been set in
|
||||||
|
`fs.s3a.s3guard.ddb.region`, verify that the value is correct.
|
||||||
|
1. If the region is not set, verify that the table exists in the same
|
||||||
|
region as the bucket being used.
|
||||||
|
1. Create the table if necessary.
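
When the table and region are configured explicitly, the settings to check
look like the following sketch; the values here are placeholders.

```xml
<property>
  <name>fs.s3a.s3guard.ddb.table</name>
  <value>my-ddb-table</value>
</property>

<property>
  <name>fs.s3a.s3guard.ddb.region</name>
  <value>eu-west-1</value>
</property>
```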
|
@ -107,6 +107,10 @@ each filesystem for its testing.
|
|||||||
1. `test.fs.s3n.name` : the URL of the bucket for S3n tests
|
1. `test.fs.s3n.name` : the URL of the bucket for S3n tests
|
||||||
1. `test.fs.s3a.name` : the URL of the bucket for S3a tests
|
1. `test.fs.s3a.name` : the URL of the bucket for S3a tests
|
||||||
|
|
||||||
|
*Note* that running s3a and s3n tests in parallel mode, against the same bucket
|
||||||
|
is unreliable. We recommend using separate buckets or testing one connector
|
||||||
|
at a time.
|
||||||
|
|
||||||
The contents of each bucket will be destroyed during the test process:
|
The contents of each bucket will be destroyed during the test process:
|
||||||
do not use the bucket for any purpose other than testing. Furthermore, for
|
do not use the bucket for any purpose other than testing. Furthermore, for
|
||||||
s3a, all in-progress multi-part uploads to the bucket will be aborted at the
|
s3a, all in-progress multi-part uploads to the bucket will be aborted at the
|
||||||
@ -691,7 +695,7 @@ use requires the presence of secret credentials, where tests may be slow,
|
|||||||
and where finding out why something failed from nothing but the test output
|
and where finding out why something failed from nothing but the test output
|
||||||
is critical.
|
is critical.
|
||||||
|
|
||||||
#### Subclasses Existing Shared Base Blasses
|
#### Subclasses Existing Shared Base Classes
|
||||||
|
|
||||||
Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable.
|
Extend `AbstractS3ATestBase` or `AbstractSTestS3AHugeFiles` unless justifiable.
|
||||||
These set things up for testing against the object stores, provide good threadnames,
|
These set things up for testing against the object stores, provide good threadnames,
|
||||||
@ -798,7 +802,7 @@ We really appreciate this — you will too.
|
|||||||
|
|
||||||
### How to keep your credentials really safe
|
### How to keep your credentials really safe
|
||||||
|
|
||||||
Although the `auth-keys.xml` file is marged as ignored in git and subversion,
|
Although the `auth-keys.xml` file is marked as ignored in git and subversion,
|
||||||
it is still in your source tree, and there's always that risk that it may
|
it is still in your source tree, and there's always that risk that it may
|
||||||
creep out.
|
creep out.
|
||||||
|
|
||||||
@ -813,3 +817,283 @@ using an absolute XInclude reference to it.
|
|||||||
|
|
||||||
</configuration>
|
</configuration>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# Failure Injection
|
||||||
|
|
||||||
|
**Warning: do not enable any type of failure injection in production. The
following settings are for testing only.**
|
||||||
|
|
||||||
|
One of the challenges with S3A integration tests is the fact that S3 is an
|
||||||
|
eventually-consistent storage system. In practice, we rarely see delays in
|
||||||
|
visibility of recently created objects both in listings (`listStatus()`) and
|
||||||
|
when getting a single file's metadata (`getFileStatus()`). Since this behavior
|
||||||
|
is rare and non-deterministic, thorough integration testing is challenging.
|
||||||
|
|
||||||
|
To address this, S3A supports a shim layer on top of the `AmazonS3Client`
|
||||||
|
class which artificially delays certain paths from appearing in listings.
|
||||||
|
This is implemented in the class `InconsistentAmazonS3Client`.
|
||||||
|
|
||||||
|
## Simulating List Inconsistencies
|
||||||
|
|
||||||
|
### Enabling the InconsistentAmazonS3Client
|
||||||
|
|
||||||
|
There are two ways of enabling the `InconsistentAmazonS3Client`: at
|
||||||
|
config-time, or programmatically. For an example of programmatic test usage,
|
||||||
|
see `ITestS3GuardListConsistency`.
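
A minimal sketch of the programmatic route is shown below; it uses the factory
constant and classes added by this patch, wrapped in a purely hypothetical
helper class.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory;
import org.apache.hadoop.fs.s3a.S3ClientFactory;

import static org.apache.hadoop.fs.s3a.Constants.S3_CLIENT_FACTORY_IMPL;

/** Hypothetical helper class, not part of the patch. */
public class InconsistencyExample {

  /** Build a configuration whose S3 client injects listing inconsistency. */
  public static Configuration inconsistentConf() {
    Configuration conf = new Configuration();
    // Swap the S3A client factory for the fault-injecting implementation.
    conf.setClass(S3_CLIENT_FACTORY_IMPL,
        InconsistentS3ClientFactory.class, S3ClientFactory.class);
    return conf;
  }
}
```

A test can then pass this configuration to whatever filesystem or contract
setup it already uses.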
|
||||||
|
|
||||||
|
To enable the fault-injecting client via configuration, switch the
|
||||||
|
S3A client to use the "Inconsistent S3 Client Factory" when connecting to
|
||||||
|
S3:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3.client.factory.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
The inconsistent client works by:
|
||||||
|
|
||||||
|
1. Choosing which objects will be "inconsistent" at the time the object is
|
||||||
|
created or deleted.
|
||||||
|
2. When `listObjects()` is called, any keys that we have marked as
|
||||||
|
inconsistent above will not be returned in the results (until the
|
||||||
|
configured delay has elapsed). Similarly, deleted items may be *added* to
|
||||||
|
missing results to delay the visibility of the delete.
|
||||||
|
|
||||||
|
There are two ways of choosing which keys (filenames) will be affected: By
|
||||||
|
substring, and by random probability.
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.failinject.inconsistency.key.substring</name>
|
||||||
|
<value>DELAY_LISTING_ME</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.failinject.inconsistency.probability</name>
|
||||||
|
<value>1.0</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
By default, any object which has the substring "DELAY_LISTING_ME" in its key
|
||||||
|
will be subject to delayed visibility. For example, the path
|
||||||
|
`s3a://my-bucket/test/DELAY_LISTING_ME/file.txt` would match this condition.
|
||||||
|
To match all keys use the value "\*" (a single asterisk). This is a special
|
||||||
|
value: *We don't support arbitrary wildcards.*
|
||||||
|
|
||||||
|
The default probability of delaying an object is 1.0. This means that *all*
|
||||||
|
keys that match the substring will get delayed visibility. Note that we take
|
||||||
|
the logical *and* of the two conditions (substring matches *and* probability
|
||||||
|
random chance occurs). Here are some example configurations:
|
||||||
|
|
||||||
|
```
|
||||||
|
| substring | probability | behavior |
|
||||||
|
|-----------|-------------|--------------------------------------------|
|
||||||
|
| | 0.001 | An empty <value> tag in .xml config will |
|
||||||
|
| | | be interpreted as unset and revert to the |
|
||||||
|
| | | default value, "DELAY_LISTING_ME" |
|
||||||
|
| | | |
|
||||||
|
| * | 0.001 | 1/1000 chance of *any* key being delayed. |
|
||||||
|
| | | |
|
||||||
|
| delay | 0.01 | 1/100 chance of any key containing "delay" |
|
||||||
|
| | | |
|
||||||
|
| delay | 1.0 | All keys containing substring "delay" .. |
|
||||||
|
```
|
||||||
|
|
||||||
|
You can also configure how long you want the delay in visibility to last.
|
||||||
|
The default is 5000 milliseconds (five seconds).
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.failinject.inconsistency.msec</name>
|
||||||
|
<value>5000</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
Future versions of this client will introduce new failure modes,
|
||||||
|
with simulation of S3 throttling exceptions the next feature under
|
||||||
|
development.
|
||||||
|
|
||||||
|
### Limitations of Inconsistency Injection
|
||||||
|
|
||||||
|
Although `InconsistentAmazonS3Client` can delay the visibility of an object
|
||||||
|
or parent directory, it does not prevent the key of that object from
|
||||||
|
appearing in all prefix searches. For example, if we create the following
|
||||||
|
object with the default configuration above, in an otherwise empty bucket:
|
||||||
|
|
||||||
|
```
|
||||||
|
s3a://bucket/a/b/c/DELAY_LISTING_ME
|
||||||
|
```
|
||||||
|
|
||||||
|
Then the following paths will still be visible as directories (ignoring
|
||||||
|
possible real-world inconsistencies):
|
||||||
|
|
||||||
|
```
|
||||||
|
s3a://bucket/a
|
||||||
|
s3a://bucket/a/b
|
||||||
|
```
|
||||||
|
|
||||||
|
Whereas `getFileStatus()` on the following *will* be subject to delayed
|
||||||
|
visibility (`FileNotFoundException` until delay has elapsed):
|
||||||
|
|
||||||
|
```
|
||||||
|
s3a://bucket/a/b/c
|
||||||
|
s3a://bucket/a/b/c/DELAY_LISTING_ME
|
||||||
|
```
|
||||||
|
|
||||||
|
In real-life S3 inconsistency, however, we expect that all the above paths
|
||||||
|
(including `a` and `b`) will be subject to delayed visibility.
|
||||||
|
|
||||||
|
### Using the `InconsistentAmazonS3Client` in downstream integration tests
|
||||||
|
|
||||||
|
The inconsistent client is shipped in the `hadoop-aws` JAR, so it can
|
||||||
|
be used in applications which work with S3 to see how they handle
|
||||||
|
inconsistent directory listings.
|
||||||
|
|
||||||
|
## Testing S3Guard
|
||||||
|
|
||||||
|
The basic strategy for testing S3Guard correctness consists of:
|
||||||
|
|
||||||
|
1. MetadataStore Contract tests.
|
||||||
|
|
||||||
|
The MetadataStore contract tests are inspired by the Hadoop FileSystem and
|
||||||
|
`FileContext` contract tests. Each implementation of the `MetadataStore` interface
|
||||||
|
subclasses the `MetadataStoreTestBase` class and customizes it to initialize
|
||||||
|
its own MetadataStore. This test ensures that the different implementations
|
||||||
|
all satisfy the semantics of the MetadataStore API.
|
||||||
|
|
||||||
|
2. Running existing S3A unit and integration tests with S3Guard enabled.
|
||||||
|
|
||||||
|
You can run the S3A integration tests on top of S3Guard by configuring your
|
||||||
|
`MetadataStore` in your
|
||||||
|
`hadoop-tools/hadoop-aws/src/test/resources/core-site.xml` or
|
||||||
|
`hadoop-tools/hadoop-aws/src/test/resources/auth-keys.xml` files.
|
||||||
|
Next run the S3A integration tests as outlined in the *Running the Tests* section
|
||||||
|
of the [S3A documentation](./index.html).
|
||||||
|
|
||||||
|
3. Running fault-injection tests that test S3Guard's consistency features.
|
||||||
|
|
||||||
|
The `ITestS3GuardListConsistency` uses failure injection to ensure
|
||||||
|
that list consistency logic is correct even when the underlying storage is
|
||||||
|
eventually consistent.
|
||||||
|
|
||||||
|
The integration test adds a shim above the Amazon S3 Client layer that injects
|
||||||
|
delays in object visibility.
|
||||||
|
|
||||||
|
All of these tests will be run if you follow the steps listed in step 2 above.
|
||||||
|
|
||||||
|
No charges are incurred for using this store, and its consistency
|
||||||
|
guarantees are that of the underlying object store instance. <!-- :) -->
|
||||||
|
|
||||||
|
## Testing S3A with S3Guard Enabled
|
||||||
|
|
||||||
|
All the S3A tests which work with a private repository can be configured to
run with S3Guard by using the `s3guard` profile. When set, this will run
all the tests with the local, in-memory metadata store, in "non-authoritative" mode.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mvn -T 1C verify -Dparallel-tests -DtestsThreadCount=6 -Ds3guard
|
||||||
|
```
|
||||||
|
|
||||||
|
When the `s3guard` profile is enabled, the following profiles can be specified:
|
||||||
|
|
||||||
|
* `dynamo`: use an AWS-hosted DynamoDB table, creating the table if it does
not exist. You will have to pay the bills for the DynamoDB web service.
* `dynamodblocal`: use an in-memory DynamoDBLocal server instead of the real AWS
DynamoDB web service, launching the server and creating the table.
You will not be billed for DynamoDB use in these tests. As the server runs in-JVM,
the table is not shared with other tests running in parallel.
|
||||||
|
* `auth`: treat the S3Guard metadata as authoritative.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mvn -T 1C verify -Dparallel-tests -DtestsThreadCount=6 -Ds3guard -Ddynamo -Dauth
|
||||||
|
```
|
||||||
|
|
||||||
|
When experimenting with options, it is usually best to run a single test suite
|
||||||
|
at a time until the operations appear to be working.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
mvn -T 1C verify -Dtest=skip -Dit.test=ITestS3AMiscOperations -Ds3guard -Ddynamo
|
||||||
|
```
|
||||||
|
|
||||||
|
### Notes
|
||||||
|
|
||||||
|
1. If the `s3guard` profile is not set, then the S3Guard properties are those
|
||||||
|
of the test configuration set in `contract-test-options.xml` or `auth-keys.xml`.

If the `s3guard` profile *is* set:
|
||||||
|
1. The S3Guard options from maven (the dynamo and authoritative flags)
|
||||||
|
overwrite any previously set in the configuration files.
|
||||||
|
1. DynamoDB will be configured to create any missing tables.
|
||||||
|
|
||||||
|
### Warning About Concurrent Tests
|
||||||
|
|
||||||
|
You must not run S3A and S3N tests in parallel on the same bucket. This is
|
||||||
|
especially true when S3Guard is enabled. S3Guard requires that all clients
|
||||||
|
that are modifying the bucket have S3Guard enabled, so having S3N
|
||||||
|
integration tests running in parallel with S3A tests will cause strange
|
||||||
|
failures.
|
||||||
|
|
||||||
|
### Scale Testing MetadataStore Directly
|
||||||
|
|
||||||
|
There are some scale tests that exercise Metadata Store implementations
|
||||||
|
directly. These ensure that S3Guard is robust to things like DynamoDB
|
||||||
|
throttling, and compare performance for different implementations. These
|
||||||
|
are included in the scale tests executed when `-Dscale` is passed to
|
||||||
|
the maven command line.
|
||||||
|
|
||||||
|
The two S3Guard scale tests are `ITestDynamoDBMetadataStoreScale` and
|
||||||
|
`ITestLocalMetadataStoreScale`. To run the DynamoDB test, you will need to
|
||||||
|
define your table name and region in your test configuration. For example,
|
||||||
|
the following settings allow us to run `ITestDynamoDBMetadataStoreScale` with
|
||||||
|
artificially low read and write capacity provisioned, so we can judge the
|
||||||
|
effects of being throttled by the DynamoDB service:
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>scale.test.operation.count</name>
|
||||||
|
<value>10</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>scale.test.directory.count</name>
|
||||||
|
<value>3</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.scale.test.enabled</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table</name>
|
||||||
|
<value>my-scale-test</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.region</name>
|
||||||
|
<value>us-west-2</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table.create</name>
|
||||||
|
<value>true</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table.capacity.read</name>
|
||||||
|
<value>10</value>
|
||||||
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.s3guard.ddb.table.capacity.write</name>
|
||||||
|
<value>10</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Testing only: Local Metadata Store
|
||||||
|
|
||||||
|
There is an in-memory Metadata Store for testing.
|
||||||
|
|
||||||
|
```xml
|
||||||
|
<property>
|
||||||
|
<name>fs.s3a.metadatastore.impl</name>
|
||||||
|
<value>org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore</value>
|
||||||
|
</property>
|
||||||
|
```
|
||||||
|
|
||||||
|
This is not for use in production.
|
||||||
|
@ -22,11 +22,25 @@
|
|||||||
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
|
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* S3A contract tests creating files.
|
* S3A contract tests creating files.
|
||||||
*/
|
*/
|
||||||
public class ITestS3AContractCreate extends AbstractContractCreateTest {
|
public class ITestS3AContractCreate extends AbstractContractCreateTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -22,11 +22,25 @@
|
|||||||
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
|
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* S3A contract tests covering deletes.
|
* S3A contract tests covering deletes.
|
||||||
*/
|
*/
|
||||||
public class ITestS3AContractDelete extends AbstractContractDeleteTest {
|
public class ITestS3AContractDelete extends AbstractContractDeleteTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -20,6 +20,7 @@
|
|||||||
|
|
||||||
import static org.apache.hadoop.fs.s3a.Constants.*;
|
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||||
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
|
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
|
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
|
||||||
@ -38,12 +39,18 @@ protected int getTestTimeoutMillis() {
|
|||||||
return SCALE_TEST_TIMEOUT_MILLIS;
|
return SCALE_TEST_TIMEOUT_MILLIS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected Configuration createConfiguration() {
|
protected Configuration createConfiguration() {
|
||||||
Configuration newConf = super.createConfiguration();
|
Configuration newConf = super.createConfiguration();
|
||||||
newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING);
|
newConf.setLong(MULTIPART_SIZE, MULTIPART_SETTING);
|
||||||
newConf.setBoolean(FAST_UPLOAD, true);
|
newConf.setBoolean(FAST_UPLOAD, true);
|
||||||
newConf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);
|
newConf.set(FAST_UPLOAD_BUFFER, FAST_UPLOAD_BUFFER_DISK);
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(newConf);
|
||||||
return newConf;
|
return newConf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,6 +23,8 @@
|
|||||||
import org.apache.hadoop.fs.s3a.Constants;
|
import org.apache.hadoop.fs.s3a.Constants;
|
||||||
import org.apache.hadoop.fs.s3a.S3ATestUtils;
|
import org.apache.hadoop.fs.s3a.S3ATestUtils;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* S3A contract tests covering getFileStatus.
|
* S3A contract tests covering getFileStatus.
|
||||||
*/
|
*/
|
||||||
@ -46,6 +48,8 @@ protected Configuration createConfiguration() {
|
|||||||
S3ATestUtils.disableFilesystemCaching(conf);
|
S3ATestUtils.disableFilesystemCaching(conf);
|
||||||
// aggressively low page size forces tests to go multipage
|
// aggressively low page size forces tests to go multipage
|
||||||
conf.setInt(Constants.MAX_PAGING_KEYS, 2);
|
conf.setInt(Constants.MAX_PAGING_KEYS, 2);
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
return conf;
|
return conf;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -22,11 +22,25 @@
|
|||||||
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
|
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test dir operations on S3A.
|
* Test dir operations on S3A.
|
||||||
*/
|
*/
|
||||||
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {
|
public class ITestS3AContractMkdir extends AbstractContractMkdirTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -22,11 +22,25 @@
|
|||||||
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
|
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* S3A contract tests opening files.
|
* S3A contract tests opening files.
|
||||||
*/
|
*/
|
||||||
public class ITestS3AContractOpen extends AbstractContractOpenTest {
|
public class ITestS3AContractOpen extends AbstractContractOpenTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -26,12 +26,25 @@
|
|||||||
|
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* S3A contract tests covering rename.
|
* S3A contract tests covering rename.
|
||||||
*/
|
*/
|
||||||
public class ITestS3AContractRename extends AbstractContractRenameTest {
|
public class ITestS3AContractRename extends AbstractContractRenameTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -28,6 +28,8 @@
|
|||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* root dir operations against an S3 bucket.
|
* root dir operations against an S3 bucket.
|
||||||
*/
|
*/
|
||||||
@ -37,6 +39,18 @@ public class ITestS3AContractRootDir extends
|
|||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(ITestS3AContractRootDir.class);
|
LoggerFactory.getLogger(ITestS3AContractRootDir.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -22,11 +22,25 @@
|
|||||||
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
|
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* S3A contract tests covering file seek.
|
* S3A contract tests covering file seek.
|
||||||
*/
|
*/
|
||||||
public class ITestS3AContractSeek extends AbstractContractSeekTest {
|
public class ITestS3AContractSeek extends AbstractContractSeekTest {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected AbstractFSContract createContract(Configuration conf) {
|
protected AbstractFSContract createContract(Configuration conf) {
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
|
@ -26,6 +26,8 @@
|
|||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
|
||||||
|
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
|
||||||
|
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
import org.junit.Before;
|
import org.junit.Before;
|
||||||
@ -33,7 +35,8 @@
|
|||||||
import org.junit.rules.ExpectedException;
|
import org.junit.rules.ExpectedException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract base class for S3A unit tests using a mock S3 client.
|
* Abstract base class for S3A unit tests using a mock S3 client and a null
|
||||||
|
* metadata store.
|
||||||
*/
|
*/
|
||||||
public abstract class AbstractS3AMockTest {
|
public abstract class AbstractS3AMockTest {
|
||||||
|
|
||||||
@ -55,6 +58,10 @@ public void setup() throws Exception {
|
|||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
|
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
|
||||||
S3ClientFactory.class);
|
S3ClientFactory.class);
|
||||||
|
// We explicitly disable MetadataStore even if it's configured. For unit
|
||||||
|
// test we don't issue request to AWS DynamoDB service.
|
||||||
|
conf.setClass(S3_METADATA_STORE_IMPL, NullMetadataStore.class,
|
||||||
|
MetadataStore.class);
|
||||||
fs = new S3AFileSystem();
|
fs = new S3AFileSystem();
|
||||||
URI uri = URI.create(FS_S3A + "://" + BUCKET);
|
URI uri = URI.create(FS_S3A + "://" + BUCKET);
|
||||||
fs.initialize(uri, conf);
|
fs.initialize(uri, conf);
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
|
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An extension of the contract test base set up for S3A tests.
|
* An extension of the contract test base set up for S3A tests.
|
||||||
@ -65,6 +66,18 @@ protected int getTestTimeoutMillis() {
|
|||||||
return S3A_TEST_TIMEOUT;
|
return S3A_TEST_TIMEOUT;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a configuration, possibly patching in S3Guard options.
|
||||||
|
* @return a configuration
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
// patch in S3Guard options
|
||||||
|
maybeEnableS3Guard(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
protected Configuration getConfiguration() {
|
protected Configuration getConfiguration() {
|
||||||
return getContract().getConf();
|
return getContract().getConf();
|
||||||
}
|
}
|
||||||
@ -99,10 +112,21 @@ protected void describe(String text, Object... args) {
|
|||||||
*/
|
*/
|
||||||
protected Path writeThenReadFile(String name, int len) throws IOException {
|
protected Path writeThenReadFile(String name, int len) throws IOException {
|
||||||
Path path = path(name);
|
Path path = path(name);
|
||||||
|
writeThenReadFile(path, len);
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a file, read it back, validate the dataset. Overwrites the file
|
||||||
|
* if it is present
|
||||||
|
* @param path path to file
|
||||||
|
* @param len length of file
|
||||||
|
* @throws IOException any IO problem
|
||||||
|
*/
|
||||||
|
protected void writeThenReadFile(Path path, int len) throws IOException {
|
||||||
byte[] data = dataset(len, 'a', 'z');
|
byte[] data = dataset(len, 'a', 'z');
|
||||||
writeDataset(getFileSystem(), path, data, data.length, 1024 * 1024, true);
|
writeDataset(getFileSystem(), path, data, data.length, 1024 * 1024, true);
|
||||||
ContractTestUtils.verifyFileContents(getFileSystem(), path, data);
|
ContractTestUtils.verifyFileContents(getFileSystem(), path, data);
|
||||||
return path;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -140,6 +140,10 @@ public void testBadCredentials() throws Exception {
|
|||||||
createFailingFS(conf);
|
createFailingFS(conf);
|
||||||
} catch (AccessDeniedException e) {
|
} catch (AccessDeniedException e) {
|
||||||
// expected
|
// expected
|
||||||
|
} catch (AWSServiceIOException e) {
|
||||||
|
GenericTestUtils.assertExceptionContains(
|
||||||
|
"UnrecognizedClientException", e);
|
||||||
|
// expected
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +25,7 @@
|
|||||||
import org.apache.commons.lang.StringUtils;
|
import org.apache.commons.lang.StringUtils;
|
||||||
import org.apache.commons.lang.reflect.FieldUtils;
|
import org.apache.commons.lang.reflect.FieldUtils;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
||||||
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
|
import org.apache.hadoop.fs.s3native.S3xLoginHelper;
|
||||||
@ -483,7 +484,7 @@ public S3AFileSystem run() throws Exception{
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
assertEquals("username", alice, fs.getUsername());
|
assertEquals("username", alice, fs.getUsername());
|
||||||
S3AFileStatus status = fs.getFileStatus(new Path("/"));
|
FileStatus status = fs.getFileStatus(new Path("/"));
|
||||||
assertEquals("owner in " + status, alice, status.getOwner());
|
assertEquals("owner in " + status, alice, status.getOwner());
|
||||||
assertEquals("group in " + status, alice, status.getGroup());
|
assertEquals("group in " + status, alice, status.getGroup());
|
||||||
}
|
}
|
||||||
|
@ -29,6 +29,7 @@
|
|||||||
import org.apache.commons.io.FileUtils;
|
import org.apache.commons.io.FileUtils;
|
||||||
import org.apache.commons.io.IOUtils;
|
import org.apache.commons.io.IOUtils;
|
||||||
import org.apache.hadoop.fs.FileAlreadyExistsException;
|
import org.apache.hadoop.fs.FileAlreadyExistsException;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
|
||||||
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
||||||
@ -63,7 +64,7 @@ public void testCopyFile() throws Throwable {
|
|||||||
Path dest = upload(file, true);
|
Path dest = upload(file, true);
|
||||||
assertPathExists("uploaded file not found", dest);
|
assertPathExists("uploaded file not found", dest);
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
S3AFileStatus status = fs.getFileStatus(dest);
|
FileStatus status = fs.getFileStatus(dest);
|
||||||
assertEquals("File length of " + status,
|
assertEquals("File length of " + status,
|
||||||
message.getBytes(ASCII).length, status.getLen());
|
message.getBytes(ASCII).length, status.getLen());
|
||||||
assertFileTextEquals(dest, message);
|
assertFileTextEquals(dest, message);
|
||||||
|
@ -19,6 +19,7 @@
|
|||||||
package org.apache.hadoop.fs.s3a;
|
package org.apache.hadoop.fs.s3a;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.junit.After;
|
import org.junit.After;
|
||||||
@ -37,6 +38,7 @@
|
|||||||
import java.nio.file.AccessDeniedException;
|
import java.nio.file.AccessDeniedException;
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME;
|
import static org.apache.hadoop.fs.s3a.S3ATestConstants.TEST_FS_S3A_NAME;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assumeS3GuardState;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that credentials can go into the URL. This includes a valid
|
* Tests that credentials can go into the URL. This includes a valid
|
||||||
@ -63,6 +65,11 @@ public void teardown() {
|
|||||||
public void testInstantiateFromURL() throws Throwable {
|
public void testInstantiateFromURL() throws Throwable {
|
||||||
|
|
||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
|
|
||||||
|
// Skip in the case of S3Guard with DynamoDB because it cannot get
|
||||||
|
// credentials for its own use if they're only in S3 URLs
|
||||||
|
assumeS3GuardState(false, conf);
|
||||||
|
|
||||||
String accessKey = conf.get(Constants.ACCESS_KEY);
|
String accessKey = conf.get(Constants.ACCESS_KEY);
|
||||||
String secretKey = conf.get(Constants.SECRET_KEY);
|
String secretKey = conf.get(Constants.SECRET_KEY);
|
||||||
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
|
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
|
||||||
@ -84,6 +91,7 @@ public void testInstantiateFromURL() throws Throwable {
|
|||||||
conf.unset(Constants.ACCESS_KEY);
|
conf.unset(Constants.ACCESS_KEY);
|
||||||
conf.unset(Constants.SECRET_KEY);
|
conf.unset(Constants.SECRET_KEY);
|
||||||
fs = S3ATestUtils.createTestFileSystem(conf);
|
fs = S3ATestUtils.createTestFileSystem(conf);
|
||||||
|
|
||||||
String fsURI = fs.getUri().toString();
|
String fsURI = fs.getUri().toString();
|
||||||
assertFalse("FS URI contains a @ symbol", fsURI.contains("@"));
|
assertFalse("FS URI contains a @ symbol", fsURI.contains("@"));
|
||||||
assertFalse("FS URI contains a % symbol", fsURI.contains("%"));
|
assertFalse("FS URI contains a % symbol", fsURI.contains("%"));
|
||||||
@ -119,13 +127,14 @@ public void testInvalidCredentialsFail() throws Throwable {
|
|||||||
Configuration conf = new Configuration();
|
Configuration conf = new Configuration();
|
||||||
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
|
String fsname = conf.getTrimmed(TEST_FS_S3A_NAME, "");
|
||||||
Assume.assumeNotNull(fsname);
|
Assume.assumeNotNull(fsname);
|
||||||
|
assumeS3GuardState(false, conf);
|
||||||
URI original = new URI(fsname);
|
URI original = new URI(fsname);
|
||||||
URI testURI = createUriWithEmbeddedSecrets(original, "user", "//");
|
URI testURI = createUriWithEmbeddedSecrets(original, "user", "//");
|
||||||
|
|
||||||
conf.set(TEST_FS_S3A_NAME, testURI.toString());
|
conf.set(TEST_FS_S3A_NAME, testURI.toString());
|
||||||
fs = S3ATestUtils.createTestFileSystem(conf);
|
|
||||||
try {
|
try {
|
||||||
S3AFileStatus status = fs.getFileStatus(new Path("/"));
|
fs = S3ATestUtils.createTestFileSystem(conf);
|
||||||
|
FileStatus status = fs.getFileStatus(new Path("/"));
|
||||||
fail("Expected an AccessDeniedException, got " + status);
|
fail("Expected an AccessDeniedException, got " + status);
|
||||||
} catch (AccessDeniedException e) {
|
} catch (AccessDeniedException e) {
|
||||||
// expected
|
// expected
|
||||||
|
@ -0,0 +1,62 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
||||||
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests behavior of a FileNotFound error that happens after open(), i.e. on
|
||||||
|
* the first read.
|
||||||
|
*/
|
||||||
|
public class ITestS3ADelayedFNF extends AbstractS3ATestBase {
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* See debugging documentation
|
||||||
|
* <a href="https://cwiki.apache.org/confluence/display/HADOOP/S3A%3A+FileNotFound+Exception+on+Read">here</a>.
|
||||||
|
* @throws Exception
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testNotFoundFirstRead() throws Exception {
|
||||||
|
FileSystem fs = getFileSystem();
|
||||||
|
Path p = path("some-file");
|
||||||
|
ContractTestUtils.createFile(fs, p, false, new byte[] {20, 21, 22});
|
||||||
|
|
||||||
|
final FSDataInputStream in = fs.open(p);
|
||||||
|
assertDeleted(p, false);
|
||||||
|
|
||||||
|
// This should fail since we deleted after the open.
|
||||||
|
LambdaTestUtils.intercept(FileNotFoundException.class,
|
||||||
|
new Callable<Integer>() {
|
||||||
|
@Override
|
||||||
|
public Integer call() throws Exception {
|
||||||
|
return in.read();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@@ -0,0 +1,83 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Test;

import java.io.IOException;

/**
 * Tests which exercise treatment of empty/non-empty directories.
 */
public class ITestS3AEmptyDirectory extends AbstractS3ATestBase {

  @Test
  public void testDirectoryBecomesEmpty() throws Exception {
    S3AFileSystem fs = getFileSystem();

    // 1. set up non-empty dir
    Path dir = path("testEmptyDir");
    Path child = path("testEmptyDir/dir2");
    mkdirs(child);

    S3AFileStatus status = getS3AFileStatus(fs, dir);
    assertEmptyDirectory(false, status);

    // 2. Make testEmptyDir empty
    assertDeleted(child, false);
    status = getS3AFileStatus(fs, dir);

    assertEmptyDirectory(true, status);
  }

  private static void assertEmptyDirectory(boolean isEmpty, S3AFileStatus s) {
    String msg = "dir is empty";
    // Should *not* be Tristate.UNKNOWN since we request a definitive value
    // in getS3AFileStatus() below
    Tristate expected = Tristate.fromBool(isEmpty);
    assertEquals(msg, expected, s.isEmptyDirectory());
  }

  @Test
  public void testDirectoryBecomesNonEmpty() throws Exception {
    S3AFileSystem fs = getFileSystem();

    // 1. create empty dir
    Path dir = path("testEmptyDir");
    mkdirs(dir);

    S3AFileStatus status = getS3AFileStatus(fs, dir);
    assertEmptyDirectory(true, status);

    // 2. Make testEmptyDir non-empty
    ContractTestUtils.touch(fs, path("testEmptyDir/file1"));
    status = getS3AFileStatus(fs, dir);

    assertEmptyDirectory(false, status);
  }

  private S3AFileStatus getS3AFileStatus(S3AFileSystem fs, Path p) throws
      IOException {
    return fs.innerGetFileStatus(p, true /* want isEmptyDirectory value */);
  }

}
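These assertions rely on S3AFileStatus#isEmptyDirectory() now returning a Tristate rather than a boolean. A minimal sketch of how calling code might branch on the three possible values (illustrative only; the enum constants are the ones used elsewhere in this commit):

  // Illustrative only: handling all three Tristate values of
  // isEmptyDirectory() on a status obtained with a definitive query.
  S3AFileStatus st = fs.innerGetFileStatus(dir, true);
  switch (st.isEmptyDirectory()) {
  case TRUE:
    // definitely empty
    break;
  case FALSE:
    // definitely has children
    break;
  case UNKNOWN:
  default:
    // no definitive answer was requested or available
    break;
  }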
@@ -18,19 +18,21 @@
 
 package org.apache.hadoop.fs.s3a;
 
-import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
-import static org.apache.hadoop.fs.contract.ContractTestUtils.rm;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfEncryptionTestsDisabled;
-import static org.apache.hadoop.test.LambdaTestUtils.intercept;
-
 import java.io.IOException;
+import java.nio.file.AccessDeniedException;
+
+import org.junit.Test;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
 import org.apache.hadoop.fs.contract.s3a.S3AContract;
-import org.junit.Test;
+import org.apache.hadoop.io.IOUtils;
+
+import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
+import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
 
 /**
  * Concrete class that extends {@link AbstractTestS3AEncryption}
@@ -38,17 +40,39 @@
  */
 public class ITestS3AEncryptionSSEC extends AbstractTestS3AEncryption {
 
+  private static final String SERVICE_AMAZON_S3_STATUS_CODE_403
+      = "Service: Amazon S3; Status Code: 403;";
+  private static final String KEY_1
+      = "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs=";
+  private static final String KEY_2
+      = "G61nz31Q7+zpjJWbakxfTOZW4VS0UmQWAq2YXhcTXoo=";
+  private static final String KEY_3
+      = "NTx0dUPrxoo9+LbNiT/gqf3z9jILqL6ilismFmJO50U=";
+  private static final String KEY_4
+      = "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=";
+  private static final int TEST_FILE_LEN = 2048;
+
+  /**
+   * Filesystem created with a different key.
+   */
+  private FileSystem fsKeyB;
+
   @Override
   protected Configuration createConfiguration() {
     Configuration conf = super.createConfiguration();
-    S3ATestUtils.disableFilesystemCaching(conf);
+    disableFilesystemCaching(conf);
     conf.set(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM,
         getSSEAlgorithm().getMethod());
-    conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY,
-        "4niV/jPK5VFRHY+KNb6wtqYd4xXyMgdJ9XQJpcQUVbs=");
+    conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY, KEY_1);
     return conf;
   }
 
+  @Override
+  public void teardown() throws Exception {
+    super.teardown();
+    IOUtils.closeStream(fsKeyB);
+  }
+
   /**
    * This will create and write to a file using encryption key A, then attempt
    * to read from it again with encryption key B.  This will not work as it
@@ -64,26 +88,25 @@ public void testCreateFileAndReadWithDifferentEncryptionKey() throws
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
 
-    final Path[] path = new Path[1];
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Service: Amazon S3; Status Code: 403;", () -> {
-
-          int len = 2048;
-          describe("Create an encrypted file of size " + len);
-          String src = createFilename(len);
-          path[0] = writeThenReadFile(src, len);
-
-          //extract the test FS
-          FileSystem fileSystem = createNewFileSystemWithSSECKey(
-              "kX7SdwVc/1VXJr76kfKnkQ3ONYhxianyL2+C3rPVT9s=");
-          byte[] data = dataset(len, 'a', 'z');
-          ContractTestUtils.verifyFileContents(fileSystem, path[0], data);
-          throw new Exception("Fail");
-        });
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          int len = TEST_FILE_LEN;
+          describe("Create an encrypted file of size " + len);
+          Path src = path("testCreateFileAndReadWithDifferentEncryptionKey");
+          writeThenReadFile(src, len);
+
+          //extract the test FS
+          fsKeyB = createNewFileSystemWithSSECKey(
+              "kX7SdwVc/1VXJr76kfKnkQ3ONYhxianyL2+C3rPVT9s=");
+          byte[] data = dataset(len, 'a', 'z');
+          ContractTestUtils.verifyFileContents(fsKeyB, src, data);
+          return fsKeyB.getFileStatus(src);
+        });
   }
 
   /**
-   * While each object has it's own key and should be distinct, this verifies
+   * While each object has its own key and should be distinct, this verifies
    * that hadoop treats object keys as a filesystem path.  So if a top level
    * dir is encrypted with keyA, a sublevel dir cannot be accessed with a
    * different keyB.
@@ -96,25 +119,20 @@ public void testCreateFileAndReadWithDifferentEncryptionKey() throws
   public void testCreateSubdirWithDifferentKey() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
+    assumeS3GuardState(false, getConfiguration());
 
-    final Path[] path = new Path[1];
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Service: Amazon S3; Status Code: 403;", () -> {
-
-          path[0] = S3ATestUtils.createTestPath(
-              new Path(createFilename("dir/"))
-          );
-          Path nestedDirectory = S3ATestUtils.createTestPath(
-              new Path(createFilename("dir/nestedDir/"))
-          );
-          FileSystem fsKeyB = createNewFileSystemWithSSECKey(
-              "G61nz31Q7+zpjJWbakxfTOZW4VS0UmQWAq2YXhcTXoo=");
-          getFileSystem().mkdirs(path[0]);
-          fsKeyB.mkdirs(nestedDirectory);
-
-          throw new Exception("Exception should be thrown.");
-        });
-    rm(getFileSystem(), path[0], true, false);
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          Path base = path("testCreateSubdirWithDifferentKey");
+          Path nestedDirectory = new Path(base, "nestedDir");
+          fsKeyB = createNewFileSystemWithSSECKey(
+              KEY_2);
+          getFileSystem().mkdirs(base);
+          fsKeyB.mkdirs(nestedDirectory);
+          // expected to fail
+          return fsKeyB.getFileStatus(nestedDirectory);
+        });
   }
 
   /**
@@ -130,20 +148,17 @@ public void testCreateFileThenMoveWithDifferentSSECKey() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
 
-    final Path[] path = new Path[1];
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Service: Amazon S3; Status Code: 403;", () -> {
-
-          int len = 2048;
-          String src = createFilename(len);
-          path[0] = writeThenReadFile(src, len);
-
-          FileSystem fsKeyB = createNewFileSystemWithSSECKey(
-              "NTx0dUPrxoo9+LbNiT/gqf3z9jILqL6ilismFmJO50U=");
-          fsKeyB.rename(path[0], new Path(createFilename("different-path.txt")));
-
-          throw new Exception("Exception should be thrown.");
-        });
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          int len = TEST_FILE_LEN;
+          Path src = path(createFilename(len));
+          writeThenReadFile(src, len);
+          fsKeyB = createNewFileSystemWithSSECKey(KEY_3);
+          Path dest = path(createFilename("different-path.txt"));
+          getFileSystem().mkdirs(dest.getParent());
+          return fsKeyB.rename(src, dest);
+        });
   }
 
   /**
@@ -157,11 +172,11 @@ public void testRenameFile() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
 
-    String src = createFilename("original-path.txt");
-    Path path = writeThenReadFile(src, 2048);
-    Path newPath = path(createFilename("different-path.txt"));
-    getFileSystem().rename(path, newPath);
-    byte[] data = dataset(2048, 'a', 'z');
+    Path src = path("original-path.txt");
+    writeThenReadFile(src, TEST_FILE_LEN);
+    Path newPath = path("different-path.txt");
+    getFileSystem().rename(src, newPath);
+    byte[] data = dataset(TEST_FILE_LEN, 'a', 'z');
     ContractTestUtils.verifyFileContents(getFileSystem(), newPath, data);
   }
 
@@ -175,30 +190,26 @@ public void testRenameFile() throws Exception {
   public void testListEncryptedDir() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
+    assumeS3GuardState(false, getConfiguration());
 
-    Path nestedDirectory = S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/c/"))
-    );
+    Path pathABC = path("testListEncryptedDir/a/b/c/");
+    Path pathAB = pathABC.getParent();
+    Path pathA = pathAB.getParent();
+
+    Path nestedDirectory = createTestPath(pathABC);
     assertTrue(getFileSystem().mkdirs(nestedDirectory));
 
-    FileSystem fsKeyB = createNewFileSystemWithSSECKey(
-        "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
+    fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
 
-    fsKeyB.listFiles(S3ATestUtils.createTestPath(
-        path(createFilename("/a/"))
-    ), true);
-    fsKeyB.listFiles(S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/"))
-    ), true);
+    fsKeyB.listFiles(pathA, true);
+    fsKeyB.listFiles(pathAB, true);
 
     //Until this point, no exception is thrown about access
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Service: Amazon S3; Status Code: 403;", () -> {
-          fsKeyB.listFiles(S3ATestUtils.createTestPath(
-              path(createFilename("/a/b/c/"))
-          ), false);
-          throw new Exception("Exception should be thrown.");
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          fsKeyB.listFiles(pathABC, false);
         });
 
     Configuration conf = this.createConfiguration();
     conf.unset(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM);
@@ -209,22 +220,13 @@ public void testListEncryptedDir() throws Exception {
     FileSystem unencryptedFileSystem = contract.getTestFileSystem();
 
     //unencrypted can access until the final directory
-    unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
-        path(createFilename("/a/"))
-    ), true);
-    unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/"))
-    ), true);
-    intercept(org.apache.hadoop.fs.s3a.AWSS3IOException.class,
-        "Bad Request (Service: Amazon S3; Status Code: 400; Error" +
-            " Code: 400 Bad Request;", () -> {
-
-          unencryptedFileSystem.listFiles(S3ATestUtils.createTestPath(
-              path(createFilename("/a/b/c/"))
-          ), false);
-          throw new Exception("Exception should be thrown.");
+    unencryptedFileSystem.listFiles(pathA, true);
+    unencryptedFileSystem.listFiles(pathAB, true);
+    AWSS3IOException ex = intercept(AWSS3IOException.class,
+        () -> {
+          unencryptedFileSystem.listFiles(pathABC, false);
         });
-    rm(getFileSystem(), path(createFilename("/")), true, false);
+    assertStatusCode(ex, 400);
   }
 
   /**
@@ -236,31 +238,27 @@ public void testListEncryptedDir() throws Exception {
   public void testListStatusEncryptedDir() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
+    assumeS3GuardState(false, getConfiguration());
 
-    Path nestedDirectory = S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/c/"))
-    );
-    assertTrue(getFileSystem().mkdirs(nestedDirectory));
+    Path pathABC = path("testListStatusEncryptedDir/a/b/c/");
+    Path pathAB = pathABC.getParent();
+    Path pathA = pathAB.getParent();
+    assertTrue(getFileSystem().mkdirs(pathABC));
 
-    FileSystem fsKeyB = createNewFileSystemWithSSECKey(
-        "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
+    fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
 
-    fsKeyB.listStatus(S3ATestUtils.createTestPath(
-        path(createFilename("/a/"))));
-    fsKeyB.listStatus(S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/"))));
+    fsKeyB.listStatus(pathA);
+    fsKeyB.listStatus(pathAB);
 
     //Until this point, no exception is thrown about access
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Service: Amazon S3; Status Code: 403;", () -> {
-          fsKeyB.listStatus(S3ATestUtils.createTestPath(
-              path(createFilename("/a/b/c/"))));
-
-          throw new Exception("Exception should be thrown.");
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          fsKeyB.listStatus(pathABC);
         });
 
     //Now try it with an unencrypted filesystem.
-    Configuration conf = this.createConfiguration();
+    Configuration conf = createConfiguration();
     conf.unset(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM);
     conf.unset(Constants.SERVER_SIDE_ENCRYPTION_KEY);
@@ -269,20 +267,14 @@ public void testListStatusEncryptedDir() throws Exception {
     FileSystem unencryptedFileSystem = contract.getTestFileSystem();
 
     //unencrypted can access until the final directory
-    unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
-        path(createFilename("/a/"))));
-    unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/"))));
+    unencryptedFileSystem.listStatus(pathA);
+    unencryptedFileSystem.listStatus(pathAB);
 
-    intercept(org.apache.hadoop.fs.s3a.AWSS3IOException.class,
-        "Bad Request (Service: Amazon S3; Status Code: 400; Error Code: 400" +
-            " Bad Request;", () -> {
-
-          unencryptedFileSystem.listStatus(S3ATestUtils.createTestPath(
-              path(createFilename("/a/b/c/"))));
-          throw new Exception("Exception should be thrown.");
+    AWSS3IOException ex = intercept(AWSS3IOException.class,
+        () -> {
+          unencryptedFileSystem.listStatus(pathABC);
         });
-    rm(getFileSystem(), path(createFilename("/")), true, false);
+    assertStatusCode(ex, 400);
   }
 
   /**
@@ -294,31 +286,24 @@ public void testListStatusEncryptedDir() throws Exception {
   public void testListStatusEncryptedFile() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
+    assumeS3GuardState(false, getConfiguration());
+    Path pathABC = path("testListStatusEncryptedFile/a/b/c/");
+    assertTrue(getFileSystem().mkdirs(pathABC));
 
-    Path nestedDirectory = S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/c/"))
-    );
-    assertTrue(getFileSystem().mkdirs(nestedDirectory));
+    Path fileToStat = new Path(pathABC, "fileToStat.txt");
+    writeThenReadFile(fileToStat, TEST_FILE_LEN);
 
-    String src = createFilename("/a/b/c/fileToStat.txt");
-    Path fileToStat = writeThenReadFile(src, 2048);
-
-    FileSystem fsKeyB = createNewFileSystemWithSSECKey(
-        "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
+    fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
 
     //Until this point, no exception is thrown about access
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Service: Amazon S3; Status Code: 403;", () -> {
-          fsKeyB.listStatus(S3ATestUtils.createTestPath(fileToStat));
-
-          throw new Exception("Exception should be thrown.");
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          fsKeyB.listStatus(fileToStat);
         });
-    rm(getFileSystem(), path(createFilename("/")), true, false);
   }
 
   /**
    * It is possible to delete directories without the proper encryption key and
    * the hierarchy above it.
@@ -329,31 +314,26 @@ public void testListStatusEncryptedFile() throws Exception {
   public void testDeleteEncryptedObjectWithDifferentKey() throws Exception {
     assumeEnabled();
     skipIfEncryptionTestsDisabled(getConfiguration());
+    assumeS3GuardState(false, getConfiguration());
+    Path pathABC = path("testDeleteEncryptedObjectWithDifferentKey/a/b/c/");
 
-    Path nestedDirectory = S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/c/"))
-    );
-    assertTrue(getFileSystem().mkdirs(nestedDirectory));
-    String src = createFilename("/a/b/c/filetobedeleted.txt");
-    Path fileToDelete = writeThenReadFile(src, 2048);
-
-    FileSystem fsKeyB = createNewFileSystemWithSSECKey(
-        "msdo3VvvZznp66Gth58a91Hxe/UpExMkwU9BHkIjfW8=");
-    intercept(java.nio.file.AccessDeniedException.class,
-        "Forbidden (Service: Amazon S3; Status Code: 403; Error Code: " +
-            "403 Forbidden", () -> {
-
-          fsKeyB.delete(fileToDelete, false);
-          throw new Exception("Exception should be thrown.");
+    Path pathAB = pathABC.getParent();
+    Path pathA = pathAB.getParent();
+    assertTrue(getFileSystem().mkdirs(pathABC));
+    Path fileToDelete = new Path(pathABC, "filetobedeleted.txt");
+    writeThenReadFile(fileToDelete, TEST_FILE_LEN);
+    fsKeyB = createNewFileSystemWithSSECKey(KEY_4);
+    intercept(AccessDeniedException.class,
+        SERVICE_AMAZON_S3_STATUS_CODE_403,
+        () -> {
+          fsKeyB.delete(fileToDelete, false);
         });
 
     //This is possible
-    fsKeyB.delete(S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/c/"))), true);
-    fsKeyB.delete(S3ATestUtils.createTestPath(
-        path(createFilename("/a/b/"))), true);
-    fsKeyB.delete(S3ATestUtils.createTestPath(
-        path(createFilename("/a/"))), true);
+    fsKeyB.delete(pathABC, true);
+    fsKeyB.delete(pathAB, true);
+    fsKeyB.delete(pathA, true);
+    assertPathDoesNotExist("expected recursive delete", fileToDelete);
   }
 
   private FileSystem createNewFileSystemWithSSECKey(String sseCKey) throws
@@ -371,4 +351,5 @@ private FileSystem createNewFileSystemWithSSECKey(String sseCKey) throws
   protected S3AEncryptionMethods getSSEAlgorithm() {
     return S3AEncryptionMethods.SSE_C;
   }
+
 }
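The reworked tests obtain a second filesystem bound to a different SSE-C key through createNewFileSystemWithSSECKey(), then close it in teardown(). A rough sketch of what such a helper presumably has to do, using only the configuration constants already referenced in the hunks above (this is an assumption, not the patch's actual helper body):

  // Assumed shape of a helper that builds an uncached FileSystem with its
  // own SSE-C key, so two differently-keyed clients can coexist in one test.
  private FileSystem newFileSystemWithSSECKey(String sseCKey)
      throws IOException {
    Configuration conf = createConfiguration();
    S3ATestUtils.disableFilesystemCaching(conf);
    conf.set(Constants.SERVER_SIDE_ENCRYPTION_ALGORITHM,
        S3AEncryptionMethods.SSE_C.getMethod());
    conf.set(Constants.SERVER_SIDE_ENCRYPTION_KEY, sseCKey);
    return FileSystem.get(getFileSystem().getUri(), conf);
  }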
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.fs.s3a;
 
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
@@ -32,8 +33,8 @@
 import static org.apache.hadoop.fs.contract.ContractTestUtils.*;
 import static org.apache.hadoop.fs.s3a.Statistic.*;
 import static org.apache.hadoop.fs.s3a.S3ATestUtils.*;
-import static org.apache.hadoop.fs.s3a.S3ATestUtils.MetricDiff;
 import static org.apache.hadoop.test.GenericTestUtils.getTestDir;
+import static org.junit.Assume.assumeFalse;
 
 /**
  * Use metrics to assert about the cost of file status queries.
@@ -62,9 +63,11 @@ public void testCostOfGetFileStatusOnFile() throws Throwable {
     S3AFileSystem fs = getFileSystem();
     touch(fs, simpleFile);
     resetMetricDiffs();
-    S3AFileStatus status = fs.getFileStatus(simpleFile);
+    FileStatus status = fs.getFileStatus(simpleFile);
     assertTrue("not a file: " + status, status.isFile());
-    metadataRequests.assertDiffEquals(1);
+    if (!fs.hasMetadataStore()) {
+      metadataRequests.assertDiffEquals(1);
+    }
     listRequests.assertDiffEquals(0);
   }
 
@@ -79,9 +82,13 @@ public void testCostOfGetFileStatusOnEmptyDir() throws Throwable {
     Path dir = path("empty");
     fs.mkdirs(dir);
     resetMetricDiffs();
-    S3AFileStatus status = fs.getFileStatus(dir);
-    assertTrue("not empty: " + status, status.isEmptyDirectory());
-    metadataRequests.assertDiffEquals(2);
+    S3AFileStatus status = fs.innerGetFileStatus(dir, true);
+    assertTrue("not empty: " + status,
+        status.isEmptyDirectory() == Tristate.TRUE);
+
+    if (!fs.hasMetadataStore()) {
+      metadataRequests.assertDiffEquals(2);
+    }
     listRequests.assertDiffEquals(0);
   }
 
@@ -92,7 +99,7 @@ public void testCostOfGetFileStatusOnMissingFile() throws Throwable {
     Path path = path("missing");
     resetMetricDiffs();
     try {
-      S3AFileStatus status = fs.getFileStatus(path);
+      FileStatus status = fs.getFileStatus(path);
       fail("Got a status back from a missing file path " + status);
     } catch (FileNotFoundException expected) {
       // expected
@@ -108,7 +115,7 @@ public void testCostOfGetFileStatusOnMissingSubPath() throws Throwable {
     Path path = path("missingdir/missingpath");
     resetMetricDiffs();
     try {
-      S3AFileStatus status = fs.getFileStatus(path);
+      FileStatus status = fs.getFileStatus(path);
       fail("Got a status back from a missing file path " + status);
     } catch (FileNotFoundException expected) {
       // expected
@@ -126,16 +133,18 @@ public void testCostOfGetFileStatusOnNonEmptyDir() throws Throwable {
     Path simpleFile = new Path(dir, "simple.txt");
     touch(fs, simpleFile);
     resetMetricDiffs();
-    S3AFileStatus status = fs.getFileStatus(dir);
-    if (status.isEmptyDirectory()) {
+    S3AFileStatus status = fs.innerGetFileStatus(dir, true);
+    if (status.isEmptyDirectory() == Tristate.TRUE) {
       // erroneous state
       String fsState = fs.toString();
       fail("FileStatus says directory isempty: " + status
           + "\n" + ContractTestUtils.ls(fs, dir)
          + "\n" + fsState);
     }
-    metadataRequests.assertDiffEquals(2);
-    listRequests.assertDiffEquals(1);
+    if (!fs.hasMetadataStore()) {
+      metadataRequests.assertDiffEquals(2);
+      listRequests.assertDiffEquals(1);
+    }
   }
 
   @Test
@@ -187,6 +196,13 @@ public void testFakeDirectoryDeletion() throws Throwable {
         + "In S3, rename deletes any fake directories as a part of "
         + "clean up activity");
     S3AFileSystem fs = getFileSystem();
+
+    // As this test uses the s3 metrics to count the number of fake directory
+    // operations, it depends on side effects happening internally. With
+    // metadata store enabled, it is brittle to change. We disable this test
+    // before the internal behavior w/ or w/o metadata store.
+    assumeFalse(fs.hasMetadataStore());
+
     Path srcBaseDir = path("src");
     mkdirs(srcBaseDir);
     MetricDiff deleteRequests =
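These hunks keep the request-count assertions but make them conditional on fs.hasMetadataStore(), since a metadata store can answer getFileStatus() without issuing HEAD or LIST calls against S3. The underlying metric-diff pattern, sketched with the names used in the hunks above (the exact Statistic constants and reset plumbing are assumptions for illustration):

  // Sketch of the metric-diff pattern: snapshot counters, run one operation,
  // then assert on the deltas; skip strict counts when a metadata store may
  // be short-circuiting S3 calls.
  MetricDiff metadataRequests =
      new MetricDiff(fs, Statistic.OBJECT_METADATA_REQUESTS);
  MetricDiff listRequests =
      new MetricDiff(fs, Statistic.OBJECT_LIST_REQUESTS);

  fs.getFileStatus(simpleFile);   // the operation under measurement

  if (!fs.hasMetadataStore()) {
    metadataRequests.assertDiffEquals(1);
  }
  listRequests.assertDiffEquals(0);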
@@ -27,6 +27,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystemContractBaseTest;
 import org.apache.hadoop.fs.Path;
+
 import static org.junit.Assume.*;
 import static org.junit.Assert.*;
 
@@ -0,0 +1,100 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.apache.hadoop.test.LambdaTestUtils;
import org.junit.Test;

import java.io.FileNotFoundException;
import java.util.concurrent.Callable;

import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;

/**
 * Tests S3A behavior under forced inconsistency via {@link
 * InconsistentAmazonS3Client}.
 *
 * These tests are for validating expected behavior *without* S3Guard, but
 * may also run with S3Guard enabled.  For tests that validate S3Guard's
 * consistency features, see {@link ITestS3GuardListConsistency}.
 */
public class ITestS3AInconsistency extends AbstractS3ATestBase {

  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
        S3ClientFactory.class);
    conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
    conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
    conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
    return new S3AContract(conf);
  }

  @Test
  public void testGetFileStatus() throws Exception {
    S3AFileSystem fs = getFileSystem();

    // 1. Make sure no ancestor dirs exist
    Path dir = path("ancestor");
    fs.delete(dir, true);
    waitUntilDeleted(dir);

    // 2. Create a descendant file, which implicitly creates ancestors
    // This file has delayed visibility.
    touch(getFileSystem(),
        path("ancestor/file-" + DEFAULT_DELAY_KEY_SUBSTRING));

    // 3. Assert expected behavior.  If S3Guard is enabled, we should be able
    // to get status for ancestor.  If S3Guard is *not* enabled, S3A will
    // fail to infer the existence of the ancestor since visibility of the
    // child file is delayed, and its key prefix search will return nothing.
    try {
      FileStatus status = fs.getFileStatus(dir);
      if (fs.hasMetadataStore()) {
        assertTrue("Ancestor is dir", status.isDirectory());
      } else {
        fail("getFileStatus should fail due to delayed visibility.");
      }
    } catch (FileNotFoundException e) {
      if (fs.hasMetadataStore()) {
        fail("S3Guard failed to list parent of inconsistent child.");
      }
      LOG.info("File not found, as expected.");
    }
  }

  private void waitUntilDeleted(final Path p) throws Exception {
    LambdaTestUtils.eventually(30 * 1000, 1000,
        new Callable<Void>() {
          @Override
          public Void call() throws Exception {
            assertPathDoesNotExist("Dir should be deleted", p);
            return null;
          }
        }
    );
  }

}
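waitUntilDeleted() above spins on LambdaTestUtils.eventually() with an anonymous Callable and a 30 second timeout at 1 second intervals. A hypothetical lambda form of the same helper, using the identical overload and timings (sketch only, not part of the patch):

  // Sketch: the same spin-wait expressed as a lambda, with identical timings.
  private void waitUntilDeleted(final Path p) throws Exception {
    LambdaTestUtils.eventually(30 * 1000, 1000, () -> {
      assertPathDoesNotExist("Dir should be deleted", p);
      return null;
    });
  }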
@@ -22,10 +22,17 @@
 import org.apache.hadoop.fs.FileAlreadyExistsException;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
+import org.apache.hadoop.test.LambdaTestUtils;
+
+import com.amazonaws.services.s3.model.ObjectMetadata;
+import com.amazonaws.services.s3.model.PutObjectRequest;
+import com.amazonaws.services.s3.model.PutObjectResult;
 import org.junit.Test;
 
+import java.io.ByteArrayInputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
+import java.util.concurrent.Callable;
 
 /**
  * Tests of the S3A FileSystem which don't have a specific home and can share
@@ -55,6 +62,26 @@ public void testCreateNonRecursiveParentIsFile() throws IOException {
     createNonRecursive(new Path(parent, "fail"));
   }
 
+  @Test
+  public void testPutObjectDirect() throws Throwable {
+    final S3AFileSystem fs = getFileSystem();
+    ObjectMetadata metadata = fs.newObjectMetadata(-1);
+    metadata.setContentLength(-1);
+    Path path = path("putDirect");
+    final PutObjectRequest put = new PutObjectRequest(fs.getBucket(),
+        path.toUri().getPath(),
+        new ByteArrayInputStream("PUT".getBytes()),
+        metadata);
+    LambdaTestUtils.intercept(IllegalStateException.class,
+        new Callable<PutObjectResult>() {
+          @Override
+          public PutObjectResult call() throws Exception {
+            return fs.putObjectDirect(put);
+          }
+        });
+    assertPathDoesNotExist("put object was created", path);
+  }
+
   private FSDataOutputStream createNonRecursive(Path path) throws IOException {
     return getFileSystem().createNonRecursive(path, false, 4096,
         (short) 3, (short) 4096,
@@ -0,0 +1,61 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.junit.Assume;
import org.junit.Test;

import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;

/**
 * Home for testing the creation of new files and directories with S3Guard
 * enabled.
 */
public class ITestS3GuardCreate extends AbstractS3ATestBase {

  /**
   * Test that ancestor creation during S3AFileSystem#create() is properly
   * accounted for in the MetadataStore.  This should be handled by the
   * FileSystem, and be a FS contract test, but S3A does not handle ancestors on
   * create(), so we need to take care in the S3Guard code to do the right
   * thing.  This may change: See HADOOP-13221 for more detail.
   */
  @Test
  public void testCreatePopulatesFileAncestors() throws Exception {
    final S3AFileSystem fs = getFileSystem();
    Assume.assumeTrue(fs.hasMetadataStore());
    final MetadataStore ms = fs.getMetadataStore();
    final Path parent = path("testCreatePopulatesFileAncestors");

    try {
      fs.mkdirs(parent);
      final Path nestedFile = new Path(parent, "dir1/dir2/file4");
      touch(fs, nestedFile);

      DirListingMetadata list = ms.listChildren(parent);
      assertFalse("MetadataStore falsely reports authoritative empty list",
          list.isEmpty() == Tristate.TRUE);
    } finally {
      fs.delete(parent, true);
    }
  }

}
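The test asserts that ms.listChildren(parent) does not report an authoritative empty listing once a nested file exists. An illustrative follow-up check on one of the implicitly created intermediate directories could look like this; it is hypothetical and not part of the patch:

  // Hypothetical extra assertion: the implicitly created "dir1" should also
  // be tracked and must not be an authoritative empty listing.
  Path intermediate = new Path(parent, "dir1");
  DirListingMetadata children = ms.listChildren(intermediate);
  assertNotNull("dir1 should be known to the MetadataStore", children);
  assertFalse("dir1 should have a child entry",
      children.isEmpty() == Tristate.TRUE);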
@@ -0,0 +1,85 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
import org.junit.Assume;
import org.junit.Test;

import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;

/**
 * Test logic around whether or not a directory is empty, with S3Guard enabled.
 * The fact that S3AFileStatus has an isEmptyDirectory flag in it makes caching
 * S3AFileStatus's really tricky, as the flag can change as a side effect of
 * changes to other paths.
 * After S3Guard is merged to trunk, we should try to remove the
 * isEmptyDirectory flag from S3AFileStatus, or maintain it outside
 * of the MetadataStore.
 */
public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {

  @Test
  public void testEmptyDirs() throws Exception {
    S3AFileSystem fs = getFileSystem();
    Assume.assumeTrue(fs.hasMetadataStore());
    MetadataStore configuredMs = fs.getMetadataStore();
    Path existingDir = path("existing-dir");
    Path existingFile = path("existing-dir/existing-file");
    try {
      // 1. Simulate files already existing in the bucket before we started our
      // cluster.  Temporarily disable the MetadataStore so it doesn't witness
      // us creating these files.
      fs.setMetadataStore(new NullMetadataStore());
      assertTrue(fs.mkdirs(existingDir));
      touch(fs, existingFile);

      // 2. Simulate (from MetadataStore's perspective) starting our cluster and
      // creating a file in an existing directory.
      fs.setMetadataStore(configuredMs);  // "start cluster"
      Path newFile = path("existing-dir/new-file");
      touch(fs, newFile);

      S3AFileStatus status = fs.innerGetFileStatus(existingDir, true);
      assertEquals("Should not be empty dir", Tristate.FALSE,
          status.isEmptyDirectory());

      // 3. Assert that removing the only file the MetadataStore witnessed
      // being created doesn't cause it to think the directory is now empty.
      fs.delete(newFile, false);
      status = fs.innerGetFileStatus(existingDir, true);
      assertEquals("Should not be empty dir", Tristate.FALSE,
          status.isEmptyDirectory());

      // 4. Assert that removing the final file, that existed "before"
      // MetadataStore started, *does* cause the directory to be marked empty.
      fs.delete(existingFile, false);
      status = fs.innerGetFileStatus(existingDir, true);
      assertEquals("Should be empty dir now", Tristate.TRUE,
          status.isEmptyDirectory());
    } finally {
      configuredMs.forgetMetadata(existingFile);
      configuredMs.forgetMetadata(existingDir);
    }
  }

}
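Steps 1 and 2 of the test work by temporarily swapping the live MetadataStore for a NullMetadataStore so some objects exist in the bucket without S3Guard ever seeing them. The same idea as a reusable template, distilled from the test above (a sketch using only the methods the test itself calls, not new API):

  // Sketch: perform "out of band" bucket changes that S3Guard does not see,
  // then restore the real store, as the test above does for its setup.
  MetadataStore realStore = fs.getMetadataStore();
  fs.setMetadataStore(new NullMetadataStore());
  try {
    touch(fs, path("existing-dir/out-of-band-file"));
  } finally {
    fs.setMetadataStore(realStore);  // "start cluster" again
  }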
@@ -0,0 +1,544 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.AmazonS3;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.s3a.S3AContract;
import org.junit.Assume;
import org.junit.Test;

import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.InconsistentAmazonS3Client.*;

/**
 * Test S3Guard list consistency feature by injecting delayed listObjects()
 * visibility via {@link InconsistentAmazonS3Client}.
 *
 * Tests here generally:
 * 1. Use the inconsistency injection mentioned above.
 * 2. Only run when S3Guard is enabled.
 */
public class ITestS3GuardListConsistency extends AbstractS3ATestBase {

  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    conf.setClass(S3_CLIENT_FACTORY_IMPL, InconsistentS3ClientFactory.class,
        S3ClientFactory.class);
    // Other configs would break test assumptions
    conf.set(FAIL_INJECT_INCONSISTENCY_KEY, DEFAULT_DELAY_KEY_SUBSTRING);
    conf.setFloat(FAIL_INJECT_INCONSISTENCY_PROBABILITY, 1.0f);
    conf.setLong(FAIL_INJECT_INCONSISTENCY_MSEC, DEFAULT_DELAY_KEY_MSEC);
    return new S3AContract(conf);
  }

  /**
   * Helper function for other test cases: does a single rename operation and
   * validates the aftermath.
   * @param mkdirs Directories to create
   * @param srcdirs Source paths for rename operation
   * @param dstdirs Destination paths for rename operation
   * @param yesdirs Files that must exist post-rename (e.g. srcdirs children)
   * @param nodirs Files that must not exist post-rename (e.g. dstdirs children)
   * @throws Exception
   */
  private void doTestRenameSequence(Path[] mkdirs, Path[] srcdirs,
      Path[] dstdirs, Path[] yesdirs, Path[] nodirs) throws Exception {
    S3AFileSystem fs = getFileSystem();
    Assume.assumeTrue(fs.hasMetadataStore());

    if (mkdirs != null) {
      for (Path mkdir : mkdirs) {
        assertTrue(fs.mkdirs(mkdir));
      }
      clearInconsistency(fs);
    }

    assertTrue("srcdirs and dstdirs must have equal length",
        srcdirs.length == dstdirs.length);
    for (int i = 0; i < srcdirs.length; i++) {
      assertTrue("Rename returned false: " + srcdirs[i] + " -> " + dstdirs[i],
          fs.rename(srcdirs[i], dstdirs[i]));
    }

    for (Path yesdir : yesdirs) {
      assertTrue("Path was supposed to exist: " + yesdir, fs.exists(yesdir));
    }
    for (Path nodir : nodirs) {
      assertFalse("Path is not supposed to exist: " + nodir, fs.exists(nodir));
    }
  }

  /**
   * Tests that after renaming a directory, the original directory and its
   * contents are indeed missing and the corresponding new paths are visible.
   * @throws Exception
   */
  @Test
  public void testConsistentListAfterRename() throws Exception {
    Path[] mkdirs = {
      path("d1/f"),
      path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)
    };
    Path[] srcdirs = {path("d1")};
    Path[] dstdirs = {path("d2")};
    Path[] yesdirs = {path("d2"), path("d2/f"),
        path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
    Path[] nodirs = {path("d1"), path("d1/f"),
        path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
    doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
    getFileSystem().delete(path("d1"), true);
    getFileSystem().delete(path("d2"), true);
  }

  /**
   * Tests a circular sequence of renames to verify that overwriting recently
   * deleted files and reading recently created files from rename operations
   * works as expected.
   * @throws Exception
   */
  @Test
  public void testRollingRenames() throws Exception {
    Path[] dir0 = {path("rolling/1")};
    Path[] dir1 = {path("rolling/2")};
    Path[] dir2 = {path("rolling/3")};
    // These sets have to be in reverse order compared to the movement
    Path[] setA = {dir1[0], dir0[0]};
    Path[] setB = {dir2[0], dir1[0]};
    Path[] setC = {dir0[0], dir2[0]};

    for(int i = 0; i < 2; i++) {
      Path[] firstSet = i == 0 ? setA : null;
      doTestRenameSequence(firstSet, setA, setB, setB, dir0);
      doTestRenameSequence(null, setB, setC, setC, dir1);
      doTestRenameSequence(null, setC, setA, setA, dir2);
    }

    S3AFileSystem fs = getFileSystem();
    assertFalse("Renaming deleted file should have failed",
        fs.rename(dir2[0], dir1[0]));
    assertTrue("Renaming over existing file should have succeeded",
        fs.rename(dir1[0], dir0[0]));
  }

  /**
   * Tests that deleted files immediately stop manifesting in list operations
   * even when the effect in S3 is delayed.
   * @throws Exception
   */
  @Test
  public void testConsistentListAfterDelete() throws Exception {
    S3AFileSystem fs = getFileSystem();
    // test will fail if NullMetadataStore (the default) is configured: skip it.
    Assume.assumeTrue(fs.hasMetadataStore());

    // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
    // in listObjects() results via InconsistentS3Client
    Path inconsistentPath =
        path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);

    Path[] testDirs = {path("a/b/dir1"),
        path("a/b/dir2"),
        inconsistentPath};

    for (Path path : testDirs) {
      assertTrue(fs.mkdirs(path));
    }
    clearInconsistency(fs);
    for (Path path : testDirs) {
      assertTrue(fs.delete(path, false));
    }

    FileStatus[] paths = fs.listStatus(path("a/b/"));
    List<Path> list = new ArrayList<>();
    for (FileStatus fileState : paths) {
      list.add(fileState.getPath());
    }
    assertFalse(list.contains(path("a/b/dir1")));
    assertFalse(list.contains(path("a/b/dir2")));
    // This should fail without S3Guard, and succeed with it.
    assertFalse(list.contains(inconsistentPath));
  }

  /**
   * Tests that rename immediately after files in the source directory are
   * deleted results in exactly the correct set of destination files and none
   * of the source files.
   * @throws Exception
   */
  @Test
  public void testConsistentRenameAfterDelete() throws Exception {
    S3AFileSystem fs = getFileSystem();
    // test will fail if NullMetadataStore (the default) is configured: skip it.
    Assume.assumeTrue(fs.hasMetadataStore());

    // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
    // in listObjects() results via InconsistentS3Client
    Path inconsistentPath =
        path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);

    Path[] testDirs = {path("a/b/dir1"),
        path("a/b/dir2"),
        inconsistentPath};

    for (Path path : testDirs) {
      assertTrue(fs.mkdirs(path));
    }
    clearInconsistency(fs);
    assertTrue(fs.delete(testDirs[1], false));
    assertTrue(fs.delete(testDirs[2], false));

    fs.rename(path("a"), path("a3"));
    FileStatus[] paths = fs.listStatus(path("a3/b"));
    List<Path> list = new ArrayList<>();
    for (FileStatus fileState : paths) {
      list.add(fileState.getPath());
    }
    assertTrue(list.contains(path("a3/b/dir1")));
    assertFalse(list.contains(path("a3/b/dir2")));
    // This should fail without S3Guard, and succeed with it.
    assertFalse(list.contains(path("a3/b/dir3-" +
        DEFAULT_DELAY_KEY_SUBSTRING)));

    try {
      RemoteIterator<LocatedFileStatus> old = fs.listFilesAndEmptyDirectories(
          path("a"), true);
      fail("Recently renamed dir should not be visible");
    } catch(FileNotFoundException e) {
      // expected
    }
  }

  @Test
  public void testConsistentListStatusAfterPut() throws Exception {

    S3AFileSystem fs = getFileSystem();

    // This test will fail if NullMetadataStore (the default) is configured:
    // skip it.
    Assume.assumeTrue(fs.hasMetadataStore());

    // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
    // in listObjects() results via InconsistentS3Client
    Path inconsistentPath =
        path("a/b/dir3-" + DEFAULT_DELAY_KEY_SUBSTRING);

    Path[] testDirs = {path("a/b/dir1"),
        path("a/b/dir2"),
        inconsistentPath};

    for (Path path : testDirs) {
      assertTrue(fs.mkdirs(path));
    }

    FileStatus[] paths = fs.listStatus(path("a/b/"));
    List<Path> list = new ArrayList<>();
    for (FileStatus fileState : paths) {
      list.add(fileState.getPath());
    }
    assertTrue(list.contains(path("a/b/dir1")));
    assertTrue(list.contains(path("a/b/dir2")));
    // This should fail without S3Guard, and succeed with it.
    assertTrue(list.contains(inconsistentPath));
  }

  /**
   * Similar to {@link #testConsistentListStatusAfterPut()}, this tests that the
   * FS listLocatedStatus() call will return consistent list.
   */
  @Test
  public void testConsistentListLocatedStatusAfterPut() throws Exception {
    final S3AFileSystem fs = getFileSystem();
    // This test will fail if NullMetadataStore (the default) is configured:
    // skip it.
    Assume.assumeTrue(fs.hasMetadataStore());
    String rootDir = "doTestConsistentListLocatedStatusAfterPut";
    fs.mkdirs(path(rootDir));

    final int[] numOfPaths = {0, 1, 5};
    for (int normalPathNum : numOfPaths) {
      for (int delayedPathNum : new int[] {0, 2}) {
        LOG.info("Testing with normalPathNum={}, delayedPathNum={}",
            normalPathNum, delayedPathNum);
        doTestConsistentListLocatedStatusAfterPut(fs, rootDir, normalPathNum,
            delayedPathNum);
      }
    }
  }

  /**
   * Helper method to implement the tests of consistent listLocatedStatus().
   * @param fs The S3 file system from contract
   * @param normalPathNum number paths listed directly from S3 without delaying
   * @param delayedPathNum number paths listed with delaying
   * @throws Exception
   */
  private void doTestConsistentListLocatedStatusAfterPut(S3AFileSystem fs,
      String rootDir, int normalPathNum, int delayedPathNum) throws Exception {
    final List<Path> testDirs = new ArrayList<>(normalPathNum + delayedPathNum);
    int index = 0;
    for (; index < normalPathNum; index++) {
      testDirs.add(path(rootDir + "/dir-" +
          index));
    }
    for (; index < normalPathNum + delayedPathNum; index++) {
      // Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
      // in listObjects() results via InconsistentS3Client
      testDirs.add(path(rootDir + "/dir-" + index +
          DEFAULT_DELAY_KEY_SUBSTRING));
    }

    for (Path path : testDirs) {
      // delete the old test path (if any) so that when we call mkdirs() later,
      // the to delay directories will be tracked via putObject() request.
      fs.delete(path, true);
      assertTrue(fs.mkdirs(path));
    }

    // this should return the union data from S3 and MetadataStore
    final RemoteIterator<LocatedFileStatus> statusIterator =
        fs.listLocatedStatus(path(rootDir + "/"));
    List<Path> list = new ArrayList<>();
    for (; statusIterator.hasNext();) {
      list.add(statusIterator.next().getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
// This should fail without S3Guard, and succeed with it because part of the
|
||||||
|
// children under test path are delaying visibility
|
||||||
|
for (Path path : testDirs) {
|
||||||
|
assertTrue("listLocatedStatus should list " + path, list.contains(path));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests that the S3AFS listFiles() call will return consistent file list.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testConsistentListFiles() throws Exception {
|
||||||
|
final S3AFileSystem fs = getFileSystem();
|
||||||
|
// This test will fail if NullMetadataStore (the default) is configured:
|
||||||
|
// skip it.
|
||||||
|
Assume.assumeTrue(fs.hasMetadataStore());
|
||||||
|
|
||||||
|
final int[] numOfPaths = {0, 2};
|
||||||
|
for (int dirNum : numOfPaths) {
|
||||||
|
for (int normalFile : numOfPaths) {
|
||||||
|
for (int delayedFile : new int[] {0, 1}) {
|
||||||
|
for (boolean recursive : new boolean[] {true, false}) {
|
||||||
|
doTestListFiles(fs, dirNum, normalFile, delayedFile, recursive);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to implement the tests of consistent listFiles().
|
||||||
|
*
|
||||||
|
* The file structure has dirNum subdirectories, and each directory (including
|
||||||
|
* the test base directory itself) has normalFileNum normal files and
|
||||||
|
* delayedFileNum delayed files.
|
||||||
|
*
|
||||||
|
* @param fs The S3 file system from contract
|
||||||
|
* @param dirNum number of subdirectories
|
||||||
|
* @param normalFileNum number files in each directory without delay to list
|
||||||
|
* @param delayedFileNum number files in each directory with delay to list
|
||||||
|
* @param recursive listFiles recursively if true
|
||||||
|
* @throws Exception if any unexpected error
|
||||||
|
*/
|
||||||
|
private void doTestListFiles(S3AFileSystem fs, int dirNum, int normalFileNum,
|
||||||
|
int delayedFileNum, boolean recursive) throws Exception {
|
||||||
|
describe("Testing dirNum=%d, normalFile=%d, delayedFile=%d, "
|
||||||
|
+ "recursive=%s", dirNum, normalFileNum, delayedFileNum, recursive);
|
||||||
|
final Path baseTestDir = path("doTestListFiles-" + dirNum + "-"
|
||||||
|
+ normalFileNum + "-" + delayedFileNum + "-" + recursive);
|
||||||
|
// delete the old test path (if any) so that when we call mkdirs() later,
|
||||||
|
// the to delay sub directories will be tracked via putObject() request.
|
||||||
|
fs.delete(baseTestDir, true);
|
||||||
|
|
||||||
|
// make subdirectories (if any)
|
||||||
|
final List<Path> testDirs = new ArrayList<>(dirNum + 1);
|
||||||
|
assertTrue(fs.mkdirs(baseTestDir));
|
||||||
|
testDirs.add(baseTestDir);
|
||||||
|
for (int i = 0; i < dirNum; i++) {
|
||||||
|
final Path subdir = path(baseTestDir + "/dir-" + i);
|
||||||
|
assertTrue(fs.mkdirs(subdir));
|
||||||
|
testDirs.add(subdir);
|
||||||
|
}
|
||||||
|
|
||||||
|
final Collection<String> fileNames
|
||||||
|
= new ArrayList<>(normalFileNum + delayedFileNum);
|
||||||
|
int index = 0;
|
||||||
|
for (; index < normalFileNum; index++) {
|
||||||
|
fileNames.add("file-" + index);
|
||||||
|
}
|
||||||
|
for (; index < normalFileNum + delayedFileNum; index++) {
|
||||||
|
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
||||||
|
// in listObjects() results via InconsistentS3Client
|
||||||
|
fileNames.add("file-" + index + "-" + DEFAULT_DELAY_KEY_SUBSTRING);
|
||||||
|
}
|
||||||
|
|
||||||
|
int filesAndEmptyDirectories = 0;
|
||||||
|
|
||||||
|
// create files under each test directory
|
||||||
|
for (Path dir : testDirs) {
|
||||||
|
for (String fileName : fileNames) {
|
||||||
|
writeTextFile(fs, new Path(dir, fileName), "I, " + fileName, false);
|
||||||
|
filesAndEmptyDirectories++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// this should return the union data from S3 and MetadataStore
|
||||||
|
final RemoteIterator<LocatedFileStatus> statusIterator
|
||||||
|
= fs.listFiles(baseTestDir, recursive);
|
||||||
|
final Collection<Path> listedFiles = new HashSet<>();
|
||||||
|
for (; statusIterator.hasNext();) {
|
||||||
|
final FileStatus status = statusIterator.next();
|
||||||
|
assertTrue("FileStatus " + status + " is not a file!", status.isFile());
|
||||||
|
listedFiles.add(status.getPath());
|
||||||
|
}
|
||||||
|
LOG.info("S3AFileSystem::listFiles('{}', {}) -> {}",
|
||||||
|
baseTestDir, recursive, listedFiles);
|
||||||
|
|
||||||
|
// This should fail without S3Guard, and succeed with it because part of the
|
||||||
|
// files to list are delaying visibility
|
||||||
|
if (!recursive) {
|
||||||
|
// in this case only the top level files are listed
|
||||||
|
assertEquals("Unexpected number of files returned by listFiles() call",
|
||||||
|
normalFileNum + delayedFileNum, listedFiles.size());
|
||||||
|
verifyFileIsListed(listedFiles, baseTestDir, fileNames);
|
||||||
|
} else {
|
||||||
|
assertEquals("Unexpected number of files returned by listFiles() call",
|
||||||
|
filesAndEmptyDirectories,
|
||||||
|
listedFiles.size());
|
||||||
|
for (Path dir : testDirs) {
|
||||||
|
verifyFileIsListed(listedFiles, dir, fileNames);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void verifyFileIsListed(Collection<Path> listedFiles,
|
||||||
|
Path currentDir, Collection<String> fileNames) {
|
||||||
|
for (String fileName : fileNames) {
|
||||||
|
final Path file = new Path(currentDir, fileName);
|
||||||
|
assertTrue(file + " should have been listed", listedFiles.contains(file));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCommitByRenameOperations() throws Throwable {
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Assume.assumeTrue(fs.hasMetadataStore());
|
||||||
|
Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING);
|
||||||
|
Path task00 = new Path(work, "task00");
|
||||||
|
fs.mkdirs(task00);
|
||||||
|
String name = "part-00";
|
||||||
|
try (FSDataOutputStream out =
|
||||||
|
fs.create(new Path(task00, name), false)) {
|
||||||
|
out.writeChars("hello");
|
||||||
|
}
|
||||||
|
for (FileStatus stat : fs.listStatus(task00)) {
|
||||||
|
fs.rename(stat.getPath(), work);
|
||||||
|
}
|
||||||
|
List<FileStatus> files = new ArrayList<>(2);
|
||||||
|
for (FileStatus stat : fs.listStatus(work)) {
|
||||||
|
if (stat.isFile()) {
|
||||||
|
files.add(stat);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertFalse("renamed file " + name + " not found in " + work,
|
||||||
|
files.isEmpty());
|
||||||
|
assertEquals("more files found than expected in " + work
|
||||||
|
+ " " + ls(work), 1, files.size());
|
||||||
|
FileStatus status = files.get(0);
|
||||||
|
assertEquals("Wrong filename in " + status,
|
||||||
|
name, status.getPath().getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInconsistentS3ClientDeletes() throws Throwable {
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Path root = path("testInconsistentClient" + DEFAULT_DELAY_KEY_SUBSTRING);
|
||||||
|
for (int i = 0; i < 3; i++) {
|
||||||
|
fs.mkdirs(new Path(root, "dir" + i));
|
||||||
|
touch(fs, new Path(root, "file" + i));
|
||||||
|
for (int j = 0; j < 3; j++) {
|
||||||
|
touch(fs, new Path(new Path(root, "dir" + i), "file" + i + "-" + j));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clearInconsistency(fs);
|
||||||
|
|
||||||
|
AmazonS3 client = fs.getAmazonS3Client();
|
||||||
|
String key = fs.pathToKey(root) + "/";
|
||||||
|
|
||||||
|
ObjectListing preDeleteDelimited = client.listObjects(
|
||||||
|
fs.createListObjectsRequest(key, "/"));
|
||||||
|
ObjectListing preDeleteUndelimited = client.listObjects(
|
||||||
|
fs.createListObjectsRequest(key, null));
|
||||||
|
|
||||||
|
fs.delete(root, true);
|
||||||
|
|
||||||
|
ObjectListing postDeleteDelimited = client.listObjects(
|
||||||
|
fs.createListObjectsRequest(key, "/"));
|
||||||
|
ObjectListing postDeleteUndelimited = client.listObjects(
|
||||||
|
fs.createListObjectsRequest(key, null));
|
||||||
|
|
||||||
|
assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
|
||||||
|
"in a non-recursive listing",
|
||||||
|
preDeleteDelimited.getObjectSummaries().size(),
|
||||||
|
postDeleteDelimited.getObjectSummaries().size()
|
||||||
|
);
|
||||||
|
assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
|
||||||
|
"in a non-recursive listing",
|
||||||
|
preDeleteDelimited.getCommonPrefixes().size(),
|
||||||
|
postDeleteDelimited.getCommonPrefixes().size()
|
||||||
|
);
|
||||||
|
assertEquals("InconsistentAmazonS3Client added back objects incorrectly " +
|
||||||
|
"in a recursive listing",
|
||||||
|
preDeleteUndelimited.getObjectSummaries().size(),
|
||||||
|
postDeleteUndelimited.getObjectSummaries().size()
|
||||||
|
);
|
||||||
|
assertEquals("InconsistentAmazonS3Client added back prefixes incorrectly " +
|
||||||
|
"in a recursive listing",
|
||||||
|
preDeleteUndelimited.getCommonPrefixes().size(),
|
||||||
|
postDeleteUndelimited.getCommonPrefixes().size()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void clearInconsistency(S3AFileSystem fs) throws Exception {
|
||||||
|
AmazonS3 s3 = fs.getAmazonS3Client();
|
||||||
|
InconsistentAmazonS3Client ic = InconsistentAmazonS3Client.castFrom(s3);
|
||||||
|
ic.clearInconsistency();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,141 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.s3guard.DirListingMetadata;
import org.junit.Assume;
import org.junit.Test;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;

/**
 * Test cases that validate S3Guard's behavior for writing things like
 * directory listings back to the MetadataStore.
 */
public class ITestS3GuardWriteBack extends AbstractS3ATestBase {

  /**
   * In listStatus(), when S3Guard is enabled, the full listing for a
   * directory is "written back" to the MetadataStore before the listing is
   * returned.  Currently this "write back" behavior occurs when
   * fs.s3a.metadatastore.authoritative is true.  This test validates this
   * behavior.
   * @throws Exception on failure
   */
  @Test
  public void testListStatusWriteBack() throws Exception {
    Assume.assumeTrue(getFileSystem().hasMetadataStore());

    Path directory = path("ListStatusWriteBack");

    // "raw" S3AFileSystem without S3Guard
    S3AFileSystem noS3Guard = createTestFS(directory.toUri(), true, false);

    // Another with S3Guard and write-back disabled
    S3AFileSystem noWriteBack = createTestFS(directory.toUri(), false, false);

    // Another S3Guard and write-back enabled
    S3AFileSystem yesWriteBack = createTestFS(directory.toUri(), false, true);

    // delete the existing directory (in case of last test failure)
    noS3Guard.delete(directory, true);
    // Create a directory on S3 only
    noS3Guard.mkdirs(new Path(directory, "OnS3"));
    // Create a directory on both S3 and metadata store
    Path p = new Path(directory, "OnS3AndMS");
    assertPathDoesntExist(noWriteBack, p);
    noWriteBack.mkdirs(p);

    FileStatus[] fsResults;
    DirListingMetadata mdResults;

    // FS should return both even though S3Guard is not writing back to MS
    fsResults = noWriteBack.listStatus(directory);
    assertEquals("Filesystem enabled S3Guard without write back should have "
            + "both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
        2, fsResults.length);

    // Metadata store without write-back should still only contain /OnS3AndMS,
    // because newly discovered /OnS3 is not written back to metadata store
    mdResults = noWriteBack.getMetadataStore().listChildren(directory);
    assertEquals("Metadata store without write back should still only know "
            + "about /OnS3AndMS, but it has: " + mdResults,
        1, mdResults.numEntries());

    // FS should return both (and will write it back)
    fsResults = yesWriteBack.listStatus(directory);
    assertEquals("Filesystem enabled S3Guard with write back should have "
            + " both /OnS3 and /OnS3AndMS: " + Arrays.toString(fsResults),
        2, fsResults.length);

    // Metadata store with write-back should contain both because the newly
    // discovered /OnS3 should have been written back to metadata store
    mdResults = yesWriteBack.getMetadataStore().listChildren(directory);
    assertEquals("Unexpected number of results from metadata store. "
            + "Should have /OnS3 and /OnS3AndMS: " + mdResults,
        2, mdResults.numEntries());

    // If we don't clean this up, the next test run will fail because it will
    // have recorded /OnS3 being deleted even after it's written to noS3Guard.
    getFileSystem().getMetadataStore().forgetMetadata(
        new Path(directory, "OnS3"));
  }

  /** Create a separate S3AFileSystem instance for testing. */
  private S3AFileSystem createTestFS(URI fsURI, boolean disableS3Guard,
      boolean authoritativeMeta) throws IOException {
    Configuration conf;

    // Create a FileSystem that is S3-backed only
    conf = createConfiguration();
    S3ATestUtils.disableFilesystemCaching(conf);
    if (disableS3Guard) {
      conf.set(Constants.S3_METADATA_STORE_IMPL,
          Constants.S3GUARD_METASTORE_NULL);
    } else {
      S3ATestUtils.maybeEnableS3Guard(conf);
      conf.setBoolean(Constants.METADATASTORE_AUTHORITATIVE, authoritativeMeta);
    }
    FileSystem fs = FileSystem.get(fsURI, conf);
    return asS3AFS(fs);
  }

  private static S3AFileSystem asS3AFS(FileSystem fs) {
    assertTrue("Not a S3AFileSystem: " + fs, fs instanceof S3AFileSystem);
    return (S3AFileSystem)fs;
  }

  private static void assertPathDoesntExist(FileSystem fs, Path p)
      throws IOException {
    try {
      FileStatus s = fs.getFileStatus(p);
    } catch (FileNotFoundException e) {
      return;
    }
    fail("Path should not exist: " + p);
  }

}
@ -23,6 +23,7 @@
import java.net.URI;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.Region;

/**
 * An {@link S3ClientFactory} that returns Mockito mocks of the {@link AmazonS3}
@ -35,6 +36,8 @@ public AmazonS3 createS3Client(URI name) {
    String bucket = name.getHost();
    AmazonS3 s3 = mock(AmazonS3.class);
    when(s3.doesBucketExist(bucket)).thenReturn(true);
    when(s3.getBucketLocation(anyString()))
        .thenReturn(Region.US_West.toString());
    return s3;
  }
}
@ -134,6 +134,18 @@ public interface S3ATestConstants {
  String TEST_STS_ENABLED = "test.fs.s3a.sts.enabled";
  String TEST_STS_ENDPOINT = "test.fs.s3a.sts.endpoint";

  /**
   * Various S3Guard tests.
   */
  String TEST_S3GUARD_PREFIX = "fs.s3a.s3guard.test";
  String TEST_S3GUARD_ENABLED = TEST_S3GUARD_PREFIX + ".enabled";
  String TEST_S3GUARD_AUTHORITATIVE = TEST_S3GUARD_PREFIX + ".authoritative";
  String TEST_S3GUARD_IMPLEMENTATION = TEST_S3GUARD_PREFIX + ".implementation";
  String TEST_S3GUARD_IMPLEMENTATION_LOCAL = "local";
  String TEST_S3GUARD_IMPLEMENTATION_DYNAMO = "dynamo";
  String TEST_S3GUARD_IMPLEMENTATION_DYNAMODBLOCAL = "dynamodblocal";
  String TEST_S3GUARD_IMPLEMENTATION_NONE = "none";

  /**
   * Timeout in Milliseconds for standard tests: {@value}.
   */
@ -22,7 +22,14 @@
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBLocalClientFactory;
import org.apache.hadoop.fs.s3a.s3guard.S3Guard;

import org.hamcrest.core.Is;
import org.junit.Assert;
import org.junit.Assume;
import org.junit.internal.AssumptionViolatedException;
@ -31,11 +38,13 @@

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.List;

import static org.apache.hadoop.fs.contract.ContractTestUtils.skip;
import static org.apache.hadoop.fs.s3a.S3ATestConstants.*;
import static org.apache.hadoop.fs.s3a.Constants.*;
import static org.apache.hadoop.fs.s3a.S3AUtils.propagateBucketOptions;
import static org.junit.Assert.*;

/**
@ -51,6 +60,15 @@ public final class S3ATestUtils {
   */
  public static final String UNSET_PROPERTY = "unset";

  /**
   * Get S3A FS name.
   * @param conf configuration.
   * @return S3A fs name.
   */
  public static String getFsName(Configuration conf) {
    return conf.getTrimmed(TEST_FS_S3A_NAME, "");
  }

  /**
   * Create the test filesystem.
   *
@ -97,6 +115,8 @@ public static S3AFileSystem createTestFileSystem(Configuration conf,
      throw new AssumptionViolatedException(
          "No test filesystem in " + TEST_FS_S3A_NAME);
    }
    // patch in S3Guard options
    maybeEnableS3Guard(conf);
    S3AFileSystem fs1 = new S3AFileSystem();
    //enable purging in tests
    if (purge) {
@ -137,6 +157,8 @@ public static FileContext createTestFileContext(Configuration conf)
      throw new AssumptionViolatedException("No test filesystem in "
          + TEST_FS_S3A_NAME);
    }
    // patch in S3Guard options
    maybeEnableS3Guard(conf);
    FileContext fc = FileContext.getFileContext(testURI, conf);
    return fc;
  }
@ -301,12 +323,95 @@ public static void skipIfEncryptionTestsDisabled(
   * @return a path
   */
  public static Path createTestPath(Path defVal) {
    String testUniqueForkId =
        System.getProperty(S3ATestConstants.TEST_UNIQUE_FORK_ID);
    return testUniqueForkId == null ? defVal :
        new Path("/" + testUniqueForkId, "test");
  }

  /**
   * Test assumption that S3Guard is/is not enabled.
   * @param shouldBeEnabled should S3Guard be enabled?
   * @param originalConf configuration to check
   * @throws URISyntaxException
   */
  public static void assumeS3GuardState(boolean shouldBeEnabled,
      Configuration originalConf) throws URISyntaxException {
    boolean isEnabled = getTestPropertyBool(originalConf, TEST_S3GUARD_ENABLED,
        originalConf.getBoolean(TEST_S3GUARD_ENABLED, false));
    Assume.assumeThat("Unexpected S3Guard test state:"
            + " shouldBeEnabled=" + shouldBeEnabled
            + " and isEnabled=" + isEnabled,
        shouldBeEnabled, Is.is(isEnabled));

    final String fsname = originalConf.getTrimmed(TEST_FS_S3A_NAME);
    Assume.assumeNotNull(fsname);
    final String bucket = new URI(fsname).getHost();
    final Configuration conf = propagateBucketOptions(originalConf, bucket);
    boolean usingNullImpl = S3GUARD_METASTORE_NULL.equals(
        conf.getTrimmed(S3_METADATA_STORE_IMPL, S3GUARD_METASTORE_NULL));
    Assume.assumeThat("Unexpected S3Guard test state:"
            + " shouldBeEnabled=" + shouldBeEnabled
            + " but usingNullImpl=" + usingNullImpl,
        shouldBeEnabled, Is.is(!usingNullImpl));
  }

  /**
   * Conditionally set the S3Guard options from test properties.
   * @param conf configuration
   */
  public static void maybeEnableS3Guard(Configuration conf) {
    if (getTestPropertyBool(conf, TEST_S3GUARD_ENABLED,
        conf.getBoolean(TEST_S3GUARD_ENABLED, false))) {
      // S3Guard is enabled.
      boolean authoritative = getTestPropertyBool(conf,
          TEST_S3GUARD_AUTHORITATIVE,
          conf.getBoolean(TEST_S3GUARD_AUTHORITATIVE, true));
      String impl = getTestProperty(conf, TEST_S3GUARD_IMPLEMENTATION,
          conf.get(TEST_S3GUARD_IMPLEMENTATION,
              TEST_S3GUARD_IMPLEMENTATION_LOCAL));
      String implClass = "";
      switch (impl) {
      case TEST_S3GUARD_IMPLEMENTATION_LOCAL:
        implClass = S3GUARD_METASTORE_LOCAL;
        break;
      case TEST_S3GUARD_IMPLEMENTATION_DYNAMODBLOCAL:
        conf.setClass(S3Guard.S3GUARD_DDB_CLIENT_FACTORY_IMPL,
            DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
      case TEST_S3GUARD_IMPLEMENTATION_DYNAMO:
        implClass = S3GUARD_METASTORE_DYNAMO;
        break;
      case TEST_S3GUARD_IMPLEMENTATION_NONE:
        implClass = S3GUARD_METASTORE_NULL;
        break;
      default:
        fail("Unknown s3guard back end: \"" + impl + "\"");
      }
      LOG.debug("Enabling S3Guard, authoritative={}, implementation={}",
          authoritative, implClass);
      conf.setBoolean(METADATASTORE_AUTHORITATIVE, authoritative);
      conf.set(S3_METADATA_STORE_IMPL, implClass);
      conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
    }
  }

  /**
   * Is there a MetadataStore configured for s3a with authoritative enabled?
   * @param conf Configuration to test.
   * @return true iff there is a MetadataStore configured, and it is
   * configured allow authoritative results.  This can result in reducing
   * round trips to S3 service for cached results, which may affect FS/FC
   * statistics.
   */
  public static boolean isMetadataStoreAuthoritative(Configuration conf) {
    if (conf == null) {
      return Constants.DEFAULT_METADATASTORE_AUTHORITATIVE;
    }
    return conf.getBoolean(
        Constants.METADATASTORE_AUTHORITATIVE,
        Constants.DEFAULT_METADATASTORE_AUTHORITATIVE);
  }

  /**
   * Reset all metrics in a list.
   * @param metrics metrics to reset
@ -503,6 +608,94 @@ public static <T extends Class<?>> String buildClassListString(
  private S3ATestUtils() {
  }

  /**
   * Verify the core size, block size and timestamp values of a file.
   * @param status status entry to check
   * @param size file size
   * @param blockSize block size
   * @param modTime modified time
   */
  public static void verifyFileStatus(FileStatus status, long size,
      long blockSize, long modTime) {
    verifyFileStatus(status, size, 0, modTime, 0, blockSize, null, null, null);
  }

  /**
   * Verify the status entry of a file matches that expected.
   * @param status status entry to check
   * @param size file size
   * @param replication replication factor (may be 0)
   * @param modTime modified time
   * @param accessTime access time (may be 0)
   * @param blockSize block size
   * @param owner owner (may be null)
   * @param group user group (may be null)
   * @param permission permission (may be null)
   */
  public static void verifyFileStatus(FileStatus status,
      long size,
      int replication,
      long modTime,
      long accessTime,
      long blockSize,
      String owner,
      String group,
      FsPermission permission) {
    String details = status.toString();
    assertFalse("Not a dir: " + details, status.isDirectory());
    assertEquals("Mod time: " + details, modTime, status.getModificationTime());
    assertEquals("File size: " + details, size, status.getLen());
    assertEquals("Block size: " + details, blockSize, status.getBlockSize());
    if (replication > 0) {
      assertEquals("Replication value: " + details, replication,
          status.getReplication());
    }
    if (accessTime != 0) {
      assertEquals("Access time: " + details, accessTime,
          status.getAccessTime());
    }
    if (owner != null) {
      assertEquals("Owner: " + details, owner, status.getOwner());
    }
    if (group != null) {
      assertEquals("Group: " + details, group, status.getGroup());
    }
    if (permission != null) {
      assertEquals("Permission: " + details, permission,
          status.getPermission());
    }
  }

  /**
   * Verify the status entry of a directory matches that expected.
   * @param status status entry to check
   * @param replication replication factor
   * @param modTime modified time
   * @param accessTime access time
   * @param owner owner
   * @param group user group
   * @param permission permission.
   */
  public static void verifyDirStatus(FileStatus status,
      int replication,
      long modTime,
      long accessTime,
      String owner,
      String group,
      FsPermission permission) {
    String details = status.toString();
    assertTrue("Is a dir: " + details, status.isDirectory());
    assertEquals("zero length: " + details, 0, status.getLen());

    assertEquals("Mod time: " + details, modTime, status.getModificationTime());
    assertEquals("Replication value: " + details, replication,
        status.getReplication());
    assertEquals("Access time: " + details, accessTime, status.getAccessTime());
    assertEquals("Owner: " + details, owner, status.getOwner());
    assertEquals("Group: " + details, group, status.getGroup());
    assertEquals("Permission: " + details, permission, status.getPermission());
  }

  /**
   * Set a bucket specific property to a particular value.
   * If the generic key passed in has an {@code fs.s3a. prefix},
@ -0,0 +1,118 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.junit.Assert;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Set;

import static org.apache.hadoop.fs.s3a.Listing.ACCEPT_ALL;
import static org.apache.hadoop.fs.s3a.Listing.ProvidedFileStatusIterator;

/**
 * Place for the S3A listing classes; keeps all the small classes under control.
 */
public class TestListing extends AbstractS3AMockTest {

  private static class MockRemoteIterator<FileStatus> implements
      RemoteIterator<FileStatus> {
    private Iterator<FileStatus> iterator;

    MockRemoteIterator(Collection<FileStatus> source) {
      iterator = source.iterator();
    }

    public boolean hasNext() {
      return iterator.hasNext();
    }

    public FileStatus next() {
      return iterator.next();
    }
  }

  private FileStatus blankFileStatus(Path path) {
    return new FileStatus(0, true, 0, 0, 0, path);
  }

  @Test
  public void testTombstoneReconcilingIterator() throws Exception {
    Path parent = new Path("/parent");
    Path liveChild = new Path(parent, "/liveChild");
    Path deletedChild = new Path(parent, "/deletedChild");
    Path[] allFiles = {parent, liveChild, deletedChild};
    Path[] liveFiles = {parent, liveChild};

    Listing listing = new Listing(fs);
    Collection<FileStatus> statuses = new ArrayList<>();
    statuses.add(blankFileStatus(parent));
    statuses.add(blankFileStatus(liveChild));
    statuses.add(blankFileStatus(deletedChild));

    Set<Path> tombstones = new HashSet<>();
    tombstones.add(deletedChild);

    RemoteIterator<FileStatus> sourceIterator = new MockRemoteIterator(
        statuses);
    RemoteIterator<LocatedFileStatus> locatedIterator =
        listing.createLocatedFileStatusIterator(sourceIterator);
    RemoteIterator<LocatedFileStatus> reconcilingIterator =
        listing.createTombstoneReconcilingIterator(locatedIterator, tombstones);

    Set<Path> expectedPaths = new HashSet<>();
    expectedPaths.add(parent);
    expectedPaths.add(liveChild);

    Set<Path> actualPaths = new HashSet<>();
    while (reconcilingIterator.hasNext()) {
      actualPaths.add(reconcilingIterator.next().getPath());
    }
    Assert.assertTrue(actualPaths.equals(expectedPaths));
  }

  @Test
  public void testProvidedFileStatusIteratorEnd() throws Exception {
    FileStatus[] statuses = {
        new FileStatus(100, false, 1, 8192, 0, new Path("s3a://blah/blah"))
    };
    ProvidedFileStatusIterator it = new ProvidedFileStatusIterator(statuses,
        ACCEPT_ALL, new Listing.AcceptAllButS3nDirs());

    Assert.assertTrue("hasNext() should return true first time", it.hasNext());
    Assert.assertNotNull("first element should not be null", it.next());
    Assert.assertFalse("hasNext() should now be false", it.hasNext());
    try {
      it.next();
      Assert.fail("next() should have thrown exception");
    } catch (NoSuchElementException e) {
      // Correct behavior.  Any other exceptions are propagated as failure.
      return;
    }
  }
}
@ -39,7 +39,9 @@ public void setUp() throws Exception {

  @After
  public void tearDown() throws Exception {
    if (fc != null) {
      fc.delete(fileContextTestHelper.getTestRootPath(fc, "test"), true);
    }
  }

  @Override
@ -16,19 +16,29 @@
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileContextURIBase;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
import static org.apache.hadoop.fs.s3a.S3ATestUtils.createTestFileSystem;

/**
 * S3a implementation of FileContextURIBase.
 */
public class ITestS3AFileContextURI extends FileContextURIBase {

  private Configuration conf;
  private boolean hasMetadataStore;

  @Before
  public void setUp() throws IOException, Exception {
    conf = new Configuration();
    try(S3AFileSystem s3aFS = createTestFileSystem(conf)) {
      hasMetadataStore = s3aFS.hasMetadataStore();
    }
    fc1 = S3ATestUtils.createTestFileContext(conf);
    fc2 = S3ATestUtils.createTestFileContext(conf); //different object, same FS
    super.setUp();
@ -41,4 +51,11 @@ public void testFileStatus() throws IOException {
    // (the statistics tested with this method are not relevant for an S3FS)
  }

  @Test
  @Override
  public void testModificationTime() throws IOException {
    // skip modtime tests as there may be some inconsistency during creation
    assume("modification time tests are skipped", !hasMetadataStore);
    super.testModificationTime();
  }
}
@ -0,0 +1,33 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import org.apache.hadoop.fs.FileSystem;

import java.io.IOException;

/**
 * Test specification for MetadataStore contract tests. Supplies configuration
 * and MetadataStore instance.
 */
public abstract class AbstractMSContract {

  public abstract FileSystem getFileSystem() throws IOException;
  public abstract MetadataStore getMetadataStore() throws IOException;
}
@ -0,0 +1,161 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import org.junit.Test;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
import org.apache.hadoop.fs.s3a.Constants;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3ATestUtils;
import org.apache.hadoop.io.IOUtils;

import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;

/**
 * Common functionality for S3GuardTool test cases.
 */
public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {

  protected static final String OWNER = "hdfs";

  private MetadataStore ms;

  protected static void expectResult(int expected,
      String message,
      S3GuardTool tool,
      String... args) throws Exception {
    assertEquals(message, expected, tool.run(args));
  }

  protected static void expectSuccess(
      String message,
      S3GuardTool tool,
      String... args) throws Exception {
    assertEquals(message, SUCCESS, tool.run(args));
  }

  protected MetadataStore getMetadataStore() {
    return ms;
  }

  protected abstract MetadataStore newMetadataStore();

  @Override
  public void setup() throws Exception {
    super.setup();
    S3ATestUtils.assumeS3GuardState(true, getConfiguration());
    ms = newMetadataStore();
    ms.initialize(getFileSystem());
  }

  @Override
  public void teardown() throws Exception {
    super.teardown();
    IOUtils.cleanupWithLogger(LOG, ms);
  }

  protected void mkdirs(Path path, boolean onS3, boolean onMetadataStore)
      throws IOException {
    if (onS3) {
      getFileSystem().mkdirs(path);
    }
    if (onMetadataStore) {
      S3AFileStatus status = new S3AFileStatus(true, path, OWNER);
      ms.put(new PathMetadata(status));
    }
  }

  protected static void putFile(MetadataStore ms, S3AFileStatus f)
      throws IOException {
    assertNotNull(f);
    ms.put(new PathMetadata(f));
    Path parent = f.getPath().getParent();
    while (parent != null) {
      S3AFileStatus dir = new S3AFileStatus(false, parent, f.getOwner());
      ms.put(new PathMetadata(dir));
      parent = parent.getParent();
    }
  }

  /**
   * Create file either on S3 or in metadata store.
   * @param path the file path.
   * @param onS3 set to true to create the file on S3.
   * @param onMetadataStore set to true to create the file on the
   *                        metadata store.
   * @throws IOException IO problem
   */
  protected void createFile(Path path, boolean onS3, boolean onMetadataStore)
      throws IOException {
    if (onS3) {
      ContractTestUtils.touch(getFileSystem(), path);
    }

    if (onMetadataStore) {
      S3AFileStatus status = new S3AFileStatus(100L, System.currentTimeMillis(),
          getFileSystem().qualify(path), 512L, "hdfs");
      putFile(ms, status);
    }
  }

  private void testPruneCommand(Configuration cmdConf, String...args)
      throws Exception {
    Path parent = path("prune-cli");
    try {
      getFileSystem().mkdirs(parent);

      S3GuardTool.Prune cmd = new S3GuardTool.Prune(cmdConf);
      cmd.setMetadataStore(ms);

      createFile(new Path(parent, "stale"), true, true);
      Thread.sleep(TimeUnit.SECONDS.toMillis(2));
      createFile(new Path(parent, "fresh"), true, true);

      assertEquals(2, ms.listChildren(parent).getListing().size());
      expectSuccess("Prune command did not exit successfully - see output", cmd,
          args);
      assertEquals(1, ms.listChildren(parent).getListing().size());
    } finally {
      getFileSystem().delete(parent, true);
      ms.prune(Long.MAX_VALUE);
    }
  }

  @Test
  public void testPruneCommandCLI() throws Exception {
    String testPath = path("testPruneCommandCLI").toString();
    testPruneCommand(getFileSystem().getConf(),
        "prune", "-seconds", "1", testPath);
  }

  @Test
  public void testPruneCommandConf() throws Exception {
    getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
        TimeUnit.SECONDS.toMillis(1));
    String testPath = path("testPruneCommandConf").toString();
    testPruneCommand(getConfiguration(), "prune", testPath);
  }
}
@ -0,0 +1,157 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import com.amazonaws.ClientConfiguration;
|
||||||
|
import com.amazonaws.auth.AWSCredentialsProvider;
|
||||||
|
import com.amazonaws.client.builder.AwsClientBuilder;
|
||||||
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDB;
|
||||||
|
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClientBuilder;
|
||||||
|
import com.amazonaws.services.dynamodbv2.local.main.ServerRunner;
|
||||||
|
import com.amazonaws.services.dynamodbv2.local.server.DynamoDBProxyServer;
|
||||||
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.conf.Configured;
|
||||||
|
import org.apache.hadoop.fs.s3a.DefaultS3ClientFactory;
|
||||||
|
import org.apache.hadoop.net.ServerSocketUtil;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3AUtils.createAWSCredentialProviderSet;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBClientFactory.DefaultDynamoDBClientFactory.getRegion;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A DynamoDBClientFactory implementation that creates AmazonDynamoDB clients
|
||||||
|
* against an in-memory DynamoDBLocal server instance.
|
||||||
|
*
|
||||||
|
* You won't be charged bills for issuing any DynamoDB requests. However, the
|
||||||
|
* DynamoDBLocal is considered a simulator of the DynamoDB web service, so it
|
||||||
|
* may be stale or different. For example, the throttling is not yet supported
|
||||||
|
* in DynamoDBLocal. This is for testing purpose only.
|
||||||
|
*
|
||||||
|
* To use this for creating DynamoDB client in tests:
|
||||||
|
* <ol>
|
||||||
|
* <li>
|
||||||
|
* As all DynamoDBClientFactory implementations, this should be configured.
|
||||||
|
* </li>
|
||||||
|
* <li>
|
||||||
|
 * The singleton DynamoDBLocal server instance is started automatically when
 * creating the AmazonDynamoDB client for the first time. It is still
 * worthwhile to launch the server before all the tests and to fail fast if
 * an error happens.
 * </li>
 * <li>
 * The server can be stopped explicitly, which is not actually needed in
 * tests as JVM termination will do that.
 * </li>
 * </ol>
 *
 * @see DefaultDynamoDBClientFactory
 */
public class DynamoDBLocalClientFactory extends Configured
    implements DynamoDBClientFactory {

  /** The DynamoDBLocal dynamoDBLocalServer instance for testing. */
  private static DynamoDBProxyServer dynamoDBLocalServer;
  private static String ddbEndpoint;

  private static final String SYSPROP_SQLITE_LIB = "sqlite4java.library.path";

  @Override
  public AmazonDynamoDB createDynamoDBClient(String defaultRegion)
      throws IOException {
    startSingletonServer();

    final Configuration conf = getConf();
    final AWSCredentialsProvider credentials =
        createAWSCredentialProviderSet(null, conf);
    final ClientConfiguration awsConf =
        DefaultS3ClientFactory.createAwsConf(conf);
    // fail fast in case of service errors
    awsConf.setMaxErrorRetry(3);

    final String region = getRegion(conf, defaultRegion);
    LOG.info("Creating DynamoDBLocal client using endpoint {} in region {}",
        ddbEndpoint, region);

    return AmazonDynamoDBClientBuilder.standard()
        .withCredentials(credentials)
        .withClientConfiguration(awsConf)
        .withEndpointConfiguration(
            new AwsClientBuilder.EndpointConfiguration(ddbEndpoint, region))
        .build();
  }

  /**
   * Start a singleton in-memory DynamoDBLocal server if not started yet.
   * @throws IOException if any error occurs
   */
  public synchronized static void startSingletonServer() throws IOException {
    if (dynamoDBLocalServer != null) {
      return;
    }

    // Set this property if it has not been set elsewhere
    if (StringUtils.isEmpty(System.getProperty(SYSPROP_SQLITE_LIB))) {
      String projectBuildDir = System.getProperty("project.build.directory");
      if (StringUtils.isEmpty(projectBuildDir)) {
        projectBuildDir = "target";
      }
      // sqlite4java lib should have been copied to $projectBuildDir/native-libs
      System.setProperty(SYSPROP_SQLITE_LIB,
          projectBuildDir + File.separator + "native-libs");
      LOG.info("Setting {} -> {}",
          SYSPROP_SQLITE_LIB, System.getProperty(SYSPROP_SQLITE_LIB));
    }

    try {
      // Start an in-memory local DynamoDB instance
      final String port = String.valueOf(ServerSocketUtil.getPort(0, 100));
      ddbEndpoint = "http://localhost:" + port;
      dynamoDBLocalServer = ServerRunner.createServerFromCommandLineArgs(
          new String[]{"-inMemory", "-port", port});
      dynamoDBLocalServer.start();
      LOG.info("DynamoDBLocal singleton server was started at {}", ddbEndpoint);
    } catch (Exception t) {
      String msg = "Error starting DynamoDBLocal server at " + ddbEndpoint
          + " " + t;
      LOG.error(msg, t);
      throw new IOException(msg, t);
    }
  }

  /**
   * Stop the in-memory DynamoDBLocal server if it is started.
   * @throws IOException if any error occurs
   */
  public synchronized static void stopSingletonServer() throws IOException {
    if (dynamoDBLocalServer != null) {
      LOG.info("Shutting down the in-memory DynamoDBLocal server");
      try {
        dynamoDBLocalServer.stop();
      } catch (Throwable t) {
        String msg = "Error stopping DynamoDBLocal server at " + ddbEndpoint;
        LOG.error(msg, t);
        throw new IOException(msg, t);
      }
    }
  }

}
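For reference, a minimal sketch (not part of this patch) of how a test can route S3Guard's DynamoDB traffic through this factory. The configuration key constant S3GUARD_DDB_CLIENT_FACTORY_IMPL is assumed from the surrounding S3Guard configuration rather than shown in this hunk, and "example-table" is a placeholder name; the remaining calls appear elsewhere in the patch.

// Hedged sketch: select the DynamoDBLocal factory so that
// DynamoDBMetadataStore.initialize() builds its AmazonDynamoDB client
// against the in-memory server started by startSingletonServer().
Configuration conf = new Configuration();
conf.setClass(Constants.S3GUARD_DDB_CLIENT_FACTORY_IMPL,  // assumed key name
    DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
conf.set(Constants.S3GUARD_DDB_TABLE_NAME_KEY, "example-table");
conf.setBoolean(Constants.S3GUARD_DDB_TABLE_CREATE_KEY, true);
try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) {
  store.initialize(conf);  // table operations now hit the local endpoint
}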
@ -0,0 +1,160 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicInteger;

import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import org.junit.Assume;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
import org.apache.hadoop.fs.s3a.Constants;

import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_REGION_KEY;

/**
 * Tests concurrent operations on S3Guard.
 */
public class ITestS3GuardConcurrentOps extends AbstractS3ATestBase {

  @Rule
  public final Timeout timeout = new Timeout(5 * 60 * 1000);

  private void failIfTableExists(DynamoDB db, String tableName) {
    boolean tableExists = true;
    try {
      Table table = db.getTable(tableName);
      table.describe();
    } catch (ResourceNotFoundException e) {
      tableExists = false;
    }
    if (tableExists) {
      fail("Table already exists: " + tableName);
    }
  }

  private void deleteTable(DynamoDB db, String tableName) throws
      InterruptedException {
    try {
      Table table = db.getTable(tableName);
      table.waitForActive();
      table.delete();
      table.waitForDelete();
    } catch (ResourceNotFoundException e) {
      LOG.warn("Failed to delete {}, as it was not found", tableName, e);
    }
  }

  @Test
  public void testConcurrentTableCreations() throws Exception {
    final Configuration conf = getConfiguration();
    Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard",
        conf.get(Constants.S3_METADATA_STORE_IMPL).equals(
            Constants.S3GUARD_METASTORE_DYNAMO));

    DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
    ms.initialize(getFileSystem());
    DynamoDB db = ms.getDynamoDB();

    String tableName = "testConcurrentTableCreations" + new Random().nextInt();
    conf.setBoolean(Constants.S3GUARD_DDB_TABLE_CREATE_KEY, true);
    conf.set(Constants.S3GUARD_DDB_TABLE_NAME_KEY, tableName);

    String region = conf.getTrimmed(S3GUARD_DDB_REGION_KEY);
    if (StringUtils.isEmpty(region)) {
      // no region set, so pick it up from the test bucket
      conf.set(S3GUARD_DDB_REGION_KEY, getFileSystem().getBucketLocation());
    }
    int concurrentOps = 16;
    int iterations = 4;

    failIfTableExists(db, tableName);

    for (int i = 0; i < iterations; i++) {
      ExecutorService executor = Executors.newFixedThreadPool(
          concurrentOps, new ThreadFactory() {
            private AtomicInteger count = new AtomicInteger(0);

            public Thread newThread(Runnable r) {
              return new Thread(r,
                  "testConcurrentTableCreations" + count.getAndIncrement());
            }
          });
      ((ThreadPoolExecutor) executor).prestartAllCoreThreads();
      Future<Exception>[] futures = new Future[concurrentOps];
      for (int f = 0; f < concurrentOps; f++) {
        final int index = f;
        futures[f] = executor.submit(new Callable<Exception>() {
          @Override
          public Exception call() throws Exception {

            ContractTestUtils.NanoTimer timer =
                new ContractTestUtils.NanoTimer();

            Exception result = null;
            try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) {
              store.initialize(conf);
            } catch (Exception e) {
              LOG.error(e.getClass() + ": " + e.getMessage());
              result = e;
            }

            timer.end("Parallel DynamoDB client creation %d", index);
            LOG.info("Parallel DynamoDB client creation {} ran from {} to {}",
                index, timer.getStartTime(), timer.getEndTime());
            return result;
          }
        });
      }
      List<Exception> exceptions = new ArrayList<>(concurrentOps);
      for (int f = 0; f < concurrentOps; f++) {
        Exception outcome = futures[f].get();
        if (outcome != null) {
          exceptions.add(outcome);
        }
      }
      deleteTable(db, tableName);
      int exceptionsThrown = exceptions.size();
      if (exceptionsThrown > 0) {
        // at least one exception was thrown. Fail the test & nest the first
        // exception caught
        throw new AssertionError(exceptionsThrown + "/" + concurrentOps +
            " threads threw exceptions while initializing on iteration " + i,
            exceptions.get(0));
      }
    }
  }
}
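The anonymous Callable above follows the existing style of the code base; purely as an illustrative sketch (not part of this patch), the same submit-and-collect pattern can be written with Java 8 lambdas, where conf and concurrentOps are the values the test already prepares:

// Sketch only: racing DynamoDBMetadataStore.initialize() calls via lambdas.
ExecutorService pool = Executors.newFixedThreadPool(concurrentOps);
List<Future<Exception>> results = new ArrayList<>();
Callable<Exception> task = () -> {
  try (DynamoDBMetadataStore store = new DynamoDBMetadataStore()) {
    store.initialize(conf);   // all threads race to create the same table
    return null;              // success
  } catch (Exception e) {
    return e;                 // surfaced to the caller via Future.get()
  }
};
for (int i = 0; i < concurrentOps; i++) {
  results.add(pool.submit(task));
}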
@ -0,0 +1,134 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;
import java.util.Random;
import java.util.concurrent.Callable;

import com.amazonaws.services.dynamodbv2.document.DynamoDB;
import com.amazonaws.services.dynamodbv2.document.Table;
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
import org.junit.Test;

import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Destroy;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Init;
import org.apache.hadoop.test.LambdaTestUtils;

/**
 * Test S3Guard related CLI commands against DynamoDB.
 */
public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {

  @Override
  protected MetadataStore newMetadataStore() {
    return new DynamoDBMetadataStore();
  }

  // Check the existence of a given DynamoDB table.
  private static boolean exist(DynamoDB dynamoDB, String tableName) {
    assertNotNull(dynamoDB);
    assertNotNull(tableName);
    assertFalse("empty table name", tableName.isEmpty());
    try {
      Table table = dynamoDB.getTable(tableName);
      table.describe();
    } catch (ResourceNotFoundException e) {
      return false;
    }
    return true;
  }

  @Test
  public void testInvalidRegion() throws Exception {
    final String testTableName = "testInvalidRegion" + new Random().nextInt();
    final String testRegion = "invalidRegion";
    // Initialize MetadataStore
    final Init initCmd = new Init(getFileSystem().getConf());
    LambdaTestUtils.intercept(IOException.class,
        new Callable<String>() {
          @Override
          public String call() throws Exception {
            int res = initCmd.run(new String[]{
                "init",
                "-region", testRegion,
                "-meta", "dynamodb://" + testTableName
            });
            return "Use of invalid region did not fail, returning " + res
                + "- table may have been " +
                "created and not cleaned up: " + testTableName;
          }
        });
  }

  @Test
  public void testDynamoDBInitDestroyCycle() throws Exception {
    String testTableName = "testDynamoDBInitDestroy" + new Random().nextInt();
    String testS3Url = path(testTableName).toString();
    S3AFileSystem fs = getFileSystem();
    DynamoDB db = null;
    try {
      // Initialize MetadataStore
      Init initCmd = new Init(fs.getConf());
      expectSuccess("Init command did not exit successfully - see output",
          initCmd,
          "init", "-meta", "dynamodb://" + testTableName, testS3Url);
      // Verify it exists
      MetadataStore ms = getMetadataStore();
      assertTrue("metadata store should be DynamoDBMetadataStore",
          ms instanceof DynamoDBMetadataStore);
      DynamoDBMetadataStore dynamoMs = (DynamoDBMetadataStore) ms;
      db = dynamoMs.getDynamoDB();
      assertTrue(String.format("%s does not exist", testTableName),
          exist(db, testTableName));

      // Destroy MetadataStore
      Destroy destroyCmd = new Destroy(fs.getConf());

      expectSuccess("Destroy command did not exit successfully - see output",
          destroyCmd,
          "destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
      // Verify it does not exist
      assertFalse(String.format("%s still exists", testTableName),
          exist(db, testTableName));

      // delete again and expect success again
      expectSuccess("Destroy command did not exit successfully - see output",
          destroyCmd,
          "destroy", "-meta", "dynamodb://" + testTableName, testS3Url);
    } catch (ResourceNotFoundException e) {
      throw new AssertionError(
          String.format("DynamoDB table %s does not exist", testTableName),
          e);
    } finally {
      LOG.warn("Table may have not been cleaned up: " +
          testTableName);
      if (db != null) {
        Table table = db.getTable(testTableName);
        if (table != null) {
          try {
            table.delete();
            table.waitForDelete();
          } catch (ResourceNotFoundException e) { /* Ignore */ }
        }
      }
    }
  }
}
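For orientation, a condensed sketch (not part of this patch) of the init/destroy round trip that testDynamoDBInitDestroyCycle drives through expectSuccess(); the bucket and table names are placeholders, and 0 is assumed to be the SUCCESS exit code defined by S3GuardTool:

// Sketch only: create and then destroy an S3Guard DynamoDB table via the
// tool classes exercised above. All names below are placeholders.
Configuration conf = new Configuration();
Init init = new Init(conf);
int created = init.run(new String[]{
    "init", "-meta", "dynamodb://example-table", "s3a://example-bucket/"});
// created == 0 expected on success
Destroy destroy = new Destroy(conf);
int destroyed = destroy.run(new String[]{
    "destroy", "-meta", "dynamodb://example-table", "s3a://example-bucket/"});
// destroying an already-destroyed table is also expected to succeed (see test)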
@ -0,0 +1,149 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * <p>
 * http://www.apache.org/licenses/LICENSE-2.0
 * <p>
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.HashSet;
import java.util.Set;

import org.junit.Test;

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.Diff;

import static org.apache.hadoop.fs.s3a.s3guard.S3GuardTool.SUCCESS;

/**
 * Test S3Guard related CLI commands against a LocalMetadataStore.
 */
public class ITestS3GuardToolLocal extends AbstractS3GuardToolTestBase {

  @Override
  protected MetadataStore newMetadataStore() {
    return new LocalMetadataStore();
  }

  @Test
  public void testImportCommand() throws Exception {
    S3AFileSystem fs = getFileSystem();
    MetadataStore ms = getMetadataStore();
    Path parent = path("test-import");
    fs.mkdirs(parent);
    Path dir = new Path(parent, "a");
    fs.mkdirs(dir);
    Path emptyDir = new Path(parent, "emptyDir");
    fs.mkdirs(emptyDir);
    for (int i = 0; i < 10; i++) {
      String child = String.format("file-%d", i);
      try (FSDataOutputStream out = fs.create(new Path(dir, child))) {
        out.write(1);
      }
    }

    S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
    cmd.setStore(ms);

    expectSuccess("Import command did not exit successfully - see output",
        cmd,
        "import", parent.toString());

    DirListingMetadata children =
        ms.listChildren(dir);
    assertEquals("Unexpected number of paths imported", 10, children
        .getListing().size());
    assertEquals("Expected 2 items: empty directory and a parent directory", 2,
        ms.listChildren(parent).getListing().size());
    // assertTrue(children.isAuthoritative());
  }

  @Test
  public void testDiffCommand() throws IOException {
    S3AFileSystem fs = getFileSystem();
    MetadataStore ms = getMetadataStore();
    Set<Path> filesOnS3 = new HashSet<>(); // files on S3.
    Set<Path> filesOnMS = new HashSet<>(); // files on metadata store.

    Path testPath = path("test-diff");
    mkdirs(testPath, true, true);

    Path msOnlyPath = new Path(testPath, "ms_only");
    mkdirs(msOnlyPath, false, true);
    filesOnMS.add(msOnlyPath);
    for (int i = 0; i < 5; i++) {
      Path file = new Path(msOnlyPath, String.format("file-%d", i));
      createFile(file, false, true);
      filesOnMS.add(file);
    }

    Path s3OnlyPath = new Path(testPath, "s3_only");
    mkdirs(s3OnlyPath, true, false);
    filesOnS3.add(s3OnlyPath);
    for (int i = 0; i < 5; i++) {
      Path file = new Path(s3OnlyPath, String.format("file-%d", i));
      createFile(file, true, false);
      filesOnS3.add(file);
    }

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    PrintStream out = new PrintStream(buf);
    Diff cmd = new Diff(fs.getConf());
    cmd.setStore(ms);
    assertEquals("Diff command did not exit successfully - see output", SUCCESS,
        cmd.run(new String[]{"diff", "-meta", "local://metadata",
            testPath.toString()}, out));
    out.close();

    Set<Path> actualOnS3 = new HashSet<>();
    Set<Path> actualOnMS = new HashSet<>();
    boolean duplicates = false;
    try (BufferedReader reader =
             new BufferedReader(new InputStreamReader(
                 new ByteArrayInputStream(buf.toByteArray())))) {
      String line;
      while ((line = reader.readLine()) != null) {
        String[] fields = line.split("\\s");
        assertEquals("[" + line + "] does not have enough fields",
            4, fields.length);
        String where = fields[0];
        Path path = new Path(fields[3]);
        if (Diff.S3_PREFIX.equals(where)) {
          duplicates = duplicates || actualOnS3.contains(path);
          actualOnS3.add(path);
        } else if (Diff.MS_PREFIX.equals(where)) {
          duplicates = duplicates || actualOnMS.contains(path);
          actualOnMS.add(path);
        } else {
          fail("Unknown prefix: " + where);
        }
      }
    }
    String actualOut = out.toString();
    assertEquals("Mismatched metadata store outputs: " + actualOut,
        filesOnMS, actualOnMS);
    assertEquals("Mismatched s3 outputs: " + actualOut, filesOnS3, actualOnS3);
    assertFalse("Diff contained duplicates", duplicates);
  }
}
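A small sketch (not part of this patch) of invoking the diff subcommand directly, mirroring the arguments used in testDiffCommand; the s3a path is a placeholder and output goes to System.out rather than a capture buffer. As the parsing loop above shows, each reported line carries four whitespace-separated fields, with the source prefix (Diff.S3_PREFIX or Diff.MS_PREFIX) first and the path last.

// Sketch only: run the S3Guard diff against the "local://metadata" store URI.
Diff diff = new Diff(new Configuration());
int ret = diff.run(
    new String[]{"diff", "-meta", "local://metadata",
        "s3a://example-bucket/test-diff"},
    System.out);
// ret == SUCCESS expected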
@ -0,0 +1,887 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import com.google.common.collect.Sets;
|
||||||
|
import org.junit.After;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Assume;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
|
import org.apache.hadoop.fs.permission.FsPermission;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3ATestUtils;
|
||||||
|
import org.apache.hadoop.fs.s3a.Tristate;
|
||||||
|
import org.apache.hadoop.io.IOUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Main test class for MetadataStore implementations.
|
||||||
|
* Implementations should each create a test by subclassing this and
|
||||||
|
* overriding {@link #createContract()}.
|
||||||
|
* If your implementation may return missing results for recently set paths,
|
||||||
|
* override {@link MetadataStoreTestBase#allowMissing()}.
|
||||||
|
*/
|
||||||
|
public abstract class MetadataStoreTestBase extends Assert {
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(MetadataStoreTestBase.class);
|
||||||
|
|
||||||
|
/** Some dummy values for sanity-checking FileStatus contents. */
|
||||||
|
static final long BLOCK_SIZE = 32 * 1024 * 1024;
|
||||||
|
static final int REPLICATION = 1;
|
||||||
|
static final FsPermission PERMISSION = new FsPermission((short)0755);
|
||||||
|
static final String OWNER = "bob";
|
||||||
|
static final String GROUP = "uncles";
|
||||||
|
private final long accessTime = System.currentTimeMillis();
|
||||||
|
private final long modTime = accessTime - 5000;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Each test should override this. Will use a new Configuration instance.
|
||||||
|
* @return Contract which specifies the MetadataStore under test plus config.
|
||||||
|
*/
|
||||||
|
public abstract AbstractMSContract createContract() throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Each test should override this.
|
||||||
|
* @param conf Base configuration instance to use.
|
||||||
|
* @return Contract which specifies the MetadataStore under test plus config.
|
||||||
|
*/
|
||||||
|
public abstract AbstractMSContract createContract(Configuration conf)
|
||||||
|
throws IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests assume that implementations will return recently set results. If
|
||||||
|
* your implementation does not always hold onto metadata (e.g. LRU or
|
||||||
|
* time-based expiry) you can override this to return false.
|
||||||
|
* @return true if the test should succeed when null results are returned
|
||||||
|
* from the MetadataStore under test.
|
||||||
|
*/
|
||||||
|
public boolean allowMissing() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Pruning is an optional feature for metadata store implementations.
|
||||||
|
* Tests will only check that functionality if it is expected to work.
|
||||||
|
* @return true if the test should expect pruning to work.
|
||||||
|
*/
|
||||||
|
public boolean supportsPruning() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** The MetadataStore contract used to test against. */
|
||||||
|
private AbstractMSContract contract;
|
||||||
|
|
||||||
|
private MetadataStore ms;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return reference to the test contract.
|
||||||
|
*/
|
||||||
|
protected AbstractMSContract getContract() {
|
||||||
|
return contract;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
LOG.debug("== Setup. ==");
|
||||||
|
contract = createContract();
|
||||||
|
ms = contract.getMetadataStore();
|
||||||
|
assertNotNull("null MetadataStore", ms);
|
||||||
|
assertNotNull("null FileSystem", contract.getFileSystem());
|
||||||
|
ms.initialize(contract.getFileSystem());
|
||||||
|
}
|
||||||
|
|
||||||
|
@After
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
LOG.debug("== Tear down. ==");
|
||||||
|
if (ms != null) {
|
||||||
|
try {
|
||||||
|
ms.destroy();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOG.warn("Failed to destroy tables in teardown", e);
|
||||||
|
}
|
||||||
|
IOUtils.closeStream(ms);
|
||||||
|
ms = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function for verifying DescendantsIterator and
|
||||||
|
* MetadataStoreListFilesIterator behavior.
|
||||||
|
* @param createNodes List of paths to create
|
||||||
|
* @param checkNodes List of paths that the iterator should return
|
||||||
|
*/
|
||||||
|
private void doTestDescendantsIterator(
|
||||||
|
Class implementation, String[] createNodes,
|
||||||
|
String[] checkNodes) throws Exception {
|
||||||
|
// we set up the example file system tree in metadata store
|
||||||
|
for (String pathStr : createNodes) {
|
||||||
|
final FileStatus status = pathStr.contains("file")
|
||||||
|
? basicFileStatus(strToPath(pathStr), 100, false)
|
||||||
|
: basicFileStatus(strToPath(pathStr), 0, true);
|
||||||
|
ms.put(new PathMetadata(status));
|
||||||
|
}
|
||||||
|
|
||||||
|
final PathMetadata rootMeta = new PathMetadata(makeDirStatus("/"));
|
||||||
|
RemoteIterator<FileStatus> iterator;
|
||||||
|
if (implementation == DescendantsIterator.class) {
|
||||||
|
iterator = new DescendantsIterator(ms, rootMeta);
|
||||||
|
} else if (implementation == MetadataStoreListFilesIterator.class) {
|
||||||
|
iterator = new MetadataStoreListFilesIterator(ms, rootMeta, false);
|
||||||
|
} else {
|
||||||
|
throw new UnsupportedOperationException("Unrecognized class");
|
||||||
|
}
|
||||||
|
|
||||||
|
final Set<String> actual = new HashSet<>();
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
final Path p = iterator.next().getPath();
|
||||||
|
actual.add(Path.getPathWithoutSchemeAndAuthority(p).toString());
|
||||||
|
}
|
||||||
|
LOG.info("We got {} by iterating DescendantsIterator", actual);
|
||||||
|
|
||||||
|
if (!allowMissing()) {
|
||||||
|
assertEquals(Sets.newHashSet(checkNodes), actual);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we can get the whole sub-tree by iterating DescendantsIterator.
|
||||||
|
*
|
||||||
|
* The tree is similar to or same as the example in code comment.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDescendantsIterator() throws Exception {
|
||||||
|
final String[] tree = new String[] {
|
||||||
|
"/dir1",
|
||||||
|
"/dir1/dir2",
|
||||||
|
"/dir1/dir3",
|
||||||
|
"/dir1/dir2/file1",
|
||||||
|
"/dir1/dir2/file2",
|
||||||
|
"/dir1/dir3/dir4",
|
||||||
|
"/dir1/dir3/dir5",
|
||||||
|
"/dir1/dir3/dir4/file3",
|
||||||
|
"/dir1/dir3/dir5/file4",
|
||||||
|
"/dir1/dir3/dir6"
|
||||||
|
};
|
||||||
|
doTestDescendantsIterator(DescendantsIterator.class,
|
||||||
|
tree, tree);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that we can get the correct subset of the tree with
|
||||||
|
* MetadataStoreListFilesIterator.
|
||||||
|
*
|
||||||
|
* The tree is similar to or same as the example in code comment.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMetadataStoreListFilesIterator() throws Exception {
|
||||||
|
final String[] wholeTree = new String[] {
|
||||||
|
"/dir1",
|
||||||
|
"/dir1/dir2",
|
||||||
|
"/dir1/dir3",
|
||||||
|
"/dir1/dir2/file1",
|
||||||
|
"/dir1/dir2/file2",
|
||||||
|
"/dir1/dir3/dir4",
|
||||||
|
"/dir1/dir3/dir5",
|
||||||
|
"/dir1/dir3/dir4/file3",
|
||||||
|
"/dir1/dir3/dir5/file4",
|
||||||
|
"/dir1/dir3/dir6"
|
||||||
|
};
|
||||||
|
final String[] leafNodes = new String[] {
|
||||||
|
"/dir1/dir2/file1",
|
||||||
|
"/dir1/dir2/file2",
|
||||||
|
"/dir1/dir3/dir4/file3",
|
||||||
|
"/dir1/dir3/dir5/file4"
|
||||||
|
};
|
||||||
|
doTestDescendantsIterator(MetadataStoreListFilesIterator.class, wholeTree,
|
||||||
|
leafNodes);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutNew() throws Exception {
|
||||||
|
/* create three dirs /da1, /da2, /da3 */
|
||||||
|
createNewDirs("/da1", "/da2", "/da3");
|
||||||
|
|
||||||
|
/* It is the caller's responsibility to set up ancestor entries beyond the
|
||||||
|
* containing directory. We only track direct children of the directory.
|
||||||
|
* Thus this will not affect entry for /da1.
|
||||||
|
*/
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc1", 100)));
|
||||||
|
|
||||||
|
assertEmptyDirs("/da2", "/da3");
|
||||||
|
assertDirectorySize("/da1/db1", 1);
|
||||||
|
|
||||||
|
/* Check contents of dir status. */
|
||||||
|
PathMetadata dirMeta = ms.get(strToPath("/da1"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
verifyDirStatus(dirMeta.getFileStatus());
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This already exists, and should silently replace it. */
|
||||||
|
ms.put(new PathMetadata(makeDirStatus("/da1/db1")));
|
||||||
|
|
||||||
|
/* If we had putNew(), and used it above, this would be empty again. */
|
||||||
|
assertDirectorySize("/da1", 1);
|
||||||
|
|
||||||
|
assertEmptyDirs("/da2", "/da3");
|
||||||
|
|
||||||
|
/* Ensure new files update correct parent dirs. */
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc1", 100)));
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/da1/db1/fc2", 200)));
|
||||||
|
assertDirectorySize("/da1", 1);
|
||||||
|
assertDirectorySize("/da1/db1", 2);
|
||||||
|
assertEmptyDirs("/da2", "/da3");
|
||||||
|
PathMetadata meta = ms.get(strToPath("/da1/db1/fc2"));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Get file after put new.", meta);
|
||||||
|
verifyFileStatus(meta.getFileStatus(), 200);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutOverwrite() throws Exception {
|
||||||
|
final String filePath = "/a1/b1/c1/some_file";
|
||||||
|
final String dirPath = "/a1/b1/c1/d1";
|
||||||
|
ms.put(new PathMetadata(makeFileStatus(filePath, 100)));
|
||||||
|
ms.put(new PathMetadata(makeDirStatus(dirPath)));
|
||||||
|
PathMetadata meta = ms.get(strToPath(filePath));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
verifyFileStatus(meta.getFileStatus(), 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
ms.put(new PathMetadata(basicFileStatus(strToPath(filePath), 9999, false)));
|
||||||
|
meta = ms.get(strToPath(filePath));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
verifyFileStatus(meta.getFileStatus(), 9999);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRootDirPutNew() throws Exception {
|
||||||
|
Path rootPath = strToPath("/");
|
||||||
|
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/file1", 100)));
|
||||||
|
DirListingMetadata dir = ms.listChildren(rootPath);
|
||||||
|
if (!allowMissing() || dir != null) {
|
||||||
|
assertNotNull("Root dir cached", dir);
|
||||||
|
assertFalse("Root not fully cached", dir.isAuthoritative());
|
||||||
|
assertNotNull("have root dir file listing", dir.getListing());
|
||||||
|
assertEquals("One file in root dir", 1, dir.getListing().size());
|
||||||
|
assertEquals("file1 in root dir", strToPath("/file1"),
|
||||||
|
dir.getListing().iterator().next().getFileStatus().getPath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDelete() throws Exception {
|
||||||
|
setUpDeleteTest();
|
||||||
|
|
||||||
|
ms.delete(strToPath("/ADirectory1/db1/file2"));
|
||||||
|
|
||||||
|
/* Ensure delete happened. */
|
||||||
|
assertDirectorySize("/ADirectory1/db1", 1);
|
||||||
|
PathMetadata meta = ms.get(strToPath("/ADirectory1/db1/file2"));
|
||||||
|
assertTrue("File deleted", meta == null || meta.isDeleted());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDeleteSubtree() throws Exception {
|
||||||
|
deleteSubtreeHelper("");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDeleteSubtreeHostPath() throws Exception {
|
||||||
|
deleteSubtreeHelper(contract.getFileSystem().getUri().toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void deleteSubtreeHelper(String pathPrefix) throws Exception {
|
||||||
|
|
||||||
|
String p = pathPrefix;
|
||||||
|
setUpDeleteTest(p);
|
||||||
|
createNewDirs(p + "/ADirectory1/db1/dc1", p + "/ADirectory1/db1/dc1/dd1");
|
||||||
|
ms.put(new PathMetadata(
|
||||||
|
makeFileStatus(p + "/ADirectory1/db1/dc1/dd1/deepFile", 100)));
|
||||||
|
if (!allowMissing()) {
|
||||||
|
assertCached(p + "/ADirectory1/db1");
|
||||||
|
}
|
||||||
|
ms.deleteSubtree(strToPath(p + "/ADirectory1/db1/"));
|
||||||
|
|
||||||
|
assertEmptyDirectory(p + "/ADirectory1");
|
||||||
|
assertDeleted(p + "/ADirectory1/db1");
|
||||||
|
assertDeleted(p + "/ADirectory1/file1");
|
||||||
|
assertDeleted(p + "/ADirectory1/file2");
|
||||||
|
assertDeleted(p + "/ADirectory1/db1/dc1/dd1/deepFile");
|
||||||
|
assertEmptyDirectory(p + "/ADirectory2");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Some implementations might not support this. It was useful to test
|
||||||
|
* correctness of the LocalMetadataStore implementation, but feel free to
|
||||||
|
* override this to be a no-op.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testDeleteRecursiveRoot() throws Exception {
|
||||||
|
setUpDeleteTest();
|
||||||
|
|
||||||
|
ms.deleteSubtree(strToPath("/"));
|
||||||
|
assertDeleted("/ADirectory1");
|
||||||
|
assertDeleted("/ADirectory2");
|
||||||
|
assertDeleted("/ADirectory2/db1");
|
||||||
|
assertDeleted("/ADirectory2/db1/file1");
|
||||||
|
assertDeleted("/ADirectory2/db1/file2");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDeleteNonExisting() throws Exception {
|
||||||
|
// Path doesn't exist, but should silently succeed
|
||||||
|
ms.delete(strToPath("/bobs/your/uncle"));
|
||||||
|
|
||||||
|
// Ditto.
|
||||||
|
ms.deleteSubtree(strToPath("/internets"));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void setUpDeleteTest() throws IOException {
|
||||||
|
setUpDeleteTest("");
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setUpDeleteTest(String prefix) throws IOException {
|
||||||
|
createNewDirs(prefix + "/ADirectory1", prefix + "/ADirectory2",
|
||||||
|
prefix + "/ADirectory1/db1");
|
||||||
|
ms.put(new PathMetadata(makeFileStatus(prefix + "/ADirectory1/db1/file1",
|
||||||
|
100)));
|
||||||
|
ms.put(new PathMetadata(makeFileStatus(prefix + "/ADirectory1/db1/file2",
|
||||||
|
100)));
|
||||||
|
|
||||||
|
PathMetadata meta = ms.get(strToPath(prefix + "/ADirectory1/db1/file2"));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Found test file", meta);
|
||||||
|
assertDirectorySize(prefix + "/ADirectory1/db1", 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGet() throws Exception {
|
||||||
|
final String filePath = "/a1/b1/c1/some_file";
|
||||||
|
final String dirPath = "/a1/b1/c1/d1";
|
||||||
|
ms.put(new PathMetadata(makeFileStatus(filePath, 100)));
|
||||||
|
ms.put(new PathMetadata(makeDirStatus(dirPath)));
|
||||||
|
PathMetadata meta = ms.get(strToPath(filePath));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Get found file", meta);
|
||||||
|
verifyFileStatus(meta.getFileStatus(), 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(ms instanceof NullMetadataStore)) {
|
||||||
|
ms.delete(strToPath(filePath));
|
||||||
|
meta = ms.get(strToPath(filePath));
|
||||||
|
assertTrue("Tombstone not left for deleted file", meta.isDeleted());
|
||||||
|
}
|
||||||
|
|
||||||
|
meta = ms.get(strToPath(dirPath));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Get found file (dir)", meta);
|
||||||
|
assertTrue("Found dir", meta.getFileStatus().isDirectory());
|
||||||
|
}
|
||||||
|
|
||||||
|
meta = ms.get(strToPath("/bollocks"));
|
||||||
|
assertNull("Don't get non-existent file", meta);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetEmptyDir() throws Exception {
|
||||||
|
final String dirPath = "/a1/b1/c1/d1";
|
||||||
|
// Creates /a1/b1/c1/d1 as an empty dir
|
||||||
|
setupListStatus();
|
||||||
|
|
||||||
|
// 1. Tell MetadataStore (MS) that there are zero children
|
||||||
|
putListStatusFiles(dirPath, true /* authoritative */
|
||||||
|
/* zero children */);
|
||||||
|
|
||||||
|
// 2. Request a file status for dir, including whether or not the dir
|
||||||
|
// is empty.
|
||||||
|
PathMetadata meta = ms.get(strToPath(dirPath), true);
|
||||||
|
|
||||||
|
// 3. Check that either (a) the MS doesn't track whether or not it is
|
||||||
|
// empty (which is allowed), or (b) the MS knows the dir is empty.
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Get should find meta for dir", meta);
|
||||||
|
assertNotEquals("Dir is empty or unknown", Tristate.FALSE,
|
||||||
|
meta.isEmptyDirectory());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetNonEmptyDir() throws Exception {
|
||||||
|
final String dirPath = "/a1/b1/c1";
|
||||||
|
// Creates /a1/b1/c1 as an non-empty dir
|
||||||
|
setupListStatus();
|
||||||
|
|
||||||
|
// Request a file status for dir, including whether or not the dir
|
||||||
|
// is empty.
|
||||||
|
PathMetadata meta = ms.get(strToPath(dirPath), true);
|
||||||
|
|
||||||
|
// MetadataStore knows /a1/b1/c1 has at least one child. It is valid
|
||||||
|
// for it to answer either (a) UNKNOWN: the MS doesn't track whether
|
||||||
|
// or not the dir is empty, or (b) the MS knows the dir is non-empty.
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Get should find meta for dir", meta);
|
||||||
|
assertNotEquals("Dir is non-empty or unknown", Tristate.TRUE,
|
||||||
|
meta.isEmptyDirectory());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetDirUnknownIfEmpty() throws Exception {
|
||||||
|
final String dirPath = "/a1/b1/c1/d1";
|
||||||
|
// 1. Create /a1/b1/c1/d1 as an empty dir, but do not tell MetadataStore
|
||||||
|
// (MS) whether or not it has any children.
|
||||||
|
setupListStatus();
|
||||||
|
|
||||||
|
// 2. Request a file status for dir, including whether or not the dir
|
||||||
|
// is empty.
|
||||||
|
PathMetadata meta = ms.get(strToPath(dirPath), true);
|
||||||
|
|
||||||
|
// 3. Assert MS reports isEmptyDir as UNKNOWN: We haven't told MS
|
||||||
|
// whether or not the directory has any children.
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("Get should find meta for dir", meta);
|
||||||
|
assertEquals("Dir empty is unknown", Tristate.UNKNOWN,
|
||||||
|
meta.isEmptyDirectory());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testListChildren() throws Exception {
|
||||||
|
setupListStatus();
|
||||||
|
|
||||||
|
DirListingMetadata dirMeta;
|
||||||
|
dirMeta = ms.listChildren(strToPath("/"));
|
||||||
|
if (!allowMissing()) {
|
||||||
|
assertNotNull(dirMeta);
|
||||||
|
/* Cache has no way of knowing it has all entries for root unless we
|
||||||
|
* specifically tell it via put() with
|
||||||
|
* DirListingMetadata.isAuthoritative = true */
|
||||||
|
assertFalse("Root dir is not cached, or partially cached",
|
||||||
|
dirMeta.isAuthoritative());
|
||||||
|
assertListingsEqual(dirMeta.getListing(), "/a1", "/a2");
|
||||||
|
}
|
||||||
|
|
||||||
|
dirMeta = ms.listChildren(strToPath("/a1"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
dirMeta = dirMeta.withoutTombstones();
|
||||||
|
assertListingsEqual(dirMeta.getListing(), "/a1/b1", "/a1/b2");
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO HADOOP-14756 instrument MetadataStore for asserting & testing
|
||||||
|
dirMeta = ms.listChildren(strToPath("/a1/b1"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
assertListingsEqual(dirMeta.getListing(), "/a1/b1/file1", "/a1/b1/file2",
|
||||||
|
"/a1/b1/c1");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDirListingRoot() throws Exception {
|
||||||
|
commonTestPutListStatus("/");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutDirListing() throws Exception {
|
||||||
|
commonTestPutListStatus("/a");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInvalidListChildren() throws Exception {
|
||||||
|
setupListStatus();
|
||||||
|
assertNull("missing path returns null",
|
||||||
|
ms.listChildren(strToPath("/a1/b1x")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testMove() throws Exception {
|
||||||
|
// Create test dir structure
|
||||||
|
createNewDirs("/a1", "/a2", "/a3");
|
||||||
|
createNewDirs("/a1/b1", "/a1/b2");
|
||||||
|
putListStatusFiles("/a1/b1", false, "/a1/b1/file1", "/a1/b1/file2");
|
||||||
|
|
||||||
|
// Assert root listing as expected
|
||||||
|
Collection<PathMetadata> entries;
|
||||||
|
DirListingMetadata dirMeta = ms.listChildren(strToPath("/"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
dirMeta = dirMeta.withoutTombstones();
|
||||||
|
assertNotNull("Listing root", dirMeta);
|
||||||
|
entries = dirMeta.getListing();
|
||||||
|
assertListingsEqual(entries, "/a1", "/a2", "/a3");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Assert src listing as expected
|
||||||
|
dirMeta = ms.listChildren(strToPath("/a1/b1"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
assertNotNull("Listing /a1/b1", dirMeta);
|
||||||
|
entries = dirMeta.getListing();
|
||||||
|
assertListingsEqual(entries, "/a1/b1/file1", "/a1/b1/file2");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the move(): rename(/a1/b1, /b1)
|
||||||
|
Collection<Path> srcPaths = Arrays.asList(strToPath("/a1/b1"),
|
||||||
|
strToPath("/a1/b1/file1"), strToPath("/a1/b1/file2"));
|
||||||
|
|
||||||
|
ArrayList<PathMetadata> destMetas = new ArrayList<>();
|
||||||
|
destMetas.add(new PathMetadata(makeDirStatus("/b1")));
|
||||||
|
destMetas.add(new PathMetadata(makeFileStatus("/b1/file1", 100)));
|
||||||
|
destMetas.add(new PathMetadata(makeFileStatus("/b1/file2", 100)));
|
||||||
|
ms.move(srcPaths, destMetas);
|
||||||
|
|
||||||
|
// Assert src is no longer there
|
||||||
|
dirMeta = ms.listChildren(strToPath("/a1"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
assertNotNull("Listing /a1", dirMeta);
|
||||||
|
entries = dirMeta.withoutTombstones().getListing();
|
||||||
|
assertListingsEqual(entries, "/a1/b2");
|
||||||
|
}
|
||||||
|
|
||||||
|
PathMetadata meta = ms.get(strToPath("/a1/b1/file1"));
|
||||||
|
assertTrue("Src path deleted", meta == null || meta.isDeleted());
|
||||||
|
|
||||||
|
// Assert dest looks right
|
||||||
|
meta = ms.get(strToPath("/b1/file1"));
|
||||||
|
if (!allowMissing() || meta != null) {
|
||||||
|
assertNotNull("dest file not null", meta);
|
||||||
|
verifyFileStatus(meta.getFileStatus(), 100);
|
||||||
|
}
|
||||||
|
|
||||||
|
dirMeta = ms.listChildren(strToPath("/b1"));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
assertNotNull("dest listing not null", dirMeta);
|
||||||
|
entries = dirMeta.getListing();
|
||||||
|
assertListingsEqual(entries, "/b1/file1", "/b1/file2");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that the MetadataStore differentiates between the same path in two
|
||||||
|
* different buckets.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMultiBucketPaths() throws Exception {
|
||||||
|
String p1 = "s3a://bucket-a/path1";
|
||||||
|
String p2 = "s3a://bucket-b/path2";
|
||||||
|
|
||||||
|
// Make sure we start out empty
|
||||||
|
PathMetadata meta = ms.get(new Path(p1));
|
||||||
|
assertNull("Path should not be present yet.", meta);
|
||||||
|
meta = ms.get(new Path(p2));
|
||||||
|
assertNull("Path2 should not be present yet.", meta);
|
||||||
|
|
||||||
|
// Put p1, assert p2 doesn't match
|
||||||
|
ms.put(new PathMetadata(makeFileStatus(p1, 100)));
|
||||||
|
meta = ms.get(new Path(p2));
|
||||||
|
assertNull("Path 2 should not match path 1.", meta);
|
||||||
|
|
||||||
|
// Make sure delete is correct as well
|
||||||
|
if (!allowMissing()) {
|
||||||
|
ms.delete(new Path(p2));
|
||||||
|
meta = ms.get(new Path(p1));
|
||||||
|
assertNotNull("Path should not have been deleted", meta);
|
||||||
|
}
|
||||||
|
ms.delete(new Path(p1));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPruneFiles() throws Exception {
|
||||||
|
Assume.assumeTrue(supportsPruning());
|
||||||
|
createNewDirs("/pruneFiles");
|
||||||
|
|
||||||
|
long oldTime = getTime();
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/old", 1, oldTime,
|
||||||
|
oldTime)));
|
||||||
|
DirListingMetadata ls2 = ms.listChildren(strToPath("/pruneFiles"));
|
||||||
|
if (!allowMissing()) {
|
||||||
|
assertListingsEqual(ls2.getListing(), "/pruneFiles/old");
|
||||||
|
}
|
||||||
|
|
||||||
|
// It's possible for the Local implementation to get from /pruneFiles/old's
|
||||||
|
// modification time to here in under 1ms, causing it to not get pruned
|
||||||
|
Thread.sleep(1);
|
||||||
|
long cutoff = System.currentTimeMillis();
|
||||||
|
long newTime = getTime();
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/pruneFiles/new", 1, newTime,
|
||||||
|
newTime)));
|
||||||
|
|
||||||
|
DirListingMetadata ls;
|
||||||
|
ls = ms.listChildren(strToPath("/pruneFiles"));
|
||||||
|
if (!allowMissing()) {
|
||||||
|
assertListingsEqual(ls.getListing(), "/pruneFiles/new",
|
||||||
|
"/pruneFiles/old");
|
||||||
|
}
|
||||||
|
ms.prune(cutoff);
|
||||||
|
ls = ms.listChildren(strToPath("/pruneFiles"));
|
||||||
|
if (allowMissing()) {
|
||||||
|
assertDeleted("/pruneFiles/old");
|
||||||
|
} else {
|
||||||
|
assertListingsEqual(ls.getListing(), "/pruneFiles/new");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPruneDirs() throws Exception {
|
||||||
|
Assume.assumeTrue(supportsPruning());
|
||||||
|
|
||||||
|
// We only test that files, not dirs, are removed during prune.
|
||||||
|
// We specifically allow directories to remain, as it is more robust
|
||||||
|
// for DynamoDBMetadataStore's prune() implementation: If a
|
||||||
|
// file was created in a directory while it was being pruned, it would
|
||||||
|
// violate the invariant that all ancestors of a file exist in the table.
|
||||||
|
|
||||||
|
createNewDirs("/pruneDirs/dir");
|
||||||
|
|
||||||
|
long oldTime = getTime();
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/pruneDirs/dir/file",
|
||||||
|
1, oldTime, oldTime)));
|
||||||
|
|
||||||
|
// It's possible for the Local implementation to get from the old
|
||||||
|
// modification time to here in under 1ms, causing it to not get pruned
|
||||||
|
Thread.sleep(1);
|
||||||
|
long cutoff = getTime();
|
||||||
|
|
||||||
|
ms.prune(cutoff);
|
||||||
|
|
||||||
|
assertDeleted("/pruneDirs/dir/file");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPruneUnsetsAuthoritative() throws Exception {
|
||||||
|
String rootDir = "/unpruned-root-dir";
|
||||||
|
String grandparentDir = rootDir + "/pruned-grandparent-dir";
|
||||||
|
String parentDir = grandparentDir + "/pruned-parent-dir";
|
||||||
|
String staleFile = parentDir + "/stale-file";
|
||||||
|
String freshFile = rootDir + "/fresh-file";
|
||||||
|
String[] directories = {rootDir, grandparentDir, parentDir};
|
||||||
|
|
||||||
|
createNewDirs(rootDir, grandparentDir, parentDir);
|
||||||
|
long time = System.currentTimeMillis();
|
||||||
|
ms.put(new PathMetadata(
|
||||||
|
new FileStatus(0, false, 0, 0, time - 1, strToPath(staleFile)),
|
||||||
|
Tristate.FALSE, false));
|
||||||
|
ms.put(new PathMetadata(
|
||||||
|
new FileStatus(0, false, 0, 0, time + 1, strToPath(freshFile)),
|
||||||
|
Tristate.FALSE, false));
|
||||||
|
|
||||||
|
ms.prune(time);
|
||||||
|
DirListingMetadata listing;
|
||||||
|
for (String directory : directories) {
|
||||||
|
Path path = strToPath(directory);
|
||||||
|
if (ms.get(path) != null) {
|
||||||
|
listing = ms.listChildren(path);
|
||||||
|
assertFalse(listing.isAuthoritative());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Helper functions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/** Modifies paths input array and returns it. */
|
||||||
|
private String[] buildPathStrings(String parent, String... paths)
|
||||||
|
throws IOException {
|
||||||
|
for (int i = 0; i < paths.length; i++) {
|
||||||
|
Path p = new Path(strToPath(parent), paths[i]);
|
||||||
|
paths[i] = p.toString();
|
||||||
|
}
|
||||||
|
return paths;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void commonTestPutListStatus(final String parent) throws IOException {
|
||||||
|
putListStatusFiles(parent, true, buildPathStrings(parent, "file1", "file2",
|
||||||
|
"file3"));
|
||||||
|
DirListingMetadata dirMeta = ms.listChildren(strToPath(parent));
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
dirMeta = dirMeta.withoutTombstones();
|
||||||
|
assertNotNull("list after putListStatus", dirMeta);
|
||||||
|
Collection<PathMetadata> entries = dirMeta.getListing();
|
||||||
|
assertNotNull("listStatus has entries", entries);
|
||||||
|
assertListingsEqual(entries,
|
||||||
|
buildPathStrings(parent, "file1", "file2", "file3"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void setupListStatus() throws IOException {
|
||||||
|
createNewDirs("/a1", "/a2", "/a1/b1", "/a1/b2", "/a1/b1/c1",
|
||||||
|
"/a1/b1/c1/d1");
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/a1/b1/file1", 100)));
|
||||||
|
ms.put(new PathMetadata(makeFileStatus("/a1/b1/file2", 100)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertListingsEqual(Collection<PathMetadata> listing,
|
||||||
|
String ...pathStrs) throws IOException {
|
||||||
|
Set<Path> a = new HashSet<>();
|
||||||
|
for (PathMetadata meta : listing) {
|
||||||
|
a.add(meta.getFileStatus().getPath());
|
||||||
|
}
|
||||||
|
|
||||||
|
Set<Path> b = new HashSet<>();
|
||||||
|
for (String ps : pathStrs) {
|
||||||
|
b.add(strToPath(ps));
|
||||||
|
}
|
||||||
|
assertEquals("Same set of files", b, a);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void putListStatusFiles(String dirPath, boolean authoritative,
|
||||||
|
String... filenames) throws IOException {
|
||||||
|
ArrayList<PathMetadata> metas = new ArrayList<>(filenames.length);
|
||||||
|
for (String filename : filenames) {
|
||||||
|
metas.add(new PathMetadata(makeFileStatus(filename, 100)));
|
||||||
|
}
|
||||||
|
DirListingMetadata dirMeta =
|
||||||
|
new DirListingMetadata(strToPath(dirPath), metas, authoritative);
|
||||||
|
ms.put(dirMeta);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void createNewDirs(String... dirs)
|
||||||
|
throws IOException {
|
||||||
|
for (String pathStr : dirs) {
|
||||||
|
ms.put(new PathMetadata(makeDirStatus(pathStr)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertDirectorySize(String pathStr, int size)
|
||||||
|
throws IOException {
|
||||||
|
DirListingMetadata dirMeta = ms.listChildren(strToPath(pathStr));
|
||||||
|
if (!allowMissing()) {
|
||||||
|
assertNotNull("Directory " + pathStr + " in cache", dirMeta);
|
||||||
|
}
|
||||||
|
if (!allowMissing() || dirMeta != null) {
|
||||||
|
dirMeta = dirMeta.withoutTombstones();
|
||||||
|
assertEquals("Number of entries in dir " + pathStr, size,
|
||||||
|
nonDeleted(dirMeta.getListing()).size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** @return only file statuses which are *not* marked deleted. */
|
||||||
|
private Collection<PathMetadata> nonDeleted(
|
||||||
|
Collection<PathMetadata> statuses) {
|
||||||
|
Collection<PathMetadata> currentStatuses = new ArrayList<>();
|
||||||
|
for (PathMetadata status : statuses) {
|
||||||
|
if (!status.isDeleted()) {
|
||||||
|
currentStatuses.add(status);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return currentStatuses;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertDeleted(String pathStr) throws IOException {
|
||||||
|
Path path = strToPath(pathStr);
|
||||||
|
PathMetadata meta = ms.get(path);
|
||||||
|
boolean cached = meta != null && !meta.isDeleted();
|
||||||
|
assertFalse(pathStr + " should not be cached.", cached);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void assertCached(String pathStr) throws IOException {
|
||||||
|
Path path = strToPath(pathStr);
|
||||||
|
PathMetadata meta = ms.get(path);
|
||||||
|
boolean cached = meta != null && !meta.isDeleted();
|
||||||
|
assertTrue(pathStr + " should be cached.", cached);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convenience to create a fully qualified Path from string.
|
||||||
|
*/
|
||||||
|
Path strToPath(String p) throws IOException {
|
||||||
|
final Path path = new Path(p);
|
||||||
|
assert path.isAbsolute();
|
||||||
|
return path.makeQualified(contract.getFileSystem().getUri(), null);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertEmptyDirectory(String pathStr) throws IOException {
|
||||||
|
assertDirectorySize(pathStr, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assertEmptyDirs(String ...dirs) throws IOException {
|
||||||
|
for (String pathStr : dirs) {
|
||||||
|
assertEmptyDirectory(pathStr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
FileStatus basicFileStatus(Path path, int size, boolean isDir) throws
|
||||||
|
IOException {
|
||||||
|
return basicFileStatus(path, size, isDir, modTime, accessTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
FileStatus basicFileStatus(Path path, int size, boolean isDir,
|
||||||
|
long newModTime, long newAccessTime) throws IOException {
|
||||||
|
return new FileStatus(size, isDir, REPLICATION, BLOCK_SIZE, newModTime,
|
||||||
|
newAccessTime, PERMISSION, OWNER, GROUP, path);
|
||||||
|
}
|
||||||
|
|
||||||
|
private FileStatus makeFileStatus(String pathStr, int size) throws
|
||||||
|
IOException {
|
||||||
|
return makeFileStatus(pathStr, size, modTime, accessTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
private FileStatus makeFileStatus(String pathStr, int size, long newModTime,
|
||||||
|
long newAccessTime) throws IOException {
|
||||||
|
return basicFileStatus(strToPath(pathStr), size, false,
|
||||||
|
newModTime, newAccessTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
void verifyFileStatus(FileStatus status, long size) {
|
||||||
|
S3ATestUtils.verifyFileStatus(status, size, BLOCK_SIZE, modTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
private FileStatus makeDirStatus(String pathStr) throws IOException {
|
||||||
|
return basicFileStatus(strToPath(pathStr), 0, true, modTime, accessTime);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verify the directory file status. Subclass may verify additional fields.
|
||||||
|
*/
|
||||||
|
void verifyDirStatus(FileStatus status) {
|
||||||
|
assertTrue("Is a dir", status.isDirectory());
|
||||||
|
assertEquals("zero length", 0, status.getLen());
|
||||||
|
}
|
||||||
|
|
||||||
|
long getModTime() {
|
||||||
|
return modTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
long getAccessTime() {
|
||||||
|
return accessTime;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static long getTime() {
|
||||||
|
return System.currentTimeMillis();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
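As the MetadataStoreTestBase javadoc above notes, each MetadataStore implementation gets its test coverage by subclassing that class and overriding createContract(). A minimal illustrative skeleton (not part of this patch, with the actual contract construction deliberately left out) looks like:

/** Illustrative only: the two overrides every implementation test provides. */
class ExampleMetadataStoreTest extends MetadataStoreTestBase {
  @Override
  public AbstractMSContract createContract() throws IOException {
    return createContract(new Configuration());
  }

  @Override
  public AbstractMSContract createContract(Configuration conf)
      throws IOException {
    // Wire up the MetadataStore implementation under test here, e.g. a
    // LocalMetadataStore over a local FileSystem; omitted in this sketch.
    throw new UnsupportedOperationException("sketch only");
  }
}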
@ -0,0 +1,303 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;

import static org.hamcrest.CoreMatchers.notNullValue;
import static org.junit.Assert.*;

/**
 * Unit tests of {@link DirListingMetadata}.
 */
public class TestDirListingMetadata {

  private static final String TEST_OWNER = "hadoop";

  @Rule
  public ExpectedException exception = ExpectedException.none();

  @Test
  public void testNullPath() {
    exception.expect(NullPointerException.class);
    exception.expectMessage(notNullValue(String.class));
    new DirListingMetadata(null, null, false);
  }

  @Test
  public void testNullListing() {
    Path path = new Path("/path");
    DirListingMetadata meta = new DirListingMetadata(path, null, false);
    assertEquals(path, meta.getPath());
    assertNotNull(meta.getListing());
    assertTrue(meta.getListing().isEmpty());
    assertFalse(meta.isAuthoritative());
  }

  @Test
  public void testEmptyListing() {
    Path path = new Path("/path");
    DirListingMetadata meta = new DirListingMetadata(path,
        new ArrayList<PathMetadata>(0),
        false);
    assertEquals(path, meta.getPath());
    assertNotNull(meta.getListing());
    assertTrue(meta.getListing().isEmpty());
    assertFalse(meta.isAuthoritative());
  }

  @Test
  public void testListing() {
    Path path = new Path("/path");
    PathMetadata pathMeta1 = new PathMetadata(
        new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
    PathMetadata pathMeta2 = new PathMetadata(
        new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
    PathMetadata pathMeta3 = new PathMetadata(
        new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
    List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
    DirListingMetadata meta = new DirListingMetadata(path, listing, false);
    assertEquals(path, meta.getPath());
    assertNotNull(meta.getListing());
    assertFalse(meta.getListing().isEmpty());
    assertTrue(meta.getListing().contains(pathMeta1));
    assertTrue(meta.getListing().contains(pathMeta2));
    assertTrue(meta.getListing().contains(pathMeta3));
    assertFalse(meta.isAuthoritative());
  }

  @Test
  public void testListingUnmodifiable() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = makeTwoDirsOneFile(path);
|
||||||
|
assertNotNull(meta.getListing());
|
||||||
|
exception.expect(UnsupportedOperationException.class);
|
||||||
|
meta.getListing().clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAuthoritative() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, true);
|
||||||
|
assertEquals(path, meta.getPath());
|
||||||
|
assertNotNull(meta.getListing());
|
||||||
|
assertTrue(meta.getListing().isEmpty());
|
||||||
|
assertTrue(meta.isAuthoritative());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testSetAuthoritative() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
assertEquals(path, meta.getPath());
|
||||||
|
assertNotNull(meta.getListing());
|
||||||
|
assertTrue(meta.getListing().isEmpty());
|
||||||
|
assertFalse(meta.isAuthoritative());
|
||||||
|
meta.setAuthoritative(true);
|
||||||
|
assertTrue(meta.isAuthoritative());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGet() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
PathMetadata pathMeta1 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta2 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta3 = new PathMetadata(
|
||||||
|
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
|
||||||
|
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
|
||||||
|
assertEquals(path, meta.getPath());
|
||||||
|
assertNotNull(meta.getListing());
|
||||||
|
assertFalse(meta.getListing().isEmpty());
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta1));
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta2));
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta3));
|
||||||
|
assertFalse(meta.isAuthoritative());
|
||||||
|
assertEquals(pathMeta1, meta.get(pathMeta1.getFileStatus().getPath()));
|
||||||
|
assertEquals(pathMeta2, meta.get(pathMeta2.getFileStatus().getPath()));
|
||||||
|
assertEquals(pathMeta3, meta.get(pathMeta3.getFileStatus().getPath()));
|
||||||
|
assertNull(meta.get(new Path(path, "notfound")));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetNull() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(NullPointerException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.get(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetRoot() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(IllegalArgumentException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.get(new Path("/"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetNotChild() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(IllegalArgumentException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.get(new Path("/different/ancestor"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPut() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
PathMetadata pathMeta1 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta2 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta3 = new PathMetadata(
|
||||||
|
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
|
||||||
|
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
|
||||||
|
assertEquals(path, meta.getPath());
|
||||||
|
assertNotNull(meta.getListing());
|
||||||
|
assertFalse(meta.getListing().isEmpty());
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta1));
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta2));
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta3));
|
||||||
|
assertFalse(meta.isAuthoritative());
|
||||||
|
PathMetadata pathMeta4 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir3"), TEST_OWNER));
|
||||||
|
meta.put(pathMeta4.getFileStatus());
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta4));
|
||||||
|
assertEquals(pathMeta4, meta.get(pathMeta4.getFileStatus().getPath()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutNull() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(NullPointerException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.put(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutNullPath() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(NullPointerException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.put(new S3AFileStatus(true, null, TEST_OWNER));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutRoot() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(IllegalArgumentException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.put(new S3AFileStatus(true, new Path("/"), TEST_OWNER));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPutNotChild() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(IllegalArgumentException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.put(new S3AFileStatus(true, new Path("/different/ancestor"),
|
||||||
|
TEST_OWNER));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemove() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
PathMetadata pathMeta1 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir1"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta2 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(path, "dir2"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta3 = new PathMetadata(
|
||||||
|
new S3AFileStatus(123, 456, new Path(path, "file1"), 8192, TEST_OWNER));
|
||||||
|
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, listing, false);
|
||||||
|
assertEquals(path, meta.getPath());
|
||||||
|
assertNotNull(meta.getListing());
|
||||||
|
assertFalse(meta.getListing().isEmpty());
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta1));
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta2));
|
||||||
|
assertTrue(meta.getListing().contains(pathMeta3));
|
||||||
|
assertFalse(meta.isAuthoritative());
|
||||||
|
meta.remove(pathMeta1.getFileStatus().getPath());
|
||||||
|
assertFalse(meta.getListing().contains(pathMeta1));
|
||||||
|
assertNull(meta.get(pathMeta1.getFileStatus().getPath()));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveNull() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(NullPointerException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.remove(null);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveRoot() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(IllegalArgumentException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.remove(new Path("/"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRemoveNotChild() {
|
||||||
|
Path path = new Path("/path");
|
||||||
|
DirListingMetadata meta = new DirListingMetadata(path, null, false);
|
||||||
|
exception.expect(IllegalArgumentException.class);
|
||||||
|
exception.expectMessage(notNullValue(String.class));
|
||||||
|
meta.remove(new Path("/different/ancestor"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create DirListingMetadata with two dirs and one file living in directory
|
||||||
|
* 'parent'
|
||||||
|
*/
|
||||||
|
private static DirListingMetadata makeTwoDirsOneFile(Path parent) {
|
||||||
|
PathMetadata pathMeta1 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(parent, "dir1"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta2 = new PathMetadata(
|
||||||
|
new S3AFileStatus(true, new Path(parent, "dir2"), TEST_OWNER));
|
||||||
|
PathMetadata pathMeta3 = new PathMetadata(
|
||||||
|
new S3AFileStatus(123, 456, new Path(parent, "file1"), 8192,
|
||||||
|
TEST_OWNER));
|
||||||
|
List<PathMetadata> listing = Arrays.asList(pathMeta1, pathMeta2, pathMeta3);
|
||||||
|
return new DirListingMetadata(parent, listing, false);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,594 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.amazonaws.AmazonServiceException;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.Item;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.Table;
|
||||||
|
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
|
||||||
|
import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
|
||||||
|
import com.amazonaws.services.dynamodbv2.model.TableDescription;
|
||||||
|
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
|
import org.apache.hadoop.fs.s3a.Tristate;
|
||||||
|
|
||||||
|
import org.junit.AfterClass;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.Timeout;
|
||||||
|
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.MockS3ClientFactory;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileStatus;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3ClientFactory;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.*;
|
||||||
|
import static org.apache.hadoop.test.LambdaTestUtils.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that {@link DynamoDBMetadataStore} implements {@link MetadataStore}.
|
||||||
|
*
|
||||||
|
* In this unit test, we use an in-memory DynamoDBLocal server instead of real
|
||||||
|
* AWS DynamoDB. An {@link S3AFileSystem} object is created and shared for
|
||||||
|
* initializing {@link DynamoDBMetadataStore} objects. No real S3 requests
* are issued, as the underlying AWS S3 client is mocked. You won't be
* charged for AWS S3 or DynamoDB when you run this test.
*
* As described in the base class, every test case has an independent contract
* that creates a new {@link DynamoDBMetadataStore} instance and initializes
* it. A table will be created for each test by the test contract, and will be
* destroyed after the test case finishes.
|
||||||
|
*/
|
||||||
|
public class TestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(TestDynamoDBMetadataStore.class);
|
||||||
|
private static final String BUCKET = "TestDynamoDBMetadataStore";
|
||||||
|
private static final String S3URI =
|
||||||
|
URI.create(FS_S3A + "://" + BUCKET + "/").toString();
|
||||||
|
public static final PrimaryKey
|
||||||
|
VERSION_MARKER_PRIMARY_KEY = createVersionMarkerPrimaryKey(
|
||||||
|
DynamoDBMetadataStore.VERSION_MARKER);
|
||||||
|
|
||||||
|
/** The DynamoDB instance that can issue requests directly to server. */
|
||||||
|
private static DynamoDB dynamoDB;
|
||||||
|
|
||||||
|
@Rule
|
||||||
|
public final Timeout timeout = new Timeout(60 * 1000);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Start the in-memory DynamoDBLocal server and initialize the S3 file system.
|
||||||
|
*/
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpBeforeClass() throws Exception {
|
||||||
|
DynamoDBLocalClientFactory.startSingletonServer();
|
||||||
|
try {
|
||||||
|
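// obtain a client for the local DynamoDB server via a throwaway metadata store contract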
dynamoDB = new DynamoDBMSContract().getMetadataStore().getDynamoDB();
|
||||||
|
} catch (AmazonServiceException e) {
|
||||||
|
final String msg = "Cannot initialize a DynamoDBMetadataStore instance "
|
||||||
|
+ "against the local DynamoDB server. Perhaps the DynamoDBLocal "
|
||||||
|
+ "server is not configured correctly. ";
|
||||||
|
LOG.error(msg, e);
|
||||||
|
// fail fast if the DynamoDBLocal server cannot work
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@AfterClass
|
||||||
|
public static void tearDownAfterClass() throws Exception {
|
||||||
|
if (dynamoDB != null) {
|
||||||
|
dynamoDB.shutdown();
|
||||||
|
}
|
||||||
|
DynamoDBLocalClientFactory.stopSingletonServer();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Each contract has its own S3AFileSystem and DynamoDBMetadataStore objects.
|
||||||
|
*/
|
||||||
|
private static class DynamoDBMSContract extends AbstractMSContract {
|
||||||
|
private final S3AFileSystem s3afs;
|
||||||
|
private final DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
|
||||||
|
|
||||||
|
DynamoDBMSContract() throws IOException {
|
||||||
|
this(new Configuration());
|
||||||
|
}
|
||||||
|
|
||||||
|
DynamoDBMSContract(Configuration conf) throws IOException {
|
||||||
|
// using mocked S3 clients
|
||||||
|
conf.setClass(S3_CLIENT_FACTORY_IMPL, MockS3ClientFactory.class,
|
||||||
|
S3ClientFactory.class);
|
||||||
|
conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, S3URI);
|
||||||
|
// setting config for creating a DynamoDBClient against local server
|
||||||
|
conf.set(ACCESS_KEY, "dummy-access-key");
|
||||||
|
conf.set(SECRET_KEY, "dummy-secret-key");
|
||||||
|
conf.setBoolean(S3GUARD_DDB_TABLE_CREATE_KEY, true);
|
||||||
|
conf.setClass(S3Guard.S3GUARD_DDB_CLIENT_FACTORY_IMPL,
|
||||||
|
DynamoDBLocalClientFactory.class, DynamoDBClientFactory.class);
|
||||||
|
|
||||||
|
// always create new file system object for a test contract
|
||||||
|
s3afs = (S3AFileSystem) FileSystem.newInstance(conf);
|
||||||
|
ms.initialize(s3afs);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public S3AFileSystem getFileSystem() {
|
||||||
|
return s3afs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DynamoDBMetadataStore getMetadataStore() {
|
||||||
|
return ms;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DynamoDBMSContract createContract() throws IOException {
|
||||||
|
return new DynamoDBMSContract();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DynamoDBMSContract createContract(Configuration conf) throws
|
||||||
|
IOException {
|
||||||
|
return new DynamoDBMSContract(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
FileStatus basicFileStatus(Path path, int size, boolean isDir)
|
||||||
|
throws IOException {
|
||||||
|
String owner = UserGroupInformation.getCurrentUser().getShortUserName();
|
||||||
|
return isDir
|
||||||
|
? new S3AFileStatus(true, path, owner)
|
||||||
|
: new S3AFileStatus(size, getModTime(), path, BLOCK_SIZE, owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
private DynamoDBMetadataStore getDynamoMetadataStore() throws IOException {
|
||||||
|
return (DynamoDBMetadataStore) getContract().getMetadataStore();
|
||||||
|
}
|
||||||
|
|
||||||
|
private S3AFileSystem getFileSystem() throws IOException {
|
||||||
|
return (S3AFileSystem) getContract().getFileSystem();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests that after initialize() using an S3AFileSystem object, the
* instance should have been initialized successfully, and the table is ACTIVE.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testInitialize() throws IOException {
|
||||||
|
final String tableName = "testInitializeWithFileSystem";
|
||||||
|
final S3AFileSystem s3afs = getFileSystem();
|
||||||
|
final Configuration conf = s3afs.getConf();
|
||||||
|
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||||
|
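// initializing against the S3A file system should create the table and leave it ACTIVE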
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||||
|
ddbms.initialize(s3afs);
|
||||||
|
verifyTableInitialized(tableName);
|
||||||
|
assertNotNull(ddbms.getTable());
|
||||||
|
assertEquals(tableName, ddbms.getTable().getTableName());
|
||||||
|
String expectedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
|
||||||
|
s3afs.getBucketLocation(tableName));
|
||||||
|
assertEquals("DynamoDB table should be in configured region or the same" +
|
||||||
|
" region as S3 bucket",
|
||||||
|
expectedRegion,
|
||||||
|
ddbms.getRegion());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This tests that after initialize() using a Configuration object, the
* instance should have been initialized successfully, and the table is ACTIVE.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testInitializeWithConfiguration() throws IOException {
|
||||||
|
final String tableName = "testInitializeWithConfiguration";
|
||||||
|
final Configuration conf = getFileSystem().getConf();
|
||||||
|
conf.unset(S3GUARD_DDB_TABLE_NAME_KEY);
|
||||||
|
String savedRegion = conf.get(S3GUARD_DDB_REGION_KEY,
|
||||||
|
getFileSystem().getBucketLocation());
|
||||||
|
conf.unset(S3GUARD_DDB_REGION_KEY);
|
||||||
|
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||||
|
ddbms.initialize(conf);
|
||||||
|
fail("Should have failed because the table name is not set!");
|
||||||
|
} catch (IllegalArgumentException ignored) {
|
||||||
|
}
|
||||||
|
// config table name
|
||||||
|
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||||
|
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||||
|
ddbms.initialize(conf);
|
||||||
|
fail("Should have failed because the region is not set!");
|
||||||
|
} catch (IllegalArgumentException ignored) {
|
||||||
|
}
|
||||||
|
// config region
|
||||||
|
conf.set(S3GUARD_DDB_REGION_KEY, savedRegion);
|
||||||
|
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||||
|
ddbms.initialize(conf);
|
||||||
|
verifyTableInitialized(tableName);
|
||||||
|
assertNotNull(ddbms.getTable());
|
||||||
|
assertEquals(tableName, ddbms.getTable().getTableName());
|
||||||
|
assertEquals("Unexpected key schema found!",
|
||||||
|
keySchema(),
|
||||||
|
ddbms.getTable().describe().getKeySchema());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that for a large batch write request, the limit is handled correctly.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testBatchWrite() throws IOException {
|
||||||
|
final int[] numMetasToDeleteOrPut = {
|
||||||
|
-1, // null
|
||||||
|
0, // empty collection
|
||||||
|
1, // one path
|
||||||
|
S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT, // exact limit of a batch request
|
||||||
|
S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT + 1 // limit + 1
|
||||||
|
};
|
||||||
|
for (int numOldMetas : numMetasToDeleteOrPut) {
|
||||||
|
for (int numNewMetas : numMetasToDeleteOrPut) {
|
||||||
|
doTestBatchWrite(numOldMetas, numNewMetas);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void doTestBatchWrite(int numDelete, int numPut) throws IOException {
|
||||||
|
final String root = S3URI + "/testBatchWrite_" + numDelete + '_' + numPut;
|
||||||
|
final Path oldDir = new Path(root, "oldDir");
|
||||||
|
final Path newDir = new Path(root, "newDir");
|
||||||
|
LOG.info("doTestBatchWrite: oldDir={}, newDir={}", oldDir, newDir);
|
||||||
|
|
||||||
|
DynamoDBMetadataStore ms = getDynamoMetadataStore();
|
||||||
|
ms.put(new PathMetadata(basicFileStatus(oldDir, 0, true)));
|
||||||
|
ms.put(new PathMetadata(basicFileStatus(newDir, 0, true)));
|
||||||
|
|
||||||
|
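// build the old (to delete) and new (to create) listings; a negative count means a null collection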
final List<PathMetadata> oldMetas =
|
||||||
|
numDelete < 0 ? null : new ArrayList<PathMetadata>(numDelete);
|
||||||
|
for (int i = 0; i < numDelete; i++) {
|
||||||
|
oldMetas.add(new PathMetadata(
|
||||||
|
basicFileStatus(new Path(oldDir, "child" + i), i, true)));
|
||||||
|
}
|
||||||
|
final List<PathMetadata> newMetas =
|
||||||
|
numPut < 0 ? null : new ArrayList<PathMetadata>(numPut);
|
||||||
|
for (int i = 0; i < numPut; i++) {
|
||||||
|
newMetas.add(new PathMetadata(
|
||||||
|
basicFileStatus(new Path(newDir, "child" + i), i, false)));
|
||||||
|
}
|
||||||
|
|
||||||
|
Collection<Path> pathsToDelete = null;
|
||||||
|
if (oldMetas != null) {
|
||||||
|
// put all metadata of old paths and verify
|
||||||
|
ms.put(new DirListingMetadata(oldDir, oldMetas, false));
|
||||||
|
assertEquals(0, ms.listChildren(newDir).withoutTombstones().numEntries());
|
||||||
|
assertTrue(CollectionUtils.isEqualCollection(oldMetas,
|
||||||
|
ms.listChildren(oldDir).getListing()));
|
||||||
|
|
||||||
|
pathsToDelete = new ArrayList<>(oldMetas.size());
|
||||||
|
for (PathMetadata meta : oldMetas) {
|
||||||
|
pathsToDelete.add(meta.getFileStatus().getPath());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// move the old paths to new paths and verify
|
||||||
|
ms.move(pathsToDelete, newMetas);
|
||||||
|
assertEquals(0, ms.listChildren(oldDir).withoutTombstones().numEntries());
|
||||||
|
if (newMetas != null) {
|
||||||
|
assertTrue(CollectionUtils.isEqualCollection(newMetas,
|
||||||
|
ms.listChildren(newDir).getListing()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testInitExistingTable() throws IOException {
|
||||||
|
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
|
||||||
|
final String tableName = ddbms.getTable().getTableName();
|
||||||
|
verifyTableInitialized(tableName);
|
||||||
|
// create existing table
|
||||||
|
ddbms.initTable();
|
||||||
|
verifyTableInitialized(tableName);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the low level version check code.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testItemVersionCompatibility() throws Throwable {
|
||||||
|
verifyVersionCompatibility("table",
|
||||||
|
createVersionMarker(VERSION_MARKER, VERSION, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that a version marker entry without the version number field
|
||||||
|
* is rejected as incompatible with a meaningful error message.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testItemLacksVersion() throws Throwable {
|
||||||
|
intercept(IOException.class, E_NOT_VERSION_MARKER,
|
||||||
|
new VoidCallable() {
|
||||||
|
@Override
|
||||||
|
public void call() throws Exception {
|
||||||
|
verifyVersionCompatibility("table",
|
||||||
|
new Item().withPrimaryKey(
|
||||||
|
createVersionMarkerPrimaryKey(VERSION_MARKER)));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete the version marker and verify that table init fails.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTableVersionRequired() throws Exception {
|
||||||
|
Configuration conf = getFileSystem().getConf();
|
||||||
|
int maxRetries = conf.getInt(S3GUARD_DDB_MAX_RETRIES,
|
||||||
|
S3GUARD_DDB_MAX_RETRIES_DEFAULT);
|
||||||
|
conf.setInt(S3GUARD_DDB_MAX_RETRIES, 3);
|
||||||
|
|
||||||
|
final DynamoDBMetadataStore ddbms = createContract(conf).getMetadataStore();
|
||||||
|
String tableName = conf.get(S3GUARD_DDB_TABLE_NAME_KEY, BUCKET);
|
||||||
|
Table table = verifyTableInitialized(tableName);
|
||||||
|
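// delete the version marker so that the following table init must fail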
table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
|
||||||
|
|
||||||
|
// create existing table
|
||||||
|
intercept(IOException.class, E_NO_VERSION_MARKER,
|
||||||
|
new VoidCallable() {
|
||||||
|
@Override
|
||||||
|
public void call() throws Exception {
|
||||||
|
ddbms.initTable();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
conf.setInt(S3GUARD_DDB_MAX_RETRIES, maxRetries);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the version value to a different number and verify that
|
||||||
|
* table init fails.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testTableVersionMismatch() throws Exception {
|
||||||
|
final DynamoDBMetadataStore ddbms = createContract().getMetadataStore();
|
||||||
|
String tableName = getFileSystem().getConf()
|
||||||
|
.get(S3GUARD_DDB_TABLE_NAME_KEY, BUCKET);
|
||||||
|
Table table = verifyTableInitialized(tableName);
|
||||||
|
table.deleteItem(VERSION_MARKER_PRIMARY_KEY);
|
||||||
|
Item v200 = createVersionMarker(VERSION_MARKER, 200, 0);
|
||||||
|
table.putItem(v200);
|
||||||
|
|
||||||
|
// create existing table
|
||||||
|
intercept(IOException.class, E_INCOMPATIBLE_VERSION,
|
||||||
|
new VoidCallable() {
|
||||||
|
@Override
|
||||||
|
public void call() throws Exception {
|
||||||
|
ddbms.initTable();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that initTable fails with IOException when table does not exist and
|
||||||
|
* table auto-creation is disabled.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testFailNonexistentTable() throws IOException {
|
||||||
|
final String tableName = "testFailNonexistentTable";
|
||||||
|
final S3AFileSystem s3afs = getFileSystem();
|
||||||
|
final Configuration conf = s3afs.getConf();
|
||||||
|
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||||
|
conf.unset(S3GUARD_DDB_TABLE_CREATE_KEY);
|
||||||
|
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||||
|
ddbms.initialize(s3afs);
|
||||||
|
fail("Should have failed as table does not exist and table auto-creation"
|
||||||
|
+ " is disabled");
|
||||||
|
} catch (IOException ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test cases about root directory as it is not in the DynamoDB table.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testRootDirectory() throws IOException {
|
||||||
|
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
|
||||||
|
Path rootPath = new Path(S3URI);
|
||||||
|
verifyRootDirectory(ddbms.get(rootPath), true);
|
||||||
|
|
||||||
|
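// add a child entry; the root should no longer be reported as an empty directory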
ddbms.put(new PathMetadata(new S3AFileStatus(true,
|
||||||
|
new Path(rootPath, "foo"),
|
||||||
|
UserGroupInformation.getCurrentUser().getShortUserName())));
|
||||||
|
verifyRootDirectory(ddbms.get(new Path(S3URI)), false);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void verifyRootDirectory(PathMetadata rootMeta, boolean isEmpty) {
|
||||||
|
assertNotNull(rootMeta);
|
||||||
|
final FileStatus status = rootMeta.getFileStatus();
|
||||||
|
assertNotNull(status);
|
||||||
|
assertTrue(status.isDirectory());
|
||||||
|
// UNKNOWN is always a valid option, but true / false should not contradict
|
||||||
|
if (isEmpty) {
|
||||||
|
assertNotSame("Should not be marked non-empty",
|
||||||
|
Tristate.FALSE,
|
||||||
|
rootMeta.isEmptyDirectory());
|
||||||
|
} else {
|
||||||
|
assertNotSame("Should not be marked empty",
|
||||||
|
Tristate.TRUE,
|
||||||
|
rootMeta.isEmptyDirectory());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test that when moving nested paths, all their ancestors up to the destination
|
||||||
|
* root will also be created.
|
||||||
|
* Here is the directory tree before move:
|
||||||
|
* <pre>
|
||||||
|
* testMovePopulateAncestors
|
||||||
|
* ├── a
|
||||||
|
* │ └── b
|
||||||
|
* │ └── src
|
||||||
|
* │ ├── dir1
|
||||||
|
* │ │ └── dir2
|
||||||
|
* │ └── file1.txt
|
||||||
|
* └── c
|
||||||
|
* └── d
|
||||||
|
* └── dest
|
||||||
|
*</pre>
|
||||||
|
* As part of rename(a/b/src, d/c/dest), S3A will enumerate the subtree at
|
||||||
|
* a/b/src. This test verifies that after the move, the new subtree at
|
||||||
|
* 'dest' is reachable from the root (i.e. c/ and c/d exist in the table).
|
||||||
|
* DynamoDBMetadataStore depends on this property to do recursive delete
|
||||||
|
* without a full table scan.
|
||||||
|
*/
|
||||||
|
@Test
|
||||||
|
public void testMovePopulatesAncestors() throws IOException {
|
||||||
|
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
|
||||||
|
final String testRoot = "/testMovePopulatesAncestors";
|
||||||
|
final String srcRoot = testRoot + "/a/b/src";
|
||||||
|
final String destRoot = testRoot + "/c/d/e/dest";
|
||||||
|
|
||||||
|
final Path nestedPath1 = strToPath(srcRoot + "/file1.txt");
|
||||||
|
ddbms.put(new PathMetadata(basicFileStatus(nestedPath1, 1024, false)));
|
||||||
|
final Path nestedPath2 = strToPath(srcRoot + "/dir1/dir2");
|
||||||
|
ddbms.put(new PathMetadata(basicFileStatus(nestedPath2, 0, true)));
|
||||||
|
|
||||||
|
// We don't put the destRoot path here, since put() would create ancestor
|
||||||
|
// entries, and we want to ensure that move() does it, instead.
|
||||||
|
|
||||||
|
// Build enumeration of src / dest paths and do the move()
|
||||||
|
final Collection<Path> fullSourcePaths = Lists.newArrayList(
|
||||||
|
strToPath(srcRoot),
|
||||||
|
strToPath(srcRoot + "/dir1"),
|
||||||
|
strToPath(srcRoot + "/dir1/dir2"),
|
||||||
|
strToPath(srcRoot + "/file1.txt")
|
||||||
|
);
|
||||||
|
final Collection<PathMetadata> pathsToCreate = Lists.newArrayList(
|
||||||
|
new PathMetadata(basicFileStatus(strToPath(destRoot),
|
||||||
|
0, true)),
|
||||||
|
new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1"),
|
||||||
|
0, true)),
|
||||||
|
new PathMetadata(basicFileStatus(strToPath(destRoot + "/dir1/dir2"),
|
||||||
|
0, true)),
|
||||||
|
new PathMetadata(basicFileStatus(strToPath(destRoot + "/file1.txt"),
|
||||||
|
1024, false))
|
||||||
|
);
|
||||||
|
|
||||||
|
ddbms.move(fullSourcePaths, pathsToCreate);
|
||||||
|
|
||||||
|
// assert that all the ancestors should have been populated automatically
|
||||||
|
assertCached(testRoot + "/c");
|
||||||
|
assertCached(testRoot + "/c/d");
|
||||||
|
assertCached(testRoot + "/c/d/e");
|
||||||
|
assertCached(destRoot /* /c/d/e/dest */);
|
||||||
|
|
||||||
|
// Also check moved files while we're at it
|
||||||
|
assertCached(destRoot + "/dir1");
|
||||||
|
assertCached(destRoot + "/dir1/dir2");
|
||||||
|
assertCached(destRoot + "/file1.txt");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testProvisionTable() throws IOException {
|
||||||
|
final DynamoDBMetadataStore ddbms = getDynamoMetadataStore();
|
||||||
|
final String tableName = ddbms.getTable().getTableName();
|
||||||
|
final ProvisionedThroughputDescription oldProvision =
|
||||||
|
dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
|
||||||
|
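// double the provisioned read and write capacity, then confirm the table reflects it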
ddbms.provisionTable(oldProvision.getReadCapacityUnits() * 2,
|
||||||
|
oldProvision.getWriteCapacityUnits() * 2);
|
||||||
|
final ProvisionedThroughputDescription newProvision =
|
||||||
|
dynamoDB.getTable(tableName).describe().getProvisionedThroughput();
|
||||||
|
LOG.info("Old provision = {}, new provision = {}",
|
||||||
|
oldProvision, newProvision);
|
||||||
|
assertEquals(oldProvision.getReadCapacityUnits() * 2,
|
||||||
|
newProvision.getReadCapacityUnits().longValue());
|
||||||
|
assertEquals(oldProvision.getWriteCapacityUnits() * 2,
|
||||||
|
newProvision.getWriteCapacityUnits().longValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDeleteTable() throws IOException {
|
||||||
|
final String tableName = "testDeleteTable";
|
||||||
|
final S3AFileSystem s3afs = getFileSystem();
|
||||||
|
final Configuration conf = s3afs.getConf();
|
||||||
|
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
||||||
|
try (DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore()) {
|
||||||
|
ddbms.initialize(s3afs);
|
||||||
|
// we can list the empty table
|
||||||
|
ddbms.listChildren(new Path(S3URI));
|
||||||
|
|
||||||
|
ddbms.destroy();
|
||||||
|
verifyTableNotExist(tableName);
|
||||||
|
|
||||||
|
// delete table once more; the ResourceNotFoundException should be swallowed silently
|
||||||
|
ddbms.destroy();
|
||||||
|
verifyTableNotExist(tableName);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// we can no longer list the destroyed table
|
||||||
|
ddbms.listChildren(new Path(S3URI));
|
||||||
|
fail("Should have failed after the table is destroyed!");
|
||||||
|
} catch (IOException ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This validates the table is created and ACTIVE in DynamoDB.
|
||||||
|
*
|
||||||
|
* This should not rely on the {@link DynamoDBMetadataStore} implementation.
|
||||||
|
* Return the table.
|
||||||
|
*/
|
||||||
|
private static Table verifyTableInitialized(String tableName) {
|
||||||
|
final Table table = dynamoDB.getTable(tableName);
|
||||||
|
final TableDescription td = table.describe();
|
||||||
|
assertEquals(tableName, td.getTableName());
|
||||||
|
assertEquals("ACTIVE", td.getTableStatus());
|
||||||
|
return table;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This validates the table is not found in DynamoDB.
|
||||||
|
*
|
||||||
|
* This should not rely on the {@link DynamoDBMetadataStore} implementation.
|
||||||
|
*/
|
||||||
|
private static void verifyTableNotExist(String tableName) {
|
||||||
|
final Table table = dynamoDB.getTable(tableName);
|
||||||
|
try {
|
||||||
|
table.describe();
|
||||||
|
fail("Expecting ResourceNotFoundException for table '" + tableName + "'");
|
||||||
|
} catch (ResourceNotFoundException ignored) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,140 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3ATestUtils;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* MetadataStore unit test for {@link LocalMetadataStore}.
|
||||||
|
*/
|
||||||
|
public class TestLocalMetadataStore extends MetadataStoreTestBase {
|
||||||
|
|
||||||
|
private static final String MAX_ENTRIES_STR = "16";
|
||||||
|
|
||||||
|
private final static class LocalMSContract extends AbstractMSContract {
|
||||||
|
|
||||||
|
private FileSystem fs;
|
||||||
|
|
||||||
|
private LocalMSContract() throws IOException {
|
||||||
|
this(new Configuration());
|
||||||
|
}
|
||||||
|
|
||||||
|
private LocalMSContract(Configuration config) throws IOException {
|
||||||
|
config.set(LocalMetadataStore.CONF_MAX_RECORDS, MAX_ENTRIES_STR);
|
||||||
|
fs = FileSystem.getLocal(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public FileSystem getFileSystem() {
|
||||||
|
return fs;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MetadataStore getMetadataStore() throws IOException {
|
||||||
|
LocalMetadataStore lms = new LocalMetadataStore();
|
||||||
|
return lms;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AbstractMSContract createContract() throws IOException {
|
||||||
|
return new LocalMSContract();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AbstractMSContract createContract(Configuration conf) throws
|
||||||
|
IOException {
|
||||||
|
return new LocalMSContract(conf);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testClearByAncestor() {
|
||||||
|
Map<Path, PathMetadata> map = new HashMap<>();
|
||||||
|
|
||||||
|
// 1. Test paths without scheme/host
|
||||||
|
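// the last argument is the number of entries expected to remain after the delete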
assertClearResult(map, "", "/", 0);
|
||||||
|
assertClearResult(map, "", "/dirA/dirB", 2);
|
||||||
|
assertClearResult(map, "", "/invalid", 5);
|
||||||
|
|
||||||
|
|
||||||
|
// 2. Test paths w/ scheme/host
|
||||||
|
String p = "s3a://fake-bucket-name";
|
||||||
|
assertClearResult(map, p, "/", 0);
|
||||||
|
assertClearResult(map, p, "/dirA/dirB", 2);
|
||||||
|
assertClearResult(map, p, "/invalid", 5);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void populateMap(Map<Path, PathMetadata> map,
|
||||||
|
String prefix) {
|
||||||
|
populateEntry(map, new Path(prefix + "/dirA/dirB/"));
|
||||||
|
populateEntry(map, new Path(prefix + "/dirA/dirB/dirC"));
|
||||||
|
populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file1"));
|
||||||
|
populateEntry(map, new Path(prefix + "/dirA/dirB/dirC/file2"));
|
||||||
|
populateEntry(map, new Path(prefix + "/dirA/file1"));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void populateEntry(Map<Path, PathMetadata> map,
|
||||||
|
Path path) {
|
||||||
|
map.put(path, new PathMetadata(new FileStatus(0, true, 0, 0, 0, path)));
|
||||||
|
}
|
||||||
|
|
||||||
|
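/** Count the entries in the map, ignoring tombstone (deleted) markers. */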
private static int sizeOfMap(Map<Path, PathMetadata> map) {
|
||||||
|
int count = 0;
|
||||||
|
for (PathMetadata meta : map.values()) {
|
||||||
|
if (!meta.isDeleted()) {
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void assertClearResult(Map <Path, PathMetadata> map,
|
||||||
|
String prefixStr, String pathStr, int leftoverSize) {
|
||||||
|
populateMap(map, prefixStr);
|
||||||
|
LocalMetadataStore.deleteHashByAncestor(new Path(prefixStr + pathStr), map,
|
||||||
|
true);
|
||||||
|
assertEquals(String.format("Map should have %d entries", leftoverSize),
|
||||||
|
leftoverSize, sizeOfMap(map));
|
||||||
|
map.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void verifyFileStatus(FileStatus status, long size) {
|
||||||
|
S3ATestUtils.verifyFileStatus(status, size, REPLICATION, getModTime(),
|
||||||
|
getAccessTime(),
|
||||||
|
BLOCK_SIZE, OWNER, GROUP, PERMISSION);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void verifyDirStatus(FileStatus status) {
|
||||||
|
S3ATestUtils.verifyDirStatus(status, REPLICATION, getModTime(),
|
||||||
|
getAccessTime(), OWNER, GROUP, PERMISSION);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,58 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Run MetadataStore unit tests on the NullMetadataStore implementation.
|
||||||
|
*/
|
||||||
|
public class TestNullMetadataStore extends MetadataStoreTestBase {
|
||||||
|
private static class NullMSContract extends AbstractMSContract {
|
||||||
|
@Override
|
||||||
|
public FileSystem getFileSystem() throws IOException {
|
||||||
|
Configuration config = new Configuration();
|
||||||
|
return FileSystem.getLocal(config);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public MetadataStore getMetadataStore() throws IOException {
|
||||||
|
return new NullMetadataStore();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** This MetadataStore always says "I don't know, ask the backing store". */
|
||||||
|
@Override
|
||||||
|
public boolean allowMissing() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AbstractMSContract createContract() {
|
||||||
|
return new NullMSContract();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public AbstractMSContract createContract(Configuration conf) {
|
||||||
|
return createContract();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,238 @@
|
|||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.concurrent.Callable;
|
||||||
|
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.Item;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.KeyAttribute;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.PrimaryKey;
|
||||||
|
import com.amazonaws.services.dynamodbv2.model.AttributeDefinition;
|
||||||
|
import com.amazonaws.services.dynamodbv2.model.KeySchemaElement;
|
||||||
|
import com.google.common.base.Preconditions;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.BeforeClass;
|
||||||
|
import org.junit.Rule;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.rules.Timeout;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileStatus;
|
||||||
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
|
|
||||||
|
import static com.amazonaws.services.dynamodbv2.model.KeyType.HASH;
|
||||||
|
import static com.amazonaws.services.dynamodbv2.model.KeyType.RANGE;
|
||||||
|
import static com.amazonaws.services.dynamodbv2.model.ScalarAttributeType.S;
|
||||||
|
import static org.hamcrest.CoreMatchers.anyOf;
|
||||||
|
import static org.hamcrest.CoreMatchers.is;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.*;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION_MARKER;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Test the PathMetadataDynamoDBTranslation is able to translate between domain
|
||||||
|
* model objects and DynamoDB items.
|
||||||
|
*/
|
||||||
|
public class TestPathMetadataDynamoDBTranslation extends Assert {
|
||||||
|
|
||||||
|
private static final Path TEST_DIR_PATH = new Path("s3a://test-bucket/myDir");
|
||||||
|
private static final Item TEST_DIR_ITEM = new Item();
|
||||||
|
private static PathMetadata testDirPathMetadata;
|
||||||
|
|
||||||
|
private static final long TEST_FILE_LENGTH = 100;
|
||||||
|
private static final long TEST_MOD_TIME = 9999;
|
||||||
|
private static final long TEST_BLOCK_SIZE = 128;
|
||||||
|
private static final Path TEST_FILE_PATH = new Path(TEST_DIR_PATH, "myFile");
|
||||||
|
private static final Item TEST_FILE_ITEM = new Item();
|
||||||
|
private static PathMetadata testFilePathMetadata;
|
||||||
|
|
||||||
|
@BeforeClass
|
||||||
|
public static void setUpBeforeClass() throws IOException {
|
||||||
|
String username = UserGroupInformation.getCurrentUser().getShortUserName();
|
||||||
|
|
||||||
|
testDirPathMetadata =
|
||||||
|
new PathMetadata(new S3AFileStatus(false, TEST_DIR_PATH, username));
|
||||||
|
TEST_DIR_ITEM
|
||||||
|
.withPrimaryKey(PARENT, "/test-bucket", CHILD, TEST_DIR_PATH.getName())
|
||||||
|
.withBoolean(IS_DIR, true);
|
||||||
|
|
||||||
|
testFilePathMetadata = new PathMetadata(
|
||||||
|
new S3AFileStatus(TEST_FILE_LENGTH, TEST_MOD_TIME, TEST_FILE_PATH,
|
||||||
|
TEST_BLOCK_SIZE, username));
|
||||||
|
TEST_FILE_ITEM
|
||||||
|
.withPrimaryKey(PARENT, pathToParentKey(TEST_FILE_PATH.getParent()),
|
||||||
|
CHILD, TEST_FILE_PATH.getName())
|
||||||
|
.withBoolean(IS_DIR, false)
|
||||||
|
.withLong(FILE_LENGTH, TEST_FILE_LENGTH)
|
||||||
|
.withLong(MOD_TIME, TEST_MOD_TIME)
|
||||||
|
.withLong(BLOCK_SIZE, TEST_BLOCK_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* It should not take a long time as it doesn't involve remote server operations.
|
||||||
|
*/
|
||||||
|
@Rule
|
||||||
|
public final Timeout timeout = new Timeout(30 * 1000);
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testKeySchema() {
|
||||||
|
final Collection<KeySchemaElement> keySchema =
|
||||||
|
PathMetadataDynamoDBTranslation.keySchema();
|
||||||
|
assertNotNull(keySchema);
|
||||||
|
assertEquals("There should be HASH and RANGE key in key schema",
|
||||||
|
2, keySchema.size());
|
||||||
|
for (KeySchemaElement element : keySchema) {
|
||||||
|
assertThat(element.getAttributeName(), anyOf(is(PARENT), is(CHILD)));
|
||||||
|
assertThat(element.getKeyType(),
|
||||||
|
anyOf(is(HASH.toString()), is(RANGE.toString())));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testAttributeDefinitions() {
|
||||||
|
final Collection<AttributeDefinition> attrs =
|
||||||
|
PathMetadataDynamoDBTranslation.attributeDefinitions();
|
||||||
|
assertNotNull(attrs);
|
||||||
|
assertEquals("There should be HASH and RANGE attributes", 2, attrs.size());
|
||||||
|
for (AttributeDefinition definition : attrs) {
|
||||||
|
assertThat(definition.getAttributeName(), anyOf(is(PARENT), is(CHILD)));
|
||||||
|
assertEquals(S.toString(), definition.getAttributeType());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testItemToPathMetadata() throws IOException {
|
||||||
|
final String user =
|
||||||
|
UserGroupInformation.getCurrentUser().getShortUserName();
|
||||||
|
assertNull(itemToPathMetadata(null, user));
|
||||||
|
|
||||||
|
verify(TEST_DIR_ITEM, itemToPathMetadata(TEST_DIR_ITEM, user));
|
||||||
|
verify(TEST_FILE_ITEM, itemToPathMetadata(TEST_FILE_ITEM, user));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verify that the Item and PathMetadata objects hold the same information.
|
||||||
|
*/
|
||||||
|
private static void verify(Item item, PathMetadata meta) {
|
||||||
|
assertNotNull(meta);
|
||||||
|
final FileStatus status = meta.getFileStatus();
|
||||||
|
final Path path = status.getPath();
|
||||||
|
assertEquals(item.get(PARENT), pathToParentKey(path.getParent()));
|
||||||
|
assertEquals(item.get(CHILD), path.getName());
|
||||||
|
boolean isDir = item.hasAttribute(IS_DIR) && item.getBoolean(IS_DIR);
|
||||||
|
assertEquals(isDir, status.isDirectory());
|
||||||
|
long len = item.hasAttribute(FILE_LENGTH) ? item.getLong(FILE_LENGTH) : 0;
|
||||||
|
assertEquals(len, status.getLen());
|
||||||
|
long bSize = item.hasAttribute(BLOCK_SIZE) ? item.getLong(BLOCK_SIZE) : 0;
|
||||||
|
assertEquals(bSize, status.getBlockSize());
|
||||||
|
|
||||||
|
/*
|
||||||
|
* S3AFileStatus#getModificationTime() reports the current time, so the
* following assertion would fail.
|
||||||
|
*
|
||||||
|
* long modTime = item.hasAttribute(MOD_TIME) ? item.getLong(MOD_TIME) : 0;
|
||||||
|
* assertEquals(modTime, status.getModificationTime());
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPathMetadataToItem() {
|
||||||
|
verify(pathMetadataToItem(testDirPathMetadata), testDirPathMetadata);
|
||||||
|
verify(pathMetadataToItem(testFilePathMetadata),
|
||||||
|
testFilePathMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPathToParentKeyAttribute() {
|
||||||
|
doTestPathToParentKeyAttribute(TEST_DIR_PATH);
|
||||||
|
doTestPathToParentKeyAttribute(TEST_FILE_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void doTestPathToParentKeyAttribute(Path path) {
|
||||||
|
final KeyAttribute attr = pathToParentKeyAttribute(path);
|
||||||
|
assertNotNull(attr);
|
||||||
|
assertEquals(PARENT, attr.getName());
|
||||||
|
// this path is expected as the parent field
|
||||||
|
assertEquals(pathToParentKey(path), attr.getValue());
|
||||||
|
}
|
||||||
|
|
||||||
|
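// independent re-computation of the parent key, used to cross-check the translation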
private static String pathToParentKey(Path p) {
|
||||||
|
Preconditions.checkArgument(p.isUriPathAbsolute());
|
||||||
|
URI parentUri = p.toUri();
|
||||||
|
String bucket = parentUri.getHost();
|
||||||
|
Preconditions.checkNotNull(bucket);
|
||||||
|
String s = "/" + bucket + parentUri.getPath();
|
||||||
|
// strip trailing slash
|
||||||
|
if (s.endsWith("/")) {
|
||||||
|
s = s.substring(0, s.length()-1);
|
||||||
|
}
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPathToKey() throws Exception {
|
||||||
|
LambdaTestUtils.intercept(IllegalArgumentException.class,
|
||||||
|
new Callable<PrimaryKey>() {
|
||||||
|
@Override
|
||||||
|
public PrimaryKey call() throws Exception {
|
||||||
|
return pathToKey(new Path("/"));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
doTestPathToKey(TEST_DIR_PATH);
|
||||||
|
doTestPathToKey(TEST_FILE_PATH);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void doTestPathToKey(Path path) {
|
||||||
|
final PrimaryKey key = pathToKey(path);
|
||||||
|
assertNotNull(key);
|
||||||
|
assertEquals("There should be both HASH and RANGE keys",
|
||||||
|
2, key.getComponents().size());
|
||||||
|
|
||||||
|
for (KeyAttribute keyAttribute : key.getComponents()) {
|
||||||
|
assertThat(keyAttribute.getName(), anyOf(is(PARENT), is(CHILD)));
|
||||||
|
if (PARENT.equals(keyAttribute.getName())) {
|
||||||
|
assertEquals(pathToParentKey(path.getParent()),
|
||||||
|
keyAttribute.getValue());
|
||||||
|
} else {
|
||||||
|
assertEquals(path.getName(), keyAttribute.getValue());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVersionRoundTrip() throws Throwable {
|
||||||
|
final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
|
||||||
|
assertEquals("Extracted version from " + marker,
|
||||||
|
VERSION, extractVersionFromMarker(marker));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testVersionMarkerNotStatusIllegalPath() throws Throwable {
|
||||||
|
final Item marker = createVersionMarker(VERSION_MARKER, VERSION, 0);
|
||||||
|
assertNull("Path metadata fromfrom " + marker,
|
||||||
|
itemToPathMetadata(marker, "alice"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,93 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.util.Arrays;
import java.util.List;

import org.junit.Assert;
import org.junit.Test;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

/**
 * Tests for the {@link S3Guard} utility class.
 */
public class TestS3Guard extends Assert {

  /**
   * Basic test to ensure results from S3 and MetadataStore are merged
   * correctly.
   */
  @Test
  public void testDirListingUnion() throws Exception {
    MetadataStore ms = new LocalMetadataStore();

    Path dirPath = new Path("s3a://bucket/dir");

    // Two files in metadata store listing
    PathMetadata m1 = makePathMeta("s3a://bucket/dir/ms-file1", false);
    PathMetadata m2 = makePathMeta("s3a://bucket/dir/ms-file2", false);
    DirListingMetadata dirMeta = new DirListingMetadata(dirPath,
        Arrays.asList(m1, m2), false);

    // Two other files in s3
    List<FileStatus> s3Listing = Arrays.asList(
        makeFileStatus("s3a://bucket/dir/s3-file3", false),
        makeFileStatus("s3a://bucket/dir/s3-file4", false)
    );

    FileStatus[] result = S3Guard.dirListingUnion(ms, dirPath, s3Listing,
        dirMeta, false);

    assertEquals("listing length", 4, result.length);
    assertContainsPath(result, "s3a://bucket/dir/ms-file1");
    assertContainsPath(result, "s3a://bucket/dir/ms-file2");
    assertContainsPath(result, "s3a://bucket/dir/s3-file3");
    assertContainsPath(result, "s3a://bucket/dir/s3-file4");
  }

  void assertContainsPath(FileStatus[] statuses, String pathStr) {
    assertTrue("listing doesn't contain " + pathStr,
        containsPath(statuses, pathStr));
  }

  boolean containsPath(FileStatus[] statuses, String pathStr) {
    for (FileStatus s : statuses) {
      if (s.getPath().toString().equals(pathStr)) {
        return true;
      }
    }
    return false;
  }

  private PathMetadata makePathMeta(String pathStr, boolean isDir) {
    return new PathMetadata(makeFileStatus(pathStr, isDir));
  }

  private FileStatus makeFileStatus(String pathStr, boolean isDir) {
    Path p = new Path(pathStr);
    if (isDir) {
      return new FileStatus(0, true, 1, 1, System.currentTimeMillis(), p);
    } else {
      return new FileStatus(100, false, 1, 1, System.currentTimeMillis(), p);
    }
  }
}
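A note on the union semantics exercised by TestS3Guard above: the merged listing should contain every path reported by either S3 or the MetadataStore, keyed by path. The following is only an illustrative sketch of that merge rule (the class name ListingUnionSketch is invented here; this is not the S3Guard implementation):

```java
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

/** Hypothetical sketch of the merge semantics exercised by testDirListingUnion. */
final class ListingUnionSketch {
  static List<String> union(List<String> s3Paths, List<String> storePaths) {
    // Key by path string so each entry appears exactly once in the result.
    Map<String, String> merged = new LinkedHashMap<>();
    for (String p : s3Paths) {
      merged.put(p, p);          // start from what S3 itself listed
    }
    for (String p : storePaths) {
      merged.put(p, p);          // add entries only the MetadataStore knows about
    }
    return new ArrayList<>(merged.values());
  }
}
```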
@ -0,0 +1,250 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.PathMetadata;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import static org.apache.hadoop.fs.contract.ContractTestUtils.NanoTimer;

/**
 * Test the performance of a MetadataStore. Useful for load testing.
 * Could be separated from S3A code, but we're using the S3A scale test
 * framework for convenience.
 */
public abstract class AbstractITestS3AMetadataStoreScale extends
    S3AScaleTestBase {
  private static final Logger LOG = LoggerFactory.getLogger(
      AbstractITestS3AMetadataStoreScale.class);

  /** Some dummy values for FileStatus contents. */
  static final long BLOCK_SIZE = 32 * 1024 * 1024;
  static final long SIZE = BLOCK_SIZE * 2;
  static final String OWNER = "bob";
  static final long ACCESS_TIME = System.currentTimeMillis();

  static final Path BUCKET_ROOT = new Path("s3a://fake-bucket/");

  /**
   * Subclasses should override this to provide the MetadataStore they wish
   * to test.
   * @return MetadataStore to test against
   * @throws IOException on failure to create the store
   */
  public abstract MetadataStore createMetadataStore() throws IOException;

  @Test
  public void testPut() throws Throwable {
    describe("Test workload of put() operations");

    // As described in hadoop-aws site docs, count parameter is used for
    // width and depth of directory tree
    int width = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
    int depth = width;

    List<PathMetadata> paths = new ArrayList<>();
    createDirTree(BUCKET_ROOT, depth, width, paths);

    long count = 1; // Some value in case we throw an exception below
    try (MetadataStore ms = createMetadataStore()) {

      try {
        count = populateMetadataStore(paths, ms);
      } finally {
        clearMetadataStore(ms, count);
      }
    }
  }

  @Test
  public void testMoves() throws Throwable {
    describe("Test workload of batched move() operations");

    // As described in hadoop-aws site docs, count parameter is used for
    // width and depth of directory tree
    int width = getConf().getInt(KEY_DIRECTORY_COUNT, DEFAULT_DIRECTORY_COUNT);
    int depth = width;

    long operations = getConf().getLong(KEY_OPERATION_COUNT,
        DEFAULT_OPERATION_COUNT);

    List<PathMetadata> origMetas = new ArrayList<>();
    createDirTree(BUCKET_ROOT, depth, width, origMetas);

    // Pre-compute source and destination paths for move() loop below
    List<Path> origPaths = metasToPaths(origMetas);
    List<PathMetadata> movedMetas = moveMetas(origMetas, BUCKET_ROOT,
        new Path(BUCKET_ROOT, "moved-here"));
    List<Path> movedPaths = metasToPaths(movedMetas);

    long count = 1; // Some value in case we throw an exception below
    try (MetadataStore ms = createMetadataStore()) {

      try {
        // Setup
        count = populateMetadataStore(origMetas, ms);

        // Main loop: move things back and forth
        describe("Running move workload");
        NanoTimer moveTimer = new NanoTimer();
        LOG.info("Running {} moves of {} paths each", operations,
            origMetas.size());
        for (int i = 0; i < operations; i++) {
          Collection<Path> toDelete;
          Collection<PathMetadata> toCreate;
          if (i % 2 == 0) {
            toDelete = origPaths;
            toCreate = movedMetas;
          } else {
            toDelete = movedPaths;
            toCreate = origMetas;
          }
          ms.move(toDelete, toCreate);
        }
        moveTimer.end();
        printTiming(LOG, "move", moveTimer, operations);
      } finally {
        // Cleanup
        clearMetadataStore(ms, count);
      }
    }
  }

  /**
   * Create a copy of given list of PathMetadatas with the paths moved from
   * src to dest.
   */
  private List<PathMetadata> moveMetas(List<PathMetadata> metas, Path src,
      Path dest) throws IOException {
    List<PathMetadata> moved = new ArrayList<>(metas.size());
    for (PathMetadata srcMeta : metas) {
      S3AFileStatus status = copyStatus((S3AFileStatus)srcMeta.getFileStatus());
      status.setPath(movePath(status.getPath(), src, dest));
      moved.add(new PathMetadata(status));
    }
    return moved;
  }

  private Path movePath(Path p, Path src, Path dest) {
    String srcStr = src.toUri().getPath();
    String pathStr = p.toUri().getPath();
    // Strip off src dir
    pathStr = pathStr.substring(srcStr.length());
    // Prepend new dest
    return new Path(dest, pathStr);
  }

  private S3AFileStatus copyStatus(S3AFileStatus status) {
    if (status.isDirectory()) {
      return new S3AFileStatus(status.isEmptyDirectory(), status.getPath(),
          status.getOwner());
    } else {
      return new S3AFileStatus(status.getLen(), status.getModificationTime(),
          status.getPath(), status.getBlockSize(), status.getOwner());
    }
  }

  /** @return number of PathMetadatas put() into MetadataStore */
  private long populateMetadataStore(Collection<PathMetadata> paths,
      MetadataStore ms) throws IOException {
    long count = 0;
    NanoTimer putTimer = new NanoTimer();
    describe("Inserting into MetadataStore");
    for (PathMetadata p : paths) {
      ms.put(p);
      count++;
    }
    putTimer.end();
    printTiming(LOG, "put", putTimer, count);
    return count;
  }

  private void clearMetadataStore(MetadataStore ms, long count)
      throws IOException {
    describe("Recursive deletion");
    NanoTimer deleteTimer = new NanoTimer();
    ms.deleteSubtree(BUCKET_ROOT);
    deleteTimer.end();
    printTiming(LOG, "delete", deleteTimer, count);
  }

  private static void printTiming(Logger log, String op, NanoTimer timer,
      long count) {
    double msec = (double) timer.duration() / 1000;
    double msecPerOp = msec / count;
    log.info(String.format("Elapsed %.2f msec. %.3f msec / %s (%d ops)", msec,
        msecPerOp, op, count));
  }

  private static S3AFileStatus makeFileStatus(Path path) throws IOException {
    return new S3AFileStatus(SIZE, ACCESS_TIME, path, BLOCK_SIZE, OWNER);
  }

  private static S3AFileStatus makeDirStatus(Path p) throws IOException {
    return new S3AFileStatus(false, p, OWNER);
  }

  private List<Path> metasToPaths(List<PathMetadata> metas) {
    List<Path> paths = new ArrayList<>(metas.size());
    for (PathMetadata meta : metas) {
      paths.add(meta.getFileStatus().getPath());
    }
    return paths;
  }

  /**
   * Recursively create a directory tree.
   * @param parent Parent dir of the paths to create.
   * @param depth How many more levels deep past parent to create.
   * @param width Number of files (and directories, if depth > 0) per directory.
   * @param paths List to add generated paths to.
   */
  private static void createDirTree(Path parent, int depth, int width,
      Collection<PathMetadata> paths) throws IOException {

    // Create files
    for (int i = 0; i < width; i++) {
      Path p = new Path(parent, String.format("file-%d", i));
      PathMetadata meta = new PathMetadata(makeFileStatus(p));
      paths.add(meta);
    }

    if (depth == 0) {
      return;
    }

    // Create directories if there is depth remaining
    for (int i = 0; i < width; i++) {
      Path dir = new Path(parent, String.format("dir-%d", i));
      PathMetadata meta = new PathMetadata(makeDirStatus(dir));
      paths.add(meta);
      createDirTree(dir, depth - 1, width, paths);
    }
  }
}
@ -25,6 +25,7 @@
 import com.amazonaws.event.ProgressEvent;
 import com.amazonaws.event.ProgressEventType;
 import com.amazonaws.event.ProgressListener;
+import org.apache.hadoop.fs.FileStatus;
 import org.junit.FixMethodOrder;
 import org.junit.Test;
 import org.junit.runners.MethodSorters;
@ -34,11 +35,9 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.StorageStatistics;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
-import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.fs.s3a.S3AInstrumentation;
 import org.apache.hadoop.fs.s3a.Statistic;
@ -222,7 +221,7 @@ public void test_010_CreateHugeFile() throws IOException {
     assertEquals("active put requests in \n" + fs,
         0, gaugeValue(putRequestsActive));
     ContractTestUtils.assertPathExists(fs, "Huge file", hugefile);
-    S3AFileStatus status = fs.getFileStatus(hugefile);
+    FileStatus status = fs.getFileStatus(hugefile);
     ContractTestUtils.assertIsFile(hugefile, status);
     assertEquals("File size in " + status, filesize, status.getLen());
     if (progress != null) {
@ -324,7 +323,7 @@ public void test_040_PositionedReadHugeFile() throws Throwable {
     String filetype = encrypted ? "encrypted file" : "file";
     describe("Positioned reads of %s %s", filetype, hugefile);
     S3AFileSystem fs = getFileSystem();
-    S3AFileStatus status = fs.getFileStatus(hugefile);
+    FileStatus status = fs.getFileStatus(hugefile);
     long filesize = status.getLen();
     int ops = 0;
     final int bufferSize = 8192;
@ -364,7 +363,7 @@ public void test_050_readHugeFile() throws Throwable {
     assumeHugeFileExists();
     describe("Reading %s", hugefile);
     S3AFileSystem fs = getFileSystem();
-    S3AFileStatus status = fs.getFileStatus(hugefile);
+    FileStatus status = fs.getFileStatus(hugefile);
     long filesize = status.getLen();
     long blocks = filesize / uploadBlockSize;
     byte[] data = new byte[uploadBlockSize];
@ -390,7 +389,7 @@ public void test_100_renameHugeFile() throws Throwable {
     assumeHugeFileExists();
     describe("renaming %s to %s", hugefile, hugefileRenamed);
     S3AFileSystem fs = getFileSystem();
-    S3AFileStatus status = fs.getFileStatus(hugefile);
+    FileStatus status = fs.getFileStatus(hugefile);
     long filesize = status.getLen();
     fs.delete(hugefileRenamed, false);
     ContractTestUtils.NanoTimer timer = new ContractTestUtils.NanoTimer();
@ -401,7 +400,7 @@ public void test_100_renameHugeFile() throws Throwable {
         toHuman(timer.nanosPerOperation(mb)));
     bandwidth(timer, filesize);
     logFSState();
-    S3AFileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
+    FileStatus destFileStatus = fs.getFileStatus(hugefileRenamed);
     assertEquals(filesize, destFileStatus.getLen());

     // rename back
@ -0,0 +1,48 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;

import java.io.IOException;

import static org.junit.Assume.*;
import static org.apache.hadoop.fs.s3a.Constants.*;

/**
 * Scale test for DynamoDBMetadataStore.
 */
public class ITestDynamoDBMetadataStoreScale
    extends AbstractITestS3AMetadataStoreScale {

  @Override
  public MetadataStore createMetadataStore() throws IOException {
    Configuration conf = getFileSystem().getConf();
    String ddbTable = conf.get(S3GUARD_DDB_TABLE_NAME_KEY);
    assumeNotNull("DynamoDB table is configured", ddbTable);
    String ddbEndpoint = conf.get(S3GUARD_DDB_REGION_KEY);
    assumeNotNull("DynamoDB endpoint is configured", ddbEndpoint);

    DynamoDBMetadataStore ms = new DynamoDBMetadataStore();
    ms.initialize(getFileSystem().getConf());
    return ms;
  }
}
@ -0,0 +1,37 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.fs.s3a.s3guard.LocalMetadataStore;
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;

import java.io.IOException;

/**
 * Scale test for LocalMetadataStore.
 */
public class ITestLocalMetadataStoreScale
    extends AbstractITestS3AMetadataStoreScale {
  @Override
  public MetadataStore createMetadataStore() throws IOException {
    MetadataStore ms = new LocalMetadataStore();
    ms.initialize(getFileSystem());
    return ms;
  }
}
@ -107,7 +107,7 @@ private S3AFileSystem getRestrictedFileSystem() throws Exception {

   private S3AFileSystem getNormalFileSystem() throws Exception {
     S3AFileSystem s3a = new S3AFileSystem();
-    Configuration conf = new Configuration();
+    Configuration conf = createScaleConfiguration();
     URI rootURI = new URI(conf.get(TEST_FS_S3A_NAME));
     s3a.initialize(rootURI, conf);
     return s3a;
@ -115,6 +115,7 @@ private S3AFileSystem getNormalFileSystem() throws Exception {

   @After
   public void teardown() throws Exception {
+    super.teardown();
     if (auxFs != null) {
       auxFs.delete(testRoot, true);
     }
@ -0,0 +1,86 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.OutputStream;

import static org.apache.hadoop.fs.contract.ContractTestUtils.*;

/**
 * Tests for create(): performance and/or load testing.
 */
public class ITestS3ACreatePerformance extends S3AScaleTestBase {
  private static final Logger LOG = LoggerFactory.getLogger(
      ITestS3ADirectoryPerformance.class);

  private Path basePath;
  private int basePathDepth;
  private static final int PATH_DEPTH = 10;

  @Override
  public void setup() throws Exception {
    super.setup();
    basePath = getTestPath();
    basePathDepth = basePath.depth();
  }

  /**
   * Test the rate at which we can create deeply-nested files from a single
   * thread.
   * @throws Exception on failure
   */
  @Test
  public void testDeepSequentialCreate() throws Exception {
    long numOperations = getOperationCount();
    S3AFileSystem fs = getFileSystem();

    NanoTimer timer = new NanoTimer();
    for (int i = 0; i < numOperations; i++) {
      Path p = getPathIteration(i, PATH_DEPTH);
      OutputStream out = fs.create(p);
      out.write(40); // one-byte file containing the value 40
      out.close();
    }
    timer.end("Time to create %d files of depth %d", getOperationCount(),
        PATH_DEPTH);
    LOG.info("Time per create: {} msec",
        timer.nanosPerOperation(numOperations) / 1000);
  }

  /* Get a unique path of depth totalDepth for a given test iteration. */
  private Path getPathIteration(long iter, int totalDepth) throws Exception {
    assertTrue("Test path too long, increase PATH_DEPTH in test.",
        totalDepth > basePathDepth);

    int neededDirs = totalDepth - basePathDepth - 1;
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < neededDirs; i++) {
      sb.append("iter-").append(iter);
      sb.append("-dir-").append(i);
      sb.append("/");
    }
    sb.append("file").append(iter);
    return new Path(basePath, sb.toString());
  }
}
@ -113,14 +113,15 @@ public void testListOperations() throws Throwable {
           listContinueRequests,
           listStatusCalls,
           getFileStatusCalls);
-      assertEquals(listRequests.toString(), 2, listRequests.diff());
+      if (!fs.hasMetadataStore()) {
+        assertEquals(listRequests.toString(), 2, listRequests.diff());
+      }
       reset(metadataRequests,
           listRequests,
           listContinueRequests,
           listStatusCalls,
           getFileStatusCalls);

     } finally {
       describe("deletion");
       // deletion at the end of the run
@ -20,10 +20,10 @@

 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.contract.ContractTestUtils;
-import org.apache.hadoop.fs.s3a.S3AFileStatus;
 import org.apache.hadoop.fs.s3a.S3AFileSystem;
 import org.apache.hadoop.fs.s3a.S3AInputPolicy;
 import org.apache.hadoop.fs.s3a.S3AInputStream;
@ -56,7 +56,7 @@ public class ITestS3AInputStreamPerformance extends S3AScaleTestBase {

   private S3AFileSystem s3aFS;
   private Path testData;
-  private S3AFileStatus testDataStatus;
+  private FileStatus testDataStatus;
   private FSDataInputStream in;
   private S3AInstrumentation.InputStreamStatistics streamStatistics;
   public static final int BLOCK_SIZE = 32 * 1024;
@ -126,7 +126,7 @@ protected final Configuration createConfiguration() {
    * @return a configuration with which to create FS instances
    */
   protected Configuration createScaleConfiguration() {
-    return new Configuration();
+    return super.createConfiguration();
   }

   protected Path getTestPath() {
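For context on the createScaleConfiguration() hook changed above, a scale test subclass can layer its own settings on top of the shared configuration. A minimal sketch under two assumptions: the class name ITestExampleSmallScale is invented here, and KEY_OPERATION_COUNT is visible from S3AScaleTestBase as it is in the scale tests earlier in this patch.

```java
package org.apache.hadoop.fs.s3a.scale;

import org.apache.hadoop.conf.Configuration;

/** Hypothetical subclass: trims the workload for slow network links. */
public class ITestExampleSmallScale extends S3AScaleTestBase {

  @Override
  protected Configuration createScaleConfiguration() {
    // start from the shared scale-test configuration, then shrink the workload
    Configuration conf = super.createScaleConfiguration();
    conf.setLong(KEY_OPERATION_COUNT, 500);
    return conf;
  }
}
```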
@ -36,6 +36,25 @@
     <description>The endpoint for s3a://landsat-pds URLs</description>
   </property>

+  <!-- Make sure S3Guard is disabled for read-only bucket tests. -->
+  <property>
+    <name>fs.s3a.bucket.landsat-pds.metadatastore.impl</name>
+    <value>${s3guard.null}</value>
+    <description>The read-only landsat-pds repository isn't
+      managed by S3Guard</description>
+  </property>
+
+  <!-- Convenience definitions. -->
+  <property>
+    <name>s3guard.null</name>
+    <value>org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore</value>
+  </property>
+
+  <property>
+    <name>s3guard.dynamo</name>
+    <value>org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore</value>
+  </property>
+
   <!--
   This is the default endpoint, which can be used to interact
   with any v2 region.
@ -110,6 +129,13 @@
     <value>${central.endpoint}</value>
   </property>

+  <!-- Scale integration tests may time out on slower connections;
+       you can reduce the operation count like so to mitigate this.
+  <property>
+    <name>scale.test.operation.count</name>
+    <value>500</value>
+  </property>
+  -->

   <!-- Turn security off for tests by default -->
   <property>
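The per-bucket override pattern in the XML above can also be expressed from test code. A rough sketch, using the metadata store class names defined above and a deliberately hypothetical bucket name:

```java
import org.apache.hadoop.conf.Configuration;

/** Illustrative helper mirroring the per-bucket metadata store overrides above. */
public final class S3GuardTestConfigSketch {

  public static Configuration withPerBucketStores() {
    Configuration conf = new Configuration();
    // keep the read-only landsat-pds data set unguarded, as in the test XML
    conf.set("fs.s3a.bucket.landsat-pds.metadatastore.impl",
        "org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore");
    // opt a writable test bucket (hypothetical name) into the DynamoDB store
    conf.set("fs.s3a.bucket.example-test-bucket.metadatastore.impl",
        "org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore");
    return conf;
  }

  private S3GuardTestConfigSketch() {
  }
}
```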