HADOOP-16384: S3A: Avoid inconsistencies between DDB and S3.
Contributed by Steve Loughran Contains - HADOOP-16397. Hadoop S3Guard Prune command to support a -tombstone option. - HADOOP-16406. ITestDynamoDBMetadataStore.testProvisionTable times out intermittently This patch doesn't fix the underlying problem but it * changes some tests to clean up better * does a lot more in logging operations in against DDB, if enabled * adds an entry point to dump the state of the metastore and s3 tables (precursor to fsck) * adds a purge entry point to help clean up after a test run has got a store into a mess * s3guard prune command adds -tombstone option to only clear tombstones The outcome is that tests should pass consistently and if problems occur we have better diagnostics. Change-Id: I3eca3f5529d7f6fec398c0ff0472919f08f054eb
This commit is contained in:
parent
f9fab9f22a
commit
b15ef7dc3d
|
@ -78,4 +78,18 @@ public class ServiceLaunchException extends ExitUtil.ExitException
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a formatted exception.
|
||||||
|
* <p>
|
||||||
|
* This uses {@link String#format(String, Object...)}
|
||||||
|
* to build the formatted exception in the ENGLISH locale.
|
||||||
|
* @param exitCode exit code
|
||||||
|
* @param cause inner cause
|
||||||
|
* @param format format for message to use in exception
|
||||||
|
* @param args list of arguments
|
||||||
|
*/
|
||||||
|
public ServiceLaunchException(int exitCode, Throwable cause,
|
||||||
|
String format, Object... args) {
|
||||||
|
super(exitCode, String.format(Locale.ENGLISH, format, args), cause);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -200,7 +200,7 @@ public class ServiceLauncher<S extends Service>
|
||||||
* Get the service.
|
* Get the service.
|
||||||
*
|
*
|
||||||
* Null until
|
* Null until
|
||||||
* {@link #coreServiceLaunch(Configuration, List, boolean, boolean)}
|
* {@link #coreServiceLaunch(Configuration, Service, List, boolean, boolean)}
|
||||||
* has completed.
|
* has completed.
|
||||||
* @return the service
|
* @return the service
|
||||||
*/
|
*/
|
||||||
|
@ -303,7 +303,7 @@ public class ServiceLauncher<S extends Service>
|
||||||
exitException = e;
|
exitException = e;
|
||||||
noteException(exitException);
|
noteException(exitException);
|
||||||
}
|
}
|
||||||
if (exitException.getExitCode() != 0) {
|
if (exitException.getExitCode() == LauncherExitCodes.EXIT_USAGE) {
|
||||||
// something went wrong. Print the usage and commands
|
// something went wrong. Print the usage and commands
|
||||||
System.err.println(getUsageMessage());
|
System.err.println(getUsageMessage());
|
||||||
System.err.println("Command: " + argumentString);
|
System.err.println("Command: " + argumentString);
|
||||||
|
@ -328,8 +328,18 @@ public class ServiceLauncher<S extends Service>
|
||||||
* @param exitException exception
|
* @param exitException exception
|
||||||
*/
|
*/
|
||||||
void noteException(ExitUtil.ExitException exitException) {
|
void noteException(ExitUtil.ExitException exitException) {
|
||||||
LOG.debug("Exception raised", exitException);
|
int exitCode = exitException.getExitCode();
|
||||||
serviceExitCode = exitException.getExitCode();
|
if (exitCode != 0) {
|
||||||
|
LOG.debug("Exception raised with exit code {}",
|
||||||
|
exitCode,
|
||||||
|
exitException);
|
||||||
|
Throwable cause = exitException.getCause();
|
||||||
|
if (cause != null) {
|
||||||
|
// log the nested exception in more detail
|
||||||
|
LOG.warn("{}", cause.toString(), cause);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
serviceExitCode = exitCode;
|
||||||
serviceException = exitException;
|
serviceException = exitException;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -451,17 +461,38 @@ public class ServiceLauncher<S extends Service>
|
||||||
* @param execute execute/wait for the service to stop.
|
* @param execute execute/wait for the service to stop.
|
||||||
* @return an exit exception, which will have a status code of 0 if it worked
|
* @return an exit exception, which will have a status code of 0 if it worked
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
|
||||||
public ExitUtil.ExitException launchService(Configuration conf,
|
public ExitUtil.ExitException launchService(Configuration conf,
|
||||||
List<String> processedArgs,
|
List<String> processedArgs,
|
||||||
boolean addShutdownHook,
|
boolean addShutdownHook,
|
||||||
boolean execute) {
|
boolean execute) {
|
||||||
|
return launchService(conf, null, processedArgs, addShutdownHook, execute);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Launch a service catching all exceptions and downgrading them to exit codes
|
||||||
|
* after logging.
|
||||||
|
*
|
||||||
|
* Sets {@link #serviceException} to this value.
|
||||||
|
* @param conf configuration to use
|
||||||
|
* @param instance optional instance of the service.
|
||||||
|
* @param processedArgs command line after the launcher-specific arguments
|
||||||
|
* have been stripped out.
|
||||||
|
* @param addShutdownHook should a shutdown hook be added to terminate
|
||||||
|
* this service on shutdown. Tests should set this to false.
|
||||||
|
* @param execute execute/wait for the service to stop.
|
||||||
|
* @return an exit exception, which will have a status code of 0 if it worked
|
||||||
|
*/
|
||||||
|
public ExitUtil.ExitException launchService(Configuration conf,
|
||||||
|
S instance,
|
||||||
|
List<String> processedArgs,
|
||||||
|
boolean addShutdownHook,
|
||||||
|
boolean execute) {
|
||||||
|
|
||||||
ExitUtil.ExitException exitException;
|
ExitUtil.ExitException exitException;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
int exitCode = coreServiceLaunch(conf, processedArgs, addShutdownHook,
|
int exitCode = coreServiceLaunch(conf, instance, processedArgs,
|
||||||
execute);
|
addShutdownHook, execute);
|
||||||
if (service != null) {
|
if (service != null) {
|
||||||
// check to see if the service failed
|
// check to see if the service failed
|
||||||
Throwable failure = service.getFailureCause();
|
Throwable failure = service.getFailureCause();
|
||||||
|
@ -495,6 +526,12 @@ public class ServiceLauncher<S extends Service>
|
||||||
// exit exceptions are passed through unchanged
|
// exit exceptions are passed through unchanged
|
||||||
exitException = ee;
|
exitException = ee;
|
||||||
} catch (Throwable thrown) {
|
} catch (Throwable thrown) {
|
||||||
|
// other errors need a full log.
|
||||||
|
LOG.error("Exception raised {}",
|
||||||
|
service != null
|
||||||
|
? (service.toString() + " in state " + service.getServiceState())
|
||||||
|
: "during service instantiation",
|
||||||
|
thrown);
|
||||||
exitException = convertToExitException(thrown);
|
exitException = convertToExitException(thrown);
|
||||||
}
|
}
|
||||||
noteException(exitException);
|
noteException(exitException);
|
||||||
|
@ -514,6 +551,7 @@ public class ServiceLauncher<S extends Service>
|
||||||
* {@link #getService()}.
|
* {@link #getService()}.
|
||||||
*
|
*
|
||||||
* @param conf configuration
|
* @param conf configuration
|
||||||
|
* @param instance optional instance of the service.
|
||||||
* @param processedArgs arguments after the configuration parameters
|
* @param processedArgs arguments after the configuration parameters
|
||||||
* have been stripped out.
|
* have been stripped out.
|
||||||
* @param addShutdownHook should a shutdown hook be added to terminate
|
* @param addShutdownHook should a shutdown hook be added to terminate
|
||||||
|
@ -530,12 +568,19 @@ public class ServiceLauncher<S extends Service>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
protected int coreServiceLaunch(Configuration conf,
|
protected int coreServiceLaunch(Configuration conf,
|
||||||
|
S instance,
|
||||||
List<String> processedArgs,
|
List<String> processedArgs,
|
||||||
boolean addShutdownHook,
|
boolean addShutdownHook,
|
||||||
boolean execute) throws Exception {
|
boolean execute) throws Exception {
|
||||||
|
|
||||||
// create the service instance
|
// create the service instance
|
||||||
instantiateService(conf);
|
if (instance == null) {
|
||||||
|
instantiateService(conf);
|
||||||
|
} else {
|
||||||
|
// service already exists, so instantiate
|
||||||
|
configuration = conf;
|
||||||
|
service = instance;
|
||||||
|
}
|
||||||
ServiceShutdownHook shutdownHook = null;
|
ServiceShutdownHook shutdownHook = null;
|
||||||
|
|
||||||
// and the shutdown hook if requested
|
// and the shutdown hook if requested
|
||||||
|
@ -685,8 +730,7 @@ public class ServiceLauncher<S extends Service>
|
||||||
}
|
}
|
||||||
// construct the new exception with the original message and
|
// construct the new exception with the original message and
|
||||||
// an exit code
|
// an exit code
|
||||||
exitException = new ServiceLaunchException(exitCode, message);
|
exitException = new ServiceLaunchException(exitCode, thrown, message);
|
||||||
exitException.initCause(thrown);
|
|
||||||
return exitException;
|
return exitException;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -917,7 +961,7 @@ public class ServiceLauncher<S extends Service>
|
||||||
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR, e);
|
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR, e);
|
||||||
} catch (RuntimeException e) {
|
} catch (RuntimeException e) {
|
||||||
// lower level issue such as XML parse failure
|
// lower level issue such as XML parse failure
|
||||||
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR,
|
throw new ServiceLaunchException(EXIT_COMMAND_ARGUMENT_ERROR, e,
|
||||||
E_PARSE_FAILED + " %s : %s", argString, e);
|
E_PARSE_FAILED + " %s : %s", argString, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,8 +31,10 @@ import java.util.concurrent.Callable;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
import org.apache.hadoop.test.LambdaTestUtils;
|
import org.apache.hadoop.test.LambdaTestUtils;
|
||||||
|
|
||||||
|
import static org.apache.commons.lang3.StringUtils.join;
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.createFile;
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset;
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.deleteChildren;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.deleteChildren;
|
||||||
|
@ -149,14 +151,18 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra
|
||||||
Path root = new Path("/");
|
Path root = new Path("/");
|
||||||
assertIsDirectory(root);
|
assertIsDirectory(root);
|
||||||
Path file = new Path("/testRmRootRecursive");
|
Path file = new Path("/testRmRootRecursive");
|
||||||
ContractTestUtils.touch(getFileSystem(), file);
|
try {
|
||||||
boolean deleted = getFileSystem().delete(root, true);
|
ContractTestUtils.touch(getFileSystem(), file);
|
||||||
assertIsDirectory(root);
|
boolean deleted = getFileSystem().delete(root, true);
|
||||||
LOG.info("rm -rf / result is {}", deleted);
|
assertIsDirectory(root);
|
||||||
if (deleted) {
|
LOG.info("rm -rf / result is {}", deleted);
|
||||||
assertPathDoesNotExist("expected file to be deleted", file);
|
if (deleted) {
|
||||||
} else {
|
assertPathDoesNotExist("expected file to be deleted", file);
|
||||||
assertPathExists("expected file to be preserved", file);;
|
} else {
|
||||||
|
assertPathExists("expected file to be preserved", file);
|
||||||
|
}
|
||||||
|
} finally{
|
||||||
|
getFileSystem().delete(file, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -185,28 +191,57 @@ public abstract class AbstractContractRootDirectoryTest extends AbstractFSContra
|
||||||
for (FileStatus status : statuses) {
|
for (FileStatus status : statuses) {
|
||||||
ContractTestUtils.assertDeleted(fs, status.getPath(), true);
|
ContractTestUtils.assertDeleted(fs, status.getPath(), true);
|
||||||
}
|
}
|
||||||
assertEquals("listStatus on empty root-directory returned a non-empty list",
|
FileStatus[] rootListStatus = fs.listStatus(root);
|
||||||
0, fs.listStatus(root).length);
|
assertEquals("listStatus on empty root-directory returned found: "
|
||||||
assertFalse("listFiles(/, false).hasNext",
|
+ join("\n", rootListStatus),
|
||||||
fs.listFiles(root, false).hasNext());
|
0, rootListStatus.length);
|
||||||
assertFalse("listFiles(/, true).hasNext",
|
assertNoElements("listFiles(/, false)",
|
||||||
fs.listFiles(root, true).hasNext());
|
fs.listFiles(root, false));
|
||||||
assertFalse("listLocatedStatus(/).hasNext",
|
assertNoElements("listFiles(/, true)",
|
||||||
fs.listLocatedStatus(root).hasNext());
|
fs.listFiles(root, true));
|
||||||
|
assertNoElements("listLocatedStatus(/)",
|
||||||
|
fs.listLocatedStatus(root));
|
||||||
assertIsDirectory(root);
|
assertIsDirectory(root);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that an iterator has no elements; the raised exception
|
||||||
|
* will include the element list.
|
||||||
|
* @param operation operation for assertion text.
|
||||||
|
* @param iter iterator
|
||||||
|
* @throws IOException failure retrieving the values.
|
||||||
|
*/
|
||||||
|
protected void assertNoElements(String operation,
|
||||||
|
RemoteIterator<LocatedFileStatus> iter) throws IOException {
|
||||||
|
List<LocatedFileStatus> resultList = toList(iter);
|
||||||
|
if (!resultList.isEmpty()) {
|
||||||
|
fail("Expected no results from " + operation + ", but got "
|
||||||
|
+ resultList.size() + " elements:\n"
|
||||||
|
+ join(resultList, "\n"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testSimpleRootListing() throws IOException {
|
public void testSimpleRootListing() throws IOException {
|
||||||
describe("test the nonrecursive root listing calls");
|
describe("test the nonrecursive root listing calls");
|
||||||
FileSystem fs = getFileSystem();
|
FileSystem fs = getFileSystem();
|
||||||
Path root = new Path("/");
|
Path root = new Path("/");
|
||||||
FileStatus[] statuses = fs.listStatus(root);
|
FileStatus[] statuses = fs.listStatus(root);
|
||||||
|
String listStatusResult = join(statuses, "\n");
|
||||||
List<LocatedFileStatus> locatedStatusList = toList(
|
List<LocatedFileStatus> locatedStatusList = toList(
|
||||||
fs.listLocatedStatus(root));
|
fs.listLocatedStatus(root));
|
||||||
assertEquals(statuses.length, locatedStatusList.size());
|
String locatedStatusResult = join(locatedStatusList, "\n");
|
||||||
|
|
||||||
|
assertEquals("listStatus(/) vs listLocatedStatus(/) with \n"
|
||||||
|
+ "listStatus =" + listStatusResult
|
||||||
|
+" listLocatedStatus = " + locatedStatusResult,
|
||||||
|
statuses.length, locatedStatusList.size());
|
||||||
List<LocatedFileStatus> fileList = toList(fs.listFiles(root, false));
|
List<LocatedFileStatus> fileList = toList(fs.listFiles(root, false));
|
||||||
assertTrue(fileList.size() <= statuses.length);
|
String listFilesResult = join(fileList, "\n");
|
||||||
|
assertTrue("listStatus(/) vs listFiles(/, false) with \n"
|
||||||
|
+ "listStatus = " + listStatusResult
|
||||||
|
+ "listFiles = " + listFilesResult,
|
||||||
|
fileList.size() <= statuses.length);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
|
|
|
@ -29,7 +29,7 @@ import org.apache.hadoop.fs.RemoteIterator;
|
||||||
import org.apache.hadoop.fs.StreamCapabilities;
|
import org.apache.hadoop.fs.StreamCapabilities;
|
||||||
import org.apache.hadoop.io.IOUtils;
|
import org.apache.hadoop.io.IOUtils;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.internal.AssumptionViolatedException;
|
import org.junit.AssumptionViolatedException;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
@ -457,8 +457,10 @@ public class ContractTestUtils extends Assert {
|
||||||
public static FileStatus[] deleteChildren(FileSystem fileSystem,
|
public static FileStatus[] deleteChildren(FileSystem fileSystem,
|
||||||
Path path,
|
Path path,
|
||||||
boolean recursive) throws IOException {
|
boolean recursive) throws IOException {
|
||||||
|
LOG.debug("Deleting children of {} (recursive={})", path, recursive);
|
||||||
FileStatus[] children = listChildren(fileSystem, path);
|
FileStatus[] children = listChildren(fileSystem, path);
|
||||||
for (FileStatus entry : children) {
|
for (FileStatus entry : children) {
|
||||||
|
LOG.debug("Deleting {}", entry);
|
||||||
fileSystem.delete(entry.getPath(), recursive);
|
fileSystem.delete(entry.getPath(), recursive);
|
||||||
}
|
}
|
||||||
return children;
|
return children;
|
||||||
|
|
|
@ -186,7 +186,12 @@
|
||||||
<exclude>**/ITestS3AHuge*.java</exclude>
|
<exclude>**/ITestS3AHuge*.java</exclude>
|
||||||
<!-- this sets out to overlaod DynamoDB, so must be run standalone -->
|
<!-- this sets out to overlaod DynamoDB, so must be run standalone -->
|
||||||
<exclude>**/ITestDynamoDBMetadataStoreScale.java</exclude>
|
<exclude>**/ITestDynamoDBMetadataStoreScale.java</exclude>
|
||||||
|
<!-- Terasort MR jobs spawn enough processes that they use up all RAM -->
|
||||||
<exclude>**/ITestTerasort*.java</exclude>
|
<exclude>**/ITestTerasort*.java</exclude>
|
||||||
|
<!-- MR jobs spawn enough processes that they use up all RAM -->
|
||||||
|
<exclude>**/ITest*CommitMRJob.java</exclude>
|
||||||
|
<!-- operations across the metastore -->
|
||||||
|
<exclude>**/ITestS3GuardDDBRootOperations.java</exclude>
|
||||||
</excludes>
|
</excludes>
|
||||||
</configuration>
|
</configuration>
|
||||||
</execution>
|
</execution>
|
||||||
|
@ -215,15 +220,22 @@
|
||||||
<!-- Do a sequential run for tests that cannot handle -->
|
<!-- Do a sequential run for tests that cannot handle -->
|
||||||
<!-- parallel execution. -->
|
<!-- parallel execution. -->
|
||||||
<includes>
|
<includes>
|
||||||
<include>**/ITestS3AContractRootDir.java</include>
|
|
||||||
<include>**/ITestS3AFileContextStatistics.java</include>
|
<include>**/ITestS3AFileContextStatistics.java</include>
|
||||||
|
<!-- large uploads consuming all bandwidth -->
|
||||||
<include>**/ITestS3AHuge*.java</include>
|
<include>**/ITestS3AHuge*.java</include>
|
||||||
|
<!-- SSE encrypted files confuse everything else -->
|
||||||
<include>**/ITestS3AEncryptionSSEC*.java</include>
|
<include>**/ITestS3AEncryptionSSEC*.java</include>
|
||||||
<!-- this sets out to overlaod DynamoDB, so must be run standalone -->
|
<!-- this sets out to overlaod DynamoDB, so must be run standalone -->
|
||||||
<include>**/ITestDynamoDBMetadataStoreScale.java</include>
|
<include>**/ITestDynamoDBMetadataStoreScale.java</include>
|
||||||
<!-- the terasort tests both work with a file in the same path in -->
|
<!-- the terasort tests both work with a file in the same path in -->
|
||||||
<!-- the local FS. Running them sequentially guarantees isolation -->
|
<!-- the local FS. Running them sequentially guarantees isolation -->
|
||||||
|
<!-- and that they don't conflict with the other MR jobs for RAM -->
|
||||||
<include>**/ITestTerasort*.java</include>
|
<include>**/ITestTerasort*.java</include>
|
||||||
|
<!-- MR jobs spawn enough processes that they use up all RAM -->
|
||||||
|
<include>**/ITest*CommitMRJob.java</include>
|
||||||
|
<!-- operations across the metastore -->
|
||||||
|
<include>**/ITestS3AContractRootDir.java</include>
|
||||||
|
<include>**/ITestS3GuardDDBRootOperations.java</include>
|
||||||
</includes>
|
</includes>
|
||||||
</configuration>
|
</configuration>
|
||||||
</execution>
|
</execution>
|
||||||
|
|
|
@ -21,6 +21,8 @@ package org.apache.hadoop.fs.s3a;
|
||||||
import com.amazonaws.AmazonClientException;
|
import com.amazonaws.AmazonClientException;
|
||||||
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
import com.amazonaws.services.s3.model.S3ObjectSummary;
|
||||||
import com.google.common.annotations.VisibleForTesting;
|
import com.google.common.annotations.VisibleForTesting;
|
||||||
|
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.LocatedFileStatus;
|
import org.apache.hadoop.fs.LocatedFileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
|
@ -50,6 +52,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
|
||||||
/**
|
/**
|
||||||
* Place for the S3A listing classes; keeps all the small classes under control.
|
* Place for the S3A listing classes; keeps all the small classes under control.
|
||||||
*/
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
public class Listing {
|
public class Listing {
|
||||||
|
|
||||||
private final S3AFileSystem owner;
|
private final S3AFileSystem owner;
|
||||||
|
@ -87,7 +90,7 @@ public class Listing {
|
||||||
* @return the iterator
|
* @return the iterator
|
||||||
* @throws IOException IO Problems
|
* @throws IOException IO Problems
|
||||||
*/
|
*/
|
||||||
FileStatusListingIterator createFileStatusListingIterator(
|
public FileStatusListingIterator createFileStatusListingIterator(
|
||||||
Path listPath,
|
Path listPath,
|
||||||
S3ListRequest request,
|
S3ListRequest request,
|
||||||
PathFilter filter,
|
PathFilter filter,
|
||||||
|
@ -110,7 +113,7 @@ public class Listing {
|
||||||
* @throws IOException IO Problems
|
* @throws IOException IO Problems
|
||||||
*/
|
*/
|
||||||
@Retries.RetryRaw
|
@Retries.RetryRaw
|
||||||
FileStatusListingIterator createFileStatusListingIterator(
|
public FileStatusListingIterator createFileStatusListingIterator(
|
||||||
Path listPath,
|
Path listPath,
|
||||||
S3ListRequest request,
|
S3ListRequest request,
|
||||||
PathFilter filter,
|
PathFilter filter,
|
||||||
|
@ -129,7 +132,7 @@ public class Listing {
|
||||||
* @return a new remote iterator
|
* @return a new remote iterator
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
LocatedFileStatusIterator createLocatedFileStatusIterator(
|
public LocatedFileStatusIterator createLocatedFileStatusIterator(
|
||||||
RemoteIterator<S3AFileStatus> statusIterator) {
|
RemoteIterator<S3AFileStatus> statusIterator) {
|
||||||
return new LocatedFileStatusIterator(statusIterator);
|
return new LocatedFileStatusIterator(statusIterator);
|
||||||
}
|
}
|
||||||
|
@ -789,7 +792,7 @@ public class Listing {
|
||||||
* Accept all entries except the base path and those which map to S3N
|
* Accept all entries except the base path and those which map to S3N
|
||||||
* pseudo directory markers.
|
* pseudo directory markers.
|
||||||
*/
|
*/
|
||||||
static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {
|
public static class AcceptAllButSelfAndS3nDirs implements FileStatusAcceptor {
|
||||||
|
|
||||||
/** Base path. */
|
/** Base path. */
|
||||||
private final Path qualifiedPath;
|
private final Path qualifiedPath;
|
||||||
|
|
|
@ -1588,7 +1588,8 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
||||||
* @throws IOException if the retry invocation raises one (it shouldn't).
|
* @throws IOException if the retry invocation raises one (it shouldn't).
|
||||||
*/
|
*/
|
||||||
@Retries.RetryRaw
|
@Retries.RetryRaw
|
||||||
protected ObjectMetadata getObjectMetadata(String key) throws IOException {
|
@VisibleForTesting
|
||||||
|
ObjectMetadata getObjectMetadata(String key) throws IOException {
|
||||||
return getObjectMetadata(key, null, invoker,null);
|
return getObjectMetadata(key, null, invoker,null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2231,6 +2232,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
||||||
throws IOException, AmazonClientException {
|
throws IOException, AmazonClientException {
|
||||||
Path f = status.getPath();
|
Path f = status.getPath();
|
||||||
LOG.debug("Delete path {} - recursive {}", f, recursive);
|
LOG.debug("Delete path {} - recursive {}", f, recursive);
|
||||||
|
LOG.debug("Type = {}",
|
||||||
|
status.isFile() ? "File"
|
||||||
|
: (status.isEmptyDirectory() == Tristate.TRUE
|
||||||
|
? "Empty Directory"
|
||||||
|
: "Directory"));
|
||||||
|
|
||||||
String key = pathToKey(f);
|
String key = pathToKey(f);
|
||||||
|
|
||||||
|
@ -2290,7 +2296,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
||||||
metadataStore.deleteSubtree(f, ttlTimeProvider);
|
metadataStore.deleteSubtree(f, ttlTimeProvider);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
LOG.debug("delete: Path is a file");
|
LOG.debug("delete: Path is a file: {}", key);
|
||||||
deleteObjectAtPath(f, key, true);
|
deleteObjectAtPath(f, key, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2439,7 +2445,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities,
|
||||||
* @return the request
|
* @return the request
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
S3ListRequest createListObjectsRequest(String key,
|
public S3ListRequest createListObjectsRequest(String key,
|
||||||
String delimiter) {
|
String delimiter) {
|
||||||
return createListObjectsRequest(key, delimiter, null);
|
return createListObjectsRequest(key, delimiter, null);
|
||||||
}
|
}
|
||||||
|
|
|
@ -60,4 +60,19 @@ public class S3ALocatedFileStatus extends LocatedFileStatus {
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return super.hashCode();
|
return super.hashCode();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate an S3AFileStatus instance, including etag and
|
||||||
|
* version ID, if present.
|
||||||
|
*/
|
||||||
|
public S3AFileStatus toS3AFileStatus() {
|
||||||
|
return new S3AFileStatus(
|
||||||
|
getLen(),
|
||||||
|
getModificationTime(),
|
||||||
|
getPath(),
|
||||||
|
getBlockSize(),
|
||||||
|
getOwner(),
|
||||||
|
getETag(),
|
||||||
|
getVersionId());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -538,7 +538,7 @@ public class WriteOperationHelper {
|
||||||
public BulkOperationState initiateCommitOperation(
|
public BulkOperationState initiateCommitOperation(
|
||||||
Path path) throws IOException {
|
Path path) throws IOException {
|
||||||
return S3Guard.initiateBulkWrite(owner.getMetadataStore(),
|
return S3Guard.initiateBulkWrite(owner.getMetadataStore(),
|
||||||
BulkOperationState.OperationType.Put, path);
|
BulkOperationState.OperationType.Commit, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -0,0 +1,223 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
import org.apache.hadoop.service.launcher.AbstractLaunchableService;
|
||||||
|
import org.apache.hadoop.service.launcher.ServiceLaunchException;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_FAIL;
|
||||||
|
import static org.apache.hadoop.service.launcher.LauncherExitCodes.EXIT_USAGE;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Entry point for S3Guard diagnostics operations against DynamoDB tables.
|
||||||
|
*/
|
||||||
|
public class AbstractS3GuardDynamoDBDiagnostic
|
||||||
|
extends AbstractLaunchableService {
|
||||||
|
|
||||||
|
private S3AFileSystem filesystem;
|
||||||
|
|
||||||
|
private DynamoDBMetadataStore store;
|
||||||
|
|
||||||
|
private URI uri;
|
||||||
|
|
||||||
|
private List<String> arguments;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor.
|
||||||
|
* @param name entry point name.
|
||||||
|
*/
|
||||||
|
public AbstractS3GuardDynamoDBDiagnostic(final String name) {
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructor. If the store is set then that is the store for the operation,
|
||||||
|
* otherwise the filesystem's binding is used instead.
|
||||||
|
* @param name entry point name.
|
||||||
|
* @param filesystem filesystem
|
||||||
|
* @param store optional metastore.
|
||||||
|
* @param uri URI. Must be set if filesystem == null.
|
||||||
|
*/
|
||||||
|
public AbstractS3GuardDynamoDBDiagnostic(
|
||||||
|
final String name,
|
||||||
|
@Nullable final S3AFileSystem filesystem,
|
||||||
|
@Nullable final DynamoDBMetadataStore store,
|
||||||
|
@Nullable final URI uri) {
|
||||||
|
super(name);
|
||||||
|
this.store = store;
|
||||||
|
this.filesystem = filesystem;
|
||||||
|
if (store == null) {
|
||||||
|
require(filesystem != null, "No filesystem or URI");
|
||||||
|
bindStore(filesystem);
|
||||||
|
}
|
||||||
|
if (uri == null) {
|
||||||
|
require(filesystem != null, "No filesystem or URI");
|
||||||
|
setUri(filesystem.getUri());
|
||||||
|
} else {
|
||||||
|
setUri(uri);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Require a condition to hold, otherwise an exception is thrown.
|
||||||
|
* @param condition condition to be true
|
||||||
|
* @param error text on failure.
|
||||||
|
* @throws ServiceLaunchException if the condition is not met
|
||||||
|
*/
|
||||||
|
protected static void require(boolean condition, String error) {
|
||||||
|
if (!condition) {
|
||||||
|
throw failure(error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a failure exception for throwing.
|
||||||
|
* @param message message
|
||||||
|
* @param ex optional nested exception.
|
||||||
|
* @return an exception to throw
|
||||||
|
*/
|
||||||
|
protected static ServiceLaunchException failure(String message,
|
||||||
|
Throwable ex) {
|
||||||
|
return new ServiceLaunchException(EXIT_FAIL, message, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Generate a failure exception for throwing.
|
||||||
|
* @param message message
|
||||||
|
* @return an exception to throw
|
||||||
|
*/
|
||||||
|
protected static ServiceLaunchException failure(String message) {
|
||||||
|
return new ServiceLaunchException(EXIT_FAIL, message);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Configuration bindArgs(final Configuration config,
|
||||||
|
final List<String> args)
|
||||||
|
throws Exception {
|
||||||
|
this.arguments = args;
|
||||||
|
return super.bindArgs(config, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the argument list.
|
||||||
|
* @return the argument list.
|
||||||
|
*/
|
||||||
|
protected List<String> getArguments() {
|
||||||
|
return arguments;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bind to the store from a CLI argument.
|
||||||
|
* @param fsURI filesystem URI
|
||||||
|
* @throws IOException failure
|
||||||
|
*/
|
||||||
|
protected void bindFromCLI(String fsURI)
|
||||||
|
throws IOException {
|
||||||
|
Configuration conf = getConfig();
|
||||||
|
setUri(fsURI);
|
||||||
|
FileSystem fs = FileSystem.get(getUri(), conf);
|
||||||
|
require(fs instanceof S3AFileSystem,
|
||||||
|
"Not an S3A Filesystem: " + fsURI);
|
||||||
|
filesystem = (S3AFileSystem) fs;
|
||||||
|
bindStore(filesystem);
|
||||||
|
setUri(fs.getUri());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Binds the {@link #store} field to the metastore of
|
||||||
|
* the filesystem -which must have a DDB metastore.
|
||||||
|
* @param fs filesystem to bind the store to.
|
||||||
|
*/
|
||||||
|
private void bindStore(final S3AFileSystem fs) {
|
||||||
|
require(fs.hasMetadataStore(),
|
||||||
|
"Filesystem has no metadata store: " + fs.getUri());
|
||||||
|
MetadataStore ms = fs.getMetadataStore();
|
||||||
|
require(ms instanceof DynamoDBMetadataStore,
|
||||||
|
"Filesystem " + fs.getUri()
|
||||||
|
+ " does not have a DynamoDB metadata store: " + ms);
|
||||||
|
store = (DynamoDBMetadataStore) ms;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected DynamoDBMetadataStore getStore() {
|
||||||
|
return store;
|
||||||
|
}
|
||||||
|
|
||||||
|
public S3AFileSystem getFilesystem() {
|
||||||
|
return filesystem;
|
||||||
|
}
|
||||||
|
|
||||||
|
public URI getUri() {
|
||||||
|
return uri;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setUri(final URI uri) {
|
||||||
|
String fsURI = uri.toString();
|
||||||
|
if (!fsURI.endsWith("/")) {
|
||||||
|
setUri(fsURI);
|
||||||
|
} else {
|
||||||
|
this.uri = uri;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the URI from a string; will add a "/" if needed.
|
||||||
|
* @param fsURI filesystem URI.
|
||||||
|
* @throws RuntimeException if the fsURI parameter is not a valid URI.
|
||||||
|
*/
|
||||||
|
public void setUri(String fsURI) {
|
||||||
|
if (fsURI != null) {
|
||||||
|
if (!fsURI.endsWith("/")) {
|
||||||
|
fsURI += "/";
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
setUri(new URI(fsURI));
|
||||||
|
} catch (URISyntaxException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the list of arguments, after validating the list size.
|
||||||
|
* @param argMin minimum number of entries.
|
||||||
|
* @param argMax maximum number of entries.
|
||||||
|
* @param usage Usage message.
|
||||||
|
* @return the argument list, which will be in the range.
|
||||||
|
* @throws ServiceLaunchException if the argument list is not valid.
|
||||||
|
*/
|
||||||
|
protected List<String> getArgumentList(final int argMin,
|
||||||
|
final int argMax,
|
||||||
|
final String usage) {
|
||||||
|
List<String> arg = getArguments();
|
||||||
|
if (arg == null || arg.size() < argMin || arg.size() > argMax) {
|
||||||
|
// no arguments: usage message
|
||||||
|
throw new ServiceLaunchException(EXIT_USAGE, usage);
|
||||||
|
}
|
||||||
|
return arg;
|
||||||
|
}
|
||||||
|
}
|
|
@ -78,5 +78,11 @@ public class BulkOperationState implements Closeable {
|
||||||
Rename,
|
Rename,
|
||||||
/** Pruning: deleting entries and updating parents. */
|
/** Pruning: deleting entries and updating parents. */
|
||||||
Prune,
|
Prune,
|
||||||
|
/** Commit operation. */
|
||||||
|
Commit,
|
||||||
|
/** Deletion operation. */
|
||||||
|
Delete,
|
||||||
|
/** FSCK operation. */
|
||||||
|
Fsck
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,787 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.io.Closeable;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.PrintWriter;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.ArrayDeque;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Date;
|
||||||
|
import java.util.Deque;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
|
||||||
|
import com.google.common.collect.Lists;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.PathIOException;
|
||||||
|
import org.apache.hadoop.fs.RemoteIterator;
|
||||||
|
import org.apache.hadoop.fs.s3a.Listing;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileStatus;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3ListRequest;
|
||||||
|
import org.apache.hadoop.service.Service;
|
||||||
|
import org.apache.hadoop.service.launcher.LauncherExitCodes;
|
||||||
|
import org.apache.hadoop.service.launcher.ServiceLaunchException;
|
||||||
|
import org.apache.hadoop.service.launcher.ServiceLauncher;
|
||||||
|
import org.apache.hadoop.util.DurationInfo;
|
||||||
|
import org.apache.hadoop.util.ExitUtil;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkNotNull;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3AUtils.ACCEPT_ALL;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a low-level diagnostics entry point which does a CVE/TSV dump of
|
||||||
|
* the DDB state.
|
||||||
|
* As it also lists the filesystem, it actually changes the state of the store
|
||||||
|
* during the operation.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public class DumpS3GuardDynamoTable extends AbstractS3GuardDynamoDBDiagnostic {
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(DumpS3GuardDynamoTable.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Application name.
|
||||||
|
*/
|
||||||
|
public static final String NAME = "DumpS3GuardDynamoTable";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Usage.
|
||||||
|
*/
|
||||||
|
private static final String USAGE_MESSAGE = NAME
|
||||||
|
+ " <filesystem> <dest-file>";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix for the flat list: {@value}.
|
||||||
|
*/
|
||||||
|
public static final String FLAT_CSV = "-flat.csv";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix for the raw S3 dump: {@value}.
|
||||||
|
*/
|
||||||
|
public static final String RAW_CSV = "-s3.csv";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix for the DDB scan: {@value}.
|
||||||
|
*/
|
||||||
|
public static final String SCAN_CSV = "-scan.csv";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix for the second DDB scan: : {@value}.
|
||||||
|
*/
|
||||||
|
public static final String SCAN2_CSV = "-scan-2.csv";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix for the treewalk scan of the S3A Filesystem: {@value}.
|
||||||
|
*/
|
||||||
|
public static final String TREE_CSV = "-tree.csv";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Suffix for a recursive treewalk through the metastore: {@value}.
|
||||||
|
*/
|
||||||
|
public static final String STORE_CSV = "-store.csv";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Path in the local filesystem to save the data.
|
||||||
|
*/
|
||||||
|
private String destPath;
|
||||||
|
|
||||||
|
private Pair<Long, Long> scanEntryResult;
|
||||||
|
|
||||||
|
private Pair<Long, Long> secondScanResult;
|
||||||
|
|
||||||
|
private long rawObjectStoreCount;
|
||||||
|
|
||||||
|
private long listStatusCount;
|
||||||
|
|
||||||
|
private long treewalkCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate.
|
||||||
|
* @param name application name.
|
||||||
|
*/
|
||||||
|
public DumpS3GuardDynamoTable(final String name) {
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate with default name.
|
||||||
|
*/
|
||||||
|
public DumpS3GuardDynamoTable() {
|
||||||
|
this(NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bind to a specific FS + store.
|
||||||
|
* @param fs filesystem
|
||||||
|
* @param store metastore to use
|
||||||
|
* @param destFile the base filename for output
|
||||||
|
* @param uri URI of store -only needed if FS is null.
|
||||||
|
*/
|
||||||
|
public DumpS3GuardDynamoTable(
|
||||||
|
final S3AFileSystem fs,
|
||||||
|
final DynamoDBMetadataStore store,
|
||||||
|
final File destFile,
|
||||||
|
final URI uri) {
|
||||||
|
super(NAME, fs, store, uri);
|
||||||
|
this.destPath = destFile.getAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bind to the argument list, including validating the CLI.
|
||||||
|
* @throws Exception failure.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void serviceStart() throws Exception {
|
||||||
|
if (getStore() == null) {
|
||||||
|
List<String> arg = getArgumentList(2, 2, USAGE_MESSAGE);
|
||||||
|
bindFromCLI(arg.get(0));
|
||||||
|
destPath = arg.get(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump the filesystem and the metastore.
|
||||||
|
* @return the exit code.
|
||||||
|
* @throws ServiceLaunchException on failure.
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int execute() throws ServiceLaunchException, IOException {
|
||||||
|
|
||||||
|
try {
|
||||||
|
final File scanFile = new File(
|
||||||
|
destPath + SCAN_CSV).getCanonicalFile();
|
||||||
|
File parentDir = scanFile.getParentFile();
|
||||||
|
if (!parentDir.mkdirs() && !parentDir.isDirectory()) {
|
||||||
|
throw new PathIOException(parentDir.toString(),
|
||||||
|
"Could not create destination directory");
|
||||||
|
}
|
||||||
|
|
||||||
|
try (CsvFile csv = new CsvFile(scanFile);
|
||||||
|
DurationInfo ignored = new DurationInfo(LOG,
|
||||||
|
"scanFile dump to %s", scanFile)) {
|
||||||
|
scanEntryResult = scanMetastore(csv);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (getFilesystem() != null) {
|
||||||
|
|
||||||
|
Path basePath = getFilesystem().qualify(new Path(getUri()));
|
||||||
|
|
||||||
|
final File destFile = new File(destPath + STORE_CSV)
|
||||||
|
.getCanonicalFile();
|
||||||
|
LOG.info("Writing Store details to {}", destFile);
|
||||||
|
try (CsvFile csv = new CsvFile(destFile);
|
||||||
|
DurationInfo ignored = new DurationInfo(LOG, "List metastore")) {
|
||||||
|
|
||||||
|
LOG.info("Base path: {}", basePath);
|
||||||
|
dumpMetastore(csv, basePath);
|
||||||
|
}
|
||||||
|
|
||||||
|
// these operations all update the metastore as they list,
|
||||||
|
// that is: they are side-effecting.
|
||||||
|
final File treewalkFile = new File(destPath + TREE_CSV)
|
||||||
|
.getCanonicalFile();
|
||||||
|
|
||||||
|
try (CsvFile csv = new CsvFile(treewalkFile);
|
||||||
|
DurationInfo ignored = new DurationInfo(LOG,
|
||||||
|
"Treewalk to %s", treewalkFile)) {
|
||||||
|
treewalkCount = treewalkFilesystem(csv, basePath);
|
||||||
|
}
|
||||||
|
final File flatlistFile = new File(
|
||||||
|
destPath + FLAT_CSV).getCanonicalFile();
|
||||||
|
|
||||||
|
try (CsvFile csv = new CsvFile(flatlistFile);
|
||||||
|
DurationInfo ignored = new DurationInfo(LOG,
|
||||||
|
"Flat list to %s", flatlistFile)) {
|
||||||
|
listStatusCount = listStatusFilesystem(csv, basePath);
|
||||||
|
}
|
||||||
|
final File rawFile = new File(
|
||||||
|
destPath + RAW_CSV).getCanonicalFile();
|
||||||
|
|
||||||
|
try (CsvFile csv = new CsvFile(rawFile);
|
||||||
|
DurationInfo ignored = new DurationInfo(LOG,
|
||||||
|
"Raw dump to %s", rawFile)) {
|
||||||
|
rawObjectStoreCount = dumpRawS3ObjectStore(csv);
|
||||||
|
}
|
||||||
|
final File scanFile2 = new File(
|
||||||
|
destPath + SCAN2_CSV).getCanonicalFile();
|
||||||
|
|
||||||
|
try (CsvFile csv = new CsvFile(scanFile);
|
||||||
|
DurationInfo ignored = new DurationInfo(LOG,
|
||||||
|
"scanFile dump to %s", scanFile2)) {
|
||||||
|
secondScanResult = scanMetastore(csv);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return LauncherExitCodes.EXIT_SUCCESS;
|
||||||
|
} catch (IOException | RuntimeException e) {
|
||||||
|
LOG.error("failure", e);
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Push all elements of a list to a queue, such that the first entry
|
||||||
|
* on the list becomes the head of the queue.
|
||||||
|
* @param queue queue to update
|
||||||
|
* @param entries list of entries to add.
|
||||||
|
* @param <T> type of queue
|
||||||
|
*/
|
||||||
|
private <T> void pushAll(Deque<T> queue, List<T> entries) {
|
||||||
|
List<T> reversed = Lists.reverse(entries);
|
||||||
|
for (T t : reversed) {
|
||||||
|
queue.push(t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump the filesystem via a treewalk.
|
||||||
|
* If metastore entries mark directories as deleted, this
|
||||||
|
* walk will not explore them.
|
||||||
|
* @param csv destination.
|
||||||
|
* @param base base path.
|
||||||
|
* @return number of entries found.
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected long treewalkFilesystem(
|
||||||
|
final CsvFile csv,
|
||||||
|
final Path base) throws IOException {
|
||||||
|
ArrayDeque<Path> queue = new ArrayDeque<>();
|
||||||
|
queue.add(base);
|
||||||
|
long count = 0;
|
||||||
|
while (!queue.isEmpty()) {
|
||||||
|
Path path = queue.pop();
|
||||||
|
count++;
|
||||||
|
FileStatus[] fileStatuses;
|
||||||
|
try {
|
||||||
|
fileStatuses = getFilesystem().listStatus(path);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
LOG.warn("File {} was not found", path);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// entries
|
||||||
|
for (FileStatus fileStatus : fileStatuses) {
|
||||||
|
csv.entry((S3AFileStatus) fileStatus);
|
||||||
|
}
|
||||||
|
// scan through the list, building up a reverse list of all directories
|
||||||
|
// found.
|
||||||
|
List<Path> dirs = new ArrayList<>(fileStatuses.length);
|
||||||
|
for (FileStatus fileStatus : fileStatuses) {
|
||||||
|
if (fileStatus.isDirectory()
|
||||||
|
&& !(fileStatus.getPath().equals(path))) {
|
||||||
|
// directory: add to the end of the queue.
|
||||||
|
dirs.add(fileStatus.getPath());
|
||||||
|
} else {
|
||||||
|
// file: just increment the count
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
// now push the dirs list in reverse
|
||||||
|
// so that they have been added in the sort order as returned.
|
||||||
|
pushAll(queue, dirs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump the filesystem via a recursive listStatus call.
|
||||||
|
* @param csv destination.
|
||||||
|
* @return number of entries found.
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected long listStatusFilesystem(
|
||||||
|
final CsvFile csv,
|
||||||
|
final Path path) throws IOException {
|
||||||
|
long count = 0;
|
||||||
|
RemoteIterator<S3ALocatedFileStatus> iterator = getFilesystem()
|
||||||
|
.listFilesAndEmptyDirectories(path, true);
|
||||||
|
while (iterator.hasNext()) {
|
||||||
|
S3ALocatedFileStatus status = iterator.next();
|
||||||
|
csv.entry(status.toS3AFileStatus());
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump the raw S3 Object Store.
|
||||||
|
* @param csv destination.
|
||||||
|
* @return number of entries found.
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected long dumpRawS3ObjectStore(
|
||||||
|
final CsvFile csv) throws IOException {
|
||||||
|
S3AFileSystem fs = getFilesystem();
|
||||||
|
Path rootPath = fs.qualify(new Path("/"));
|
||||||
|
Listing listing = new Listing(fs);
|
||||||
|
S3ListRequest request = fs.createListObjectsRequest("", null);
|
||||||
|
long count = 0;
|
||||||
|
RemoteIterator<S3AFileStatus> st =
|
||||||
|
listing.createFileStatusListingIterator(rootPath, request,
|
||||||
|
ACCEPT_ALL,
|
||||||
|
new Listing.AcceptAllButSelfAndS3nDirs(rootPath));
|
||||||
|
while (st.hasNext()) {
|
||||||
|
count++;
|
||||||
|
S3AFileStatus next = st.next();
|
||||||
|
LOG.debug("[{}] {}", count, next);
|
||||||
|
csv.entry(next);
|
||||||
|
}
|
||||||
|
LOG.info("entry count: {}", count);
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* list children under the metastore from a base path, through
|
||||||
|
* a recursive query + walk strategy.
|
||||||
|
* @param csv dest
|
||||||
|
* @param basePath base path
|
||||||
|
* @throws IOException failure.
|
||||||
|
*/
|
||||||
|
protected void dumpMetastore(final CsvFile csv,
|
||||||
|
final Path basePath) throws IOException {
|
||||||
|
dumpStoreEntries(csv, getStore().listChildren(basePath));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Recursive Store Dump.
|
||||||
|
* @param csv open CSV file.
|
||||||
|
* @param dir directory listing
|
||||||
|
* @return (directories, files)
|
||||||
|
* @throws IOException failure
|
||||||
|
*/
|
||||||
|
private Pair<Long, Long> dumpStoreEntries(
|
||||||
|
CsvFile csv,
|
||||||
|
DirListingMetadata dir) throws IOException {
|
||||||
|
ArrayDeque<DirListingMetadata> queue = new ArrayDeque<>();
|
||||||
|
queue.add(dir);
|
||||||
|
long files = 0, dirs = 1;
|
||||||
|
while (!queue.isEmpty()) {
|
||||||
|
DirListingMetadata next = queue.pop();
|
||||||
|
List<DDBPathMetadata> childDirs = new ArrayList<>();
|
||||||
|
Collection<PathMetadata> listing = next.getListing();
|
||||||
|
// sort by name
|
||||||
|
List<PathMetadata> sorted = new ArrayList<>(listing);
|
||||||
|
sorted.sort(new PathOrderComparators.PathMetadataComparator(
|
||||||
|
(l, r) -> l.compareTo(r)));
|
||||||
|
|
||||||
|
for (PathMetadata pmd : sorted) {
|
||||||
|
DDBPathMetadata ddbMd = (DDBPathMetadata) pmd;
|
||||||
|
dumpEntry(csv, ddbMd);
|
||||||
|
if (ddbMd.getFileStatus().isDirectory()) {
|
||||||
|
childDirs.add(ddbMd);
|
||||||
|
} else {
|
||||||
|
files++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
List<DirListingMetadata> childMD = new ArrayList<>(childDirs.size());
|
||||||
|
for (DDBPathMetadata childDir : childDirs) {
|
||||||
|
childMD.add(getStore().listChildren(
|
||||||
|
childDir.getFileStatus().getPath()));
|
||||||
|
}
|
||||||
|
pushAll(queue, childMD);
|
||||||
|
}
|
||||||
|
|
||||||
|
return Pair.of(dirs, files);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Dump a single entry, and log it.
|
||||||
|
* @param csv CSV output file.
|
||||||
|
* @param md metadata to log.
|
||||||
|
*/
|
||||||
|
private void dumpEntry(CsvFile csv, DDBPathMetadata md) {
|
||||||
|
LOG.debug("{}", md.prettyPrint());
|
||||||
|
csv.entry(md);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Scan the metastore for all entries and dump them.
|
||||||
|
* There's no attempt to sort the output.
|
||||||
|
* @param csv file
|
||||||
|
* @return tuple of (live entries, tombstones).
|
||||||
|
*/
|
||||||
|
private Pair<Long, Long> scanMetastore(CsvFile csv) {
|
||||||
|
S3GuardTableAccess tableAccess = new S3GuardTableAccess(getStore());
|
||||||
|
ExpressionSpecBuilder builder = new ExpressionSpecBuilder();
|
||||||
|
Iterable<DDBPathMetadata> results = tableAccess.scanMetadata(
|
||||||
|
builder);
|
||||||
|
long live = 0;
|
||||||
|
long tombstone = 0;
|
||||||
|
for (DDBPathMetadata md : results) {
|
||||||
|
if (!(md instanceof S3GuardTableAccess.VersionMarker)) {
|
||||||
|
// print it
|
||||||
|
csv.entry(md);
|
||||||
|
if (md.isDeleted()) {
|
||||||
|
tombstone++;
|
||||||
|
} else {
|
||||||
|
live++;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Pair.of(live, tombstone);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<Long, Long> getScanEntryResult() {
|
||||||
|
return scanEntryResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Pair<Long, Long> getSecondScanResult() {
|
||||||
|
return secondScanResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getRawObjectStoreCount() {
|
||||||
|
return rawObjectStoreCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getListStatusCount() {
|
||||||
|
return listStatusCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public long getTreewalkCount() {
|
||||||
|
return treewalkCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a timestamp in milliseconds to a human string.
|
||||||
|
* @param millis epoch time in millis
|
||||||
|
* @return a string for the CSV file.
|
||||||
|
*/
|
||||||
|
private static String stringify(long millis) {
|
||||||
|
return new Date(millis).toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the JVM entry point for the service launcher.
|
||||||
|
*
|
||||||
|
* Converts the arguments to a list, then invokes
|
||||||
|
* {@link #serviceMain(List, AbstractS3GuardDynamoDBDiagnostic)}.
|
||||||
|
* @param args command line arguments.
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) {
|
||||||
|
try {
|
||||||
|
serviceMain(Arrays.asList(args), new DumpS3GuardDynamoTable());
|
||||||
|
} catch (ExitUtil.ExitException e) {
|
||||||
|
ExitUtil.terminate(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The real main function, which takes the arguments as a list.
|
||||||
|
* Argument 0 MUST be the service classname
|
||||||
|
* @param argsList the list of arguments
|
||||||
|
* @param service service to launch.
|
||||||
|
*/
|
||||||
|
static void serviceMain(
|
||||||
|
final List<String> argsList,
|
||||||
|
final AbstractS3GuardDynamoDBDiagnostic service) {
|
||||||
|
ServiceLauncher<Service> serviceLauncher =
|
||||||
|
new ServiceLauncher<>(service.getName());
|
||||||
|
|
||||||
|
ExitUtil.ExitException ex = serviceLauncher.launchService(
|
||||||
|
new Configuration(),
|
||||||
|
service,
|
||||||
|
argsList,
|
||||||
|
false,
|
||||||
|
true);
|
||||||
|
if (ex != null) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Entry point to dump the metastore and s3 store world views
|
||||||
|
* <p>
|
||||||
|
* Both the FS and the store will be dumped: the store is scanned
|
||||||
|
* before and after the sequence to show what changes were made to
|
||||||
|
* the store during the list operation.
|
||||||
|
* @param fs fs to dump. If null a store must be provided.
|
||||||
|
* @param store store to dump (fallback to FS)
|
||||||
|
* @param conf configuration to use (fallback to fs)
|
||||||
|
* @param destFile base name of the output files.
|
||||||
|
* @param uri URI of store -only needed if FS is null.
|
||||||
|
* @throws ExitUtil.ExitException failure.
|
||||||
|
* @return the store
|
||||||
|
*/
|
||||||
|
public static DumpS3GuardDynamoTable dumpStore(
|
||||||
|
@Nullable final S3AFileSystem fs,
|
||||||
|
@Nullable DynamoDBMetadataStore store,
|
||||||
|
@Nullable Configuration conf,
|
||||||
|
final File destFile,
|
||||||
|
@Nullable URI uri) throws ExitUtil.ExitException {
|
||||||
|
ServiceLauncher<Service> serviceLauncher =
|
||||||
|
new ServiceLauncher<>(NAME);
|
||||||
|
|
||||||
|
if (conf == null) {
|
||||||
|
conf = checkNotNull(fs, "No filesystem").getConf();
|
||||||
|
}
|
||||||
|
if (store == null) {
|
||||||
|
store = (DynamoDBMetadataStore) checkNotNull(fs, "No filesystem")
|
||||||
|
.getMetadataStore();
|
||||||
|
}
|
||||||
|
DumpS3GuardDynamoTable dump = new DumpS3GuardDynamoTable(fs,
|
||||||
|
store,
|
||||||
|
destFile,
|
||||||
|
uri);
|
||||||
|
ExitUtil.ExitException ex = serviceLauncher.launchService(
|
||||||
|
conf,
|
||||||
|
dump,
|
||||||
|
Collections.emptyList(),
|
||||||
|
false,
|
||||||
|
true);
|
||||||
|
if (ex != null && ex.getExitCode() != 0) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
LOG.info("Results:");
|
||||||
|
Pair<Long, Long> r = dump.getScanEntryResult();
|
||||||
|
LOG.info("Metastore entries: {}", r);
|
||||||
|
LOG.info("Metastore scan total {}, entries {}, tombstones {}",
|
||||||
|
r.getLeft() + r.getRight(),
|
||||||
|
r.getLeft(),
|
||||||
|
r.getRight());
|
||||||
|
LOG.info("S3 count {}", dump.getRawObjectStoreCount());
|
||||||
|
LOG.info("Treewalk Count {}", dump.getTreewalkCount());
|
||||||
|
LOG.info("List Status Count {}", dump.getListStatusCount());
|
||||||
|
r = dump.getSecondScanResult();
|
||||||
|
if (r != null) {
|
||||||
|
LOG.info("Second metastore scan total {}, entries {}, tombstones {}",
|
||||||
|
r.getLeft() + r.getRight(),
|
||||||
|
r.getLeft(),
|
||||||
|
r.getRight());
|
||||||
|
}
|
||||||
|
return dump;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writer for generating test CSV files.
|
||||||
|
*
|
||||||
|
* Quotes are manged by passing in a long whose specific bits control
|
||||||
|
* whether or not a row is quoted, bit 0 for column 0, etc.
|
||||||
|
*
|
||||||
|
* There is no escaping of values here.
|
||||||
|
*/
|
||||||
|
private static final class CsvFile implements Closeable {
|
||||||
|
|
||||||
|
|
||||||
|
/** constant to quote all columns. */
|
||||||
|
public static final long ALL_QUOTES = 0x7fffffff;
|
||||||
|
|
||||||
|
/** least significant bit is used for first column; 1 mean 'quote'. */
|
||||||
|
public static final int ROW_QUOTE_MAP = 0b1110_1001_1111;
|
||||||
|
|
||||||
|
/** quote nothing: {@value}. */
|
||||||
|
public static final long NO_QUOTES = 0;
|
||||||
|
|
||||||
|
private final Path path;
|
||||||
|
|
||||||
|
private final PrintWriter out;
|
||||||
|
|
||||||
|
private final String separator;
|
||||||
|
|
||||||
|
private final String eol;
|
||||||
|
|
||||||
|
private final String quote;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create.
|
||||||
|
* @param path filesystem path.
|
||||||
|
* @param out output write.
|
||||||
|
* @param separator separator of entries.
|
||||||
|
* @param eol EOL marker.
|
||||||
|
* @param quote quote marker.
|
||||||
|
* @throws IOException failure.
|
||||||
|
*/
|
||||||
|
private CsvFile(
|
||||||
|
final Path path,
|
||||||
|
final PrintWriter out,
|
||||||
|
final String separator,
|
||||||
|
final String eol,
|
||||||
|
final String quote) throws IOException {
|
||||||
|
this.separator = checkNotNull(separator);
|
||||||
|
this.eol = checkNotNull(eol);
|
||||||
|
this.quote = checkNotNull(quote);
|
||||||
|
this.path = path;
|
||||||
|
this.out = checkNotNull(out);
|
||||||
|
header();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create to a file, with UTF-8 output and the standard
|
||||||
|
* options of the TSV file.
|
||||||
|
* @param file destination file.
|
||||||
|
* @throws IOException failure.
|
||||||
|
*/
|
||||||
|
private CsvFile(File file) throws IOException {
|
||||||
|
this(null,
|
||||||
|
new PrintWriter(file, "UTF-8"), "\t", "\n", "\"");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Close the file, if not already done.
|
||||||
|
* @throws IOException on a failure.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public synchronized void close() throws IOException {
|
||||||
|
if (out != null) {
|
||||||
|
out.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Path getPath() {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getSeparator() {
|
||||||
|
return separator;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEol() {
|
||||||
|
return eol;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a row.
|
||||||
|
* Entries are quoted if the bit for that column is true.
|
||||||
|
* @param quotes quote policy: every bit defines the rule for that element
|
||||||
|
* @param columns columns to write
|
||||||
|
* @return self for ease of chaining.
|
||||||
|
*/
|
||||||
|
public CsvFile row(long quotes, Object... columns) {
|
||||||
|
checkNotNull(out);
|
||||||
|
for (int i = 0; i < columns.length; i++) {
|
||||||
|
if (i != 0) {
|
||||||
|
out.write(separator);
|
||||||
|
}
|
||||||
|
boolean toQuote = (quotes & 1) == 1;
|
||||||
|
// unsigned right shift to make next column flag @ position 0
|
||||||
|
quotes = quotes >>> 1;
|
||||||
|
if (toQuote) {
|
||||||
|
out.write(quote);
|
||||||
|
}
|
||||||
|
Object column = columns[i];
|
||||||
|
out.write(column != null ? column.toString() : "");
|
||||||
|
if (toQuote) {
|
||||||
|
out.write(quote);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.write(eol);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a line.
|
||||||
|
* @param line line to print
|
||||||
|
* @return self for ease of chaining.
|
||||||
|
*/
|
||||||
|
public CsvFile line(String line) {
|
||||||
|
out.write(line);
|
||||||
|
out.write(eol);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the output stream.
|
||||||
|
* @return the stream.
|
||||||
|
*/
|
||||||
|
public PrintWriter getOut() {
|
||||||
|
return out;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Print the header.
|
||||||
|
*/
|
||||||
|
void header() {
|
||||||
|
row(CsvFile.ALL_QUOTES,
|
||||||
|
"type",
|
||||||
|
"deleted",
|
||||||
|
"path",
|
||||||
|
"is_auth_dir",
|
||||||
|
"is_empty_dir",
|
||||||
|
"len",
|
||||||
|
"updated",
|
||||||
|
"updated_s",
|
||||||
|
"last_modified",
|
||||||
|
"last_modified_s",
|
||||||
|
"etag",
|
||||||
|
"version");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a metadata entry.
|
||||||
|
* @param md metadata.
|
||||||
|
*/
|
||||||
|
void entry(DDBPathMetadata md) {
|
||||||
|
S3AFileStatus fileStatus = md.getFileStatus();
|
||||||
|
row(ROW_QUOTE_MAP,
|
||||||
|
fileStatus.isDirectory() ? "dir" : "file",
|
||||||
|
md.isDeleted(),
|
||||||
|
fileStatus.getPath().toString(),
|
||||||
|
md.isAuthoritativeDir(),
|
||||||
|
md.isEmptyDirectory().name(),
|
||||||
|
fileStatus.getLen(),
|
||||||
|
md.getLastUpdated(),
|
||||||
|
stringify(md.getLastUpdated()),
|
||||||
|
fileStatus.getModificationTime(),
|
||||||
|
stringify(fileStatus.getModificationTime()),
|
||||||
|
fileStatus.getETag(),
|
||||||
|
fileStatus.getVersionId());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* filesystem entry: no metadata.
|
||||||
|
* @param fileStatus file status
|
||||||
|
*/
|
||||||
|
void entry(S3AFileStatus fileStatus) {
|
||||||
|
row(ROW_QUOTE_MAP,
|
||||||
|
fileStatus.isDirectory() ? "dir" : "file",
|
||||||
|
"false",
|
||||||
|
fileStatus.getPath().toString(),
|
||||||
|
"",
|
||||||
|
fileStatus.isEmptyDirectory().name(),
|
||||||
|
fileStatus.getLen(),
|
||||||
|
"",
|
||||||
|
"",
|
||||||
|
fileStatus.getModificationTime(),
|
||||||
|
stringify(fileStatus.getModificationTime()),
|
||||||
|
fileStatus.getETag(),
|
||||||
|
fileStatus.getVersionId());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -107,6 +107,7 @@ import org.apache.hadoop.io.retry.RetryPolicies;
|
||||||
import org.apache.hadoop.io.retry.RetryPolicy;
|
import org.apache.hadoop.io.retry.RetryPolicy;
|
||||||
import org.apache.hadoop.security.UserGroupInformation;
|
import org.apache.hadoop.security.UserGroupInformation;
|
||||||
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
|
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
|
||||||
|
import org.apache.hadoop.util.DurationInfo;
|
||||||
import org.apache.hadoop.util.ReflectionUtils;
|
import org.apache.hadoop.util.ReflectionUtils;
|
||||||
|
|
||||||
import static org.apache.hadoop.fs.s3a.Constants.*;
|
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||||
|
@ -217,6 +218,19 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
public static final Logger LOG = LoggerFactory.getLogger(
|
public static final Logger LOG = LoggerFactory.getLogger(
|
||||||
DynamoDBMetadataStore.class);
|
DynamoDBMetadataStore.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Name of the operations log.
|
||||||
|
*/
|
||||||
|
public static final String OPERATIONS_LOG_NAME =
|
||||||
|
"org.apache.hadoop.fs.s3a.s3guard.Operations";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A log of all state changing operations to the store;
|
||||||
|
* only updated at debug level.
|
||||||
|
*/
|
||||||
|
public static final Logger OPERATIONS_LOG = LoggerFactory.getLogger(
|
||||||
|
OPERATIONS_LOG_NAME);
|
||||||
|
|
||||||
/** parent/child name to use in the version marker. */
|
/** parent/child name to use in the version marker. */
|
||||||
public static final String VERSION_MARKER = "../VERSION";
|
public static final String VERSION_MARKER = "../VERSION";
|
||||||
|
|
||||||
|
@ -528,14 +542,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
@Retries.RetryTranslated
|
@Retries.RetryTranslated
|
||||||
public void delete(Path path, ITtlTimeProvider ttlTimeProvider)
|
public void delete(Path path, ITtlTimeProvider ttlTimeProvider)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
innerDelete(path, true, ttlTimeProvider);
|
innerDelete(path, true, ttlTimeProvider, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Retries.RetryTranslated
|
@Retries.RetryTranslated
|
||||||
public void forgetMetadata(Path path) throws IOException {
|
public void forgetMetadata(Path path) throws IOException {
|
||||||
LOG.debug("Forget metadata for {}", path);
|
LOG.debug("Forget metadata for {}", path);
|
||||||
innerDelete(path, false, null);
|
innerDelete(path, false, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -546,11 +560,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
* @param tombstone flag to create a tombstone marker
|
* @param tombstone flag to create a tombstone marker
|
||||||
* @param ttlTimeProvider The time provider to set last_updated. Must not
|
* @param ttlTimeProvider The time provider to set last_updated. Must not
|
||||||
* be null if tombstone is true.
|
* be null if tombstone is true.
|
||||||
|
* @param ancestorState ancestor state for logging
|
||||||
* @throws IOException I/O error.
|
* @throws IOException I/O error.
|
||||||
*/
|
*/
|
||||||
@Retries.RetryTranslated
|
@Retries.RetryTranslated
|
||||||
private void innerDelete(final Path path, boolean tombstone,
|
private void innerDelete(final Path path,
|
||||||
ITtlTimeProvider ttlTimeProvider)
|
final boolean tombstone,
|
||||||
|
final ITtlTimeProvider ttlTimeProvider,
|
||||||
|
final AncestorState ancestorState)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
checkPath(path);
|
checkPath(path);
|
||||||
LOG.debug("Deleting from table {} in region {}: {}",
|
LOG.debug("Deleting from table {} in region {}: {}",
|
||||||
|
@ -577,7 +594,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
path.toString(),
|
path.toString(),
|
||||||
idempotent,
|
idempotent,
|
||||||
() -> {
|
() -> {
|
||||||
LOG.debug("Adding tombstone to {}", path);
|
logPut(ancestorState, item);
|
||||||
recordsWritten(1);
|
recordsWritten(1);
|
||||||
table.putItem(item);
|
table.putItem(item);
|
||||||
});
|
});
|
||||||
|
@ -589,7 +606,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
idempotent,
|
idempotent,
|
||||||
() -> {
|
() -> {
|
||||||
// record the attempt so even on retry the counter goes up.
|
// record the attempt so even on retry the counter goes up.
|
||||||
LOG.debug("Delete key {}", path);
|
logDelete(ancestorState, key);
|
||||||
recordsDeleted(1);
|
recordsDeleted(1);
|
||||||
table.deleteItem(key);
|
table.deleteItem(key);
|
||||||
});
|
});
|
||||||
|
@ -605,28 +622,35 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
tableName, region, path);
|
tableName, region, path);
|
||||||
|
|
||||||
final PathMetadata meta = get(path);
|
final PathMetadata meta = get(path);
|
||||||
if (meta == null || meta.isDeleted()) {
|
if (meta == null) {
|
||||||
LOG.debug("Subtree path {} does not exist; this will be a no-op", path);
|
LOG.debug("Subtree path {} does not exist; this will be a no-op", path);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (meta.isDeleted()) {
|
||||||
// Execute via the bounded threadpool.
|
LOG.debug("Subtree path {} is deleted; this will be a no-op", path);
|
||||||
final List<CompletableFuture<Void>> futures = new ArrayList<>();
|
return;
|
||||||
for (DescendantsIterator desc = new DescendantsIterator(this, meta);
|
}
|
||||||
desc.hasNext();) {
|
|
||||||
final Path pathToDelete = desc.next().getPath();
|
try(AncestorState state = new AncestorState(this,
|
||||||
futures.add(submit(executor, () -> {
|
BulkOperationState.OperationType.Delete, path)) {
|
||||||
innerDelete(pathToDelete, true, ttlTimeProvider);
|
// Execute via the bounded threadpool.
|
||||||
return null;
|
final List<CompletableFuture<Void>> futures = new ArrayList<>();
|
||||||
}));
|
for (DescendantsIterator desc = new DescendantsIterator(this, meta);
|
||||||
if (futures.size() > S3GUARD_DDB_SUBMITTED_TASK_LIMIT) {
|
desc.hasNext();) {
|
||||||
// first batch done; block for completion.
|
final Path pathToDelete = desc.next().getPath();
|
||||||
waitForCompletion(futures);
|
futures.add(submit(executor, () -> {
|
||||||
futures.clear();
|
innerDelete(pathToDelete, true, ttlTimeProvider, state);
|
||||||
}
|
return null;
|
||||||
|
}));
|
||||||
|
if (futures.size() > S3GUARD_DDB_SUBMITTED_TASK_LIMIT) {
|
||||||
|
// first batch done; block for completion.
|
||||||
|
waitForCompletion(futures);
|
||||||
|
futures.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// now wait for the final set.
|
||||||
|
waitForCompletion(futures);
|
||||||
}
|
}
|
||||||
// now wait for the final set.
|
|
||||||
waitForCompletion(futures);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -806,7 +830,8 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
final Collection<DDBPathMetadata> pathsToCreate,
|
final Collection<DDBPathMetadata> pathsToCreate,
|
||||||
final AncestorState ancestorState,
|
final AncestorState ancestorState,
|
||||||
final ITtlTimeProvider ttlTimeProvider) throws PathIOException {
|
final ITtlTimeProvider ttlTimeProvider) throws PathIOException {
|
||||||
List<DDBPathMetadata> ancestorsToAdd = new ArrayList<>(0);
|
// Key on path to allow fast lookup
|
||||||
|
Map<Path, DDBPathMetadata> ancestry = new HashMap<>();
|
||||||
LOG.debug("Completing ancestry for {} paths", pathsToCreate.size());
|
LOG.debug("Completing ancestry for {} paths", pathsToCreate.size());
|
||||||
// we sort the inputs to guarantee that the topmost entries come first.
|
// we sort the inputs to guarantee that the topmost entries come first.
|
||||||
// that way if the put request contains both parents and children
|
// that way if the put request contains both parents and children
|
||||||
|
@ -832,7 +857,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
if (!oldEntry.getFileStatus().isDirectory()
|
if (!oldEntry.getFileStatus().isDirectory()
|
||||||
|| !entry.getFileStatus().isDirectory()) {
|
|| !entry.getFileStatus().isDirectory()) {
|
||||||
// check for and warn if the existing bulk operation overwrote it.
|
// check for and warn if the existing bulk operation overwrote it.
|
||||||
// this should never occur outside tests explicitly crating it
|
// this should never occur outside tests explicitly creating it
|
||||||
LOG.warn("Overwriting a S3Guard file created in the operation: {}",
|
LOG.warn("Overwriting a S3Guard file created in the operation: {}",
|
||||||
oldEntry);
|
oldEntry);
|
||||||
LOG.warn("With new entry: {}", entry);
|
LOG.warn("With new entry: {}", entry);
|
||||||
|
@ -846,9 +871,9 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
path, entry);
|
path, entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ancestorsToAdd.add(entry);
|
ancestry.put(path, entry);
|
||||||
Path parent = path.getParent();
|
Path parent = path.getParent();
|
||||||
while (!parent.isRoot()) {
|
while (!parent.isRoot() && !ancestry.containsKey(parent)) {
|
||||||
if (!ancestorState.findEntry(parent, true)) {
|
if (!ancestorState.findEntry(parent, true)) {
|
||||||
// don't add this entry, but carry on with the parents
|
// don't add this entry, but carry on with the parents
|
||||||
LOG.debug("auto-create ancestor path {} for child path {}",
|
LOG.debug("auto-create ancestor path {} for child path {}",
|
||||||
|
@ -857,12 +882,12 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
DDBPathMetadata md = new DDBPathMetadata(status, Tristate.FALSE,
|
DDBPathMetadata md = new DDBPathMetadata(status, Tristate.FALSE,
|
||||||
false, false, ttlTimeProvider.getNow());
|
false, false, ttlTimeProvider.getNow());
|
||||||
ancestorState.put(parent, md);
|
ancestorState.put(parent, md);
|
||||||
ancestorsToAdd.add(md);
|
ancestry.put(parent, md);
|
||||||
}
|
}
|
||||||
parent = parent.getParent();
|
parent = parent.getParent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ancestorsToAdd;
|
return ancestry.values();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -936,7 +961,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
entryFound = true;
|
entryFound = true;
|
||||||
if (directory.getFileStatus().isFile()) {
|
if (directory.getFileStatus().isFile()) {
|
||||||
throw new PathIOException(parent.toString(),
|
throw new PathIOException(parent.toString(),
|
||||||
"Cannot overwrite parent file: metadatstore is"
|
"Cannot overwrite parent file: metastore is"
|
||||||
+ " in an inconsistent state");
|
+ " in an inconsistent state");
|
||||||
}
|
}
|
||||||
// the directory exists. Add it to the ancestor state for next time.
|
// the directory exists. Add it to the ancestor state for next time.
|
||||||
|
@ -1029,7 +1054,8 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
newItems.addAll(tombstones);
|
newItems.addAll(tombstones);
|
||||||
}
|
}
|
||||||
|
|
||||||
processBatchWriteRequest(null, pathMetadataToItem(newItems));
|
processBatchWriteRequest(ancestorState,
|
||||||
|
null, pathMetadataToItem(newItems));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1039,13 +1065,17 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
* <li>No attempt is made to sort the input: the caller must do that</li>
|
* <li>No attempt is made to sort the input: the caller must do that</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
* As well as retrying on the operation invocation, incomplete
|
* As well as retrying on the operation invocation, incomplete
|
||||||
* batches are retried until all have been processed..
|
* batches are retried until all have been processed.
|
||||||
|
*
|
||||||
|
* @param ancestorState ancestor state for logging
|
||||||
* @param keysToDelete primary keys to be deleted; can be null
|
* @param keysToDelete primary keys to be deleted; can be null
|
||||||
* @param itemsToPut new items to be put; can be null
|
* @param itemsToPut new items to be put; can be null
|
||||||
* @return the number of iterations needed to complete the call.
|
* @return the number of iterations needed to complete the call.
|
||||||
*/
|
*/
|
||||||
@Retries.RetryTranslated("Outstanding batch items are updated with backoff")
|
@Retries.RetryTranslated("Outstanding batch items are updated with backoff")
|
||||||
private int processBatchWriteRequest(PrimaryKey[] keysToDelete,
|
private int processBatchWriteRequest(
|
||||||
|
@Nullable AncestorState ancestorState,
|
||||||
|
PrimaryKey[] keysToDelete,
|
||||||
Item[] itemsToPut) throws IOException {
|
Item[] itemsToPut) throws IOException {
|
||||||
final int totalToDelete = (keysToDelete == null ? 0 : keysToDelete.length);
|
final int totalToDelete = (keysToDelete == null ? 0 : keysToDelete.length);
|
||||||
final int totalToPut = (itemsToPut == null ? 0 : itemsToPut.length);
|
final int totalToPut = (itemsToPut == null ? 0 : itemsToPut.length);
|
||||||
|
@ -1062,8 +1092,10 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
&& count < totalToDelete) {
|
&& count < totalToDelete) {
|
||||||
numToDelete = Math.min(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT,
|
numToDelete = Math.min(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT,
|
||||||
totalToDelete - count);
|
totalToDelete - count);
|
||||||
writeItems.withPrimaryKeysToDelete(
|
PrimaryKey[] toDelete = Arrays.copyOfRange(keysToDelete,
|
||||||
Arrays.copyOfRange(keysToDelete, count, count + numToDelete));
|
count, count + numToDelete);
|
||||||
|
LOG.debug("Deleting {} entries: {}", toDelete.length, toDelete);
|
||||||
|
writeItems.withPrimaryKeysToDelete(toDelete);
|
||||||
count += numToDelete;
|
count += numToDelete;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1106,9 +1138,12 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
}
|
}
|
||||||
if (itemsToPut != null) {
|
if (itemsToPut != null) {
|
||||||
recordsWritten(itemsToPut.length);
|
recordsWritten(itemsToPut.length);
|
||||||
|
logPut(ancestorState, itemsToPut);
|
||||||
}
|
}
|
||||||
if (keysToDelete != null) {
|
if (keysToDelete != null) {
|
||||||
recordsDeleted(keysToDelete.length);
|
recordsDeleted(keysToDelete.length);
|
||||||
|
logDelete(ancestorState, keysToDelete);
|
||||||
|
|
||||||
}
|
}
|
||||||
return batches;
|
return batches;
|
||||||
}
|
}
|
||||||
|
@ -1227,7 +1262,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
}
|
}
|
||||||
LOG.debug("Saving batch of {} items to table {}, region {}", items.length,
|
LOG.debug("Saving batch of {} items to table {}, region {}", items.length,
|
||||||
tableName, region);
|
tableName, region);
|
||||||
processBatchWriteRequest(null, items);
|
processBatchWriteRequest(ancestorState, null, items);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1290,7 +1325,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
* @return true iff the item isn't null and, if there is an is_deleted
|
* @return true iff the item isn't null and, if there is an is_deleted
|
||||||
* column, that its value is false.
|
* column, that its value is false.
|
||||||
*/
|
*/
|
||||||
private boolean itemExists(Item item) {
|
private static boolean itemExists(Item item) {
|
||||||
if (item == null) {
|
if (item == null) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -1309,7 +1344,8 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}.
|
* {@inheritDoc}.
|
||||||
* There is retry around building the list of paths to update, but
|
* There is retry around building the list of paths to update, but
|
||||||
* the call to {@link #processBatchWriteRequest(PrimaryKey[], Item[])}
|
* the call to
|
||||||
|
* {@link #processBatchWriteRequest(DynamoDBMetadataStore.AncestorState, PrimaryKey[], Item[])}
|
||||||
* is only tried once.
|
* is only tried once.
|
||||||
* @param meta Directory listing metadata.
|
* @param meta Directory listing metadata.
|
||||||
* @param operationState operational state for a bulk update
|
* @param operationState operational state for a bulk update
|
||||||
|
@ -1320,15 +1356,17 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
public void put(
|
public void put(
|
||||||
final DirListingMetadata meta,
|
final DirListingMetadata meta,
|
||||||
@Nullable final BulkOperationState operationState) throws IOException {
|
@Nullable final BulkOperationState operationState) throws IOException {
|
||||||
LOG.debug("Saving to table {} in region {}: {}", tableName, region, meta);
|
LOG.debug("Saving {} dir meta for {} to table {} in region {}: {}",
|
||||||
|
tableName,
|
||||||
|
meta.isAuthoritative() ? "auth" : "nonauth",
|
||||||
|
meta.getPath(),
|
||||||
|
tableName, region, meta);
|
||||||
// directory path
|
// directory path
|
||||||
Path path = meta.getPath();
|
Path path = meta.getPath();
|
||||||
DDBPathMetadata ddbPathMeta =
|
DDBPathMetadata ddbPathMeta =
|
||||||
new DDBPathMetadata(makeDirStatus(path, username), meta.isEmpty(),
|
new DDBPathMetadata(makeDirStatus(path, username), meta.isEmpty(),
|
||||||
false, meta.isAuthoritative(), meta.getLastUpdated());
|
false, meta.isAuthoritative(), meta.getLastUpdated());
|
||||||
// put all its ancestors if not present; as an optimization we return at its
|
// put all its ancestors if not present
|
||||||
// first existent ancestor
|
|
||||||
final AncestorState ancestorState = extractOrCreate(operationState,
|
final AncestorState ancestorState = extractOrCreate(operationState,
|
||||||
BulkOperationState.OperationType.Put);
|
BulkOperationState.OperationType.Put);
|
||||||
// First add any missing ancestors...
|
// First add any missing ancestors...
|
||||||
|
@ -1341,7 +1379,9 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
// sort so highest-level entries are written to the store first.
|
// sort so highest-level entries are written to the store first.
|
||||||
// if a sequence fails, no orphan entries will have been written.
|
// if a sequence fails, no orphan entries will have been written.
|
||||||
metasToPut.sort(PathOrderComparators.TOPMOST_PM_FIRST);
|
metasToPut.sort(PathOrderComparators.TOPMOST_PM_FIRST);
|
||||||
processBatchWriteRequest(null, pathMetadataToItem(metasToPut));
|
processBatchWriteRequest(ancestorState,
|
||||||
|
null,
|
||||||
|
pathMetadataToItem(metasToPut));
|
||||||
// and add the ancestors
|
// and add the ancestors
|
||||||
synchronized (ancestorState) {
|
synchronized (ancestorState) {
|
||||||
metasToPut.forEach(ancestorState::put);
|
metasToPut.forEach(ancestorState::put);
|
||||||
|
@ -1455,7 +1495,10 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
@Retries.RetryTranslated
|
@Retries.RetryTranslated
|
||||||
public void prune(PruneMode pruneMode, long cutoff, String keyPrefix)
|
public void prune(PruneMode pruneMode, long cutoff, String keyPrefix)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
LOG.debug("Prune files under {} with age {}", keyPrefix, cutoff);
|
LOG.debug("Prune {} under {} with age {}",
|
||||||
|
pruneMode == PruneMode.ALL_BY_MODTIME
|
||||||
|
? "files and tombstones" : "tombstones",
|
||||||
|
keyPrefix, cutoff);
|
||||||
final ItemCollection<ScanOutcome> items =
|
final ItemCollection<ScanOutcome> items =
|
||||||
expiredFiles(pruneMode, cutoff, keyPrefix);
|
expiredFiles(pruneMode, cutoff, keyPrefix);
|
||||||
innerPrune(keyPrefix, items);
|
innerPrune(keyPrefix, items);
|
||||||
|
@ -1465,7 +1508,9 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
throws IOException {
|
throws IOException {
|
||||||
int itemCount = 0;
|
int itemCount = 0;
|
||||||
try (AncestorState state = initiateBulkWrite(
|
try (AncestorState state = initiateBulkWrite(
|
||||||
BulkOperationState.OperationType.Prune, null)) {
|
BulkOperationState.OperationType.Prune, null);
|
||||||
|
DurationInfo ignored =
|
||||||
|
new DurationInfo(LOG, "Pruning DynamoDB Store")) {
|
||||||
ArrayList<Path> deletionBatch =
|
ArrayList<Path> deletionBatch =
|
||||||
new ArrayList<>(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT);
|
new ArrayList<>(S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT);
|
||||||
long delay = conf.getTimeDuration(
|
long delay = conf.getTimeDuration(
|
||||||
|
@ -1478,12 +1523,19 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
DDBPathMetadata md = PathMetadataDynamoDBTranslation
|
DDBPathMetadata md = PathMetadataDynamoDBTranslation
|
||||||
.itemToPathMetadata(item, username);
|
.itemToPathMetadata(item, username);
|
||||||
Path path = md.getFileStatus().getPath();
|
Path path = md.getFileStatus().getPath();
|
||||||
|
boolean tombstone = md.isDeleted();
|
||||||
|
LOG.debug("Prune entry {}", path);
|
||||||
deletionBatch.add(path);
|
deletionBatch.add(path);
|
||||||
|
|
||||||
// add parent path of what we remove if it has not
|
// add parent path of item so it can be marked as non-auth.
|
||||||
// already been processed
|
// this is only done if
|
||||||
|
// * it has not already been processed
|
||||||
|
// * the entry pruned is not a tombstone (no need to update)
|
||||||
|
// * the file is not in the root dir
|
||||||
Path parentPath = path.getParent();
|
Path parentPath = path.getParent();
|
||||||
if (parentPath != null && !clearedParentPathSet.contains(parentPath)) {
|
if (!tombstone
|
||||||
|
&& parentPath != null
|
||||||
|
&& !clearedParentPathSet.contains(parentPath)) {
|
||||||
parentPathSet.add(parentPath);
|
parentPathSet.add(parentPath);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1491,7 +1543,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
if (deletionBatch.size() == S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT) {
|
if (deletionBatch.size() == S3GUARD_DDB_BATCH_WRITE_REQUEST_LIMIT) {
|
||||||
// lowest path entries get deleted first.
|
// lowest path entries get deleted first.
|
||||||
deletionBatch.sort(PathOrderComparators.TOPMOST_PATH_LAST);
|
deletionBatch.sort(PathOrderComparators.TOPMOST_PATH_LAST);
|
||||||
processBatchWriteRequest(pathToKey(deletionBatch), null);
|
processBatchWriteRequest(state, pathToKey(deletionBatch), null);
|
||||||
|
|
||||||
// set authoritative false for each pruned dir listing
|
// set authoritative false for each pruned dir listing
|
||||||
removeAuthoritativeDirFlag(parentPathSet, state);
|
removeAuthoritativeDirFlag(parentPathSet, state);
|
||||||
|
@ -1507,7 +1559,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
}
|
}
|
||||||
// final batch of deletes
|
// final batch of deletes
|
||||||
if (!deletionBatch.isEmpty()) {
|
if (!deletionBatch.isEmpty()) {
|
||||||
processBatchWriteRequest(pathToKey(deletionBatch), null);
|
processBatchWriteRequest(state, pathToKey(deletionBatch), null);
|
||||||
|
|
||||||
// set authoritative false for each pruned dir listing
|
// set authoritative false for each pruned dir listing
|
||||||
removeAuthoritativeDirFlag(parentPathSet, state);
|
removeAuthoritativeDirFlag(parentPathSet, state);
|
||||||
|
@ -1527,6 +1579,20 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
/**
|
/**
|
||||||
* Remove the Authoritative Directory Marker from a set of paths, if
|
* Remove the Authoritative Directory Marker from a set of paths, if
|
||||||
* those paths are in the store.
|
* those paths are in the store.
|
||||||
|
* <p>
|
||||||
|
* This operation is <i>only</i>for pruning; it does not raise an error
|
||||||
|
* if, during the prune phase, the table appears inconsistent.
|
||||||
|
* This is not unusual as it can happen in a number of ways
|
||||||
|
* <ol>
|
||||||
|
* <li>The state of the table changes during a slow prune operation which
|
||||||
|
* deliberately inserts pauses to avoid overloading prepaid IO capacity.
|
||||||
|
* </li>
|
||||||
|
* <li>Tombstone markers have been left in the table after many other
|
||||||
|
* operations have taken place, including deleting/replacing
|
||||||
|
* parents.</li>
|
||||||
|
* </ol>
|
||||||
|
* <p>
|
||||||
|
*
|
||||||
* If an exception is raised in the get/update process, then the exception
|
* If an exception is raised in the get/update process, then the exception
|
||||||
* is caught and only rethrown after all the other paths are processed.
|
* is caught and only rethrown after all the other paths are processed.
|
||||||
* This is to ensure a best-effort attempt to update the store.
|
* This is to ensure a best-effort attempt to update the store.
|
||||||
|
@ -1548,10 +1614,22 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
DDBPathMetadata ddbPathMetadata = get(path);
|
DDBPathMetadata ddbPathMetadata = get(path);
|
||||||
if(ddbPathMetadata == null) {
|
if (ddbPathMetadata == null) {
|
||||||
|
// there is no entry.
|
||||||
|
LOG.debug("No parent {}; skipping", path);
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
LOG.debug("Setting false isAuthoritativeDir on {}", ddbPathMetadata);
|
if (ddbPathMetadata.isDeleted()) {
|
||||||
|
// the parent itself is deleted
|
||||||
|
LOG.debug("Parent has been deleted {}; skipping", path);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
if (!ddbPathMetadata.getFileStatus().isDirectory()) {
|
||||||
|
// the parent itself is deleted
|
||||||
|
LOG.debug("Parent is not a directory {}; skipping", path);
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
LOG.debug("Setting isAuthoritativeDir==false on {}", ddbPathMetadata);
|
||||||
ddbPathMetadata.setAuthoritativeDir(false);
|
ddbPathMetadata.setAuthoritativeDir(false);
|
||||||
return ddbPathMetadata;
|
return ddbPathMetadata;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -2232,14 +2310,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
final S3AFileStatus sourceStatus,
|
final S3AFileStatus sourceStatus,
|
||||||
final Path dest) {
|
final Path dest) {
|
||||||
return new ProgressiveRenameTracker(storeContext, this, source, dest,
|
return new ProgressiveRenameTracker(storeContext, this, source, dest,
|
||||||
new AncestorState(BulkOperationState.OperationType.Rename, dest));
|
new AncestorState(this, BulkOperationState.OperationType.Rename, dest));
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public AncestorState initiateBulkWrite(
|
public AncestorState initiateBulkWrite(
|
||||||
final BulkOperationState.OperationType operation,
|
final BulkOperationState.OperationType operation,
|
||||||
final Path dest) {
|
final Path dest) {
|
||||||
return new AncestorState(operation, dest);
|
return new AncestorState(this, operation, dest);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -2253,6 +2331,14 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
return ttlTimeProvider != null ? ttlTimeProvider : timeProvider;
|
return ttlTimeProvider != null ? ttlTimeProvider : timeProvider;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Username.
|
||||||
|
* @return the current username
|
||||||
|
*/
|
||||||
|
String getUsername() {
|
||||||
|
return username;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Take an {@code IllegalArgumentException} raised by a DDB operation
|
* Take an {@code IllegalArgumentException} raised by a DDB operation
|
||||||
* and if it contains an inner SDK exception, unwrap it.
|
* and if it contains an inner SDK exception, unwrap it.
|
||||||
|
@ -2295,19 +2381,84 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log a PUT into the operations log at debug level.
|
||||||
|
* @param state optional ancestor state.
|
||||||
|
* @param items items which have been PUT
|
||||||
|
*/
|
||||||
|
private static void logPut(
|
||||||
|
@Nullable AncestorState state,
|
||||||
|
Item[] items) {
|
||||||
|
if (OPERATIONS_LOG.isDebugEnabled()) {
|
||||||
|
// log the operations
|
||||||
|
String stateStr = AncestorState.stateAsString(state);
|
||||||
|
for (Item item : items) {
|
||||||
|
boolean tombstone = itemExists(item);
|
||||||
|
OPERATIONS_LOG.debug("{} {} {}",
|
||||||
|
stateStr,
|
||||||
|
tombstone ? "TOMBSTONE" : "PUT",
|
||||||
|
itemPrimaryKeyToString(item));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log a PUT into the operations log at debug level.
|
||||||
|
* @param state optional ancestor state.
|
||||||
|
* @param item item PUT.
|
||||||
|
*/
|
||||||
|
private static void logPut(
|
||||||
|
@Nullable AncestorState state,
|
||||||
|
Item item) {
|
||||||
|
if (OPERATIONS_LOG.isDebugEnabled()) {
|
||||||
|
// log the operations
|
||||||
|
logPut(state, new Item[]{item});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log a DELETE into the operations log at debug level.
|
||||||
|
* @param state optional ancestor state.
|
||||||
|
* @param keysDeleted keys which were deleted.
|
||||||
|
*/
|
||||||
|
private static void logDelete(
|
||||||
|
@Nullable AncestorState state,
|
||||||
|
PrimaryKey[] keysDeleted) {
|
||||||
|
if (OPERATIONS_LOG.isDebugEnabled()) {
|
||||||
|
// log the operations
|
||||||
|
String stateStr = AncestorState.stateAsString(state);
|
||||||
|
for (PrimaryKey key : keysDeleted) {
|
||||||
|
OPERATIONS_LOG.debug("{} DELETE {}",
|
||||||
|
stateStr, primaryKeyToString(key));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Log a DELETE into the operations log at debug level.
|
||||||
|
* @param state optional ancestor state.
|
||||||
|
* @param key Deleted key
|
||||||
|
*/
|
||||||
|
private static void logDelete(
|
||||||
|
@Nullable AncestorState state,
|
||||||
|
PrimaryKey key) {
|
||||||
|
if (OPERATIONS_LOG.isDebugEnabled()) {
|
||||||
|
logDelete(state, new PrimaryKey[]{key});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the move state passed in; create a new one if needed.
|
* Get the move state passed in; create a new one if needed.
|
||||||
* @param state state.
|
* @param state state.
|
||||||
* @param operation the type of the operation to use if the state is created.
|
* @param operation the type of the operation to use if the state is created.
|
||||||
* @return the cast or created state.
|
* @return the cast or created state.
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
private AncestorState extractOrCreate(@Nullable BulkOperationState state,
|
||||||
static AncestorState extractOrCreate(@Nullable BulkOperationState state,
|
|
||||||
BulkOperationState.OperationType operation) {
|
BulkOperationState.OperationType operation) {
|
||||||
if (state != null) {
|
if (state != null) {
|
||||||
return (AncestorState) state;
|
return (AncestorState) state;
|
||||||
} else {
|
} else {
|
||||||
return new AncestorState(operation, null);
|
return new AncestorState(this, operation, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2320,18 +2471,42 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
static final class AncestorState extends BulkOperationState {
|
static final class AncestorState extends BulkOperationState {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Counter of IDs issued.
|
||||||
|
*/
|
||||||
|
private static final AtomicLong ID_COUNTER = new AtomicLong(0);
|
||||||
|
|
||||||
|
/** Owning store. */
|
||||||
|
private final DynamoDBMetadataStore store;
|
||||||
|
|
||||||
|
/** The ID of the state; for logging. */
|
||||||
|
private final long id;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Map of ancestors.
|
||||||
|
*/
|
||||||
private final Map<Path, DDBPathMetadata> ancestry = new HashMap<>();
|
private final Map<Path, DDBPathMetadata> ancestry = new HashMap<>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Destination path.
|
||||||
|
*/
|
||||||
private final Path dest;
|
private final Path dest;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create the state.
|
* Create the state.
|
||||||
|
* @param store the store, for use in validation.
|
||||||
|
* If null: no validation (test only operation)
|
||||||
* @param operation the type of the operation.
|
* @param operation the type of the operation.
|
||||||
* @param dest destination path.
|
* @param dest destination path.
|
||||||
*/
|
*/
|
||||||
AncestorState(final OperationType operation, @Nullable final Path dest) {
|
AncestorState(
|
||||||
|
@Nullable final DynamoDBMetadataStore store,
|
||||||
|
final OperationType operation,
|
||||||
|
@Nullable final Path dest) {
|
||||||
super(operation);
|
super(operation);
|
||||||
|
this.store = store;
|
||||||
this.dest = dest;
|
this.dest = dest;
|
||||||
|
this.id = ID_COUNTER.addAndGet(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
int size() {
|
int size() {
|
||||||
|
@ -2342,11 +2517,16 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
return dest;
|
return dest;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
long getId() {
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
final StringBuilder sb = new StringBuilder(
|
final StringBuilder sb = new StringBuilder(
|
||||||
"AncestorState{");
|
"AncestorState{");
|
||||||
sb.append("operation=").append(getOperation());
|
sb.append("operation=").append(getOperation());
|
||||||
|
sb.append("id=").append(id);
|
||||||
sb.append("; dest=").append(dest);
|
sb.append("; dest=").append(dest);
|
||||||
sb.append("; size=").append(size());
|
sb.append("; size=").append(size());
|
||||||
sb.append("; paths={")
|
sb.append("; paths={")
|
||||||
|
@ -2362,7 +2542,7 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
* @return true if the state has an entry
|
* @return true if the state has an entry
|
||||||
*/
|
*/
|
||||||
boolean contains(Path p) {
|
boolean contains(Path p) {
|
||||||
return ancestry.containsKey(p);
|
return get(p) != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
DDBPathMetadata put(Path p, DDBPathMetadata md) {
|
DDBPathMetadata put(Path p, DDBPathMetadata md) {
|
||||||
|
@ -2406,5 +2586,74 @@ public class DynamoDBMetadataStore implements MetadataStore,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* If debug logging is enabled, this does an audit of the store state.
|
||||||
|
* it only logs this; the error messages are created so as they could
|
||||||
|
* be turned into exception messages.
|
||||||
|
* Audit failures aren't being turned into IOEs is that
|
||||||
|
* rename operations delete the source entry and that ends up in the
|
||||||
|
* ancestor state as present
|
||||||
|
* @throws IOException failure
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
if (LOG.isDebugEnabled() && store != null) {
|
||||||
|
LOG.debug("Auditing {}", stateAsString(this));
|
||||||
|
for (Map.Entry<Path, DDBPathMetadata> entry : ancestry
|
||||||
|
.entrySet()) {
|
||||||
|
Path path = entry.getKey();
|
||||||
|
DDBPathMetadata expected = entry.getValue();
|
||||||
|
if (expected.isDeleted()) {
|
||||||
|
// file was deleted in bulk op; we don't care about it
|
||||||
|
// any more
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
DDBPathMetadata actual;
|
||||||
|
try {
|
||||||
|
actual = store.get(path);
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.debug("Retrieving {}", path, e);
|
||||||
|
// this is for debug; don't be ambitious
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (actual == null || actual.isDeleted()) {
|
||||||
|
String message = "Metastore entry for path "
|
||||||
|
+ path + " deleted during bulk "
|
||||||
|
+ getOperation() + " operation";
|
||||||
|
LOG.debug(message);
|
||||||
|
} else {
|
||||||
|
if (actual.getFileStatus().isDirectory() !=
|
||||||
|
expected.getFileStatus().isDirectory()) {
|
||||||
|
// the type of the entry has changed
|
||||||
|
String message = "Metastore entry for path "
|
||||||
|
+ path + " changed during bulk "
|
||||||
|
+ getOperation() + " operation"
|
||||||
|
+ " from " + expected
|
||||||
|
+ " to " + actual;
|
||||||
|
LOG.debug(message);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a string from the state including operation and ID.
|
||||||
|
* @param state state to use -may be null
|
||||||
|
* @return a string for logging.
|
||||||
|
*/
|
||||||
|
private static String stateAsString(@Nullable AncestorState state) {
|
||||||
|
String stateStr;
|
||||||
|
if (state != null) {
|
||||||
|
stateStr = String.format("#(%s-%04d)",
|
||||||
|
state.getOperation(),
|
||||||
|
state.getId());
|
||||||
|
} else {
|
||||||
|
stateStr = "#()";
|
||||||
|
}
|
||||||
|
return stateStr;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -301,7 +301,7 @@ public interface MetadataStore extends Closeable {
|
||||||
* </li>
|
* </li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* @param pruneMode
|
* @param pruneMode Prune Mode
|
||||||
* @param cutoff Oldest time to allow (UTC)
|
* @param cutoff Oldest time to allow (UTC)
|
||||||
* @throws IOException if there is an error
|
* @throws IOException if there is an error
|
||||||
* @throws UnsupportedOperationException if not implemented
|
* @throws UnsupportedOperationException if not implemented
|
||||||
|
@ -313,7 +313,7 @@ public interface MetadataStore extends Closeable {
|
||||||
* Same as {@link MetadataStore#prune(PruneMode, long)}, but with an
|
* Same as {@link MetadataStore#prune(PruneMode, long)}, but with an
|
||||||
* additional keyPrefix parameter to filter the pruned keys with a prefix.
|
* additional keyPrefix parameter to filter the pruned keys with a prefix.
|
||||||
*
|
*
|
||||||
* @param pruneMode
|
* @param pruneMode Prune Mode
|
||||||
* @param cutoff Oldest time to allow (UTC)
|
* @param cutoff Oldest time to allow (UTC)
|
||||||
* @param keyPrefix The prefix for the keys that should be removed
|
* @param keyPrefix The prefix for the keys that should be removed
|
||||||
* @throws IOException if there is an error
|
* @throws IOException if there is an error
|
||||||
|
|
|
@ -293,11 +293,12 @@ public final class PathMetadataDynamoDBTranslation {
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public static String pathToParentKey(Path path) {
|
public static String pathToParentKey(Path path) {
|
||||||
Preconditions.checkNotNull(path);
|
Preconditions.checkNotNull(path);
|
||||||
Preconditions.checkArgument(path.isUriPathAbsolute(), "Path not absolute");
|
Preconditions.checkArgument(path.isUriPathAbsolute(),
|
||||||
|
"Path not absolute: '%s'", path);
|
||||||
URI uri = path.toUri();
|
URI uri = path.toUri();
|
||||||
String bucket = uri.getHost();
|
String bucket = uri.getHost();
|
||||||
Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
|
Preconditions.checkArgument(!StringUtils.isEmpty(bucket),
|
||||||
"Path missing bucket");
|
"Path missing bucket %s", path);
|
||||||
String pKey = "/" + bucket + uri.getPath();
|
String pKey = "/" + bucket + uri.getPath();
|
||||||
|
|
||||||
// Strip trailing slash
|
// Strip trailing slash
|
||||||
|
@ -363,4 +364,38 @@ public final class PathMetadataDynamoDBTranslation {
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert an item's (parent, child) key to a string value
|
||||||
|
* for logging. There is no validation of the item.
|
||||||
|
* @param item item.
|
||||||
|
* @return an s3a:// prefixed string.
|
||||||
|
*/
|
||||||
|
static String itemPrimaryKeyToString(Item item) {
|
||||||
|
String parent = item.getString(PARENT);
|
||||||
|
String child = item.getString(CHILD);
|
||||||
|
return "s3a://" + parent + "/" + child;
|
||||||
|
}
|
||||||
|
/**
|
||||||
|
* Convert an item's (parent, child) key to a string value
|
||||||
|
* for logging. There is no validation of the item.
|
||||||
|
* @param item item.
|
||||||
|
* @return an s3a:// prefixed string.
|
||||||
|
*/
|
||||||
|
static String primaryKeyToString(PrimaryKey item) {
|
||||||
|
Collection<KeyAttribute> c = item.getComponents();
|
||||||
|
String parent = "";
|
||||||
|
String child = "";
|
||||||
|
for (KeyAttribute attr : c) {
|
||||||
|
switch (attr.getName()) {
|
||||||
|
case PARENT:
|
||||||
|
parent = attr.getValue().toString();
|
||||||
|
break;
|
||||||
|
case CHILD:
|
||||||
|
child = attr.getValue().toString();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "s3a://" + parent + "/" + child;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,12 +115,12 @@ final class PathOrderComparators {
|
||||||
/**
|
/**
|
||||||
* Compare on path status.
|
* Compare on path status.
|
||||||
*/
|
*/
|
||||||
private static final class PathMetadataComparator implements
|
static final class PathMetadataComparator implements
|
||||||
Comparator<PathMetadata>, Serializable {
|
Comparator<PathMetadata>, Serializable {
|
||||||
|
|
||||||
private final Comparator<Path> inner;
|
private final Comparator<Path> inner;
|
||||||
|
|
||||||
private PathMetadataComparator(final Comparator<Path> inner) {
|
PathMetadataComparator(final Comparator<Path> inner) {
|
||||||
this.inner = inner;
|
this.inner = inner;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,248 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
import org.apache.hadoop.service.Service;
|
||||||
|
import org.apache.hadoop.service.launcher.LauncherExitCodes;
|
||||||
|
import org.apache.hadoop.service.launcher.ServiceLaunchException;
|
||||||
|
import org.apache.hadoop.service.launcher.ServiceLauncher;
|
||||||
|
import org.apache.hadoop.util.DurationInfo;
|
||||||
|
import org.apache.hadoop.util.ExitUtil;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkNotNull;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.DumpS3GuardDynamoTable.serviceMain;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.PARENT;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Purge the S3Guard table of a FileSystem from all entries related to
|
||||||
|
* that table.
|
||||||
|
* Will fail if there is no table, or the store is in auth mode.
|
||||||
|
* <pre>
|
||||||
|
* hadoop org.apache.hadoop.fs.s3a.s3guard.PurgeS3GuardDynamoTable \
|
||||||
|
* -force s3a://example-bucket/
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public class PurgeS3GuardDynamoTable
|
||||||
|
extends AbstractS3GuardDynamoDBDiagnostic {
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(PurgeS3GuardDynamoTable.class);
|
||||||
|
|
||||||
|
public static final String NAME = "PurgeS3GuardDynamoTable";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Name of the force option.
|
||||||
|
*/
|
||||||
|
public static final String FORCE = "-force";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Usage message.
|
||||||
|
*/
|
||||||
|
private static final String USAGE_MESSAGE = NAME
|
||||||
|
+ " [-force] <filesystem>";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Flag which actually triggers the delete.
|
||||||
|
*/
|
||||||
|
private boolean force;
|
||||||
|
|
||||||
|
private long filesFound;
|
||||||
|
private long filesDeleted;
|
||||||
|
|
||||||
|
public PurgeS3GuardDynamoTable(final String name) {
|
||||||
|
super(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
public PurgeS3GuardDynamoTable() {
|
||||||
|
this(NAME);
|
||||||
|
}
|
||||||
|
|
||||||
|
public PurgeS3GuardDynamoTable(
|
||||||
|
final S3AFileSystem filesystem,
|
||||||
|
final DynamoDBMetadataStore store,
|
||||||
|
final URI uri,
|
||||||
|
final boolean force) {
|
||||||
|
super(NAME, filesystem, store, uri);
|
||||||
|
this.force = force;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Bind to the argument list, including validating the CLI.
|
||||||
|
* @throws Exception failure.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected void serviceStart() throws Exception {
|
||||||
|
if (getStore() == null) {
|
||||||
|
List<String> arg = getArgumentList(1, 2, USAGE_MESSAGE);
|
||||||
|
String fsURI = arg.get(0);
|
||||||
|
if (arg.size() == 2) {
|
||||||
|
if (!arg.get(0).equals(FORCE)) {
|
||||||
|
throw new ServiceLaunchException(LauncherExitCodes.EXIT_USAGE,
|
||||||
|
USAGE_MESSAGE);
|
||||||
|
}
|
||||||
|
force = true;
|
||||||
|
fsURI = arg.get(1);
|
||||||
|
}
|
||||||
|
bindFromCLI(fsURI);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extract the host from the FS URI, then scan and
|
||||||
|
* delete all entries from that bucket.
|
||||||
|
* @return the exit code.
|
||||||
|
* @throws ServiceLaunchException on failure.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int execute() throws ServiceLaunchException {
|
||||||
|
|
||||||
|
URI uri = getUri();
|
||||||
|
String host = uri.getHost();
|
||||||
|
String prefix = "/" + host + "/";
|
||||||
|
DynamoDBMetadataStore ddbms = getStore();
|
||||||
|
S3GuardTableAccess tableAccess = new S3GuardTableAccess(ddbms);
|
||||||
|
ExpressionSpecBuilder builder = new ExpressionSpecBuilder();
|
||||||
|
builder.withKeyCondition(
|
||||||
|
ExpressionSpecBuilder.S(PARENT).beginsWith(prefix));
|
||||||
|
|
||||||
|
LOG.info("Scanning for entries with prefix {} to delete from {}",
|
||||||
|
prefix, ddbms);
|
||||||
|
|
||||||
|
Iterable<DDBPathMetadata> entries = tableAccess.scanMetadata(builder);
|
||||||
|
List<Path> list = new ArrayList<>();
|
||||||
|
entries.iterator().forEachRemaining(e -> {
|
||||||
|
if (!(e instanceof S3GuardTableAccess.VersionMarker)) {
|
||||||
|
Path p = e.getFileStatus().getPath();
|
||||||
|
String type = e.getFileStatus().isFile() ? "file" : "directory";
|
||||||
|
boolean tombstone = e.isDeleted();
|
||||||
|
if (tombstone) {
|
||||||
|
type = "tombstone " + type;
|
||||||
|
}
|
||||||
|
LOG.info("{} {}", type, p);
|
||||||
|
list.add(p);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
int count = list.size();
|
||||||
|
filesFound = count;
|
||||||
|
LOG.info("Found {} entries{}",
|
||||||
|
count,
|
||||||
|
(count == 0 ? " -nothing to purge": ""));
|
||||||
|
if (count > 0) {
|
||||||
|
if (force) {
|
||||||
|
DurationInfo duration =
|
||||||
|
new DurationInfo(LOG,
|
||||||
|
"deleting %s entries from %s",
|
||||||
|
count, ddbms.toString());
|
||||||
|
tableAccess.delete(list);
|
||||||
|
duration.close();
|
||||||
|
long durationMillis = duration.value();
|
||||||
|
long timePerEntry = durationMillis / count;
|
||||||
|
LOG.info("Time per entry: {} ms", timePerEntry);
|
||||||
|
filesDeleted = count;
|
||||||
|
} else {
|
||||||
|
LOG.info("Delete process will only be executed when "
|
||||||
|
+ FORCE + " is set");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return LauncherExitCodes.EXIT_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the Main entry point for the service launcher.
|
||||||
|
*
|
||||||
|
* Converts the arguments to a list, instantiates a instance of the class
|
||||||
|
* then executes it.
|
||||||
|
* @param args command line arguments.
|
||||||
|
*/
|
||||||
|
public static void main(String[] args) {
|
||||||
|
try {
|
||||||
|
serviceMain(Arrays.asList(args), new PurgeS3GuardDynamoTable());
|
||||||
|
} catch (ExitUtil.ExitException e) {
|
||||||
|
ExitUtil.terminate(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* API Entry point to dump the metastore and S3 store world views
|
||||||
|
* <p>
|
||||||
|
* Both the FS and the store will be dumped: the store is scanned
|
||||||
|
* before and after the sequence to show what changes were made to
|
||||||
|
* the store during the list operation.
|
||||||
|
* @param fs fs to dump. If null a store must be provided.
|
||||||
|
* @param store store to dump (fallback to FS)
|
||||||
|
* @param conf configuration to use (fallback to fs)
|
||||||
|
* @param uri URI of store -only needed if FS is null.
|
||||||
|
* @param force force the actual delete
|
||||||
|
* @return (filesFound, filesDeleted)
|
||||||
|
* @throws ExitUtil.ExitException failure.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
public static Pair<Long, Long> purgeStore(
|
||||||
|
@Nullable final S3AFileSystem fs,
|
||||||
|
@Nullable DynamoDBMetadataStore store,
|
||||||
|
@Nullable Configuration conf,
|
||||||
|
@Nullable URI uri,
|
||||||
|
boolean force) throws ExitUtil.ExitException {
|
||||||
|
ServiceLauncher<Service> serviceLauncher =
|
||||||
|
new ServiceLauncher<>(NAME);
|
||||||
|
|
||||||
|
if (conf == null) {
|
||||||
|
conf = checkNotNull(fs, "No filesystem").getConf();
|
||||||
|
}
|
||||||
|
if (store == null) {
|
||||||
|
store = (DynamoDBMetadataStore) checkNotNull(fs, "No filesystem")
|
||||||
|
.getMetadataStore();
|
||||||
|
}
|
||||||
|
PurgeS3GuardDynamoTable purge = new PurgeS3GuardDynamoTable(fs,
|
||||||
|
store,
|
||||||
|
uri,
|
||||||
|
force);
|
||||||
|
ExitUtil.ExitException ex = serviceLauncher.launchService(
|
||||||
|
conf,
|
||||||
|
purge,
|
||||||
|
Collections.emptyList(),
|
||||||
|
false,
|
||||||
|
true);
|
||||||
|
if (ex != null && ex.getExitCode() != 0) {
|
||||||
|
throw ex;
|
||||||
|
}
|
||||||
|
return Pair.of(purge.filesFound, purge.filesDeleted);
|
||||||
|
}
|
||||||
|
}
|
|
@ -94,7 +94,7 @@ public abstract class RenameTracker extends AbstractStoreOperation {
|
||||||
* Constructor.
|
* Constructor.
|
||||||
* @param name tracker name for logs.
|
* @param name tracker name for logs.
|
||||||
* @param storeContext store context.
|
* @param storeContext store context.
|
||||||
* @param metadataStore the stopre
|
* @param metadataStore the store
|
||||||
* @param sourceRoot source path.
|
* @param sourceRoot source path.
|
||||||
* @param dest destination path.
|
* @param dest destination path.
|
||||||
* @param operationState ongoing move state.
|
* @param operationState ongoing move state.
|
||||||
|
|
|
@ -0,0 +1,241 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.Item;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.ItemCollection;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.QueryOutcome;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.ScanOutcome;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.Table;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.internal.IteratorSupport;
|
||||||
|
import com.amazonaws.services.dynamodbv2.document.spec.QuerySpec;
|
||||||
|
import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileStatus;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkNotNull;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.DynamoDBMetadataStore.VERSION_MARKER;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.CHILD;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.PARENT;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.TABLE_VERSION;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.itemToPathMetadata;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Package-scoped accessor to table state in S3Guard.
|
||||||
|
* This is for maintenance, diagnostics and testing: it is <i>not</i> to
|
||||||
|
* be used otherwise.
|
||||||
|
* <ol>
|
||||||
|
* <li>
|
||||||
|
* Some of the operations here may dramatically alter the state of
|
||||||
|
* a table, so use carefully.
|
||||||
|
* </li>
|
||||||
|
* <li>
|
||||||
|
* Operations to assess consistency of a store are best executed
|
||||||
|
* against a table which is otherwise inactive.
|
||||||
|
* </li>
|
||||||
|
* <li>
|
||||||
|
* No retry/throttling or AWS to IOE logic here.
|
||||||
|
* </li>
|
||||||
|
* <li>
|
||||||
|
* If a scan or query includes the version marker in the result, it
|
||||||
|
* is converted to a {@link VersionMarker} instance.
|
||||||
|
* </li>
|
||||||
|
* </ol>
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Private
|
||||||
|
@InterfaceStability.Unstable
|
||||||
|
class S3GuardTableAccess {
|
||||||
|
|
||||||
|
private static final Logger LOG =
|
||||||
|
LoggerFactory.getLogger(S3GuardTableAccess.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store instance to work with.
|
||||||
|
*/
|
||||||
|
private final DynamoDBMetadataStore store;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Table; retrieved from the store.
|
||||||
|
*/
|
||||||
|
private final Table table;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct.
|
||||||
|
* @param store store to work with.
|
||||||
|
*/
|
||||||
|
S3GuardTableAccess(final DynamoDBMetadataStore store) {
|
||||||
|
this.store = checkNotNull(store);
|
||||||
|
this.table = checkNotNull(store.getTable());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Username of user in store.
|
||||||
|
* @return a string.
|
||||||
|
*/
|
||||||
|
private String getUsername() {
|
||||||
|
return store.getUsername();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Execute a query.
|
||||||
|
* @param spec query spec.
|
||||||
|
* @return the outcome.
|
||||||
|
*/
|
||||||
|
ItemCollection<QueryOutcome> query(QuerySpec spec) {
|
||||||
|
return table.query(spec);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Issue a query where the result is to be an iterator over
|
||||||
|
* the entries
|
||||||
|
* of DDBPathMetadata instances.
|
||||||
|
* @param spec query spec.
|
||||||
|
* @return an iterator over path entries.
|
||||||
|
*/
|
||||||
|
Iterable<DDBPathMetadata> queryMetadata(QuerySpec spec) {
|
||||||
|
return new DDBPathMetadataCollection<>(query(spec));
|
||||||
|
}
|
||||||
|
|
||||||
|
ItemCollection<ScanOutcome> scan(ExpressionSpecBuilder spec) {
|
||||||
|
return table.scan(spec.buildForScan());
|
||||||
|
}
|
||||||
|
|
||||||
|
Iterable<DDBPathMetadata> scanMetadata(ExpressionSpecBuilder spec) {
|
||||||
|
return new DDBPathMetadataCollection<>(scan(spec));
|
||||||
|
}
|
||||||
|
|
||||||
|
void delete(Collection<Path> paths) {
|
||||||
|
paths.stream()
|
||||||
|
.map(PathMetadataDynamoDBTranslation::pathToKey)
|
||||||
|
.forEach(table::deleteItem);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A collection which wraps the result of a query or scan.
|
||||||
|
* Important: iterate through this only once; the outcome
|
||||||
|
* of repeating an iteration is "undefined"
|
||||||
|
* @param <T> type of outcome.
|
||||||
|
*/
|
||||||
|
private final class DDBPathMetadataCollection<T>
|
||||||
|
implements Iterable<DDBPathMetadata> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Query/scan result.
|
||||||
|
*/
|
||||||
|
private final ItemCollection<T> outcome;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate.
|
||||||
|
* @param outcome query/scan outcome.
|
||||||
|
*/
|
||||||
|
private DDBPathMetadataCollection(final ItemCollection<T> outcome) {
|
||||||
|
this.outcome = outcome;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the iterator.
|
||||||
|
* @return the iterator.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public Iterator<DDBPathMetadata> iterator() {
|
||||||
|
return new DDBPathMetadataIterator<>(outcome.iterator());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An iterator which converts the iterated-over result of
|
||||||
|
* a query or scan into a {@code DDBPathMetadataIterator} entry.
|
||||||
|
* @param <T> type of source.
|
||||||
|
*/
|
||||||
|
private final class DDBPathMetadataIterator<T> implements
|
||||||
|
Iterator<DDBPathMetadata> {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Iterator to invoke.
|
||||||
|
*/
|
||||||
|
private final IteratorSupport<Item, T> it;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate.
|
||||||
|
* @param it Iterator to invoke.
|
||||||
|
*/
|
||||||
|
private DDBPathMetadataIterator(final IteratorSupport<Item, T> it) {
|
||||||
|
this.it = it;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean hasNext() {
|
||||||
|
return it.hasNext();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public DDBPathMetadata next() {
|
||||||
|
Item item = it.next();
|
||||||
|
Pair<String, String> key = primaryKey(item);
|
||||||
|
if (VERSION_MARKER.equals(key.getLeft()) &&
|
||||||
|
VERSION_MARKER.equals(key.getRight())) {
|
||||||
|
// a version marker is found, return the special type
|
||||||
|
return new VersionMarker(item);
|
||||||
|
} else {
|
||||||
|
return itemToPathMetadata(item, getUsername());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DDBPathMetadata subclass returned when a query returns
|
||||||
|
* the version marker.
|
||||||
|
* There is a FileStatus returned where the owner field contains
|
||||||
|
* the table version; the path is always the unqualified path "/VERSION".
|
||||||
|
* Because it is unqualified, operations which treat this as a normal
|
||||||
|
* DDB metadata entry usually fail.
|
||||||
|
*/
|
||||||
|
static final class VersionMarker extends DDBPathMetadata {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Instantiate.
|
||||||
|
* @param versionMarker the version marker.
|
||||||
|
*/
|
||||||
|
VersionMarker(Item versionMarker) {
|
||||||
|
super(new S3AFileStatus(true, new Path("/VERSION"),
|
||||||
|
"" + versionMarker.getString(TABLE_VERSION)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given an item, split it to the parent and child fields.
|
||||||
|
* @param item item to split.
|
||||||
|
* @return (parent, child).
|
||||||
|
*/
|
||||||
|
private static Pair<String, String> primaryKey(Item item) {
|
||||||
|
return Pair.of(item.getString(PARENT), item.getString(CHILD));
|
||||||
|
}
|
||||||
|
}
|
|
@ -755,13 +755,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
|
||||||
located.getOwner());
|
located.getOwner());
|
||||||
dirCache.add(child.getPath());
|
dirCache.add(child.getPath());
|
||||||
} else {
|
} else {
|
||||||
child = new S3AFileStatus(located.getLen(),
|
child = located.toS3AFileStatus();
|
||||||
located.getModificationTime(),
|
|
||||||
located.getPath(),
|
|
||||||
located.getBlockSize(),
|
|
||||||
located.getOwner(),
|
|
||||||
located.getETag(),
|
|
||||||
located.getVersionId());
|
|
||||||
}
|
}
|
||||||
putParentsIfNotPresent(child, operationState);
|
putParentsIfNotPresent(child, operationState);
|
||||||
S3Guard.putWithTtl(getStore(),
|
S3Guard.putWithTtl(getStore(),
|
||||||
|
@ -1026,11 +1020,15 @@ public abstract class S3GuardTool extends Configured implements Tool {
|
||||||
public static final String PURPOSE = "truncate older metadata from " +
|
public static final String PURPOSE = "truncate older metadata from " +
|
||||||
"repository "
|
"repository "
|
||||||
+ DATA_IN_S3_IS_PRESERVED;;
|
+ DATA_IN_S3_IS_PRESERVED;;
|
||||||
|
|
||||||
|
public static final String TOMBSTONE = "tombstone";
|
||||||
|
|
||||||
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
||||||
"\t" + PURPOSE + "\n\n" +
|
"\t" + PURPOSE + "\n\n" +
|
||||||
"Common options:\n" +
|
"Common options:\n" +
|
||||||
" -" + META_FLAG + " URL - Metadata repository details " +
|
" -" + META_FLAG + " URL - Metadata repository details " +
|
||||||
"(implementation-specific)\n" +
|
"(implementation-specific)\n" +
|
||||||
|
"[-" + TOMBSTONE + "]\n" +
|
||||||
"Age options. Any combination of these integer-valued options:\n" +
|
"Age options. Any combination of these integer-valued options:\n" +
|
||||||
AGE_OPTIONS_USAGE + "\n" +
|
AGE_OPTIONS_USAGE + "\n" +
|
||||||
"Amazon DynamoDB-specific options:\n" +
|
"Amazon DynamoDB-specific options:\n" +
|
||||||
|
@ -1041,7 +1039,7 @@ public abstract class S3GuardTool extends Configured implements Tool {
|
||||||
" is not supported.";
|
" is not supported.";
|
||||||
|
|
||||||
Prune(Configuration conf) {
|
Prune(Configuration conf) {
|
||||||
super(conf);
|
super(conf, TOMBSTONE);
|
||||||
addAgeOptions();
|
addAgeOptions();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1098,8 +1096,13 @@ public abstract class S3GuardTool extends Configured implements Tool {
|
||||||
keyPrefix = PathMetadataDynamoDBTranslation.pathToParentKey(path);
|
keyPrefix = PathMetadataDynamoDBTranslation.pathToParentKey(path);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MetadataStore.PruneMode mode
|
||||||
|
= MetadataStore.PruneMode.ALL_BY_MODTIME;
|
||||||
|
if (getCommandFormat().getOpt(TOMBSTONE)) {
|
||||||
|
mode = MetadataStore.PruneMode.TOMBSTONES_BY_LASTUPDATED;
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
getStore().prune(MetadataStore.PruneMode.ALL_BY_MODTIME, divide,
|
getStore().prune(mode, divide,
|
||||||
keyPrefix);
|
keyPrefix);
|
||||||
} catch (UnsupportedOperationException e){
|
} catch (UnsupportedOperationException e){
|
||||||
errorln("Prune operation not supported in metadata store.");
|
errorln("Prune operation not supported in metadata store.");
|
||||||
|
|
|
@ -796,7 +796,7 @@ time" is older than the specified age.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
|
hadoop s3guard prune [-days DAYS] [-hours HOURS] [-minutes MINUTES]
|
||||||
[-seconds SECONDS] [-m URI] ( -region REGION | s3a://BUCKET )
|
[-seconds SECONDS] [-tombstone] [-meta URI] ( -region REGION | s3a://BUCKET )
|
||||||
```
|
```
|
||||||
|
|
||||||
A time value of hours, minutes and/or seconds must be supplied.
|
A time value of hours, minutes and/or seconds must be supplied.
|
||||||
|
@ -807,6 +807,13 @@ in the S3 Bucket.
|
||||||
1. If an S3A URI is supplied, only the entries in the table specified by the
|
1. If an S3A URI is supplied, only the entries in the table specified by the
|
||||||
URI and older than a specific age are deleted.
|
URI and older than a specific age are deleted.
|
||||||
|
|
||||||
|
|
||||||
|
The `-tombstone` option instructs the operation to only purge "tombstones",
|
||||||
|
markers of deleted files. These tombstone markers are only used briefly,
|
||||||
|
to indicate that a recently deleted file should not be found in listings.
|
||||||
|
As a result, there is no adverse consequences in regularly pruning old
|
||||||
|
tombstones.
|
||||||
|
|
||||||
Example
|
Example
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
@ -817,18 +824,18 @@ Deletes all entries in the S3Guard table for files older than seven days from
|
||||||
the table associated with `s3a://ireland-1`.
|
the table associated with `s3a://ireland-1`.
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hadoop s3guard prune -days 7 s3a://ireland-1/path_prefix/
|
hadoop s3guard prune -tombstone -days 7 s3a://ireland-1/path_prefix/
|
||||||
```
|
```
|
||||||
|
|
||||||
Deletes all entries in the S3Guard table for files older than seven days from
|
Deletes all entries in the S3Guard table for tombstones older than seven days from
|
||||||
the table associated with `s3a://ireland-1` and with the prefix "path_prefix"
|
the table associated with `s3a://ireland-1` and with the prefix `path_prefix`
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
|
hadoop s3guard prune -hours 1 -minutes 30 -meta dynamodb://ireland-team -region eu-west-1
|
||||||
```
|
```
|
||||||
|
|
||||||
Delete all entries more than 90 minutes old from the table "ireland-team" in
|
Delete all entries more than 90 minutes old from the table "`ireland-team"` in
|
||||||
the region "eu-west-1".
|
the region `eu-west-1`.
|
||||||
|
|
||||||
|
|
||||||
### Tune the I/O capacity of the DynamoDB Table, `s3guard set-capacity`
|
### Tune the I/O capacity of the DynamoDB Table, `s3guard set-capacity`
|
||||||
|
|
|
@ -845,7 +845,7 @@ it can be manually done:
|
||||||
hadoop s3guard uploads -abort -force s3a://test-bucket/
|
hadoop s3guard uploads -abort -force s3a://test-bucket/
|
||||||
* If you don't need it, destroy the S3Guard DDB table.
|
* If you don't need it, destroy the S3Guard DDB table.
|
||||||
|
|
||||||
hadoop s3guard destroy s3a://hwdev-steve-ireland-new/
|
hadoop s3guard destroy s3a://test-bucket/
|
||||||
|
|
||||||
The S3Guard tests will automatically create the Dynamo DB table in runs with
|
The S3Guard tests will automatically create the Dynamo DB table in runs with
|
||||||
`-Ds3guard -Ddynamo` set; default capacity of these buckets
|
`-Ds3guard -Ddynamo` set; default capacity of these buckets
|
||||||
|
@ -881,7 +881,7 @@ using an absolute XInclude reference to it.
|
||||||
</configuration>
|
</configuration>
|
||||||
```
|
```
|
||||||
|
|
||||||
# <a name="failure-injection"></a>Failure Injection
|
## <a name="failure-injection"></a>Failure Injection
|
||||||
|
|
||||||
**Warning do not enable any type of failure injection in production. The
|
**Warning do not enable any type of failure injection in production. The
|
||||||
following settings are for testing only.**
|
following settings are for testing only.**
|
||||||
|
@ -1014,7 +1014,7 @@ The inconsistent client is shipped in the `hadoop-aws` JAR, so it can
|
||||||
be used in applications which work with S3 to see how they handle
|
be used in applications which work with S3 to see how they handle
|
||||||
inconsistent directory listings.
|
inconsistent directory listings.
|
||||||
|
|
||||||
##<a name="s3guard"></a> Testing S3Guard
|
## <a name="s3guard"></a> Testing S3Guard
|
||||||
|
|
||||||
[S3Guard](./s3guard.html) is an extension to S3A which adds consistent metadata
|
[S3Guard](./s3guard.html) is an extension to S3A which adds consistent metadata
|
||||||
listings to the S3A client. As it is part of S3A, it also needs to be tested.
|
listings to the S3A client. As it is part of S3A, it also needs to be tested.
|
||||||
|
@ -1052,7 +1052,7 @@ The basic strategy for testing S3Guard correctness consists of:
|
||||||
No charges are incurred for using this store, and its consistency
|
No charges are incurred for using this store, and its consistency
|
||||||
guarantees are that of the underlying object store instance. <!-- :) -->
|
guarantees are that of the underlying object store instance. <!-- :) -->
|
||||||
|
|
||||||
## Testing S3A with S3Guard Enabled
|
### Testing S3A with S3Guard Enabled
|
||||||
|
|
||||||
All the S3A tests which work with a private repository can be configured to
|
All the S3A tests which work with a private repository can be configured to
|
||||||
run with S3Guard by using the `s3guard` profile. When set, this will run
|
run with S3Guard by using the `s3guard` profile. When set, this will run
|
||||||
|
@ -1084,13 +1084,14 @@ mvn -T 1C verify -Dtest=skip -Dit.test=ITestS3AMiscOperations -Ds3guard -Ddynamo
|
||||||
1. If the `s3guard` profile is not set, then the S3Guard properties are those
|
1. If the `s3guard` profile is not set, then the S3Guard properties are those
|
||||||
of the test configuration set in `contract-test-options.xml` or `auth-keys.xml`
|
of the test configuration set in `contract-test-options.xml` or `auth-keys.xml`
|
||||||
|
|
||||||
If the `s3guard` profile *is* set,
|
If the `s3guard` profile *is* set:
|
||||||
1. The S3Guard options from maven (the dynamo and authoritative flags)
|
1. The S3Guard options from maven (the dynamo and authoritative flags)
|
||||||
overwrite any previously set in the configuration files.
|
overwrite any previously set in the configuration files.
|
||||||
1. DynamoDB will be configured to create any missing tables.
|
1. DynamoDB will be configured to create any missing tables.
|
||||||
1. When using DynamoDB and running ITestDynamoDBMetadataStore, the fs.s3a.s3guard.ddb.test.table
|
1. When using DynamoDB and running `ITestDynamoDBMetadataStore`,
|
||||||
property should be configured, and the name of that table should be different
|
the `fs.s3a.s3guard.ddb.test.table`
|
||||||
than what is used for fs.s3a.s3guard.ddb.table. The test table is destroyed
|
property MUST be configured, and the name of that table MUST be different
|
||||||
|
than what is used for `fs.s3a.s3guard.ddb.table`. The test table is destroyed
|
||||||
and modified multiple times during the test.
|
and modified multiple times during the test.
|
||||||
1. Several of the tests create and destroy DynamoDB tables. The table names
|
1. Several of the tests create and destroy DynamoDB tables. The table names
|
||||||
are prefixed with the value defined by
|
are prefixed with the value defined by
|
||||||
|
@ -1100,6 +1101,88 @@ property should be configured, and the name of that table should be different
|
||||||
incurring AWS charges.
|
incurring AWS charges.
|
||||||
|
|
||||||
|
|
||||||
|
### How to Dump the Table and Metastore State
|
||||||
|
|
||||||
|
There's an unstable entry point to list the contents of a table
|
||||||
|
and S3 filesystem ot a set of Tab Separated Value files:
|
||||||
|
|
||||||
|
```
|
||||||
|
hadoop org.apache.hadoop.fs.s3a.s3guard.DumpS3GuardDynamoTable s3a://bucket/ dir/out
|
||||||
|
```
|
||||||
|
|
||||||
|
This generates a set of files prefixed `dir/out-` with different views of the
|
||||||
|
world which can then be viewed on the command line or editor:
|
||||||
|
|
||||||
|
```
|
||||||
|
"type" "deleted" "path" "is_auth_dir" "is_empty_dir" "len" "updated" "updated_s" "last_modified" "last_modified_s" "etag" "version"
|
||||||
|
"file" "true" "s3a://bucket/fork-0001/test/ITestS3AContractDistCp/testDirectWrite/remote" "false" "UNKNOWN" 0 1562171244451 "Wed Jul 03 17:27:24 BST 2019" 1562171244451 "Wed Jul 03 17:27:24 BST 2019" "" ""
|
||||||
|
"file" "true" "s3a://bucket/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target/test-dir/1/5xlPpalRwv/test/new/newdir/file1" "false" "UNKNOWN" 0 1562171518435 "Wed Jul 03 17:31:58 BST 2019" 1562171518435 "Wed Jul 03 17:31:58 BST 2019" "" ""
|
||||||
|
"file" "true" "s3a://bucket/Users/stevel/Projects/hadoop-trunk/hadoop-tools/hadoop-aws/target/test-dir/1/5xlPpalRwv/test/new/newdir/subdir" "false" "UNKNOWN" 0 1562171518535 "Wed Jul 03 17:31:58 BST 2019" 1562171518535 "Wed Jul 03 17:31:58 BST 2019" "" ""
|
||||||
|
"file" "true" "s3a://bucket/test/DELAY_LISTING_ME/testMRJob" "false" "UNKNOWN" 0 1562172036299 "Wed Jul 03 17:40:36 BST 2019" 1562172036299 "Wed Jul 03 17:40:36 BST 2019" "" ""
|
||||||
|
```
|
||||||
|
|
||||||
|
This is unstable: the output format may change without warning.
|
||||||
|
To understand the meaning of the fields, consult the documentation.
|
||||||
|
They are, currently:
|
||||||
|
|
||||||
|
| field | meaning | source |
|
||||||
|
|-------|---------| -------|
|
||||||
|
| `type` | type | filestatus |
|
||||||
|
| `deleted` | tombstone marker | metadata |
|
||||||
|
| `path` | path of an entry | filestatus |
|
||||||
|
| `is_auth_dir` | directory entry authoritative status | metadata |
|
||||||
|
| `is_empty_dir` | does the entry represent an empty directory | metadata |
|
||||||
|
| `len` | file length | filestatus |
|
||||||
|
| `last_modified` | file status last modified | filestatus |
|
||||||
|
| `last_modified_s` | file status last modified as string | filestatus |
|
||||||
|
| `updated` | time (millis) metadata was updated | metadata |
|
||||||
|
| `updated_s` | updated time as a string | metadata |
|
||||||
|
| `etag` | any etag | filestatus |
|
||||||
|
| `version` | any version| filestatus |
|
||||||
|
|
||||||
|
Files generated
|
||||||
|
|
||||||
|
| suffix | content |
|
||||||
|
|---------------|---------|
|
||||||
|
| `-scan.csv` | Full scan/dump of the metastore |
|
||||||
|
| `-store.csv` | Recursive walk through the metastore |
|
||||||
|
| `-tree.csv` | Treewalk through filesystem `listStatus("/")` calls |
|
||||||
|
| `-flat.csv` | Flat listing through filesystem `listFiles("/", recursive)` |
|
||||||
|
| `-s3.csv` | Dump of the S3 Store *only* |
|
||||||
|
| `-scan-2.csv` | Scan of the store after the previous operations |
|
||||||
|
|
||||||
|
Why the two scan entries? The S3A listing and treewalk operations
|
||||||
|
may add new entries to the metastore/DynamoDB table.
|
||||||
|
|
||||||
|
Note 1: this is unstable; entry list and meaning may change, sorting of output,
|
||||||
|
the listing algorithm, representation of types, etc. It's expected
|
||||||
|
uses are: diagnostics, support calls and helping us developers
|
||||||
|
work out what we've just broken.
|
||||||
|
|
||||||
|
Note 2: This *is* safe to use against an active store; the tables may appear
|
||||||
|
to be inconsistent due to changes taking place during the dump sequence.
|
||||||
|
|
||||||
|
### Resetting the Metastore: `PurgeS3GuardDynamoTable`
|
||||||
|
|
||||||
|
The `PurgeS3GuardDynamoTable` entry point
|
||||||
|
`org.apache.hadoop.fs.s3a.s3guard.PurgeS3GuardDynamoTable` can
|
||||||
|
list all entries in a store for a specific filesystem, and delete them.
|
||||||
|
It *only* deletes those entries in the store for that specific filesystem,
|
||||||
|
even if the store is shared.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hadoop org.apache.hadoop.fs.s3a.s3guard.PurgeS3GuardDynamoTable \
|
||||||
|
-force s3a://bucket/
|
||||||
|
```
|
||||||
|
|
||||||
|
Without the `-force` option the table is scanned, but no entries deleted;
|
||||||
|
with it then all entries for that filesystem are deleted.
|
||||||
|
No attempt is made to order the deletion; while the operation is under way
|
||||||
|
the store is not fully connected (i.e. there may be entries whose parent has
|
||||||
|
already been deleted).
|
||||||
|
|
||||||
|
Needless to say: *it is not safe to use this against a table in active use.*
|
||||||
|
|
||||||
### Scale Testing MetadataStore Directly
|
### Scale Testing MetadataStore Directly
|
||||||
|
|
||||||
There are some scale tests that exercise Metadata Store implementations
|
There are some scale tests that exercise Metadata Store implementations
|
||||||
|
|
|
@ -24,6 +24,7 @@ import java.io.IOException;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest;
|
import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest;
|
||||||
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
@ -56,9 +57,24 @@ public class ITestS3AContractRootDir extends
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public S3AFileSystem getFileSystem() {
|
||||||
|
return (S3AFileSystem) super.getFileSystem();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is overridden to allow for eventual consistency on listings,
|
||||||
|
* but only if the store does not have S3Guard protecting it.
|
||||||
|
*/
|
||||||
@Override
|
@Override
|
||||||
public void testListEmptyRootDirectory() throws IOException {
|
public void testListEmptyRootDirectory() throws IOException {
|
||||||
for (int attempt = 1, maxAttempts = 10; attempt <= maxAttempts; ++attempt) {
|
int maxAttempts = 10;
|
||||||
|
if (getFileSystem().hasMetadataStore()) {
|
||||||
|
maxAttempts = 1;
|
||||||
|
}
|
||||||
|
describe("Listing root directory; for consistency allowing "
|
||||||
|
+ maxAttempts + " attempts");
|
||||||
|
for (int attempt = 1; attempt <= maxAttempts; ++attempt) {
|
||||||
try {
|
try {
|
||||||
super.testListEmptyRootDirectory();
|
super.testListEmptyRootDirectory();
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -49,8 +49,15 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
|
||||||
return new S3AContract(conf);
|
return new S3AContract(conf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setup() throws Exception {
|
||||||
|
Thread.currentThread().setName("setup");
|
||||||
|
super.setup();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void teardown() throws Exception {
|
public void teardown() throws Exception {
|
||||||
|
Thread.currentThread().setName("teardown");
|
||||||
super.teardown();
|
super.teardown();
|
||||||
describe("closing file system");
|
describe("closing file system");
|
||||||
IOUtils.closeStream(getFileSystem());
|
IOUtils.closeStream(getFileSystem());
|
||||||
|
|
|
@ -18,13 +18,18 @@
|
||||||
|
|
||||||
package org.apache.hadoop.fs.s3a;
|
package org.apache.hadoop.fs.s3a;
|
||||||
|
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
|
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
|
||||||
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
|
import org.apache.hadoop.fs.s3a.s3guard.NullMetadataStore;
|
||||||
import org.junit.Assume;
|
import org.junit.Assume;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.assertRenameOutcome;
|
||||||
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
|
import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
|
||||||
|
import static org.apache.hadoop.test.LambdaTestUtils.intercept;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test logic around whether or not a directory is empty, with S3Guard enabled.
|
* Test logic around whether or not a directory is empty, with S3Guard enabled.
|
||||||
|
@ -37,6 +42,45 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.touch;
|
||||||
*/
|
*/
|
||||||
public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {
|
public class ITestS3GuardEmptyDirs extends AbstractS3ATestBase {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testRenameEmptyDir() throws Throwable {
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Path basePath = path(getMethodName());
|
||||||
|
Path sourceDir = new Path(basePath, "AAA-source");
|
||||||
|
String sourceDirMarker = fs.pathToKey(sourceDir) + "/";
|
||||||
|
Path destDir = new Path(basePath, "BBB-dest");
|
||||||
|
String destDirMarker = fs.pathToKey(destDir) + "/";
|
||||||
|
// set things up.
|
||||||
|
mkdirs(sourceDir);
|
||||||
|
// there'a source directory marker
|
||||||
|
fs.getObjectMetadata(sourceDirMarker);
|
||||||
|
S3AFileStatus srcStatus = getEmptyDirStatus(sourceDir);
|
||||||
|
assertEquals("Must be an empty dir: " + srcStatus, Tristate.TRUE,
|
||||||
|
srcStatus.isEmptyDirectory());
|
||||||
|
// do the rename
|
||||||
|
assertRenameOutcome(fs, sourceDir, destDir, true);
|
||||||
|
S3AFileStatus destStatus = getEmptyDirStatus(destDir);
|
||||||
|
assertEquals("Must be an empty dir: " + destStatus, Tristate.TRUE,
|
||||||
|
destStatus.isEmptyDirectory());
|
||||||
|
// source does not exist.
|
||||||
|
intercept(FileNotFoundException.class,
|
||||||
|
() -> getEmptyDirStatus(sourceDir));
|
||||||
|
// and verify that there's no dir marker hidden under a tombstone
|
||||||
|
intercept(FileNotFoundException.class,
|
||||||
|
() -> Invoker.once("HEAD", sourceDirMarker,
|
||||||
|
() -> fs.getObjectMetadata(sourceDirMarker)));
|
||||||
|
// the parent dir mustn't be confused
|
||||||
|
S3AFileStatus baseStatus = getEmptyDirStatus(basePath);
|
||||||
|
assertEquals("Must not be an empty dir: " + baseStatus, Tristate.FALSE,
|
||||||
|
baseStatus.isEmptyDirectory());
|
||||||
|
// and verify the dest dir has a marker
|
||||||
|
fs.getObjectMetadata(destDirMarker);
|
||||||
|
}
|
||||||
|
|
||||||
|
private S3AFileStatus getEmptyDirStatus(Path dir) throws IOException {
|
||||||
|
return getFileSystem().innerGetFileStatus(dir, true);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testEmptyDirs() throws Exception {
|
public void testEmptyDirs() throws Exception {
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
|
|
@ -31,6 +31,7 @@ import org.apache.hadoop.fs.contract.AbstractFSContract;
|
||||||
import org.apache.hadoop.fs.contract.s3a.S3AContract;
|
import org.apache.hadoop.fs.contract.s3a.S3AContract;
|
||||||
|
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
|
import org.assertj.core.api.Assertions;
|
||||||
import org.junit.Assume;
|
import org.junit.Assume;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
|
@ -67,6 +68,14 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
invoker = new Invoker(new S3ARetryPolicy(getConfiguration()),
|
invoker = new Invoker(new S3ARetryPolicy(getConfiguration()),
|
||||||
Invoker.NO_OP
|
Invoker.NO_OP
|
||||||
);
|
);
|
||||||
|
Assume.assumeTrue("No metadata store in test filesystem",
|
||||||
|
getFileSystem().hasMetadataStore());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void teardown() throws Exception {
|
||||||
|
clearInconsistency(getFileSystem());
|
||||||
|
super.teardown();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -95,7 +104,7 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
private void doTestRenameSequence(Path[] mkdirs, Path[] srcdirs,
|
private void doTestRenameSequence(Path[] mkdirs, Path[] srcdirs,
|
||||||
Path[] dstdirs, Path[] yesdirs, Path[] nodirs) throws Exception {
|
Path[] dstdirs, Path[] yesdirs, Path[] nodirs) throws Exception {
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
|
|
||||||
if (mkdirs != null) {
|
if (mkdirs != null) {
|
||||||
for (Path mkdir : mkdirs) {
|
for (Path mkdir : mkdirs) {
|
||||||
|
@ -104,8 +113,8 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
clearInconsistency(fs);
|
clearInconsistency(fs);
|
||||||
}
|
}
|
||||||
|
|
||||||
assertTrue("srcdirs and dstdirs must have equal length",
|
assertEquals("srcdirs and dstdirs must have equal length",
|
||||||
srcdirs.length == dstdirs.length);
|
srcdirs.length, dstdirs.length);
|
||||||
for (int i = 0; i < srcdirs.length; i++) {
|
for (int i = 0; i < srcdirs.length; i++) {
|
||||||
assertTrue("Rename returned false: " + srcdirs[i] + " -> " + dstdirs[i],
|
assertTrue("Rename returned false: " + srcdirs[i] + " -> " + dstdirs[i],
|
||||||
fs.rename(srcdirs[i], dstdirs[i]));
|
fs.rename(srcdirs[i], dstdirs[i]));
|
||||||
|
@ -119,6 +128,21 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Delete an array of paths; log exceptions.
|
||||||
|
* @param paths paths to delete
|
||||||
|
*/
|
||||||
|
private void deletePathsQuietly(Path...paths) {
|
||||||
|
for (Path dir : paths) {
|
||||||
|
try {
|
||||||
|
getFileSystem().delete(dir, true);
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.info("Failed to delete {}: {}", dir, e.toString());
|
||||||
|
LOG.debug("Delete failure:, e");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that after renaming a directory, the original directory and its
|
* Tests that after renaming a directory, the original directory and its
|
||||||
* contents are indeed missing and the corresponding new paths are visible.
|
* contents are indeed missing and the corresponding new paths are visible.
|
||||||
|
@ -126,19 +150,23 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testConsistentListAfterRename() throws Exception {
|
public void testConsistentListAfterRename() throws Exception {
|
||||||
Path[] mkdirs = {
|
Path d1f = path("d1/f");
|
||||||
path("d1/f"),
|
Path d1f2 = path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING);
|
||||||
path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)
|
Path[] mkdirs = {d1f, d1f2};
|
||||||
};
|
Path d1 = path("d1");
|
||||||
Path[] srcdirs = {path("d1")};
|
Path[] srcdirs = {d1};
|
||||||
Path[] dstdirs = {path("d2")};
|
Path d2 = path("d2");
|
||||||
Path[] yesdirs = {path("d2"), path("d2/f"),
|
Path[] dstdirs = {d2};
|
||||||
path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
|
Path d2f2 = path("d2/f" + DEFAULT_DELAY_KEY_SUBSTRING);
|
||||||
Path[] nodirs = {path("d1"), path("d1/f"),
|
Path[] yesdirs = {d2, path("d2/f"), d2f2};
|
||||||
path("d1/f" + DEFAULT_DELAY_KEY_SUBSTRING)};
|
Path[] nodirs = {
|
||||||
doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
|
d1, d1f, d1f2};
|
||||||
getFileSystem().delete(path("d1"), true);
|
try {
|
||||||
getFileSystem().delete(path("d2"), true);
|
doTestRenameSequence(mkdirs, srcdirs, dstdirs, yesdirs, nodirs);
|
||||||
|
} finally {
|
||||||
|
clearInconsistency(getFileSystem());
|
||||||
|
deletePathsQuietly(d1, d2, d1f, d1f2, d2f2);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -157,18 +185,23 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
Path[] setB = {dir2[0], dir1[0]};
|
Path[] setB = {dir2[0], dir1[0]};
|
||||||
Path[] setC = {dir0[0], dir2[0]};
|
Path[] setC = {dir0[0], dir2[0]};
|
||||||
|
|
||||||
for(int i = 0; i < 2; i++) {
|
try {
|
||||||
Path[] firstSet = i == 0 ? setA : null;
|
for(int i = 0; i < 2; i++) {
|
||||||
doTestRenameSequence(firstSet, setA, setB, setB, dir0);
|
Path[] firstSet = i == 0 ? setA : null;
|
||||||
doTestRenameSequence(null, setB, setC, setC, dir1);
|
doTestRenameSequence(firstSet, setA, setB, setB, dir0);
|
||||||
doTestRenameSequence(null, setC, setA, setA, dir2);
|
doTestRenameSequence(null, setB, setC, setC, dir1);
|
||||||
}
|
doTestRenameSequence(null, setC, setA, setA, dir2);
|
||||||
|
}
|
||||||
|
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
assertFalse("Renaming deleted file should have failed",
|
assertFalse("Renaming deleted file should have failed",
|
||||||
fs.rename(dir2[0], dir1[0]));
|
fs.rename(dir2[0], dir1[0]));
|
||||||
assertTrue("Renaming over existing file should have succeeded",
|
assertTrue("Renaming over existing file should have succeeded",
|
||||||
fs.rename(dir1[0], dir0[0]));
|
fs.rename(dir1[0], dir0[0]));
|
||||||
|
} finally {
|
||||||
|
clearInconsistency(getFileSystem());
|
||||||
|
deletePathsQuietly(dir0[0], dir1[0], dir2[0]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -179,9 +212,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testConsistentListAfterDelete() throws Exception {
|
public void testConsistentListAfterDelete() throws Exception {
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
// test will fail if NullMetadataStore (the default) is configured: skip it.
|
|
||||||
Assume.assumeTrue("FS needs to have a metadatastore.",
|
|
||||||
fs.hasMetadataStore());
|
|
||||||
|
|
||||||
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
||||||
// in listObjects() results via InconsistentS3Client
|
// in listObjects() results via InconsistentS3Client
|
||||||
|
@ -223,8 +253,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testConsistentRenameAfterDelete() throws Exception {
|
public void testConsistentRenameAfterDelete() throws Exception {
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
// test will fail if NullMetadataStore (the default) is configured: skip it.
|
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
|
|
||||||
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
||||||
// in listObjects() results via InconsistentS3Client
|
// in listObjects() results via InconsistentS3Client
|
||||||
|
@ -266,10 +294,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
|
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
|
||||||
// This test will fail if NullMetadataStore (the default) is configured:
|
|
||||||
// skip it.
|
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
|
|
||||||
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
// Any S3 keys that contain DELAY_KEY_SUBSTRING will be delayed
|
||||||
// in listObjects() results via InconsistentS3Client
|
// in listObjects() results via InconsistentS3Client
|
||||||
Path inconsistentPath =
|
Path inconsistentPath =
|
||||||
|
@ -301,9 +325,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testConsistentListLocatedStatusAfterPut() throws Exception {
|
public void testConsistentListLocatedStatusAfterPut() throws Exception {
|
||||||
final S3AFileSystem fs = getFileSystem();
|
final S3AFileSystem fs = getFileSystem();
|
||||||
// This test will fail if NullMetadataStore (the default) is configured:
|
|
||||||
// skip it.
|
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
String rootDir = "doTestConsistentListLocatedStatusAfterPut";
|
String rootDir = "doTestConsistentListLocatedStatusAfterPut";
|
||||||
fs.mkdirs(path(rootDir));
|
fs.mkdirs(path(rootDir));
|
||||||
|
|
||||||
|
@ -368,9 +389,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testConsistentListFiles() throws Exception {
|
public void testConsistentListFiles() throws Exception {
|
||||||
final S3AFileSystem fs = getFileSystem();
|
final S3AFileSystem fs = getFileSystem();
|
||||||
// This test will fail if NullMetadataStore (the default) is configured:
|
|
||||||
// skip it.
|
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
|
|
||||||
final int[] numOfPaths = {0, 2};
|
final int[] numOfPaths = {0, 2};
|
||||||
for (int dirNum : numOfPaths) {
|
for (int dirNum : numOfPaths) {
|
||||||
|
@ -480,7 +498,6 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testCommitByRenameOperations() throws Throwable {
|
public void testCommitByRenameOperations() throws Throwable {
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING);
|
Path work = path("test-commit-by-rename-" + DEFAULT_DELAY_KEY_SUBSTRING);
|
||||||
Path task00 = new Path(work, "task00");
|
Path task00 = new Path(work, "task00");
|
||||||
fs.mkdirs(task00);
|
fs.mkdirs(task00);
|
||||||
|
@ -564,10 +581,10 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
@Test
|
@Test
|
||||||
public void testListingReturnsVersionMetadata() throws Throwable {
|
public void testListingReturnsVersionMetadata() throws Throwable {
|
||||||
S3AFileSystem fs = getFileSystem();
|
S3AFileSystem fs = getFileSystem();
|
||||||
Assume.assumeTrue(fs.hasMetadataStore());
|
|
||||||
|
|
||||||
// write simple file
|
// write simple file
|
||||||
Path file = path("file1");
|
Path parent = path(getMethodName());
|
||||||
|
Path file = new Path(parent, "file1");
|
||||||
try (FSDataOutputStream outputStream = fs.create(file)) {
|
try (FSDataOutputStream outputStream = fs.create(file)) {
|
||||||
outputStream.writeChars("hello");
|
outputStream.writeChars("hello");
|
||||||
}
|
}
|
||||||
|
@ -577,22 +594,26 @@ public class ITestS3GuardListConsistency extends AbstractS3ATestBase {
|
||||||
assertEquals(1, fileStatuses.length);
|
assertEquals(1, fileStatuses.length);
|
||||||
S3AFileStatus status = (S3AFileStatus) fileStatuses[0];
|
S3AFileStatus status = (S3AFileStatus) fileStatuses[0];
|
||||||
String eTag = status.getETag();
|
String eTag = status.getETag();
|
||||||
|
assertNotNull("Etag in " + eTag, eTag);
|
||||||
String versionId = status.getVersionId();
|
String versionId = status.getVersionId();
|
||||||
|
|
||||||
// get status through recursive directory listing
|
// get status through recursive directory listing
|
||||||
RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(
|
RemoteIterator<LocatedFileStatus> filesIterator = fs.listFiles(
|
||||||
file.getParent(), true);
|
parent, true);
|
||||||
List<LocatedFileStatus> files = Lists.newArrayList();
|
List<LocatedFileStatus> files = Lists.newArrayList();
|
||||||
while (filesIterator.hasNext()) {
|
while (filesIterator.hasNext()) {
|
||||||
files.add(filesIterator.next());
|
files.add(filesIterator.next());
|
||||||
}
|
}
|
||||||
assertEquals(1, files.size());
|
Assertions.assertThat(files)
|
||||||
|
.hasSize(1);
|
||||||
|
|
||||||
// ensure eTag and versionId are preserved in directory listing
|
// ensure eTag and versionId are preserved in directory listing
|
||||||
S3ALocatedFileStatus locatedFileStatus =
|
S3ALocatedFileStatus locatedFileStatus =
|
||||||
(S3ALocatedFileStatus) files.get(0);
|
(S3ALocatedFileStatus) files.get(0);
|
||||||
assertEquals(eTag, locatedFileStatus.getETag());
|
assertEquals("etag of " + locatedFileStatus,
|
||||||
assertEquals(versionId, locatedFileStatus.getVersionId());
|
eTag, locatedFileStatus.getETag());
|
||||||
|
assertEquals("versionID of " + locatedFileStatus,
|
||||||
|
versionId, locatedFileStatus.getVersionId());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -36,6 +36,7 @@ import org.junit.rules.TemporaryFolder;
|
||||||
import org.slf4j.Logger;
|
import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
import org.apache.hadoop.fs.FileStatus;
|
import org.apache.hadoop.fs.FileStatus;
|
||||||
import org.apache.hadoop.fs.Path;
|
import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
@ -51,6 +52,7 @@ import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
|
||||||
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
|
||||||
import org.apache.hadoop.util.DurationInfo;
|
import org.apache.hadoop.util.DurationInfo;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching;
|
||||||
import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.FS_S3A_COMMITTER_STAGING_UUID;
|
import static org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants.FS_S3A_COMMITTER_STAGING_UUID;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -61,6 +63,13 @@ public abstract class AbstractITCommitMRJob extends AbstractYarnClusterITest {
|
||||||
private static final Logger LOG =
|
private static final Logger LOG =
|
||||||
LoggerFactory.getLogger(AbstractITCommitMRJob.class);
|
LoggerFactory.getLogger(AbstractITCommitMRJob.class);
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
disableFilesystemCaching(conf);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
@Rule
|
@Rule
|
||||||
public final TemporaryFolder temp = new TemporaryFolder();
|
public final TemporaryFolder temp = new TemporaryFolder();
|
||||||
|
|
||||||
|
|
|
@ -48,8 +48,6 @@ import org.apache.hadoop.fs.Path;
|
||||||
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
||||||
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
|
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
|
||||||
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
import org.apache.hadoop.fs.s3a.s3guard.MetadataStore;
|
|
||||||
import org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation;
|
|
||||||
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
|
import org.apache.hadoop.util.BlockingThreadPoolExecutorService;
|
||||||
import org.apache.hadoop.util.DurationInfo;
|
import org.apache.hadoop.util.DurationInfo;
|
||||||
|
|
||||||
|
@ -286,23 +284,6 @@ public class ITestPartialRenamesDeletes extends AbstractS3ATestBase {
|
||||||
super.teardown();
|
super.teardown();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Directory cleanup includes pruning everything under the path.
|
|
||||||
* This ensures that any in the tree from failed tests don't fill up
|
|
||||||
* the store with many, many, deleted entries.
|
|
||||||
* @throws IOException failure.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
protected void deleteTestDirInTeardown() throws IOException {
|
|
||||||
super.deleteTestDirInTeardown();
|
|
||||||
Path path = getContract().getTestPath();
|
|
||||||
try {
|
|
||||||
prune(path);
|
|
||||||
} catch (IOException e) {
|
|
||||||
LOG.warn("When pruning the test directory {}", path, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private void assumeRoleTests() {
|
private void assumeRoleTests() {
|
||||||
assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
|
assume("No ARN for role tests", !getAssumedRoleARN().isEmpty());
|
||||||
}
|
}
|
||||||
|
@ -692,10 +673,6 @@ public class ITestPartialRenamesDeletes extends AbstractS3ATestBase {
|
||||||
Assertions.assertThat(readOnlyListing)
|
Assertions.assertThat(readOnlyListing)
|
||||||
.as("ReadOnly directory " + directoryList)
|
.as("ReadOnly directory " + directoryList)
|
||||||
.containsAll(readOnlyFiles);
|
.containsAll(readOnlyFiles);
|
||||||
|
|
||||||
// do this prune in the test as well as teardown, so that the test
|
|
||||||
// reporting includes it in the runtime of a successful run.
|
|
||||||
prune(basePath);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -753,25 +730,6 @@ public class ITestPartialRenamesDeletes extends AbstractS3ATestBase {
|
||||||
eval(() -> assertPathExists("Missing path", p));
|
eval(() -> assertPathExists("Missing path", p));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Prune the store for everything under the test path.
|
|
||||||
* @param path path.
|
|
||||||
* @throws IOException on failure.
|
|
||||||
*/
|
|
||||||
private void prune(Path path) throws IOException {
|
|
||||||
S3AFileSystem fs = getFileSystem();
|
|
||||||
if (fs.hasMetadataStore()) {
|
|
||||||
MetadataStore store = fs.getMetadataStore();
|
|
||||||
try (DurationInfo ignored =
|
|
||||||
new DurationInfo(LOG, true, "prune %s", path)) {
|
|
||||||
store.prune(
|
|
||||||
MetadataStore.PruneMode.ALL_BY_MODTIME,
|
|
||||||
System.currentTimeMillis(),
|
|
||||||
PathMetadataDynamoDBTranslation.pathToParentKey(fs.qualify(path)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List all files under a path.
|
* List all files under a path.
|
||||||
* @param path path to list
|
* @param path path to list
|
||||||
|
|
|
@ -105,7 +105,7 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
|
||||||
* @return the output of any successful run
|
* @return the output of any successful run
|
||||||
* @throws Exception failure
|
* @throws Exception failure
|
||||||
*/
|
*/
|
||||||
protected static String expectSuccess(
|
public static String expectSuccess(
|
||||||
String message,
|
String message,
|
||||||
S3GuardTool tool,
|
S3GuardTool tool,
|
||||||
String... args) throws Exception {
|
String... args) throws Exception {
|
||||||
|
@ -322,6 +322,19 @@ public abstract class AbstractS3GuardToolTestBase extends AbstractS3ATestBase {
|
||||||
testPath.toString());
|
testPath.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPruneCommandTombstones() throws Exception {
|
||||||
|
Path testPath = path("testPruneCommandTombstones");
|
||||||
|
getFileSystem().mkdirs(testPath);
|
||||||
|
getFileSystem().delete(testPath, true);
|
||||||
|
S3GuardTool.Prune cmd = new S3GuardTool.Prune(getFileSystem().getConf());
|
||||||
|
cmd.setMetadataStore(ms);
|
||||||
|
exec(cmd,
|
||||||
|
"prune", "-" + S3GuardTool.Prune.TOMBSTONE,
|
||||||
|
"-seconds", "0",
|
||||||
|
testPath.toString());
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPruneCommandConf() throws Exception {
|
public void testPruneCommandConf() throws Exception {
|
||||||
getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
|
getConfiguration().setLong(Constants.S3GUARD_CLI_PRUNE_AGE,
|
||||||
|
|
|
@ -18,9 +18,14 @@
|
||||||
|
|
||||||
package org.apache.hadoop.fs.s3a.s3guard;
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
import java.nio.charset.Charset;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
@ -41,6 +46,7 @@ import org.assertj.core.api.Assertions;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.tuple.Pair;
|
||||||
import org.apache.hadoop.fs.PathIOException;
|
import org.apache.hadoop.fs.PathIOException;
|
||||||
import org.apache.hadoop.fs.contract.s3a.S3AContract;
|
import org.apache.hadoop.fs.contract.s3a.S3AContract;
|
||||||
import org.apache.hadoop.fs.s3a.Constants;
|
import org.apache.hadoop.fs.s3a.Constants;
|
||||||
|
@ -94,6 +100,8 @@ import static org.apache.hadoop.test.LambdaTestUtils.*;
|
||||||
*/
|
*/
|
||||||
public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
|
|
||||||
|
public static final int MINUTE = 60_000;
|
||||||
|
|
||||||
public ITestDynamoDBMetadataStore() {
|
public ITestDynamoDBMetadataStore() {
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
@ -281,22 +289,6 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
public static void deleteMetadataUnderPath(final DynamoDBMetadataStore ms,
|
public static void deleteMetadataUnderPath(final DynamoDBMetadataStore ms,
|
||||||
final Path path, final boolean suppressErrors) throws IOException {
|
final Path path, final boolean suppressErrors) throws IOException {
|
||||||
ThrottleTracker throttleTracker = new ThrottleTracker(ms);
|
ThrottleTracker throttleTracker = new ThrottleTracker(ms);
|
||||||
try (DurationInfo ignored = new DurationInfo(LOG, true, "prune")) {
|
|
||||||
ms.prune(PruneMode.ALL_BY_MODTIME,
|
|
||||||
System.currentTimeMillis(),
|
|
||||||
PathMetadataDynamoDBTranslation.pathToParentKey(path));
|
|
||||||
LOG.info("Throttle statistics: {}", throttleTracker);
|
|
||||||
} catch (FileNotFoundException fnfe) {
|
|
||||||
// there is no table.
|
|
||||||
return;
|
|
||||||
} catch (IOException ioe) {
|
|
||||||
// prune failed. warn and then fall back to forget.
|
|
||||||
LOG.warn("Failed to prune {}", path, ioe);
|
|
||||||
if (!suppressErrors) {
|
|
||||||
throw ioe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// and after the pruning, make sure all other metadata is gone
|
|
||||||
int forgotten = 0;
|
int forgotten = 0;
|
||||||
try (DurationInfo ignored = new DurationInfo(LOG, true, "forget")) {
|
try (DurationInfo ignored = new DurationInfo(LOG, true, "forget")) {
|
||||||
PathMetadata meta = ms.get(path);
|
PathMetadata meta = ms.get(path);
|
||||||
|
@ -921,43 +913,6 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
return md;
|
return md;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void testProvisionTable() throws Exception {
|
|
||||||
final String tableName
|
|
||||||
= getTestTableName("testProvisionTable-" + UUID.randomUUID());
|
|
||||||
final Configuration conf = getTableCreationConfig();
|
|
||||||
conf.set(S3GUARD_DDB_TABLE_NAME_KEY, tableName);
|
|
||||||
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_WRITE_KEY, 2);
|
|
||||||
conf.setInt(S3GUARD_DDB_TABLE_CAPACITY_READ_KEY, 2);
|
|
||||||
DynamoDBMetadataStore ddbms = new DynamoDBMetadataStore();
|
|
||||||
try {
|
|
||||||
ddbms.initialize(conf);
|
|
||||||
DynamoDB dynamoDB = ddbms.getDynamoDB();
|
|
||||||
final DDBCapacities oldProvision = DDBCapacities.extractCapacities(
|
|
||||||
dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
|
|
||||||
Assume.assumeFalse("Table is on-demand", oldProvision.isOnDemandTable());
|
|
||||||
long desiredReadCapacity = oldProvision.getRead() - 1;
|
|
||||||
long desiredWriteCapacity = oldProvision.getWrite() - 1;
|
|
||||||
ddbms.provisionTable(desiredReadCapacity,
|
|
||||||
desiredWriteCapacity);
|
|
||||||
ddbms.initTable();
|
|
||||||
// we have to wait until the provisioning settings are applied,
|
|
||||||
// so until the table is ACTIVE again and not in UPDATING
|
|
||||||
ddbms.getTable().waitForActive();
|
|
||||||
final DDBCapacities newProvision = DDBCapacities.extractCapacities(
|
|
||||||
dynamoDB.getTable(tableName).describe().getProvisionedThroughput());
|
|
||||||
assertEquals("Check newly provisioned table read capacity units.",
|
|
||||||
desiredReadCapacity,
|
|
||||||
newProvision.getRead());
|
|
||||||
assertEquals("Check newly provisioned table write capacity units.",
|
|
||||||
desiredWriteCapacity,
|
|
||||||
newProvision.getWrite());
|
|
||||||
} finally {
|
|
||||||
ddbms.destroy();
|
|
||||||
ddbms.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testDeleteTable() throws Exception {
|
public void testDeleteTable() throws Exception {
|
||||||
final String tableName = getTestTableName("testDeleteTable");
|
final String tableName = getTestTableName("testDeleteTable");
|
||||||
|
@ -1107,7 +1062,7 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
describe("Create an Invalid listing and prune it");
|
describe("Create an Invalid listing and prune it");
|
||||||
DynamoDBMetadataStore ms
|
DynamoDBMetadataStore ms
|
||||||
= ITestDynamoDBMetadataStore.ddbmsStatic;
|
= ITestDynamoDBMetadataStore.ddbmsStatic;
|
||||||
String base = "/testPruneAgainstInvalidTable";
|
String base = "/" + getMethodName();
|
||||||
String subdir = base + "/subdir";
|
String subdir = base + "/subdir";
|
||||||
Path subDirPath = strToPath(subdir);
|
Path subDirPath = strToPath(subdir);
|
||||||
createNewDirs(base, subdir);
|
createNewDirs(base, subdir);
|
||||||
|
@ -1125,13 +1080,9 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
// over the subdirectory
|
// over the subdirectory
|
||||||
|
|
||||||
long now = getTime();
|
long now = getTime();
|
||||||
long oldTime = now - 60_000;
|
long oldTime = now - MINUTE;
|
||||||
putFile(subdir, oldTime, null);
|
putFile(subdir, oldTime, null);
|
||||||
final DDBPathMetadata subDirAsFile = ms.get(subDirPath);
|
getFile(subdir);
|
||||||
|
|
||||||
Assertions.assertThat(subDirAsFile.getFileStatus().isFile())
|
|
||||||
.describedAs("Subdirectory entry %s is now file", subDirMetadataOrig)
|
|
||||||
.isTrue();
|
|
||||||
|
|
||||||
Path basePath = strToPath(base);
|
Path basePath = strToPath(base);
|
||||||
DirListingMetadata listing = ms.listChildren(basePath);
|
DirListingMetadata listing = ms.listChildren(basePath);
|
||||||
|
@ -1147,13 +1098,13 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
Assertions.assertThat(status.isFile())
|
Assertions.assertThat(status.isFile())
|
||||||
.as("Entry %s", (Object)pm)
|
.as("Entry %s", (Object)pm)
|
||||||
.isTrue();
|
.isTrue();
|
||||||
DDBPathMetadata subFilePm = checkNotNull(ms.get(subFilePath));
|
getNonNull(subFile);
|
||||||
LOG.info("Pruning");
|
|
||||||
|
|
||||||
|
LOG.info("Pruning");
|
||||||
// now prune
|
// now prune
|
||||||
ms.prune(PruneMode.ALL_BY_MODTIME,
|
ms.prune(PruneMode.ALL_BY_MODTIME,
|
||||||
now + 60_000, subdir);
|
now + MINUTE, subdir);
|
||||||
DDBPathMetadata prunedFile = ms.get(subFilePath);
|
ms.get(subFilePath);
|
||||||
|
|
||||||
final PathMetadata subDirMetadataFinal = getNonNull(subdir);
|
final PathMetadata subDirMetadataFinal = getNonNull(subdir);
|
||||||
|
|
||||||
|
@ -1165,8 +1116,8 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testPutFileDirectlyUnderTombstone() throws Throwable {
|
public void testPutFileDirectlyUnderTombstone() throws Throwable {
|
||||||
describe("Put a file under a tombstone");
|
describe("Put a file under a tombstone; verify the tombstone");
|
||||||
String base = "/testPutFileDirectlyUnderTombstone";
|
String base = "/" + getMethodName();
|
||||||
long now = getTime();
|
long now = getTime();
|
||||||
putTombstone(base, now, null);
|
putTombstone(base, now, null);
|
||||||
PathMetadata baseMeta1 = get(base);
|
PathMetadata baseMeta1 = get(base);
|
||||||
|
@ -1175,35 +1126,114 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
.isTrue();
|
.isTrue();
|
||||||
String child = base + "/file";
|
String child = base + "/file";
|
||||||
putFile(child, now, null);
|
putFile(child, now, null);
|
||||||
PathMetadata baseMeta2 = get(base);
|
getDirectory(base);
|
||||||
Assertions.assertThat(baseMeta2.isDeleted())
|
|
||||||
.as("Metadata %s", baseMeta2)
|
|
||||||
.isFalse();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPruneTombstoneUnderTombstone() throws Throwable {
|
||||||
|
describe("Put a tombsteone under a tombstone, prune the pair");
|
||||||
|
String base = "/" + getMethodName();
|
||||||
|
long now = getTime();
|
||||||
|
String dir = base + "/dir";
|
||||||
|
putTombstone(dir, now, null);
|
||||||
|
assertIsTombstone(dir);
|
||||||
|
// parent dir is created
|
||||||
|
assertCached(base);
|
||||||
|
String child = dir + "/file";
|
||||||
|
String child2 = dir + "/file2";
|
||||||
|
|
||||||
|
// this will actually mark the parent as a dir,
|
||||||
|
// so that lists of that dir will pick up the tombstone
|
||||||
|
putTombstone(child, now, null);
|
||||||
|
getDirectory(dir);
|
||||||
|
// tombstone the dir
|
||||||
|
putTombstone(dir, now, null);
|
||||||
|
// add another child entry; this will update the dir entry from being
|
||||||
|
// tombstone to dir
|
||||||
|
putFile(child2, now, null);
|
||||||
|
getDirectory(dir);
|
||||||
|
|
||||||
|
// put a tombstone over the directory again
|
||||||
|
putTombstone(dir, now, null);
|
||||||
|
// verify
|
||||||
|
assertIsTombstone(dir);
|
||||||
|
|
||||||
|
//prune all tombstones
|
||||||
|
getDynamoMetadataStore().prune(PruneMode.TOMBSTONES_BY_LASTUPDATED,
|
||||||
|
now + MINUTE);
|
||||||
|
|
||||||
|
// the child is gone
|
||||||
|
assertNotFound(child);
|
||||||
|
|
||||||
|
// *AND* the parent dir has not been created
|
||||||
|
assertNotFound(dir);
|
||||||
|
|
||||||
|
// the child2 entry is still there, though it's now orphan (the store isn't
|
||||||
|
// meeting the rule "all entries must have a parent which exists"
|
||||||
|
getFile(child2);
|
||||||
|
// a full prune will still find and delete it, as this
|
||||||
|
// doesn't walk the tree
|
||||||
|
getDynamoMetadataStore().prune(PruneMode.ALL_BY_MODTIME,
|
||||||
|
now + MINUTE);
|
||||||
|
assertNotFound(child2);
|
||||||
|
assertNotFound(dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPruneFileUnderTombstone() throws Throwable {
|
||||||
|
describe("Put a file under a tombstone, prune the pair");
|
||||||
|
String base = "/" + getMethodName();
|
||||||
|
long now = getTime();
|
||||||
|
String dir = base + "/dir";
|
||||||
|
putTombstone(dir, now, null);
|
||||||
|
assertIsTombstone(dir);
|
||||||
|
// parent dir is created
|
||||||
|
assertCached(base);
|
||||||
|
String child = dir + "/file";
|
||||||
|
|
||||||
|
// this will actually mark the parent as a dir,
|
||||||
|
// so that lists of that dir will pick up the tombstone
|
||||||
|
putFile(child, now, null);
|
||||||
|
// dir is reinstated
|
||||||
|
getDirectory(dir);
|
||||||
|
|
||||||
|
// put a tombstone
|
||||||
|
putTombstone(dir, now, null);
|
||||||
|
// prune all entries
|
||||||
|
getDynamoMetadataStore().prune(PruneMode.ALL_BY_MODTIME,
|
||||||
|
now + MINUTE);
|
||||||
|
// the child is gone
|
||||||
|
assertNotFound(child);
|
||||||
|
|
||||||
|
// *AND* the parent dir has not been created
|
||||||
|
assertNotFound(dir);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Keep in sync with code changes in S3AFileSystem.finishedWrite() so that
|
||||||
|
* the production code can be tested here.
|
||||||
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testPutFileDeepUnderTombstone() throws Throwable {
|
public void testPutFileDeepUnderTombstone() throws Throwable {
|
||||||
describe("Put a file two levels under a tombstone");
|
describe("Put a file two levels under a tombstone");
|
||||||
String base = "/testPutFileDeepUnderTombstone";
|
String base = "/" + getMethodName();
|
||||||
String subdir = base + "/subdir";
|
String dir = base + "/dir";
|
||||||
long now = getTime();
|
long now = getTime();
|
||||||
// creating a file MUST create its parents
|
// creating a file MUST create its parents
|
||||||
String child = subdir + "/file";
|
String child = dir + "/file";
|
||||||
Path childPath = strToPath(child);
|
Path childPath = strToPath(child);
|
||||||
putFile(child, now, null);
|
putFile(child, now, null);
|
||||||
getFile(child);
|
getFile(child);
|
||||||
getDirectory(subdir);
|
getDirectory(dir);
|
||||||
getDirectory(base);
|
getDirectory(base);
|
||||||
|
|
||||||
// now put the tombstone
|
// now put the tombstone
|
||||||
putTombstone(base, now, null);
|
putTombstone(base, now, null);
|
||||||
PathMetadata baseMeta1 = getNonNull(base);
|
assertIsTombstone(base);
|
||||||
Assertions.assertThat(baseMeta1.isDeleted())
|
|
||||||
.as("Metadata %s", baseMeta1)
|
|
||||||
.isTrue();
|
|
||||||
|
|
||||||
// this is the same ordering as S3FileSystem.finishedWrite()
|
|
||||||
|
|
||||||
|
/*- --------------------------------------------*/
|
||||||
|
/* Begin S3FileSystem.finishedWrite() sequence. */
|
||||||
|
/* ---------------------------------------------*/
|
||||||
AncestorState ancestorState = getDynamoMetadataStore()
|
AncestorState ancestorState = getDynamoMetadataStore()
|
||||||
.initiateBulkWrite(BulkOperationState.OperationType.Put,
|
.initiateBulkWrite(BulkOperationState.OperationType.Put,
|
||||||
childPath);
|
childPath);
|
||||||
|
@ -1213,8 +1243,103 @@ public class ITestDynamoDBMetadataStore extends MetadataStoreTestBase {
|
||||||
ancestorState);
|
ancestorState);
|
||||||
// now write the file again.
|
// now write the file again.
|
||||||
putFile(child, now, ancestorState);
|
putFile(child, now, ancestorState);
|
||||||
|
/* -------------------------------------------*/
|
||||||
|
/* End S3FileSystem.finishedWrite() sequence. */
|
||||||
|
/* -------------------------------------------*/
|
||||||
|
|
||||||
|
getFile(child);
|
||||||
// the ancestor will now exist.
|
// the ancestor will now exist.
|
||||||
|
getDirectory(dir);
|
||||||
getDirectory(base);
|
getDirectory(base);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testDumpTable() throws Throwable {
|
||||||
|
describe("Dump the table contents, but not the S3 Store");
|
||||||
|
String target = System.getProperty("test.build.dir", "target");
|
||||||
|
File buildDir = new File(target).getAbsoluteFile();
|
||||||
|
String name = "ITestDynamoDBMetadataStore";
|
||||||
|
File destFile = new File(buildDir, name);
|
||||||
|
DumpS3GuardDynamoTable.dumpStore(
|
||||||
|
null,
|
||||||
|
ddbmsStatic,
|
||||||
|
getFileSystem().getConf(),
|
||||||
|
destFile,
|
||||||
|
fsUri);
|
||||||
|
File storeFile = new File(buildDir, name + DumpS3GuardDynamoTable.SCAN_CSV);
|
||||||
|
try (BufferedReader in = new BufferedReader(new InputStreamReader(
|
||||||
|
new FileInputStream(storeFile), Charset.forName("UTF-8")))) {
|
||||||
|
for (String line : org.apache.commons.io.IOUtils.readLines(in)) {
|
||||||
|
LOG.info(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPurgeTableNoForce() throws Throwable {
|
||||||
|
describe("Purge the table");
|
||||||
|
|
||||||
|
putTombstone("/" + getMethodName(), getTime(), null);
|
||||||
|
Pair<Long, Long> r = PurgeS3GuardDynamoTable.purgeStore(
|
||||||
|
null,
|
||||||
|
ddbmsStatic,
|
||||||
|
getFileSystem().getConf(),
|
||||||
|
fsUri,
|
||||||
|
false);
|
||||||
|
|
||||||
|
Assertions.assertThat(r.getLeft()).
|
||||||
|
describedAs("entries found in %s", r)
|
||||||
|
.isGreaterThanOrEqualTo(1);
|
||||||
|
Assertions.assertThat(r.getRight()).
|
||||||
|
describedAs("entries deleted in %s", r)
|
||||||
|
.isZero();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testPurgeTableForce() throws Throwable {
|
||||||
|
describe("Purge the table -force");
|
||||||
|
putTombstone("/" + getMethodName(), getTime(), null);
|
||||||
|
Pair<Long, Long> r = PurgeS3GuardDynamoTable.purgeStore(
|
||||||
|
null,
|
||||||
|
ddbmsStatic,
|
||||||
|
getFileSystem().getConf(),
|
||||||
|
fsUri,
|
||||||
|
true);
|
||||||
|
Assertions.assertThat(r.getLeft()).
|
||||||
|
describedAs("entries found in %s", r)
|
||||||
|
.isGreaterThanOrEqualTo(1);
|
||||||
|
Assertions.assertThat(r.getRight()).
|
||||||
|
describedAs("entries deleted in %s", r)
|
||||||
|
.isEqualTo(r.getLeft());
|
||||||
|
|
||||||
|
// second iteration will have zero entries
|
||||||
|
|
||||||
|
r = PurgeS3GuardDynamoTable.purgeStore(
|
||||||
|
null,
|
||||||
|
ddbmsStatic,
|
||||||
|
getFileSystem().getConf(),
|
||||||
|
fsUri,
|
||||||
|
true);
|
||||||
|
Assertions.assertThat(r.getLeft()).
|
||||||
|
describedAs("entries found in %s", r)
|
||||||
|
.isZero();
|
||||||
|
Assertions.assertThat(r.getRight()).
|
||||||
|
describedAs("entries deleted in %s", r)
|
||||||
|
.isZero();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that an entry exists and is a directory.
|
||||||
|
* @param pathStr path
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected DDBPathMetadata verifyAuthDirStatus(String pathStr,
|
||||||
|
boolean authDirFlag)
|
||||||
|
throws IOException {
|
||||||
|
DDBPathMetadata md = (DDBPathMetadata) getDirectory(pathStr);
|
||||||
|
assertEquals("isAuthoritativeDir() mismatch in " + md,
|
||||||
|
authDirFlag,
|
||||||
|
md.isAuthoritativeDir());
|
||||||
|
return md;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
|
import com.amazonaws.services.dynamodbv2.document.DynamoDB;
|
||||||
import com.amazonaws.services.dynamodbv2.document.Table;
|
import com.amazonaws.services.dynamodbv2.document.Table;
|
||||||
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
|
import com.amazonaws.services.dynamodbv2.model.ProvisionedThroughputDescription;
|
||||||
|
import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
|
||||||
import org.junit.Assume;
|
import org.junit.Assume;
|
||||||
import org.junit.FixMethodOrder;
|
import org.junit.FixMethodOrder;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
@ -59,6 +60,7 @@ import org.apache.hadoop.util.DurationInfo;
|
||||||
import static com.google.common.base.Preconditions.checkNotNull;
|
import static com.google.common.base.Preconditions.checkNotNull;
|
||||||
import static org.apache.hadoop.fs.s3a.Constants.*;
|
import static org.apache.hadoop.fs.s3a.Constants.*;
|
||||||
import static org.apache.hadoop.fs.s3a.s3guard.MetadataStoreTestBase.basicFileStatus;
|
import static org.apache.hadoop.fs.s3a.s3guard.MetadataStoreTestBase.basicFileStatus;
|
||||||
|
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.PARENT;
|
||||||
import static org.junit.Assume.*;
|
import static org.junit.Assume.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -172,6 +174,21 @@ public class ITestDynamoDBMetadataStoreScale
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void teardown() throws Exception {
|
public void teardown() throws Exception {
|
||||||
|
if (ddbms != null) {
|
||||||
|
S3GuardTableAccess tableAccess = new S3GuardTableAccess(ddbms);
|
||||||
|
ExpressionSpecBuilder builder = new ExpressionSpecBuilder();
|
||||||
|
builder.withCondition(
|
||||||
|
ExpressionSpecBuilder.S(PARENT).beginsWith("/test/"));
|
||||||
|
|
||||||
|
Iterable<DDBPathMetadata> entries = tableAccess.scanMetadata(builder);
|
||||||
|
List<Path> list = new ArrayList<>();
|
||||||
|
entries.iterator().forEachRemaining(e -> {
|
||||||
|
Path p = e.getFileStatus().getPath();
|
||||||
|
LOG.info("Deleting {}", p);
|
||||||
|
list.add(p);
|
||||||
|
});
|
||||||
|
tableAccess.delete(list);
|
||||||
|
}
|
||||||
IOUtils.cleanupWithLogger(LOG, ddbms);
|
IOUtils.cleanupWithLogger(LOG, ddbms);
|
||||||
super.teardown();
|
super.teardown();
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,269 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.fs.s3a.s3guard;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.net.URI;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
import org.assertj.core.api.Assertions;
|
||||||
|
import org.junit.FixMethodOrder;
|
||||||
|
import org.junit.Ignore;
|
||||||
|
import org.junit.Test;
|
||||||
|
import org.junit.runners.MethodSorters;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.apache.hadoop.fs.contract.ContractTestUtils;
|
||||||
|
import org.apache.hadoop.fs.s3a.AbstractS3ATestBase;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3AFileSystem;
|
||||||
|
import org.apache.hadoop.fs.s3a.S3ATestUtils;
|
||||||
|
import org.apache.hadoop.fs.s3a.impl.StoreContext;
|
||||||
|
|
||||||
|
import static com.google.common.base.Preconditions.checkNotNull;
|
||||||
|
import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE;
|
||||||
|
import static org.apache.hadoop.fs.s3a.Constants.S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.assume;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getTestBucketName;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBucketOverrides;
|
||||||
|
import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This test run against the root of the FS, and operations which span the DDB
|
||||||
|
* table and the filesystem.
|
||||||
|
* For this reason, these tests are executed in the sequential phase of the
|
||||||
|
* integration tests.
|
||||||
|
* <p>
|
||||||
|
* The tests only run if DynamoDB is the metastore.
|
||||||
|
*/
|
||||||
|
@FixMethodOrder(MethodSorters.NAME_ASCENDING)
|
||||||
|
public class ITestS3GuardDDBRootOperations extends AbstractS3ATestBase {
|
||||||
|
|
||||||
|
private StoreContext storeContext;
|
||||||
|
|
||||||
|
private String fsUriStr;
|
||||||
|
|
||||||
|
private DynamoDBMetadataStore metastore;
|
||||||
|
|
||||||
|
private String metastoreUriStr;
|
||||||
|
|
||||||
|
// this is a switch you can change in your IDE to enable
|
||||||
|
// or disable those tests which clean up the metastore.
|
||||||
|
private final boolean cleaning = true;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The test timeout is increased in case previous tests have created
|
||||||
|
* many tombstone markers which now need to be purged.
|
||||||
|
* @return the test timeout.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
protected int getTestTimeoutMillis() {
|
||||||
|
return SCALE_TEST_TIMEOUT_SECONDS * 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Configuration createConfiguration() {
|
||||||
|
Configuration conf = super.createConfiguration();
|
||||||
|
String bucketName = getTestBucketName(conf);
|
||||||
|
|
||||||
|
// set a sleep time of 0 on pruning, for speedier test runs.
|
||||||
|
removeBucketOverrides(bucketName, conf, ENABLE_MULTI_DELETE);
|
||||||
|
conf.setTimeDuration(
|
||||||
|
S3GUARD_DDB_BACKGROUND_SLEEP_MSEC_KEY,
|
||||||
|
0,
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
return conf;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setup() throws Exception {
|
||||||
|
super.setup();
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Configuration conf = fs.getConf();
|
||||||
|
S3ATestUtils.assumeS3GuardState(true, conf);
|
||||||
|
storeContext = fs.createStoreContext();
|
||||||
|
assume("Filesystem isn't running DDB",
|
||||||
|
storeContext.getMetadataStore() instanceof DynamoDBMetadataStore);
|
||||||
|
metastore = (DynamoDBMetadataStore) storeContext.getMetadataStore();
|
||||||
|
URI fsURI = storeContext.getFsURI();
|
||||||
|
fsUriStr = fsURI.toString();
|
||||||
|
if (!fsUriStr.endsWith("/")) {
|
||||||
|
fsUriStr = fsUriStr + "/";
|
||||||
|
}
|
||||||
|
metastoreUriStr = "dynamodb://" + metastore.getTableName() + "/";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void teardown() throws Exception {
|
||||||
|
Thread.currentThread().setName("teardown");
|
||||||
|
super.teardown();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void assumeCleaningOperation() {
|
||||||
|
assume("Cleaning operation skipped", cleaning);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Ignore
|
||||||
|
public void test_050_dump_metastore() throws Throwable {
|
||||||
|
File destFile = calculateDumpFileBase();
|
||||||
|
describe("Dumping S3Guard store under %s", destFile);
|
||||||
|
DumpS3GuardDynamoTable.dumpStore(
|
||||||
|
null,
|
||||||
|
metastore,
|
||||||
|
getConfiguration(),
|
||||||
|
destFile,
|
||||||
|
getFileSystem().getUri());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_060_dump_metastore_and_s3() throws Throwable {
|
||||||
|
File destFile = calculateDumpFileBase();
|
||||||
|
describe("Dumping S3Guard store under %s", destFile);
|
||||||
|
DumpS3GuardDynamoTable.dumpStore(
|
||||||
|
getFileSystem(),
|
||||||
|
metastore,
|
||||||
|
getConfiguration(),
|
||||||
|
destFile,
|
||||||
|
getFileSystem().getUri());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_100_FilesystemPrune() throws Throwable {
|
||||||
|
describe("Execute prune against a filesystem URI");
|
||||||
|
assumeCleaningOperation();
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Configuration conf = fs.getConf();
|
||||||
|
int result = S3GuardTool.run(conf,
|
||||||
|
S3GuardTool.Prune.NAME,
|
||||||
|
fsUriStr);
|
||||||
|
Assertions.assertThat(result)
|
||||||
|
.describedAs("Result of prune %s", fsUriStr)
|
||||||
|
.isEqualTo(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_200_MetastorePruneTombstones() throws Throwable {
|
||||||
|
describe("Execute prune against a dynamo URL");
|
||||||
|
assumeCleaningOperation();
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Configuration conf = fs.getConf();
|
||||||
|
int result = S3GuardTool.run(conf,
|
||||||
|
S3GuardTool.Prune.NAME,
|
||||||
|
"-tombstone",
|
||||||
|
"-meta", checkNotNull(metastoreUriStr),
|
||||||
|
"-seconds", "1",
|
||||||
|
fs.qualify(new Path("/")).toString());
|
||||||
|
Assertions.assertThat(result)
|
||||||
|
.describedAs("Result of prune %s", fsUriStr)
|
||||||
|
.isEqualTo(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_300_MetastorePrune() throws Throwable {
|
||||||
|
describe("Execute prune against a dynamo URL");
|
||||||
|
assumeCleaningOperation();
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Configuration conf = fs.getConf();
|
||||||
|
int result = S3GuardTool.run(conf,
|
||||||
|
S3GuardTool.Prune.NAME,
|
||||||
|
"-meta", checkNotNull(metastoreUriStr),
|
||||||
|
"-seconds", "1");
|
||||||
|
Assertions.assertThat(result)
|
||||||
|
.describedAs("Result of prune %s", fsUriStr)
|
||||||
|
.isEqualTo(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void test_400_rm_root_recursive() throws Throwable {
|
||||||
|
describe("Remove the root directory");
|
||||||
|
assumeCleaningOperation();
|
||||||
|
S3AFileSystem fs = getFileSystem();
|
||||||
|
Path root = new Path("/");
|
||||||
|
Path file = new Path("/test_400_rm_root_recursive-01");
|
||||||
|
Path file2 = new Path("/test_400_rm_root_recursive-02");
|
||||||
|
// recursive treewalk to delete all files
|
||||||
|
// does not delete directories.
|
||||||
|
applyLocatedFiles(fs.listFilesAndEmptyDirectories(root, true),
|
||||||
|
f -> {
|
||||||
|
Path p = f.getPath();
|
||||||
|
fs.delete(p, true);
|
||||||
|
assertPathDoesNotExist("expected file to be deleted", p);
|
||||||
|
});
|
||||||
|
ContractTestUtils.deleteChildren(fs, root, true);
|
||||||
|
// everything must be done by now
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
AtomicInteger foundFile = new AtomicInteger(0);
|
||||||
|
applyLocatedFiles(fs.listFilesAndEmptyDirectories(root, true),
|
||||||
|
f -> {
|
||||||
|
foundFile.addAndGet(1);
|
||||||
|
Path p = f.getPath();
|
||||||
|
sb.append(f.isDirectory()
|
||||||
|
? "Dir "
|
||||||
|
: "File ")
|
||||||
|
.append(p);
|
||||||
|
if (!f.isDirectory()) {
|
||||||
|
sb.append("[").append(f.getLen()).append("]");
|
||||||
|
}
|
||||||
|
|
||||||
|
fs.delete(p, true);
|
||||||
|
});
|
||||||
|
|
||||||
|
assertEquals("Remaining files " + sb,
|
||||||
|
0, foundFile.get());
|
||||||
|
try {
|
||||||
|
ContractTestUtils.touch(fs, file);
|
||||||
|
assertDeleted(file, false);
|
||||||
|
|
||||||
|
|
||||||
|
assertTrue("Root directory delete failed",
|
||||||
|
fs.delete(root, true));
|
||||||
|
|
||||||
|
ContractTestUtils.touch(fs, file2);
|
||||||
|
assertFalse("Root directory delete should have failed",
|
||||||
|
fs.delete(root, true));
|
||||||
|
} finally {
|
||||||
|
fs.delete(file, false);
|
||||||
|
fs.delete(file2, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
@Ignore
|
||||||
|
public void test_600_dump_metastore() throws Throwable {
|
||||||
|
File destFile = calculateDumpFileBase();
|
||||||
|
describe("Dumping S3Guard store under %s", destFile);
|
||||||
|
DumpS3GuardDynamoTable.dumpStore(
|
||||||
|
getFileSystem(),
|
||||||
|
metastore,
|
||||||
|
getConfiguration(),
|
||||||
|
destFile,
|
||||||
|
getFileSystem().getUri());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected File calculateDumpFileBase() {
|
||||||
|
String target = System.getProperty("test.build.dir", "target");
|
||||||
|
File buildDir = new File(target,
|
||||||
|
this.getClass().getSimpleName()).getAbsoluteFile();
|
||||||
|
buildDir.mkdirs();
|
||||||
|
return new File(buildDir, getMethodName());
|
||||||
|
}
|
||||||
|
}
|
|
@ -34,6 +34,7 @@ import com.amazonaws.services.dynamodbv2.model.ResourceNotFoundException;
|
||||||
import com.amazonaws.services.dynamodbv2.model.Tag;
|
import com.amazonaws.services.dynamodbv2.model.Tag;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Assume;
|
import org.junit.Assume;
|
||||||
|
import org.junit.AssumptionViolatedException;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
@ -60,10 +61,18 @@ public class ITestS3GuardToolDynamoDB extends AbstractS3GuardToolTestBase {
|
||||||
@Override
|
@Override
|
||||||
public void setup() throws Exception {
|
public void setup() throws Exception {
|
||||||
super.setup();
|
super.setup();
|
||||||
MetadataStore ms = getMetadataStore();
|
try {
|
||||||
Assume.assumeTrue("Test only applies when DynamoDB is used for S3Guard;"
|
getMetadataStore();
|
||||||
+ "Store is " + (ms == null ? "none" : ms.toString()),
|
} catch (ClassCastException e) {
|
||||||
ms instanceof DynamoDBMetadataStore);
|
throw new AssumptionViolatedException(
|
||||||
|
"Test only applies when DynamoDB is used for S3Guard Store",
|
||||||
|
e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected DynamoDBMetadataStore getMetadataStore() {
|
||||||
|
return (DynamoDBMetadataStore) super.getMetadataStore();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check the existence of a given DynamoDB table.
|
// Check the existence of a given DynamoDB table.
|
||||||
|
|
|
@ -120,6 +120,7 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
|
Thread.currentThread().setName("setup");
|
||||||
LOG.debug("== Setup. ==");
|
LOG.debug("== Setup. ==");
|
||||||
contract = createContract();
|
contract = createContract();
|
||||||
ms = contract.getMetadataStore();
|
ms = contract.getMetadataStore();
|
||||||
|
@ -132,6 +133,7 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
|
||||||
|
|
||||||
@After
|
@After
|
||||||
public void tearDown() throws Exception {
|
public void tearDown() throws Exception {
|
||||||
|
Thread.currentThread().setName("teardown");
|
||||||
LOG.debug("== Tear down. ==");
|
LOG.debug("== Tear down. ==");
|
||||||
if (ms != null) {
|
if (ms != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -1050,6 +1052,11 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
|
||||||
return checkNotNull(get(pathStr), "No metastore entry for %s", pathStr);
|
return checkNotNull(get(pathStr), "No metastore entry for %s", pathStr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that either a path has no entry or that it is marked as deleted.
|
||||||
|
* @param pathStr path
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
protected void assertDeleted(String pathStr) throws IOException {
|
protected void assertDeleted(String pathStr) throws IOException {
|
||||||
PathMetadata meta = get(pathStr);
|
PathMetadata meta = get(pathStr);
|
||||||
boolean cached = meta != null && !meta.isDeleted();
|
boolean cached = meta != null && !meta.isDeleted();
|
||||||
|
@ -1073,6 +1080,39 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
|
||||||
return meta;
|
return meta;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that an entry exists and is a file.
|
||||||
|
* @param pathStr path
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected PathMetadata verifyIsFile(String pathStr) throws IOException {
|
||||||
|
PathMetadata md = verifyCached(pathStr);
|
||||||
|
assertTrue("Not a file: " + md,
|
||||||
|
md.getFileStatus().isFile());
|
||||||
|
return md;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that an entry exists and is a tombstone.
|
||||||
|
* @param pathStr path
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected void assertIsTombstone(String pathStr) throws IOException {
|
||||||
|
PathMetadata meta = getNonNull(pathStr);
|
||||||
|
assertTrue(pathStr + " must be a tombstone: " + meta, meta.isDeleted());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Assert that an entry does not exist.
|
||||||
|
* @param pathStr path
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected void assertNotFound(String pathStr) throws IOException {
|
||||||
|
PathMetadata meta = get(pathStr);
|
||||||
|
assertNull("Unexpectedly found entry at path " + pathStr,
|
||||||
|
meta);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get an entry which must be a file.
|
* Get an entry which must be a file.
|
||||||
* @param pathStr path
|
* @param pathStr path
|
||||||
|
@ -1099,6 +1139,40 @@ public abstract class MetadataStoreTestBase extends HadoopTestBase {
|
||||||
return meta;
|
return meta;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an entry which must not be marked as an empty directory:
|
||||||
|
* its empty directory field must be FALSE or UNKNOWN.
|
||||||
|
* @param pathStr path
|
||||||
|
* @return the entry
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected PathMetadata getNonEmptyDirectory(final String pathStr)
|
||||||
|
throws IOException {
|
||||||
|
PathMetadata meta = getDirectory(pathStr);
|
||||||
|
assertNotEquals("Path " + pathStr
|
||||||
|
+ " is considered an empty dir " + meta,
|
||||||
|
Tristate.TRUE,
|
||||||
|
meta.isEmptyDirectory());
|
||||||
|
return meta;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get an entry which must be an empty directory.
|
||||||
|
* its empty directory field must be TRUE.
|
||||||
|
* @param pathStr path
|
||||||
|
* @return the entry
|
||||||
|
* @throws IOException IO failure.
|
||||||
|
*/
|
||||||
|
protected PathMetadata getEmptyDirectory(final String pathStr)
|
||||||
|
throws IOException {
|
||||||
|
PathMetadata meta = getDirectory(pathStr);
|
||||||
|
assertEquals("Path " + pathStr
|
||||||
|
+ " is not considered an empty dir " + meta,
|
||||||
|
Tristate.TRUE,
|
||||||
|
meta.isEmptyDirectory());
|
||||||
|
return meta;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience to create a fully qualified Path from string.
|
* Convenience to create a fully qualified Path from string.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -115,7 +115,7 @@ public class TestDynamoDBMiscOperations extends HadoopTestBase {
|
||||||
public void testAncestorStateForDir() throws Throwable {
|
public void testAncestorStateForDir() throws Throwable {
|
||||||
final DynamoDBMetadataStore.AncestorState ancestorState
|
final DynamoDBMetadataStore.AncestorState ancestorState
|
||||||
= new DynamoDBMetadataStore.AncestorState(
|
= new DynamoDBMetadataStore.AncestorState(
|
||||||
BulkOperationState.OperationType.Rename, null);
|
null, BulkOperationState.OperationType.Rename, null);
|
||||||
|
|
||||||
// path 1 is a directory
|
// path 1 is a directory
|
||||||
final Path path1 = new Path("s3a://bucket/1");
|
final Path path1 = new Path("s3a://bucket/1");
|
||||||
|
@ -143,7 +143,7 @@ public class TestDynamoDBMiscOperations extends HadoopTestBase {
|
||||||
public void testAncestorStateForFile() throws Throwable {
|
public void testAncestorStateForFile() throws Throwable {
|
||||||
final DynamoDBMetadataStore.AncestorState ancestorState
|
final DynamoDBMetadataStore.AncestorState ancestorState
|
||||||
= new DynamoDBMetadataStore.AncestorState(
|
= new DynamoDBMetadataStore.AncestorState(
|
||||||
BulkOperationState.OperationType.Rename, null);
|
null, BulkOperationState.OperationType.Rename, null);
|
||||||
|
|
||||||
// path 1 is a file
|
// path 1 is a file
|
||||||
final Path path1 = new Path("s3a://bucket/1");
|
final Path path1 = new Path("s3a://bucket/1");
|
||||||
|
|
|
@ -57,6 +57,8 @@ log4j.logger.org.apache.hadoop.ipc.Server=WARN
|
||||||
#log4j.logger.org.apache.hadoop.fs.s3a.Listing=INFO
|
#log4j.logger.org.apache.hadoop.fs.s3a.Listing=INFO
|
||||||
# Log S3Guard classes
|
# Log S3Guard classes
|
||||||
#log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
|
#log4j.logger.org.apache.hadoop.fs.s3a.s3guard=DEBUG
|
||||||
|
# if set to debug, this will log the PUT/DELETE operations on a store
|
||||||
|
#log4j.logger.org.apache.hadoop.fs.s3a.s3guard.Operations=DEBUG
|
||||||
|
|
||||||
# Log Committer classes
|
# Log Committer classes
|
||||||
#log4j.logger.org.apache.hadoop.fs.s3a.commit=DEBUG
|
#log4j.logger.org.apache.hadoop.fs.s3a.commit=DEBUG
|
||||||
|
|
Loading…
Reference in New Issue