HADOOP-17227. S3A Marker Tool tuning (#2254)

Contributed by Steve Loughran.
This commit is contained in:
Steve Loughran 2020-09-04 14:58:03 +01:00 committed by GitHub
parent 696e4fe50e
commit 5346cc3263
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 393 additions and 108 deletions

View File

@ -46,6 +46,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
@ -758,8 +759,8 @@ public abstract class S3GuardTool extends Configured implements Tool,
*/ */
static class Destroy extends S3GuardTool { static class Destroy extends S3GuardTool {
public static final String NAME = "destroy"; public static final String NAME = "destroy";
public static final String PURPOSE = "destroy Metadata Store data " public static final String PURPOSE = "destroy the Metadata Store including its"
+ DATA_IN_S3_IS_PRESERVED; + " contents" + DATA_IN_S3_IS_PRESERVED;
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" + private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
"\t" + PURPOSE + "\n\n" + "\t" + PURPOSE + "\n\n" +
"Common options:\n" + "Common options:\n" +
@ -1252,7 +1253,7 @@ public abstract class S3GuardTool extends Configured implements Tool,
@VisibleForTesting @VisibleForTesting
public static final String IS_MARKER_AWARE = public static final String IS_MARKER_AWARE =
"The S3A connector is compatible with buckets where" "\tThe S3A connector is compatible with buckets where"
+ " directory markers are not deleted"; + " directory markers are not deleted";
public BucketInfo(Configuration conf) { public BucketInfo(Configuration conf) {
@ -1328,8 +1329,9 @@ public abstract class S3GuardTool extends Configured implements Tool,
authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false); authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false);
final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL, final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL,
DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS); DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS);
println(out, "\tMetadata time to live: %s=%s milliseconds", println(out, "\tMetadata time to live: (set in %s) = %s",
METADATASTORE_METADATA_TTL, ttl); METADATASTORE_METADATA_TTL,
DurationFormatUtils.formatDurationHMS(ttl));
printStoreDiagnostics(out, store); printStoreDiagnostics(out, store);
} else { } else {
println(out, "Filesystem %s is not using S3Guard", fsUri); println(out, "Filesystem %s is not using S3Guard", fsUri);
@ -1463,10 +1465,18 @@ public abstract class S3GuardTool extends Configured implements Tool,
private void processMarkerOption(final PrintStream out, private void processMarkerOption(final PrintStream out,
final S3AFileSystem fs, final S3AFileSystem fs,
final String marker) { final String marker) {
println(out, "%nSecurity");
DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy(); DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy();
String desc = markerPolicy.describe(); String desc = markerPolicy.describe();
println(out, "%nThe directory marker policy is \"%s\"%n", desc); println(out, "\tThe directory marker policy is \"%s\"", desc);
String pols = DirectoryPolicyImpl.availablePolicies()
.stream()
.map(DirectoryPolicy.MarkerPolicy::getOptionName)
.collect(Collectors.joining(", "));
println(out, "\tAvailable Policies: %s", pols);
printOption(out, "\tAuthoritative paths",
AUTHORITATIVE_PATH, "");
DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy(); DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy();
String desiredMarker = marker == null String desiredMarker = marker == null
@ -1478,12 +1488,6 @@ public abstract class S3GuardTool extends Configured implements Tool,
// simple awareness test -provides a way to validate compatibility // simple awareness test -provides a way to validate compatibility
// on the command line // on the command line
println(out, IS_MARKER_AWARE); println(out, IS_MARKER_AWARE);
String pols = DirectoryPolicyImpl.availablePolicies()
.stream()
.map(DirectoryPolicy.MarkerPolicy::getOptionName)
.collect(Collectors.joining(", "));
println(out, "Available Policies: %s", pols);
} else { } else {
// compare with current policy // compare with current policy
if (!optionName.equalsIgnoreCase(desiredMarker)) { if (!optionName.equalsIgnoreCase(desiredMarker)) {

View File

@ -53,6 +53,7 @@ import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.UnknownStoreException; import org.apache.hadoop.fs.s3a.UnknownStoreException;
import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker; import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker;
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy;
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl;
import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
import org.apache.hadoop.fs.shell.CommandFormat; import org.apache.hadoop.fs.shell.CommandFormat;
@ -113,9 +114,14 @@ public final class MarkerTool extends S3GuardTool {
public static final String CLEAN = "-" + OPT_CLEAN; public static final String CLEAN = "-" + OPT_CLEAN;
/** /**
* Expected number of markers to find: {@value}. * Min number of markers to find: {@value}.
*/ */
public static final String OPT_EXPECTED = "expected"; public static final String OPT_MIN = "min";
/**
* Max number of markers to find: {@value}.
*/
public static final String OPT_MAX = "max";
/** /**
* Name of a file to save the list of markers to: {@value}. * Name of a file to save the list of markers to: {@value}.
@ -151,13 +157,21 @@ public final class MarkerTool extends S3GuardTool {
public static final int UNLIMITED_LISTING = 0; public static final int UNLIMITED_LISTING = 0;
/**
* Constant to use when there is no minimum number of
* markers: {@value}.
*/
public static final int UNLIMITED_MIN_MARKERS = -1;
/** /**
* Usage string: {@value}. * Usage string: {@value}.
*/ */
private static final String USAGE = MARKERS private static final String USAGE = MARKERS
+ " (-" + OPT_AUDIT + " (-" + OPT_AUDIT
+ " | -" + OPT_CLEAN + ")" + " | -" + OPT_CLEAN + ")"
+ " [-" + OPT_EXPECTED + " <count>]" + " [-" + OPT_MIN + " <count>]"
+ " [-" + OPT_MAX + " <count>]"
+ " [-" + OPT_OUT + " <filename>]" + " [-" + OPT_OUT + " <filename>]"
+ " [-" + OPT_LIMIT + " <limit>]" + " [-" + OPT_LIMIT + " <limit>]"
+ " [-" + OPT_NONAUTH + "]" + " [-" + OPT_NONAUTH + "]"
@ -195,7 +209,8 @@ public final class MarkerTool extends S3GuardTool {
VERBOSE, VERBOSE,
OPT_NONAUTH); OPT_NONAUTH);
CommandFormat format = getCommandFormat(); CommandFormat format = getCommandFormat();
format.addOptionWithValue(OPT_EXPECTED); format.addOptionWithValue(OPT_MIN);
format.addOptionWithValue(OPT_MAX);
format.addOptionWithValue(OPT_LIMIT); format.addOptionWithValue(OPT_LIMIT);
format.addOptionWithValue(OPT_OUT); format.addOptionWithValue(OPT_OUT);
} }
@ -231,8 +246,7 @@ public final class MarkerTool extends S3GuardTool {
if (parsedArgs.size() != 1) { if (parsedArgs.size() != 1) {
errorln(getUsage()); errorln(getUsage());
println(out, "Supplied arguments: [" println(out, "Supplied arguments: ["
+ parsedArgs.stream() + String.join(", ", parsedArgs)
.collect(Collectors.joining(", "))
+ "]"); + "]");
throw new ExitUtil.ExitException(EXIT_USAGE, throw new ExitUtil.ExitException(EXIT_USAGE,
String.format(E_ARGUMENTS, parsedArgs.size())); String.format(E_ARGUMENTS, parsedArgs.size()));
@ -241,12 +255,11 @@ public final class MarkerTool extends S3GuardTool {
CommandFormat command = getCommandFormat(); CommandFormat command = getCommandFormat();
verbose = command.getOpt(VERBOSE); verbose = command.getOpt(VERBOSE);
// How many markers are expected? // minimum number of markers expected
int expected = 0; int expectedMin = getOptValue(OPT_MIN, 0);
String value = command.getOptValue(OPT_EXPECTED); // max number of markers allowed
if (value != null && !value.isEmpty()) { int expectedMax = getOptValue(OPT_MAX, 0);
expected = Integer.parseInt(value);
}
// determine the action // determine the action
boolean audit = command.getOpt(OPT_AUDIT); boolean audit = command.getOpt(OPT_AUDIT);
@ -258,11 +271,7 @@ public final class MarkerTool extends S3GuardTool {
throw new ExitUtil.ExitException(EXIT_USAGE, throw new ExitUtil.ExitException(EXIT_USAGE,
"Exactly one of " + AUDIT + " and " + CLEAN); "Exactly one of " + AUDIT + " and " + CLEAN);
} }
int limit = UNLIMITED_LISTING; int limit = getOptValue(OPT_LIMIT, UNLIMITED_LISTING);
value = command.getOptValue(OPT_LIMIT);
if (value != null && !value.isEmpty()) {
limit = Integer.parseInt(value);
}
final String dir = parsedArgs.get(0); final String dir = parsedArgs.get(0);
Path path = new Path(dir); Path path = new Path(dir);
URI uri = path.toUri(); URI uri = path.toUri();
@ -271,13 +280,17 @@ public final class MarkerTool extends S3GuardTool {
path = new Path(path, "/"); path = new Path(path, "/");
} }
FileSystem fs = path.getFileSystem(getConf()); FileSystem fs = path.getFileSystem(getConf());
boolean nonAuth = command.getOpt(OPT_NONAUTH);
ScanResult result = execute( ScanResult result = execute(
fs, new ScanArgsBuilder()
path, .withSourceFS(fs)
clean, .withPath(path)
expected, .withDoPurge(clean)
limit, .withMinMarkerCount(expectedMin)
command.getOpt(OPT_NONAUTH)); .withMaxMarkerCount(expectedMax)
.withLimit(limit)
.withNonAuth(nonAuth)
.build());
if (verbose) { if (verbose) {
dumpFileSystemStatistics(out); dumpFileSystemStatistics(out);
} }
@ -300,30 +313,43 @@ public final class MarkerTool extends S3GuardTool {
IOUtils.writeLines(surplus, "\n", writer); IOUtils.writeLines(surplus, "\n", writer);
} }
} }
return result.exitCode;
return result.finish();
}
/**
* Get the value of an option, or the default if the option
* is unset/empty.
* @param option option key
* @param defVal default
* @return the value to use
*/
private int getOptValue(String option, int defVal) {
CommandFormat command = getCommandFormat();
String value = command.getOptValue(option);
if (value != null && !value.isEmpty()) {
try {
return Integer.parseInt(value);
} catch (NumberFormatException e) {
throw new ExitUtil.ExitException(EXIT_USAGE,
String.format("Argument for %s is not a number: %s",
option, value));
}
} else {
return defVal;
}
} }
/** /**
* Execute the scan/purge. * Execute the scan/purge.
* @param sourceFS source FS; must be or wrap an S3A FS. *
* @param path path to scan. * @param scanArgs set of args for the scanner.
* @return scan+purge result.
* @param doPurge purge?
* @param expectedMarkerCount expected marker count
* @param limit limit of files to scan; -1 for 'unlimited'
* @param nonAuth consider only markers in nonauth paths as errors
* @return scan+purge result.
* @throws IOException failure * @throws IOException failure
*/ */
@VisibleForTesting @VisibleForTesting
ScanResult execute( ScanResult execute(final ScanArgs scanArgs)
final FileSystem sourceFS,
final Path path,
final boolean doPurge,
final int expectedMarkerCount,
final int limit,
final boolean nonAuth)
throws IOException { throws IOException {
S3AFileSystem fs = bindFilesystem(sourceFS); S3AFileSystem fs = bindFilesystem(scanArgs.getSourceFS());
// extract the callbacks needed for the rest of the work // extract the callbacks needed for the rest of the work
storeContext = fs.createStoreContext(); storeContext = fs.createStoreContext();
@ -344,6 +370,7 @@ public final class MarkerTool extends S3GuardTool {
println(out, "Authoritative path list is \"%s\"", authPath); println(out, "Authoritative path list is \"%s\"", authPath);
} }
// qualify the path // qualify the path
Path path = scanArgs.getPath();
Path target = path.makeQualified(fs.getUri(), new Path("/")); Path target = path.makeQualified(fs.getUri(), new Path("/"));
// initial safety check: does the path exist? // initial safety check: does the path exist?
try { try {
@ -360,10 +387,19 @@ public final class MarkerTool extends S3GuardTool {
} }
// the default filter policy is that all entries should be deleted // the default filter policy is that all entries should be deleted
DirectoryPolicy filterPolicy = nonAuth DirectoryPolicy filterPolicy;
? activePolicy if (scanArgs.isNonAuth()) {
: null; filterPolicy = new DirectoryPolicyImpl(
ScanResult result = scan(target, doPurge, expectedMarkerCount, limit, DirectoryPolicy.MarkerPolicy.Authoritative,
fs::allowAuthoritative);
} else {
filterPolicy = null;
}
ScanResult result = scan(target,
scanArgs.isDoPurge(),
scanArgs.getMaxMarkerCount(),
scanArgs.getMinMarkerCount(),
scanArgs.getLimit(),
filterPolicy); filterPolicy);
return result; return result;
} }
@ -378,6 +414,22 @@ public final class MarkerTool extends S3GuardTool {
*/ */
private int exitCode; private int exitCode;
/**
* Text to include if raising an exception.
*/
private String exitText = "";
/**
* Count of all markers found.
*/
private int totalMarkerCount;
/**
* Count of all markers found after excluding
* any from a [-nonauth] qualification.
*/
private int filteredMarkerCount;
/** /**
* The tracker. * The tracker.
*/ */
@ -395,6 +447,9 @@ public final class MarkerTool extends S3GuardTool {
public String toString() { public String toString() {
return "ScanResult{" + return "ScanResult{" +
"exitCode=" + exitCode + "exitCode=" + exitCode +
", exitText=" + exitText +
", totalMarkerCount=" + totalMarkerCount +
", filteredMarkerCount=" + filteredMarkerCount +
", tracker=" + tracker + ", tracker=" + tracker +
", purgeSummary=" + purgeSummary + ", purgeSummary=" + purgeSummary +
'}'; '}';
@ -414,13 +469,34 @@ public final class MarkerTool extends S3GuardTool {
public MarkerPurgeSummary getPurgeSummary() { public MarkerPurgeSummary getPurgeSummary() {
return purgeSummary; return purgeSummary;
} }
/**
 * Get the value of the {@code totalMarkerCount} field of this result.
 * @return the total marker count.
 */
public int getTotalMarkerCount() {
  return this.totalMarkerCount;
}
/**
 * Get the value of the {@code filteredMarkerCount} field of this result.
 * @return the filtered marker count.
 */
public int getFilteredMarkerCount() {
  return this.filteredMarkerCount;
}
/**
 * Complete the operation: succeed quietly or raise the stored failure.
 * <p>
 * A non-zero exit code is converted into an exception carrying
 * that code and the stored exit text.
 * @return 0, and only when the exit code is zero.
 * @throws ExitUtil.ExitException if the exit code is non-zero.
 */
public int finish() throws ExitUtil.ExitException {
  if (exitCode == 0) {
    return 0;
  }
  throw new ExitUtil.ExitException(exitCode, exitText);
}
} }
/** /**
* Do the scan/purge. * Do the scan/purge.
* @param path path to scan. * @param path path to scan.
* @param clean purge? * @param doPurge purge rather than just scan/audit?
* @param expectedMarkerCount expected marker count * @param minMarkerCount min marker count (ignored on purge)
* @param maxMarkerCount max marker count (ignored on purge)
* @param limit limit of files to scan; 0 for 'unlimited' * @param limit limit of files to scan; 0 for 'unlimited'
* @param filterPolicy filter policy on a nonauth scan; may be null * @param filterPolicy filter policy on a nonauth scan; may be null
* @return result. * @return result.
@ -430,8 +506,9 @@ public final class MarkerTool extends S3GuardTool {
@Retries.RetryTranslated @Retries.RetryTranslated
private ScanResult scan( private ScanResult scan(
final Path path, final Path path,
final boolean clean, final boolean doPurge,
final int expectedMarkerCount, final int minMarkerCount,
final int maxMarkerCount,
final int limit, final int limit,
final DirectoryPolicy filterPolicy) final DirectoryPolicy filterPolicy)
throws IOException, ExitUtil.ExitException { throws IOException, ExitUtil.ExitException {
@ -458,13 +535,16 @@ public final class MarkerTool extends S3GuardTool {
= tracker.getSurplusMarkers(); = tracker.getSurplusMarkers();
Map<Path, DirMarkerTracker.Marker> leafMarkers Map<Path, DirMarkerTracker.Marker> leafMarkers
= tracker.getLeafMarkers(); = tracker.getLeafMarkers();
int surplus = surplusMarkers.size(); // determine marker count
if (surplus == 0) { int markerCount = surplusMarkers.size();
result.totalMarkerCount = markerCount;
result.filteredMarkerCount = markerCount;
if (markerCount == 0) {
println(out, "No surplus directory markers were found under %s", path); println(out, "No surplus directory markers were found under %s", path);
} else { } else {
println(out, "Found %d surplus directory marker%s under %s", println(out, "Found %d surplus directory marker%s under %s",
surplus, markerCount,
suffix(surplus), suffix(markerCount),
path); path);
for (Path markers : surplusMarkers.keySet()) { for (Path markers : surplusMarkers.keySet()) {
@ -482,9 +562,9 @@ public final class MarkerTool extends S3GuardTool {
println(out, "These are required to indicate empty directories"); println(out, "These are required to indicate empty directories");
} }
if (clean) { if (doPurge) {
// clean: remove the markers, do not worry about their // clean: remove the markers, do not worry about their
// presence when reporting success/failiure // presence when reporting success/failure
int deletePageSize = storeContext.getConfiguration() int deletePageSize = storeContext.getConfiguration()
.getInt(BULK_DELETE_PAGE_SIZE, .getInt(BULK_DELETE_PAGE_SIZE,
BULK_DELETE_PAGE_SIZE_DEFAULT); BULK_DELETE_PAGE_SIZE_DEFAULT);
@ -503,28 +583,45 @@ public final class MarkerTool extends S3GuardTool {
allowed.forEach(p -> println(out, p.toString())); allowed.forEach(p -> println(out, p.toString()));
} }
// recalculate the marker size // recalculate the marker size
surplus = surplusMarkers.size(); markerCount = surplusMarkers.size();
result.filteredMarkerCount = markerCount;
} }
if (surplus > expectedMarkerCount) { if (markerCount < minMarkerCount || markerCount > maxMarkerCount) {
// failure // failure
if (expectedMarkerCount > 0) { return failScan(result, EXIT_NOT_ACCEPTABLE,
println(out, "Expected %d marker%s", expectedMarkerCount, "Marker count %d out of range "
suffix(surplus)); + "[%d - %d]",
} markerCount, minMarkerCount, maxMarkerCount);
println(out, "Surplus markers were found -failing audit");
result.exitCode = EXIT_NOT_ACCEPTABLE;
} }
} }
// now one little check for whether a limit was reached. // now one little check for whether a limit was reached.
if (!completed) { if (!completed) {
println(out, "Listing limit reached before completing the scan"); failScan(result, EXIT_INTERRUPTED,
result.exitCode = EXIT_INTERRUPTED; "Listing limit (%d) reached before completing the scan", limit);
} }
return result; return result;
} }
/**
 * Mark a scan as failed.
 * <p>
 * The message is formatted with {@code String.format()} and stored,
 * together with the exit code, in the result. Nothing is printed here.
 * @param result result to update
 * @param code exit code to record
 * @param message format string of the error message
 * @param args arguments for the format string
 * @return the same {@code result} instance, updated.
 */
private ScanResult failScan(
    ScanResult result,
    int code,
    String message,
    Object... args) {
  // format first, so a bad format string fails before the result changes
  final String formatted = String.format(message, args);
  result.exitCode = code;
  result.exitText = formatted;
  return result;
}
/** /**
* Suffix for plurals. * Suffix for plurals.
* @param size size to generate a suffix for * @param size size to generate a suffix for
@ -587,7 +684,7 @@ public final class MarkerTool extends S3GuardTool {
* Result of a call of {@link #purgeMarkers(DirMarkerTracker, int)}; * Result of a call of {@link #purgeMarkers(DirMarkerTracker, int)};
* included in {@link ScanResult} so must share visibility. * included in {@link ScanResult} so must share visibility.
*/ */
static final class MarkerPurgeSummary { public static final class MarkerPurgeSummary {
/** Number of markers deleted. */ /** Number of markers deleted. */
private int markersDeleted; private int markersDeleted;
@ -613,14 +710,26 @@ public final class MarkerTool extends S3GuardTool {
} }
/**
* Count of markers deleted.
* @return a number, zero when prune==false.
*/
int getMarkersDeleted() { int getMarkersDeleted() {
return markersDeleted; return markersDeleted;
} }
/**
* Count of bulk delete requests issued.
* @return count of calls made to S3.
*/
int getDeleteRequests() { int getDeleteRequests() {
return deleteRequests; return deleteRequests;
} }
/**
* Total duration of delete requests.
* @return a time interval in millis.
*/
long getTotalDeleteRequestDuration() { long getTotalDeleteRequestDuration() {
return totalDeleteRequestDuration; return totalDeleteRequestDuration;
} }
@ -699,25 +808,181 @@ public final class MarkerTool extends S3GuardTool {
/** /**
* Execute the marker tool, with no checks on return codes. * Execute the marker tool, with no checks on return codes.
* *
* @param sourceFS filesystem to use * @param scanArgs set of args for the scanner.
* @param path path to scan
* @param doPurge should markers be purged
* @param expectedMarkers number of markers expected
* @param limit limit of files to scan; -1 for 'unlimited'
* @param nonAuth only use nonauth path count for failure rules
* @return the result * @return the result
*/ */
@SuppressWarnings("IOResourceOpenedButNotSafelyClosed") @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
public static MarkerTool.ScanResult execMarkerTool( public static MarkerTool.ScanResult execMarkerTool(
final FileSystem sourceFS, ScanArgs scanArgs) throws IOException {
final Path path, MarkerTool tool = new MarkerTool(scanArgs.getSourceFS().getConf());
final boolean doPurge,
final int expectedMarkers,
final int limit, boolean nonAuth) throws IOException {
MarkerTool tool = new MarkerTool(sourceFS.getConf());
tool.setVerbose(LOG.isDebugEnabled()); tool.setVerbose(LOG.isDebugEnabled());
return tool.execute(sourceFS, path, doPurge, return tool.execute(scanArgs);
expectedMarkers, limit, nonAuth); }
/**
 * Immutable set of arguments for a marker scan.
 * <p>
 * An argument object is used because the scan entry points had
 * accumulated too many positional parameters, which was making
 * maintenance harder. The constructor is private.
 */
public static final class ScanArgs {

  /** Source FS; must be or wrap an S3A FS. */
  private final FileSystem sourceFS;

  /** Path to scan. */
  private final Path path;

  /** Purge? */
  private final boolean doPurge;

  /** Min marker count (ignored on purge). */
  private final int minMarkerCount;

  /** Max marker count (ignored on purge). */
  private final int maxMarkerCount;

  /** Limit of files to scan; 0 for 'unlimited'. */
  private final int limit;

  /** Consider only markers in nonauth paths as errors. */
  private final boolean nonAuth;

  /**
   * Create the argument set.
   * @param sourceFS source FS; must be or wrap an S3A FS.
   * @param path path to scan.
   * @param doPurge purge?
   * @param minMarkerCount min marker count (ignored on purge)
   * @param maxMarkerCount max marker count (ignored on purge)
   * @param limit limit of files to scan; 0 for 'unlimited'
   * @param nonAuth consider only markers in nonauth paths as errors
   */
  private ScanArgs(final FileSystem sourceFS,
      final Path path,
      final boolean doPurge,
      final int minMarkerCount,
      final int maxMarkerCount,
      final int limit,
      final boolean nonAuth) {
    this.sourceFS = sourceFS;
    this.path = path;
    this.doPurge = doPurge;
    this.minMarkerCount = minMarkerCount;
    this.maxMarkerCount = maxMarkerCount;
    this.limit = limit;
    this.nonAuth = nonAuth;
  }

  /** @return the source filesystem. */
  FileSystem getSourceFS() {
    return this.sourceFS;
  }

  /** @return the path to scan. */
  Path getPath() {
    return this.path;
  }

  /** @return true if markers are to be purged. */
  boolean isDoPurge() {
    return this.doPurge;
  }

  /** @return the minimum marker count. */
  int getMinMarkerCount() {
    return this.minMarkerCount;
  }

  /** @return the maximum marker count. */
  int getMaxMarkerCount() {
    return this.maxMarkerCount;
  }

  /** @return the limit of files to scan; 0 for 'unlimited'. */
  int getLimit() {
    return this.limit;
  }

  /** @return true if only markers in nonauth paths count as errors. */
  boolean isNonAuth() {
    return this.nonAuth;
  }
}
/**
* Builder of the scan arguments.
*/
public static final class ScanArgsBuilder {
/** Source FS; must be or wrap an S3A FS. */
private FileSystem sourceFS;
/** Path to scan. */
private Path path;
/** Purge? */
private boolean doPurge = false;
/** Min marker count (ignored on purge). */
private int minMarkerCount = 0;
/** Max marker count (ignored on purge). */
private int maxMarkerCount = 0;
/** Limit of files to scan; 0 for 'unlimited'. */
private int limit = UNLIMITED_LISTING;
/** Consider only markers in nonauth paths as errors. */
private boolean nonAuth = false;
/** Source FS; must be or wrap an S3A FS. */
public ScanArgsBuilder withSourceFS(final FileSystem source) {
this.sourceFS = source;
return this;
}
/** Path to scan. */
public ScanArgsBuilder withPath(final Path p) {
this.path = p;
return this;
}
/** Purge? */
public ScanArgsBuilder withDoPurge(final boolean d) {
this.doPurge = d;
return this;
}
/** Min marker count (ignored on purge). */
public ScanArgsBuilder withMinMarkerCount(final int min) {
this.minMarkerCount = min;
return this;
}
/** Max marker count (ignored on purge). */
public ScanArgsBuilder withMaxMarkerCount(final int max) {
this.maxMarkerCount = max;
return this;
}
/** Limit of files to scan; 0 for 'unlimited'. */
public ScanArgsBuilder withLimit(final int l) {
this.limit = l;
return this;
}
/** Consider only markers in nonauth paths as errors. */
public ScanArgsBuilder withNonAuth(final boolean b) {
this.nonAuth = b;
return this;
}
/**
* Build the actual argument instance.
* @return the arguments to pass in
*/
public ScanArgs build() {
return new ScanArgs(sourceFS,
path,
doPurge,
minMarkerCount,
maxMarkerCount,
limit,
nonAuth);
}
} }
} }

View File

@ -366,7 +366,7 @@ Syntax
``` ```
> hadoop s3guard markers -verbose -nonauth > hadoop s3guard markers -verbose -nonauth
markers (-audit | -clean) [-expected <count>] [-out <filename>] [-limit <limit>] [-nonauth] [-verbose] <PATH> markers (-audit | -clean) [-min <count>] [-max <count>] [-out <filename>] [-limit <limit>] [-nonauth] [-verbose] <PATH>
View and manipulate S3 directory markers View and manipulate S3 directory markers
``` ```
@ -377,7 +377,8 @@ markers (-audit | -clean) [-expected <count>] [-out <filename>] [-limit <limit>]
|-------------------------|-------------------------| |-------------------------|-------------------------|
| `-audit` | Audit the path for surplus markers | | `-audit` | Audit the path for surplus markers |
| `-clean` | Clean all surplus markers under a path | | `-clean` | Clean all surplus markers under a path |
| `-expected <count>]` | Expected number of markers to find (primarily for testing) | | `-min <count>` | Minimum number of markers an audit must find (default: 0) |
| `-max <count>` | Maximum number of markers an audit may find (default: 0) |
| `-limit <count>]` | Limit the number of objects to scan | | `-limit <count>]` | Limit the number of objects to scan |
| `-nonauth` | Only consider markers in non-authoritative paths as errors | | `-nonauth` | Only consider markers in non-authoritative paths as errors |
| `-out <filename>` | Save a list of all markers found to the nominated file | | `-out <filename>` | Save a list of all markers found to the nominated file |
@ -489,7 +490,7 @@ The `markers clean` command will clean the directory tree of all surplus markers
The `-verbose` option prints more detail on the operation as well as some IO statistics The `-verbose` option prints more detail on the operation as well as some IO statistics
``` ```
> hadoop s3guard markers -verbose -clean s3a://london/ > hadoop s3guard markers -clean -verbose s3a://london/
2020-08-05 18:33:25,303 [main] INFO impl.DirectoryPolicyImpl (DirectoryPolicyImpl.java:getDirectoryPolicy(143)) - Directory markers will be kept on authoritative paths 2020-08-05 18:33:25,303 [main] INFO impl.DirectoryPolicyImpl (DirectoryPolicyImpl.java:getDirectoryPolicy(143)) - Directory markers will be kept on authoritative paths
The directory marker policy of s3a://london is "Authoritative" The directory marker policy of s3a://london is "Authoritative"

View File

@ -90,10 +90,23 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
&& !fs.getDirectoryMarkerPolicy() && !fs.getDirectoryMarkerPolicy()
.keepDirectoryMarkers(methodPath) .keepDirectoryMarkers(methodPath)
&& fs.isDirectory(methodPath)) { && fs.isDirectory(methodPath)) {
MarkerTool.ScanResult result = MarkerTool.execMarkerTool(fs, MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
methodPath, true, 0, UNLIMITED_LISTING, false); new MarkerTool.ScanArgsBuilder()
assertEquals("Audit of " + methodPath + " failed: " + result, .withSourceFS(fs)
.withPath(methodPath)
.withDoPurge(true)
.withMinMarkerCount(0)
.withMaxMarkerCount(0)
.withLimit(UNLIMITED_LISTING)
.withNonAuth(false)
.build());
final String resultStr = result.toString();
assertEquals("Audit of " + methodPath + " failed: "
+ resultStr,
0, result.getExitCode()); 0, result.getExitCode());
assertEquals("Marker Count under " + methodPath
+ " non-zero: " + resultStr,
0, result.getFilteredMarkerCount());
} }
} catch (FileNotFoundException ignored) { } catch (FileNotFoundException ignored) {
} catch (Exception e) { } catch (Exception e) {

View File

@ -309,11 +309,15 @@ public class AbstractMarkerToolTest extends AbstractS3ATestBase {
final boolean nonAuth) throws IOException { final boolean nonAuth) throws IOException {
MarkerTool.ScanResult result = MarkerTool.execMarkerTool( MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
sourceFS, new MarkerTool.ScanArgsBuilder()
path, .withSourceFS(sourceFS)
doPurge, .withPath(path)
expectedMarkers, .withDoPurge(doPurge)
limit, nonAuth); .withMinMarkerCount(expectedMarkers)
.withMaxMarkerCount(expectedMarkers)
.withLimit(limit)
.withNonAuth(nonAuth)
.build());
Assertions.assertThat(result.getExitCode()) Assertions.assertThat(result.getExitCode())
.describedAs("Exit code of marker(%s, %s, %d) -> %s", .describedAs("Exit code of marker(%s, %s, %d) -> %s",
path, doPurge, expectedMarkers, result) path, doPurge, expectedMarkers, result)
@ -330,5 +334,4 @@ public class AbstractMarkerToolTest extends AbstractS3ATestBase {
return "-" + s; return "-" + s;
} }
} }

View File

@ -259,7 +259,8 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
AUDIT, AUDIT,
m(OPT_LIMIT), 0, m(OPT_LIMIT), 0,
m(OPT_OUT), audit, m(OPT_OUT), audit,
m(OPT_EXPECTED), expectedMarkersWithBaseDir, m(OPT_MIN), expectedMarkersWithBaseDir,
m(OPT_MAX), expectedMarkersWithBaseDir,
createdPaths.base); createdPaths.base);
expectMarkersInOutput(audit, expectedMarkersWithBaseDir); expectMarkersInOutput(audit, expectedMarkersWithBaseDir);
} }
@ -286,9 +287,6 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
m(OPT_LIMIT), 2, m(OPT_LIMIT), 2,
CLEAN, CLEAN,
createdPaths.base); createdPaths.base);
run(MARKERS, V,
AUDIT,
createdPaths.base);
} }
/** /**
@ -302,7 +300,8 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
describe("Audit a few thousand landsat objects"); describe("Audit a few thousand landsat objects");
final File audit = tempAuditFile(); final File audit = tempAuditFile();
run(MARKERS, runToFailure(EXIT_INTERRUPTED,
MARKERS,
AUDIT, AUDIT,
m(OPT_LIMIT), 3000, m(OPT_LIMIT), 3000,
m(OPT_OUT), audit, m(OPT_OUT), audit,