HADOOP-17227. S3A Marker Tool tuning (#2254)
Contributed by Steve Loughran.
This commit is contained in:
parent
696e4fe50e
commit
5346cc3263
|
@ -46,6 +46,7 @@ import org.slf4j.Logger;
|
||||||
import org.slf4j.LoggerFactory;
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
import org.apache.commons.lang3.time.DurationFormatUtils;
|
||||||
import org.apache.hadoop.classification.InterfaceAudience;
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
import org.apache.hadoop.classification.InterfaceStability;
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
import org.apache.hadoop.conf.Configuration;
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
@ -758,8 +759,8 @@ public abstract class S3GuardTool extends Configured implements Tool,
|
||||||
*/
|
*/
|
||||||
static class Destroy extends S3GuardTool {
|
static class Destroy extends S3GuardTool {
|
||||||
public static final String NAME = "destroy";
|
public static final String NAME = "destroy";
|
||||||
public static final String PURPOSE = "destroy Metadata Store data "
|
public static final String PURPOSE = "destroy the Metadata Store including its"
|
||||||
+ DATA_IN_S3_IS_PRESERVED;
|
+ " contents" + DATA_IN_S3_IS_PRESERVED;
|
||||||
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
private static final String USAGE = NAME + " [OPTIONS] [s3a://BUCKET]\n" +
|
||||||
"\t" + PURPOSE + "\n\n" +
|
"\t" + PURPOSE + "\n\n" +
|
||||||
"Common options:\n" +
|
"Common options:\n" +
|
||||||
|
@ -1252,7 +1253,7 @@ public abstract class S3GuardTool extends Configured implements Tool,
|
||||||
|
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
public static final String IS_MARKER_AWARE =
|
public static final String IS_MARKER_AWARE =
|
||||||
"The S3A connector is compatible with buckets where"
|
"\tThe S3A connector is compatible with buckets where"
|
||||||
+ " directory markers are not deleted";
|
+ " directory markers are not deleted";
|
||||||
|
|
||||||
public BucketInfo(Configuration conf) {
|
public BucketInfo(Configuration conf) {
|
||||||
|
@ -1328,8 +1329,9 @@ public abstract class S3GuardTool extends Configured implements Tool,
|
||||||
authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false);
|
authMode = conf.getBoolean(METADATASTORE_AUTHORITATIVE, false);
|
||||||
final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL,
|
final long ttl = conf.getTimeDuration(METADATASTORE_METADATA_TTL,
|
||||||
DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS);
|
DEFAULT_METADATASTORE_METADATA_TTL, TimeUnit.MILLISECONDS);
|
||||||
println(out, "\tMetadata time to live: %s=%s milliseconds",
|
println(out, "\tMetadata time to live: (set in %s) = %s",
|
||||||
METADATASTORE_METADATA_TTL, ttl);
|
METADATASTORE_METADATA_TTL,
|
||||||
|
DurationFormatUtils.formatDurationHMS(ttl));
|
||||||
printStoreDiagnostics(out, store);
|
printStoreDiagnostics(out, store);
|
||||||
} else {
|
} else {
|
||||||
println(out, "Filesystem %s is not using S3Guard", fsUri);
|
println(out, "Filesystem %s is not using S3Guard", fsUri);
|
||||||
|
@ -1463,10 +1465,18 @@ public abstract class S3GuardTool extends Configured implements Tool,
|
||||||
private void processMarkerOption(final PrintStream out,
|
private void processMarkerOption(final PrintStream out,
|
||||||
final S3AFileSystem fs,
|
final S3AFileSystem fs,
|
||||||
final String marker) {
|
final String marker) {
|
||||||
|
println(out, "%nSecurity");
|
||||||
DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy();
|
DirectoryPolicy markerPolicy = fs.getDirectoryMarkerPolicy();
|
||||||
String desc = markerPolicy.describe();
|
String desc = markerPolicy.describe();
|
||||||
println(out, "%nThe directory marker policy is \"%s\"%n", desc);
|
println(out, "\tThe directory marker policy is \"%s\"", desc);
|
||||||
|
|
||||||
|
String pols = DirectoryPolicyImpl.availablePolicies()
|
||||||
|
.stream()
|
||||||
|
.map(DirectoryPolicy.MarkerPolicy::getOptionName)
|
||||||
|
.collect(Collectors.joining(", "));
|
||||||
|
println(out, "\tAvailable Policies: %s", pols);
|
||||||
|
printOption(out, "\tAuthoritative paths",
|
||||||
|
AUTHORITATIVE_PATH, "");
|
||||||
DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy();
|
DirectoryPolicy.MarkerPolicy mp = markerPolicy.getMarkerPolicy();
|
||||||
|
|
||||||
String desiredMarker = marker == null
|
String desiredMarker = marker == null
|
||||||
|
@ -1478,12 +1488,6 @@ public abstract class S3GuardTool extends Configured implements Tool,
|
||||||
// simple awareness test -provides a way to validate compatibility
|
// simple awareness test -provides a way to validate compatibility
|
||||||
// on the command line
|
// on the command line
|
||||||
println(out, IS_MARKER_AWARE);
|
println(out, IS_MARKER_AWARE);
|
||||||
String pols = DirectoryPolicyImpl.availablePolicies()
|
|
||||||
.stream()
|
|
||||||
.map(DirectoryPolicy.MarkerPolicy::getOptionName)
|
|
||||||
.collect(Collectors.joining(", "));
|
|
||||||
println(out, "Available Policies: %s", pols);
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// compare with current policy
|
// compare with current policy
|
||||||
if (!optionName.equalsIgnoreCase(desiredMarker)) {
|
if (!optionName.equalsIgnoreCase(desiredMarker)) {
|
||||||
|
|
|
@ -53,6 +53,7 @@ import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
|
||||||
import org.apache.hadoop.fs.s3a.UnknownStoreException;
|
import org.apache.hadoop.fs.s3a.UnknownStoreException;
|
||||||
import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker;
|
import org.apache.hadoop.fs.s3a.impl.DirMarkerTracker;
|
||||||
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy;
|
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy;
|
||||||
|
import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl;
|
||||||
import org.apache.hadoop.fs.s3a.impl.StoreContext;
|
import org.apache.hadoop.fs.s3a.impl.StoreContext;
|
||||||
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
|
import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool;
|
||||||
import org.apache.hadoop.fs.shell.CommandFormat;
|
import org.apache.hadoop.fs.shell.CommandFormat;
|
||||||
|
@ -113,9 +114,14 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
public static final String CLEAN = "-" + OPT_CLEAN;
|
public static final String CLEAN = "-" + OPT_CLEAN;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Expected number of markers to find: {@value}.
|
* Min number of markers to find: {@value}.
|
||||||
*/
|
*/
|
||||||
public static final String OPT_EXPECTED = "expected";
|
public static final String OPT_MIN = "min";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Max number of markers to find: {@value}.
|
||||||
|
*/
|
||||||
|
public static final String OPT_MAX = "max";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Name of a file to save the list of markers to: {@value}.
|
* Name of a file to save the list of markers to: {@value}.
|
||||||
|
@ -151,13 +157,21 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
public static final int UNLIMITED_LISTING = 0;
|
public static final int UNLIMITED_LISTING = 0;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constant to use when there is no minimum number of
|
||||||
|
* markers: {@value}.
|
||||||
|
*/
|
||||||
|
public static final int UNLIMITED_MIN_MARKERS = -1;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Usage string: {@value}.
|
* Usage string: {@value}.
|
||||||
*/
|
*/
|
||||||
private static final String USAGE = MARKERS
|
private static final String USAGE = MARKERS
|
||||||
+ " (-" + OPT_AUDIT
|
+ " (-" + OPT_AUDIT
|
||||||
+ " | -" + OPT_CLEAN + ")"
|
+ " | -" + OPT_CLEAN + ")"
|
||||||
+ " [-" + OPT_EXPECTED + " <count>]"
|
+ " [-" + OPT_MIN + " <count>]"
|
||||||
|
+ " [-" + OPT_MAX + " <count>]"
|
||||||
+ " [-" + OPT_OUT + " <filename>]"
|
+ " [-" + OPT_OUT + " <filename>]"
|
||||||
+ " [-" + OPT_LIMIT + " <limit>]"
|
+ " [-" + OPT_LIMIT + " <limit>]"
|
||||||
+ " [-" + OPT_NONAUTH + "]"
|
+ " [-" + OPT_NONAUTH + "]"
|
||||||
|
@ -195,7 +209,8 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
VERBOSE,
|
VERBOSE,
|
||||||
OPT_NONAUTH);
|
OPT_NONAUTH);
|
||||||
CommandFormat format = getCommandFormat();
|
CommandFormat format = getCommandFormat();
|
||||||
format.addOptionWithValue(OPT_EXPECTED);
|
format.addOptionWithValue(OPT_MIN);
|
||||||
|
format.addOptionWithValue(OPT_MAX);
|
||||||
format.addOptionWithValue(OPT_LIMIT);
|
format.addOptionWithValue(OPT_LIMIT);
|
||||||
format.addOptionWithValue(OPT_OUT);
|
format.addOptionWithValue(OPT_OUT);
|
||||||
}
|
}
|
||||||
|
@ -231,8 +246,7 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
if (parsedArgs.size() != 1) {
|
if (parsedArgs.size() != 1) {
|
||||||
errorln(getUsage());
|
errorln(getUsage());
|
||||||
println(out, "Supplied arguments: ["
|
println(out, "Supplied arguments: ["
|
||||||
+ parsedArgs.stream()
|
+ String.join(", ", parsedArgs)
|
||||||
.collect(Collectors.joining(", "))
|
|
||||||
+ "]");
|
+ "]");
|
||||||
throw new ExitUtil.ExitException(EXIT_USAGE,
|
throw new ExitUtil.ExitException(EXIT_USAGE,
|
||||||
String.format(E_ARGUMENTS, parsedArgs.size()));
|
String.format(E_ARGUMENTS, parsedArgs.size()));
|
||||||
|
@ -241,12 +255,11 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
CommandFormat command = getCommandFormat();
|
CommandFormat command = getCommandFormat();
|
||||||
verbose = command.getOpt(VERBOSE);
|
verbose = command.getOpt(VERBOSE);
|
||||||
|
|
||||||
// How many markers are expected?
|
// minimum number of markers expected
|
||||||
int expected = 0;
|
int expectedMin = getOptValue(OPT_MIN, 0);
|
||||||
String value = command.getOptValue(OPT_EXPECTED);
|
// max number of markers allowed
|
||||||
if (value != null && !value.isEmpty()) {
|
int expectedMax = getOptValue(OPT_MAX, 0);
|
||||||
expected = Integer.parseInt(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
// determine the action
|
// determine the action
|
||||||
boolean audit = command.getOpt(OPT_AUDIT);
|
boolean audit = command.getOpt(OPT_AUDIT);
|
||||||
|
@ -258,11 +271,7 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
throw new ExitUtil.ExitException(EXIT_USAGE,
|
throw new ExitUtil.ExitException(EXIT_USAGE,
|
||||||
"Exactly one of " + AUDIT + " and " + CLEAN);
|
"Exactly one of " + AUDIT + " and " + CLEAN);
|
||||||
}
|
}
|
||||||
int limit = UNLIMITED_LISTING;
|
int limit = getOptValue(OPT_LIMIT, UNLIMITED_LISTING);
|
||||||
value = command.getOptValue(OPT_LIMIT);
|
|
||||||
if (value != null && !value.isEmpty()) {
|
|
||||||
limit = Integer.parseInt(value);
|
|
||||||
}
|
|
||||||
final String dir = parsedArgs.get(0);
|
final String dir = parsedArgs.get(0);
|
||||||
Path path = new Path(dir);
|
Path path = new Path(dir);
|
||||||
URI uri = path.toUri();
|
URI uri = path.toUri();
|
||||||
|
@ -271,13 +280,17 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
path = new Path(path, "/");
|
path = new Path(path, "/");
|
||||||
}
|
}
|
||||||
FileSystem fs = path.getFileSystem(getConf());
|
FileSystem fs = path.getFileSystem(getConf());
|
||||||
|
boolean nonAuth = command.getOpt(OPT_NONAUTH);
|
||||||
ScanResult result = execute(
|
ScanResult result = execute(
|
||||||
fs,
|
new ScanArgsBuilder()
|
||||||
path,
|
.withSourceFS(fs)
|
||||||
clean,
|
.withPath(path)
|
||||||
expected,
|
.withDoPurge(clean)
|
||||||
limit,
|
.withMinMarkerCount(expectedMin)
|
||||||
command.getOpt(OPT_NONAUTH));
|
.withMaxMarkerCount(expectedMax)
|
||||||
|
.withLimit(limit)
|
||||||
|
.withNonAuth(nonAuth)
|
||||||
|
.build());
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
dumpFileSystemStatistics(out);
|
dumpFileSystemStatistics(out);
|
||||||
}
|
}
|
||||||
|
@ -300,30 +313,43 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
IOUtils.writeLines(surplus, "\n", writer);
|
IOUtils.writeLines(surplus, "\n", writer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result.exitCode;
|
|
||||||
|
return result.finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get the value of an option, or the default if the option
|
||||||
|
* is unset/empty.
|
||||||
|
* @param option option key
|
||||||
|
* @param defVal default
|
||||||
|
* @return the value to use
|
||||||
|
*/
|
||||||
|
private int getOptValue(String option, int defVal) {
|
||||||
|
CommandFormat command = getCommandFormat();
|
||||||
|
String value = command.getOptValue(option);
|
||||||
|
if (value != null && !value.isEmpty()) {
|
||||||
|
try {
|
||||||
|
return Integer.parseInt(value);
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
throw new ExitUtil.ExitException(EXIT_USAGE,
|
||||||
|
String.format("Argument for %s is not a number: %s",
|
||||||
|
option, value));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return defVal;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Execute the scan/purge.
|
* Execute the scan/purge.
|
||||||
* @param sourceFS source FS; must be or wrap an S3A FS.
|
*
|
||||||
* @param path path to scan.
|
* @param scanArgs@return scan+purge result.
|
||||||
* @param doPurge purge?
|
|
||||||
* @param expectedMarkerCount expected marker count
|
|
||||||
* @param limit limit of files to scan; -1 for 'unlimited'
|
|
||||||
* @param nonAuth consider only markers in nonauth paths as errors
|
|
||||||
* @return scan+purge result.
|
|
||||||
* @throws IOException failure
|
* @throws IOException failure
|
||||||
*/
|
*/
|
||||||
@VisibleForTesting
|
@VisibleForTesting
|
||||||
ScanResult execute(
|
ScanResult execute(final ScanArgs scanArgs)
|
||||||
final FileSystem sourceFS,
|
|
||||||
final Path path,
|
|
||||||
final boolean doPurge,
|
|
||||||
final int expectedMarkerCount,
|
|
||||||
final int limit,
|
|
||||||
final boolean nonAuth)
|
|
||||||
throws IOException {
|
throws IOException {
|
||||||
S3AFileSystem fs = bindFilesystem(sourceFS);
|
S3AFileSystem fs = bindFilesystem(scanArgs.getSourceFS());
|
||||||
|
|
||||||
// extract the callbacks needed for the rest of the work
|
// extract the callbacks needed for the rest of the work
|
||||||
storeContext = fs.createStoreContext();
|
storeContext = fs.createStoreContext();
|
||||||
|
@ -344,6 +370,7 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
println(out, "Authoritative path list is \"%s\"", authPath);
|
println(out, "Authoritative path list is \"%s\"", authPath);
|
||||||
}
|
}
|
||||||
// qualify the path
|
// qualify the path
|
||||||
|
Path path = scanArgs.getPath();
|
||||||
Path target = path.makeQualified(fs.getUri(), new Path("/"));
|
Path target = path.makeQualified(fs.getUri(), new Path("/"));
|
||||||
// initial safety check: does the path exist?
|
// initial safety check: does the path exist?
|
||||||
try {
|
try {
|
||||||
|
@ -360,10 +387,19 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
}
|
}
|
||||||
|
|
||||||
// the default filter policy is that all entries should be deleted
|
// the default filter policy is that all entries should be deleted
|
||||||
DirectoryPolicy filterPolicy = nonAuth
|
DirectoryPolicy filterPolicy;
|
||||||
? activePolicy
|
if (scanArgs.isNonAuth()) {
|
||||||
: null;
|
filterPolicy = new DirectoryPolicyImpl(
|
||||||
ScanResult result = scan(target, doPurge, expectedMarkerCount, limit,
|
DirectoryPolicy.MarkerPolicy.Authoritative,
|
||||||
|
fs::allowAuthoritative);
|
||||||
|
} else {
|
||||||
|
filterPolicy = null;
|
||||||
|
}
|
||||||
|
ScanResult result = scan(target,
|
||||||
|
scanArgs.isDoPurge(),
|
||||||
|
scanArgs.getMaxMarkerCount(),
|
||||||
|
scanArgs.getMinMarkerCount(),
|
||||||
|
scanArgs.getLimit(),
|
||||||
filterPolicy);
|
filterPolicy);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -378,6 +414,22 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
*/
|
*/
|
||||||
private int exitCode;
|
private int exitCode;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Text to include if raising an exception.
|
||||||
|
*/
|
||||||
|
private String exitText = "";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count of all markers found.
|
||||||
|
*/
|
||||||
|
private int totalMarkerCount;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count of all markers found after excluding
|
||||||
|
* any from a [-nonauth] qualification.
|
||||||
|
*/
|
||||||
|
private int filteredMarkerCount;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The tracker.
|
* The tracker.
|
||||||
*/
|
*/
|
||||||
|
@ -395,6 +447,9 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "ScanResult{" +
|
return "ScanResult{" +
|
||||||
"exitCode=" + exitCode +
|
"exitCode=" + exitCode +
|
||||||
|
", exitText=" + exitText +
|
||||||
|
", totalMarkerCount=" + totalMarkerCount +
|
||||||
|
", filteredMarkerCount=" + filteredMarkerCount +
|
||||||
", tracker=" + tracker +
|
", tracker=" + tracker +
|
||||||
", purgeSummary=" + purgeSummary +
|
", purgeSummary=" + purgeSummary +
|
||||||
'}';
|
'}';
|
||||||
|
@ -414,13 +469,34 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
public MarkerPurgeSummary getPurgeSummary() {
|
public MarkerPurgeSummary getPurgeSummary() {
|
||||||
return purgeSummary;
|
return purgeSummary;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public int getTotalMarkerCount() {
|
||||||
|
return totalMarkerCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFilteredMarkerCount() {
|
||||||
|
return filteredMarkerCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Throw an exception if the exit code is non-zero.
|
||||||
|
* @return 0 if everything is good.
|
||||||
|
* @throws ExitUtil.ExitException if code != 0
|
||||||
|
*/
|
||||||
|
public int finish() throws ExitUtil.ExitException {
|
||||||
|
if (exitCode != 0) {
|
||||||
|
throw new ExitUtil.ExitException(exitCode, exitText);
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Do the scan/purge.
|
* Do the scan/purge.
|
||||||
* @param path path to scan.
|
* @param path path to scan.
|
||||||
* @param clean purge?
|
* @param doPurge purge rather than just scan/audit?
|
||||||
* @param expectedMarkerCount expected marker count
|
* @param minMarkerCount min marker count (ignored on purge)
|
||||||
|
* @param maxMarkerCount max marker count (ignored on purge)
|
||||||
* @param limit limit of files to scan; 0 for 'unlimited'
|
* @param limit limit of files to scan; 0 for 'unlimited'
|
||||||
* @param filterPolicy filter policy on a nonauth scan; may be null
|
* @param filterPolicy filter policy on a nonauth scan; may be null
|
||||||
* @return result.
|
* @return result.
|
||||||
|
@ -430,8 +506,9 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
@Retries.RetryTranslated
|
@Retries.RetryTranslated
|
||||||
private ScanResult scan(
|
private ScanResult scan(
|
||||||
final Path path,
|
final Path path,
|
||||||
final boolean clean,
|
final boolean doPurge,
|
||||||
final int expectedMarkerCount,
|
final int minMarkerCount,
|
||||||
|
final int maxMarkerCount,
|
||||||
final int limit,
|
final int limit,
|
||||||
final DirectoryPolicy filterPolicy)
|
final DirectoryPolicy filterPolicy)
|
||||||
throws IOException, ExitUtil.ExitException {
|
throws IOException, ExitUtil.ExitException {
|
||||||
|
@ -458,13 +535,16 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
= tracker.getSurplusMarkers();
|
= tracker.getSurplusMarkers();
|
||||||
Map<Path, DirMarkerTracker.Marker> leafMarkers
|
Map<Path, DirMarkerTracker.Marker> leafMarkers
|
||||||
= tracker.getLeafMarkers();
|
= tracker.getLeafMarkers();
|
||||||
int surplus = surplusMarkers.size();
|
// determine marker count
|
||||||
if (surplus == 0) {
|
int markerCount = surplusMarkers.size();
|
||||||
|
result.totalMarkerCount = markerCount;
|
||||||
|
result.filteredMarkerCount = markerCount;
|
||||||
|
if (markerCount == 0) {
|
||||||
println(out, "No surplus directory markers were found under %s", path);
|
println(out, "No surplus directory markers were found under %s", path);
|
||||||
} else {
|
} else {
|
||||||
println(out, "Found %d surplus directory marker%s under %s",
|
println(out, "Found %d surplus directory marker%s under %s",
|
||||||
surplus,
|
markerCount,
|
||||||
suffix(surplus),
|
suffix(markerCount),
|
||||||
path);
|
path);
|
||||||
|
|
||||||
for (Path markers : surplusMarkers.keySet()) {
|
for (Path markers : surplusMarkers.keySet()) {
|
||||||
|
@ -482,9 +562,9 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
println(out, "These are required to indicate empty directories");
|
println(out, "These are required to indicate empty directories");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (clean) {
|
if (doPurge) {
|
||||||
// clean: remove the markers, do not worry about their
|
// clean: remove the markers, do not worry about their
|
||||||
// presence when reporting success/failiure
|
// presence when reporting success/failure
|
||||||
int deletePageSize = storeContext.getConfiguration()
|
int deletePageSize = storeContext.getConfiguration()
|
||||||
.getInt(BULK_DELETE_PAGE_SIZE,
|
.getInt(BULK_DELETE_PAGE_SIZE,
|
||||||
BULK_DELETE_PAGE_SIZE_DEFAULT);
|
BULK_DELETE_PAGE_SIZE_DEFAULT);
|
||||||
|
@ -503,28 +583,45 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
allowed.forEach(p -> println(out, p.toString()));
|
allowed.forEach(p -> println(out, p.toString()));
|
||||||
}
|
}
|
||||||
// recalculate the marker size
|
// recalculate the marker size
|
||||||
surplus = surplusMarkers.size();
|
markerCount = surplusMarkers.size();
|
||||||
|
result.filteredMarkerCount = markerCount;
|
||||||
}
|
}
|
||||||
if (surplus > expectedMarkerCount) {
|
if (markerCount < minMarkerCount || markerCount > maxMarkerCount) {
|
||||||
// failure
|
// failure
|
||||||
if (expectedMarkerCount > 0) {
|
return failScan(result, EXIT_NOT_ACCEPTABLE,
|
||||||
println(out, "Expected %d marker%s", expectedMarkerCount,
|
"Marker count %d out of range "
|
||||||
suffix(surplus));
|
+ "[%d - %d]",
|
||||||
}
|
markerCount, minMarkerCount, maxMarkerCount);
|
||||||
println(out, "Surplus markers were found -failing audit");
|
|
||||||
|
|
||||||
result.exitCode = EXIT_NOT_ACCEPTABLE;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// now one little check for whether a limit was reached.
|
// now one little check for whether a limit was reached.
|
||||||
if (!completed) {
|
if (!completed) {
|
||||||
println(out, "Listing limit reached before completing the scan");
|
failScan(result, EXIT_INTERRUPTED,
|
||||||
result.exitCode = EXIT_INTERRUPTED;
|
"Listing limit (%d) reached before completing the scan", limit);
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fail the scan; print the formatted error and update the result.
|
||||||
|
* @param result result to update
|
||||||
|
* @param code Exit code
|
||||||
|
* @param message Error message
|
||||||
|
* @param args arguments for the error message
|
||||||
|
* @return scan result
|
||||||
|
*/
|
||||||
|
private ScanResult failScan(
|
||||||
|
ScanResult result,
|
||||||
|
int code,
|
||||||
|
String message,
|
||||||
|
Object...args) {
|
||||||
|
String text = String.format(message, args);
|
||||||
|
result.exitCode = code;
|
||||||
|
result.exitText = text;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Suffix for plurals.
|
* Suffix for plurals.
|
||||||
* @param size size to generate a suffix for
|
* @param size size to generate a suffix for
|
||||||
|
@ -587,7 +684,7 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
* Result of a call of {@link #purgeMarkers(DirMarkerTracker, int)};
|
* Result of a call of {@link #purgeMarkers(DirMarkerTracker, int)};
|
||||||
* included in {@link ScanResult} so must share visibility.
|
* included in {@link ScanResult} so must share visibility.
|
||||||
*/
|
*/
|
||||||
static final class MarkerPurgeSummary {
|
public static final class MarkerPurgeSummary {
|
||||||
|
|
||||||
/** Number of markers deleted. */
|
/** Number of markers deleted. */
|
||||||
private int markersDeleted;
|
private int markersDeleted;
|
||||||
|
@ -613,14 +710,26 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count of markers deleted.
|
||||||
|
* @return a number, zero when prune==false.
|
||||||
|
*/
|
||||||
int getMarkersDeleted() {
|
int getMarkersDeleted() {
|
||||||
return markersDeleted;
|
return markersDeleted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count of bulk delete requests issued.
|
||||||
|
* @return count of calls made to S3.
|
||||||
|
*/
|
||||||
int getDeleteRequests() {
|
int getDeleteRequests() {
|
||||||
return deleteRequests;
|
return deleteRequests;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total duration of delete requests.
|
||||||
|
* @return a time interval in millis.
|
||||||
|
*/
|
||||||
long getTotalDeleteRequestDuration() {
|
long getTotalDeleteRequestDuration() {
|
||||||
return totalDeleteRequestDuration;
|
return totalDeleteRequestDuration;
|
||||||
}
|
}
|
||||||
|
@ -699,25 +808,181 @@ public final class MarkerTool extends S3GuardTool {
|
||||||
/**
|
/**
|
||||||
* Execute the marker tool, with no checks on return codes.
|
* Execute the marker tool, with no checks on return codes.
|
||||||
*
|
*
|
||||||
* @param sourceFS filesystem to use
|
* @param scanArgs set of args for the scanner.
|
||||||
* @param path path to scan
|
|
||||||
* @param doPurge should markers be purged
|
|
||||||
* @param expectedMarkers number of markers expected
|
|
||||||
* @param limit limit of files to scan; -1 for 'unlimited'
|
|
||||||
* @param nonAuth only use nonauth path count for failure rules
|
|
||||||
* @return the result
|
* @return the result
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
|
@SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
|
||||||
public static MarkerTool.ScanResult execMarkerTool(
|
public static MarkerTool.ScanResult execMarkerTool(
|
||||||
final FileSystem sourceFS,
|
ScanArgs scanArgs) throws IOException {
|
||||||
final Path path,
|
MarkerTool tool = new MarkerTool(scanArgs.getSourceFS().getConf());
|
||||||
final boolean doPurge,
|
|
||||||
final int expectedMarkers,
|
|
||||||
final int limit, boolean nonAuth) throws IOException {
|
|
||||||
MarkerTool tool = new MarkerTool(sourceFS.getConf());
|
|
||||||
tool.setVerbose(LOG.isDebugEnabled());
|
tool.setVerbose(LOG.isDebugEnabled());
|
||||||
|
|
||||||
return tool.execute(sourceFS, path, doPurge,
|
return tool.execute(scanArgs);
|
||||||
expectedMarkers, limit, nonAuth);
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Arguments for the scan.
|
||||||
|
* <p></p>
|
||||||
|
* Uses a builder/argument object because too many arguments were
|
||||||
|
* being created and it was making maintenance harder.
|
||||||
|
*/
|
||||||
|
public static final class ScanArgs {
|
||||||
|
|
||||||
|
/** Source FS; must be or wrap an S3A FS. */
|
||||||
|
private final FileSystem sourceFS;
|
||||||
|
|
||||||
|
/** Path to scan. */
|
||||||
|
private final Path path;
|
||||||
|
|
||||||
|
/** Purge? */
|
||||||
|
private final boolean doPurge;
|
||||||
|
|
||||||
|
/** Min marker count (ignored on purge). */
|
||||||
|
private final int minMarkerCount;
|
||||||
|
|
||||||
|
/** Max marker count (ignored on purge). */
|
||||||
|
private final int maxMarkerCount;
|
||||||
|
|
||||||
|
/** Limit of files to scan; 0 for 'unlimited'. */
|
||||||
|
private final int limit;
|
||||||
|
|
||||||
|
/** Consider only markers in nonauth paths as errors. */
|
||||||
|
private final boolean nonAuth;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @param sourceFS source FS; must be or wrap an S3A FS.
|
||||||
|
* @param path path to scan.
|
||||||
|
* @param doPurge purge?
|
||||||
|
* @param minMarkerCount min marker count (ignored on purge)
|
||||||
|
* @param maxMarkerCount max marker count (ignored on purge)
|
||||||
|
* @param limit limit of files to scan; 0 for 'unlimited'
|
||||||
|
* @param nonAuth consider only markers in nonauth paths as errors
|
||||||
|
*/
|
||||||
|
private ScanArgs(final FileSystem sourceFS,
|
||||||
|
final Path path,
|
||||||
|
final boolean doPurge,
|
||||||
|
final int minMarkerCount,
|
||||||
|
final int maxMarkerCount,
|
||||||
|
final int limit,
|
||||||
|
final boolean nonAuth) {
|
||||||
|
this.sourceFS = sourceFS;
|
||||||
|
this.path = path;
|
||||||
|
this.doPurge = doPurge;
|
||||||
|
this.minMarkerCount = minMarkerCount;
|
||||||
|
this.maxMarkerCount = maxMarkerCount;
|
||||||
|
this.limit = limit;
|
||||||
|
this.nonAuth = nonAuth;
|
||||||
|
}
|
||||||
|
|
||||||
|
FileSystem getSourceFS() {
|
||||||
|
return sourceFS;
|
||||||
|
}
|
||||||
|
|
||||||
|
Path getPath() {
|
||||||
|
return path;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isDoPurge() {
|
||||||
|
return doPurge;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMinMarkerCount() {
|
||||||
|
return minMarkerCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getMaxMarkerCount() {
|
||||||
|
return maxMarkerCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
int getLimit() {
|
||||||
|
return limit;
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean isNonAuth() {
|
||||||
|
return nonAuth;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Builder of the scan arguments.
|
||||||
|
*/
|
||||||
|
public static final class ScanArgsBuilder {
|
||||||
|
|
||||||
|
/** Source FS; must be or wrap an S3A FS. */
|
||||||
|
private FileSystem sourceFS;
|
||||||
|
|
||||||
|
/** Path to scan. */
|
||||||
|
private Path path;
|
||||||
|
|
||||||
|
/** Purge? */
|
||||||
|
private boolean doPurge = false;
|
||||||
|
|
||||||
|
/** Min marker count (ignored on purge). */
|
||||||
|
private int minMarkerCount = 0;
|
||||||
|
|
||||||
|
/** Max marker count (ignored on purge). */
|
||||||
|
private int maxMarkerCount = 0;
|
||||||
|
|
||||||
|
/** Limit of files to scan; 0 for 'unlimited'. */
|
||||||
|
private int limit = UNLIMITED_LISTING;
|
||||||
|
|
||||||
|
/** Consider only markers in nonauth paths as errors. */
|
||||||
|
private boolean nonAuth = false;
|
||||||
|
|
||||||
|
/** Source FS; must be or wrap an S3A FS. */
|
||||||
|
public ScanArgsBuilder withSourceFS(final FileSystem source) {
|
||||||
|
this.sourceFS = source;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Path to scan. */
|
||||||
|
public ScanArgsBuilder withPath(final Path p) {
|
||||||
|
this.path = p;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Purge? */
|
||||||
|
public ScanArgsBuilder withDoPurge(final boolean d) {
|
||||||
|
this.doPurge = d;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Min marker count (ignored on purge). */
|
||||||
|
public ScanArgsBuilder withMinMarkerCount(final int min) {
|
||||||
|
this.minMarkerCount = min;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Max marker count (ignored on purge). */
|
||||||
|
public ScanArgsBuilder withMaxMarkerCount(final int max) {
|
||||||
|
this.maxMarkerCount = max;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Limit of files to scan; 0 for 'unlimited'. */
|
||||||
|
public ScanArgsBuilder withLimit(final int l) {
|
||||||
|
this.limit = l;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Consider only markers in nonauth paths as errors. */
|
||||||
|
public ScanArgsBuilder withNonAuth(final boolean b) {
|
||||||
|
this.nonAuth = b;
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Build the actual argument instance.
|
||||||
|
* @return the arguments to pass in
|
||||||
|
*/
|
||||||
|
public ScanArgs build() {
|
||||||
|
return new ScanArgs(sourceFS,
|
||||||
|
path,
|
||||||
|
doPurge,
|
||||||
|
minMarkerCount,
|
||||||
|
maxMarkerCount,
|
||||||
|
limit,
|
||||||
|
nonAuth);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -366,7 +366,7 @@ Syntax
|
||||||
|
|
||||||
```
|
```
|
||||||
> hadoop s3guard markers -verbose -nonauth
|
> hadoop s3guard markers -verbose -nonauth
|
||||||
markers (-audit | -clean) [-expected <count>] [-out <filename>] [-limit <limit>] [-nonauth] [-verbose] <PATH>
|
markers (-audit | -clean) [-min <count>] [-max <count>] [-out <filename>] [-limit <limit>] [-nonauth] [-verbose] <PATH>
|
||||||
View and manipulate S3 directory markers
|
View and manipulate S3 directory markers
|
||||||
|
|
||||||
```
|
```
|
||||||
|
@ -377,7 +377,8 @@ markers (-audit | -clean) [-expected <count>] [-out <filename>] [-limit <limit>]
|
||||||
|-------------------------|-------------------------|
|
|-------------------------|-------------------------|
|
||||||
| `-audit` | Audit the path for surplus markers |
|
| `-audit` | Audit the path for surplus markers |
|
||||||
| `-clean` | Clean all surplus markers under a path |
|
| `-clean` | Clean all surplus markers under a path |
|
||||||
| `-expected <count>]` | Expected number of markers to find (primarily for testing) |
|
| `-min <count>` | Minimum number of markers an audit must find (default: 0) |
|
||||||
|
| `-max <count>]` | Minimum number of markers an audit must find (default: 0) |
|
||||||
| `-limit <count>]` | Limit the number of objects to scan |
|
| `-limit <count>]` | Limit the number of objects to scan |
|
||||||
| `-nonauth` | Only consider markers in non-authoritative paths as errors |
|
| `-nonauth` | Only consider markers in non-authoritative paths as errors |
|
||||||
| `-out <filename>` | Save a list of all markers found to the nominated file |
|
| `-out <filename>` | Save a list of all markers found to the nominated file |
|
||||||
|
@ -489,7 +490,7 @@ The `markers clean` command will clean the directory tree of all surplus markers
|
||||||
The `-verbose` option prints more detail on the operation as well as some IO statistics
|
The `-verbose` option prints more detail on the operation as well as some IO statistics
|
||||||
|
|
||||||
```
|
```
|
||||||
> hadoop s3guard markers -verbose -clean s3a://london/
|
> hadoop s3guard markers -clean -verbose s3a://london/
|
||||||
|
|
||||||
2020-08-05 18:33:25,303 [main] INFO impl.DirectoryPolicyImpl (DirectoryPolicyImpl.java:getDirectoryPolicy(143)) - Directory markers will be kept on authoritative paths
|
2020-08-05 18:33:25,303 [main] INFO impl.DirectoryPolicyImpl (DirectoryPolicyImpl.java:getDirectoryPolicy(143)) - Directory markers will be kept on authoritative paths
|
||||||
The directory marker policy of s3a://london is "Authoritative"
|
The directory marker policy of s3a://london is "Authoritative"
|
||||||
|
|
|
@ -90,10 +90,23 @@ public abstract class AbstractS3ATestBase extends AbstractFSContractTestBase
|
||||||
&& !fs.getDirectoryMarkerPolicy()
|
&& !fs.getDirectoryMarkerPolicy()
|
||||||
.keepDirectoryMarkers(methodPath)
|
.keepDirectoryMarkers(methodPath)
|
||||||
&& fs.isDirectory(methodPath)) {
|
&& fs.isDirectory(methodPath)) {
|
||||||
MarkerTool.ScanResult result = MarkerTool.execMarkerTool(fs,
|
MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
|
||||||
methodPath, true, 0, UNLIMITED_LISTING, false);
|
new MarkerTool.ScanArgsBuilder()
|
||||||
assertEquals("Audit of " + methodPath + " failed: " + result,
|
.withSourceFS(fs)
|
||||||
|
.withPath(methodPath)
|
||||||
|
.withDoPurge(true)
|
||||||
|
.withMinMarkerCount(0)
|
||||||
|
.withMaxMarkerCount(0)
|
||||||
|
.withLimit(UNLIMITED_LISTING)
|
||||||
|
.withNonAuth(false)
|
||||||
|
.build());
|
||||||
|
final String resultStr = result.toString();
|
||||||
|
assertEquals("Audit of " + methodPath + " failed: "
|
||||||
|
+ resultStr,
|
||||||
0, result.getExitCode());
|
0, result.getExitCode());
|
||||||
|
assertEquals("Marker Count under " + methodPath
|
||||||
|
+ " non-zero: " + resultStr,
|
||||||
|
0, result.getFilteredMarkerCount());
|
||||||
}
|
}
|
||||||
} catch (FileNotFoundException ignored) {
|
} catch (FileNotFoundException ignored) {
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -309,11 +309,15 @@ public class AbstractMarkerToolTest extends AbstractS3ATestBase {
|
||||||
final boolean nonAuth) throws IOException {
|
final boolean nonAuth) throws IOException {
|
||||||
|
|
||||||
MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
|
MarkerTool.ScanResult result = MarkerTool.execMarkerTool(
|
||||||
sourceFS,
|
new MarkerTool.ScanArgsBuilder()
|
||||||
path,
|
.withSourceFS(sourceFS)
|
||||||
doPurge,
|
.withPath(path)
|
||||||
expectedMarkers,
|
.withDoPurge(doPurge)
|
||||||
limit, nonAuth);
|
.withMinMarkerCount(expectedMarkers)
|
||||||
|
.withMaxMarkerCount(expectedMarkers)
|
||||||
|
.withLimit(limit)
|
||||||
|
.withNonAuth(nonAuth)
|
||||||
|
.build());
|
||||||
Assertions.assertThat(result.getExitCode())
|
Assertions.assertThat(result.getExitCode())
|
||||||
.describedAs("Exit code of marker(%s, %s, %d) -> %s",
|
.describedAs("Exit code of marker(%s, %s, %d) -> %s",
|
||||||
path, doPurge, expectedMarkers, result)
|
path, doPurge, expectedMarkers, result)
|
||||||
|
@ -330,5 +334,4 @@ public class AbstractMarkerToolTest extends AbstractS3ATestBase {
|
||||||
return "-" + s;
|
return "-" + s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -259,7 +259,8 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
|
||||||
AUDIT,
|
AUDIT,
|
||||||
m(OPT_LIMIT), 0,
|
m(OPT_LIMIT), 0,
|
||||||
m(OPT_OUT), audit,
|
m(OPT_OUT), audit,
|
||||||
m(OPT_EXPECTED), expectedMarkersWithBaseDir,
|
m(OPT_MIN), expectedMarkersWithBaseDir,
|
||||||
|
m(OPT_MAX), expectedMarkersWithBaseDir,
|
||||||
createdPaths.base);
|
createdPaths.base);
|
||||||
expectMarkersInOutput(audit, expectedMarkersWithBaseDir);
|
expectMarkersInOutput(audit, expectedMarkersWithBaseDir);
|
||||||
}
|
}
|
||||||
|
@ -286,9 +287,6 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
|
||||||
m(OPT_LIMIT), 2,
|
m(OPT_LIMIT), 2,
|
||||||
CLEAN,
|
CLEAN,
|
||||||
createdPaths.base);
|
createdPaths.base);
|
||||||
run(MARKERS, V,
|
|
||||||
AUDIT,
|
|
||||||
createdPaths.base);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -302,7 +300,8 @@ public class ITestMarkerTool extends AbstractMarkerToolTest {
|
||||||
describe("Audit a few thousand landsat objects");
|
describe("Audit a few thousand landsat objects");
|
||||||
final File audit = tempAuditFile();
|
final File audit = tempAuditFile();
|
||||||
|
|
||||||
run(MARKERS,
|
runToFailure(EXIT_INTERRUPTED,
|
||||||
|
MARKERS,
|
||||||
AUDIT,
|
AUDIT,
|
||||||
m(OPT_LIMIT), 3000,
|
m(OPT_LIMIT), 3000,
|
||||||
m(OPT_OUT), audit,
|
m(OPT_OUT), audit,
|
||||||
|
|
Loading…
Reference in New Issue