HDFS-10778. Add -format option to make the output of FileDistribution processor human-readable in OfflineImageViewer.

This commit is contained in:
Akira Ajisaka 2016-09-08 15:13:43 +09:00
parent d355573f56
commit 63f594892e
7 changed files with 164 additions and 104 deletions

View File

@ -31,6 +31,7 @@ import org.apache.hadoop.hdfs.server.namenode.FSImageUtil;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
import org.apache.hadoop.util.LimitInputStream; import org.apache.hadoop.util.LimitInputStream;
import org.apache.hadoop.util.StringUtils;
import com.google.common.base.Preconditions; import com.google.common.base.Preconditions;
@ -75,11 +76,14 @@ final class FileDistributionCalculator {
private long totalSpace; private long totalSpace;
private long maxFileSize; private long maxFileSize;
private boolean formatOutput = false;
FileDistributionCalculator(Configuration conf, long maxSize, int steps, FileDistributionCalculator(Configuration conf, long maxSize, int steps,
PrintStream out) { boolean formatOutput, PrintStream out) {
this.conf = conf; this.conf = conf;
this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize; this.maxSize = maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize;
this.steps = steps == 0 ? INTERVAL_DEFAULT : steps; this.steps = steps == 0 ? INTERVAL_DEFAULT : steps;
this.formatOutput = formatOutput;
this.out = out; this.out = out;
long numIntervals = this.maxSize / this.steps; long numIntervals = this.maxSize / this.steps;
// avoid OutOfMemoryError when allocating an array // avoid OutOfMemoryError when allocating an array
@ -148,10 +152,20 @@ final class FileDistributionCalculator {
private void output() { private void output() {
// write the distribution into the output file // write the distribution into the output file
out.print("Size\tNumFiles\n"); out.print((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
for (int i = 0; i < distribution.length; i++) { for (int i = 0; i < distribution.length; i++) {
if (distribution[i] != 0) { if (distribution[i] != 0) {
if (formatOutput) {
out.print((i == 0 ? "[" : "(")
+ StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * steps))
+ ", "
+ StringUtils.byteDesc((long)
(i == distribution.length - 1 ? maxFileSize : i * steps))
+ "]\t" + distribution[i]);
} else {
out.print(((long) i * steps) + "\t" + distribution[i]); out.print(((long) i * steps) + "\t" + distribution[i]);
}
out.print('\n'); out.print('\n');
} }
} }

View File

@ -20,6 +20,8 @@ package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException; import java.io.IOException;
import java.util.LinkedList; import java.util.LinkedList;
import org.apache.hadoop.util.StringUtils;
/** /**
* File size distribution visitor. * File size distribution visitor.
* *
@ -67,6 +69,7 @@ class FileDistributionVisitor extends TextWriterImageVisitor {
private FileContext current; private FileContext current;
private boolean inInode = false; private boolean inInode = false;
private boolean formatOutput = false;
/** /**
* File or directory information. * File or directory information.
@ -78,12 +81,12 @@ class FileDistributionVisitor extends TextWriterImageVisitor {
int replication; int replication;
} }
public FileDistributionVisitor(String filename, public FileDistributionVisitor(String filename, long maxSize, int step,
long maxSize, boolean formatOutput) throws IOException {
int step) throws IOException {
super(filename, false); super(filename, false);
this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize); this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
this.step = (step == 0 ? INTERVAL_DEFAULT : step); this.step = (step == 0 ? INTERVAL_DEFAULT : step);
this.formatOutput = formatOutput;
long numIntervals = this.maxSize / this.step; long numIntervals = this.maxSize / this.step;
if(numIntervals >= Integer.MAX_VALUE) if(numIntervals >= Integer.MAX_VALUE)
throw new IOException("Too many distribution intervals " + numIntervals); throw new IOException("Too many distribution intervals " + numIntervals);
@ -113,9 +116,22 @@ class FileDistributionVisitor extends TextWriterImageVisitor {
private void output() throws IOException { private void output() throws IOException {
// write the distribution into the output file // write the distribution into the output file
write("Size\tNumFiles\n"); write((formatOutput ? "Size Range" : "Size") + "\tNumFiles\n");
for(int i = 0; i < distribution.length; i++) for (int i = 0; i < distribution.length; i++) {
write(((long)i * step) + "\t" + distribution[i] + "\n"); if (distribution[i] > 0) {
if (formatOutput) {
write((i == 0 ? "[" : "(")
+ StringUtils.byteDesc(((long) (i == 0 ? 0 : i - 1) * step))
+ ", "
+ StringUtils.byteDesc((long)
(i == distribution.length - 1 ? maxFileSize : i * step))
+ "]\t"
+ distribution[i] + "\n");
} else {
write(((long) i * step) + "\t" + distribution[i] + "\n");
}
}
}
System.out.println("totalFiles = " + totalFiles); System.out.println("totalFiles = " + totalFiles);
System.out.println("totalDirectories = " + totalDirectories); System.out.println("totalDirectories = " + totalDirectories);
System.out.println("totalBlocks = " + totalBlocks); System.out.println("totalBlocks = " + totalBlocks);

View File

@ -47,60 +47,62 @@ public class OfflineImageViewer {
public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class); public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
private final static String usage = private final static String usage =
"Usage: bin/hdfs oiv_legacy [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" + "Usage: bin/hdfs oiv_legacy [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n"
"Offline Image Viewer\n" + + "Offline Image Viewer\n"
"View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" + + "View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n"
"saving the results in OUTPUTFILE.\n" + + "saving the results in OUTPUTFILE.\n"
"\n" + + "\n"
"The oiv utility will attempt to parse correctly formed image files\n" + + "The oiv utility will attempt to parse correctly formed image files\n"
"and will abort fail with mal-formed image files.\n" + + "and will abort fail with mal-formed image files.\n"
"\n" + + "\n"
"The tool works offline and does not require a running cluster in\n" + + "The tool works offline and does not require a running cluster in\n"
"order to process an image file.\n" + + "order to process an image file.\n"
"\n" + + "\n"
"The following image processors are available:\n" + + "The following image processors are available:\n"
" * Ls: The default image processor generates an lsr-style listing\n" + + " * Ls: The default image processor generates an lsr-style listing\n"
" of the files in the namespace, with the same fields in the same\n" + + " of the files in the namespace, with the same fields in the same\n"
" order. Note that in order to correctly determine file sizes,\n" + + " order. Note that in order to correctly determine file sizes,\n"
" this formatter cannot skip blocks and will override the\n" + + " this formatter cannot skip blocks and will override the\n"
" -skipBlocks option.\n" + + " -skipBlocks option.\n"
" * Indented: This processor enumerates over all of the elements in\n" + + " * Indented: This processor enumerates over all of the elements in\n"
" the fsimage file, using levels of indentation to delineate\n" + + " the fsimage file, using levels of indentation to delineate\n"
" sections within the file.\n" + + " sections within the file.\n"
" * Delimited: Generate a text file with all of the elements common\n" + + " * Delimited: Generate a text file with all of the elements common\n"
" to both inodes and inodes-under-construction, separated by a\n" + + " to both inodes and inodes-under-construction, separated by a\n"
" delimiter. The default delimiter is \u0001, though this may be\n" + + " delimiter. The default delimiter is \u0001, though this may be\n"
" changed via the -delimiter argument. This processor also overrides\n" + + " changed via the -delimiter argument. This processor also overrides\n"
" the -skipBlocks option for the same reason as the Ls processor\n" + + " the -skipBlocks option for the same reason as the Ls processor\n"
" * XML: This processor creates an XML document with all elements of\n" + + " * XML: This processor creates an XML document with all elements of\n"
" the fsimage enumerated, suitable for further analysis by XML\n" + + " the fsimage enumerated, suitable for further analysis by XML\n"
" tools.\n" + + " tools.\n"
" * FileDistribution: This processor analyzes the file size\n" + + " * FileDistribution: This processor analyzes the file size\n"
" distribution in the image.\n" + + " distribution in the image.\n"
" -maxSize specifies the range [0, maxSize] of file sizes to be\n" + + " -maxSize specifies the range [0, maxSize] of file sizes to be\n"
" analyzed (128GB by default).\n" + + " analyzed (128GB by default).\n"
" -step defines the granularity of the distribution. (2MB by default)\n" + + " -step defines the granularity of the distribution. (2MB by default)\n"
" * NameDistribution: This processor analyzes the file names\n" + + " -format formats the output result in a human-readable fashion\n"
" in the image and prints total number of file names and how frequently\n" + + " rather than a number of bytes. (false by default)\n"
" file names are reused.\n" + + " * NameDistribution: This processor analyzes the file names\n"
"\n" + + " in the image and prints total number of file names and how frequently\n"
"Required command line arguments:\n" + + " file names are reused.\n"
"-i,--inputFile <arg> FSImage file to process.\n" + + "\n"
"-o,--outputFile <arg> Name of output file. If the specified\n" + + "Required command line arguments:\n"
" file exists, it will be overwritten.\n" + + "-i,--inputFile <arg> FSImage file to process.\n"
"\n" + + "-o,--outputFile <arg> Name of output file. If the specified\n"
"Optional command line arguments:\n" + + " file exists, it will be overwritten.\n"
"-p,--processor <arg> Select which type of processor to apply\n" + + "\n"
" against image file." + + "Optional command line arguments:\n"
" (Ls|XML|Delimited|Indented|FileDistribution).\n" + + "-p,--processor <arg> Select which type of processor to apply\n"
"-h,--help Display usage information and exit\n" + + " against image file."
"-printToScreen For processors that write to a file, also\n" + + " (Ls|XML|Delimited|Indented|FileDistribution).\n"
" output to screen. On large image files this\n" + + "-h,--help Display usage information and exit\n"
" will dramatically increase processing time.\n" + + "-printToScreen For processors that write to a file, also\n"
"-skipBlocks Skip inodes' blocks information. May\n" + + " output to screen. On large image files this\n"
" significantly decrease output.\n" + + " will dramatically increase processing time.\n"
" (default = false).\n" + + "-skipBlocks Skip inodes' blocks information. May\n"
"-delimiter <arg> Delimiting string to use with Delimited processor\n"; + " significantly decrease output.\n"
+ " (default = false).\n"
+ "-delimiter <arg> Delimiting string to use with Delimited processor\n";
private final boolean skipBlocks; private final boolean skipBlocks;
private final String inputFile; private final String inputFile;
@ -188,6 +190,7 @@ public class OfflineImageViewer {
options.addOption("h", "help", false, ""); options.addOption("h", "help", false, "");
options.addOption("maxSize", true, ""); options.addOption("maxSize", true, "");
options.addOption("step", true, ""); options.addOption("step", true, "");
options.addOption("format", false, "");
options.addOption("skipBlocks", false, ""); options.addOption("skipBlocks", false, "");
options.addOption("printToScreen", false, ""); options.addOption("printToScreen", false, "");
options.addOption("delimiter", true, ""); options.addOption("delimiter", true, "");
@ -253,7 +256,8 @@ public class OfflineImageViewer {
} else if (processor.equals("FileDistribution")) { } else if (processor.equals("FileDistribution")) {
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0")); long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
int step = Integer.parseInt(cmd.getOptionValue("step", "0")); int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
v = new FileDistributionVisitor(outputFile, maxSize, step); boolean formatOutput = cmd.hasOption("format");
v = new FileDistributionVisitor(outputFile, maxSize, step, formatOutput);
} else if (processor.equals("NameDistribution")) { } else if (processor.equals("NameDistribution")) {
v = new NameDistributionVisitor(outputFile, printToScreen); v = new NameDistributionVisitor(outputFile, printToScreen);
} else { } else {

View File

@ -67,6 +67,8 @@ public class OfflineImageViewerPB {
+ " -maxSize specifies the range [0, maxSize] of file sizes to be\n" + " -maxSize specifies the range [0, maxSize] of file sizes to be\n"
+ " analyzed (128GB by default).\n" + " analyzed (128GB by default).\n"
+ " -step defines the granularity of the distribution. (2MB by default)\n" + " -step defines the granularity of the distribution. (2MB by default)\n"
+ " -format formats the output result in a human-readable fashion\n"
+ " rather than a number of bytes. (false by default)\n"
+ " * Web: Run a viewer to expose read-only WebHDFS API.\n" + " * Web: Run a viewer to expose read-only WebHDFS API.\n"
+ " -addr specifies the address to listen. (localhost:5978 by default)\n" + " -addr specifies the address to listen. (localhost:5978 by default)\n"
+ " * Delimited (experimental): Generate a text file with all of the elements common\n" + " * Delimited (experimental): Generate a text file with all of the elements common\n"
@ -111,6 +113,7 @@ public class OfflineImageViewerPB {
options.addOption("h", "help", false, ""); options.addOption("h", "help", false, "");
options.addOption("maxSize", true, ""); options.addOption("maxSize", true, "");
options.addOption("step", true, ""); options.addOption("step", true, "");
options.addOption("format", false, "");
options.addOption("addr", true, ""); options.addOption("addr", true, "");
options.addOption("delimiter", true, ""); options.addOption("delimiter", true, "");
options.addOption("t", "temp", true, ""); options.addOption("t", "temp", true, "");
@ -175,27 +178,28 @@ public class OfflineImageViewerPB {
case "FileDistribution": case "FileDistribution":
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0")); long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
int step = Integer.parseInt(cmd.getOptionValue("step", "0")); int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
new FileDistributionCalculator(conf, maxSize, step, out).visit( boolean formatOutput = cmd.hasOption("format");
new RandomAccessFile(inputFile, "r")); new FileDistributionCalculator(conf, maxSize, step, formatOutput, out)
.visit(new RandomAccessFile(inputFile, "r"));
break; break;
case "XML": case "XML":
new PBImageXmlWriter(conf, out).visit( new PBImageXmlWriter(conf, out).visit(new RandomAccessFile(inputFile,
new RandomAccessFile(inputFile, "r")); "r"));
break; break;
case "ReverseXML": case "ReverseXML":
try { try {
OfflineImageReconstructor.run(inputFile, outputFile); OfflineImageReconstructor.run(inputFile, outputFile);
} catch (Exception e) { } catch (Exception e) {
System.err.println("OfflineImageReconstructor failed: " + System.err.println("OfflineImageReconstructor failed: "
e.getMessage()); + e.getMessage());
e.printStackTrace(System.err); e.printStackTrace(System.err);
System.exit(1); System.exit(1);
} }
break; break;
case "Web": case "Web":
String addr = cmd.getOptionValue("addr", "localhost:5978"); String addr = cmd.getOptionValue("addr", "localhost:5978");
try (WebImageViewer viewer = new WebImageViewer( try (WebImageViewer viewer =
NetUtils.createSocketAddr(addr))) { new WebImageViewer(NetUtils.createSocketAddr(addr))) {
viewer.start(inputFile); viewer.start(inputFile);
} }
break; break;

View File

@ -239,6 +239,7 @@ Usage: `hdfs oiv [OPTIONS] -i INPUT_FILE`
| `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. | | `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. |
| `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. | | `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. |
| `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. | | `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. |
| `-format` | Format the output result in a human-readable fashion rather than as a number of bytes (false by default). This option is used with FileDistribution processor. |
| `-delimiter` *arg* | Delimiting string to use with Delimited processor. | | `-delimiter` *arg* | Delimiting string to use with Delimited processor. |
| `-t`,`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. | | `-t`,`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. |
| `-h`,`--help` | Display the tool usage and help information and exit. | | `-h`,`--help` | Display the tool usage and help information and exit. |

View File

@ -150,6 +150,7 @@ Options
| `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. | | `-addr` *address* | Specify the address(host:port) to listen. (localhost:5978 by default). This option is used with Web processor. |
| `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. | | `-maxSize` *size* | Specify the range [0, maxSize] of file sizes to be analyzed in bytes (128GB by default). This option is used with FileDistribution processor. |
| `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. | | `-step` *size* | Specify the granularity of the distribution in bytes (2MB by default). This option is used with FileDistribution processor. |
| `-format` | Format the output result in a human-readable fashion rather than as a number of bytes (false by default). This option is used with FileDistribution processor. |
| `-delimiter` *arg* | Delimiting string to use with Delimited processor. | | `-delimiter` *arg* | Delimiting string to use with Delimited processor. |
| `-t`\|`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. | | `-t`\|`--temp` *temporary dir* | Use temporary dir to cache intermediate result to generate Delimited outputs. If not set, Delimited processor constructs the namespace in memory before outputting text. |
| `-h`\|`--help` | Display the tool usage and help information and exit. | | `-h`\|`--help` | Display the tool usage and help information and exit. |

View File

@ -237,7 +237,7 @@ public class TestOfflineImageViewer {
File truncatedFile = new File(tempDir, "truncatedFsImage"); File truncatedFile = new File(tempDir, "truncatedFsImage");
PrintStream output = new PrintStream(NullOutputStream.NULL_OUTPUT_STREAM); PrintStream output = new PrintStream(NullOutputStream.NULL_OUTPUT_STREAM);
copyPartOfFile(originalFsimage, truncatedFile); copyPartOfFile(originalFsimage, truncatedFile);
new FileDistributionCalculator(new Configuration(), 0, 0, output) new FileDistributionCalculator(new Configuration(), 0, 0, false, output)
.visit(new RandomAccessFile(truncatedFile, "r")); .visit(new RandomAccessFile(truncatedFile, "r"));
} }
@ -259,7 +259,7 @@ public class TestOfflineImageViewer {
public void testFileDistributionCalculator() throws IOException { public void testFileDistributionCalculator() throws IOException {
ByteArrayOutputStream output = new ByteArrayOutputStream(); ByteArrayOutputStream output = new ByteArrayOutputStream();
PrintStream o = new PrintStream(output); PrintStream o = new PrintStream(output);
new FileDistributionCalculator(new Configuration(), 0, 0, o) new FileDistributionCalculator(new Configuration(), 0, 0, false, o)
.visit(new RandomAccessFile(originalFsimage, "r")); .visit(new RandomAccessFile(originalFsimage, "r"));
o.close(); o.close();
@ -620,4 +620,24 @@ public class TestOfflineImageViewer {
IOUtils.closeStream(out); IOUtils.closeStream(out);
} }
} }
/**
 * Runs the FileDistribution processor with the -format flag against the
 * test fsimage while System.out is redirected into an in-memory buffer,
 * then checks that the run returned 0 and that the captured text contains
 * the range label "(0 B, 8 B]".
 */
@Test
public void testOfflineImageViewerWithFormatOption() throws Exception {
  final PrintStream originalStdout = System.out;
  final ByteArrayOutputStream captured = new ByteArrayOutputStream();
  final PrintStream capturingStream = new PrintStream(captured);
  final String[] args = {
      "-i", originalFsimage.getAbsolutePath(),
      "-o", "-",
      "-p", "FileDistribution",
      "-maxSize", "512",
      "-step", "8",
      "-format"};
  try {
    // Swap stdout only for the duration of the run; restored in finally.
    System.setOut(capturingStream);
    final int exitCode = OfflineImageViewerPB.run(args);
    assertEquals(0, exitCode);
    final String printed = captured.toString();
    Assert.assertTrue(printed.contains("(0 B, 8 B]"));
  } finally {
    System.setOut(originalStdout);
    IOUtils.closeStream(capturingStream);
  }
}
} }