From 55bb4eca7350c780085f52d15098ef559352c840 Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Wed, 28 Jan 2015 12:36:29 -0800 Subject: [PATCH] HDFS-6673. Add delimited format support to PB OIV tool. Contributed by Eddy Xu. (cherry picked from commit caf7298e49f646a40023af999f62d61846fde5e2) --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 + hadoop-hdfs-project/hadoop-hdfs/pom.xml | 5 + .../offlineImageViewer/FSImageLoader.java | 4 +- .../OfflineImageViewerPB.java | 21 +- .../PBImageDelimitedTextWriter.java | 132 ++++ .../offlineImageViewer/PBImageTextWriter.java | 586 ++++++++++++++++++ .../TestOfflineImageViewer.java | 54 +- 7 files changed, 799 insertions(+), 5 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 77e7bb339d7..a9e7491b0b4 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -23,6 +23,8 @@ Release 2.7.0 - UNRELEASED HDFS-7056. Snapshot support for truncate. (Plamen Jeliazkov and shv) + HDFS-6673. Add delimited format support to PB OIV tool. (Eddy Xu via wang) + IMPROVEMENTS HDFS-7055. Add tracing to DFSInputStream (cmccabe) diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 6df73b75312..3a95d8566e3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -197,6 +197,11 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd"> test-jar test + + org.fusesource.leveldbjni + leveldbjni-all + 1.8 + org.bouncycastle diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java index 2f2fa5fa386..fd291061923 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/FSImageLoader.java @@ -237,7 +237,7 @@ class FSImageLoader { return inodes; } - private static String[] loadStringTable(InputStream in) throws + static String[] loadStringTable(InputStream in) throws IOException { FsImageProto.StringTableSection s = FsImageProto.StringTableSection .parseDelimitedFrom(in); @@ -479,7 +479,7 @@ class FSImageLoader { } } - private long getFileSize(FsImageProto.INodeSection.INodeFile f) { + static long getFileSize(FsImageProto.INodeSection.INodeFile f) { long size = 0; for (HdfsProtos.BlockProto p : f.getBlocksList()) { size += p.getNumBytes(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java index 8f00ff623fc..9cba3c8892a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/OfflineImageViewerPB.java @@ -65,6 +65,10 @@ public class OfflineImageViewerPB { + " -step defines the 
granularity of the distribution. (2MB by default)\n" + " * Web: Run a viewer to expose read-only WebHDFS API.\n" + " -addr specifies the address to listen. (localhost:5978 by default)\n" + + " * Delimited: Generate a text file with all of the elements common\n" + + " to both inodes and inodes-under-construction, separated by a\n" + + " delimiter. The default delimiter is \\t, though this may be\n" + + " changed via the -delimiter argument.\n" + "\n" + "Required command line arguments:\n" + "-i,--inputFile FSImage file to process.\n" @@ -74,8 +78,12 @@ public class OfflineImageViewerPB { + " file exists, it will be overwritten.\n" + " (output to stdout by default)\n" + "-p,--processor Select which type of processor to apply\n" - + " against image file. (XML|FileDistribution|Web)\n" + + " against image file. (XML|FileDistribution|Web|Delimited)\n" + " (Web by default)\n" + + "-delimiter Delimiting string to use with Delimited processor\n" + + "-t,--temp Use temporary dir to cache intermediate result to generate\n" + + " Delimited outputs. If not set, Delimited processor constructs\n" + + " the namespace in memory before outputting text." + "-h,--help Display usage information and exit\n"; /** @@ -97,6 +105,8 @@ public class OfflineImageViewerPB { options.addOption("maxSize", true, ""); options.addOption("step", true, ""); options.addOption("addr", true, ""); + options.addOption("delimiter", true, ""); + options.addOption("t", "temp", true, ""); return options; } @@ -141,6 +151,9 @@ public class OfflineImageViewerPB { String inputFile = cmd.getOptionValue("i"); String processor = cmd.getOptionValue("p", "Web"); String outputFile = cmd.getOptionValue("o", "-"); + String delimiter = cmd.getOptionValue("delimiter", + PBImageDelimitedTextWriter.DEFAULT_DELIMITER); + String tempPath = cmd.getOptionValue("t", ""); Configuration conf = new Configuration(); try (PrintStream out = outputFile.equals("-") ? @@ -163,6 +176,12 @@ public class OfflineImageViewerPB { viewer.start(inputFile); } break; + case "Delimited": + try (PBImageDelimitedTextWriter writer = + new PBImageDelimitedTextWriter(out, delimiter, tempPath)) { + writer.visit(new RandomAccessFile(inputFile, "r")); + } + break; } return 0; } catch (EOFException e) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java new file mode 100644 index 00000000000..350967d1ff8 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageDelimitedTextWriter.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeDirectory; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeFile; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INodeSymlink; + +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.text.SimpleDateFormat; +import java.util.Date; + +/** + * A PBImageDelimitedTextWriter generates a text representation of the PB fsimage, + * with each element separated by a delimiter string. All of the elements + * common to both inodes and inodes-under-construction are included. When + * processing an fsimage with a layout version that did not include an + * element, such as AccessTime, the output file will include a column + * for the value, but no value will be included. + * + * Individual block information for each file is not currently included. + * + * The default delimiter is tab, as this is an unlikely value to be included in + * an inode path or other text metadata. The delimiter value can be via the + * constructor. + */ +public class PBImageDelimitedTextWriter extends PBImageTextWriter { + static final String DEFAULT_DELIMITER = "\t"; + private static final String DATE_FORMAT="yyyy-MM-dd HH:mm"; + private final SimpleDateFormat dateFormatter = + new SimpleDateFormat(DATE_FORMAT); + + private final String delimiter; + + PBImageDelimitedTextWriter(PrintStream out, String delimiter, String tempPath) + throws IOException { + super(out, tempPath); + this.delimiter = delimiter; + } + + private String formatDate(long date) { + return dateFormatter.format(new Date(date)); + } + + private void append(StringBuffer buffer, int field) { + buffer.append(delimiter); + buffer.append(field); + } + + private void append(StringBuffer buffer, long field) { + buffer.append(delimiter); + buffer.append(field); + } + + private void append(StringBuffer buffer, String field) { + buffer.append(delimiter); + buffer.append(field); + } + + @Override + public String getEntry(String parent, INode inode) { + StringBuffer buffer = new StringBuffer(); + String path = new File(parent, inode.getName().toStringUtf8()).toString(); + buffer.append(path); + PermissionStatus p = null; + + switch (inode.getType()) { + case FILE: + INodeFile file = inode.getFile(); + p = getPermission(file.getPermission()); + append(buffer, file.getReplication()); + append(buffer, formatDate(file.getModificationTime())); + append(buffer, formatDate(file.getAccessTime())); + append(buffer, file.getPreferredBlockSize()); + append(buffer, file.getBlocksCount()); + append(buffer, FSImageLoader.getFileSize(file)); + append(buffer, 0); // NS_QUOTA + append(buffer, 0); // DS_QUOTA + break; + case DIRECTORY: + INodeDirectory dir = inode.getDirectory(); + p = getPermission(dir.getPermission()); + append(buffer, 0); // Replication + append(buffer, formatDate(dir.getModificationTime())); + append(buffer, formatDate(0)); // Access time. + append(buffer, 0); // Block size. + append(buffer, 0); // Num blocks. + append(buffer, 0); // Num bytes. 
+ append(buffer, dir.getNsQuota()); + append(buffer, dir.getDsQuota()); + break; + case SYMLINK: + INodeSymlink s = inode.getSymlink(); + p = getPermission(s.getPermission()); + append(buffer, 0); // Replication + append(buffer, formatDate(s.getModificationTime())); + append(buffer, formatDate(s.getAccessTime())); + append(buffer, 0); // Block size. + append(buffer, 0); // Num blocks. + append(buffer, 0); // Num bytes. + append(buffer, 0); // NS_QUOTA + append(buffer, 0); // DS_QUOTA + break; + default: + break; + } + assert p != null; + append(buffer, p.getPermission().toString()); + append(buffer, p.getUserName()); + append(buffer, p.getGroupName()); + return buffer.toString(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java new file mode 100644 index 00000000000..0da263d87ae --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/PBImageTextWriter.java @@ -0,0 +1,586 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hdfs.tools.offlineImageViewer; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; +import com.google.common.io.LimitInputStream; +import org.apache.commons.io.FileUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatPBINode; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf; +import org.apache.hadoop.hdfs.server.namenode.FSImageFormatProtobuf.SectionName; +import org.apache.hadoop.hdfs.server.namenode.FSImageUtil; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; +import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.INode; +import org.apache.hadoop.hdfs.server.namenode.INodeId; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.Time; +import org.fusesource.leveldbjni.JniDBFactory; +import org.iq80.leveldb.DB; +import org.iq80.leveldb.Options; +import org.iq80.leveldb.WriteBatch; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedInputStream; +import java.io.Closeable; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.io.RandomAccessFile; +import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +/** + * This class reads the protobuf-based fsimage and generates text output + * for each inode to {@link PBImageTextWriter#out}. The sub-class can override + * {@link getEntry()} to generate formatted string for each inode. + * + * Since protobuf-based fsimage does not guarantee the order of inodes and + * directories, PBImageTextWriter runs two-phase scans: + * + *
+ * <ol>
+ *   <li>In the first phase, PBImageTextWriter scans the INode sections to read
+ * the filename of each directory. It also scans the INode_Dir sections to load
+ * the relationships between a directory and its children. It uses this
+ * metadata to build the FS namespace, which is stored in the
+ * {@link MetadataMap}.</li>
+ *   <li>In the second phase, PBImageTextWriter re-scans the INode sections. For
+ * each inode, it looks up the path of its parent directory in the
+ * {@link MetadataMap} and generates the output.</li>
+ * </ol>
+ *
+ * Two variants of {@link MetadataMap} are provided. {@link InMemoryMetadataDB}
+ * stores all metadata in memory (O(n) memory), while {@link LevelDBMetadataMap}
+ * stores metadata in LevelDB on disk (O(1) memory). Users can choose between
+ * them based on the time/space tradeoff.
+ */
+abstract class PBImageTextWriter implements Closeable {
+  private static final Logger LOG =
+      LoggerFactory.getLogger(PBImageTextWriter.class);
+
+  /**
+   * This metadata map is used to construct the namespace before generating
+   * text outputs.
+   *
+   * It contains two mapping relationships:
+   * <ul>
+   *   <li>It maps each inode (inode Id) to its parent directory (inode Id).</li>
+   *   <li>It maps each directory's inode Id to its name.</li>
+   * </ul>
    + */ + private static interface MetadataMap extends Closeable { + /** + * Associate an inode with its parent directory. + */ + public void putDirChild(long parentId, long childId) throws IOException; + + /** + * Associate a directory with its inode Id. + */ + public void putDir(INode dir) throws IOException; + + /** Get the full path of the parent directory for the given inode. */ + public String getParentPath(long inode) throws IOException; + + /** Synchronize metadata to persistent storage, if possible */ + public void sync() throws IOException; + } + + /** + * Maintain all the metadata in memory. + */ + private static class InMemoryMetadataDB implements MetadataMap { + /** + * Represent a directory in memory. + */ + private static class Dir { + private final long inode; + private Dir parent = null; + private String name; + private String path = null; // cached full path of the directory. + + Dir(long inode, String name) { + this.inode = inode; + this.name = name; + } + + private void setParent(Dir parent) { + Preconditions.checkState(this.parent == null); + this.parent = parent; + } + + /** + * Returns the full path of this directory. + */ + private String getPath() { + if (this.parent == null) { + return "/"; + } + if (this.path == null) { + this.path = new File(parent.getPath(), name).toString(); + this.name = null; + } + return this.path; + } + + @Override + public boolean equals(Object o) { + return o instanceof Dir && inode == ((Dir) o).inode; + } + + @Override + public int hashCode() { + return Long.valueOf(inode).hashCode(); + } + } + + /** INode Id to Dir object mapping */ + private Map dirMap = new HashMap<>(); + + /** Children to parent directory INode ID mapping. */ + private Map dirChildMap = new HashMap<>(); + + InMemoryMetadataDB() { + } + + @Override + public void close() throws IOException { + } + + @Override + public void putDirChild(long parentId, long childId) { + Dir parent = dirMap.get(parentId); + Dir child = dirMap.get(childId); + if (child != null) { + child.setParent(parent); + } + Preconditions.checkState(!dirChildMap.containsKey(childId)); + dirChildMap.put(childId, parent); + } + + @Override + public void putDir(INode p) { + Preconditions.checkState(!dirMap.containsKey(p.getId())); + Dir dir = new Dir(p.getId(), p.getName().toStringUtf8()); + dirMap.put(p.getId(), dir); + } + + public String getParentPath(long inode) throws IOException { + if (inode == INodeId.ROOT_INODE_ID) { + return ""; + } + Dir parent = dirChildMap.get(inode); + Preconditions.checkState(parent != null, + "Can not find parent directory for INode: %s", inode); + return parent.getPath(); + } + + @Override + public void sync() { + } + } + + /** + * A MetadataMap that stores metadata in LevelDB. + */ + private static class LevelDBMetadataMap implements MetadataMap { + /** + * Store metadata in LevelDB. 
+ */ + private static class LevelDBStore implements Closeable { + private DB db = null; + private WriteBatch batch = null; + private int writeCount = 0; + private static final int BATCH_SIZE = 1024; + + LevelDBStore(final File dbPath) throws IOException { + Options options = new Options(); + options.createIfMissing(true); + options.errorIfExists(true); + db = JniDBFactory.factory.open(dbPath, options); + batch = db.createWriteBatch(); + } + + @Override + public void close() throws IOException { + if (batch != null) { + IOUtils.cleanup(null, batch); + batch = null; + } + IOUtils.cleanup(null, db); + db = null; + } + + public void put(byte[] key, byte[] value) throws IOException { + batch.put(key, value); + writeCount++; + if (writeCount >= BATCH_SIZE) { + sync(); + } + } + + public byte[] get(byte[] key) throws IOException { + return db.get(key); + } + + public void sync() throws IOException { + try { + db.write(batch); + } finally { + batch.close(); + batch = null; + } + batch = db.createWriteBatch(); + writeCount = 0; + } + } + + /** + * A LRU cache for directory path strings. + * + * The key of this LRU cache is the inode of a directory. + */ + private static class DirPathCache extends LinkedHashMap { + private final static int CAPACITY = 16 * 1024; + + DirPathCache() { + super(CAPACITY); + } + + @Override + protected boolean removeEldestEntry(Map.Entry entry) { + return super.size() > CAPACITY; + } + } + + /** Map the child inode to the parent directory inode. */ + private LevelDBStore dirChildMap = null; + /** Directory entry map */ + private LevelDBStore dirMap = null; + private DirPathCache dirPathCache = new DirPathCache(); + + LevelDBMetadataMap(String baseDir) throws IOException { + File dbDir = new File(baseDir); + if (dbDir.exists()) { + FileUtils.deleteDirectory(dbDir); + } + if (!dbDir.mkdirs()) { + throw new IOException("Failed to mkdir on " + dbDir); + } + try { + dirChildMap = new LevelDBStore(new File(dbDir, "dirChildMap")); + dirMap = new LevelDBStore(new File(dbDir, "dirMap")); + } catch (IOException e) { + LOG.error("Failed to open LevelDBs", e); + IOUtils.cleanup(null, this); + } + } + + @Override + public void close() throws IOException { + IOUtils.cleanup(null, dirChildMap, dirMap); + dirChildMap = null; + dirMap = null; + } + + private static byte[] toBytes(long value) { + return ByteBuffer.allocate(8).putLong(value).array(); + } + + private static byte[] toBytes(String value) + throws UnsupportedEncodingException { + return value.getBytes("UTF-8"); + } + + private static long toLong(byte[] bytes) { + Preconditions.checkArgument(bytes.length == 8); + return ByteBuffer.wrap(bytes).getLong(); + } + + private static String toString(byte[] bytes) throws IOException { + try { + return new String(bytes, "UTF-8"); + } catch (UnsupportedEncodingException e) { + throw new IOException(e); + } + } + + @Override + public void putDirChild(long parentId, long childId) throws IOException { + dirChildMap.put(toBytes(childId), toBytes(parentId)); + } + + @Override + public void putDir(INode dir) throws IOException { + Preconditions.checkArgument(dir.hasDirectory(), + "INode %s (%s) is not a directory.", dir.getId(), dir.getName()); + dirMap.put(toBytes(dir.getId()), toBytes(dir.getName().toStringUtf8())); + } + + @Override + public String getParentPath(long inode) throws IOException { + if (inode == INodeId.ROOT_INODE_ID) { + return "/"; + } + byte[] bytes = dirChildMap.get(toBytes(inode)); + Preconditions.checkState(bytes != null && bytes.length == 8, + "Can not find parent 
directory for inode %s, " + + "fsimage might be corrupted", inode); + long parent = toLong(bytes); + if (!dirPathCache.containsKey(parent)) { + bytes = dirMap.get(toBytes(parent)); + if (parent != INodeId.ROOT_INODE_ID) { + Preconditions.checkState(bytes != null, + "Can not find parent directory for inode %s, " + + ", the fsimage might be corrupted.", parent); + } + String parentName = toString(bytes); + String parentPath = + new File(getParentPath(parent), parentName).toString(); + dirPathCache.put(parent, parentPath); + } + return dirPathCache.get(parent); + } + + @Override + public void sync() throws IOException { + dirChildMap.sync(); + dirMap.sync(); + } + } + + private String[] stringTable; + private PrintStream out; + private MetadataMap metadataMap = null; + + /** + * Construct a PB FsImage writer to generate text file. + * @param out the writer to output text information of fsimage. + * @param tempPath the path to store metadata. If it is empty, store metadata + * in memory instead. + */ + PBImageTextWriter(PrintStream out, String tempPath) throws IOException { + this.out = out; + if (tempPath.isEmpty()) { + metadataMap = new InMemoryMetadataDB(); + } else { + metadataMap = new LevelDBMetadataMap(tempPath); + } + } + + @Override + public void close() throws IOException { + IOUtils.cleanup(null, metadataMap); + } + + /** + * Get text output for the given inode. + * @param parent the path of parent directory + * @param inode the INode object to output. + */ + abstract protected String getEntry(String parent, INode inode); + + public void visit(RandomAccessFile file) throws IOException { + Configuration conf = new Configuration(); + if (!FSImageUtil.checkFileFormat(file)) { + throw new IOException("Unrecognized FSImage"); + } + + FileSummary summary = FSImageUtil.loadSummary(file); + + try (FileInputStream fin = new FileInputStream(file.getFD())) { + InputStream is; + ArrayList sections = + Lists.newArrayList(summary.getSectionsList()); + Collections.sort(sections, + new Comparator() { + @Override + public int compare(FsImageProto.FileSummary.Section s1, + FsImageProto.FileSummary.Section s2) { + FSImageFormatProtobuf.SectionName n1 = + FSImageFormatProtobuf.SectionName.fromString(s1.getName()); + FSImageFormatProtobuf.SectionName n2 = + FSImageFormatProtobuf.SectionName.fromString(s2.getName()); + if (n1 == null) { + return n2 == null ? 
0 : -1; + } else if (n2 == null) { + return -1; + } else { + return n1.ordinal() - n2.ordinal(); + } + } + }); + + for (FileSummary.Section section : sections) { + fin.getChannel().position(section.getOffset()); + is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, section.getLength()))); + switch (SectionName.fromString(section.getName())) { + case STRING_TABLE: + stringTable = FSImageLoader.loadStringTable(is); + break; + default: + break; + } + } + + loadDirectories(fin, sections, summary, conf); + loadINodeDirSection(fin, sections, summary, conf); + metadataMap.sync(); + output(conf, summary, fin, sections); + } + } + + private void output(Configuration conf, FileSummary summary, + FileInputStream fin, ArrayList sections) + throws IOException { + InputStream is; + long startTime = Time.monotonicNow(); + for (FileSummary.Section section : sections) { + if (SectionName.fromString(section.getName()) == SectionName.INODE) { + fin.getChannel().position(section.getOffset()); + is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, section.getLength()))); + outputINodes(is); + } + } + long timeTaken = Time.monotonicNow() - startTime; + LOG.debug("Time to output inodes: {}ms", timeTaken); + } + + protected PermissionStatus getPermission(long perm) { + return FSImageFormatPBINode.Loader.loadPermission(perm, stringTable); + } + + /** Load the directories in the INode section. */ + private void loadDirectories( + FileInputStream fin, List sections, + FileSummary summary, Configuration conf) + throws IOException { + LOG.info("Loading directories"); + long startTime = Time.monotonicNow(); + for (FileSummary.Section section : sections) { + if (SectionName.fromString(section.getName()) + == SectionName.INODE) { + fin.getChannel().position(section.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream(new LimitInputStream( + fin, section.getLength()))); + loadDirectoriesInINodeSection(is); + } + } + long timeTaken = Time.monotonicNow() - startTime; + LOG.info("Finished loading directories in {}ms", timeTaken); + } + + private void loadINodeDirSection( + FileInputStream fin, List sections, + FileSummary summary, Configuration conf) + throws IOException { + LOG.info("Loading INode directory section."); + long startTime = Time.monotonicNow(); + for (FileSummary.Section section : sections) { + if (SectionName.fromString(section.getName()) + == SectionName.INODE_DIR) { + fin.getChannel().position(section.getOffset()); + InputStream is = FSImageUtil.wrapInputStreamForCompression(conf, + summary.getCodec(), new BufferedInputStream( + new LimitInputStream(fin, section.getLength()))); + buildNamespace(is); + } + } + long timeTaken = Time.monotonicNow() - startTime; + LOG.info("Finished loading INode directory section in {}ms", timeTaken); + } + + /** + * Load the filenames of the directories from the INode section. 
+ */ + private void loadDirectoriesInINodeSection(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + LOG.info("Loading directories in INode section."); + int numDirs = 0; + for (int i = 0; i < s.getNumInodes(); ++i) { + INode p = INode.parseDelimitedFrom(in); + if (LOG.isDebugEnabled() && i % 10000 == 0) { + LOG.debug("Scanned {} inodes.", i); + } + if (p.hasDirectory()) { + metadataMap.putDir(p); + numDirs++; + } + } + LOG.info("Found {} directories in INode section.", numDirs); + } + + /** + * Scan the INodeDirectory section to construct the namespace. + */ + private void buildNamespace(InputStream in) throws IOException { + int count = 0; + while (true) { + FsImageProto.INodeDirectorySection.DirEntry e = + FsImageProto.INodeDirectorySection.DirEntry.parseDelimitedFrom(in); + if (e == null) { + break; + } + count++; + if (LOG.isDebugEnabled() && count % 10000 == 0) { + LOG.debug("Scanned {} directories.", count); + } + long parentId = e.getParent(); + // Referred INode is not support for now. + for (int i = 0; i < e.getChildrenCount(); i++) { + long childId = e.getChildren(i); + metadataMap.putDirChild(parentId, childId); + } + Preconditions.checkState(e.getRefChildrenCount() == 0); + } + LOG.info("Scanned {} INode directories to build namespace.", count); + } + + private void outputINodes(InputStream in) throws IOException { + INodeSection s = INodeSection.parseDelimitedFrom(in); + LOG.info("Found {} INodes in the INode section", s.getNumInodes()); + for (int i = 0; i < s.getNumInodes(); ++i) { + INode p = INode.parseDelimitedFrom(in); + String parentPath = metadataMap.getParentPath(p.getId()); + out.println(getEntry(parentPath, p)); + + if (LOG.isDebugEnabled() && i % 100000 == 0) { + LOG.debug("Outputted {} INodes.", i); + } + } + LOG.info("Outputted {} INodes.", s.getNumInodes()); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java index 4bb2b79593e..a8d1d544501 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/tools/offlineImageViewer/TestOfflineImageViewer.java @@ -20,22 +20,26 @@ package org.apache.hadoop.hdfs.tools.offlineImageViewer; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; +import java.io.InputStreamReader; import java.io.PrintStream; -import java.io.PrintWriter; import java.io.RandomAccessFile; import java.io.StringReader; -import java.io.StringWriter; import java.net.HttpURLConnection; import java.net.URI; import java.net.URL; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -51,6 +55,7 @@ import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; 
import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -329,6 +334,51 @@ public class TestOfflineImageViewer { } } + @Test + public void testPBDelimitedWriter() throws IOException, InterruptedException { + testPBDelimitedWriter(""); // Test in memory db. + testPBDelimitedWriter( + new FileSystemTestHelper().getTestRootDir() + "/delimited.db"); + } + + private void testPBDelimitedWriter(String db) + throws IOException, InterruptedException { + final String DELIMITER = "\t"; + ByteArrayOutputStream output = new ByteArrayOutputStream(); + + try (PrintStream o = new PrintStream(output)) { + PBImageDelimitedTextWriter v = + new PBImageDelimitedTextWriter(o, DELIMITER, db); + v.visit(new RandomAccessFile(originalFsimage, "r")); + } + + Set fileNames = new HashSet<>(); + try ( + ByteArrayInputStream input = + new ByteArrayInputStream(output.toByteArray()); + BufferedReader reader = + new BufferedReader(new InputStreamReader(input))) { + String line; + while ((line = reader.readLine()) != null) { + System.out.println(line); + String[] fields = line.split(DELIMITER); + assertEquals(12, fields.length); + fileNames.add(fields[0]); + } + } + + // writtenFiles does not contain root directory and "invalid XML char" dir. + for (Iterator it = fileNames.iterator(); it.hasNext(); ) { + String filename = it.next(); + if (filename.startsWith("/dirContainingInvalidXMLChar")) { + it.remove(); + } else if (filename.equals("/")) { + it.remove(); + } + } + assertEquals(writtenFiles.keySet(), fileNames); + } + private static void compareFile(FileStatus expected, FileStatus status) { assertEquals(expected.getAccessTime(), status.getAccessTime()); assertEquals(expected.getBlockSize(), status.getBlockSize());
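
With this patch applied, the Delimited processor is selected through the same oiv command line as the other processors. Below is a minimal sketch of an invocation, assuming hypothetical fsimage and output paths; the -i, -o, -p, -delimiter and -t flags are the ones added or documented in OfflineImageViewerPB above:

    # Write one delimiter-separated line per inode, using a comma instead of
    # the default tab, and spill the intermediate metadata map to LevelDB
    # under /tmp/oiv-temp.db instead of holding it in memory.
    hdfs oiv -p Delimited \
        -delimiter "," \
        -t /tmp/oiv-temp.db \
        -i /tmp/fsimage_example \
        -o /tmp/fsimage-delimited.csv

Each row produced by PBImageDelimitedTextWriter#getEntry() carries twelve fields: path, replication, modification time, access time, preferred block size, block count, file size, NS_QUOTA, DS_QUOTA, permissions, user name, and group name, matching the field count asserted in testPBDelimitedWriter().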