HDFS-6293. Issues with OIV processing PB-based fsimages. Contributed by Kihwal Lee.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/branch-2@1594440 13f79535-47bb-0310-9956-ffa450edef68
Kihwal Lee 2014-05-14 01:15:48 +00:00
parent ffb0d24fef
commit f4b4bf14d3
34 changed files with 3517 additions and 42 deletions

View File

@ -102,6 +102,8 @@ Release 2.5.0 - UNRELEASED
HDFS-6186. Pause deletion of blocks when the namenode starts up. (jing9)
HDFS-6293. Issues with OIV processing PB-based fsimages. (kihwal)
OPTIMIZATIONS
HDFS-6214. Webhdfs has poor throughput for files >2GB (daryn)

View File

@ -49,6 +49,7 @@ function print_usage(){
echo " balancer run a cluster balancing utility"
echo " jmxget get JMX exported values from NameNode or DataNode."
echo " oiv apply the offline fsimage viewer to an fsimage"
echo " oiv_legacy apply the offline fsimage viewer to an legacy fsimage"
echo " oev apply the offline edits viewer to an edits file"
echo " fetchdt fetch a delegation token from the NameNode"
echo " getconf get config values from configuration"
@ -161,6 +162,8 @@ elif [ "$COMMAND" = "jmxget" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.JMXGet
elif [ "$COMMAND" = "oiv" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewerPB
elif [ "COMMAND" = "oiv_legacy" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineImageViewer.OfflineImageViewer
elif [ "$COMMAND" = "oev" ] ; then
CLASS=org.apache.hadoop.hdfs.tools.offlineEditsViewer.OfflineEditsViewer
elif [ "$COMMAND" = "fetchdt" ] ; then

View File

@ -501,6 +501,7 @@ public class DFSConfigKeys extends CommonConfigurationKeys {
public static final String DFS_SECONDARY_NAMENODE_KERBEROS_INTERNAL_SPNEGO_PRINCIPAL_KEY = "dfs.secondary.namenode.kerberos.internal.spnego.principal";
public static final String DFS_NAMENODE_NAME_CACHE_THRESHOLD_KEY = "dfs.namenode.name.cache.threshold";
public static final int DFS_NAMENODE_NAME_CACHE_THRESHOLD_DEFAULT = 10;
public static final String DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY = "dfs.namenode.legacy-oiv-image.dir";
public static final String DFS_NAMESERVICES = "dfs.nameservices";
public static final String DFS_NAMESERVICE_ID = "dfs.nameservice.id";
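
For reference, a minimal sketch of how client code might read the new key; the class name LegacyOivDirCheck and the printed messages are illustrative only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.DFSConfigKeys;

public class LegacyOivDirCheck {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // The key is unset by default; a legacy OIV image is only written
    // when a target directory is configured.
    String dir = conf.get(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
    System.out.println(dir == null
        ? "legacy OIV image saving is disabled"
        : "legacy OIV images will be written to " + dir);
  }
}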

View File

@ -18,9 +18,16 @@
package org.apache.hadoop.hdfs.security.token.delegation;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
import java.io.DataInput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map.Entry;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
@ -43,13 +50,9 @@
import org.apache.hadoop.security.token.delegation.AbstractDelegationTokenSecretManager;
import org.apache.hadoop.security.token.delegation.DelegationKey;
import java.io.DataInput;
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.protobuf.ByteString;
/**
* An HDFS-specific delegation token secret manager.
@ -211,6 +214,18 @@ public synchronized void loadSecretManagerState(SecretManagerState state)
}
}
/**
* Store the current state of the SecretManager for persistence
*
* @param out Output stream for writing into fsimage.
* @param sdPath storage directory path
* @throws IOException
*/
public synchronized void saveSecretManagerStateCompat(DataOutputStream out,
String sdPath) throws IOException {
serializerCompat.save(out, sdPath);
}
public synchronized SecretManagerState saveSecretManagerState() {
SecretManagerSection s = SecretManagerSection.newBuilder()
.setCurrentId(currentId)
@ -406,6 +421,56 @@ private void load(DataInput in) throws IOException {
loadCurrentTokens(in);
}
private void save(DataOutputStream out, String sdPath) throws IOException {
out.writeInt(currentId);
saveAllKeys(out, sdPath);
out.writeInt(delegationTokenSequenceNumber);
saveCurrentTokens(out, sdPath);
}
/**
* Private helper methods to save delegation keys and tokens in fsimage
*/
private synchronized void saveCurrentTokens(DataOutputStream out,
String sdPath) throws IOException {
StartupProgress prog = NameNode.getStartupProgress();
Step step = new Step(StepType.DELEGATION_TOKENS, sdPath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
prog.setTotal(Phase.SAVING_CHECKPOINT, step, currentTokens.size());
Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
out.writeInt(currentTokens.size());
Iterator<DelegationTokenIdentifier> iter = currentTokens.keySet()
.iterator();
while (iter.hasNext()) {
DelegationTokenIdentifier id = iter.next();
id.write(out);
DelegationTokenInformation info = currentTokens.get(id);
out.writeLong(info.getRenewDate());
counter.increment();
}
prog.endStep(Phase.SAVING_CHECKPOINT, step);
}
/*
* Save the current state of allKeys
*/
private synchronized void saveAllKeys(DataOutputStream out, String sdPath)
throws IOException {
StartupProgress prog = NameNode.getStartupProgress();
Step step = new Step(StepType.DELEGATION_KEYS, sdPath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
prog.setTotal(Phase.SAVING_CHECKPOINT, step, allKeys.size());
Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
out.writeInt(allKeys.size());
Iterator<Integer> iter = allKeys.keySet().iterator();
while (iter.hasNext()) {
Integer key = iter.next();
allKeys.get(key).write(out);
counter.increment();
}
prog.endStep(Phase.SAVING_CHECKPOINT, step);
}
/**
* Private helper methods to load Delegation tokens from fsimage
*/
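
For orientation, a hedged reader sketch of the record order that SerializerCompat.save() above produces: currentId, all keys, the token sequence number, then the current tokens. The class name LegacySecretManagerReader and the local variable names are illustrative; DelegationKey and DelegationTokenIdentifier are the Writable types used by the writer.

import java.io.DataInput;
import java.io.IOException;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.security.token.delegation.DelegationKey;

class LegacySecretManagerReader {
  // Mirrors SerializerCompat.save(): currentId, all keys,
  // delegation token sequence number, then the current tokens.
  static void read(DataInput in) throws IOException {
    int currentId = in.readInt();
    int numKeys = in.readInt();
    for (int i = 0; i < numKeys; i++) {
      DelegationKey key = new DelegationKey();
      key.readFields(in);             // one serialized master key
    }
    int sequenceNumber = in.readInt();
    int numTokens = in.readInt();
    for (int i = 0; i < numTokens; i++) {
      DelegationTokenIdentifier id = new DelegationTokenIdentifier();
      id.readFields(in);              // token identifier
      long renewDate = in.readLong(); // paired renew date
    }
  }
}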

View File

@ -27,6 +27,7 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT;
import java.io.DataInput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
@ -61,10 +62,10 @@
import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.LocatedBlock;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.blockmanagement.CacheReplicationMonitor;
@ -953,6 +954,18 @@ private void processCacheReportImpl(final DatanodeDescriptor datanode,
}
}
/**
* Saves the current state of the CacheManager to the DataOutput. Used
* to persist CacheManager state in the FSImage.
* @param out DataOutput to persist state
* @param sdPath path of the storage directory
* @throws IOException
*/
public void saveStateCompat(DataOutputStream out, String sdPath)
throws IOException {
serializerCompat.save(out, sdPath);
}
public PersistState saveState() throws IOException {
ArrayList<CachePoolInfoProto> pools = Lists
.newArrayListWithCapacity(cachePools.size());
@ -1072,6 +1085,12 @@ private void addCacheDirective(final String poolName,
}
private final class SerializerCompat {
private void save(DataOutputStream out, String sdPath) throws IOException {
out.writeLong(nextDirectiveId);
savePools(out, sdPath);
saveDirectives(out, sdPath);
}
private void load(DataInput in) throws IOException {
nextDirectiveId = in.readLong();
// pools need to be loaded first since directives point to their parent pool
@ -1079,6 +1098,42 @@ private void load(DataInput in) throws IOException {
loadDirectives(in);
}
/**
* Save cache pools to fsimage
*/
private void savePools(DataOutputStream out,
String sdPath) throws IOException {
StartupProgress prog = NameNode.getStartupProgress();
Step step = new Step(StepType.CACHE_POOLS, sdPath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
prog.setTotal(Phase.SAVING_CHECKPOINT, step, cachePools.size());
Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
out.writeInt(cachePools.size());
for (CachePool pool: cachePools.values()) {
FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true));
counter.increment();
}
prog.endStep(Phase.SAVING_CHECKPOINT, step);
}
/*
* Save cache entries to fsimage
*/
private void saveDirectives(DataOutputStream out, String sdPath)
throws IOException {
StartupProgress prog = NameNode.getStartupProgress();
Step step = new Step(StepType.CACHE_ENTRIES, sdPath);
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
prog.setTotal(Phase.SAVING_CHECKPOINT, step, directivesById.size());
Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
out.writeInt(directivesById.size());
for (CacheDirective directive : directivesById.values()) {
FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo());
counter.increment();
}
prog.endStep(Phase.SAVING_CHECKPOINT, step);
}
/**
* Load cache pools from fsimage
*/
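
A matching hedged sketch for this section's record order (nextDirectiveId, pools, directives). It assumes readCachePoolInfo/readCacheDirectiveInfo are the existing FSImageSerialization counterparts of the writers used above; the class name is illustrative.

import java.io.DataInput;
import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;

class LegacyCacheSectionReader {
  // Mirrors SerializerCompat.save(): nextDirectiveId, pools, directives.
  static void read(DataInput in) throws IOException {
    long nextDirectiveId = in.readLong();
    int numPools = in.readInt();
    for (int i = 0; i < numPools; i++) {
      CachePoolInfo pool = FSImageSerialization.readCachePoolInfo(in);
    }
    int numDirectives = in.readInt();
    for (int i = 0; i < numDirectives; i++) {
      CacheDirectiveInfo directive =
          FSImageSerialization.readCacheDirectiveInfo(in);
    }
  }
}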

View File

@ -41,6 +41,9 @@ public class CheckpointConf {
/** maximum number of retries when merge errors occur */
private final int maxRetriesOnMergeError;
/** The output dir for legacy OIV image */
private final String legacyOivImageDir;
public CheckpointConf(Configuration conf) {
checkpointCheckPeriod = conf.getLong(
@ -53,6 +56,7 @@ public CheckpointConf(Configuration conf) {
DFS_NAMENODE_CHECKPOINT_TXNS_DEFAULT);
maxRetriesOnMergeError = conf.getInt(DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_KEY,
DFS_NAMENODE_CHECKPOINT_MAX_RETRIES_DEFAULT);
legacyOivImageDir = conf.get(DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
warnForDeprecatedConfigs(conf);
}
@ -83,4 +87,8 @@ public long getTxnCount() {
public int getMaxRetriesOnMergeError() {
return maxRetriesOnMergeError;
}
public String getLegacyOivImageDir() {
return legacyOivImageDir;
}
}
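
A small usage sketch of the new accessor; the key is set programmatically here purely for illustration, whereas a real deployment would put it in hdfs-site.xml:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.server.namenode.CheckpointConf;

public class CheckpointConfExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    conf.set("dfs.namenode.legacy-oiv-image.dir", "/data/legacy-oiv");
    CheckpointConf cc = new CheckpointConf(conf);
    // Returns null when the key is unset, which callers treat as "disabled".
    System.out.println("legacy OIV dir: " + cc.getLegacyOivImageDir());
  }
}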

View File

@ -934,6 +934,25 @@ void saveFSImage(SaveNamespaceContext context, StorageDirectory sd,
storage.setMostRecentCheckpointInfo(txid, Time.now());
}
/**
* Save FSImage in the legacy format. This is not for NN consumption,
* but for tools like OIV.
*/
public void saveLegacyOIVImage(FSNamesystem source, String targetDir,
Canceler canceler) throws IOException {
FSImageCompression compression =
FSImageCompression.createCompression(conf);
long txid = getLastAppliedOrWrittenTxId();
SaveNamespaceContext ctx = new SaveNamespaceContext(source, txid,
canceler);
FSImageFormat.Saver saver = new FSImageFormat.Saver(ctx);
String imageFileName = NNStorage.getLegacyOIVImageFileName(txid);
File imageFile = new File(targetDir, imageFileName);
saver.save(imageFile, compression);
archivalManager.purgeOldLegacyOIVImages(targetDir, txid);
}
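
The caller pattern, sketched below, mirrors the SecondaryNameNode and StandbyCheckpointer hunks later in this commit; the helper name LegacyOivSaveSketch is illustrative.

import java.io.IOException;
import org.apache.hadoop.hdfs.server.namenode.FSImage;
import org.apache.hadoop.hdfs.server.namenode.FSNamesystem;
import org.apache.hadoop.hdfs.util.Canceler;

class LegacyOivSaveSketch {
  // fsImage and namesystem would come from the running checkpointer.
  static void saveIfConfigured(FSImage fsImage, FSNamesystem namesystem,
      String legacyOivImageDir) throws IOException {
    if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
      // Writes fsimage_legacy_oiv_<txid> into the target directory and
      // purges older copies beyond the retention count.
      fsImage.saveLegacyOIVImage(namesystem, legacyOivImageDir,
          new Canceler());
    }
  }
}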
/**
* FSImageSaver is being run in a separate thread when saving
* FSImage. There is one thread per each copy of the image.

View File

@ -21,14 +21,20 @@
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.security.DigestInputStream;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
@ -51,6 +57,7 @@
import org.apache.hadoop.hdfs.server.blockmanagement.BlockManager;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption;
import org.apache.hadoop.hdfs.server.common.InconsistentFSStateException;
import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature;
import org.apache.hadoop.hdfs.server.namenode.snapshot.FileDiffList;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.Snapshot;
@ -61,6 +68,7 @@
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step;
import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType;
import org.apache.hadoop.hdfs.util.ReadOnlyList;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.Text;
@ -70,8 +78,105 @@
import com.google.common.annotations.VisibleForTesting;
/**
* This class loads and stores the FSImage of the NameNode. The file
* src/main/proto/fsimage.proto describes the on-disk layout of the FSImage.
* Contains inner classes for reading or writing the on-disk format for
* FSImages.
*
* In particular, the format of the FSImage looks like:
* <pre>
* FSImage {
* layoutVersion: int, namespaceID: int, numberItemsInFSDirectoryTree: long,
* namesystemGenerationStampV1: long, namesystemGenerationStampV2: long,
* generationStampAtBlockIdSwitch: long, lastAllocatedBlockId: long,
* transactionID: long, snapshotCounter: int, numberOfSnapshots: int,
* numOfSnapshottableDirs: int,
* {FSDirectoryTree, FilesUnderConstruction, SecretManagerState} (can be compressed)
* }
*
* FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported) {
* INodeInfo of root, numberOfChildren of root: int
* [list of INodeInfo of root's children],
* [list of INodeDirectoryInfo of root's directory children]
* }
*
* FSDirectoryTree (if {@link Feature#FSIMAGE_NAME_OPTIMIZATION} not supported){
* [list of INodeInfo of INodes in topological order]
* }
*
* INodeInfo {
* {
* localName: short + byte[]
* } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is supported
* or
* {
* fullPath: byte[]
* } when {@link Feature#FSIMAGE_NAME_OPTIMIZATION} is not supported
* replicationFactor: short, modificationTime: long,
* accessTime: long, preferredBlockSize: long,
* numberOfBlocks: int (-1 for INodeDirectory, -2 for INodeSymLink),
* {
* nsQuota: long, dsQuota: long,
* {
* isINodeSnapshottable: byte,
* isINodeWithSnapshot: byte (if isINodeSnapshottable is false)
* } (when {@link Feature#SNAPSHOT} is supported),
* fsPermission: short, PermissionStatus
* } for INodeDirectory
* or
* {
* symlinkString, fsPermission: short, PermissionStatus
* } for INodeSymlink
* or
* {
* [list of BlockInfo]
* [list of FileDiff]
* {
* isINodeFileUnderConstructionSnapshot: byte,
* {clientName: short + byte[], clientMachine: short + byte[]} (when
* isINodeFileUnderConstructionSnapshot is true),
* } (when {@link Feature#SNAPSHOT} is supported and writing snapshotINode),
* fsPermission: short, PermissionStatus
* } for INodeFile
* }
*
* INodeDirectoryInfo {
* fullPath of the directory: short + byte[],
* numberOfChildren: int, [list of INodeInfo of children INode],
* {
* numberOfSnapshots: int,
* [list of Snapshot] (when NumberOfSnapshots is positive),
* numberOfDirectoryDiffs: int,
* [list of DirectoryDiff] (when numberOfDirectoryDiffs is positive),
* number of children that are directories,
* [list of INodeDirectoryInfo of the directory children] (includes
* snapshot copies of deleted sub-directories)
* } (when {@link Feature#SNAPSHOT} is supported),
* }
*
* Snapshot {
* snapshotID: int, root of Snapshot: INodeDirectoryInfo (its local name is
* the name of the snapshot)
* }
*
* DirectoryDiff {
* full path of the root of the associated Snapshot: short + byte[],
* childrenSize: int,
* isSnapshotRoot: byte,
* snapshotINodeIsNotNull: byte (when isSnapshotRoot is false),
* snapshotINode: INodeDirectory (when SnapshotINodeIsNotNull is true), Diff
* }
*
* Diff {
* createdListSize: int, [Local name of INode in created list],
* deletedListSize: int, [INode in deleted list: INodeInfo]
* }
*
* FileDiff {
* full path of the root of the associated Snapshot: short + byte[],
* fileSize: long,
* snapshotINodeIsNotNull: byte,
* snapshotINode: INodeFile (when SnapshotINodeIsNotNull is true), Diff
* }
* </pre>
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
@ -581,6 +686,11 @@ public void updateBlocksMap(INodeFile file) {
}
}
/** @return The FSDirectory of the namesystem where the fsimage is loaded */
public FSDirectory getFSDirectoryInLoading() {
return namesystem.dir;
}
public INode loadINodeWithLocalName(boolean isSnapshotINode, DataInput in,
boolean updateINodeMap) throws IOException {
return loadINodeWithLocalName(isSnapshotINode, in, updateINodeMap, null);
@ -1014,7 +1124,7 @@ static String renameReservedPathsOnUpgrade(String path,
+ " option to automatically rename these paths during upgrade.";
/**
* Same as {@link #renameReservedPathsOnUpgrade}, but for a single
* Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
* byte array path component.
*/
private static byte[] renameReservedComponentOnUpgrade(byte[] component,
@ -1034,7 +1144,7 @@ private static byte[] renameReservedComponentOnUpgrade(byte[] component,
}
/**
* Same as {@link #renameReservedPathsOnUpgrade}, but for a single
* Same as {@link #renameReservedPathsOnUpgrade(String)}, but for a single
* byte array path component.
*/
private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
@ -1055,4 +1165,271 @@ private static byte[] renameReservedRootComponentOnUpgrade(byte[] component,
}
return component;
}
/**
* A one-shot class responsible for writing an image file.
* The save() method should be called once, after which the getter
* functions may be used to retrieve information about the file that was written.
*
* This format has been replaced by the PB-based FSImage. The class is
* retained to maintain compatibility with the external legacy fsimage tool.
*/
@Deprecated
static class Saver {
private static final int LAYOUT_VERSION = -51;
private final SaveNamespaceContext context;
/** Set to true once an image has been written */
private boolean saved = false;
/** The MD5 checksum of the file that was written */
private MD5Hash savedDigest;
private final ReferenceMap referenceMap = new ReferenceMap();
private final Map<Long, INodeFile> snapshotUCMap =
new HashMap<Long, INodeFile>();
/** @throws IllegalStateException if the instance has not yet saved an image */
private void checkSaved() {
if (!saved) {
throw new IllegalStateException("FSImageSaver has not saved an image");
}
}
/** @throws IllegalStateException if the instance has already saved an image */
private void checkNotSaved() {
if (saved) {
throw new IllegalStateException("FSImageSaver has already saved an image");
}
}
Saver(SaveNamespaceContext context) {
this.context = context;
}
/**
* Return the MD5 checksum of the image file that was saved.
*/
MD5Hash getSavedDigest() {
checkSaved();
return savedDigest;
}
void save(File newFile, FSImageCompression compression) throws IOException {
checkNotSaved();
final FSNamesystem sourceNamesystem = context.getSourceNamesystem();
final INodeDirectory rootDir = sourceNamesystem.dir.rootDir;
final long numINodes = rootDir.getDirectoryWithQuotaFeature()
.getSpaceConsumed().get(Quota.NAMESPACE);
String sdPath = newFile.getParentFile().getParentFile().getAbsolutePath();
Step step = new Step(StepType.INODES, sdPath);
StartupProgress prog = NameNode.getStartupProgress();
prog.beginStep(Phase.SAVING_CHECKPOINT, step);
prog.setTotal(Phase.SAVING_CHECKPOINT, step, numINodes);
Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step);
long startTime = now();
//
// Write out data
//
MessageDigest digester = MD5Hash.getDigester();
FileOutputStream fout = new FileOutputStream(newFile);
DigestOutputStream fos = new DigestOutputStream(fout, digester);
DataOutputStream out = new DataOutputStream(fos);
try {
out.writeInt(LAYOUT_VERSION);
LayoutFlags.write(out);
// We use the non-locked version of getNamespaceInfo here since
// the coordinating thread of saveNamespace already has read-locked
// the namespace for us. If we attempt to take another readlock
// from the actual saver thread, there's a potential of a
// fairness-related deadlock. See the comments on HDFS-2223.
out.writeInt(sourceNamesystem.unprotectedGetNamespaceInfo()
.getNamespaceID());
out.writeLong(numINodes);
out.writeLong(sourceNamesystem.getGenerationStampV1());
out.writeLong(sourceNamesystem.getGenerationStampV2());
out.writeLong(sourceNamesystem.getGenerationStampAtblockIdSwitch());
out.writeLong(sourceNamesystem.getLastAllocatedBlockId());
out.writeLong(context.getTxId());
out.writeLong(sourceNamesystem.getLastInodeId());
sourceNamesystem.getSnapshotManager().write(out);
// write compression info and set up compressed stream
out = compression.writeHeaderAndWrapStream(fos);
LOG.info("Saving image file " + newFile +
" using " + compression);
// save the root
saveINode2Image(rootDir, out, false, referenceMap, counter);
// save the rest of the nodes
saveImage(rootDir, out, true, false, counter);
prog.endStep(Phase.SAVING_CHECKPOINT, step);
// Now that the step is finished, set counter equal to total to adjust
// for possible under-counting due to reference inodes.
prog.setCount(Phase.SAVING_CHECKPOINT, step, numINodes);
// save files under construction
// TODO: for HDFS-5428, since we cannot break the compatibility of
// fsimage, we store part of the under-construction files that are only
// in snapshots in this "under-construction-file" section. As a
// temporary solution, we use "/.reserved/.inodes/<inodeid>" as their
// paths, so that when loading fsimage we do not put them into the lease
// map. In the future, we can remove this hack when we can bump the
// layout version.
sourceNamesystem.saveFilesUnderConstruction(out, snapshotUCMap);
context.checkCancelled();
sourceNamesystem.saveSecretManagerStateCompat(out, sdPath);
context.checkCancelled();
sourceNamesystem.getCacheManager().saveStateCompat(out, sdPath);
context.checkCancelled();
out.flush();
context.checkCancelled();
fout.getChannel().force(true);
} finally {
out.close();
}
saved = true;
// set md5 of the saved image
savedDigest = new MD5Hash(digester.digest());
LOG.info("Image file " + newFile + " of size " + newFile.length() +
" bytes saved in " + (now() - startTime)/1000 + " seconds.");
}
/**
* Save children INodes.
* @param children The list of children INodes
* @param out The DataOutputStream to write
* @param inSnapshot Whether the parent directory or its ancestor is in
* the deleted list of some snapshot (caused by rename or
* deletion)
* @param counter Counter to increment for namenode startup progress
* @return Number of children that are directories
*/
private int saveChildren(ReadOnlyList<INode> children,
DataOutputStream out, boolean inSnapshot, Counter counter)
throws IOException {
// Write normal children INode.
out.writeInt(children.size());
int dirNum = 0;
int i = 0;
for(INode child : children) {
// print all children first
// TODO: for HDFS-5428, we cannot change the format/content of fsimage
// here, thus even if the parent directory is in snapshot, we still
// do not handle INodeUC as those stored in deleted list
saveINode2Image(child, out, false, referenceMap, counter);
if (child.isDirectory()) {
dirNum++;
} else if (inSnapshot && child.isFile()
&& child.asFile().isUnderConstruction()) {
this.snapshotUCMap.put(child.getId(), child.asFile());
}
if (i++ % 50 == 0) {
context.checkCancelled();
}
}
return dirNum;
}
/**
* Save file tree image starting from the given root.
* This is a recursive procedure, which first saves all children and
* snapshot diffs of the current directory and then descends into the
* sub-directories.
*
* @param current The current node
* @param out The DataOutputStream to write the image
* @param toSaveSubtree Whether or not to save the subtree to fsimage. For
* a reference node, its subtree may already have been
* saved before.
* @param inSnapshot Whether the current directory is in snapshot
* @param counter Counter to increment for namenode startup progress
*/
private void saveImage(INodeDirectory current, DataOutputStream out,
boolean toSaveSubtree, boolean inSnapshot, Counter counter)
throws IOException {
// 1. Write the inode id of the directory
out.writeLong(current.getId());
if (!toSaveSubtree) {
return;
}
final ReadOnlyList<INode> children = current
.getChildrenList(Snapshot.CURRENT_STATE_ID);
int dirNum = 0;
List<INodeDirectory> snapshotDirs = null;
DirectoryWithSnapshotFeature sf = current.getDirectoryWithSnapshotFeature();
if (sf != null) {
snapshotDirs = new ArrayList<INodeDirectory>();
sf.getSnapshotDirectory(snapshotDirs);
dirNum += snapshotDirs.size();
}
// 2. Write INodeDirectorySnapshottable#snapshotsByNames to record all
// Snapshots
if (current instanceof INodeDirectorySnapshottable) {
INodeDirectorySnapshottable snapshottableNode =
(INodeDirectorySnapshottable) current;
SnapshotFSImageFormat.saveSnapshots(snapshottableNode, out);
} else {
out.writeInt(-1); // # of snapshots
}
// 3. Write children INode
dirNum += saveChildren(children, out, inSnapshot, counter);
// 4. Write DirectoryDiff lists, if there is any.
SnapshotFSImageFormat.saveDirectoryDiffList(current, out, referenceMap);
// Write sub-tree of sub-directories, including possible snapshots of
// deleted sub-directories
out.writeInt(dirNum); // the number of sub-directories
for(INode child : children) {
if(!child.isDirectory()) {
continue;
}
// make sure we only save the subtree under a reference node once
boolean toSave = child.isReference() ?
referenceMap.toProcessSubtree(child.getId()) : true;
saveImage(child.asDirectory(), out, toSave, inSnapshot, counter);
}
if (snapshotDirs != null) {
for (INodeDirectory subDir : snapshotDirs) {
// make sure we only save the subtree under a reference node once
boolean toSave = subDir.getParentReference() != null ?
referenceMap.toProcessSubtree(subDir.getId()) : true;
saveImage(subDir, out, toSave, true, counter);
}
}
}
/**
* Saves inode and increments progress counter.
*
* @param inode INode to save
* @param out DataOutputStream to receive inode
* @param writeUnderConstruction boolean true if this is under construction
* @param referenceMap ReferenceMap containing reference inodes
* @param counter Counter to increment for namenode startup progress
* @throws IOException thrown if there is an I/O error
*/
private void saveINode2Image(INode inode, DataOutputStream out,
boolean writeUnderConstruction, ReferenceMap referenceMap,
Counter counter) throws IOException {
FSImageSerialization.saveINode2Image(inode, out, writeUnderConstruction,
referenceMap);
// Intentionally do not increment counter for reference inodes, because it
// is too difficult at this point to assess whether or not this is a
// reference that counts toward quota.
if (!(inode instanceof INodeReference)) {
counter.increment();
}
}
}
}
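
To tie the layout comment above back to Saver.save(), a hedged sketch of reading the uncompressed header fields in the order save() writes them; it assumes LayoutFlags.read is the counterpart of LayoutFlags.write, and the class name is illustrative.

import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.hadoop.hdfs.protocol.LayoutFlags;

class LegacyImageHeaderReader {
  // Mirrors the writes in Saver.save() before compression is applied.
  static void readHeader(String imageFile) throws IOException {
    DataInputStream in = new DataInputStream(new FileInputStream(imageFile));
    try {
      int layoutVersion = in.readInt();   // -51 for images saved here
      LayoutFlags.read(in);               // currently an empty section
      int namespaceId = in.readInt();
      long numINodes = in.readLong();
      long genStampV1 = in.readLong();
      long genStampV2 = in.readLong();
      long genStampAtBlockIdSwitch = in.readLong();
      long lastAllocatedBlockId = in.readLong();
      long txid = in.readLong();
      long lastInodeId = in.readLong();
      // Snapshot counters, then the (possibly compressed) directory tree,
      // files under construction, secret manager and cache sections follow.
    } finally {
      in.close();
    }
  }
}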

View File

@ -17,6 +17,11 @@
*/
package org.apache.hadoop.hdfs.server.namenode;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.Path;
@ -31,20 +36,21 @@
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction;
import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState;
import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
import org.apache.hadoop.hdfs.util.XMLUtils;
import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException;
import org.apache.hadoop.hdfs.util.XMLUtils.Stanza;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.ShortWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import com.google.common.base.Preconditions;
/**
* Static utility functions for serializing various pieces of data in the correct
@ -82,6 +88,26 @@ static private final class TLData {
final ShortWritable U_SHORT = new ShortWritable();
final IntWritable U_INT = new IntWritable();
final LongWritable U_LONG = new LongWritable();
final FsPermission FILE_PERM = new FsPermission((short) 0);
}
private static void writePermissionStatus(INodeAttributes inode,
DataOutput out) throws IOException {
final FsPermission p = TL_DATA.get().FILE_PERM;
p.fromShort(inode.getFsPermissionShort());
PermissionStatus.write(out, inode.getUserName(), inode.getGroupName(), p);
}
private static void writeBlocks(final Block[] blocks,
final DataOutput out) throws IOException {
if (blocks == null) {
out.writeInt(0);
} else {
out.writeInt(blocks.length);
for (Block blk : blocks) {
blk.write(out);
}
}
}
// Helper function that reads in an INodeUnderConstruction
@ -127,6 +153,183 @@ static INodeFile readINodeUnderConstruction(
return file;
}
// Helper function that writes an INodeUnderConstruction
// into the output stream
//
static void writeINodeUnderConstruction(DataOutputStream out, INodeFile cons,
String path) throws IOException {
writeString(path, out);
out.writeLong(cons.getId());
out.writeShort(cons.getFileReplication());
out.writeLong(cons.getModificationTime());
out.writeLong(cons.getPreferredBlockSize());
writeBlocks(cons.getBlocks(), out);
cons.getPermissionStatus().write(out);
FileUnderConstructionFeature uc = cons.getFileUnderConstructionFeature();
writeString(uc.getClientName(), out);
writeString(uc.getClientMachine(), out);
out.writeInt(0); // do not store locations of last block
}
/**
* Serialize an {@link INodeFile} node
* @param file The node to write
* @param out The {@link DataOutputStream} where the fields are written
* @param writeUnderConstruction Whether to also write under-construction state
*/
public static void writeINodeFile(INodeFile file, DataOutput out,
boolean writeUnderConstruction) throws IOException {
writeLocalName(file, out);
out.writeLong(file.getId());
out.writeShort(file.getFileReplication());
out.writeLong(file.getModificationTime());
out.writeLong(file.getAccessTime());
out.writeLong(file.getPreferredBlockSize());
writeBlocks(file.getBlocks(), out);
SnapshotFSImageFormat.saveFileDiffList(file, out);
if (writeUnderConstruction) {
if (file.isUnderConstruction()) {
out.writeBoolean(true);
final FileUnderConstructionFeature uc = file.getFileUnderConstructionFeature();
writeString(uc.getClientName(), out);
writeString(uc.getClientMachine(), out);
} else {
out.writeBoolean(false);
}
}
writePermissionStatus(file, out);
}
/** Serialize an {@link INodeFileAttributes}. */
public static void writeINodeFileAttributes(INodeFileAttributes file,
DataOutput out) throws IOException {
writeLocalName(file, out);
writePermissionStatus(file, out);
out.writeLong(file.getModificationTime());
out.writeLong(file.getAccessTime());
out.writeShort(file.getFileReplication());
out.writeLong(file.getPreferredBlockSize());
}
private static void writeQuota(Quota.Counts quota, DataOutput out)
throws IOException {
out.writeLong(quota.get(Quota.NAMESPACE));
out.writeLong(quota.get(Quota.DISKSPACE));
}
/**
* Serialize an {@link INodeDirectory}
* @param node The node to write
* @param out The {@link DataOutput} where the fields are written
*/
public static void writeINodeDirectory(INodeDirectory node, DataOutput out)
throws IOException {
writeLocalName(node, out);
out.writeLong(node.getId());
out.writeShort(0); // replication
out.writeLong(node.getModificationTime());
out.writeLong(0); // access time
out.writeLong(0); // preferred block size
out.writeInt(-1); // # of blocks
writeQuota(node.getQuotaCounts(), out);
if (node instanceof INodeDirectorySnapshottable) {
out.writeBoolean(true);
} else {
out.writeBoolean(false);
out.writeBoolean(node.isWithSnapshot());
}
writePermissionStatus(node, out);
}
/**
* Serialize an {@link INodeDirectoryAttributes}
* @param a The node to write
* @param out The {@link DataOutput} where the fields are written
*/
public static void writeINodeDirectoryAttributes(
INodeDirectoryAttributes a, DataOutput out) throws IOException {
writeLocalName(a, out);
writePermissionStatus(a, out);
out.writeLong(a.getModificationTime());
writeQuota(a.getQuotaCounts(), out);
}
/**
* Serialize an {@link INodeSymlink} node
* @param node The node to write
* @param out The {@link DataOutput} where the fields are written
*/
private static void writeINodeSymlink(INodeSymlink node, DataOutput out)
throws IOException {
writeLocalName(node, out);
out.writeLong(node.getId());
out.writeShort(0); // replication
out.writeLong(0); // modification time
out.writeLong(0); // access time
out.writeLong(0); // preferred block size
out.writeInt(-2); // # of blocks
Text.writeString(out, node.getSymlinkString());
writePermissionStatus(node, out);
}
/** Serialize a {@link INodeReference} node */
private static void writeINodeReference(INodeReference ref, DataOutput out,
boolean writeUnderConstruction, ReferenceMap referenceMap
) throws IOException {
writeLocalName(ref, out);
out.writeLong(ref.getId());
out.writeShort(0); // replication
out.writeLong(0); // modification time
out.writeLong(0); // access time
out.writeLong(0); // preferred block size
out.writeInt(-3); // # of blocks
final boolean isWithName = ref instanceof INodeReference.WithName;
out.writeBoolean(isWithName);
if (!isWithName) {
Preconditions.checkState(ref instanceof INodeReference.DstReference);
// dst snapshot id
out.writeInt(((INodeReference.DstReference) ref).getDstSnapshotId());
} else {
out.writeInt(((INodeReference.WithName) ref).getLastSnapshotId());
}
final INodeReference.WithCount withCount
= (INodeReference.WithCount)ref.getReferredINode();
referenceMap.writeINodeReferenceWithCount(withCount, out,
writeUnderConstruction);
}
/**
* Save one inode's attributes to the image.
*/
public static void saveINode2Image(INode node, DataOutput out,
boolean writeUnderConstruction, ReferenceMap referenceMap)
throws IOException {
if (node.isReference()) {
writeINodeReference(node.asReference(), out, writeUnderConstruction,
referenceMap);
} else if (node.isDirectory()) {
writeINodeDirectory(node.asDirectory(), out);
} else if (node.isSymlink()) {
writeINodeSymlink(node.asSymlink(), out);
} else if (node.isFile()) {
writeINodeFile(node.asFile(), out, writeUnderConstruction);
}
}
// This should be reverted to package private once the ImageLoader
// code is moved into this package. This method should not be called
// by other code.
@ -226,6 +429,12 @@ public static byte[] readLocalName(DataInput in) throws IOException {
in.readFully(createdNodeName);
return createdNodeName;
}
private static void writeLocalName(INodeAttributes inode, DataOutput out)
throws IOException {
final byte[] name = inode.getLocalNameBytes();
writeBytes(name, out);
}
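
For reference, the "short + byte[]" encoding used by writeLocalName/writeBytes and referenced throughout the format comment above can be sketched as follows (assuming the usual short-length-prefixed form; the class name is illustrative):

import java.io.DataOutput;
import java.io.IOException;

class NameEncodingSketch {
  // The "short + byte[]" encoding: a 2-byte length prefix followed by
  // the raw name bytes.
  static void writeShortPrefixedBytes(byte[] data, DataOutput out)
      throws IOException {
    out.writeShort(data.length);
    out.write(data);
  }
}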
public static void writeBytes(byte[] data, DataOutput out)
throws IOException {

View File

@ -87,17 +87,7 @@
import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_SUPPORT_APPEND_KEY;
import static org.apache.hadoop.util.Time.now;
import java.io.BufferedWriter;
import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.*;
import java.lang.management.ManagementFactory;
import java.net.InetAddress;
import java.net.URI;
@ -6090,6 +6080,42 @@ void unprotectedChangeLease(String src, String dst) {
leaseManager.changeLease(src, dst);
}
/**
* Serializes leases.
*/
void saveFilesUnderConstruction(DataOutputStream out,
Map<Long, INodeFile> snapshotUCMap) throws IOException {
// This is run by an inferior thread of saveNamespace, which holds a read
// lock on our behalf. If we took the read lock here, we could block
// for fairness if a writer is waiting on the lock.
synchronized (leaseManager) {
Map<String, INodeFile> nodes = leaseManager.getINodesUnderConstruction();
for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
// TODO: for HDFS-5428, because of rename operations, some
// under-construction files that are
// in the current fs directory can also be captured in the
// snapshotUCMap. We should remove them from the snapshotUCMap.
snapshotUCMap.remove(entry.getValue().getId());
}
out.writeInt(nodes.size() + snapshotUCMap.size()); // write the size
for (Map.Entry<String, INodeFile> entry : nodes.entrySet()) {
FSImageSerialization.writeINodeUnderConstruction(
out, entry.getValue(), entry.getKey());
}
for (Map.Entry<Long, INodeFile> entry : snapshotUCMap.entrySet()) {
// for those snapshot INodeFileUC, we use "/.reserved/.inodes/<inodeid>"
// as their paths
StringBuilder b = new StringBuilder();
b.append(FSDirectory.DOT_RESERVED_PATH_PREFIX)
.append(Path.SEPARATOR).append(FSDirectory.DOT_INODES_STRING)
.append(Path.SEPARATOR).append(entry.getValue().getId());
FSImageSerialization.writeINodeUnderConstruction(
out, entry.getValue(), b.toString());
}
}
}
/**
* @return all the under-construction files in the lease map
*/
@ -6376,6 +6402,15 @@ void cancelDelegationToken(Token<DelegationTokenIdentifier> token)
getEditLog().logSync();
}
/**
* @param out output stream to write the secret manager state to
* @param sdPath storage directory path
*/
void saveSecretManagerStateCompat(DataOutputStream out, String sdPath)
throws IOException {
dtSecretManager.saveSecretManagerStateCompat(out, sdPath);
}
SecretManagerState saveSecretManagerState() {
return dtSecretManager.saveSecretManagerState();
}
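
A tiny sketch of the reserved-path construction used above for snapshot-only under-construction files; the inode id is made up:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.server.namenode.FSDirectory;

public class ReservedInodePathDemo {
  public static void main(String[] args) {
    long inodeId = 16387; // illustrative inode id
    String path = FSDirectory.DOT_RESERVED_PATH_PREFIX
        + Path.SEPARATOR + FSDirectory.DOT_INODES_STRING
        + Path.SEPARATOR + inodeId;
    System.out.println(path); // /.reserved/.inodes/16387
  }
}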

View File

@ -77,7 +77,8 @@ public enum NameNodeFile {
IMAGE_ROLLBACK("fsimage_rollback"),
EDITS_NEW ("edits.new"), // from "old" pre-HDFS-1073 format
EDITS_INPROGRESS ("edits_inprogress"),
EDITS_TMP ("edits_tmp");
EDITS_TMP ("edits_tmp"),
IMAGE_LEGACY_OIV ("fsimage_legacy_oiv"); // For pre-PB format
private String fileName = null;
private NameNodeFile(String name) { this.fileName = name; }
@ -693,6 +694,10 @@ public static String getRollbackImageFileName(long txid) {
return getNameNodeFileName(NameNodeFile.IMAGE_ROLLBACK, txid);
}
public static String getLegacyOIVImageFileName(long txid) {
return getNameNodeFileName(NameNodeFile.IMAGE_LEGACY_OIV, txid);
}
private static String getNameNodeFileName(NameNodeFile nnf, long txid) {
return String.format("%s_%019d", nnf.getName(), txid);
}
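
A worked example of the resulting file name; the %019d zero padding comes from getNameNodeFileName above:

public class LegacyOivFileNameDemo {
  public static void main(String[] args) {
    long txid = 1234; // illustrative transaction id
    String name = String.format("%s_%019d", "fsimage_legacy_oiv", txid);
    System.out.println(name); // fsimage_legacy_oiv_0000000000000001234
  }
}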

View File

@ -18,11 +18,13 @@
package org.apache.hadoop.hdfs.server.namenode;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
@ -233,4 +235,58 @@ private static void deleteOrWarn(File file) {
}
}
}
/**
* Delete old OIV fsimages. Since the target dir is not a full-blown
* storage directory, we simply list and keep the latest ones. For the
* same reason, no storage inspector is used.
*/
void purgeOldLegacyOIVImages(String dir, long txid) {
File oivImageDir = new File(dir);
final String oivImagePrefix = NameNodeFile.IMAGE_LEGACY_OIV.getName();
// Get the listing
String[] filesInStorage = oivImageDir.list(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.matches(oivImagePrefix + "_(\\d+)");
}
});
// Check whether there is any work to do. The listing may be null if the
// directory does not exist or cannot be read.
if (filesInStorage == null
|| filesInStorage.length <= numCheckpointsToRetain) {
return;
}
// Create a sorted list of txids from the file names.
TreeSet<Long> sortedTxIds = new TreeSet<Long>();
for (String fName : filesInStorage) {
// Extract the transaction id from the file name.
long fTxId;
try {
fTxId = Long.parseLong(fName.substring(oivImagePrefix.length() + 1));
} catch (NumberFormatException nfe) {
// This should not happen since we have already filtered it.
// Log and continue.
LOG.warn("Invalid file name. Skipping " + fName);
continue;
}
sortedTxIds.add(Long.valueOf(fTxId));
}
int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
Iterator<Long> iter = sortedTxIds.iterator();
while (numFilesToDelete > 0 && iter.hasNext()) {
long txIdVal = iter.next().longValue();
String fileName = NNStorage.getLegacyOIVImageFileName(txIdVal);
LOG.info("Deleting " + fileName);
File fileToDelete = new File(oivImageDir, fileName);
if (!fileToDelete.delete()) {
// deletion failed.
LOG.warn("Failed to delete image file: " + fileToDelete);
}
numFilesToDelete--;
}
}
}
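
A self-contained sketch of the retention rule implemented above; the txid values and the retention count are made up:

import java.util.TreeSet;

public class LegacyOivRetentionDemo {
  // Keep the numCheckpointsToRetain highest txids, delete the rest.
  public static void main(String[] args) {
    int numCheckpointsToRetain = 2;
    TreeSet<Long> sortedTxIds = new TreeSet<Long>();
    for (long txid : new long[] {40, 10, 30, 20}) {
      sortedTxIds.add(txid);
    }
    int numFilesToDelete = sortedTxIds.size() - numCheckpointsToRetain;
    for (Long txid : sortedTxIds) { // iterates in ascending txid order
      if (numFilesToDelete-- > 0) {
        System.out.println("would delete fsimage_legacy_oiv_" + txid);
      } else {
        System.out.println("would keep   fsimage_legacy_oiv_" + txid);
      }
    }
  }
}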

View File

@ -62,6 +62,7 @@
import org.apache.hadoop.hdfs.server.protocol.NamenodeProtocol;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog;
import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest;
import org.apache.hadoop.hdfs.util.Canceler;
import org.apache.hadoop.http.HttpConfig;
import org.apache.hadoop.http.HttpServer2;
import org.apache.hadoop.io.MD5Hash;
@ -125,6 +126,7 @@ public class SecondaryNameNode implements Runnable,
private Thread checkpointThread;
private ObjectName nameNodeStatusBeanName;
private String legacyOivImageDir;
@Override
public String toString() {
@ -289,6 +291,9 @@ private void initialize(final Configuration conf,
NetUtils.getHostPortString(httpsAddress));
}
legacyOivImageDir = conf.get(
DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY);
LOG.info("Checkpoint Period :" + checkpointConf.getPeriod() + " secs "
+ "(" + checkpointConf.getPeriod() / 60 + " min)");
LOG.info("Log Size Trigger :" + checkpointConf.getTxnCount() + " txns");
@ -497,6 +502,7 @@ private URL getInfoServer() throws IOException {
* @return if the image is fetched from primary or not
*/
@VisibleForTesting
@SuppressWarnings("deprecated")
public boolean doCheckpoint() throws IOException {
checkpointImage.ensureCurrentDirExists();
NNStorage dstStorage = checkpointImage.getStorage();
@ -559,11 +565,18 @@ public boolean doCheckpoint() throws IOException {
LOG.warn("Checkpoint done. New Image Size: "
+ dstStorage.getFsImageName(txid).length());
if (legacyOivImageDir != null && !legacyOivImageDir.isEmpty()) {
try {
checkpointImage.saveLegacyOIVImage(namesystem, legacyOivImageDir,
new Canceler());
} catch (IOException e) {
LOG.warn("Failed to write legacy OIV image: ", e);
}
}
return loadImage;
}
/**
* @param opts The parameters passed to this program.
* @exception Exception if the filesystem does not exist.

View File

@ -183,6 +183,12 @@ private void doCheckpoint() throws InterruptedException, IOException {
txid = img.getStorage().getMostRecentCheckpointTxId();
assert txid == thisCheckpointTxId : "expected to save checkpoint at txid=" +
thisCheckpointTxId + " but instead saved at txid=" + txid;
// Save the legacy OIV image, if the output dir is defined.
String outputDir = checkpointConf.getLegacyOivImageDir();
if (outputDir != null && !outputDir.isEmpty()) {
img.saveLegacyOIVImage(namesystem, outputDir, canceler);
}
} finally {
namesystem.longReadUnlock();
}

View File

@ -17,13 +17,17 @@
*/
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import com.google.common.base.Preconditions;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
import org.apache.hadoop.hdfs.server.namenode.Quota;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
import java.util.List;
import com.google.common.base.Preconditions;
/**
* The difference of an inode between in two snapshots.
@ -128,4 +132,11 @@ public String toString() {
return getClass().getSimpleName() + ": " + this.getSnapshotId() + " (post="
+ (posteriorDiff == null? null: posteriorDiff.getSnapshotId()) + ")";
}
void writeSnapshot(DataOutput out) throws IOException {
out.writeInt(snapshotId);
}
abstract void write(DataOutput out, ReferenceMap referenceMap
) throws IOException;
}

View File

@ -17,6 +17,8 @@
*/
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
@ -32,6 +34,7 @@
import org.apache.hadoop.hdfs.protocol.SnapshotDiffReport.DiffType;
import org.apache.hadoop.hdfs.server.namenode.Content;
import org.apache.hadoop.hdfs.server.namenode.ContentSummaryComputationContext;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
@ -39,6 +42,7 @@
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeReference;
import org.apache.hadoop.hdfs.server.namenode.Quota;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
import org.apache.hadoop.hdfs.util.Diff;
import org.apache.hadoop.hdfs.util.Diff.Container;
import org.apache.hadoop.hdfs.util.Diff.ListType;
@ -120,6 +124,35 @@ private Quota.Counts destroyDeletedList(
return counts;
}
/** Serialize {@link #created} */
private void writeCreated(DataOutput out) throws IOException {
final List<INode> created = getList(ListType.CREATED);
out.writeInt(created.size());
for (INode node : created) {
// For INode in created list, we only need to record its local name
byte[] name = node.getLocalNameBytes();
out.writeShort(name.length);
out.write(name);
}
}
/** Serialize {@link #deleted} */
private void writeDeleted(DataOutput out,
ReferenceMap referenceMap) throws IOException {
final List<INode> deleted = getList(ListType.DELETED);
out.writeInt(deleted.size());
for (INode node : deleted) {
FSImageSerialization.saveINode2Image(node, out, true, referenceMap);
}
}
/** Serialize to out */
private void write(DataOutput out, ReferenceMap referenceMap
) throws IOException {
writeCreated(out);
writeDeleted(out, referenceMap);
}
/** Get the list of INodeDirectory contained in the deleted list */
private void getDirsInDeleted(List<INodeDirectory> dirList) {
for (INode node : getList(ListType.DELETED)) {
@ -314,6 +347,25 @@ int getChildrenSize() {
return childrenSize;
}
@Override
void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
writeSnapshot(out);
out.writeInt(childrenSize);
// Write snapshotINode
out.writeBoolean(isSnapshotRoot);
if (!isSnapshotRoot) {
if (snapshotINode != null) {
out.writeBoolean(true);
FSImageSerialization.writeINodeDirectoryAttributes(snapshotINode, out);
} else {
out.writeBoolean(false);
}
}
// Write diff. No need to write posteriorDiff, since diffs is a list.
diff.write(out, referenceMap);
}
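
A hedged sketch of reading back the created-list records that writeCreated above emits: an int count, then one short-prefixed local name per created INode. Class and method names are illustrative.

import java.io.DataInput;
import java.io.IOException;

class CreatedListReader {
  // Mirrors writeCreated(): count, then short-prefixed local names.
  static byte[][] read(DataInput in) throws IOException {
    int size = in.readInt();
    byte[][] names = new byte[size][];
    for (int i = 0; i < size; i++) {
      byte[] name = new byte[in.readShort()];
      in.readFully(name);
      names[i] = name;
    }
    return names;
  }
}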
@Override
Quota.Counts destroyDiffAndCollectBlocks(INodeDirectory currentINode,
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

View File

@ -17,13 +17,17 @@
*/
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
import org.apache.hadoop.hdfs.server.namenode.Quota;
import java.util.List;
import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap;
/**
* The difference of an {@link INodeFile} between two snapshots.
@ -66,6 +70,20 @@ public String toString() {
+ (snapshotINode == null? "?": snapshotINode.getFileReplication());
}
@Override
void write(DataOutput out, ReferenceMap referenceMap) throws IOException {
writeSnapshot(out);
out.writeLong(fileSize);
// write snapshotINode
if (snapshotINode != null) {
out.writeBoolean(true);
FSImageSerialization.writeINodeFileAttributes(snapshotINode, out);
} else {
out.writeBoolean(false);
}
}
@Override
Quota.Counts destroyDiffAndCollectBlocks(INodeFile currentINode,
BlocksMapUpdateInfo collectedBlocks, final List<INode> removedINodes) {

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
@ -30,6 +31,7 @@
import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.server.namenode.AclFeature;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
import org.apache.hadoop.hdfs.util.ReadOnlyList;
@ -214,4 +216,11 @@ public int hashCode() {
public String toString() {
return getClass().getSimpleName() + "." + root.getLocalName() + "(id=" + id + ")";
}
/** Serialize the fields to out */
void write(DataOutput out) throws IOException {
out.writeInt(id);
// write root
FSImageSerialization.writeINodeDirectory(root, out);
}
}

View File

@ -29,21 +29,75 @@
import org.apache.hadoop.hdfs.server.namenode.FSImageFormat;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeAttributes;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectory;
import org.apache.hadoop.hdfs.server.namenode.INodeDirectoryAttributes;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.INodeFileAttributes;
import org.apache.hadoop.hdfs.server.namenode.INodeReference;
import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiff;
import org.apache.hadoop.hdfs.server.namenode.snapshot.DirectoryWithSnapshotFeature.DirectoryDiffList;
import org.apache.hadoop.hdfs.tools.snapshot.SnapshotDiff;
import org.apache.hadoop.hdfs.util.Diff.ListType;
import org.apache.hadoop.hdfs.server.namenode.FSImageFormat.Loader;
import org.apache.hadoop.hdfs.util.ReadOnlyList;
/**
* A helper class defining static methods for reading/writing snapshot related
* information from/to FSImage.
*/
public class SnapshotFSImageFormat {
/**
* Save snapshots and snapshot quota for a snapshottable directory.
* @param current The directory that the snapshots belong to.
* @param out The {@link DataOutput} to write.
* @throws IOException
*/
public static void saveSnapshots(INodeDirectorySnapshottable current,
DataOutput out) throws IOException {
// list of snapshots in snapshotsByNames
ReadOnlyList<Snapshot> snapshots = current.getSnapshotsByNames();
out.writeInt(snapshots.size());
for (Snapshot s : snapshots) {
// write the snapshot id
out.writeInt(s.getId());
}
// snapshot quota
out.writeInt(current.getSnapshotQuota());
}
/**
* Save an INode's SnapshotDiff list.
* @param diffs The SnapshotDiff list to save.
* @param out The {@link DataOutput} to write.
*/
private static <N extends INode, A extends INodeAttributes, D extends AbstractINodeDiff<N, A, D>>
void saveINodeDiffs(final AbstractINodeDiffList<N, A, D> diffs,
final DataOutput out, ReferenceMap referenceMap) throws IOException {
// Record the diffs in reversed order, so that we can find the correct
// reference for INodes in the created list when loading the FSImage
if (diffs == null) {
out.writeInt(-1); // no diffs
} else {
final List<D> list = diffs.asList();
final int size = list.size();
out.writeInt(size);
for (int i = size - 1; i >= 0; i--) {
list.get(i).write(out, referenceMap);
}
}
}
public static void saveDirectoryDiffList(final INodeDirectory dir,
final DataOutput out, final ReferenceMap referenceMap
) throws IOException {
saveINodeDiffs(dir.getDiffs(), out, referenceMap);
}
public static void saveFileDiffList(final INodeFile file,
final DataOutput out) throws IOException {
saveINodeDiffs(file.getDiffs(), out, null);
}
public static FileDiffList loadFileDiffList(DataInput in,
FSImageFormat.Loader loader) throws IOException {
final int size = in.readInt();
@ -264,6 +318,23 @@ public static class ReferenceMap {
* Used to record whether the subtree of the reference node has been saved
*/
private final Map<Long, Long> dirMap = new HashMap<Long, Long>();
public void writeINodeReferenceWithCount(
INodeReference.WithCount withCount, DataOutput out,
boolean writeUnderConstruction) throws IOException {
final INode referred = withCount.getReferredINode();
final long id = withCount.getId();
final boolean firstReferred = !referenceMap.containsKey(id);
out.writeBoolean(firstReferred);
if (firstReferred) {
FSImageSerialization.saveINode2Image(referred, out,
writeUnderConstruction, this);
referenceMap.put(id, withCount);
} else {
out.writeLong(id);
}
}
public boolean toProcessSubtree(long id) {
if (dirMap.containsKey(id)) {

View File

@ -18,6 +18,7 @@
package org.apache.hadoop.hdfs.server.namenode.snapshot;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
@ -287,6 +288,22 @@ INodeDirectorySnapshottable[] getSnapshottableDirs() {
return snapshottables.values().toArray(
new INodeDirectorySnapshottable[snapshottables.size()]);
}
/**
* Write {@link #snapshotCounter}, {@link #numSnapshots},
* and all snapshots to the DataOutput.
*/
public void write(DataOutput out) throws IOException {
out.writeInt(snapshotCounter);
out.writeInt(numSnapshots.get());
// write all snapshots.
for(INodeDirectorySnapshottable snapshottableDir : snapshottables.values()) {
for(Snapshot s : snapshottableDir.getSnapshotsByNames()) {
s.write(out);
}
}
}
/**
* Read values of {@link #snapshotCounter}, {@link #numSnapshots}, and

View File

@ -0,0 +1,172 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
/**
* A DelimitedImageVisitor generates a text representation of the fsimage,
* with each element separated by a delimiter string. All of the elements
* common to both inodes and inodes-under-construction are included. When
* processing an fsimage with a layout version that did not include an
* element, such as AccessTime, the output file will include a column
* for the value, but no value will be included.
*
* Individual block information for each file is not currently included.
*
* The default delimiter is tab, as this is an unlikely value to be included
* in an inode path or other text metadata. The delimiter value can be
* changed via the constructor.
*/
class DelimitedImageVisitor extends TextWriterImageVisitor {
private static final String defaultDelimiter = "\t";
final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
private long fileSize = 0L;
// Elements of fsimage we're interested in tracking
private final Collection<ImageElement> elementsToTrack;
// Values for each of the elements in elementsToTrack
private final AbstractMap<ImageElement, String> elements =
new HashMap<ImageElement, String>();
private final String delimiter;
{
elementsToTrack = new ArrayList<ImageElement>();
// This collection determines what elements are tracked and the order
// in which they are output
Collections.addAll(elementsToTrack, ImageElement.INODE_PATH,
ImageElement.REPLICATION,
ImageElement.MODIFICATION_TIME,
ImageElement.ACCESS_TIME,
ImageElement.BLOCK_SIZE,
ImageElement.NUM_BLOCKS,
ImageElement.NUM_BYTES,
ImageElement.NS_QUOTA,
ImageElement.DS_QUOTA,
ImageElement.PERMISSION_STRING,
ImageElement.USER_NAME,
ImageElement.GROUP_NAME);
}
public DelimitedImageVisitor(String filename) throws IOException {
this(filename, false);
}
public DelimitedImageVisitor(String outputFile, boolean printToScreen)
throws IOException {
this(outputFile, printToScreen, defaultDelimiter);
}
public DelimitedImageVisitor(String outputFile, boolean printToScreen,
String delimiter) throws IOException {
super(outputFile, printToScreen);
this.delimiter = delimiter;
reset();
}
/**
* Reset the values of the elements we're tracking in order to handle
* the next file
*/
private void reset() {
elements.clear();
for(ImageElement e : elementsToTrack)
elements.put(e, null);
    fileSize = 0L;
}
@Override
void leaveEnclosingElement() throws IOException {
ImageElement elem = elemQ.pop();
// If we're done with an inode, write out our results and start over
if(elem == ImageElement.INODE ||
elem == ImageElement.INODE_UNDER_CONSTRUCTION) {
writeLine();
write("\n");
reset();
}
}
/**
* Iterate through all the elements we're tracking and, if a value was
* recorded for it, write it out.
*/
private void writeLine() throws IOException {
Iterator<ImageElement> it = elementsToTrack.iterator();
while(it.hasNext()) {
ImageElement e = it.next();
String v = null;
if(e == ImageElement.NUM_BYTES)
v = String.valueOf(fileSize);
else
v = elements.get(e);
if(v != null)
write(v);
if(it.hasNext())
write(delimiter);
}
}
@Override
void visit(ImageElement element, String value) throws IOException {
// Explicitly label the root path
if(element == ImageElement.INODE_PATH && value.equals(""))
value = "/";
// Special case of file size, which is sum of the num bytes in each block
if(element == ImageElement.NUM_BYTES)
fileSize += Long.valueOf(value);
if(elements.containsKey(element) && element != ImageElement.NUM_BYTES)
elements.put(element, value);
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
elemQ.push(element);
}
@Override
void visitEnclosingElement(ImageElement element, ImageElement key,
String value) throws IOException {
// Special case as numBlocks is an attribute of the blocks element
if(key == ImageElement.NUM_BLOCKS
&& elements.containsKey(ImageElement.NUM_BLOCKS))
elements.put(key, value);
elemQ.push(element);
}
@Override
void start() throws IOException { /* Nothing to do */ }
}
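/*
 * A minimal, hypothetical usage sketch (not part of the original change).
 * The fsimage path "/tmp/fsimage" and output path "/tmp/fsimage.tsv" are
 * made up for illustration; any legacy (pre-protobuf) fsimage works.
 */
class DelimitedImageVisitorExample {
  public static void main(String[] args) throws IOException {
    // Peek at the layout version, the first int in the file, to pick a loader.
    java.io.DataInputStream peek = new java.io.DataInputStream(
        new java.io.FileInputStream("/tmp/fsimage"));
    int version;
    try {
      version = peek.readInt();
    } finally {
      peek.close();
    }
    ImageLoader loader = ImageLoader.LoaderFactory.getLoader(version);
    if (loader == null) {
      throw new IOException("No image loader for layout version " + version);
    }
    // Re-open from the beginning; the loader re-reads the version itself.
    java.io.DataInputStream in = new java.io.DataInputStream(
        new java.io.FileInputStream("/tmp/fsimage"));
    try {
      // skipBlocks = false: the Delimited output needs the per-block
      // NUM_BYTES values to compute file sizes.
      loader.loadImage(in, new DelimitedImageVisitor("/tmp/fsimage.tsv"), false);
    } finally {
      in.close();
    }
  }
}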

View File

@ -0,0 +1,36 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Utility class for tracking descent into the structure of the
* Visitor class (ImageVisitor, EditsVisitor etc.)
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class DepthCounter {
private int depth = 0;
public void incLevel() { depth++; }
public void decLevel() { if(depth >= 1) depth--; }
public int getLevel() { return depth; }
}

View File

@ -0,0 +1,193 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.LinkedList;
/**
* File size distribution visitor.
*
* <h3>Description.</h3>
* This is the tool for analyzing file sizes in the namespace image.
* In order to run the tool one should define a range of integers
* <tt>[0, maxSize]</tt> by specifying <tt>maxSize</tt> and a <tt>step</tt>.
* The range of integers is divided into segments of size <tt>step</tt>:
* <tt>[0, s<sub>1</sub>, ..., s<sub>n-1</sub>, maxSize]</tt>,
* and the visitor calculates how many files in the system fall into
* each segment <tt>[s<sub>i-1</sub>, s<sub>i</sub>)</tt>.
* Note that files larger than <tt>maxSize</tt> always fall into
* the very last segment.
*
* <h3>Input.</h3>
* <ul>
* <li><tt>filename</tt> specifies the location of the image file;</li>
* <li><tt>maxSize</tt> determines the range <tt>[0, maxSize]</tt> of files
* sizes considered by the visitor;</li>
* <li><tt>step</tt> the range is divided into segments of size step.</li>
* </ul>
*
* <h3>Output.</h3>
* The output file is formatted as a tab separated two column table:
* Size and NumFiles. Where Size represents the start of the segment,
* and numFiles is the number of files form the image which size falls in
* this segment.
*/
class FileDistributionVisitor extends TextWriterImageVisitor {
final private LinkedList<ImageElement> elemS = new LinkedList<ImageElement>();
private final static long MAX_SIZE_DEFAULT = 0x2000000000L; // 1/8 TB = 2^37
private final static int INTERVAL_DEFAULT = 0x200000; // 2 MB = 2^21
private int[] distribution;
private long maxSize;
private int step;
private int totalFiles;
private int totalDirectories;
private int totalBlocks;
private long totalSpace;
private long maxFileSize;
private FileContext current;
private boolean inInode = false;
/**
* File or directory information.
*/
private static class FileContext {
String path;
long fileSize;
int numBlocks;
int replication;
}
public FileDistributionVisitor(String filename,
long maxSize,
int step) throws IOException {
super(filename, false);
this.maxSize = (maxSize == 0 ? MAX_SIZE_DEFAULT : maxSize);
this.step = (step == 0 ? INTERVAL_DEFAULT : step);
long numIntervals = this.maxSize / this.step;
if(numIntervals >= Integer.MAX_VALUE)
throw new IOException("Too many distribution intervals " + numIntervals);
this.distribution = new int[1 + (int)(numIntervals)];
this.totalFiles = 0;
this.totalDirectories = 0;
this.totalBlocks = 0;
this.totalSpace = 0;
this.maxFileSize = 0;
}
@Override
void start() throws IOException {}
@Override
void finish() throws IOException {
output();
super.finish();
}
@Override
void finishAbnormally() throws IOException {
System.out.println("*** Image processing finished abnormally. Ending ***");
output();
super.finishAbnormally();
}
private void output() throws IOException {
// write the distribution into the output file
write("Size\tNumFiles\n");
for(int i = 0; i < distribution.length; i++)
write(((long)i * step) + "\t" + distribution[i] + "\n");
System.out.println("totalFiles = " + totalFiles);
System.out.println("totalDirectories = " + totalDirectories);
System.out.println("totalBlocks = " + totalBlocks);
System.out.println("totalSpace = " + totalSpace);
System.out.println("maxFileSize = " + maxFileSize);
}
@Override
void leaveEnclosingElement() throws IOException {
ImageElement elem = elemS.pop();
if(elem != ImageElement.INODE &&
elem != ImageElement.INODE_UNDER_CONSTRUCTION)
return;
inInode = false;
if(current.numBlocks < 0) {
totalDirectories ++;
return;
}
totalFiles++;
totalBlocks += current.numBlocks;
totalSpace += current.fileSize * current.replication;
if(maxFileSize < current.fileSize)
maxFileSize = current.fileSize;
int high;
if(current.fileSize > maxSize)
high = distribution.length-1;
else
high = (int)Math.ceil((double)current.fileSize / step);
distribution[high]++;
if(totalFiles % 1000000 == 1)
System.out.println("Files processed: " + totalFiles
+ " Current: " + current.path);
}
@Override
void visit(ImageElement element, String value) throws IOException {
if(inInode) {
switch(element) {
case INODE_PATH:
current.path = (value.equals("") ? "/" : value);
break;
case REPLICATION:
current.replication = Integer.valueOf(value);
break;
case NUM_BYTES:
current.fileSize += Long.valueOf(value);
break;
default:
break;
}
}
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
elemS.push(element);
if(element == ImageElement.INODE ||
element == ImageElement.INODE_UNDER_CONSTRUCTION) {
current = new FileContext();
inInode = true;
}
}
@Override
void visitEnclosingElement(ImageElement element,
ImageElement key, String value) throws IOException {
elemS.push(element);
if(element == ImageElement.INODE ||
element == ImageElement.INODE_UNDER_CONSTRUCTION)
inInode = true;
else if(element == ImageElement.BLOCKS)
current.numBlocks = Integer.parseInt(value);
}
}
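/*
 * Worked example (illustrative, not from the original change): with the
 * default step of 2 MB, a 3 MB file is counted in bucket
 * ceil(3 MB / 2 MB) = 2 and appears on the output row labeled 4194304
 * (4 MB, the segment's upper boundary); any file larger than maxSize is
 * counted in the very last bucket.
 */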

View File

@ -0,0 +1,83 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.DataInputStream;
import java.io.IOException;
import org.apache.hadoop.classification.InterfaceAudience;
/**
 * An ImageLoader can accept a DataInputStream to a Hadoop FSImage file
* and walk over its structure using the supplied ImageVisitor.
*
* Each implementation of ImageLoader is designed to rapidly process an
* image file. As long as minor changes are made from one layout version
* to another, it is acceptable to tweak one implementation to read the next.
* However, if the layout version changes enough that it would make a
* processor slow or difficult to read, another processor should be created.
* This allows each processor to quickly read an image without getting
* bogged down in dealing with significant differences between layout versions.
*/
interface ImageLoader {
/**
   * @param in DataInputStream pointing to a Hadoop FSImage file
   * @param v Visitor to apply to the FSImage file
   * @param skipBlocks Whether to skip over each file's individual blocks
   */
  public void loadImage(DataInputStream in, ImageVisitor v,
      boolean skipBlocks) throws IOException;
/**
* Can this processor handle the specified version of FSImage file?
*
   * @param version FSImage file layout version
* @return True if this instance can process the file
*/
public boolean canLoadVersion(int version);
/**
* Factory for obtaining version of image loader that can read
* a particular image format.
*/
@InterfaceAudience.Private
public class LoaderFactory {
// Java doesn't support static methods on interfaces, which necessitates
// this factory class
/**
* Find an image loader capable of interpreting the specified
     * layout version number. If none exists, return null.
*
* @param version fsimage layout version number to be processed
* @return ImageLoader that can interpret specified version, or null
*/
static public ImageLoader getLoader(int version) {
// Easy to add more image processors as they are written
ImageLoader[] loaders = { new ImageLoaderCurrent() };
for (ImageLoader l : loaders) {
if (l.canLoadVersion(version))
return l;
}
return null;
}
}
}

View File

@ -0,0 +1,821 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.DataInputStream;
import java.io.IOException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo.AdminStates;
import org.apache.hadoop.hdfs.protocol.LayoutFlags;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.server.namenode.FSImageSerialization;
import org.apache.hadoop.hdfs.server.namenode.INodeId;
import org.apache.hadoop.hdfs.server.namenode.NameNodeLayoutVersion;
import org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor.ImageElement;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.security.token.delegation.DelegationKey;
/**
* ImageLoaderCurrent processes Hadoop FSImage files and walks over
* them using a provided ImageVisitor, calling the visitor at each element
* enumerated below.
*
* The only difference between v18 and v19 was the utilization of the
 * stickybit. Therefore, the same viewer can read either format.
*
* Versions -19 fsimage layout (with changes from -16 up):
* Image version (int)
 * Namespace ID (int)
* NumFiles (long)
* Generation stamp (long)
* INodes (count = NumFiles)
* INode
* Path (String)
* Replication (short)
* Modification Time (long as date)
* Access Time (long) // added in -16
* Block size (long)
* Num blocks (int)
* Blocks (count = Num blocks)
* Block
* Block ID (long)
* Num bytes (long)
* Generation stamp (long)
* Namespace Quota (long)
* Diskspace Quota (long) // added in -18
* Permissions
* Username (String)
* Groupname (String)
* OctalPerms (short -> String) // Modified in -19
* Symlink (String) // added in -23
* NumINodesUnderConstruction (int)
* INodesUnderConstruction (count = NumINodesUnderConstruction)
* INodeUnderConstruction
* Path (bytes as string)
* Replication (short)
* Modification time (long as date)
* Preferred block size (long)
* Num blocks (int)
* Blocks
* Block
* Block ID (long)
* Num bytes (long)
* Generation stamp (long)
* Permissions
* Username (String)
* Groupname (String)
* OctalPerms (short -> String)
* Client Name (String)
* Client Machine (String)
* NumLocations (int)
* DatanodeDescriptors (count = numLocations) // not loaded into memory
* short // but still in file
* long
* string
* long
* int
* string
* string
* enum
* CurrentDelegationKeyId (int)
* NumDelegationKeys (int)
* DelegationKeys (count = NumDelegationKeys)
* DelegationKeyLength (vint)
* DelegationKey (bytes)
* DelegationTokenSequenceNumber (int)
* NumDelegationTokens (int)
* DelegationTokens (count = NumDelegationTokens)
* DelegationTokenIdentifier
* owner (String)
* renewer (String)
* realUser (String)
* issueDate (vlong)
* maxDate (vlong)
* sequenceNumber (vint)
* masterKeyId (vint)
* expiryTime (long)
*
*/
class ImageLoaderCurrent implements ImageLoader {
protected final DateFormat dateFormat =
new SimpleDateFormat("yyyy-MM-dd HH:mm");
private static int[] versions = { -16, -17, -18, -19, -20, -21, -22, -23,
-24, -25, -26, -27, -28, -30, -31, -32, -33, -34, -35, -36, -37, -38, -39,
-40, -41, -42, -43, -44, -45, -46, -47, -48, -49, -50, -51 };
private int imageVersion = 0;
private final Map<Long, Boolean> subtreeMap = new HashMap<Long, Boolean>();
private final Map<Long, String> dirNodeMap = new HashMap<Long, String>();
/* (non-Javadoc)
   * @see ImageLoader#canLoadVersion(int)
*/
@Override
public boolean canLoadVersion(int version) {
for(int v : versions)
if(v == version) return true;
return false;
}
/* (non-Javadoc)
   * @see ImageLoader#loadImage(java.io.DataInputStream, ImageVisitor, boolean)
*/
@Override
public void loadImage(DataInputStream in, ImageVisitor v,
boolean skipBlocks) throws IOException {
boolean done = false;
try {
v.start();
v.visitEnclosingElement(ImageElement.FS_IMAGE);
imageVersion = in.readInt();
if( !canLoadVersion(imageVersion))
throw new IOException("Cannot process fslayout version " + imageVersion);
if (NameNodeLayoutVersion.supports(Feature.ADD_LAYOUT_FLAGS, imageVersion)) {
LayoutFlags.read(in);
}
v.visit(ImageElement.IMAGE_VERSION, imageVersion);
v.visit(ImageElement.NAMESPACE_ID, in.readInt());
long numInodes = in.readLong();
v.visit(ImageElement.GENERATION_STAMP, in.readLong());
if (NameNodeLayoutVersion.supports(Feature.SEQUENTIAL_BLOCK_ID, imageVersion)) {
v.visit(ImageElement.GENERATION_STAMP_V2, in.readLong());
v.visit(ImageElement.GENERATION_STAMP_V1_LIMIT, in.readLong());
v.visit(ImageElement.LAST_ALLOCATED_BLOCK_ID, in.readLong());
}
if (NameNodeLayoutVersion.supports(Feature.STORED_TXIDS, imageVersion)) {
v.visit(ImageElement.TRANSACTION_ID, in.readLong());
}
if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
v.visit(ImageElement.LAST_INODE_ID, in.readLong());
}
boolean supportSnapshot = NameNodeLayoutVersion.supports(Feature.SNAPSHOT,
imageVersion);
if (supportSnapshot) {
v.visit(ImageElement.SNAPSHOT_COUNTER, in.readInt());
int numSnapshots = in.readInt();
v.visit(ImageElement.NUM_SNAPSHOTS_TOTAL, numSnapshots);
for (int i = 0; i < numSnapshots; i++) {
processSnapshot(in, v);
}
}
if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_COMPRESSION, imageVersion)) {
boolean isCompressed = in.readBoolean();
v.visit(ImageElement.IS_COMPRESSED, String.valueOf(isCompressed));
if (isCompressed) {
String codecClassName = Text.readString(in);
v.visit(ImageElement.COMPRESS_CODEC, codecClassName);
CompressionCodecFactory codecFac = new CompressionCodecFactory(
new Configuration());
CompressionCodec codec = codecFac.getCodecByClassName(codecClassName);
if (codec == null) {
throw new IOException("Image compression codec not supported: "
+ codecClassName);
}
in = new DataInputStream(codec.createInputStream(in));
}
}
processINodes(in, v, numInodes, skipBlocks, supportSnapshot);
subtreeMap.clear();
dirNodeMap.clear();
processINodesUC(in, v, skipBlocks);
if (NameNodeLayoutVersion.supports(Feature.DELEGATION_TOKEN, imageVersion)) {
processDelegationTokens(in, v);
}
if (NameNodeLayoutVersion.supports(Feature.CACHING, imageVersion)) {
processCacheManagerState(in, v);
}
v.leaveEnclosingElement(); // FSImage
done = true;
} finally {
if (done) {
v.finish();
} else {
v.finishAbnormally();
}
}
}
/**
* Process CacheManager state from the fsimage.
*/
private void processCacheManagerState(DataInputStream in, ImageVisitor v)
throws IOException {
v.visit(ImageElement.CACHE_NEXT_ENTRY_ID, in.readLong());
final int numPools = in.readInt();
for (int i=0; i<numPools; i++) {
v.visit(ImageElement.CACHE_POOL_NAME, Text.readString(in));
processCachePoolPermission(in, v);
v.visit(ImageElement.CACHE_POOL_WEIGHT, in.readInt());
}
final int numEntries = in.readInt();
for (int i=0; i<numEntries; i++) {
v.visit(ImageElement.CACHE_ENTRY_PATH, Text.readString(in));
v.visit(ImageElement.CACHE_ENTRY_REPLICATION, in.readShort());
v.visit(ImageElement.CACHE_ENTRY_POOL_NAME, Text.readString(in));
}
}
/**
* Process the Delegation Token related section in fsimage.
*
* @param in DataInputStream to process
* @param v Visitor to walk over records
*/
private void processDelegationTokens(DataInputStream in, ImageVisitor v)
throws IOException {
v.visit(ImageElement.CURRENT_DELEGATION_KEY_ID, in.readInt());
int numDKeys = in.readInt();
v.visitEnclosingElement(ImageElement.DELEGATION_KEYS,
ImageElement.NUM_DELEGATION_KEYS, numDKeys);
for(int i =0; i < numDKeys; i++) {
DelegationKey key = new DelegationKey();
key.readFields(in);
v.visit(ImageElement.DELEGATION_KEY, key.toString());
}
v.leaveEnclosingElement();
v.visit(ImageElement.DELEGATION_TOKEN_SEQUENCE_NUMBER, in.readInt());
int numDTokens = in.readInt();
v.visitEnclosingElement(ImageElement.DELEGATION_TOKENS,
ImageElement.NUM_DELEGATION_TOKENS, numDTokens);
for(int i=0; i<numDTokens; i++){
DelegationTokenIdentifier id = new DelegationTokenIdentifier();
id.readFields(in);
long expiryTime = in.readLong();
v.visitEnclosingElement(ImageElement.DELEGATION_TOKEN_IDENTIFIER);
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_KIND,
id.getKind().toString());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_SEQNO,
id.getSequenceNumber());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_OWNER,
id.getOwner().toString());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_RENEWER,
id.getRenewer().toString());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_REALUSER,
id.getRealUser().toString());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
id.getIssueDate());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
id.getMaxDate());
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
expiryTime);
v.visit(ImageElement.DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
id.getMasterKeyId());
v.leaveEnclosingElement(); // DELEGATION_TOKEN_IDENTIFIER
}
v.leaveEnclosingElement(); // DELEGATION_TOKENS
}
/**
* Process the INodes under construction section of the fsimage.
*
* @param in DataInputStream to process
* @param v Visitor to walk over inodes
* @param skipBlocks Walk over each block?
*/
private void processINodesUC(DataInputStream in, ImageVisitor v,
boolean skipBlocks) throws IOException {
int numINUC = in.readInt();
v.visitEnclosingElement(ImageElement.INODES_UNDER_CONSTRUCTION,
ImageElement.NUM_INODES_UNDER_CONSTRUCTION, numINUC);
for(int i = 0; i < numINUC; i++) {
v.visitEnclosingElement(ImageElement.INODE_UNDER_CONSTRUCTION);
byte [] name = FSImageSerialization.readBytes(in);
String n = new String(name, "UTF8");
v.visit(ImageElement.INODE_PATH, n);
if (NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion)) {
long inodeId = in.readLong();
v.visit(ImageElement.INODE_ID, inodeId);
}
v.visit(ImageElement.REPLICATION, in.readShort());
v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
v.visit(ImageElement.PREFERRED_BLOCK_SIZE, in.readLong());
int numBlocks = in.readInt();
processBlocks(in, v, numBlocks, skipBlocks);
processPermission(in, v);
v.visit(ImageElement.CLIENT_NAME, FSImageSerialization.readString(in));
v.visit(ImageElement.CLIENT_MACHINE, FSImageSerialization.readString(in));
// Skip over the datanode descriptors, which are still stored in the
// file but are not used by the datanode or loaded into memory
int numLocs = in.readInt();
for(int j = 0; j < numLocs; j++) {
in.readShort();
in.readLong();
in.readLong();
in.readLong();
in.readInt();
FSImageSerialization.readString(in);
FSImageSerialization.readString(in);
WritableUtils.readEnum(in, AdminStates.class);
}
v.leaveEnclosingElement(); // INodeUnderConstruction
}
v.leaveEnclosingElement(); // INodesUnderConstruction
}
/**
* Process the blocks section of the fsimage.
*
* @param in Datastream to process
* @param v Visitor to walk over inodes
* @param skipBlocks Walk over each block?
*/
private void processBlocks(DataInputStream in, ImageVisitor v,
int numBlocks, boolean skipBlocks) throws IOException {
v.visitEnclosingElement(ImageElement.BLOCKS,
ImageElement.NUM_BLOCKS, numBlocks);
// directory or symlink or reference node, no blocks to process
if(numBlocks < 0) {
v.leaveEnclosingElement(); // Blocks
return;
}
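    // Each stored block record is exactly three longs: block id, num bytes,
    // and generation stamp, so a skip costs 3 * 8 = 24 bytes per block.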
if(skipBlocks) {
int bytesToSkip = ((Long.SIZE * 3 /* fields */) / 8 /*bits*/) * numBlocks;
if(in.skipBytes(bytesToSkip) != bytesToSkip)
throw new IOException("Error skipping over blocks");
} else {
for(int j = 0; j < numBlocks; j++) {
v.visitEnclosingElement(ImageElement.BLOCK);
v.visit(ImageElement.BLOCK_ID, in.readLong());
v.visit(ImageElement.NUM_BYTES, in.readLong());
v.visit(ImageElement.GENERATION_STAMP, in.readLong());
v.leaveEnclosingElement(); // Block
}
}
v.leaveEnclosingElement(); // Blocks
}
/**
* Extract the INode permissions stored in the fsimage file.
*
* @param in Datastream to process
* @param v Visitor to walk over inodes
*/
private void processPermission(DataInputStream in, ImageVisitor v)
throws IOException {
v.visitEnclosingElement(ImageElement.PERMISSIONS);
v.visit(ImageElement.USER_NAME, Text.readString(in));
v.visit(ImageElement.GROUP_NAME, Text.readString(in));
FsPermission fsp = new FsPermission(in.readShort());
v.visit(ImageElement.PERMISSION_STRING, fsp.toString());
v.leaveEnclosingElement(); // Permissions
}
/**
* Extract CachePool permissions stored in the fsimage file.
*
* @param in Datastream to process
* @param v Visitor to walk over inodes
*/
private void processCachePoolPermission(DataInputStream in, ImageVisitor v)
throws IOException {
v.visitEnclosingElement(ImageElement.PERMISSIONS);
v.visit(ImageElement.CACHE_POOL_OWNER_NAME, Text.readString(in));
v.visit(ImageElement.CACHE_POOL_GROUP_NAME, Text.readString(in));
FsPermission fsp = new FsPermission(in.readShort());
v.visit(ImageElement.CACHE_POOL_PERMISSION_STRING, fsp.toString());
v.leaveEnclosingElement(); // Permissions
}
/**
* Process the INode records stored in the fsimage.
*
* @param in Datastream to process
* @param v Visitor to walk over INodes
* @param numInodes Number of INodes stored in file
* @param skipBlocks Process all the blocks within the INode?
* @param supportSnapshot Whether or not the imageVersion supports snapshot
* @throws VisitException
* @throws IOException
*/
private void processINodes(DataInputStream in, ImageVisitor v,
long numInodes, boolean skipBlocks, boolean supportSnapshot)
throws IOException {
v.visitEnclosingElement(ImageElement.INODES,
ImageElement.NUM_INODES, numInodes);
if (NameNodeLayoutVersion.supports(Feature.FSIMAGE_NAME_OPTIMIZATION, imageVersion)) {
if (!supportSnapshot) {
processLocalNameINodes(in, v, numInodes, skipBlocks);
} else {
processLocalNameINodesWithSnapshot(in, v, skipBlocks);
}
} else { // full path name
processFullNameINodes(in, v, numInodes, skipBlocks);
}
v.leaveEnclosingElement(); // INodes
}
  /**
   * Process an image whose inodes are stored with local path names
   *
   * @param in image stream
   * @param v visitor
   * @param numInodes number of inodes to read
   * @param skipBlocks skip blocks or not
   * @throws IOException if any error occurs
   */
private void processLocalNameINodes(DataInputStream in, ImageVisitor v,
long numInodes, boolean skipBlocks) throws IOException {
// process root
processINode(in, v, skipBlocks, "", false);
numInodes--;
while (numInodes > 0) {
numInodes -= processDirectory(in, v, skipBlocks);
}
}
private int processDirectory(DataInputStream in, ImageVisitor v,
boolean skipBlocks) throws IOException {
String parentName = FSImageSerialization.readString(in);
return processChildren(in, v, skipBlocks, parentName);
}
/**
* Process image with local path name and snapshot support
*
* @param in image stream
* @param v visitor
* @param skipBlocks skip blocks or not
*/
private void processLocalNameINodesWithSnapshot(DataInputStream in,
ImageVisitor v, boolean skipBlocks) throws IOException {
// process root
processINode(in, v, skipBlocks, "", false);
processDirectoryWithSnapshot(in, v, skipBlocks);
}
/**
* Process directories when snapshot is supported.
*/
private void processDirectoryWithSnapshot(DataInputStream in, ImageVisitor v,
boolean skipBlocks) throws IOException {
// 1. load dir node id
long inodeId = in.readLong();
String dirName = dirNodeMap.remove(inodeId);
Boolean visitedRef = subtreeMap.get(inodeId);
if (visitedRef != null) {
if (visitedRef.booleanValue()) { // the subtree has been visited
return;
} else { // first time to visit
subtreeMap.put(inodeId, true);
}
} // else the dir is not linked by a RefNode, thus cannot be revisited
// 2. load possible snapshots
processSnapshots(in, v, dirName);
// 3. load children nodes
processChildren(in, v, skipBlocks, dirName);
// 4. load possible directory diff list
processDirectoryDiffList(in, v, dirName);
// recursively process sub-directories
final int numSubTree = in.readInt();
for (int i = 0; i < numSubTree; i++) {
processDirectoryWithSnapshot(in, v, skipBlocks);
}
}
/**
* Process snapshots of a snapshottable directory
*/
private void processSnapshots(DataInputStream in, ImageVisitor v,
String rootName) throws IOException {
final int numSnapshots = in.readInt();
if (numSnapshots >= 0) {
v.visitEnclosingElement(ImageElement.SNAPSHOTS,
ImageElement.NUM_SNAPSHOTS, numSnapshots);
for (int i = 0; i < numSnapshots; i++) {
// process snapshot
v.visitEnclosingElement(ImageElement.SNAPSHOT);
v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
v.leaveEnclosingElement();
}
v.visit(ImageElement.SNAPSHOT_QUOTA, in.readInt());
v.leaveEnclosingElement();
}
}
private void processSnapshot(DataInputStream in, ImageVisitor v)
throws IOException {
v.visitEnclosingElement(ImageElement.SNAPSHOT);
v.visit(ImageElement.SNAPSHOT_ID, in.readInt());
// process root of snapshot
v.visitEnclosingElement(ImageElement.SNAPSHOT_ROOT);
processINode(in, v, true, "", false);
v.leaveEnclosingElement();
v.leaveEnclosingElement();
}
private void processDirectoryDiffList(DataInputStream in, ImageVisitor v,
String currentINodeName) throws IOException {
final int numDirDiff = in.readInt();
if (numDirDiff >= 0) {
v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFFS,
ImageElement.NUM_SNAPSHOT_DIR_DIFF, numDirDiff);
for (int i = 0; i < numDirDiff; i++) {
        // process directory diffs in reverse chronological order
processDirectoryDiff(in, v, currentINodeName);
}
v.leaveEnclosingElement();
}
}
private void processDirectoryDiff(DataInputStream in, ImageVisitor v,
String currentINodeName) throws IOException {
v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF);
int snapshotId = in.readInt();
v.visit(ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CHILDREN_SIZE, in.readInt());
// process snapshotINode
boolean useRoot = in.readBoolean();
if (!useRoot) {
if (in.readBoolean()) {
v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES);
if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
processINodeDirectoryAttributes(in, v, currentINodeName);
} else {
processINode(in, v, true, currentINodeName, true);
}
v.leaveEnclosingElement();
}
}
// process createdList
int createdSize = in.readInt();
v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST,
ImageElement.SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE, createdSize);
for (int i = 0; i < createdSize; i++) {
String createdNode = FSImageSerialization.readString(in);
v.visit(ImageElement.SNAPSHOT_DIR_DIFF_CREATED_INODE, createdNode);
}
v.leaveEnclosingElement();
// process deletedList
int deletedSize = in.readInt();
v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST,
ImageElement.SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE, deletedSize);
for (int i = 0; i < deletedSize; i++) {
v.visitEnclosingElement(ImageElement.SNAPSHOT_DIR_DIFF_DELETED_INODE);
processINode(in, v, false, currentINodeName, true);
v.leaveEnclosingElement();
}
v.leaveEnclosingElement();
v.leaveEnclosingElement();
}
private void processINodeDirectoryAttributes(DataInputStream in, ImageVisitor v,
String parentName) throws IOException {
final String pathName = readINodePath(in, parentName);
v.visit(ImageElement.INODE_PATH, pathName);
processPermission(in, v);
v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
v.visit(ImageElement.NS_QUOTA, in.readLong());
v.visit(ImageElement.DS_QUOTA, in.readLong());
}
/** Process children under a directory */
private int processChildren(DataInputStream in, ImageVisitor v,
boolean skipBlocks, String parentName) throws IOException {
int numChildren = in.readInt();
for (int i = 0; i < numChildren; i++) {
processINode(in, v, skipBlocks, parentName, false);
}
return numChildren;
}
  /**
   * Process an image whose inodes are stored with full path names
   *
   * @param in image stream
   * @param v visitor
   * @param numInodes number of inodes to read
   * @param skipBlocks skip blocks or not
   * @throws IOException if any error occurs
   */
private void processFullNameINodes(DataInputStream in, ImageVisitor v,
long numInodes, boolean skipBlocks) throws IOException {
for(long i = 0; i < numInodes; i++) {
processINode(in, v, skipBlocks, null, false);
}
}
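  /**
   * Reconstruct the full path of an inode. Illustrative examples: with
   * local names, a stored name "bar" under parentName "/foo" yields
   * "/foo/bar", and a child of the root yields "/bar"; with older
   * full-path-name images, parentName is null and the stored string is
   * returned unchanged.
   */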
private String readINodePath(DataInputStream in, String parentName)
throws IOException {
String pathName = FSImageSerialization.readString(in);
if (parentName != null) { // local name
pathName = "/" + pathName;
if (!"/".equals(parentName)) { // children of non-root directory
pathName = parentName + pathName;
}
}
return pathName;
}
/**
* Process an INode
*
* @param in image stream
* @param v visitor
* @param skipBlocks skip blocks or not
* @param parentName the name of its parent node
* @param isSnapshotCopy whether or not the inode is a snapshot copy
* @throws IOException
*/
private void processINode(DataInputStream in, ImageVisitor v,
boolean skipBlocks, String parentName, boolean isSnapshotCopy)
throws IOException {
boolean supportSnapshot =
NameNodeLayoutVersion.supports(Feature.SNAPSHOT, imageVersion);
boolean supportInodeId =
NameNodeLayoutVersion.supports(Feature.ADD_INODE_ID, imageVersion);
v.visitEnclosingElement(ImageElement.INODE);
final String pathName = readINodePath(in, parentName);
v.visit(ImageElement.INODE_PATH, pathName);
long inodeId = INodeId.GRANDFATHER_INODE_ID;
if (supportInodeId) {
inodeId = in.readLong();
v.visit(ImageElement.INODE_ID, inodeId);
}
v.visit(ImageElement.REPLICATION, in.readShort());
v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
if(NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion))
v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
v.visit(ImageElement.BLOCK_SIZE, in.readLong());
int numBlocks = in.readInt();
processBlocks(in, v, numBlocks, skipBlocks);
if (numBlocks >= 0) { // File
if (supportSnapshot) {
// make sure subtreeMap only contains entry for directory
subtreeMap.remove(inodeId);
// process file diffs
processFileDiffList(in, v, parentName);
if (isSnapshotCopy) {
boolean underConstruction = in.readBoolean();
if (underConstruction) {
v.visit(ImageElement.CLIENT_NAME,
FSImageSerialization.readString(in));
v.visit(ImageElement.CLIENT_MACHINE,
FSImageSerialization.readString(in));
}
}
}
processPermission(in, v);
} else if (numBlocks == -1) { // Directory
if (supportSnapshot && supportInodeId) {
dirNodeMap.put(inodeId, pathName);
}
v.visit(ImageElement.NS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
if (NameNodeLayoutVersion.supports(Feature.DISKSPACE_QUOTA, imageVersion))
v.visit(ImageElement.DS_QUOTA, numBlocks == -1 ? in.readLong() : -1);
if (supportSnapshot) {
boolean snapshottable = in.readBoolean();
if (!snapshottable) {
boolean withSnapshot = in.readBoolean();
v.visit(ImageElement.IS_WITHSNAPSHOT_DIR, Boolean.toString(withSnapshot));
} else {
v.visit(ImageElement.IS_SNAPSHOTTABLE_DIR, Boolean.toString(snapshottable));
}
}
processPermission(in, v);
} else if (numBlocks == -2) {
v.visit(ImageElement.SYMLINK, Text.readString(in));
processPermission(in, v);
} else if (numBlocks == -3) { // reference node
final boolean isWithName = in.readBoolean();
int snapshotId = in.readInt();
if (isWithName) {
v.visit(ImageElement.SNAPSHOT_LAST_SNAPSHOT_ID, snapshotId);
} else {
v.visit(ImageElement.SNAPSHOT_DST_SNAPSHOT_ID, snapshotId);
}
final boolean firstReferred = in.readBoolean();
if (firstReferred) {
// if a subtree is linked by multiple "parents", the corresponding dir
// must be referred by a reference node. we put the reference node into
// the subtreeMap here and let its value be false. when we later visit
// the subtree for the first time, we change the value to true.
subtreeMap.put(inodeId, false);
v.visitEnclosingElement(ImageElement.SNAPSHOT_REF_INODE);
processINode(in, v, skipBlocks, parentName, isSnapshotCopy);
v.leaveEnclosingElement(); // referred inode
} else {
v.visit(ImageElement.SNAPSHOT_REF_INODE_ID, in.readLong());
}
}
v.leaveEnclosingElement(); // INode
}
private void processINodeFileAttributes(DataInputStream in, ImageVisitor v,
String parentName) throws IOException {
final String pathName = readINodePath(in, parentName);
v.visit(ImageElement.INODE_PATH, pathName);
processPermission(in, v);
v.visit(ImageElement.MODIFICATION_TIME, formatDate(in.readLong()));
if(NameNodeLayoutVersion.supports(Feature.FILE_ACCESS_TIME, imageVersion)) {
v.visit(ImageElement.ACCESS_TIME, formatDate(in.readLong()));
}
v.visit(ImageElement.REPLICATION, in.readShort());
v.visit(ImageElement.BLOCK_SIZE, in.readLong());
}
private void processFileDiffList(DataInputStream in, ImageVisitor v,
String currentINodeName) throws IOException {
final int size = in.readInt();
if (size >= 0) {
v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFFS,
ImageElement.NUM_SNAPSHOT_FILE_DIFF, size);
for (int i = 0; i < size; i++) {
processFileDiff(in, v, currentINodeName);
}
v.leaveEnclosingElement();
}
}
private void processFileDiff(DataInputStream in, ImageVisitor v,
String currentINodeName) throws IOException {
int snapshotId = in.readInt();
v.visitEnclosingElement(ImageElement.SNAPSHOT_FILE_DIFF,
ImageElement.SNAPSHOT_DIFF_SNAPSHOTID, snapshotId);
v.visit(ImageElement.SNAPSHOT_FILE_SIZE, in.readLong());
if (in.readBoolean()) {
v.visitEnclosingElement(ImageElement.SNAPSHOT_INODE_FILE_ATTRIBUTES);
if (NameNodeLayoutVersion.supports(Feature.OPTIMIZE_SNAPSHOT_INODES, imageVersion)) {
processINodeFileAttributes(in, v, currentINodeName);
} else {
processINode(in, v, true, currentINodeName, true);
}
v.leaveEnclosingElement();
}
v.leaveEnclosingElement();
}
/**
* Helper method to format dates during processing.
* @param date Date as read from image file
* @return String version of date format
*/
private String formatDate(long date) {
return dateFormat.format(new Date(date));
}
}

View File

@ -0,0 +1,212 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
/**
 * An implementation of ImageVisitor can traverse the structure of a
* Hadoop fsimage and respond to each of the structures within the file.
*/
abstract class ImageVisitor {
/**
* Structural elements of an FSImage that may be encountered within the
* file. ImageVisitors are able to handle processing any of these elements.
*/
public enum ImageElement {
FS_IMAGE,
IMAGE_VERSION,
NAMESPACE_ID,
IS_COMPRESSED,
COMPRESS_CODEC,
LAYOUT_VERSION,
NUM_INODES,
GENERATION_STAMP,
GENERATION_STAMP_V2,
GENERATION_STAMP_V1_LIMIT,
LAST_ALLOCATED_BLOCK_ID,
INODES,
INODE,
INODE_PATH,
REPLICATION,
MODIFICATION_TIME,
ACCESS_TIME,
BLOCK_SIZE,
NUM_BLOCKS,
BLOCKS,
BLOCK,
BLOCK_ID,
NUM_BYTES,
NS_QUOTA,
DS_QUOTA,
PERMISSIONS,
SYMLINK,
NUM_INODES_UNDER_CONSTRUCTION,
INODES_UNDER_CONSTRUCTION,
INODE_UNDER_CONSTRUCTION,
PREFERRED_BLOCK_SIZE,
CLIENT_NAME,
CLIENT_MACHINE,
USER_NAME,
GROUP_NAME,
PERMISSION_STRING,
CURRENT_DELEGATION_KEY_ID,
NUM_DELEGATION_KEYS,
DELEGATION_KEYS,
DELEGATION_KEY,
DELEGATION_TOKEN_SEQUENCE_NUMBER,
NUM_DELEGATION_TOKENS,
DELEGATION_TOKENS,
DELEGATION_TOKEN_IDENTIFIER,
DELEGATION_TOKEN_IDENTIFIER_KIND,
DELEGATION_TOKEN_IDENTIFIER_SEQNO,
DELEGATION_TOKEN_IDENTIFIER_OWNER,
DELEGATION_TOKEN_IDENTIFIER_RENEWER,
DELEGATION_TOKEN_IDENTIFIER_REALUSER,
DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE,
DELEGATION_TOKEN_IDENTIFIER_MAX_DATE,
DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME,
DELEGATION_TOKEN_IDENTIFIER_MASTER_KEY_ID,
TRANSACTION_ID,
LAST_INODE_ID,
INODE_ID,
SNAPSHOT_COUNTER,
NUM_SNAPSHOTS_TOTAL,
NUM_SNAPSHOTS,
SNAPSHOTS,
SNAPSHOT,
SNAPSHOT_ID,
SNAPSHOT_ROOT,
SNAPSHOT_QUOTA,
NUM_SNAPSHOT_DIR_DIFF,
SNAPSHOT_DIR_DIFFS,
SNAPSHOT_DIR_DIFF,
SNAPSHOT_DIFF_SNAPSHOTID,
SNAPSHOT_DIR_DIFF_CHILDREN_SIZE,
SNAPSHOT_INODE_FILE_ATTRIBUTES,
SNAPSHOT_INODE_DIRECTORY_ATTRIBUTES,
SNAPSHOT_DIR_DIFF_CREATEDLIST,
SNAPSHOT_DIR_DIFF_CREATEDLIST_SIZE,
SNAPSHOT_DIR_DIFF_CREATED_INODE,
SNAPSHOT_DIR_DIFF_DELETEDLIST,
SNAPSHOT_DIR_DIFF_DELETEDLIST_SIZE,
SNAPSHOT_DIR_DIFF_DELETED_INODE,
IS_SNAPSHOTTABLE_DIR,
IS_WITHSNAPSHOT_DIR,
SNAPSHOT_FILE_DIFFS,
SNAPSHOT_FILE_DIFF,
NUM_SNAPSHOT_FILE_DIFF,
SNAPSHOT_FILE_SIZE,
SNAPSHOT_DST_SNAPSHOT_ID,
SNAPSHOT_LAST_SNAPSHOT_ID,
SNAPSHOT_REF_INODE_ID,
SNAPSHOT_REF_INODE,
CACHE_NEXT_ENTRY_ID,
CACHE_NUM_POOLS,
CACHE_POOL_NAME,
CACHE_POOL_OWNER_NAME,
CACHE_POOL_GROUP_NAME,
CACHE_POOL_PERMISSION_STRING,
CACHE_POOL_WEIGHT,
CACHE_NUM_ENTRIES,
CACHE_ENTRY_PATH,
CACHE_ENTRY_REPLICATION,
CACHE_ENTRY_POOL_NAME
}
/**
* Begin visiting the fsimage structure. Opportunity to perform
* any initialization necessary for the implementing visitor.
*/
abstract void start() throws IOException;
/**
* Finish visiting the fsimage structure. Opportunity to perform any
* clean up necessary for the implementing visitor.
*/
abstract void finish() throws IOException;
/**
* Finish visiting the fsimage structure after an error has occurred
* during the processing. Opportunity to perform any clean up necessary
* for the implementing visitor.
*/
abstract void finishAbnormally() throws IOException;
/**
   * Visit a non-enclosing element of the fsimage with the specified value.
*
* @param element FSImage element
* @param value Element's value
*/
abstract void visit(ImageElement element, String value) throws IOException;
// Convenience methods to automatically convert numeric value types to strings
void visit(ImageElement element, int value) throws IOException {
visit(element, Integer.toString(value));
}
void visit(ImageElement element, long value) throws IOException {
visit(element, Long.toString(value));
}
/**
* Begin visiting an element that encloses another element, such as
* the beginning of the list of blocks that comprise a file.
*
* @param element Element being visited
*/
abstract void visitEnclosingElement(ImageElement element)
throws IOException;
/**
* Begin visiting an element that encloses another element, such as
* the beginning of the list of blocks that comprise a file.
*
* Also provide an additional key and value for the element, such as the
   * number of items within the element.
*
* @param element Element being visited
* @param key Key describing the element being visited
* @param value Value associated with element being visited
*/
abstract void visitEnclosingElement(ImageElement element,
ImageElement key, String value) throws IOException;
// Convenience methods to automatically convert value types to strings
void visitEnclosingElement(ImageElement element,
ImageElement key, int value)
throws IOException {
visitEnclosingElement(element, key, Integer.toString(value));
}
void visitEnclosingElement(ImageElement element,
ImageElement key, long value)
throws IOException {
visitEnclosingElement(element, key, Long.toString(value));
}
/**
* Leave current enclosing element. Called, for instance, at the end of
   * processing the blocks that comprise a file.
*/
abstract void leaveEnclosingElement() throws IOException;
}
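/*
 * A minimal, hypothetical visitor sketch (not part of the original change),
 * showing the smallest surface a subclass implements. It only counts
 * inodes and ignores every other element.
 */
class InodeCountingVisitor extends ImageVisitor {
  private long count = 0;
  @Override
  void start() {}
  @Override
  void finish() { System.out.println("inodes: " + count); }
  @Override
  void finishAbnormally() { finish(); }
  @Override
  void visit(ImageElement element, String value) {}
  @Override
  void visitEnclosingElement(ImageElement element) {
    if (element == ImageElement.INODE) {
      count++;
    }
  }
  @Override
  void visitEnclosingElement(ImageElement element, ImageElement key,
      String value) {
    visitEnclosingElement(element);
  }
  @Override
  void leaveEnclosingElement() {}
}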

View File

@ -0,0 +1,111 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.Date;
/**
* IndentedImageVisitor walks over an FSImage and displays its structure
* using indenting to organize sections within the image file.
*/
class IndentedImageVisitor extends TextWriterImageVisitor {
public IndentedImageVisitor(String filename) throws IOException {
super(filename);
}
public IndentedImageVisitor(String filename, boolean printToScreen) throws IOException {
super(filename, printToScreen);
}
  final private DepthCounter dc = new DepthCounter(); // to track leading spacing
@Override
void start() throws IOException {}
@Override
void finish() throws IOException { super.finish(); }
@Override
void finishAbnormally() throws IOException {
System.out.println("*** Image processing finished abnormally. Ending ***");
super.finishAbnormally();
}
@Override
void leaveEnclosingElement() throws IOException {
dc.decLevel();
}
@Override
void visit(ImageElement element, String value) throws IOException {
printIndents();
write(element + " = " + value + "\n");
}
@Override
void visit(ImageElement element, long value) throws IOException {
if ((element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_EXPIRY_TIME) ||
(element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_ISSUE_DATE) ||
(element == ImageElement.DELEGATION_TOKEN_IDENTIFIER_MAX_DATE)) {
visit(element, new Date(value).toString());
} else {
visit(element, Long.toString(value));
}
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
printIndents();
write(element + "\n");
dc.incLevel();
}
// Print element, along with associated key/value pair, in brackets
@Override
void visitEnclosingElement(ImageElement element,
ImageElement key, String value)
throws IOException {
printIndents();
write(element + " [" + key + " = " + value + "]\n");
dc.incLevel();
}
/**
* Print an appropriate number of spaces for the current level.
* FsImages can potentially be millions of lines long, so caching can
* significantly speed up output.
*/
final private static String [] indents = { "",
" ",
" ",
" ",
" ",
" ",
" "};
private void printIndents() throws IOException {
try {
write(indents[dc.getLevel()]);
} catch (IndexOutOfBoundsException e) {
      // There's no reason an fsimage would need a deeper indent
for(int i = 0; i < dc.getLevel(); i++)
write(" ");
}
}
}
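/*
 * Illustrative output fragment (all values are made up):
 *
 * FS_IMAGE
 *   IMAGE_VERSION = -51
 *   NAMESPACE_ID = 397526153
 *   INODES [NUM_INODES = 2]
 *     INODE
 *       INODE_PATH = /
 */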

View File

@ -0,0 +1,178 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.Formatter;
import java.util.LinkedList;
/**
 * LsImageVisitor displays the inodes of the namespace in a format very similar
* to the output of ls/lsr. Entries are marked as directories or not,
* permissions listed, replication, username and groupname, along with size,
* modification date and full path.
*
* Note: A significant difference between the output of the lsr command
* and this image visitor is that this class cannot sort the file entries;
* they are listed in the order they are stored within the fsimage file.
* Therefore, the output of this class cannot be directly compared to the
* output of the lsr command.
*/
class LsImageVisitor extends TextWriterImageVisitor {
final private LinkedList<ImageElement> elemQ = new LinkedList<ImageElement>();
private int numBlocks;
private String perms;
private int replication;
private String username;
private String group;
private long filesize;
private String modTime;
private String path;
private String linkTarget;
private boolean inInode = false;
final private StringBuilder sb = new StringBuilder();
final private Formatter formatter = new Formatter(sb);
public LsImageVisitor(String filename) throws IOException {
super(filename);
}
public LsImageVisitor(String filename, boolean printToScreen) throws IOException {
super(filename, printToScreen);
}
/**
* Start a new line of output, reset values.
*/
private void newLine() {
numBlocks = 0;
perms = username = group = path = linkTarget = "";
    filesize = 0L;
replication = 0;
inInode = true;
}
/**
* All the values have been gathered. Print them to the console in an
* ls-style format.
*/
private final static int widthRepl = 2;
private final static int widthUser = 8;
private final static int widthGroup = 10;
private final static int widthSize = 10;
private final static int widthMod = 10;
private final static String lsStr = " %" + widthRepl + "s %" + widthUser +
"s %" + widthGroup + "s %" + widthSize +
"d %" + widthMod + "s %s";
private void printLine() throws IOException {
sb.append(numBlocks < 0 ? "d" : "-");
sb.append(perms);
if (0 != linkTarget.length()) {
path = path + " -> " + linkTarget;
}
formatter.format(lsStr, replication > 0 ? replication : "-",
username, group, filesize, modTime, path);
sb.append("\n");
write(sb.toString());
sb.setLength(0); // clear string builder
inInode = false;
}
@Override
void start() throws IOException {}
@Override
void finish() throws IOException {
super.finish();
}
@Override
void finishAbnormally() throws IOException {
System.out.println("Input ended unexpectedly.");
super.finishAbnormally();
}
@Override
void leaveEnclosingElement() throws IOException {
ImageElement elem = elemQ.pop();
if(elem == ImageElement.INODE)
printLine();
}
// Maintain state of location within the image tree and record
// values needed to display the inode in ls-style format.
@Override
void visit(ImageElement element, String value) throws IOException {
if(inInode) {
switch(element) {
case INODE_PATH:
if(value.equals("")) path = "/";
else path = value;
break;
case PERMISSION_STRING:
perms = value;
break;
case REPLICATION:
replication = Integer.valueOf(value);
break;
case USER_NAME:
username = value;
break;
case GROUP_NAME:
group = value;
break;
case NUM_BYTES:
filesize += Long.valueOf(value);
break;
case MODIFICATION_TIME:
modTime = value;
break;
case SYMLINK:
linkTarget = value;
break;
default:
// This is OK. We're not looking for all the values.
break;
}
}
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
elemQ.push(element);
if(element == ImageElement.INODE)
newLine();
}
@Override
void visitEnclosingElement(ImageElement element,
ImageElement key, String value) throws IOException {
elemQ.push(element);
if(element == ImageElement.INODE)
newLine();
else if (element == ImageElement.BLOCKS)
numBlocks = Integer.valueOf(value);
}
}
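/*
 * Illustrative output line (all field values are made up; alignment is
 * approximate):
 *
 * -rw-r--r--  3   hadoop     hadoop  134217728 2014-05-14 /data/part-00000
 *
 * The leading character is "d" for directories, whose replication column
 * shows "-", and entries appear in fsimage order rather than sorted order.
 */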

View File

@ -0,0 +1,118 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map.Entry;
import org.apache.hadoop.classification.InterfaceAudience;
/**
* File name distribution visitor.
* <p>
 * It analyzes the file names in an fsimage and prints the following:
 * <li>Number of unique file names</li>
 * <li>Number of file names in each reuse-frequency range, together with the
 * number of files that share those names</li>
 * <li>Heap that would be saved if the file name objects were reused</li>
*/
@InterfaceAudience.Private
public class NameDistributionVisitor extends TextWriterImageVisitor {
HashMap<String, Integer> counts = new HashMap<String, Integer>();
public NameDistributionVisitor(String filename, boolean printToScreen)
throws IOException {
super(filename, printToScreen);
}
@Override
void finish() throws IOException {
final int BYTEARRAY_OVERHEAD = 24;
write("Total unique file names " + counts.size());
// Columns: Frequency of file occurrence, savings in heap, total files using
// the name and number of file names
final long stats[][] = { { 100000, 0, 0, 0 },
{ 10000, 0, 0, 0 },
{ 1000, 0, 0, 0 },
{ 100, 0, 0, 0 },
{ 10, 0, 0, 0 },
{ 5, 0, 0, 0 },
{ 4, 0, 0, 0 },
{ 3, 0, 0, 0 },
{ 2, 0, 0, 0 }};
int highbound = Integer.MIN_VALUE;
for (Entry<String, Integer> entry : counts.entrySet()) {
highbound = Math.max(highbound, entry.getValue());
for (int i = 0; i < stats.length; i++) {
if (entry.getValue() >= stats[i][0]) {
stats[i][1] += (BYTEARRAY_OVERHEAD + entry.getKey().length())
* (entry.getValue() - 1);
stats[i][2] += entry.getValue();
stats[i][3]++;
break;
}
}
}
long lowbound = 0;
long totalsavings = 0;
for (long[] stat : stats) {
lowbound = stat[0];
totalsavings += stat[1];
String range = lowbound == highbound ? " " + lowbound :
" between " + lowbound + "-" + highbound;
write("\n" + stat[3] + " names are used by " + stat[2] + " files"
+ range + " times. Heap savings ~" + stat[1] + " bytes.");
highbound = (int) stat[0] - 1;
}
write("\n\nTotal saved heap ~" + totalsavings + "bytes.\n");
super.finish();
}
@Override
void visit(ImageElement element, String value) throws IOException {
if (element == ImageElement.INODE_PATH) {
String filename = value.substring(value.lastIndexOf("/") + 1);
if (counts.containsKey(filename)) {
counts.put(filename, counts.get(filename) + 1);
} else {
counts.put(filename, 1);
}
}
}
@Override
void leaveEnclosingElement() throws IOException {
}
@Override
void start() throws IOException {
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
}
@Override
void visitEnclosingElement(ImageElement element, ImageElement key,
String value) throws IOException {
}
}
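/*
 * Worked example (illustrative, not from the original change): a
 * 10-character file name reused by 1,000 files falls into the ">= 1000"
 * bucket and would save (24 + 10) * (1000 - 1) = 33,966 bytes of heap if
 * the name bytes were shared instead of duplicated per inode.
 */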

View File

@ -0,0 +1,274 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.cli.PosixParser;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.hdfs.server.namenode.FSEditLogLoader.PositionTrackingInputStream;
/**
* OfflineImageViewer to dump the contents of a Hadoop image file to XML
* or the console. Main entry point into the utility, either via the
* command line or programmatically.
*/
@InterfaceAudience.Private
public class OfflineImageViewer {
public static final Log LOG = LogFactory.getLog(OfflineImageViewer.class);
private final static String usage =
"Usage: bin/hdfs oiv [OPTIONS] -i INPUTFILE -o OUTPUTFILE\n" +
"Offline Image Viewer\n" +
"View a Hadoop fsimage INPUTFILE using the specified PROCESSOR,\n" +
"saving the results in OUTPUTFILE.\n" +
"\n" +
"The oiv utility will attempt to parse correctly formed image files\n" +
"and will abort fail with mal-formed image files.\n" +
"\n" +
"The tool works offline and does not require a running cluster in\n" +
"order to process an image file.\n" +
"\n" +
"The following image processors are available:\n" +
" * Ls: The default image processor generates an lsr-style listing\n" +
" of the files in the namespace, with the same fields in the same\n" +
" order. Note that in order to correctly determine file sizes,\n" +
" this formatter cannot skip blocks and will override the\n" +
" -skipBlocks option.\n" +
" * Indented: This processor enumerates over all of the elements in\n" +
" the fsimage file, using levels of indentation to delineate\n" +
" sections within the file.\n" +
" * Delimited: Generate a text file with all of the elements common\n" +
" to both inodes and inodes-under-construction, separated by a\n" +
" delimiter. The default delimiter is \u0001, though this may be\n" +
" changed via the -delimiter argument. This processor also overrides\n" +
" the -skipBlocks option for the same reason as the Ls processor\n" +
" * XML: This processor creates an XML document with all elements of\n" +
" the fsimage enumerated, suitable for further analysis by XML\n" +
" tools.\n" +
" * FileDistribution: This processor analyzes the file size\n" +
" distribution in the image.\n" +
" -maxSize specifies the range [0, maxSize] of file sizes to be\n" +
" analyzed (128GB by default).\n" +
" -step defines the granularity of the distribution. (2MB by default)\n" +
" * NameDistribution: This processor analyzes the file names\n" +
" in the image and prints total number of file names and how frequently\n" +
" file names are reused.\n" +
"\n" +
"Required command line arguments:\n" +
"-i,--inputFile <arg> FSImage file to process.\n" +
"-o,--outputFile <arg> Name of output file. If the specified\n" +
" file exists, it will be overwritten.\n" +
"\n" +
"Optional command line arguments:\n" +
"-p,--processor <arg> Select which type of processor to apply\n" +
" against image file." +
" (Ls|XML|Delimited|Indented|FileDistribution).\n" +
"-h,--help Display usage information and exit\n" +
"-printToScreen For processors that write to a file, also\n" +
" output to screen. On large image files this\n" +
" will dramatically increase processing time.\n" +
"-skipBlocks Skip inodes' blocks information. May\n" +
" significantly decrease output.\n" +
" (default = false).\n" +
"-delimiter <arg> Delimiting string to use with Delimited processor\n";
private final boolean skipBlocks;
private final String inputFile;
private final ImageVisitor processor;
public OfflineImageViewer(String inputFile, ImageVisitor processor,
boolean skipBlocks) {
this.inputFile = inputFile;
this.processor = processor;
this.skipBlocks = skipBlocks;
}
/**
* Process image file.
*/
public void go() throws IOException {
DataInputStream in = null;
PositionTrackingInputStream tracker = null;
ImageLoader fsip = null;
boolean done = false;
try {
tracker = new PositionTrackingInputStream(new BufferedInputStream(
new FileInputStream(new File(inputFile))));
in = new DataInputStream(tracker);
int imageVersionFile = findImageVersion(in);
fsip = ImageLoader.LoaderFactory.getLoader(imageVersionFile);
if(fsip == null)
throw new IOException("No image processor to read version " +
imageVersionFile + " is available.");
fsip.loadImage(in, processor, skipBlocks);
done = true;
} finally {
if (!done) {
String offset = (tracker == null) ? "unknown"
: String.valueOf(tracker.getPos());
LOG.error("image loading failed at offset " + offset);
}
IOUtils.cleanup(LOG, in, tracker);
}
}
/**
* Check the version number of an fsimage DataInputStream.
*
* The stream is reset to the position at which it was passed in, so this
* method has no net effect on the stream's read pointer.
*
* @param in DataInputStream over an fsimage
* @return Filesystem layout version of the fsimage represented by the stream
* @throws IOException If there is a problem reading from in
*/
private int findImageVersion(DataInputStream in) throws IOException {
in.mark(42); // arbitrary amount, resetting immediately
int version = in.readInt();
in.reset();
return version;
}
/**
* Build command-line options and descriptions
*/
public static Options buildOptions() {
Options options = new Options();
// Build in/output file arguments, which are required, but there is no
// addOption method that can specify this
OptionBuilder.isRequired();
OptionBuilder.hasArgs();
OptionBuilder.withLongOpt("outputFile");
options.addOption(OptionBuilder.create("o"));
OptionBuilder.isRequired();
OptionBuilder.hasArgs();
OptionBuilder.withLongOpt("inputFile");
options.addOption(OptionBuilder.create("i"));
options.addOption("p", "processor", true, "");
options.addOption("h", "help", false, "");
options.addOption("skipBlocks", false, "");
options.addOption("printToScreen", false, "");
options.addOption("delimiter", true, "");
return options;
}
/**
* Entry point to command-line-driven operation. User may specify
* options and start fsimage viewer from the command line. Program
* will process image file and exit cleanly or, if an error is
* encountered, inform user and exit.
*
* @param args Command line options
* @throws IOException
*/
public static void main(String[] args) throws IOException {
Options options = buildOptions();
if(args.length == 0) {
printUsage();
return;
}
CommandLineParser parser = new PosixParser();
CommandLine cmd;
try {
cmd = parser.parse(options, args);
} catch (ParseException e) {
System.out.println("Error parsing command-line options: ");
printUsage();
return;
}
if(cmd.hasOption("h")) { // print help and exit
printUsage();
return;
}
boolean skipBlocks = cmd.hasOption("skipBlocks");
boolean printToScreen = cmd.hasOption("printToScreen");
String inputFile = cmd.getOptionValue("i");
String processor = cmd.getOptionValue("p", "Ls");
String outputFile = cmd.getOptionValue("o");
String delimiter = cmd.getOptionValue("delimiter");
if( !(delimiter == null || processor.equals("Delimited")) ) {
System.out.println("Can only specify -delimiter with Delimited processor");
printUsage();
return;
}
ImageVisitor v;
if(processor.equals("Indented")) {
v = new IndentedImageVisitor(outputFile, printToScreen);
} else if (processor.equals("XML")) {
v = new XmlImageVisitor(outputFile, printToScreen);
} else if (processor.equals("Delimited")) {
v = delimiter == null ?
new DelimitedImageVisitor(outputFile, printToScreen) :
new DelimitedImageVisitor(outputFile, printToScreen, delimiter);
skipBlocks = false;
} else if (processor.equals("FileDistribution")) {
long maxSize = Long.parseLong(cmd.getOptionValue("maxSize", "0"));
int step = Integer.parseInt(cmd.getOptionValue("step", "0"));
v = new FileDistributionVisitor(outputFile, maxSize, step);
} else if (processor.equals("NameDistribution")) {
v = new NameDistributionVisitor(outputFile, printToScreen);
} else {
v = new LsImageVisitor(outputFile, printToScreen);
skipBlocks = false;
}
try {
OfflineImageViewer d = new OfflineImageViewer(inputFile, v, skipBlocks);
d.go();
} catch (EOFException e) {
System.err.println("Input file ended unexpectedly. Exiting");
} catch(IOException e) {
System.err.println("Encountered exception. Exiting: " + e.getMessage());
}
}
/**
* Print application usage instructions.
*/
private static void printUsage() {
System.out.println(usage);
}
}
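
Since the constructor and go() are public, the viewer can also be driven programmatically. A minimal sketch, not part of this commit; the paths are hypothetical, and it is assumed to live in the same package as the classes above because the ImageVisitor subclasses are constructed directly:

package org.apache.hadoop.hdfs.tools.offlineImageViewer;

import java.io.IOException;

public class OivUsageSketch {
  public static void main(String[] args) throws IOException {
    // Hypothetical input fsimage and output XML paths.
    ImageVisitor v = new XmlImageVisitor("/tmp/fsimage.xml", false);
    OfflineImageViewer viewer =
        new OfflineImageViewer("/tmp/fsimage", v, true /* skipBlocks */);
    viewer.go();  // parses the legacy fsimage and writes the XML dump
  }
}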

View File

@ -0,0 +1,109 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import com.google.common.base.Charsets;
/**
* TextWriterImageVisitor mixes in the ability for ImageVisitor
* implementations to easily write their output to a text file.
*
* Implementing classes should be sure to call the super methods for the
* constructors, finish and finishAbnormally, so that the underlying file
* is opened and closed correctly.
*
* Note that this class does not add newlines to text written to file or (if
* enabled) screen. That is the implementing class' responsibility.
*/
abstract class TextWriterImageVisitor extends ImageVisitor {
private boolean printToScreen = false;
private boolean okToWrite = false;
final private OutputStreamWriter fw;
/**
* Create a processor that writes to the file named.
*
* @param filename Name of file to write output to
*/
public TextWriterImageVisitor(String filename) throws IOException {
this(filename, false);
}
/**
* Create a processor that writes to the file named and may or may not
* also output to the screen, as specified.
*
* @param filename Name of file to write output to
* @param printToScreen Mirror output to screen?
*/
public TextWriterImageVisitor(String filename, boolean printToScreen)
throws IOException {
super();
this.printToScreen = printToScreen;
fw = new OutputStreamWriter(new FileOutputStream(filename), Charsets.UTF_8);
okToWrite = true;
}
/* (non-Javadoc)
* @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finish()
*/
@Override
void finish() throws IOException {
close();
}
/* (non-Javadoc)
* @see org.apache.hadoop.hdfs.tools.offlineImageViewer.ImageVisitor#finishAbnormally()
*/
@Override
void finishAbnormally() throws IOException {
close();
}
/**
* Close output stream and prevent further writing
*/
private void close() throws IOException {
fw.close();
okToWrite = false;
}
/**
* Write parameter to output file (and possibly screen).
*
* @param toWrite Text to write to file
*/
protected void write(String toWrite) throws IOException {
if(!okToWrite)
throw new IOException("file not open for writing.");
if(printToScreen)
System.out.print(toWrite);
try {
fw.write(toWrite);
} catch (IOException e) {
okToWrite = false;
throw e;
}
}
}
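
A hypothetical minimal subclass, illustrating the contract described in the Javadoc above: call the super constructor, call super.finish(), and add newlines explicitly. The empty overrides mirror what concrete visitors such as NameDistributionVisitor supply; it is assumed to sit in the same package as the classes above.

import java.io.IOException;

class InodeCountingVisitor extends TextWriterImageVisitor {
  private long inodes = 0;

  InodeCountingVisitor(String filename) throws IOException {
    super(filename);  // opens the output file
  }

  @Override
  void visit(ImageElement element, String value) throws IOException {
    if (element == ImageElement.INODE_PATH) {
      inodes++;
    }
  }

  @Override
  void finish() throws IOException {
    write("Total inodes: " + inodes + "\n");  // newline added by the subclass
    super.finish();                           // closes the output file
  }

  @Override void start() throws IOException {}
  @Override void leaveEnclosingElement() throws IOException {}
  @Override void visitEnclosingElement(ImageElement element)
      throws IOException {}
  @Override void visitEnclosingElement(ImageElement element, ImageElement key,
      String value) throws IOException {}
}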

View File

@ -0,0 +1,88 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.tools.offlineImageViewer;
import java.io.IOException;
import java.util.LinkedList;
/**
* An XmlImageVisitor walks over an fsimage structure and writes out
* an equivalent XML document that contains the fsimage's components.
*/
public class XmlImageVisitor extends TextWriterImageVisitor {
final private LinkedList<ImageElement> tagQ =
new LinkedList<ImageElement>();
public XmlImageVisitor(String filename) throws IOException {
super(filename, false);
}
public XmlImageVisitor(String filename, boolean printToScreen)
throws IOException {
super(filename, printToScreen);
}
@Override
void finish() throws IOException {
super.finish();
}
@Override
void finishAbnormally() throws IOException {
write("\n<!-- Error processing image file. Exiting -->\n");
super.finishAbnormally();
}
@Override
void leaveEnclosingElement() throws IOException {
if (tagQ.isEmpty())
throw new IOException("Tried to exit non-existent enclosing element " +
"in FSImage file");
ImageElement element = tagQ.pop();
write("</" + element.toString() + ">\n");
}
@Override
void start() throws IOException {
write("<?xml version=\"1.0\" ?>\n");
}
@Override
void visit(ImageElement element, String value) throws IOException {
writeTag(element.toString(), value);
}
@Override
void visitEnclosingElement(ImageElement element) throws IOException {
write("<" + element.toString() + ">\n");
tagQ.push(element);
}
@Override
void visitEnclosingElement(ImageElement element,
ImageElement key, String value)
throws IOException {
write("<" + element.toString() + " " + key + "=\"" + value +"\">\n");
tagQ.push(element);
}
private void writeTag(String tag, String value) throws IOException {
write("<" + tag + ">" + value + "</" + tag + ">\n");
}
}
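
For reference, an illustrative visit sequence and the XML the stack-based methods above would emit; the INODE element name is hypothetical, standing in for whatever ImageElement the loader reports:

// start();                          ->  <?xml version="1.0" ?>
// visitEnclosingElement(INODE);     ->  <INODE>
// visit(INODE_PATH, "/a/b");        ->  <INODE_PATH>/a/b</INODE_PATH>
// leaveEnclosingElement();          ->  </INODE>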

View File

@ -44,6 +44,7 @@
import java.util.List;
import java.util.Random;
import com.google.common.io.Files;
import org.apache.commons.cli.ParseException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -2407,6 +2408,46 @@ public void testCommandLineParsing() throws ParseException {
}
}
@Test
public void testLegacyOivImage() throws Exception {
MiniDFSCluster cluster = null;
SecondaryNameNode secondary = null;
File tmpDir = Files.createTempDir();
Configuration conf = new HdfsConfiguration();
conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
tmpDir.getAbsolutePath());
conf.set(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY,
"2");
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0)
.format(true).build();
secondary = startSecondaryNameNode(conf);
// Checkpoint once
secondary.doCheckpoint();
String files1[] = tmpDir.list();
assertEquals("Only one file is expected", 1, files1.length);
// Perform two more checkpoints and check whether retention management
// is working.
secondary.doCheckpoint();
secondary.doCheckpoint();
String files2[] = tmpDir.list();
assertEquals("Two files are expected", 2, files2.length);
// Verify that the first file is deleted.
for (String fName : files2) {
assertFalse(fName.equals(files1[0]));
}
} finally {
cleanup(secondary);
cleanup(cluster);
tmpDir.delete();
}
}
private static void cleanup(SecondaryNameNode snn) {
if (snn != null) {
try {

View File

@ -66,24 +66,28 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.io.Files;
public class TestStandbyCheckpoints {
private static final int NUM_DIRS_IN_LOG = 200000;
protected MiniDFSCluster cluster;
protected NameNode nn0, nn1;
protected FileSystem fs;
protected File tmpOivImgDir;
private static final Log LOG = LogFactory.getLog(TestStandbyCheckpoints.class);
@SuppressWarnings("rawtypes")
@Before
public void setupCluster() throws Exception {
tmpOivImgDir = Files.createTempDir();
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
conf.set(DFSConfigKeys.DFS_NAMENODE_LEGACY_OIV_IMAGE_DIR_KEY,
tmpOivImgDir.getAbsolutePath());
// Dial down the retention of extra edits and checkpoints. This is to
// help catch regressions of HDFS-4238 (SBN should not purge shared edits)
conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1);
@ -129,6 +133,9 @@ public void testSBNCheckpoints() throws Exception {
// Once the standby catches up, it should notice that it needs to
// do a checkpoint and save one to its local directories.
HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12));
// It should have saved the oiv image too.
assertEquals("One file is expected", 1, tmpOivImgDir.list().length);
// It should also upload it back to the active.
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12));