HDFS-5933. Optimize the FSImage layout for ACLs. Contributed by Haohui Mai.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/branches/HDFS-4685@1567785 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris Nauroth 2014-02-12 22:56:32 +00:00
parent fc14360b03
commit ea0b21af15
7 changed files with 79 additions and 110 deletions

View File

@ -78,6 +78,8 @@ HDFS-4685 (Unreleased)
HDFS-5923. Do not persist the ACL bit in the FsPermission. HDFS-5923. Do not persist the ACL bit in the FsPermission.
(Haohui Mai via cnauroth) (Haohui Mai via cnauroth)
HDFS-5933. Optimize the FSImage layout for ACLs (Haohui Mai via cnauroth)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES

View File

@ -93,7 +93,6 @@ import org.apache.hadoop.hdfs.protocol.HdfsConstants;
import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEditLogProto; import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEditLogProto;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclFeatureProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenIdentifier;
import org.apache.hadoop.hdfs.util.XMLUtils; import org.apache.hadoop.hdfs.util.XMLUtils;
@ -468,14 +467,7 @@ public abstract class FSEditLogOp {
permissions.write(out); permissions.write(out);
if (this.opCode == OP_ADD) { if (this.opCode == OP_ADD) {
boolean hasAcl = aclEntries != null; AclEditLogUtil.write(aclEntries, out);
out.writeBoolean(hasAcl);
if (hasAcl) {
AclFeatureProto.newBuilder()
.addAllEntries(PBHelper.convertAclEntryProto(aclEntries)).build()
.writeDelimitedTo(out);
}
FSImageSerialization.writeString(clientName,out); FSImageSerialization.writeString(clientName,out);
FSImageSerialization.writeString(clientMachine,out); FSImageSerialization.writeString(clientMachine,out);
// write clientId and callId // write clientId and callId
@ -535,7 +527,6 @@ public abstract class FSEditLogOp {
// clientname, clientMachine and block locations of last block. // clientname, clientMachine and block locations of last block.
if (this.opCode == OP_ADD) { if (this.opCode == OP_ADD) {
aclEntries = AclEditLogUtil.read(in, logVersion); aclEntries = AclEditLogUtil.read(in, logVersion);
this.clientName = FSImageSerialization.readString(in); this.clientName = FSImageSerialization.readString(in);
this.clientMachine = FSImageSerialization.readString(in); this.clientMachine = FSImageSerialization.readString(in);
// read clientId and callId // read clientId and callId
@ -1364,14 +1355,7 @@ public abstract class FSEditLogOp {
FSImageSerialization.writeLong(timestamp, out); // mtime FSImageSerialization.writeLong(timestamp, out); // mtime
FSImageSerialization.writeLong(timestamp, out); // atime, unused at this FSImageSerialization.writeLong(timestamp, out); // atime, unused at this
permissions.write(out); permissions.write(out);
AclEditLogUtil.write(aclEntries, out);
boolean hasAcl = aclEntries != null;
out.writeBoolean(hasAcl);
if (hasAcl) {
AclFeatureProto.newBuilder()
.addAllEntries(PBHelper.convertAclEntryProto(aclEntries)).build()
.writeDelimitedTo(out);
}
} }
@Override @Override

View File

@ -31,10 +31,12 @@ import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.HadoopIllegalArgumentException;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.permission.AclEntry; import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclEntryScope;
import org.apache.hadoop.fs.permission.AclEntryType;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.fs.permission.PermissionStatus;
import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEntryProto;
import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto; import org.apache.hadoop.hdfs.protocol.proto.HdfsProtos.BlockProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo;
@ -46,6 +48,7 @@ import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FilesUnderConstructionSection.FileUnderConstructionEntry;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeDirectorySection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.INodeSection.AclFeatureProto;
import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference; import org.apache.hadoop.hdfs.server.namenode.INodeReference.DstReference;
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount; import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithCount;
import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName; import org.apache.hadoop.hdfs.server.namenode.INodeReference.WithName;
@ -61,6 +64,20 @@ public final class FSImageFormatPBINode {
private final static long USER_GROUP_STRID_MASK = (1 << 24) - 1; private final static long USER_GROUP_STRID_MASK = (1 << 24) - 1;
private final static int USER_STRID_OFFSET = 40; private final static int USER_STRID_OFFSET = 40;
private final static int GROUP_STRID_OFFSET = 16; private final static int GROUP_STRID_OFFSET = 16;
private static final int ACL_ENTRY_NAME_MASK = (1 << 24) - 1;
private static final int ACL_ENTRY_NAME_OFFSET = 6;
private static final int ACL_ENTRY_TYPE_OFFSET = 3;
private static final int ACL_ENTRY_SCOPE_OFFSET = 5;
private static final int ACL_ENTRY_PERM_MASK = 7;
private static final int ACL_ENTRY_TYPE_MASK = 3;
private static final int ACL_ENTRY_SCOPE_MASK = 1;
private static final FsAction[] FSACTION_VALUES = FsAction.values();
private static final AclEntryScope[] ACL_ENTRY_SCOPE_VALUES = AclEntryScope
.values();
private static final AclEntryType[] ACL_ENTRY_TYPE_VALUES = AclEntryType
.values();
private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class); private static final Log LOG = LogFactory.getLog(FSImageFormatProtobuf.class);
public final static class Loader { public final static class Loader {
@ -73,9 +90,21 @@ public final class FSImageFormatPBINode {
new FsPermission(perm)); new FsPermission(perm));
} }
public static ImmutableList<AclEntry> loadAclEntries(int id, public static ImmutableList<AclEntry> loadAclEntries(
final ImmutableList<AclEntry>[] aclTable) { AclFeatureProto proto, final String[] stringTable) {
return aclTable[id]; ImmutableList.Builder<AclEntry> b = ImmutableList.builder();
for (int v : proto.getEntriesList()) {
int p = v & ACL_ENTRY_PERM_MASK;
int t = (v >> ACL_ENTRY_TYPE_OFFSET) & ACL_ENTRY_TYPE_MASK;
int s = (v >> ACL_ENTRY_SCOPE_OFFSET) & ACL_ENTRY_SCOPE_MASK;
int nid = (v >> ACL_ENTRY_NAME_OFFSET) & ACL_ENTRY_NAME_MASK;
String name = stringTable[nid];
b.add(new AclEntry.Builder().setName(name)
.setPermission(FSACTION_VALUES[p])
.setScope(ACL_ENTRY_SCOPE_VALUES[s])
.setType(ACL_ENTRY_TYPE_VALUES[t]).build());
}
return b.build();
} }
public static INodeReference loadINodeReference( public static INodeReference loadINodeReference(
@ -112,9 +141,9 @@ public final class FSImageFormatPBINode {
dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota); dir.addDirectoryWithQuotaFeature(nsQuota, dsQuota);
} }
if (d.hasAclId()) { if (d.hasAcl()) {
dir.addAclFeature(new AclFeature(loadAclEntries(d.getAclId(), dir.addAclFeature(new AclFeature(loadAclEntries(d.getAcl(),
state.getExtendedAclTable()))); state.getStringTable())));
} }
return dir; return dir;
} }
@ -249,9 +278,9 @@ public final class FSImageFormatPBINode {
n.getName().toByteArray(), permissions, f.getModificationTime(), n.getName().toByteArray(), permissions, f.getModificationTime(),
f.getAccessTime(), blocks, replication, f.getPreferredBlockSize()); f.getAccessTime(), blocks, replication, f.getPreferredBlockSize());
if (f.hasAclId()) { if (f.hasAcl()) {
file.addAclFeature(new AclFeature(loadAclEntries(f.getAclId(), file.addAclFeature(new AclFeature(loadAclEntries(f.getAcl(),
state.getExtendedAclTable()))); state.getStringTable())));
} }
// under-construction information // under-construction information
@ -305,14 +334,17 @@ public final class FSImageFormatPBINode {
| n.getFsPermissionShort(); | n.getFsPermissionShort();
} }
/** private static AclFeatureProto.Builder buildAclEntries(AclFeature f,
* Get a unique id for the AclEntry list. Notice that the code does not final SaverContext.DeduplicationMap<String> map) {
* deduplicate the list of aclentry yet. AclFeatureProto.Builder b = AclFeatureProto.newBuilder();
*/ for (AclEntry e : f.getEntries()) {
private static int buildAclEntries(AclFeature f, int v = ((map.getId(e.getName()) & ACL_ENTRY_NAME_MASK) << ACL_ENTRY_NAME_OFFSET)
final SaverContext.DeduplicationMap<ImmutableList<AclEntryProto>> map) { | (e.getType().ordinal() << ACL_ENTRY_TYPE_OFFSET)
return map.getId(ImmutableList.copyOf(PBHelper.convertAclEntryProto(f | (e.getScope().ordinal() << ACL_ENTRY_SCOPE_OFFSET)
.getEntries()))); | (e.getPermission().ordinal());
b.addEntries(v);
}
return b;
} }
public static INodeSection.INodeFile.Builder buildINodeFile( public static INodeSection.INodeFile.Builder buildINodeFile(
@ -326,7 +358,7 @@ public final class FSImageFormatPBINode {
AclFeature f = file.getAclFeature(); AclFeature f = file.getAclFeature();
if (f != null) { if (f != null) {
b.setAclId(buildAclEntries(f, state.getExtendedAclMap())); b.setAcl(buildAclEntries(f, state.getStringMap()));
} }
return b; return b;
} }
@ -342,7 +374,7 @@ public final class FSImageFormatPBINode {
AclFeature f = dir.getAclFeature(); AclFeature f = dir.getAclFeature();
if (f != null) { if (f != null) {
b.setAclId(buildAclEntries(f, state.getExtendedAclMap())); b.setAcl(buildAclEntries(f, state.getStringMap()));
} }
return b; return b;
} }

View File

@ -42,16 +42,11 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclEntryProto;
import org.apache.hadoop.hdfs.protocol.proto.AclProtos.AclFeatureProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto;
import org.apache.hadoop.hdfs.protocolPB.PBHelper;
import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager; import org.apache.hadoop.hdfs.security.token.delegation.DelegationTokenSecretManager;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.CacheManagerSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.ExtendedAclSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.FileSummary;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.NameSystemSection;
import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection; import org.apache.hadoop.hdfs.server.namenode.FsImageProto.SecretManagerSection;
@ -66,7 +61,6 @@ import org.apache.hadoop.io.MD5Hash;
import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressorStream; import org.apache.hadoop.io.compress.CompressorStream;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists; import com.google.common.collect.Lists;
import com.google.common.collect.Maps; import com.google.common.collect.Maps;
import com.google.common.io.LimitInputStream; import com.google.common.io.LimitInputStream;
@ -81,15 +75,10 @@ public final class FSImageFormatProtobuf {
public static final class LoaderContext { public static final class LoaderContext {
private String[] stringTable; private String[] stringTable;
private ImmutableList<AclEntry>[] extendedAclTable;
public String[] getStringTable() { public String[] getStringTable() {
return stringTable; return stringTable;
} }
public ImmutableList<AclEntry>[] getExtendedAclTable() {
return extendedAclTable;
}
} }
public static final class SaverContext { public static final class SaverContext {
@ -125,16 +114,10 @@ public final class FSImageFormatProtobuf {
private final DeduplicationMap<String> stringMap = DeduplicationMap private final DeduplicationMap<String> stringMap = DeduplicationMap
.newMap(); .newMap();
private final DeduplicationMap<ImmutableList<AclEntryProto>> extendedAclMap = DeduplicationMap
.newMap();
public DeduplicationMap<String> getStringMap() { public DeduplicationMap<String> getStringMap() {
return stringMap; return stringMap;
} }
public DeduplicationMap<ImmutableList<AclEntryProto>> getExtendedAclMap() {
return extendedAclMap;
}
} }
public static final class Loader implements FSImageFormat.AbstractLoader { public static final class Loader implements FSImageFormat.AbstractLoader {
@ -239,9 +222,6 @@ public final class FSImageFormatProtobuf {
case STRING_TABLE: case STRING_TABLE:
loadStringTableSection(in); loadStringTableSection(in);
break; break;
case EXTENDED_ACL:
loadExtendedAclSection(in);
break;
case INODE: { case INODE: {
currentStep = new Step(StepType.INODES); currentStep = new Step(StepType.INODES);
prog.beginStep(Phase.LOADING_FSIMAGE, currentStep); prog.beginStep(Phase.LOADING_FSIMAGE, currentStep);
@ -301,18 +281,6 @@ public final class FSImageFormatProtobuf {
} }
} }
@SuppressWarnings("unchecked")
private void loadExtendedAclSection(InputStream in) throws IOException {
ExtendedAclSection s = ExtendedAclSection.parseDelimitedFrom(in);
ctx.extendedAclTable = new ImmutableList[s.getNumEntry() + 1];
for (int i = 0; i < s.getNumEntry(); ++i) {
ExtendedAclSection.Entry e = ExtendedAclSection.Entry
.parseDelimitedFrom(in);
ctx.extendedAclTable[e.getId()] = ImmutableList.copyOf(PBHelper
.convertAclEntry(e.getAcl().getEntriesList()));
}
}
private void loadSecretManagerSection(InputStream in) throws IOException { private void loadSecretManagerSection(InputStream in) throws IOException {
SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in); SecretManagerSection s = SecretManagerSection.parseDelimitedFrom(in);
int numKeys = s.getNumKeys(), numTokens = s.getNumTokens(); int numKeys = s.getNumKeys(), numTokens = s.getNumTokens();
@ -481,7 +449,6 @@ public final class FSImageFormatProtobuf {
saveCacheManagerSection(b); saveCacheManagerSection(b);
prog.endStep(Phase.SAVING_CHECKPOINT, step); prog.endStep(Phase.SAVING_CHECKPOINT, step);
saveExtendedAclSection(b);
saveStringTableSection(b); saveStringTableSection(b);
// We use the underlyingOutputStream to write the header. Therefore flush // We use the underlyingOutputStream to write the header. Therefore flush
@ -560,22 +527,6 @@ public final class FSImageFormatProtobuf {
} }
commitSection(summary, SectionName.STRING_TABLE); commitSection(summary, SectionName.STRING_TABLE);
} }
private void saveExtendedAclSection(FileSummary.Builder summary)
throws IOException {
OutputStream out = sectionOutputStream;
ExtendedAclSection.Builder b = ExtendedAclSection.newBuilder()
.setNumEntry(saverContext.extendedAclMap.size());
b.build().writeDelimitedTo(out);
for (Entry<ImmutableList<AclEntryProto>, Integer> e : saverContext.extendedAclMap
.entrySet()) {
ExtendedAclSection.Entry.Builder eb = ExtendedAclSection.Entry
.newBuilder().setId(e.getValue())
.setAcl(AclFeatureProto.newBuilder().addAllEntries(e.getKey()));
eb.build().writeDelimitedTo(out);
}
commitSection(summary, SectionName.EXTENDED_ACL);
}
} }
/** /**

View File

@ -169,9 +169,9 @@ public class FSImageFormatPBSnapshot {
fileInPb.getPermission(), state.getStringTable()); fileInPb.getPermission(), state.getStringTable());
AclFeature acl = null; AclFeature acl = null;
if (fileInPb.hasAclId()) { if (fileInPb.hasAcl()) {
acl = new AclFeature(FSImageFormatPBINode.Loader.loadAclEntries( acl = new AclFeature(FSImageFormatPBINode.Loader.loadAclEntries(
fileInPb.getAclId(), state.getExtendedAclTable())); fileInPb.getAcl(), state.getStringTable()));
} }
copy = new INodeFileAttributes.SnapshotCopy(pbf.getName() copy = new INodeFileAttributes.SnapshotCopy(pbf.getName()
@ -265,9 +265,9 @@ public class FSImageFormatPBSnapshot {
PermissionStatus permission = loadPermission( PermissionStatus permission = loadPermission(
dirCopyInPb.getPermission(), state.getStringTable()); dirCopyInPb.getPermission(), state.getStringTable());
AclFeature acl = null; AclFeature acl = null;
if (dirCopyInPb.hasAclId()) { if (dirCopyInPb.hasAcl()) {
acl = new AclFeature(FSImageFormatPBINode.Loader.loadAclEntries( acl = new AclFeature(FSImageFormatPBINode.Loader.loadAclEntries(
dirCopyInPb.getAclId(), state.getExtendedAclTable())); dirCopyInPb.getAcl(), state.getStringTable()));
} }
long modTime = dirCopyInPb.getModificationTime(); long modTime = dirCopyInPb.getModificationTime();

View File

@ -60,10 +60,6 @@ message AclStatusProto {
repeated AclEntryProto entries = 4; repeated AclEntryProto entries = 4;
} }
message AclFeatureProto {
repeated AclEntryProto entries = 1;
}
message AclEditLogProto { message AclEditLogProto {
required string src = 1; required string src = 1;
repeated AclEntryProto entries = 2; repeated AclEntryProto entries = 2;

View File

@ -89,6 +89,23 @@ message INodeSection {
optional string clientMachine = 2; optional string clientMachine = 2;
} }
message AclFeatureProto {
/**
* An ACL entry is represented by a 32-bit integer in Big Endian
* format. The bits can be divided in four segments:
* [0:2) || [2:26) || [26:27) || [27:29) || [29:32)
*
* [0:2) -- reserved for futute uses.
* [2:26) -- the name of the entry, which is an ID that points to a
* string in the StringTableSection.
* [26:27) -- the scope of the entry (AclEntryScopeProto)
* [27:29) -- the type of the entry (AclEntryTypeProto)
* [29:32) -- the permission of the entry (FsActionProto)
*
*/
repeated fixed32 entries = 2 [packed = true];
}
message INodeFile { message INodeFile {
optional uint32 replication = 1; optional uint32 replication = 1;
optional uint64 modificationTime = 2; optional uint64 modificationTime = 2;
@ -97,7 +114,7 @@ message INodeSection {
optional fixed64 permission = 5; optional fixed64 permission = 5;
repeated BlockProto blocks = 6; repeated BlockProto blocks = 6;
optional FileUnderConstructionFeature fileUC = 7; optional FileUnderConstructionFeature fileUC = 7;
optional uint32 aclId = 8; optional AclFeatureProto acl = 8;
} }
message INodeDirectory { message INodeDirectory {
@ -107,7 +124,7 @@ message INodeSection {
// diskspace quota // diskspace quota
optional uint64 dsQuota = 3; optional uint64 dsQuota = 3;
optional fixed64 permission = 4; optional fixed64 permission = 4;
optional uint32 aclId = 5; optional AclFeatureProto acl = 5;
} }
message INodeSymlink { message INodeSymlink {
@ -280,16 +297,3 @@ message CacheManagerSection {
// repeated CachePoolInfoProto pools // repeated CachePoolInfoProto pools
// repeated CacheDirectiveInfoProto directives // repeated CacheDirectiveInfoProto directives
} }
/**
* This section maps string to id
* NAME: EXTENDED_ACL
*/
message ExtendedAclSection {
message Entry {
required uint32 id = 1;
optional AclFeatureProto acl = 2;
}
optional uint32 numEntry = 1;
// repeated Entry entries
}