HADOOP-14017. User friendly name for ADLS user and group. Contributed by Vishwajeet Dusane

(cherry picked from commit 924def7854)
This commit is contained in:
Mingliang Liu 2017-02-21 13:44:42 -08:00 committed by Chris Douglas
parent a146866802
commit acf20c8318
5 changed files with 91 additions and 4 deletions

View File

@ -87,6 +87,10 @@ public final class AdlConfKeys {
"adl.feature.support.acl.bit";
static final boolean ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT = true;
static final String ADL_ENABLEUPN_FOR_OWNERGROUP_KEY =
"adl.feature.ownerandgroup.enableupn";
static final boolean ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT = false;
private AdlConfKeys() {
}
}

View File

@ -32,6 +32,7 @@ import com.microsoft.azure.datalake.store.DirectoryEntry;
import com.microsoft.azure.datalake.store.DirectoryEntryType;
import com.microsoft.azure.datalake.store.IfExists;
import com.microsoft.azure.datalake.store.LatencyTracker;
import com.microsoft.azure.datalake.store.UserGroupRepresentation;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
@ -80,6 +81,8 @@ public class AdlFileSystem extends FileSystem {
private ADLStoreClient adlClient;
private Path workingDirectory;
private boolean aclBitStatus;
private UserGroupRepresentation oidOrUpn;
// retained for tests
private AccessTokenProvider tokenProvider;
@ -181,6 +184,11 @@ public class AdlFileSystem extends FileSystem {
if (!trackLatency) {
LatencyTracker.disable();
}
boolean enableUPN = conf.getBoolean(ADL_ENABLEUPN_FOR_OWNERGROUP_KEY,
ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT);
oidOrUpn = enableUPN ? UserGroupRepresentation.UPN :
UserGroupRepresentation.OID;
}
/**
@ -439,7 +447,8 @@ public class AdlFileSystem extends FileSystem {
@Override
public FileStatus getFileStatus(final Path f) throws IOException {
statistics.incrementReadOps(1);
DirectoryEntry entry = adlClient.getDirectoryEntry(toRelativeFilePath(f));
DirectoryEntry entry =
adlClient.getDirectoryEntry(toRelativeFilePath(f), oidOrUpn);
return toFileStatus(entry, f);
}
@ -456,7 +465,7 @@ public class AdlFileSystem extends FileSystem {
public FileStatus[] listStatus(final Path f) throws IOException {
statistics.incrementReadOps(1);
List<DirectoryEntry> entries =
adlClient.enumerateDirectory(toRelativeFilePath(f));
adlClient.enumerateDirectory(toRelativeFilePath(f), oidOrUpn);
return toFileStatuses(entries, f);
}
@ -749,8 +758,8 @@ public class AdlFileSystem extends FileSystem {
@Override
public AclStatus getAclStatus(final Path path) throws IOException {
statistics.incrementReadOps(1);
com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = adlClient
.getAclStatus(toRelativeFilePath(path));
com.microsoft.azure.datalake.store.acl.AclStatus adlStatus =
adlClient.getAclStatus(toRelativeFilePath(path), oidOrUpn);
AclStatus.Builder aclStatusBuilder = new AclStatus.Builder();
aclStatusBuilder.owner(adlStatus.owner);
aclStatusBuilder.group(adlStatus.group);
@ -963,4 +972,10 @@ public class AdlFileSystem extends FileSystem {
}
return new String(passchars);
}
/**
 * Test hook that overrides how owner and group are represented in the
 * results of getFileStatus, listStatus and getAclStatus.
 *
 * @param enableUPN true to request the UPN representation, false to
 *                  request the OID representation.
 */
@VisibleForTesting
public void setUserGroupRepresentationAsUPN(boolean enableUPN) {
  if (enableUPN) {
    oidOrUpn = UserGroupRepresentation.UPN;
  } else {
    oidOrUpn = UserGroupRepresentation.OID;
  }
}
}

View File

@ -26,6 +26,7 @@
* [Protecting the Credentials with Credential Providers](#Credential_Provider)
* [Enabling ADL Filesystem](#Enabling_ADL)
* [Accessing adl URLs](#Accessing_adl_URLs)
* [User/Group Representation](#OIDtoUPNConfiguration)
* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
## <a name="Introduction" />Introduction
@ -42,6 +43,8 @@ The jar file is named azure-datalake-store.jar.
* Can act as a source of data in a MapReduce job, or a sink.
* Tested on both Linux and Windows.
* Tested for scale.
* The setOwner/setAcl/removeAclEntries/modifyAclEntries APIs accept either a
UPN or an OID (Object ID) as the user and group name.
## <a name="Limitations" />Limitations
Partial or no support for the following operations :
@ -221,6 +224,29 @@ commands demonstrate access to a storage account named `youraccount`.
> hadoop fs -cat adl://yourcontainer.azuredatalakestore.net/testDir/testFile
test file content
### <a name="OIDtoUPNConfiguration" />User/Group Representation
The hadoop-azure-datalake module provides support for configuring how
User/Group information is represented during
getFileStatus/listStatus/getAclStatus.
Add the following property to your `core-site.xml`:
<property>
<name>adl.feature.ownerandgroup.enableupn</name>
<value>true</value>
<description>
When true: User and Group in the FileStatus/AclStatus response are
represented by the user friendly name from the Azure AD profile.
When false (default): User and Group in the FileStatus/AclStatus
response are represented by the unique identifier from the Azure AD
profile (Object ID as GUID).
The default value is recommended for best performance.
</description>
</property>
## <a name="Testing_the_hadoop-azure_Module" />Testing the azure-datalake-store Module
The hadoop-azure module includes a full suite of unit tests. Most of the tests will run without additional configuration by running mvn test. This includes tests against mocked storage, which is an in-memory emulation of Azure Data Lake Storage.

View File

@ -26,6 +26,10 @@ import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_ENABLEUPN_FOR_OWNERGROUP_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
@ -99,5 +103,10 @@ public class TestValidateConfiguration {
Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE);
Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE);
Assert.assertEquals("adl.feature.ownerandgroup.enableupn",
ADL_ENABLEUPN_FOR_OWNERGROUP_KEY);
Assert.assertEquals(false,
ADL_ENABLEUPN_FOR_OWNERGROUP_DEFAULT);
}
}

View File

@ -22,6 +22,7 @@ import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.AdlFileSystem;
import org.junit.After;
import org.junit.Assert;
import org.junit.Assume;
@ -32,6 +33,8 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.UUID;
import static org.junit.Assert.fail;
/**
* This class is responsible for testing ContentSummary, ListStatus on
* file/folder.
@ -107,5 +110,35 @@ public class TestMetadata {
.assertEquals(path.makeQualified(fs.getUri(), fs.getWorkingDirectory()),
statuses[0].getPath());
}
/**
 * Verifies that toggling the user/group representation changes the form of
 * the owner and group reported by getFileStatus: GUID when UPN is disabled,
 * and (normally) a non-GUID friendly name when UPN is enabled.
 */
@Test
public void testUserRepresentationConfiguration() throws IOException {
// Validating actual user/group OID or friendly name is outside scope of
// this test.
Path path = new Path(parent, "a.txt");
AdlFileSystem fs = (AdlFileSystem) adlStore;
// When set to false, User/Group information should be the AAD represented
// unique OID. That is GUID value. UUID.fromString throws
// IllegalArgumentException for a non-GUID value, which fails the test.
fs.setUserGroupRepresentationAsUPN(false);
fs.createNewFile(path);
Assert.assertTrue(fs.isFile(path));
FileStatus fileStatus = fs.getFileStatus(path);
UUID.fromString(fileStatus.getGroup());
UUID.fromString(fileStatus.getOwner());
// When set to true, User/Group information should be user friendly name.
// That is non GUID value.
// Majority of the cases, user friendly name would not be GUID value.
fs.setUserGroupRepresentationAsUPN(true);
fileStatus = fs.getFileStatus(path);
try {
UUID.fromString(fileStatus.getGroup());
UUID.fromString(fileStatus.getOwner());
fail("Expected user friendly name to be non guid value.");
} catch (IllegalArgumentException e) {
// Expected: the group or the owner is a friendly name and therefore
// cannot be parsed as a GUID.
}
}
}