From edf149b9790a96563fe7bba289a040542c8ab8f2 Mon Sep 17 00:00:00 2001 From: Chris Douglas Date: Wed, 8 Mar 2017 23:18:28 -0800 Subject: [PATCH] HADOOP-13037. Refactor Azure Data Lake Store as an independent FileSystem. Contributed by Vishwajeet Dusane --- hadoop-tools/hadoop-azure-datalake/pom.xml | 168 ++++ .../java/org/apache/hadoop/fs/adl/Adl.java | 56 ++ .../org/apache/hadoop/fs/adl/AdlConfKeys.java | 92 ++ .../apache/hadoop/fs/adl/AdlFileSystem.java | 923 ++++++++++++++++++ .../hadoop/fs/adl/AdlFsInputStream.java | 149 +++ .../hadoop/fs/adl/AdlFsOutputStream.java | 82 ++ .../apache/hadoop/fs/adl/AdlPermission.java | 69 ++ .../fs/adl/SdkTokenProviderAdapter.java | 41 + .../hadoop/fs/adl/TokenProviderType.java | 25 + .../fs/adl/oauth2/AzureADTokenProvider.java | 70 ++ .../hadoop/fs/adl/oauth2/package-info.java | 23 + .../apache/hadoop/fs/adl/package-info.java | 23 + .../META-INF/org.apache.hadoop.fs.FileSystem | 16 + .../src/site/markdown/index.md | 193 ++++ .../hadoop/fs/adl/AdlMockWebServer.java | 99 ++ .../apache/hadoop/fs/adl/TestACLFeatures.java | 262 +++++ .../hadoop/fs/adl/TestADLResponseData.java | 147 +++ .../org/apache/hadoop/fs/adl/TestAdlRead.java | 196 ++++ .../fs/adl/TestAzureADTokenProvider.java | 133 +++ .../adl/TestConcurrentDataReadOperations.java | 299 ++++++ .../fs/adl/TestCustomTokenProvider.java | 136 +++ .../hadoop/fs/adl/TestGetFileStatus.java | 70 ++ .../apache/hadoop/fs/adl/TestListStatus.java | 103 ++ .../fs/adl/TestRelativePathFormation.java | 61 ++ .../fs/adl/TestValidateConfiguration.java | 103 ++ .../hadoop/fs/adl/TestableAdlFileSystem.java | 30 + .../adl/common/CustomMockTokenProvider.java | 61 ++ .../fs/adl/common/ExpectedResponse.java | 71 ++ .../hadoop/fs/adl/common/Parallelized.java | 60 ++ .../hadoop/fs/adl/common/TestDataForRead.java | 122 +++ .../fs/adl/live/AdlStorageConfiguration.java | 94 ++ .../fs/adl/live/AdlStorageContract.java | 66 ++ .../adl/live/TestAdlContractAppendLive.java | 53 + .../adl/live/TestAdlContractConcatLive.java | 52 + .../adl/live/TestAdlContractCreateLive.java | 52 + .../adl/live/TestAdlContractDeleteLive.java | 44 + .../fs/adl/live/TestAdlContractMkdirLive.java | 55 ++ .../fs/adl/live/TestAdlContractOpenLive.java | 44 + .../adl/live/TestAdlContractRenameLive.java | 63 ++ .../adl/live/TestAdlContractRootDirLive.java | 52 + .../fs/adl/live/TestAdlContractSeekLive.java | 44 + .../live/TestAdlDifferentSizeWritesLive.java | 102 ++ .../live/TestAdlFileSystemContractLive.java | 94 ++ .../src/test/resources/adls.xml | 140 +++ .../test/resources/contract-test-options.xml | 61 ++ .../src/test/resources/log4j.properties | 30 + hadoop-tools/pom.xml | 1 + 47 files changed, 4930 insertions(+) create mode 100644 hadoop-tools/hadoop-azure-datalake/pom.xml create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/resources/META-INF/org.apache.hadoop.fs.FileSystem create mode 100644 hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/resources/contract-test-options.xml create mode 100644 hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml new file mode 100644 index 00000000000..01d101fe33d --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -0,0 +1,168 @@ + + + + 4.0.0 + + org.apache.hadoop + hadoop-project + 2.9.0-SNAPSHOT + ../../hadoop-project + + org.apache.hadoop + hadoop-azure-datalake + Apache Hadoop Azure Data Lake support + + This module contains code to support integration with Azure Data Lake. + + jar + + 2.4.0 + 0.9.1 + UTF-8 + true + + + + snapshots-repo + https://oss.sonatype.org/content/repositories/snapshots + false + true + + + + + + org.apache.maven.plugins + maven-project-info-reports-plugin + + + false + false + + + + + org.apache.maven.plugins + maven-jar-plugin + + + + test-jar + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + deplist + compile + + list + + + + ${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt + + + + + + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.maven.plugins + + maven-enforcer-plugin + + [1.0.0,) + + enforce + + + + + + + + + + + + + + + + + + + com.microsoft.azure + azure-data-lake-store-sdk + 2.0.4-SNAPSHOT + + + + org.apache.hadoop + hadoop-common + + + com.squareup.okhttp + okhttp + 2.4.0 + + + junit + junit + test + + + com.eclipsesource.minimal-json + minimal-json + 0.9.1 + test + + + org.apache.hadoop + hadoop-common + test + test-jar + + + com.squareup.okhttp + mockwebserver + 2.4.0 + test + + + diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java new file mode 100644 index 00000000000..7ec04cf3232 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/Adl.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.DelegateToFileSystem; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * Expose adl:// scheme to access ADL file system. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class Adl extends DelegateToFileSystem { + + Adl(URI theUri, Configuration conf) throws IOException, URISyntaxException { + super(theUri, createDataLakeFileSystem(conf), conf, AdlFileSystem.SCHEME, + false); + } + + private static AdlFileSystem createDataLakeFileSystem(Configuration conf) { + AdlFileSystem fs = new AdlFileSystem(); + fs.setConf(conf); + return fs; + } + + /** + * @return Default port for ADL File system to communicate + */ + @Override + public final int getUriDefaultPort() { + return AdlFileSystem.DEFAULT_PORT; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java new file mode 100644 index 00000000000..21120df6ed8 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlConfKeys.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Constants. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public final class AdlConfKeys { + // OAuth2 Common Configuration + public static final String AZURE_AD_REFRESH_URL_KEY = "dfs.adls.oauth2" + + ".refresh.url"; + + // optional when provider type is refresh or client id. + public static final String AZURE_AD_TOKEN_PROVIDER_CLASS_KEY = + "dfs.adls.oauth2.access.token.provider"; + public static final String AZURE_AD_CLIENT_ID_KEY = + "dfs.adls.oauth2.client.id"; + public static final String AZURE_AD_TOKEN_PROVIDER_TYPE_KEY = + "dfs.adls.oauth2.access.token.provider.type"; + + // OAuth Refresh Token Configuration + public static final String AZURE_AD_REFRESH_TOKEN_KEY = + "dfs.adls.oauth2.refresh.token"; + + public static final String TOKEN_PROVIDER_TYPE_REFRESH_TOKEN = "RefreshToken"; + // OAuth Client Cred Token Configuration + public static final String AZURE_AD_CLIENT_SECRET_KEY = + "dfs.adls.oauth2.credential"; + public static final String TOKEN_PROVIDER_TYPE_CLIENT_CRED = + "ClientCredential"; + + public static final String READ_AHEAD_BUFFER_SIZE_KEY = + "adl.feature.client.cache.readahead"; + + public static final String WRITE_BUFFER_SIZE_KEY = + "adl.feature.client.cache.drop.behind.writes"; + static final String SECURE_TRANSPORT_SCHEME = "https"; + static final String INSECURE_TRANSPORT_SCHEME = "http"; + static final String ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER = + "adl.debug.override.localuserasfileowner"; + + static final boolean ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT = false; + static final long ADL_BLOCK_SIZE = 256 * 1024 * 1024; + static final int ADL_REPLICATION_FACTOR = 1; + static final String ADL_HADOOP_CLIENT_NAME = "hadoop-azure-datalake-"; + static final String ADL_HADOOP_CLIENT_VERSION = + "2.0.0-SNAPSHOT"; + static final String ADL_EVENTS_TRACKING_SOURCE = "adl.events.tracking.source"; + static final String ADL_EVENTS_TRACKING_CLUSTERNAME = + "adl.events.tracking.clustername"; + + static final String ADL_EVENTS_TRACKING_CLUSTERTYPE = + "adl.events.tracking.clustertype"; + static final int DEFAULT_READ_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024; + static final int DEFAULT_WRITE_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024; + + static final String LATENCY_TRACKER_KEY = + "adl.dfs.enable.client.latency.tracker"; + static final boolean LATENCY_TRACKER_DEFAULT = true; + + static final String ADL_EXPERIMENT_POSITIONAL_READ_KEY = + "adl.feature.experiment.positional.read.enable"; + static final boolean ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT = true; + + static final String ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION = + "adl.feature.support.acl.bit"; + static final boolean ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT = true; + + private AdlConfKeys() { + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java new file mode 100644 index 00000000000..9083afc24e4 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFileSystem.java @@ -0,0 +1,923 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; + +import com.google.common.annotations.VisibleForTesting; +import com.microsoft.azure.datalake.store.ADLStoreClient; +import com.microsoft.azure.datalake.store.ADLStoreOptions; +import com.microsoft.azure.datalake.store.DirectoryEntry; +import com.microsoft.azure.datalake.store.DirectoryEntryType; +import com.microsoft.azure.datalake.store.IfExists; +import com.microsoft.azure.datalake.store.LatencyTracker; +import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider; + +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.ContentSummary.Builder; +import org.apache.hadoop.fs.CreateFlag; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.InvalidPathException; +import org.apache.hadoop.fs.Options; +import org.apache.hadoop.fs.Options.Rename; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Progressable; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.util.VersionInfo; +import static org.apache.hadoop.fs.adl.AdlConfKeys.*; + +/** + * A FileSystem to access Azure Data Lake Store. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class AdlFileSystem extends FileSystem { + static final String SCHEME = "adl"; + static final int DEFAULT_PORT = 443; + private URI uri; + private String userName; + private boolean overrideOwner; + private ADLStoreClient adlClient; + private Path workingDirectory; + private boolean aclBitStatus; + + // retained for tests + private AccessTokenProvider tokenProvider; + private AzureADTokenProvider azureTokenProvider; + + @Override + public String getScheme() { + return SCHEME; + } + + public URI getUri() { + return uri; + } + + @Override + public int getDefaultPort() { + return DEFAULT_PORT; + } + + @Override + public boolean supportsSymlinks() { + return false; + } + + /** + * Called after a new FileSystem instance is constructed. + * + * @param storeUri a uri whose authority section names the host, port, etc. + * for this FileSystem + * @param conf the configuration + */ + @Override + public void initialize(URI storeUri, Configuration conf) throws IOException { + super.initialize(storeUri, conf); + this.setConf(conf); + this.uri = URI + .create(storeUri.getScheme() + "://" + storeUri.getAuthority()); + + try { + userName = UserGroupInformation.getCurrentUser().getShortUserName(); + } catch (IOException e) { + userName = "hadoop"; + } + + this.setWorkingDirectory(getHomeDirectory()); + + overrideOwner = getConf().getBoolean(ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER, + ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT); + + aclBitStatus = conf.getBoolean(ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION, + ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT); + + String accountFQDN = null; + String mountPoint = null; + String hostname = storeUri.getHost(); + if (!hostname.contains(".") && !hostname.equalsIgnoreCase( + "localhost")) { // this is a symbolic name. Resolve it. + String hostNameProperty = "dfs.adls." + hostname + ".hostname"; + String mountPointProperty = "dfs.adls." + hostname + ".mountpoint"; + accountFQDN = getNonEmptyVal(conf, hostNameProperty); + mountPoint = getNonEmptyVal(conf, mountPointProperty); + } else { + accountFQDN = hostname; + } + + if (storeUri.getPort() > 0) { + accountFQDN = accountFQDN + ":" + storeUri.getPort(); + } + + adlClient = ADLStoreClient + .createClient(accountFQDN, getAccessTokenProvider(conf)); + + ADLStoreOptions options = new ADLStoreOptions(); + options.enableThrowingRemoteExceptions(); + + if (getTransportScheme().equalsIgnoreCase(INSECURE_TRANSPORT_SCHEME)) { + options.setInsecureTransport(); + } + + if (mountPoint != null) { + options.setFilePathPrefix(mountPoint); + } + + String clusterName = conf.get(ADL_EVENTS_TRACKING_CLUSTERNAME, "UNKNOWN"); + String clusterType = conf.get(ADL_EVENTS_TRACKING_CLUSTERTYPE, "UNKNOWN"); + + String clientVersion = ADL_HADOOP_CLIENT_NAME + (StringUtils + .isEmpty(VersionInfo.getVersion().trim()) ? + ADL_HADOOP_CLIENT_VERSION.trim() : + VersionInfo.getVersion().trim()); + options.setUserAgentSuffix(clientVersion + "/" + + VersionInfo.getVersion().trim() + "/" + clusterName + "/" + + clusterType); + + adlClient.setOptions(options); + + boolean trackLatency = conf + .getBoolean(LATENCY_TRACKER_KEY, LATENCY_TRACKER_DEFAULT); + if (!trackLatency) { + LatencyTracker.disable(); + } + } + + /** + * This method is provided for convenience for derived classes to define + * custom {@link AzureADTokenProvider} instance. + * + * In order to ensure secure hadoop infrastructure and user context for which + * respective {@link AdlFileSystem} instance is initialized, + * Loading {@link AzureADTokenProvider} is not sufficient. + * + * The order of loading {@link AzureADTokenProvider} is to first invoke + * {@link #getCustomAccessTokenProvider(Configuration)}, If method return null + * which means no implementation provided by derived classes, then + * configuration object is loaded to retrieve token configuration as specified + * is documentation. + * + * Custom token management takes the higher precedence during initialization. + * + * @param conf Configuration object + * @return null if the no custom {@link AzureADTokenProvider} token management + * is specified. + * @throws IOException if failed to initialize token provider. + */ + protected synchronized AzureADTokenProvider getCustomAccessTokenProvider( + Configuration conf) throws IOException { + String className = getNonEmptyVal(conf, AZURE_AD_TOKEN_PROVIDER_CLASS_KEY); + + Class azureADTokenProviderClass = + conf.getClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, null, + AzureADTokenProvider.class); + if (azureADTokenProviderClass == null) { + throw new IllegalArgumentException( + "Configuration " + className + " " + "not defined/accessible."); + } + + azureTokenProvider = ReflectionUtils + .newInstance(azureADTokenProviderClass, conf); + if (azureTokenProvider == null) { + throw new IllegalArgumentException("Failed to initialize " + className); + } + + azureTokenProvider.initialize(conf); + return azureTokenProvider; + } + + private AccessTokenProvider getAccessTokenProvider(Configuration conf) + throws IOException { + TokenProviderType type = conf.getEnum( + AdlConfKeys.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, TokenProviderType.Custom); + + switch (type) { + case RefreshToken: + tokenProvider = getConfRefreshTokenBasedTokenProvider(conf); + break; + case ClientCredential: + tokenProvider = getConfCredentialBasedTokenProvider(conf); + break; + case Custom: + default: + AzureADTokenProvider azureADTokenProvider = getCustomAccessTokenProvider( + conf); + tokenProvider = new SdkTokenProviderAdapter(azureADTokenProvider); + break; + } + + return tokenProvider; + } + + private AccessTokenProvider getConfCredentialBasedTokenProvider( + Configuration conf) { + String clientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY); + String refreshUrl = getNonEmptyVal(conf, AZURE_AD_REFRESH_URL_KEY); + String clientSecret = getNonEmptyVal(conf, AZURE_AD_CLIENT_SECRET_KEY); + return new ClientCredsTokenProvider(refreshUrl, clientId, clientSecret); + } + + private AccessTokenProvider getConfRefreshTokenBasedTokenProvider( + Configuration conf) { + String clientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY); + String refreshToken = getNonEmptyVal(conf, AZURE_AD_REFRESH_TOKEN_KEY); + return new RefreshTokenBasedTokenProvider(clientId, refreshToken); + } + + @VisibleForTesting + AccessTokenProvider getTokenProvider() { + return tokenProvider; + } + + @VisibleForTesting + AzureADTokenProvider getAzureTokenProvider() { + return azureTokenProvider; + } + + /** + * Constructing home directory locally is fine as long as Hadoop + * local user name and ADL user name relationship story is not fully baked + * yet. + * + * @return Hadoop local user home directory. + */ + @Override + public Path getHomeDirectory() { + return makeQualified(new Path("/user/" + userName)); + } + + /** + * Create call semantic is handled differently in case of ADL. Create + * semantics is translated to Create/Append + * semantics. + * 1. No dedicated connection to server. + * 2. Buffering is locally done, Once buffer is full or flush is invoked on + * the by the caller. All the pending + * data is pushed to ADL as APPEND operation code. + * 3. On close - Additional call is send to server to close the stream, and + * release lock from the stream. + * + * Necessity of Create/Append semantics is + * 1. ADL backend server does not allow idle connection for longer duration + * . In case of slow writer scenario, + * observed connection timeout/Connection reset causing occasional job + * failures. + * 2. Performance boost to jobs which are slow writer, avoided network latency + * 3. ADL equally better performing with multiple of 4MB chunk as append + * calls. + * + * @param f File path + * @param permission Access permission for the newly created file + * @param overwrite Remove existing file and recreate new one if true + * otherwise throw error if file exist + * @param bufferSize Buffer size, ADL backend does not honour + * @param replication Replication count, ADL backend does not honour + * @param blockSize Block size, ADL backend does not honour + * @param progress Progress indicator + * @return FSDataOutputStream OutputStream on which application can push + * stream of bytes + * @throws IOException when system error, internal server error or user error + */ + @Override + public FSDataOutputStream create(Path f, FsPermission permission, + boolean overwrite, int bufferSize, short replication, long blockSize, + Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + IfExists overwriteRule = overwrite ? IfExists.OVERWRITE : IfExists.FAIL; + return new FSDataOutputStream(new AdlFsOutputStream(adlClient + .createFile(toRelativeFilePath(f), overwriteRule, + Integer.toOctalString(applyUMask(permission).toShort()), true), + getConf()), this.statistics); + } + + /** + * Opens an FSDataOutputStream at the indicated Path with write-progress + * reporting. Same as create(), except fails if parent directory doesn't + * already exist. + * + * @param f the file name to open + * @param permission Access permission for the newly created file + * @param flags {@link CreateFlag}s to use for this stream. + * @param bufferSize the size of the buffer to be used. ADL backend does + * not honour + * @param replication required block replication for the file. ADL backend + * does not honour + * @param blockSize Block size, ADL backend does not honour + * @param progress Progress indicator + * @throws IOException when system error, internal server error or user error + * @see #setPermission(Path, FsPermission) + * @deprecated API only for 0.20-append + */ + @Deprecated + @Override + public FSDataOutputStream createNonRecursive(Path f, FsPermission permission, + EnumSet flags, int bufferSize, short replication, + long blockSize, Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + IfExists overwriteRule = IfExists.FAIL; + for (CreateFlag flag : flags) { + if (flag == CreateFlag.OVERWRITE) { + overwriteRule = IfExists.OVERWRITE; + break; + } + } + + return new FSDataOutputStream(new AdlFsOutputStream(adlClient + .createFile(toRelativeFilePath(f), overwriteRule, + Integer.toOctalString(applyUMask(permission).toShort()), false), + getConf()), this.statistics); + } + + /** + * Append to an existing file (optional operation). + * + * @param f the existing file to be appended. + * @param bufferSize the size of the buffer to be used. ADL backend does + * not honour + * @param progress Progress indicator + * @throws IOException when system error, internal server error or user error + */ + @Override + public FSDataOutputStream append(Path f, int bufferSize, + Progressable progress) throws IOException { + statistics.incrementWriteOps(1); + return new FSDataOutputStream( + new AdlFsOutputStream(adlClient.getAppendStream(toRelativeFilePath(f)), + getConf()), this.statistics); + } + + /** + * Azure data lake does not support user configuration for data replication + * hence not leaving system to query on + * azure data lake. + * + * Stub implementation + * + * @param p Not honoured + * @param replication Not honoured + * @return True hard coded since ADL file system does not support + * replication configuration + * @throws IOException No exception would not thrown in this case however + * aligning with parent api definition. + */ + @Override + public boolean setReplication(final Path p, final short replication) + throws IOException { + statistics.incrementWriteOps(1); + return true; + } + + /** + * Open call semantic is handled differently in case of ADL. Instead of + * network stream is returned to the user, + * Overridden FsInputStream is returned. + * + * @param f File path + * @param buffersize Buffer size, Not honoured + * @return FSDataInputStream InputStream on which application can read + * stream of bytes + * @throws IOException when system error, internal server error or user error + */ + @Override + public FSDataInputStream open(final Path f, final int buffersize) + throws IOException { + statistics.incrementReadOps(1); + return new FSDataInputStream( + new AdlFsInputStream(adlClient.getReadStream(toRelativeFilePath(f)), + statistics, getConf())); + } + + /** + * Return a file status object that represents the path. + * + * @param f The path we want information from + * @return a FileStatus object + * @throws IOException when the path does not exist or any other error; + * IOException see specific implementation + */ + @Override + public FileStatus getFileStatus(final Path f) throws IOException { + statistics.incrementReadOps(1); + DirectoryEntry entry = adlClient.getDirectoryEntry(toRelativeFilePath(f)); + return toFileStatus(entry, f); + } + + /** + * List the statuses of the files/directories in the given path if the path is + * a directory. + * + * @param f given path + * @return the statuses of the files/directories in the given patch + * @throws IOException when the path does not exist or any other error; + * IOException see specific implementation + */ + @Override + public FileStatus[] listStatus(final Path f) throws IOException { + statistics.incrementReadOps(1); + List entries = + adlClient.enumerateDirectory(toRelativeFilePath(f)); + return toFileStatuses(entries, f); + } + + /** + * Renames Path src to Path dst. Can take place on local fs + * or remote DFS. + * + * ADLS support POSIX standard for rename operation. + * + * @param src path to be renamed + * @param dst new path after rename + * @return true if rename is successful + * @throws IOException on failure + */ + @Override + public boolean rename(final Path src, final Path dst) throws IOException { + statistics.incrementWriteOps(1); + return adlClient.rename(toRelativeFilePath(src), toRelativeFilePath(dst)); + } + + @Override + @Deprecated + public void rename(final Path src, final Path dst, + final Options.Rename... options) throws IOException { + statistics.incrementWriteOps(1); + boolean overwrite = false; + for (Rename renameOption : options) { + if (renameOption == Rename.OVERWRITE) { + overwrite = true; + break; + } + } + adlClient + .rename(toRelativeFilePath(src), toRelativeFilePath(dst), overwrite); + } + + /** + * Concat existing files together. + * + * @param trg the path to the target destination. + * @param srcs the paths to the sources to use for the concatenation. + * @throws IOException when system error, internal server error or user error + */ + @Override + public void concat(final Path trg, final Path[] srcs) throws IOException { + statistics.incrementWriteOps(1); + List sourcesList = new ArrayList(); + for (Path entry : srcs) { + sourcesList.add(toRelativeFilePath(entry)); + } + adlClient.concatenateFiles(toRelativeFilePath(trg), sourcesList); + } + + /** + * Delete a file. + * + * @param path the path to delete. + * @param recursive if path is a directory and set to + * true, the directory is deleted else throws an exception. + * In case of a file the recursive can be set to either + * true or false. + * @return true if delete is successful else false. + * @throws IOException when system error, internal server error or user error + */ + @Override + public boolean delete(final Path path, final boolean recursive) + throws IOException { + statistics.incrementWriteOps(1); + return recursive ? + adlClient.deleteRecursive(toRelativeFilePath(path)) : + adlClient.delete(toRelativeFilePath(path)); + } + + /** + * Make the given file and all non-existent parents into + * directories. Has the semantics of Unix 'mkdir -p'. + * Existence of the directory hierarchy is not an error. + * + * @param path path to create + * @param permission to apply to path + */ + @Override + public boolean mkdirs(final Path path, final FsPermission permission) + throws IOException { + statistics.incrementWriteOps(1); + return adlClient.createDirectory(toRelativeFilePath(path), + Integer.toOctalString(applyUMask(permission).toShort())); + } + + private FileStatus[] toFileStatuses(final List entries, + final Path parent) { + FileStatus[] fileStatuses = new FileStatus[entries.size()]; + int index = 0; + for (DirectoryEntry entry : entries) { + FileStatus status = toFileStatus(entry, parent); + if (!(entry.name == null || entry.name == "")) { + status.setPath( + new Path(parent.makeQualified(uri, workingDirectory), entry.name)); + } + + fileStatuses[index++] = status; + } + + return fileStatuses; + } + + private FsPermission applyUMask(FsPermission permission) { + if (permission == null) { + permission = FsPermission.getDefault(); + } + return permission.applyUMask(FsPermission.getUMask(getConf())); + } + + private FileStatus toFileStatus(final DirectoryEntry entry, final Path f) { + boolean isDirectory = entry.type == DirectoryEntryType.DIRECTORY; + long lastModificationData = entry.lastModifiedTime.getTime(); + long lastAccessTime = entry.lastAccessTime.getTime(); + FsPermission permission = new AdlPermission(aclBitStatus, + Short.valueOf(entry.permission, 8)); + String user = entry.user; + String group = entry.group; + + FileStatus status; + if (overrideOwner) { + status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR, + ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission, + userName, "hdfs", this.makeQualified(f)); + } else { + status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR, + ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission, + user, group, this.makeQualified(f)); + } + + return status; + } + + /** + * Set owner of a path (i.e. a file or a directory). + * The parameters owner and group cannot both be null. + * + * @param path The path + * @param owner If it is null, the original username remains unchanged. + * @param group If it is null, the original groupname remains unchanged. + */ + @Override + public void setOwner(final Path path, final String owner, final String group) + throws IOException { + statistics.incrementWriteOps(1); + adlClient.setOwner(toRelativeFilePath(path), owner, group); + } + + /** + * Set permission of a path. + * + * @param path The path + * @param permission Access permission + */ + @Override + public void setPermission(final Path path, final FsPermission permission) + throws IOException { + statistics.incrementWriteOps(1); + adlClient.setPermission(toRelativeFilePath(path), + Integer.toOctalString(permission.toShort())); + } + + /** + * Modifies ACL entries of files and directories. This method can add new ACL + * entries or modify the permissions on existing ACL entries. All existing + * ACL entries that are not specified in this call are retained without + * changes. (Modifications are merged into the current ACL.) + * + * @param path Path to modify + * @param aclSpec List of AclEntry describing modifications + * @throws IOException if an ACL could not be modified + */ + @Override + public void modifyAclEntries(final Path path, final List aclSpec) + throws IOException { + statistics.incrementWriteOps(1); + List msAclEntries = new + ArrayList(); + for (AclEntry aclEntry : aclSpec) { + msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry + .parseAclEntry(aclEntry.toString())); + } + adlClient.modifyAclEntries(toRelativeFilePath(path), msAclEntries); + } + + /** + * Removes ACL entries from files and directories. Other ACL entries are + * retained. + * + * @param path Path to modify + * @param aclSpec List of AclEntry describing entries to remove + * @throws IOException if an ACL could not be modified + */ + @Override + public void removeAclEntries(final Path path, final List aclSpec) + throws IOException { + statistics.incrementWriteOps(1); + List msAclEntries = new + ArrayList(); + for (AclEntry aclEntry : aclSpec) { + msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry + .parseAclEntry(aclEntry.toString(), true)); + } + adlClient.removeAclEntries(toRelativeFilePath(path), msAclEntries); + } + + /** + * Removes all default ACL entries from files and directories. + * + * @param path Path to modify + * @throws IOException if an ACL could not be modified + */ + @Override + public void removeDefaultAcl(final Path path) throws IOException { + statistics.incrementWriteOps(1); + adlClient.removeDefaultAcls(toRelativeFilePath(path)); + } + + /** + * Removes all but the base ACL entries of files and directories. The entries + * for user, group, and others are retained for compatibility with permission + * bits. + * + * @param path Path to modify + * @throws IOException if an ACL could not be removed + */ + @Override + public void removeAcl(final Path path) throws IOException { + statistics.incrementWriteOps(1); + adlClient.removeAllAcls(toRelativeFilePath(path)); + } + + /** + * Fully replaces ACL of files and directories, discarding all existing + * entries. + * + * @param path Path to modify + * @param aclSpec List of AclEntry describing modifications, must include + * entries for user, group, and others for compatibility with + * permission bits. + * @throws IOException if an ACL could not be modified + */ + @Override + public void setAcl(final Path path, final List aclSpec) + throws IOException { + statistics.incrementWriteOps(1); + List msAclEntries = new + ArrayList(); + for (AclEntry aclEntry : aclSpec) { + msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry + .parseAclEntry(aclEntry.toString())); + } + + adlClient.setAcl(toRelativeFilePath(path), msAclEntries); + } + + /** + * Gets the ACL of a file or directory. + * + * @param path Path to get + * @return AclStatus describing the ACL of the file or directory + * @throws IOException if an ACL could not be read + */ + @Override + public AclStatus getAclStatus(final Path path) throws IOException { + statistics.incrementReadOps(1); + com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = adlClient + .getAclStatus(toRelativeFilePath(path)); + AclStatus.Builder aclStatusBuilder = new AclStatus.Builder(); + aclStatusBuilder.owner(adlStatus.owner); + aclStatusBuilder.group(adlStatus.group); + aclStatusBuilder.setPermission( + new FsPermission(Short.valueOf(adlStatus.octalPermissions, 8))); + aclStatusBuilder.stickyBit(adlStatus.stickyBit); + String aclListString = com.microsoft.azure.datalake.store.acl.AclEntry + .aclListToString(adlStatus.aclSpec); + List aclEntries = AclEntry.parseAclSpec(aclListString, true); + aclStatusBuilder.addEntries(aclEntries); + return aclStatusBuilder.build(); + } + + /** + * Checks if the user can access a path. The mode specifies which access + * checks to perform. If the requested permissions are granted, then the + * method returns normally. If access is denied, then the method throws an + * {@link AccessControlException}. + * + * @param path Path to check + * @param mode type of access to check + * @throws AccessControlException if access is denied + * @throws java.io.FileNotFoundException if the path does not exist + * @throws IOException see specific implementation + */ + @Override + public void access(final Path path, FsAction mode) throws IOException { + statistics.incrementReadOps(1); + if (!adlClient.checkAccess(toRelativeFilePath(path), mode.SYMBOL)) { + throw new AccessControlException("Access Denied : " + path.toString()); + } + } + + /** + * Return the {@link ContentSummary} of a given {@link Path}. + * + * @param f path to use + */ + @Override + public ContentSummary getContentSummary(Path f) throws IOException { + statistics.incrementReadOps(1); + com.microsoft.azure.datalake.store.ContentSummary msSummary = adlClient + .getContentSummary(toRelativeFilePath(f)); + return new Builder().length(msSummary.length) + .directoryCount(msSummary.directoryCount).fileCount(msSummary.fileCount) + .spaceConsumed(msSummary.spaceConsumed).build(); + } + + @VisibleForTesting + protected String getTransportScheme() { + return SECURE_TRANSPORT_SCHEME; + } + + @VisibleForTesting + String toRelativeFilePath(Path path) { + return path.makeQualified(uri, workingDirectory).toUri().getPath(); + } + + /** + * Get the current working directory for the given file system. + * + * @return the directory pathname + */ + @Override + public Path getWorkingDirectory() { + return workingDirectory; + } + + /** + * Set the current working directory for the given file system. All relative + * paths will be resolved relative to it. + * + * @param dir Working directory path. + */ + @Override + public void setWorkingDirectory(final Path dir) { + if (dir == null) { + throw new InvalidPathException("Working directory cannot be set to NULL"); + } + + /** + * Do not validate the scheme and URI of the passsed parameter. When Adls + * runs as additional file system, working directory set has the default + * file system scheme and uri. + * + * Found a problem during PIG execution in + * https://github.com/apache/pig/blob/branch-0 + * .15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer + * /PigInputFormat.java#L235 + * However similar problem would be present in other application so + * defaulting to build working directory using relative path only. + */ + this.workingDirectory = this.makeAbsolute(dir); + } + + /** + * Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. + * + * @deprecated use {@link #getDefaultBlockSize(Path)} instead + */ + @Deprecated + public long getDefaultBlockSize() { + return ADL_BLOCK_SIZE; + } + + /** + * Return the number of bytes that large input files should be optimally + * be split into to minimize i/o time. The given path will be used to + * locate the actual filesystem. The full path does not have to exist. + * + * @param f path of file + * @return the default block size for the path's filesystem + */ + public long getDefaultBlockSize(Path f) { + return getDefaultBlockSize(); + } + + /** + * Get the block size. + * @param f the filename + * @return the number of bytes in a block + */ + /** + * @deprecated Use getFileStatus() instead + */ + @Deprecated + public long getBlockSize(Path f) throws IOException { + return ADL_BLOCK_SIZE; + } + + @Override + public BlockLocation[] getFileBlockLocations(final FileStatus status, + final long offset, final long length) throws IOException { + if (status == null) { + return null; + } + + if ((offset < 0) || (length < 0)) { + throw new IllegalArgumentException("Invalid start or len parameter"); + } + + if (status.getLen() < offset) { + return new BlockLocation[0]; + } + + final String[] name = {"localhost"}; + final String[] host = {"localhost"}; + long blockSize = ADL_BLOCK_SIZE; + int numberOfLocations = + (int) (length / blockSize) + ((length % blockSize == 0) ? 0 : 1); + BlockLocation[] locations = new BlockLocation[numberOfLocations]; + for (int i = 0; i < locations.length; i++) { + long currentOffset = offset + (i * blockSize); + long currentLength = Math.min(blockSize, offset + length - currentOffset); + locations[i] = new BlockLocation(name, host, currentOffset, + currentLength); + } + + return locations; + } + + @Override + public BlockLocation[] getFileBlockLocations(final Path p, final long offset, + final long length) throws IOException { + // read ops incremented in getFileStatus + FileStatus fileStatus = getFileStatus(p); + return getFileBlockLocations(fileStatus, offset, length); + } + + /** + * Get replication. + * + * @param src file name + * @return file replication + * @deprecated Use getFileStatus() instead + */ + @Deprecated + public short getReplication(Path src) { + return ADL_REPLICATION_FACTOR; + } + + private Path makeAbsolute(Path path) { + return path.isAbsolute() ? path : new Path(this.workingDirectory, path); + } + + private static String getNonEmptyVal(Configuration conf, String key) { + String value = conf.get(key); + if (StringUtils.isEmpty(value)) { + throw new IllegalArgumentException( + "No value for " + key + " found in conf file."); + } + return value; + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java new file mode 100644 index 00000000000..5248cbf37a0 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsInputStream.java @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.microsoft.azure.datalake.store.ADLFileInputStream; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSInputStream; +import org.apache.hadoop.fs.FileSystem.Statistics; + +import java.io.IOException; + +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_READ_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY; + +/** + * Wraps {@link ADLFileInputStream} implementation. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class AdlFsInputStream extends FSInputStream { + + private final ADLFileInputStream in; + private final Statistics stat; + private final boolean enablePositionalReadExperiment; + + public AdlFsInputStream(ADLFileInputStream inputStream, Statistics statistics, + Configuration conf) throws IOException { + this.in = inputStream; + this.in.setBufferSize(conf.getInt(READ_AHEAD_BUFFER_SIZE_KEY, + DEFAULT_READ_AHEAD_BUFFER_SIZE)); + enablePositionalReadExperiment = conf + .getBoolean(ADL_EXPERIMENT_POSITIONAL_READ_KEY, + ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT); + stat = statistics; + } + + @Override + public synchronized void seek(long pos) throws IOException { + in.seek(pos); + } + + /** + * Return the current offset from the start of the file. + */ + @Override + public synchronized long getPos() throws IOException { + return in.getPos(); + } + + @Override + public boolean seekToNewSource(long l) throws IOException { + return false; + } + + @Override + public synchronized int read() throws IOException { + int ch = in.read(); + if (stat != null && ch != -1) { + stat.incrementBytesRead(1); + } + return ch; + } + + @Override + public int read(long position, byte[] buffer, int offset, int length) + throws IOException { + int numberOfByteRead = 0; + if (enablePositionalReadExperiment) { + numberOfByteRead = in.read(position, buffer, offset, length); + } else { + numberOfByteRead = super.read(position, buffer, offset, length); + } + + if (stat != null && numberOfByteRead > 0) { + stat.incrementBytesRead(numberOfByteRead); + } + return numberOfByteRead; + } + + @Override + public synchronized int read(byte[] buffer, int offset, int length) + throws IOException { + int numberOfByteRead = in.read(buffer, offset, length); + if (stat != null && numberOfByteRead > 0) { + stat.incrementBytesRead(numberOfByteRead); + } + return numberOfByteRead; + } + + /** + * This method returns the remaining bytes in the stream, rather than the + * expected Java + * interpretation of {@link java.io.InputStream#available()}, which expects + * the + * number of remaining + * bytes in the local buffer. Moreover, it caps the value returned to a + * maximum of Integer.MAX_VALUE. + * These changed behaviors are to ensure compatibility with the + * expectations of HBase WAL reader, + * which depends on available() returning the number of bytes in stream. + * + * Given all other FileSystems in the hadoop ecosystem (especially HDFS) do + * this, it is possible other + * apps other than HBase would also pick up expectation of this behavior + * based on HDFS implementation. + * Therefore keeping this quirky behavior here, to ensure compatibility. + * + * @return remaining bytes in the stream, with maximum of Integer.MAX_VALUE. + * @throws IOException If fails to get the position or file length from SDK. + */ + @Override + public synchronized int available() throws IOException { + return (int) Math.min(in.length() - in.getPos(), Integer.MAX_VALUE); + } + + @Override + public synchronized void close() throws IOException { + in.close(); + } + + @Override + public synchronized long skip(long pos) throws IOException { + return in.skip(pos); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java new file mode 100644 index 00000000000..2b89fb0a732 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlFsOutputStream.java @@ -0,0 +1,82 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.microsoft.azure.datalake.store.ADLFileOutputStream; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Syncable; + +import java.io.IOException; +import java.io.OutputStream; + +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_WRITE_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY; + +/** + * Wraps {@link com.microsoft.azure.datalake.store.ADLFileOutputStream} + * implementation. + * + * Flush semantics. + * no-op, since some parts of hadoop ecosystem call flush(), expecting it to + * have no perf impact. In hadoop filesystems, flush() itself guarantees no + * durability: that is achieved by calling hflush() or hsync() + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public final class AdlFsOutputStream extends OutputStream implements Syncable { + private final ADLFileOutputStream out; + + public AdlFsOutputStream(ADLFileOutputStream out, Configuration configuration) + throws IOException { + this.out = out; + out.setBufferSize(configuration + .getInt(WRITE_BUFFER_SIZE_KEY, DEFAULT_WRITE_AHEAD_BUFFER_SIZE)); + } + + @Override + public synchronized void write(int b) throws IOException { + out.write(b); + } + + @Override + public synchronized void write(byte[] b, int off, int len) + throws IOException { + out.write(b, off, len); + } + + @Override + public synchronized void close() throws IOException { + out.close(); + } + + public synchronized void sync() throws IOException { + out.flush(); + } + + public synchronized void hflush() throws IOException { + out.flush(); + } + + public synchronized void hsync() throws IOException { + out.flush(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java new file mode 100644 index 00000000000..af3342a13e4 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/AdlPermission.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.fs.permission.FsPermission; + +/** + * Hadoop shell command -getfacl does not invoke getAclStatus if FsPermission + * from getFileStatus has not set ACL bit to true. By default getAclBit returns + * false. + * + * Provision to make additional call to invoke getAclStatus would be redundant + * when adls is running as additional FS. To avoid this redundancy, provided + * configuration to return true/false on getAclBit. + */ +class AdlPermission extends FsPermission { + private final boolean aclBit; + + AdlPermission(boolean aclBitStatus, Short aShort) { + super(aShort); + this.aclBit = aclBitStatus; + } + + /** + * Returns true if "adl.feature.support.acl.bit" configuration is set to + * true. + * + * If configuration is not set then default value is true. + * + * @return If configuration is not set then default value is true. + */ + public boolean getAclBit() { + return aclBit; + } + + @Override + public boolean equals(Object obj) { + if (obj instanceof FsPermission) { + FsPermission that = (FsPermission) obj; + return this.getUserAction() == that.getUserAction() + && this.getGroupAction() == that.getGroupAction() + && this.getOtherAction() == that.getOtherAction() + && this.getStickyBit() == that.getStickyBit(); + } + return false; + } + + @Override + public int hashCode() { + return toShort(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java new file mode 100644 index 00000000000..7b107aef972 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/SdkTokenProviderAdapter.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.AzureADToken; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; + +import java.io.IOException; + +final class SdkTokenProviderAdapter extends AccessTokenProvider { + + private AzureADTokenProvider tokenProvider; + + SdkTokenProviderAdapter(AzureADTokenProvider tp) { + this.tokenProvider = tp; + } + + protected AzureADToken refreshToken() throws IOException { + AzureADToken azureADToken = new AzureADToken(); + azureADToken.accessToken = tokenProvider.getAccessToken(); + azureADToken.expiry = tokenProvider.getExpiryTime(); + return azureADToken; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java new file mode 100644 index 00000000000..9fd4f4f46b1 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/TokenProviderType.java @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +enum TokenProviderType { + RefreshToken, + ClientCredential, + Custom +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java new file mode 100644 index 00000000000..a0b3922502b --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/AzureADTokenProvider.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl.oauth2; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; + +import java.io.IOException; +import java.util.Date; + +/** + * Provide an Azure Active Directory supported + * OAuth2 access token to be used to authenticate REST calls against Azure data + * lake file system {@link org.apache.hadoop.fs.adl.AdlFileSystem}. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public abstract class AzureADTokenProvider { + + /** + * Initialize with supported configuration. This method is invoked when the + * {@link org.apache.hadoop.fs.adl.AdlFileSystem#initialize + * (URI, Configuration)} method is invoked. + * + * @param configuration Configuration object + * @throws IOException if instance can not be configured. + */ + public abstract void initialize(Configuration configuration) + throws IOException; + + /** + * Obtain the access token that should be added to https connection's header. + * Will be called depending upon {@link #getExpiryTime()} expiry time is set, + * so implementations should be performant. Implementations are responsible + * for any refreshing of the token. + * + * @return String containing the access token + * @throws IOException if there is an error fetching the token + */ + public abstract String getAccessToken() throws IOException; + + /** + * Obtain expiry time of the token. If implementation is performant enough to + * maintain expiry and expect {@link #getAccessToken()} call for every + * connection then safe to return current or past time. + * + * However recommended to use the token expiry time received from Azure Active + * Directory. + * + * @return Date to expire access token retrieved from AAD. + */ + public abstract Date getExpiryTime(); +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java new file mode 100644 index 00000000000..16139413894 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/oauth2/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/** + * public interface to expose OAuth2 authentication related features. + */ +package org.apache.hadoop.fs.adl.oauth2; \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java new file mode 100644 index 00000000000..456eebcd8b8 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/fs/adl/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +/** + * Supporting classes for metrics instrumentation. + */ +package org.apache.hadoop.fs.adl; \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/resources/META-INF/org.apache.hadoop.fs.FileSystem b/hadoop-tools/hadoop-azure-datalake/src/main/resources/META-INF/org.apache.hadoop.fs.FileSystem new file mode 100644 index 00000000000..7ec7812295d --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/resources/META-INF/org.apache.hadoop.fs.FileSystem @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +org.apache.hadoop.fs.adl.AdlFileSystem \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md new file mode 100644 index 00000000000..00825d101c5 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -0,0 +1,193 @@ + + +# Hadoop Azure Data Lake Support + +* [Introduction](#Introduction) +* [Features](#Features) +* [Limitations](#Limitations) +* [Usage](#Usage) + * [Concepts](#Concepts) + * [OAuth2 Support](#OAuth2_Support) + * [Configuring Credentials & FileSystem](#Configuring_Credentials) + * [Using Refresh Token](#Refresh_Token) + * [Using Client Keys](#Client_Credential_Token) + * [Enabling ADL Filesystem](#Enabling_ADL) + * [Accessing adl URLs](#Accessing_adl_URLs) +* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module) + +## Introduction + +The hadoop-azure-datalake module provides support for integration with +[Azure Data Lake Store]( https://azure.microsoft.com/en-in/documentation/services/data-lake-store/). +The jar file is named azure-datalake-store.jar. + +## Features + +* Read and write data stored in an Azure Data Lake Storage account. +* Reference file system paths using URLs using the `adl` scheme for Secure Webhdfs i.e. SSL + encrypted access. +* Can act as a source of data in a MapReduce job, or a sink. +* Tested on both Linux and Windows. +* Tested for scale. + +## Limitations +Partial or no support for the following operations : + +* Operation on Symbolic Link +* Proxy Users +* File Truncate +* File Checksum +* File replication factor +* Home directory the active user on Hadoop cluster. +* Extended Attributes(XAttrs) Operations +* Snapshot Operations +* Delegation Token Operations +* User and group information returned as ListStatus and GetFileStatus is in form of GUID associated in Azure Active Directory. + +## Usage + +### Concepts +Azure Data Lake Storage access path syntax is + + adl://.azuredatalakestore.net/ + +Get started with azure data lake account with [https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/](https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/) + +#### OAuth2 Support +Usage of Azure Data Lake Storage requires OAuth2 bearer token to be present as part of the HTTPS header as per OAuth2 specification. Valid OAuth2 bearer token should be obtained from Azure Active Directory for valid users who have access to Azure Data Lake Storage Account. + +Azure Active Directory (Azure AD) is Microsoft's multi-tenant cloud based directory and identity management service. See [https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/](https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/) + +Following sections describes on OAuth2 configuration in core-site.xml. + +## Configuring Credentials & FileSystem +Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal). + +### Using Refresh Token + +Add the following properties to your core-site.xml + + + dfs.adls.oauth2.access.token.provider.type + RefreshToken + + +Application require to set Client id and OAuth2 refresh token from Azure Active Directory associated with client id. See [https://github.com/AzureAD/azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java). + +**Do not share client id and refresh token, it must be kept secret.** + + + dfs.adls.oauth2.client.id + + + + + dfs.adls.oauth2.refresh.token + + + + +### Using Client Keys + +#### Generating the Service Principal +1. Go to the portal (https://portal.azure.com) +2. Under "Browse", look for Active Directory and click on it. +3. Create "Web Application". Remember the name you create here - that is what you will add to your ADL account as authorized user. +4. Go through the wizard +5. Once app is created, Go to app configuration, and find the section on "keys" +6. Select a key duration and hit save. Save the generated keys. +7. Note down the properties you will need to auth: + - The client ID + - The key you just generated above + - The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth2 .0 Token Endpoint value) + - Resource: Always https://management.core.windows.net/ , for all customers + +#### Adding the service principal to your ADL Account +1. Go to the portal again, and open your ADL account +2. Select Users under Settings +3. Add your user name you created in Step 6 above (note that it does not show up in the list, but will be found if you searched for the name) +4. Add "Owner" role + +#### Configure core-site.xml +Add the following properties to your core-site.xml + + + dfs.adls.oauth2.refresh.url + TOKEN ENDPOINT FROM STEP 7 ABOVE + + + + dfs.adls.oauth2.client.id + CLIENT ID FROM STEP 7 ABOVE + + + + dfs.adls.oauth2.credential + PASSWORD FROM STEP 7 ABOVE + + + + +## Enabling ADL Filesystem + +For ADL FileSystem to take effect. Update core-site.xml with + + + fs.adl.impl + org.apache.hadoop.fs.adl.AdlFileSystem + + + + fs.AbstractFileSystem.adl.impl + org.apache.hadoop.fs.adl.Adl + + + +### Accessing adl URLs + +After credentials are configured in core-site.xml, any Hadoop component may +reference files in that Azure Data Lake Storage account by using URLs of the following +format: + + adl://.azuredatalakestore.net/ + +The schemes `adl` identify a URL on a file system backed by Azure +Data Lake Storage. `adl` utilizes encrypted HTTPS access for all interaction with +the Azure Data Lake Storage API. + +For example, the following +[FileSystem Shell](../hadoop-project-dist/hadoop-common/FileSystemShell.html) +commands demonstrate access to a storage account named `youraccount`. + + > hadoop fs -mkdir adl://yourcontainer.azuredatalakestore.net/testDir + + > hadoop fs -put testFile adl://yourcontainer.azuredatalakestore.net/testDir/testFile + + > hadoop fs -cat adl://yourcontainer.azuredatalakestore.net/testDir/testFile + test file content +## Testing the azure-datalake-store Module +The hadoop-azure module includes a full suite of unit tests. Most of the tests will run without additional configuration by running mvn test. This includes tests against mocked storage, which is an in-memory emulation of Azure Data Lake Storage. + +A selection of tests can run against the Azure Data Lake Storage. To run tests against Adl storage. Please configure contract-test-options.xml with Adl account information mentioned in the above sections. Also turn on contract test execution flag to trigger tests against Azure Data Lake Storage. + + + dfs.adl.test.contract.enable + true + + + + test.fs.adl.name + adl://yourcontainer.azuredatalakestore.net + diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java new file mode 100644 index 00000000000..55c8f812c25 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/AdlMockWebServer.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; + +import com.squareup.okhttp.mockwebserver.MockWebServer; + +import org.junit.After; +import org.junit.Before; + +/** + * Mock server to simulate Adls backend calls. This infrastructure is expandable + * to override expected server response based on the derived test functionality. + * Common functionality to generate token information before request is send to + * adls backend is also managed within AdlMockWebServer implementation using + * {@link org.apache.hadoop.fs.adl.common.CustomMockTokenProvider}. + */ +public class AdlMockWebServer { + // Create a MockWebServer. These are lean enough that you can create a new + // instance for every unit test. + private MockWebServer server = null; + private TestableAdlFileSystem fs = null; + private int port = 0; + private Configuration conf = new Configuration(); + + public MockWebServer getMockServer() { + return server; + } + + public TestableAdlFileSystem getMockAdlFileSystem() { + return fs; + } + + public int getPort() { + return port; + } + + public Configuration getConf() { + return conf; + } + + public void setConf(Configuration conf) { + this.conf = conf; + } + + @Before + public void preTestSetup() throws IOException, URISyntaxException { + server = new MockWebServer(); + + // Start the server. + server.start(); + + // Ask the server for its URL. You'll need this to make HTTP requests. + URL baseUrl = server.getUrl(""); + port = baseUrl.getPort(); + + // Exercise your application code, which should make those HTTP requests. + // Responses are returned in the same order that they are enqueued. + fs = new TestableAdlFileSystem(); + + conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + CustomMockTokenProvider.class, AzureADTokenProvider.class); + + URI uri = new URI("adl://localhost:" + port); + fs.initialize(uri, conf); + } + + @After + public void postTestSetup() throws IOException { + fs.close(); + server.shutdown(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java new file mode 100644 index 00000000000..b420daae337 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestACLFeatures.java @@ -0,0 +1,262 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.AclEntry; +import org.apache.hadoop.fs.permission.AclEntryScope; +import org.apache.hadoop.fs.permission.AclEntryType; +import org.apache.hadoop.fs.permission.AclStatus; +import org.apache.hadoop.fs.permission.FsAction; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.security.AccessControlException; + +import com.squareup.okhttp.mockwebserver.MockResponse; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Stub adl server and test acl data conversion within SDK and Hadoop adl + * client. + */ +public class TestACLFeatures extends AdlMockWebServer { + + @Test(expected=AccessControlException.class) + public void testModifyAclEntries() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + aclEntryBuilder.setPermission(FsAction.ALL); + aclEntryBuilder.setScope(AclEntryScope.ACCESS); + entries.add(aclEntryBuilder.build()); + + aclEntryBuilder.setName("hdfs"); + aclEntryBuilder.setType(AclEntryType.GROUP); + aclEntryBuilder.setPermission(FsAction.READ_WRITE); + aclEntryBuilder.setScope(AclEntryScope.DEFAULT); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().modifyAclEntries(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .modifyAclEntries(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testRemoveAclEntriesWithOnlyUsers() + throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().removeAclEntries(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .removeAclEntries(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testRemoveAclEntries() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + aclEntryBuilder.setPermission(FsAction.ALL); + aclEntryBuilder.setScope(AclEntryScope.ACCESS); + entries.add(aclEntryBuilder.build()); + + aclEntryBuilder.setName("hdfs"); + aclEntryBuilder.setType(AclEntryType.GROUP); + aclEntryBuilder.setPermission(FsAction.READ_WRITE); + aclEntryBuilder.setScope(AclEntryScope.DEFAULT); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().removeAclEntries(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .removeAclEntries(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testRemoveDefaultAclEntries() + throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().removeDefaultAcl(new Path("/test1/test2")); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().removeDefaultAcl(new Path("/test1/test2")); + } + + @Test(expected=AccessControlException.class) + public void testRemoveAcl() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().removeAcl(new Path("/test1/test2")); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().removeAcl(new Path("/test1/test2")); + } + + @Test(expected=AccessControlException.class) + public void testSetAcl() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + List entries = new ArrayList(); + AclEntry.Builder aclEntryBuilder = new AclEntry.Builder(); + aclEntryBuilder.setName("hadoop"); + aclEntryBuilder.setType(AclEntryType.USER); + aclEntryBuilder.setPermission(FsAction.ALL); + aclEntryBuilder.setScope(AclEntryScope.ACCESS); + entries.add(aclEntryBuilder.build()); + + aclEntryBuilder.setName("hdfs"); + aclEntryBuilder.setType(AclEntryType.GROUP); + aclEntryBuilder.setPermission(FsAction.READ_WRITE); + aclEntryBuilder.setScope(AclEntryScope.DEFAULT); + entries.add(aclEntryBuilder.build()); + + getMockAdlFileSystem().setAcl(new Path("/test1/test2"), entries); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().setAcl(new Path("/test1/test2"), entries); + } + + @Test(expected=AccessControlException.class) + public void testCheckAccess() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.ALL); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.EXECUTE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.READ); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.READ_EXECUTE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.READ_WRITE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.NONE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.WRITE); + + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.WRITE_EXECUTE); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .access(new Path("/test1/test2"), FsAction.WRITE_EXECUTE); + } + + @Test(expected=AccessControlException.class) + public void testSetPermission() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem() + .setPermission(new Path("/test1/test2"), FsPermission.getDefault()); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .setPermission(new Path("/test1/test2"), FsPermission.getDefault()); + } + + @Test(expected=AccessControlException.class) + public void testSetOwner() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200)); + getMockAdlFileSystem().setOwner(new Path("/test1/test2"), "hadoop", "hdfs"); + + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem() + .setOwner(new Path("/test1/test2"), "hadoop", "hdfs"); + } + + @Test + public void getAclStatusAsExpected() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetAclStatusJSONResponse())); + AclStatus aclStatus = getMockAdlFileSystem() + .getAclStatus(new Path("/test1/test2")); + Assert.assertEquals(aclStatus.getGroup(), "supergroup"); + Assert.assertEquals(aclStatus.getOwner(), "hadoop"); + Assert.assertEquals((Short) aclStatus.getPermission().toShort(), + Short.valueOf("775", 8)); + + for (AclEntry entry : aclStatus.getEntries()) { + if (!(entry.toString().equalsIgnoreCase("user:carla:rw-") || entry + .toString().equalsIgnoreCase("group::r-x"))) { + Assert.fail("Unexpected entry : " + entry.toString()); + } + } + } + + @Test(expected=FileNotFoundException.class) + public void getAclStatusNotExists() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(404) + .setBody(TestADLResponseData.getFileNotFoundException())); + + getMockAdlFileSystem().getAclStatus(new Path("/test1/test2")); + } + + @Test(expected=AccessControlException.class) + public void testAclStatusDenied() throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(403) + .setBody(TestADLResponseData.getAccessControlException())); + + getMockAdlFileSystem().getAclStatus(new Path("/test1/test2")); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java new file mode 100644 index 00000000000..24eb31477ff --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestADLResponseData.java @@ -0,0 +1,147 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.fs.FileStatus; + +import java.util.Random; + +/** + * Mock up response data returned from Adl storage account. + */ +public final class TestADLResponseData { + + private TestADLResponseData() { + + } + + public static String getGetFileStatusJSONResponse(FileStatus status) { + return "{\"FileStatus\":{\"length\":" + status.getLen() + "," + + "\"pathSuffix\":\"\",\"type\":\"" + (status.isDirectory() ? + "DIRECTORY" : + "FILE") + "\"" + + ",\"blockSize\":" + status.getBlockSize() + ",\"accessTime\":" + + status.getAccessTime() + ",\"modificationTime\":" + status + .getModificationTime() + "" + + ",\"replication\":" + status.getReplication() + ",\"permission\":\"" + + status.getPermission() + "\",\"owner\":\"" + status.getOwner() + + "\",\"group\":\"" + status.getGroup() + "\"}}"; + } + + public static String getGetFileStatusJSONResponse() { + return getGetFileStatusJSONResponse(4194304); + } + + public static String getGetAclStatusJSONResponse() { + return "{\n" + " \"AclStatus\": {\n" + " \"entries\": [\n" + + " \"user:carla:rw-\", \n" + " \"group::r-x\"\n" + + " ], \n" + " \"group\": \"supergroup\", \n" + + " \"owner\": \"hadoop\", \n" + + " \"permission\":\"775\",\n" + " \"stickyBit\": false\n" + + " }\n" + "}"; + } + + public static String getGetFileStatusJSONResponse(long length) { + return "{\"FileStatus\":{\"length\":" + length + "," + + "\"pathSuffix\":\"\",\"type\":\"FILE\",\"blockSize\":268435456," + + "\"accessTime\":1452103827023,\"modificationTime\":1452103827023," + + "\"replication\":0,\"permission\":\"777\"," + + "\"owner\":\"NotSupportYet\",\"group\":\"NotSupportYet\"}}"; + } + + public static String getListFileStatusJSONResponse(int dirSize) { + String list = ""; + for (int i = 0; i < dirSize; ++i) { + list += "{\"length\":1024,\"pathSuffix\":\"" + java.util.UUID.randomUUID() + + "\",\"type\":\"FILE\",\"blockSize\":268435456," + + "\"accessTime\":1452103878833," + + "\"modificationTime\":1452103879190,\"replication\":0," + + "\"permission\":\"777\",\"owner\":\"NotSupportYet\"," + + "\"group\":\"NotSupportYet\"},"; + } + + list = list.substring(0, list.length() - 1); + return "{\"FileStatuses\":{\"FileStatus\":[" + list + "]}}"; + } + + public static String getJSONResponse(boolean status) { + return "{\"boolean\":" + status + "}"; + } + + public static String getErrorIllegalArgumentExceptionJSONResponse() { + return "{\n" + + " \"RemoteException\":\n" + + " {\n" + + " \"exception\" : \"IllegalArgumentException\",\n" + + " \"javaClassName\": \"java.lang.IllegalArgumentException\",\n" + + " \"message\" : \"Invalid\"" + + " }\n" + + "}"; + } + + public static String getErrorBadOffsetExceptionJSONResponse() { + return "{\n" + + " \"RemoteException\":\n" + + " {\n" + + " \"exception\" : \"BadOffsetException\",\n" + + " \"javaClassName\": \"org.apache.hadoop.fs.adl" + + ".BadOffsetException\",\n" + + " \"message\" : \"Invalid\"" + + " }\n" + + "}"; + } + + public static String getErrorInternalServerExceptionJSONResponse() { + return "{\n" + + " \"RemoteException\":\n" + + " {\n" + + " \"exception\" : \"RuntimeException\",\n" + + " \"javaClassName\": \"java.lang.RuntimeException\",\n" + + " \"message\" : \"Internal Server Error\"" + + " }\n" + + "}"; + } + + public static String getAccessControlException() { + return "{\n" + " \"RemoteException\":\n" + " {\n" + + " \"exception\" : \"AccessControlException\",\n" + + " \"javaClassName\": \"org.apache.hadoop.security" + + ".AccessControlException\",\n" + + " \"message\" : \"Permission denied: ...\"\n" + " }\n" + "}"; + } + + public static String getFileNotFoundException() { + return "{\n" + " \"RemoteException\":\n" + " {\n" + + " \"exception\" : \"FileNotFoundException\",\n" + + " \"javaClassName\": \"java.io.FileNotFoundException\",\n" + + " \"message\" : \"File does not exist\"\n" + " }\n" + "}"; + } + + public static byte[] getRandomByteArrayData() { + return getRandomByteArrayData(4 * 1024 * 1024); + } + + public static byte[] getRandomByteArrayData(int size) { + byte[] b = new byte[size]; + Random rand = new Random(); + rand.nextBytes(b); + return b; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java new file mode 100644 index 00000000000..734256a680f --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAdlRead.java @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.Parallelized; +import org.apache.hadoop.fs.adl.common.TestDataForRead; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.ByteArrayInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY; + +/** + * This class is responsible for stress positional reads vs number of network + * calls required by to fetch the amount of data. Test does ensure the data + * integrity and order of the data is maintained. + */ +@RunWith(Parallelized.class) +public class TestAdlRead extends AdlMockWebServer { + + private TestDataForRead testData; + + public TestAdlRead(TestDataForRead testData) { + Configuration configuration = new Configuration(); + configuration.setInt(READ_AHEAD_BUFFER_SIZE_KEY, 4 * 1024); + setConf(configuration); + this.testData = testData; + } + + @Parameterized.Parameters(name = "{index}") + public static Collection testDataForReadOperation() { + return Arrays.asList(new Object[][] { + + //-------------------------- + // Test Data + //-------------------------- + {new TestDataForRead("Hello World".getBytes(), 2, 1000, true)}, + {new TestDataForRead( + ("the problem you appear to be wrestling with is that this doesn't " + + "display very well. ").getBytes(), 2, 1000, true)}, + {new TestDataForRead(("您的數據是寶貴的資產,以您的組織,並有當前和未來價值。由於這個原因," + + "所有的數據應存儲以供將來分析。今天,這往往是不這樣做," + "因為傳統的分析基礎架構的限制," + + "像模式的預定義,存儲大數據集和不同的數據筒倉的傳播的成本。" + + "為了應對這一挑戰,數據湖面概念被引入作為一個企業級存儲庫來存儲所有" + + "類型的在一個地方收集到的數據。對於運作和探索性分析的目的,所有類型的" + "數據可以定義需求或模式之前被存儲在數據湖。") + .getBytes(), 2, 1000, true)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(4 * 1024), 2, 10, true)}, + {new TestDataForRead(TestADLResponseData.getRandomByteArrayData(100), 2, + 1000, true)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(1 * 1024), 2, 50, true)}, + {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(8 * 1024), 3, 10, + false)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(16 * 1024), 5, 10, false)}, + {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(32 * 1024), 9, 10, + false)}, {new TestDataForRead( + TestADLResponseData.getRandomByteArrayData(64 * 1024), 17, 10, + false)}}); + } + + @Test + public void testEntireBytes() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + byte[] expectedData = new byte[testData.getActualData().length]; + int n = 0; + int len = expectedData.length; + int off = 0; + while (n < len) { + int count = in.read(expectedData, off + n, len - n); + if (count < 0) { + throw new EOFException(); + } + n += count; + } + + Assert.assertEquals(testData.getActualData().length, expectedData.length); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + in.close(); + if (testData.isCheckOfNoOfCalls()) { + Assert.assertEquals(testData.getExpectedNoNetworkCall(), + getMockServer().getRequestCount()); + } + } + + @Test + public void testSeekOperation() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + Random random = new Random(); + for (int i = 0; i < 1000; ++i) { + int position = random.nextInt(testData.getActualData().length); + in.seek(position); + Assert.assertEquals(in.getPos(), position); + Assert.assertEquals(in.read(), testData.getActualData()[position] & 0xFF); + } + in.close(); + if (testData.isCheckOfNoOfCalls()) { + Assert.assertEquals(testData.getExpectedNoNetworkCall(), + getMockServer().getRequestCount()); + } + } + + @Test + public void testReadServerCalls() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + byte[] expectedData = new byte[testData.getActualData().length]; + in.readFully(expectedData); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + Assert.assertEquals(testData.getExpectedNoNetworkCall(), + getMockServer().getRequestCount()); + in.close(); + } + + @Test + public void testReadFully() throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + byte[] expectedData = new byte[testData.getActualData().length]; + in.readFully(expectedData); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + + in.readFully(0, expectedData); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + + in.readFully(0, expectedData, 0, expectedData.length); + Assert.assertArrayEquals(expectedData, testData.getActualData()); + in.close(); + } + + @Test + public void testRandomPositionalReadUsingReadFully() + throws IOException, InterruptedException { + getMockServer().setDispatcher(testData.getDispatcher()); + FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test")); + ByteArrayInputStream actualData = new ByteArrayInputStream( + testData.getActualData()); + Random random = new Random(); + for (int i = 0; i < testData.getIntensityOfTest(); ++i) { + int offset = random.nextInt(testData.getActualData().length); + int length = testData.getActualData().length - offset; + byte[] expectedData = new byte[length]; + byte[] actualDataSubset = new byte[length]; + actualData.reset(); + actualData.skip(offset); + actualData.read(actualDataSubset, 0, length); + + in.readFully(offset, expectedData, 0, length); + Assert.assertArrayEquals(expectedData, actualDataSubset); + } + + for (int i = 0; i < testData.getIntensityOfTest(); ++i) { + int offset = random.nextInt(testData.getActualData().length); + int length = random.nextInt(testData.getActualData().length - offset); + byte[] expectedData = new byte[length]; + byte[] actualDataSubset = new byte[length]; + actualData.reset(); + actualData.skip(offset); + actualData.read(actualDataSubset, 0, length); + + in.readFully(offset, expectedData, 0, length); + Assert.assertArrayEquals(expectedData, actualDataSubset); + } + + in.close(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java new file mode 100644 index 00000000000..c94e6929432 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestAzureADTokenProvider.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; + +import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider; +import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_TYPE_KEY; +import static org.apache.hadoop.fs.adl.TokenProviderType.*; + +import org.junit.Assert; +import org.junit.Test; + +/** + * Test appropriate token provider is loaded as per configuration. + */ +public class TestAzureADTokenProvider { + + @Test + public void testRefreshTokenProvider() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID"); + conf.set(AZURE_AD_REFRESH_TOKEN_KEY, "XYZ"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken); + conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh"); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + AccessTokenProvider tokenProvider = fileSystem.getTokenProvider(); + Assert.assertTrue(tokenProvider instanceof RefreshTokenBasedTokenProvider); + } + + @Test + public void testClientCredTokenProvider() + throws IOException, URISyntaxException { + Configuration conf = new Configuration(); + conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID"); + conf.set(AZURE_AD_CLIENT_SECRET_KEY, "XYZ"); + conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential); + conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh"); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + AccessTokenProvider tokenProvider = fileSystem.getTokenProvider(); + Assert.assertTrue(tokenProvider instanceof ClientCredsTokenProvider); + } + + @Test + public void testCustomCredTokenProvider() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + CustomMockTokenProvider.class, AzureADTokenProvider.class); + + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + fileSystem.initialize(uri, conf); + AccessTokenProvider tokenProvider = fileSystem.getTokenProvider(); + Assert.assertTrue(tokenProvider instanceof SdkTokenProviderAdapter); + } + + @Test + public void testInvalidProviderConfigurationForType() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + try { + fileSystem.initialize(uri, conf); + Assert.fail("Initialization should have failed due no token provider " + + "configuration"); + } catch (IllegalArgumentException e) { + Assert.assertTrue( + e.getMessage().contains("dfs.adls.oauth2.access.token.provider")); + } + conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + CustomMockTokenProvider.class, AzureADTokenProvider.class); + fileSystem.initialize(uri, conf); + } + + @Test + public void testInvalidProviderConfigurationForClassPath() + throws URISyntaxException, IOException { + Configuration conf = new Configuration(); + URI uri = new URI("adl://localhost:8080"); + AdlFileSystem fileSystem = new AdlFileSystem(); + conf.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + "wrong.classpath.CustomMockTokenProvider"); + try { + fileSystem.initialize(uri, conf); + Assert.fail("Initialization should have failed due invalid provider " + + "configuration"); + } catch (RuntimeException e) { + Assert.assertTrue( + e.getMessage().contains("wrong.classpath.CustomMockTokenProvider")); + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java new file mode 100644 index 00000000000..b7905627831 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestConcurrentDataReadOperations.java @@ -0,0 +1,299 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.Dispatcher; +import com.squareup.okhttp.mockwebserver.MockResponse; +import com.squareup.okhttp.mockwebserver.RecordedRequest; +import okio.Buffer; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Random; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * This class is responsible for testing multiple threads trying to access same + * or multiple files from the offset. + */ +@RunWith(Parameterized.class) +public class TestConcurrentDataReadOperations extends AdlMockWebServer { + private static final Logger LOG = LoggerFactory + .getLogger(TestConcurrentDataReadOperations.class); + private static final Object LOCK = new Object(); + private static FSDataInputStream commonHandle = null; + private int concurrencyLevel; + + public TestConcurrentDataReadOperations(int concurrencyLevel) { + this.concurrencyLevel = concurrencyLevel; + } + + @Parameterized.Parameters(name = "{index}") + public static Collection testDataNumberOfConcurrentRun() { + return Arrays.asList(new Object[][] {{1}, {2}, {3}, {4}, {5}}); + } + + public static byte[] getRandomByteArrayData(int size) { + byte[] b = new byte[size]; + Random rand = new Random(); + rand.nextBytes(b); + return b; + } + + private void setDispatcher(final ArrayList testData) { + getMockServer().setDispatcher(new Dispatcher() { + @Override + public MockResponse dispatch(RecordedRequest recordedRequest) + throws InterruptedException { + CreateTestData currentRequest = null; + for (CreateTestData local : testData) { + if (recordedRequest.getPath().contains(local.path.toString())) { + currentRequest = local; + break; + } + } + + if (currentRequest == null) { + new MockResponse().setBody("Request data not found") + .setResponseCode(501); + } + + if (recordedRequest.getRequestLine().contains("op=GETFILESTATUS")) { + return new MockResponse().setResponseCode(200).setBody( + TestADLResponseData + .getGetFileStatusJSONResponse(currentRequest.data.length)); + } + + if (recordedRequest.getRequestLine().contains("op=OPEN")) { + String request = recordedRequest.getRequestLine(); + int offset = 0; + int byteCount = 0; + + Pattern pattern = Pattern.compile("offset=([0-9]+)"); + Matcher matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + offset = Integer.parseInt(matcher.group(1)); + } + + pattern = Pattern.compile("length=([0-9]+)"); + matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + byteCount = Integer.parseInt(matcher.group(1)); + } + + Buffer buf = new Buffer(); + buf.write(currentRequest.data, offset, + Math.min(currentRequest.data.length - offset, byteCount)); + return new MockResponse().setResponseCode(200) + .setChunkedBody(buf, 4 * 1024 * 1024); + } + + return new MockResponse().setBody("NOT SUPPORTED").setResponseCode(501); + } + }); + } + + @Before + public void resetHandle() { + commonHandle = null; + } + + @Test + public void testParallelReadOnDifferentStreams() + throws IOException, InterruptedException, ExecutionException { + + ArrayList createTestData = new ArrayList(); + + Random random = new Random(); + + for (int i = 0; i < concurrencyLevel; i++) { + CreateTestData testData = new CreateTestData(); + testData + .set(new Path("/test/concurrentRead/" + UUID.randomUUID().toString()), + getRandomByteArrayData(random.nextInt(1 * 1024 * 1024))); + createTestData.add(testData); + } + + setDispatcher(createTestData); + + ArrayList readTestData = new ArrayList(); + for (CreateTestData local : createTestData) { + ReadTestData localReadData = new ReadTestData(); + localReadData.set(local.path, local.data, 0); + readTestData.add(localReadData); + } + + runReadTest(readTestData, false); + } + + @Test + public void testParallelReadOnSameStreams() + throws IOException, InterruptedException, ExecutionException { + ArrayList createTestData = new ArrayList(); + + Random random = new Random(); + + for (int i = 0; i < 1; i++) { + CreateTestData testData = new CreateTestData(); + testData + .set(new Path("/test/concurrentRead/" + UUID.randomUUID().toString()), + getRandomByteArrayData(1024 * 1024)); + createTestData.add(testData); + } + + setDispatcher(createTestData); + + ArrayList readTestData = new ArrayList(); + ByteArrayInputStream buffered = new ByteArrayInputStream( + createTestData.get(0).data); + + ReadTestData readInitially = new ReadTestData(); + byte[] initialData = new byte[1024 * 1024]; + buffered.read(initialData); + + readInitially.set(createTestData.get(0).path, initialData, 0); + readTestData.add(readInitially); + runReadTest(readTestData, false); + + readTestData.clear(); + + for (int i = 0; i < concurrencyLevel * 5; i++) { + ReadTestData localReadData = new ReadTestData(); + int offset = random.nextInt((1024 * 1024) - 1); + int length = 1024 * 1024 - offset; + byte[] expectedData = new byte[length]; + buffered.reset(); + buffered.skip(offset); + buffered.read(expectedData); + localReadData.set(createTestData.get(0).path, expectedData, offset); + readTestData.add(localReadData); + } + + runReadTest(readTestData, true); + } + + void runReadTest(ArrayList testData, boolean useSameStream) + throws InterruptedException, ExecutionException { + + ExecutorService executor = Executors.newFixedThreadPool(testData.size()); + Future[] subtasks = new Future[testData.size()]; + + for (int i = 0; i < testData.size(); i++) { + subtasks[i] = executor.submit( + new ReadConcurrentRunnable(testData.get(i).data, testData.get(i).path, + testData.get(i).offset, useSameStream)); + } + + executor.shutdown(); + + // wait until all tasks are finished + executor.awaitTermination(120, TimeUnit.SECONDS); + + for (int i = 0; i < testData.size(); ++i) { + Assert.assertTrue((Boolean) subtasks[i].get()); + } + } + + class ReadTestData { + private Path path; + private byte[] data; + private int offset; + + public void set(Path filePath, byte[] dataToBeRead, int fromOffset) { + this.path = filePath; + this.data = dataToBeRead; + this.offset = fromOffset; + } + } + + class CreateTestData { + private Path path; + private byte[] data; + + public void set(Path filePath, byte[] dataToBeWritten) { + this.path = filePath; + this.data = dataToBeWritten; + } + } + + class ReadConcurrentRunnable implements Callable { + private Path path; + private int offset; + private byte[] expectedData; + private boolean useSameStream; + + public ReadConcurrentRunnable(byte[] expectedData, Path path, int offset, + boolean useSameStream) { + this.path = path; + this.offset = offset; + this.expectedData = expectedData; + this.useSameStream = useSameStream; + } + + public Boolean call() throws IOException { + try { + FSDataInputStream in; + if (useSameStream) { + synchronized (LOCK) { + if (commonHandle == null) { + commonHandle = getMockAdlFileSystem().open(path); + } + in = commonHandle; + } + } else { + in = getMockAdlFileSystem().open(path); + } + + byte[] actualData = new byte[expectedData.length]; + in.readFully(offset, actualData); + Assert.assertArrayEquals("Path :" + path.toString() + " did not match.", + expectedData, actualData); + if (!useSameStream) { + in.close(); + } + } catch (IOException e) { + e.printStackTrace(); + return false; + } + return true; + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java new file mode 100644 index 00000000000..c594c6540b4 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestCustomTokenProvider.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.MockResponse; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider; +import org.apache.hadoop.fs.permission.FsPermission; +import org.junit.Assert; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.Arrays; +import java.util.Collection; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; + +/** + * Test access token provider behaviour with custom token provider and for token + * provider cache is enabled. + */ +@RunWith(Parameterized.class) +public class TestCustomTokenProvider extends AdlMockWebServer { + private static final long TEN_MINUTES_IN_MILIS = 600000; + private int backendCallCount; + private int expectedCallbackToAccessToken; + private TestableAdlFileSystem[] fileSystems; + private Class typeOfTokenProviderClass; + private long expiryFromNow; + private int fsObjectCount; + + public TestCustomTokenProvider(Class typeOfTokenProviderClass, + long expiryFromNow, int fsObjectCount, int backendCallCount, + int expectedCallbackToAccessToken) + throws IllegalAccessException, InstantiationException, URISyntaxException, + IOException { + this.typeOfTokenProviderClass = typeOfTokenProviderClass; + this.expiryFromNow = expiryFromNow; + this.fsObjectCount = fsObjectCount; + this.backendCallCount = backendCallCount; + this.expectedCallbackToAccessToken = expectedCallbackToAccessToken; + } + + @Parameterized.Parameters(name = "{index}") + public static Collection testDataForTokenProvider() { + return Arrays.asList(new Object[][] { + // Data set in order + // INPUT - CustomTokenProvider class to load + // INPUT - expiry time in milis. Subtract from current time + // INPUT - No. of FileSystem object + // INPUT - No. of backend calls per FileSystem object + // EXPECTED - Number of callbacks to get token after test finished. + {CustomMockTokenProvider.class, 0, 1, 1, 1}, + {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 1, 1, 1}, + {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 2, 1, 2}, + {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 10, 10, 10}}); + } + + /** + * Explicitly invoked init so that base class mock server is setup before + * test data initialization is done. + * + * @throws IOException + * @throws URISyntaxException + */ + public void init() throws IOException, URISyntaxException { + Configuration configuration = new Configuration(); + configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + typeOfTokenProviderClass.getName()); + fileSystems = new TestableAdlFileSystem[fsObjectCount]; + URI uri = new URI("adl://localhost:" + getPort()); + + for (int i = 0; i < fsObjectCount; ++i) { + fileSystems[i] = new TestableAdlFileSystem(); + fileSystems[i].initialize(uri, configuration); + + ((CustomMockTokenProvider) fileSystems[i].getAzureTokenProvider()) + .setExpiryTimeInMillisAfter(expiryFromNow); + } + } + + @Test + public void testCustomTokenManagement() + throws IOException, URISyntaxException { + int accessTokenCallbackDuringExec = 0; + init(); + for (TestableAdlFileSystem tfs : fileSystems) { + for (int i = 0; i < backendCallCount; ++i) { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetFileStatusJSONResponse())); + FileStatus fileStatus = tfs.getFileStatus(new Path("/test1/test2")); + Assert.assertTrue(fileStatus.isFile()); + Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" + + getMockServer().getPort() + "/test1/test2", + fileStatus.getPath().toString()); + Assert.assertEquals(4194304, fileStatus.getLen()); + Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize()); + Assert.assertEquals(1, fileStatus.getReplication()); + Assert + .assertEquals(new FsPermission("777"), fileStatus.getPermission()); + Assert.assertEquals("NotSupportYet", fileStatus.getOwner()); + Assert.assertEquals("NotSupportYet", fileStatus.getGroup()); + } + + accessTokenCallbackDuringExec += ((CustomMockTokenProvider) tfs + .getAzureTokenProvider()).getAccessTokenRequestCount(); + } + + Assert.assertEquals(expectedCallbackToAccessToken, + accessTokenCallbackDuringExec); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java new file mode 100644 index 00000000000..08c805ea814 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestGetFileStatus.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.MockResponse; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.util.Time; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE; + +/** + * This class is responsible for testing local getFileStatus implementation + * to cover correct parsing of successful and error JSON response + * from the server. + * Adls GetFileStatus operation is in detail covered in + * org.apache.hadoop.fs.adl.live testing package. + */ +public class TestGetFileStatus extends AdlMockWebServer { + private static final Logger LOG = LoggerFactory + .getLogger(TestGetFileStatus.class); + + @Test + public void getFileStatusReturnsAsExpected() + throws URISyntaxException, IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getGetFileStatusJSONResponse())); + long startTime = Time.monotonicNow(); + FileStatus fileStatus = getMockAdlFileSystem() + .getFileStatus(new Path("/test1/test2")); + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertTrue(fileStatus.isFile()); + Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" + + getMockServer().getPort() + "/test1/test2", + fileStatus.getPath().toString()); + Assert.assertEquals(4194304, fileStatus.getLen()); + Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize()); + Assert.assertEquals(1, fileStatus.getReplication()); + Assert.assertEquals(new FsPermission("777"), fileStatus.getPermission()); + Assert.assertEquals("NotSupportYet", fileStatus.getOwner()); + Assert.assertEquals("NotSupportYet", fileStatus.getGroup()); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java new file mode 100644 index 00000000000..dd27a1045d6 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestListStatus.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +import com.squareup.okhttp.mockwebserver.MockResponse; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.util.Time; +import org.junit.Assert; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * This class is responsible for testing local listStatus implementation to + * cover correct parsing of successful and error JSON response from the server. + * Adls ListStatus functionality is in detail covered in + * org.apache.hadoop.fs.adl.live testing package. + */ +public class TestListStatus extends AdlMockWebServer { + + private static final Logger LOG = LoggerFactory + .getLogger(TestListStatus.class); + + @Test + public void listStatusReturnsAsExpected() throws IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(10))); + long startTime = Time.monotonicNow(); + FileStatus[] ls = getMockAdlFileSystem() + .listStatus(new Path("/test1/test2")); + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertEquals(ls.length, 10); + + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(200))); + startTime = Time.monotonicNow(); + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertEquals(ls.length, 200); + + getMockServer().enqueue(new MockResponse().setResponseCode(200) + .setBody(TestADLResponseData.getListFileStatusJSONResponse(2048))); + startTime = Time.monotonicNow(); + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + Assert.assertEquals(ls.length, 2048); + } + + @Test + public void listStatusOnFailure() throws IOException { + getMockServer().enqueue(new MockResponse().setResponseCode(403).setBody( + TestADLResponseData.getErrorIllegalArgumentExceptionJSONResponse())); + FileStatus[] ls = null; + long startTime = Time.monotonicNow(); + try { + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Invalid")); + } + long endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + + // SDK may increase number of retry attempts before error is propagated + // to caller. Adding max 10 error responses in the queue to align with SDK. + for (int i = 0; i < 10; ++i) { + getMockServer().enqueue(new MockResponse().setResponseCode(500).setBody( + TestADLResponseData.getErrorInternalServerExceptionJSONResponse())); + } + + startTime = Time.monotonicNow(); + try { + ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2")); + } catch (IOException e) { + Assert.assertTrue(e.getMessage().contains("Internal Server Error")); + } + endTime = Time.monotonicNow(); + LOG.debug("Time : " + (endTime - startTime)); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java new file mode 100644 index 00000000000..908f8b8919e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestRelativePathFormation.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Test; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; + +/** + * This class verifies path conversion to SDK. + */ +public class TestRelativePathFormation { + + @Test + public void testToRelativePath() throws URISyntaxException, IOException { + AdlFileSystem fs = new AdlFileSystem(); + Configuration configuration = new Configuration(); + configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, + "org.apache.hadoop.fs.adl.common.CustomMockTokenProvider"); + + fs.initialize(new URI("adl://temp.account.net"), configuration); + + Assert.assertEquals("/usr", fs.toRelativeFilePath(new Path("/usr"))); + Assert.assertEquals("/usr", + fs.toRelativeFilePath(new Path("adl://temp.account.net/usr"))); + + // When working directory is set. + fs.setWorkingDirectory(new Path("/a/b/")); + Assert.assertEquals("/usr", fs.toRelativeFilePath(new Path("/usr"))); + Assert.assertEquals("/a/b/usr", fs.toRelativeFilePath(new Path("usr"))); + Assert.assertEquals("/usr", + fs.toRelativeFilePath(new Path("adl://temp.account.net/usr"))); + Assert.assertEquals("/usr", + fs.toRelativeFilePath(new Path("wasb://temp.account.net/usr"))); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java new file mode 100644 index 00000000000..e3025b2aef2 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestValidateConfiguration.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl; + +import org.junit.Assert; +import org.junit.Test; + +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .ADL_EXPERIMENT_POSITIONAL_READ_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_REPLICATION_FACTOR; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_CLASS_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .AZURE_AD_TOKEN_PROVIDER_TYPE_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_READ_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .DEFAULT_WRITE_AHEAD_BUFFER_SIZE; +import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_DEFAULT; +import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .TOKEN_PROVIDER_TYPE_CLIENT_CRED; +import static org.apache.hadoop.fs.adl.AdlConfKeys + .TOKEN_PROVIDER_TYPE_REFRESH_TOKEN; +import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY; + +/** + * Validate configuration keys defined for adl storage file system instance. + */ +public class TestValidateConfiguration { + + @Test + public void validateConfigurationKeys() { + Assert + .assertEquals("dfs.adls.oauth2.refresh.url", AZURE_AD_REFRESH_URL_KEY); + Assert.assertEquals("dfs.adls.oauth2.access.token.provider", + AZURE_AD_TOKEN_PROVIDER_CLASS_KEY); + Assert.assertEquals("dfs.adls.oauth2.client.id", AZURE_AD_CLIENT_ID_KEY); + Assert.assertEquals("dfs.adls.oauth2.refresh.token", + AZURE_AD_REFRESH_TOKEN_KEY); + Assert + .assertEquals("dfs.adls.oauth2.credential", AZURE_AD_CLIENT_SECRET_KEY); + Assert.assertEquals("adl.debug.override.localuserasfileowner", + ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER); + + Assert.assertEquals("dfs.adls.oauth2.access.token.provider.type", + AZURE_AD_TOKEN_PROVIDER_TYPE_KEY); + + Assert.assertEquals("adl.feature.client.cache.readahead", + READ_AHEAD_BUFFER_SIZE_KEY); + + Assert.assertEquals("adl.feature.client.cache.drop.behind.writes", + WRITE_BUFFER_SIZE_KEY); + + Assert.assertEquals("RefreshToken", TOKEN_PROVIDER_TYPE_REFRESH_TOKEN); + + Assert.assertEquals("ClientCredential", TOKEN_PROVIDER_TYPE_CLIENT_CRED); + + Assert.assertEquals("adl.dfs.enable.client.latency.tracker", + LATENCY_TRACKER_KEY); + + Assert.assertEquals(true, LATENCY_TRACKER_DEFAULT); + + Assert.assertEquals(true, ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT); + + Assert.assertEquals("adl.feature.experiment.positional.read.enable", + ADL_EXPERIMENT_POSITIONAL_READ_KEY); + + Assert.assertEquals(1, ADL_REPLICATION_FACTOR); + Assert.assertEquals(256 * 1024 * 1024, ADL_BLOCK_SIZE); + Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT); + Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE); + Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java new file mode 100644 index 00000000000..4acb39b196b --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/TestableAdlFileSystem.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl; + +/** + * Mock adl file storage subclass to mock adl storage on local http service. + */ +public class TestableAdlFileSystem extends AdlFileSystem { + @Override + protected String getTransportScheme() { + return "http"; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java new file mode 100644 index 00000000000..c48ca0ec8c3 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/CustomMockTokenProvider.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.adl.common; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider; + +import java.io.IOException; +import java.util.Date; +import java.util.Random; + +/** + * Custom token management without cache enabled. + */ +public class CustomMockTokenProvider extends AzureADTokenProvider { + private Random random; + private long expiryTime; + private int accessTokenRequestCount = 0; + + @Override + public void initialize(Configuration configuration) throws IOException { + random = new Random(); + } + + @Override + public String getAccessToken() throws IOException { + accessTokenRequestCount++; + return String.valueOf(random.nextInt()); + } + + @Override + public Date getExpiryTime() { + Date before10Min = new Date(); + before10Min.setTime(expiryTime); + return before10Min; + } + + public void setExpiryTimeInMillisAfter(long timeInMillis) { + expiryTime = System.currentTimeMillis() + timeInMillis; + } + + public int getAccessTokenRequestCount() { + return accessTokenRequestCount; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java new file mode 100644 index 00000000000..dc8577db697 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/ExpectedResponse.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.common; + +import com.squareup.okhttp.mockwebserver.MockResponse; + +import java.util.ArrayList; + +/** + * Supporting class to hold expected MockResponse object along with parameters + * for validation in test methods. + */ +public class ExpectedResponse { + private MockResponse response; + private ArrayList expectedQueryParameters = new ArrayList(); + private int expectedBodySize; + private String httpRequestType; + + public int getExpectedBodySize() { + return expectedBodySize; + } + + public String getHttpRequestType() { + return httpRequestType; + } + + public ArrayList getExpectedQueryParameters() { + return expectedQueryParameters; + } + + public MockResponse getResponse() { + return response; + } + + ExpectedResponse set(MockResponse mockResponse) { + this.response = mockResponse; + return this; + } + + ExpectedResponse addExpectedQueryParam(String param) { + expectedQueryParameters.add(param); + return this; + } + + ExpectedResponse addExpectedBodySize(int bodySize) { + this.expectedBodySize = bodySize; + return this; + } + + ExpectedResponse addExpectedHttpRequestType(String expectedHttpRequestType) { + this.httpRequestType = expectedHttpRequestType; + return this; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java new file mode 100644 index 00000000000..b08a892d149 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/Parallelized.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.common; + +import org.junit.runners.Parameterized; +import org.junit.runners.model.RunnerScheduler; + +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** + * Provided for convenience to execute parametrized test cases concurrently. + */ +public class Parallelized extends Parameterized { + + public Parallelized(Class classObj) throws Throwable { + super(classObj); + setScheduler(new ThreadPoolScheduler()); + } + + private static class ThreadPoolScheduler implements RunnerScheduler { + private ExecutorService executor; + + public ThreadPoolScheduler() { + int numThreads = 10; + executor = Executors.newFixedThreadPool(numThreads); + } + + public void finished() { + executor.shutdown(); + try { + executor.awaitTermination(10, TimeUnit.MINUTES); + } catch (InterruptedException exc) { + throw new RuntimeException(exc); + } + } + + public void schedule(Runnable childStatement) { + executor.submit(childStatement); + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java new file mode 100644 index 00000000000..509b3f041b3 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/common/TestDataForRead.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.common; + +import com.squareup.okhttp.mockwebserver.Dispatcher; +import com.squareup.okhttp.mockwebserver.MockResponse; +import com.squareup.okhttp.mockwebserver.RecordedRequest; +import okio.Buffer; +import org.apache.hadoop.fs.adl.TestADLResponseData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Supporting class for mock test to validate Adls read operation. + */ +public class TestDataForRead { + private static final Logger LOG = LoggerFactory + .getLogger(TestDataForRead.class); + + private byte[] actualData; + private ArrayList responses; + private Dispatcher dispatcher; + private int intensityOfTest; + private boolean checkOfNoOfCalls; + private int expectedNoNetworkCall; + + public TestDataForRead(final byte[] actualData, int expectedNoNetworkCall, + int intensityOfTest, boolean checkOfNoOfCalls) { + + this.checkOfNoOfCalls = checkOfNoOfCalls; + this.actualData = actualData; + responses = new ArrayList(); + this.expectedNoNetworkCall = expectedNoNetworkCall; + this.intensityOfTest = intensityOfTest; + + dispatcher = new Dispatcher() { + @Override + public MockResponse dispatch(RecordedRequest recordedRequest) + throws InterruptedException { + + if (recordedRequest.getRequestLine().contains("op=GETFILESTATUS")) { + return new MockResponse().setResponseCode(200).setBody( + TestADLResponseData + .getGetFileStatusJSONResponse(actualData.length)); + } + + if (recordedRequest.getRequestLine().contains("op=OPEN")) { + String request = recordedRequest.getRequestLine(); + int offset = 0; + int byteCount = 0; + + Pattern pattern = Pattern.compile("offset=([0-9]+)"); + Matcher matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + offset = Integer.parseInt(matcher.group(1)); + } + + pattern = Pattern.compile("length=([0-9]+)"); + matcher = pattern.matcher(request); + if (matcher.find()) { + LOG.debug(matcher.group(1)); + byteCount = Integer.parseInt(matcher.group(1)); + } + + Buffer buf = new Buffer(); + buf.write(actualData, offset, + Math.min(actualData.length - offset, byteCount)); + return new MockResponse().setResponseCode(200) + .setChunkedBody(buf, 4 * 1024 * 1024); + } + + return new MockResponse().setBody("NOT SUPPORTED").setResponseCode(501); + } + }; + } + + public boolean isCheckOfNoOfCalls() { + return checkOfNoOfCalls; + } + + public int getExpectedNoNetworkCall() { + return expectedNoNetworkCall; + } + + public int getIntensityOfTest() { + return intensityOfTest; + } + + public byte[] getActualData() { + return actualData; + } + + public ArrayList getResponses() { + return responses; + } + + public Dispatcher getDispatcher() { + return dispatcher; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java new file mode 100644 index 00000000000..f7f0b710e13 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageConfiguration.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; + +/** + * Configure Adl storage file system. + */ +public final class AdlStorageConfiguration { + private static final String CONTRACT_ENABLE_KEY = + "dfs.adl.test.contract.enable"; + + private static final String TEST_CONFIGURATION_FILE_NAME = + "contract-test-options.xml"; + private static final String TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME = + "adls.xml"; + private static final String KEY_FILE_SYSTEM_IMPL = "fs.contract.test.fs"; + private static final String KEY_FILE_SYSTEM = "test.fs.adl.name"; + + private static boolean isContractTestEnabled = false; + private static Configuration conf = null; + + private AdlStorageConfiguration() { + } + + public synchronized static Configuration getConfiguration() { + Configuration newConf = new Configuration(); + newConf.addResource(TEST_CONFIGURATION_FILE_NAME); + newConf.addResource(TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME); + return newConf; + } + + public synchronized static boolean isContractTestEnabled() { + if (conf == null) { + conf = getConfiguration(); + } + + isContractTestEnabled = conf.getBoolean(CONTRACT_ENABLE_KEY, false); + return isContractTestEnabled; + } + + public synchronized static FileSystem createStorageConnector() + throws URISyntaxException, IOException { + if (conf == null) { + conf = getConfiguration(); + } + + if (!isContractTestEnabled()) { + return null; + } + + String fileSystem = conf.get(KEY_FILE_SYSTEM); + if (fileSystem == null || fileSystem.trim().length() == 0) { + throw new IOException("Default file system not configured."); + } + String fileSystemImpl = conf.get(KEY_FILE_SYSTEM_IMPL); + if (fileSystemImpl == null || fileSystemImpl.trim().length() == 0) { + throw new IOException( + "Configuration " + KEY_FILE_SYSTEM_IMPL + "does not exist."); + } + FileSystem fs = null; + try { + fs = (FileSystem) Class.forName(fileSystemImpl).newInstance(); + } catch (Exception e) { + throw new IOException("Could not instantiate the filesystem."); + } + + fs.initialize(new URI(conf.get(KEY_FILE_SYSTEM)), conf); + return fs; + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java new file mode 100644 index 00000000000..262b6365177 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/AdlStorageContract.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.AbstractFSContract; + +import java.io.IOException; +import java.net.URISyntaxException; + +class AdlStorageContract extends AbstractFSContract { + private FileSystem fs; + + protected AdlStorageContract(Configuration conf) { + super(conf); + try { + fs = AdlStorageConfiguration.createStorageConnector(); + } catch (URISyntaxException e) { + throw new IllegalStateException("Can not initialize ADL FileSystem. " + + "Please check test.fs.adl.name property.", e); + } catch (IOException e) { + throw new IllegalStateException("Can not initialize ADL FileSystem.", e); + } + this.setConf(AdlStorageConfiguration.getConfiguration()); + } + + @Override + public String getScheme() { + return "adl"; + } + + @Override + public FileSystem getTestFileSystem() throws IOException { + return this.fs; + } + + @Override + public Path getTestPath() { + return new Path("/test"); + } + + @Override + public boolean isEnabled() { + return AdlStorageConfiguration.isContractTestEnabled(); + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java new file mode 100644 index 00000000000..83390aff894 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractAppendLive.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractAppendTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Verify Adls APPEND semantics compliance with Hadoop. + */ +public class TestAdlContractAppendLive extends AbstractContractAppendTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Override + @Test + public void testRenameFileBeingAppended() throws Throwable { + ContractTestUtils.unsupported("Skipping since renaming file in append " + + "mode not supported in Adl"); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java new file mode 100644 index 00000000000..8474e9cf274 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractConcatLive.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractConcatTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Verify Adls CONCAT semantics compliance with Hadoop. + */ +public class TestAdlContractConcatLive extends AbstractContractConcatTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } + + @Test + public void testConcatMissingTarget() throws Throwable { + ContractTestUtils.unsupported("BUG : Adl to support expectation from " + + "concat on missing targets."); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java new file mode 100644 index 00000000000..907c50c6812 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractCreateLive.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractCreateTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Verify Adls CREATE semantics compliance with Hadoop. + */ +public class TestAdlContractCreateLive extends AbstractContractCreateTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } + + @Test + public void testOverwriteEmptyDirectory() throws Throwable { + ContractTestUtils + .unsupported("BUG : Adl to support override empty " + "directory."); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java new file mode 100644 index 00000000000..30eaec7fa4e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractDeleteLive.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractDeleteTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.junit.Before; + +/** + * Verify Adls DELETE semantics compliance with Hadoop. + */ +public class TestAdlContractDeleteLive extends AbstractContractDeleteTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java new file mode 100644 index 00000000000..e498110d88b --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractMkdirLive.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractMkdirTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Verify Adls MKDIR semantics compliance with Hadoop. + */ +public class TestAdlContractMkdirLive extends AbstractContractMkdirTest { + @Override + protected AbstractFSContract createContract(Configuration conf) { + return new AdlStorageContract(conf); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } + + @Test + public void testMkdirOverParentFile() throws Throwable { + ContractTestUtils.unsupported("Not supported by Adl"); + } + + @Test + public void testNoMkdirOverFile() throws Throwable { + ContractTestUtils.unsupported("Not supported by Adl"); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java new file mode 100644 index 00000000000..2bb2095e34e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractOpenLive.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractOpenTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.junit.Before; + +/** + * Verify Adls OPEN/READ semantics compliance with Hadoop. + */ +public class TestAdlContractOpenLive extends AbstractContractOpenTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } +} \ No newline at end of file diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java new file mode 100644 index 00000000000..06063c5a631 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRenameLive.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRenameTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Verify Adls RENAME semantics compliance with Hadoop. + */ +public class TestAdlContractRenameLive extends AbstractContractRenameTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } + + @Test + public void testRenameFileOverExistingFile() throws Throwable { + ContractTestUtils + .unsupported("BUG : Adl to support full complete POSIX" + "behaviour"); + } + + @Test + public void testRenameFileNonexistentDir() throws Throwable { + ContractTestUtils + .unsupported("BUG : Adl to support create dir is not " + "exist"); + } + + @Test + public void testRenameWithNonEmptySubDir() throws Throwable { + ContractTestUtils.unsupported("BUG : Adl to support non empty dir move."); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java new file mode 100644 index 00000000000..bf4e549440d --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractRootDirLive.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.junit.Before; +import org.junit.Test; + +/** + * Verify Adls root level operation support. + */ +public class TestAdlContractRootDirLive + extends AbstractContractRootDirectoryTest { + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } + + @Test + public void testRmNonEmptyRootDirNonRecursive() throws Throwable { + ContractTestUtils.unsupported( + "BUG : Adl should throw exception instred " + "of returning false."); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java new file mode 100644 index 00000000000..09764648a53 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlContractSeekLive.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.contract.AbstractContractSeekTest; +import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.junit.Before; + +/** + * Verify Adls OPEN/READ seek operation support. + */ +public class TestAdlContractSeekLive extends AbstractContractSeekTest { + + @Override + protected AbstractFSContract createContract(Configuration configuration) { + return new AdlStorageContract(configuration); + } + + @Before + @Override + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + super.setup(); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java new file mode 100644 index 00000000000..8f534009e5f --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlDifferentSizeWritesLive.java @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.io.IOException; +import java.net.URISyntaxException; +import java.util.Random; + +/** + * Verify different data segment size writes ensure the integrity and + * order of the data. + */ +public class TestAdlDifferentSizeWritesLive { + + public static byte[] getRandomByteArrayData(int size) { + byte[] b = new byte[size]; + Random rand = new Random(); + rand.nextBytes(b); + return b; + } + + @Before + public void setup() throws Exception { + org.junit.Assume + .assumeTrue(AdlStorageConfiguration.isContractTestEnabled()); + } + + @Test + public void testSmallDataWrites() throws IOException { + testDataIntegrity(4 * 1024 * 1024, 1 * 1024); + testDataIntegrity(4 * 1024 * 1024, 7 * 1024); + testDataIntegrity(4 * 1024 * 1024, 10); + testDataIntegrity(2 * 1024 * 1024, 10); + testDataIntegrity(1 * 1024 * 1024, 10); + testDataIntegrity(100, 1); + } + + @Test + public void testMediumDataWrites() throws IOException { + testDataIntegrity(4 * 1024 * 1024, 1 * 1024 * 1024); + testDataIntegrity(7 * 1024 * 1024, 2 * 1024 * 1024); + testDataIntegrity(9 * 1024 * 1024, 2 * 1024 * 1024); + testDataIntegrity(10 * 1024 * 1024, 3 * 1024 * 1024); + } + + private void testDataIntegrity(int totalSize, int chunkSize) + throws IOException { + Path path = new Path("/test/dataIntegrityCheck"); + FileSystem fs = null; + try { + fs = AdlStorageConfiguration.createStorageConnector(); + } catch (URISyntaxException e) { + throw new IllegalStateException("Can not initialize ADL FileSystem. " + + "Please check fs.defaultFS property.", e); + } + byte[] expectedData = getRandomByteArrayData(totalSize); + + FSDataOutputStream out = fs.create(path, true); + int iteration = totalSize / chunkSize; + int reminderIteration = totalSize % chunkSize; + int offset = 0; + for (int i = 0; i < iteration; ++i) { + out.write(expectedData, offset, chunkSize); + offset += chunkSize; + } + + out.write(expectedData, offset, reminderIteration); + out.close(); + + byte[] actualData = new byte[totalSize]; + FSDataInputStream in = fs.open(path); + in.readFully(0, actualData); + in.close(); + Assert.assertArrayEquals(expectedData, actualData); + Assert.assertTrue(fs.delete(path, true)); + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java new file mode 100644 index 00000000000..be279360040 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/java/org/apache/hadoop/fs/adl/live/TestAdlFileSystemContractLive.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +package org.apache.hadoop.fs.adl.live; + +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemContractBaseTest; +import org.apache.hadoop.fs.Path; +import org.junit.Test; + +import java.io.IOException; + +/** + * Verify Adls adhere to Hadoop file system semantics. + */ +public class TestAdlFileSystemContractLive extends FileSystemContractBaseTest { + private FileSystem adlStore; + + @Override + protected void setUp() throws Exception { + adlStore = AdlStorageConfiguration.createStorageConnector(); + if (AdlStorageConfiguration.isContractTestEnabled()) { + fs = adlStore; + } + } + + @Override + protected void tearDown() throws Exception { + if (AdlStorageConfiguration.isContractTestEnabled()) { + cleanup(); + adlStore = null; + fs = null; + } + } + + private void cleanup() throws IOException { + adlStore.delete(new Path("/test"), true); + } + + @Override + protected void runTest() throws Throwable { + if (AdlStorageConfiguration.isContractTestEnabled()) { + super.runTest(); + } + } + + public void testGetFileStatus() throws IOException { + if (!AdlStorageConfiguration.isContractTestEnabled()) { + return; + } + + Path testPath = new Path("/test/adltest"); + if (adlStore.exists(testPath)) { + adlStore.delete(testPath, false); + } + + adlStore.create(testPath).close(); + assertTrue(adlStore.delete(testPath, false)); + } + + /** + * The following tests are failing on Azure Data Lake and the Azure Data Lake + * file system code needs to be modified to make them pass. + * A separate work item has been opened for this. + */ + @Test + @Override + public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception { + // BUG : Adl should return exception instead of false. + } + + @Test + @Override + public void testMkdirsWithUmask() throws Exception { + // Support under implementation in Adl + } + +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml b/hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml new file mode 100644 index 00000000000..76a78b0129e --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/resources/adls.xml @@ -0,0 +1,140 @@ + + + + fs.contract.test.root-tests-enabled + true + + + + fs.contract.test.supports-concat + true + + + + fs.contract.rename-returns-false-if-source-missing + true + + + + fs.contract.test.random-seek-count + 10 + + + + fs.contract.is-case-sensitive + true + + + + fs.contract.rename-returns-true-if-dest-exists + false + + + + fs.contract.rename-returns-true-if-source-missing + false + + + + fs.contract.rename-creates-dest-dirs + false + + + + fs.contract.rename-remove-dest-if-empty-dir + false + + + + fs.contract.supports-settimes + true + + + + fs.contract.supports-append + true + + + + fs.contract.supports-atomic-directory-delete + true + + + + fs.contract.supports-atomic-rename + true + + + + fs.contract.supports-block-locality + true + + + + fs.contract.supports-concat + true + + + + fs.contract.supports-seek + true + + + + fs.contract.supports-seek-on-closed-file + true + + + + fs.contract.rejects-seek-past-eof + true + + + + fs.contract.supports-available-on-closed-file + true + + + + fs.contract.supports-strict-exceptions + false + + + + fs.contract.supports-unix-permissions + true + + + + fs.contract.rename-overwrites-dest + false + + + + fs.contract.supports-append + true + + + + fs.azure.enable.append.support + true + + + + fs.contract.supports-getfilestatus + true + + + diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/resources/contract-test-options.xml b/hadoop-tools/hadoop-azure-datalake/src/test/resources/contract-test-options.xml new file mode 100644 index 00000000000..646922ecbb1 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/resources/contract-test-options.xml @@ -0,0 +1,61 @@ + + + + dfs.adls.oauth2.refresh.url + + + + + + dfs.adls.oauth2.credential + + + + + dfs.adls.oauth2.client.id + + + + + dfs.adls.oauth2.access.token.provider.type + ClientCredential + + Supported provider type: + + "ClientCredential" : Client id and client credentials(Provided + through configuration file) flow. + + "RefreshToken" : Client id and refresh token(Provided + through configuration file)flow. + + "Custom" : Custom AAD token management. + + + + + dfs.adl.test.contract.enable + false + + + + test.fs.adl.name + + + + + fs.contract.test.fs + org.apache.hadoop.fs.adl.AdlFileSystem + + diff --git a/hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties b/hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties new file mode 100644 index 00000000000..4cc8f7f3829 --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/test/resources/log4j.properties @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +log4j.rootLogger=DEBUG,stdout +log4j.appender.stdout=org.apache.log4j.ConsoleAppender +log4j.appender.stdout.layout=org.apache.log4j.PatternLayout +log4j.appender.stdout.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} - %m%n +log4j.logger.your.app=* +log4j.additivity.your.app=false +log4j.logger.yourApp=* +log4j.additivity.yourApp=false +log4j.appender.yourApp=org.apache.log4j.ConsoleAppender +log4j.appender.yourApp.layout=org.apache.log4j.PatternLayout +log4j.appender.yourApp.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} %m%n +log4j.appender.yourApp.ImmediateFlush=true \ No newline at end of file diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml index 16bab03cb5c..86d573a887e 100644 --- a/hadoop-tools/pom.xml +++ b/hadoop-tools/pom.xml @@ -46,6 +46,7 @@ hadoop-sls hadoop-aws hadoop-azure + hadoop-azure-datalake