HADOOP-13037. Refactor Azure Data Lake Store as an independent FileSystem. Contributed by Vishwajeet Dusane

(cherry picked from commit edf149b979)
This commit is contained in:
Chris Douglas 2017-03-08 23:18:28 -08:00
parent ffc00c57d4
commit b4b4ca9199
47 changed files with 4930 additions and 0 deletions

View File

@ -0,0 +1,168 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <parent>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-project</artifactId>
    <version>2.8.1-SNAPSHOT</version>
    <relativePath>../../hadoop-project</relativePath>
  </parent>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-azure-datalake</artifactId>
  <name>Apache Hadoop Azure Data Lake support</name>
  <description>
    This module contains code to support integration with Azure Data Lake.
  </description>
  <packaging>jar</packaging>
  <properties>
    <!-- Single source of truth for dependency versions; the dependency
         entries below reference these properties so okhttp and its
         mockwebserver stay in lock-step. -->
    <okHttpVersion>2.4.0</okHttpVersion>
    <minimalJsonVersion>0.9.1</minimalJsonVersion>
    <file.encoding>UTF-8</file.encoding>
    <downloadSources>true</downloadSources>
  </properties>
  <repositories>
    <repository>
      <id>snapshots-repo</id>
      <url>https://oss.sonatype.org/content/repositories/snapshots</url>
      <releases><enabled>false</enabled></releases>
      <snapshots><enabled>true</enabled></snapshots>
    </repository>
  </repositories>
  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-project-info-reports-plugin</artifactId>
        <configuration>
          <dependencyDetailsEnabled>false</dependencyDetailsEnabled>
          <dependencyLocationsEnabled>false
          </dependencyLocationsEnabled>
        </configuration>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <executions>
          <execution>
            <goals>
              <goal>test-jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-dependency-plugin</artifactId>
        <executions>
          <execution>
            <id>deplist</id>
            <phase>compile</phase>
            <goals>
              <goal>list</goal>
            </goals>
            <configuration>
              <!-- build a shellprofile -->
              <outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt</outputFile>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
    <!--
    The following is to suppress a m2e warning in eclipse
    (m2e doesn't know how to handle maven-enforcer:enforce, so we have to tell m2e to ignore it)
    see: http://stackoverflow.com/questions/13040788/how-to-eliminate-the-maven-enforcer-plugin-goal-enforce-is-ignored-by-m2e-wa
    -->
    <pluginManagement>
      <plugins>
        <plugin>
          <groupId>org.eclipse.m2e</groupId>
          <artifactId>lifecycle-mapping</artifactId>
          <version>1.0.0</version>
          <configuration>
            <lifecycleMappingMetadata>
              <pluginExecutions>
                <pluginExecution>
                  <pluginExecutionFilter>
                    <groupId>org.apache.maven.plugins
                    </groupId>
                    <artifactId>maven-enforcer-plugin
                    </artifactId>
                    <versionRange>[1.0.0,)</versionRange>
                    <goals>
                      <goal>enforce</goal>
                    </goals>
                  </pluginExecutionFilter>
                  <action>
                    <ignore/>
                  </action>
                </pluginExecution>
              </pluginExecutions>
            </lifecycleMappingMetadata>
          </configuration>
        </plugin>
      </plugins>
    </pluginManagement>
  </build>
  <dependencies>
    <!-- SDK Dependency -->
    <dependency>
      <groupId>com.microsoft.azure</groupId>
      <artifactId>azure-data-lake-store-sdk</artifactId>
      <version>2.0.4-SNAPSHOT</version>
    </dependency>
    <!-- ENDS HERE-->
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
    </dependency>
    <dependency>
      <groupId>com.squareup.okhttp</groupId>
      <artifactId>okhttp</artifactId>
      <version>${okHttpVersion}</version>
    </dependency>
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>com.eclipsesource.minimal-json</groupId>
      <artifactId>minimal-json</artifactId>
      <version>${minimalJsonVersion}</version>
      <scope>test</scope>
    </dependency>
    <dependency>
      <groupId>org.apache.hadoop</groupId>
      <artifactId>hadoop-common</artifactId>
      <scope>test</scope>
      <type>test-jar</type>
    </dependency>
    <dependency>
      <groupId>com.squareup.okhttp</groupId>
      <artifactId>mockwebserver</artifactId>
      <version>${okHttpVersion}</version>
      <scope>test</scope>
    </dependency>
  </dependencies>
</project>

View File

@ -0,0 +1,56 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
* Expose adl:// scheme to access ADL file system.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class Adl extends DelegateToFileSystem {

  Adl(URI theUri, Configuration conf) throws IOException, URISyntaxException {
    super(theUri, newAdlFsInstance(conf), conf, AdlFileSystem.SCHEME, false);
  }

  /**
   * Builds and configures the delegate {@link AdlFileSystem} instance
   * handed to the AbstractFileSystem layer.
   */
  private static AdlFileSystem newAdlFsInstance(Configuration conf) {
    final AdlFileSystem adlFs = new AdlFileSystem();
    adlFs.setConf(conf);
    return adlFs;
  }

  /**
   * @return default port the ADL file system communicates on.
   */
  @Override
  public final int getUriDefaultPort() {
    return AdlFileSystem.DEFAULT_PORT;
  }
}

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Constants.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class AdlConfKeys {

  // --- OAuth2 common configuration ---
  public static final String AZURE_AD_REFRESH_URL_KEY =
      "dfs.adls.oauth2.refresh.url";
  // Optional when the provider type is refresh-token or client-credential.
  public static final String AZURE_AD_TOKEN_PROVIDER_CLASS_KEY =
      "dfs.adls.oauth2.access.token.provider";
  public static final String AZURE_AD_CLIENT_ID_KEY =
      "dfs.adls.oauth2.client.id";
  public static final String AZURE_AD_TOKEN_PROVIDER_TYPE_KEY =
      "dfs.adls.oauth2.access.token.provider.type";

  // --- OAuth2 refresh-token configuration ---
  public static final String AZURE_AD_REFRESH_TOKEN_KEY =
      "dfs.adls.oauth2.refresh.token";
  public static final String TOKEN_PROVIDER_TYPE_REFRESH_TOKEN =
      "RefreshToken";

  // --- OAuth2 client-credential configuration ---
  public static final String AZURE_AD_CLIENT_SECRET_KEY =
      "dfs.adls.oauth2.credential";
  public static final String TOKEN_PROVIDER_TYPE_CLIENT_CRED =
      "ClientCredential";

  // --- Client-side buffering ---
  public static final String READ_AHEAD_BUFFER_SIZE_KEY =
      "adl.feature.client.cache.readahead";
  public static final String WRITE_BUFFER_SIZE_KEY =
      "adl.feature.client.cache.drop.behind.writes";
  static final int DEFAULT_READ_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024;
  static final int DEFAULT_WRITE_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024;

  // --- Transport schemes ---
  static final String SECURE_TRANSPORT_SCHEME = "https";
  static final String INSECURE_TRANSPORT_SCHEME = "http";

  // --- Debug aid: report the local user as file owner ---
  static final String ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER =
      "adl.debug.override.localuserasfileowner";
  static final boolean ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT = false;

  // Fixed values reported through FileStatus; ADL has no user-visible
  // block/replication model.
  static final long ADL_BLOCK_SIZE = 256 * 1024 * 1024;
  static final int ADL_REPLICATION_FACTOR = 1;

  // User-agent components reported to the service.
  static final String ADL_HADOOP_CLIENT_NAME = "hadoop-azure-datalake-";
  static final String ADL_HADOOP_CLIENT_VERSION = "2.0.0-SNAPSHOT";

  // --- Event tracking ---
  static final String ADL_EVENTS_TRACKING_SOURCE =
      "adl.events.tracking.source";
  static final String ADL_EVENTS_TRACKING_CLUSTERNAME =
      "adl.events.tracking.clustername";
  static final String ADL_EVENTS_TRACKING_CLUSTERTYPE =
      "adl.events.tracking.clustertype";

  // --- Feature toggles ---
  static final String LATENCY_TRACKER_KEY =
      "adl.dfs.enable.client.latency.tracker";
  static final boolean LATENCY_TRACKER_DEFAULT = true;
  static final String ADL_EXPERIMENT_POSITIONAL_READ_KEY =
      "adl.feature.experiment.positional.read.enable";
  static final boolean ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT = true;
  static final String ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION =
      "adl.feature.support.acl.bit";
  static final boolean ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT = true;

  private AdlConfKeys() {
    // Constants holder; never instantiated.
  }
}

View File

@ -0,0 +1,923 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import com.google.common.annotations.VisibleForTesting;
import com.microsoft.azure.datalake.store.ADLStoreClient;
import com.microsoft.azure.datalake.store.ADLStoreOptions;
import com.microsoft.azure.datalake.store.DirectoryEntry;
import com.microsoft.azure.datalake.store.DirectoryEntryType;
import com.microsoft.azure.datalake.store.IfExists;
import com.microsoft.azure.datalake.store.LatencyTracker;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.ContentSummary.Builder;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.VersionInfo;
import static org.apache.hadoop.fs.adl.AdlConfKeys.*;
/**
* A FileSystem to access Azure Data Lake Store.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class AdlFileSystem extends FileSystem {
// URI scheme ("adl") and default TLS port of the store endpoint.
static final String SCHEME = "adl";
static final int DEFAULT_PORT = 443;
// Canonical URI (scheme + authority) of this instance; set in initialize().
private URI uri;
// Local Hadoop short user name; used to derive the home directory.
private String userName;
// Debug aid: when true, report the local user/"hdfs" as owner/group.
private boolean overrideOwner;
// SDK client performing all remote store operations.
private ADLStoreClient adlClient;
private Path workingDirectory;
// Whether the ACL bit is surfaced in FsPermission results.
private boolean aclBitStatus;
// retained for tests
private AccessTokenProvider tokenProvider;
private AzureADTokenProvider azureTokenProvider;
/**
 * @return the URI scheme handled by this file system: {@code adl}.
 */
@Override
public String getScheme() {
  return SCHEME;
}
/**
 * @return the canonical URI (scheme and authority) of this file system,
 * as computed in {@link #initialize(URI, Configuration)}.
 */
@Override
public URI getUri() {
  return uri;
}
/**
 * @return the default port (443) used when the store URI names none.
 */
@Override
public int getDefaultPort() {
  return DEFAULT_PORT;
}
/**
 * @return false; symbolic links are not supported by this file system.
 */
@Override
public boolean supportsSymlinks() {
  return false;
}
/**
 * Called after a new FileSystem instance is constructed.
 *
 * Resolves the account FQDN (directly from the host, or via the
 * dfs.adls.&lt;name&gt;.hostname/mountpoint properties for symbolic names),
 * builds the SDK client, and applies transport/tracking options.
 *
 * @param storeUri a uri whose authority section names the host, port, etc.
 * for this FileSystem
 * @param conf the configuration
 */
@Override
public void initialize(URI storeUri, Configuration conf) throws IOException {
  super.initialize(storeUri, conf);
  this.setConf(conf);
  // Keep only scheme + authority; path/query are not part of the FS URI.
  this.uri = URI
      .create(storeUri.getScheme() + "://" + storeUri.getAuthority());
  try {
    userName = UserGroupInformation.getCurrentUser().getShortUserName();
  } catch (IOException e) {
    // Best effort: fall back to a fixed user name rather than failing init.
    userName = "hadoop";
  }
  this.setWorkingDirectory(getHomeDirectory());
  overrideOwner = getConf().getBoolean(ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER,
      ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
  aclBitStatus = conf.getBoolean(ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION,
      ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT);
  String accountFQDN = null;
  String mountPoint = null;
  // NOTE(review): getHost() is null for URIs without an authority; this
  // would NPE below — presumably callers always supply adl://host. Confirm.
  String hostname = storeUri.getHost();
  if (!hostname.contains(".") && !hostname.equalsIgnoreCase(
      "localhost")) { // this is a symbolic name. Resolve it.
    String hostNameProperty = "dfs.adls." + hostname + ".hostname";
    String mountPointProperty = "dfs.adls." + hostname + ".mountpoint";
    accountFQDN = getNonEmptyVal(conf, hostNameProperty);
    mountPoint = getNonEmptyVal(conf, mountPointProperty);
  } else {
    accountFQDN = hostname;
  }
  // Preserve an explicit non-default port in the FQDN handed to the SDK.
  if (storeUri.getPort() > 0) {
    accountFQDN = accountFQDN + ":" + storeUri.getPort();
  }
  adlClient = ADLStoreClient
      .createClient(accountFQDN, getAccessTokenProvider(conf));
  ADLStoreOptions options = new ADLStoreOptions();
  options.enableThrowingRemoteExceptions();
  if (getTransportScheme().equalsIgnoreCase(INSECURE_TRANSPORT_SCHEME)) {
    options.setInsecureTransport();
  }
  if (mountPoint != null) {
    options.setFilePathPrefix(mountPoint);
  }
  // Compose the user-agent suffix: client name + version + cluster info.
  String clusterName = conf.get(ADL_EVENTS_TRACKING_CLUSTERNAME, "UNKNOWN");
  String clusterType = conf.get(ADL_EVENTS_TRACKING_CLUSTERTYPE, "UNKNOWN");
  String clientVersion = ADL_HADOOP_CLIENT_NAME + (StringUtils
      .isEmpty(VersionInfo.getVersion().trim()) ?
      ADL_HADOOP_CLIENT_VERSION.trim() :
      VersionInfo.getVersion().trim());
  options.setUserAgentSuffix(clientVersion + "/" +
      VersionInfo.getVersion().trim() + "/" + clusterName + "/"
      + clusterType);
  adlClient.setOptions(options);
  boolean trackLatency = conf
      .getBoolean(LATENCY_TRACKER_KEY, LATENCY_TRACKER_DEFAULT);
  if (!trackLatency) {
    LatencyTracker.disable();
  }
}
/**
 * This method is provided for convenience for derived classes to define
 * custom {@link AzureADTokenProvider} instance.
 *
 * In order to ensure secure hadoop infrastructure and user context for which
 * respective {@link AdlFileSystem} instance is initialized,
 * Loading {@link AzureADTokenProvider} is not sufficient.
 *
 * The order of loading {@link AzureADTokenProvider} is to first invoke
 * {@link #getCustomAccessTokenProvider(Configuration)}, If method return null
 * which means no implementation provided by derived classes, then
 * configuration object is loaded to retrieve token configuration as specified
 * is documentation.
 *
 * Custom token management takes the higher precedence during initialization.
 *
 * @param conf Configuration object
 * @return null if the no custom {@link AzureADTokenProvider} token management
 * is specified.
 * @throws IOException if failed to initialize token provider.
 */
protected synchronized AzureADTokenProvider getCustomAccessTokenProvider(
    Configuration conf) throws IOException {
  // Throws IllegalArgumentException when the key is absent or empty; the
  // name is kept only for error messages below.
  String className = getNonEmptyVal(conf, AZURE_AD_TOKEN_PROVIDER_CLASS_KEY);
  Class<? extends AzureADTokenProvider> azureADTokenProviderClass =
      conf.getClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, null,
          AzureADTokenProvider.class);
  if (azureADTokenProviderClass == null) {
    throw new IllegalArgumentException(
        "Configuration " + className + " " + "not defined/accessible.");
  }
  azureTokenProvider = ReflectionUtils
      .newInstance(azureADTokenProviderClass, conf);
  // Defensive check; ReflectionUtils.newInstance is not expected to
  // return null, but fail loudly rather than NPE later.
  if (azureTokenProvider == null) {
    throw new IllegalArgumentException("Failed to initialize " + className);
  }
  azureTokenProvider.initialize(conf);
  return azureTokenProvider;
}
/**
 * Selects the SDK access-token provider according to the configured
 * provider type; defaults to {@code Custom} when no type is set.
 */
private AccessTokenProvider getAccessTokenProvider(Configuration conf)
    throws IOException {
  TokenProviderType providerType = conf.getEnum(
      AdlConfKeys.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, TokenProviderType.Custom);
  if (providerType == TokenProviderType.RefreshToken) {
    tokenProvider = getConfRefreshTokenBasedTokenProvider(conf);
  } else if (providerType == TokenProviderType.ClientCredential) {
    tokenProvider = getConfCredentialBasedTokenProvider(conf);
  } else {
    // Custom (and any unrecognized value) delegates to the user-supplied
    // AzureADTokenProvider implementation wrapped for the SDK.
    AzureADTokenProvider customProvider = getCustomAccessTokenProvider(conf);
    tokenProvider = new SdkTokenProviderAdapter(customProvider);
  }
  return tokenProvider;
}
/**
 * Builds a client-credential token provider from configuration; every
 * required key must be present and non-empty.
 */
private AccessTokenProvider getConfCredentialBasedTokenProvider(
    Configuration conf) {
  // Read order preserved: client id, refresh URL, then secret.
  final String adClientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY);
  final String adRefreshUrl = getNonEmptyVal(conf, AZURE_AD_REFRESH_URL_KEY);
  final String adClientSecret =
      getNonEmptyVal(conf, AZURE_AD_CLIENT_SECRET_KEY);
  return new ClientCredsTokenProvider(adRefreshUrl, adClientId,
      adClientSecret);
}
/**
 * Builds a refresh-token based token provider from configuration; both
 * keys must be present and non-empty.
 */
private AccessTokenProvider getConfRefreshTokenBasedTokenProvider(
    Configuration conf) {
  // Read order preserved: client id first, then the refresh token.
  final String adClientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY);
  final String adRefreshToken =
      getNonEmptyVal(conf, AZURE_AD_REFRESH_TOKEN_KEY);
  return new RefreshTokenBasedTokenProvider(adClientId, adRefreshToken);
}
// Exposed for tests: the SDK-level token provider currently in use.
@VisibleForTesting
AccessTokenProvider getTokenProvider() {
  return tokenProvider;
}
// Exposed for tests: the custom AzureADTokenProvider, when one was loaded.
@VisibleForTesting
AzureADTokenProvider getAzureTokenProvider() {
  return azureTokenProvider;
}
/**
 * Constructing home directory locally is fine as long as Hadoop
 * local user name and ADL user name relationship story is not fully
 * baked yet.
 *
 * @return Hadoop local user home directory.
 */
@Override
public Path getHomeDirectory() {
  Path localUserHome = new Path("/user/" + userName);
  return makeQualified(localUserHome);
}
/**
 * Create call semantic is handled differently in case of ADL. Create
 * semantics is translated to Create/Append
 * semantics.
 * 1. No dedicated connection to server.
 * 2. Buffering is locally done, Once buffer is full or flush is invoked on
 * the by the caller. All the pending
 * data is pushed to ADL as APPEND operation code.
 * 3. On close - Additional call is send to server to close the stream, and
 * release lock from the stream.
 *
 * Necessity of Create/Append semantics is
 * 1. ADL backend server does not allow idle connection for longer duration
 * . In case of slow writer scenario,
 * observed connection timeout/Connection reset causing occasional job
 * failures.
 * 2. Performance boost to jobs which are slow writer, avoided network latency
 * 3. ADL equally better performing with multiple of 4MB chunk as append
 * calls.
 *
 * @param f File path
 * @param permission Access permission for the newly created file
 * @param overwrite Remove existing file and recreate new one if true
 * otherwise throw error if file exist
 * @param bufferSize Buffer size, ADL backend does not honour
 * @param replication Replication count, ADL backend does not honour
 * @param blockSize Block size, ADL backend does not honour
 * @param progress Progress indicator
 * @return FSDataOutputStream OutputStream on which application can push
 * stream of bytes
 * @throws IOException when system error, internal server error or user error
 */
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
    boolean overwrite, int bufferSize, short replication, long blockSize,
    Progressable progress) throws IOException {
  statistics.incrementWriteOps(1);
  IfExists overwriteRule = overwrite ? IfExists.OVERWRITE : IfExists.FAIL;
  // Permission is combined with the configured umask before being passed
  // as an octal string to the SDK.
  // NOTE(review): the trailing 'true' presumably means "create missing
  // parents" (contrast with createNonRecursive's 'false') — confirm
  // against the ADL SDK createFile signature.
  return new FSDataOutputStream(new AdlFsOutputStream(adlClient
      .createFile(toRelativeFilePath(f), overwriteRule,
          Integer.toOctalString(applyUMask(permission).toShort()), true),
      getConf()), this.statistics);
}
/**
 * Opens an FSDataOutputStream at the indicated Path with write-progress
 * reporting. Same as create(), except fails if parent directory doesn't
 * already exist.
 *
 * @param f the file name to open
 * @param permission Access permission for the newly created file
 * @param flags {@link CreateFlag}s to use for this stream.
 * @param bufferSize the size of the buffer to be used. ADL backend does
 * not honour
 * @param replication required block replication for the file. ADL backend
 * does not honour
 * @param blockSize Block size, ADL backend does not honour
 * @param progress Progress indicator
 * @throws IOException when system error, internal server error or user error
 * @see #setPermission(Path, FsPermission)
 * @deprecated API only for 0.20-append
 */
@Deprecated
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
    EnumSet<CreateFlag> flags, int bufferSize, short replication,
    long blockSize, Progressable progress) throws IOException {
  statistics.incrementWriteOps(1);
  // OVERWRITE among the flags decides whether an existing file is replaced.
  IfExists existsBehaviour = flags.contains(CreateFlag.OVERWRITE)
      ? IfExists.OVERWRITE
      : IfExists.FAIL;
  String octalPermission =
      Integer.toOctalString(applyUMask(permission).toShort());
  // 'false' => do not create missing parent directories.
  return new FSDataOutputStream(new AdlFsOutputStream(
      adlClient.createFile(toRelativeFilePath(f), existsBehaviour,
          octalPermission, false),
      getConf()), this.statistics);
}
/**
 * Append to an existing file (optional operation).
 *
 * @param f the existing file to be appended.
 * @param bufferSize the size of the buffer to be used. ADL backend does
 * not honour
 * @param progress Progress indicator
 * @throws IOException when system error, internal server error or user error
 */
@Override
public FSDataOutputStream append(Path f, int bufferSize,
    Progressable progress) throws IOException {
  statistics.incrementWriteOps(1);
  // bufferSize/progress accepted for interface compatibility only;
  // buffering happens inside AdlFsOutputStream.
  return new FSDataOutputStream(
      new AdlFsOutputStream(adlClient.getAppendStream(toRelativeFilePath(f)),
          getConf()), this.statistics);
}
/**
 * Azure data lake does not support user configuration for data replication
 * hence not leaving system to query on
 * azure data lake.
 *
 * Stub implementation
 *
 * @param p Not honoured
 * @param replication Not honoured
 * @return True hard coded since ADL file system does not support
 * replication configuration
 * @throws IOException No exception would not thrown in this case however
 * aligning with parent api definition.
 */
@Override
public boolean setReplication(final Path p, final short replication)
    throws IOException {
  // Counted as a write op even though no remote call is made.
  statistics.incrementWriteOps(1);
  return true;
}
/**
 * Open call semantic is handled differently in case of ADL. Instead of
 * network stream is returned to the user,
 * Overridden FsInputStream is returned.
 *
 * @param f File path
 * @param buffersize Buffer size, Not honoured
 * @return FSDataInputStream InputStream on which application can read
 * stream of bytes
 * @throws IOException when system error, internal server error or user error
 */
@Override
public FSDataInputStream open(final Path f, final int buffersize)
    throws IOException {
  statistics.incrementReadOps(1);
  // buffersize is ignored; AdlFsInputStream reads its buffering settings
  // from the Configuration passed below.
  return new FSDataInputStream(
      new AdlFsInputStream(adlClient.getReadStream(toRelativeFilePath(f)),
          statistics, getConf()));
}
/**
 * Return a file status object that represents the path.
 *
 * @param f The path we want information from
 * @return a FileStatus object
 * @throws IOException when the path does not exist or any other error;
 * IOException see specific implementation
 */
@Override
public FileStatus getFileStatus(final Path f) throws IOException {
  statistics.incrementReadOps(1);
  // Single round trip: the SDK entry is converted to a Hadoop FileStatus.
  DirectoryEntry entry = adlClient.getDirectoryEntry(toRelativeFilePath(f));
  return toFileStatus(entry, f);
}
/**
 * List the statuses of the files/directories in the given path if the path is
 * a directory.
 *
 * @param f given path
 * @return the statuses of the files/directories in the given patch
 * @throws IOException when the path does not exist or any other error;
 * IOException see specific implementation
 */
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
  statistics.incrementReadOps(1);
  List<DirectoryEntry> entries =
      adlClient.enumerateDirectory(toRelativeFilePath(f));
  return toFileStatuses(entries, f);
}
/**
 * Renames Path src to Path dst. Can take place on local fs
 * or remote DFS.
 *
 * ADLS support POSIX standard for rename operation.
 *
 * @param src path to be renamed
 * @param dst new path after rename
 * @return true if rename is successful
 * @throws IOException on failure
 */
@Override
public boolean rename(final Path src, final Path dst) throws IOException {
  statistics.incrementWriteOps(1);
  // Non-overwriting rename; delegates directly to the SDK.
  return adlClient.rename(toRelativeFilePath(src), toRelativeFilePath(dst));
}
/**
 * Renames src to dst, honouring {@link Options.Rename#OVERWRITE} when it
 * appears among the supplied options.
 *
 * @param src path to be renamed
 * @param dst new path after rename
 * @param options rename options; only OVERWRITE is meaningful here
 * @throws IOException on failure
 * @deprecated retained for AbstractFileSystem compatibility
 */
@Override
@Deprecated
public void rename(final Path src, final Path dst,
    final Options.Rename... options) throws IOException {
  statistics.incrementWriteOps(1);
  boolean replaceExisting = false;
  for (Rename option : options) {
    replaceExisting |= (option == Rename.OVERWRITE);
  }
  adlClient.rename(toRelativeFilePath(src), toRelativeFilePath(dst),
      replaceExisting);
}
/**
 * Concat existing files together.
 *
 * @param trg the path to the target destination.
 * @param srcs the paths to the sources to use for the concatenation.
 * @throws IOException when system error, internal server error or user error
 */
@Override
public void concat(final Path trg, final Path[] srcs) throws IOException {
  statistics.incrementWriteOps(1);
  // Convert every source to a store-relative path before the single
  // SDK concatenate call.
  List<String> relativeSources = new ArrayList<String>(srcs.length);
  for (Path source : srcs) {
    relativeSources.add(toRelativeFilePath(source));
  }
  adlClient.concatenateFiles(toRelativeFilePath(trg), relativeSources);
}
/**
 * Delete a file.
 *
 * @param path the path to delete.
 * @param recursive if path is a directory and set to
 * true, the directory is deleted else throws an exception.
 * In case of a file the recursive can be set to either
 * true or false.
 * @return true if delete is successful else false.
 * @throws IOException when system error, internal server error or user error
 */
@Override
public boolean delete(final Path path, final boolean recursive)
    throws IOException {
  statistics.incrementWriteOps(1);
  String relativePath = toRelativeFilePath(path);
  if (recursive) {
    return adlClient.deleteRecursive(relativePath);
  }
  return adlClient.delete(relativePath);
}
/**
 * Make the given file and all non-existent parents into
 * directories. Has the semantics of Unix 'mkdir -p'.
 * Existence of the directory hierarchy is not an error.
 *
 * @param path path to create
 * @param permission to apply to path
 */
@Override
public boolean mkdirs(final Path path, final FsPermission permission)
    throws IOException {
  statistics.incrementWriteOps(1);
  // Requested permission is combined with the configured umask and passed
  // to the SDK as an octal string.
  return adlClient.createDirectory(toRelativeFilePath(path),
      Integer.toOctalString(applyUMask(permission).toShort()));
}
/**
 * Converts an SDK directory listing into Hadoop FileStatus objects,
 * qualifying each named child path against the parent directory.
 *
 * @param entries directory entries returned by the ADL SDK
 * @param parent the directory that was listed
 * @return one FileStatus per entry
 */
private FileStatus[] toFileStatuses(final List<DirectoryEntry> entries,
    final Path parent) {
  FileStatus[] fileStatuses = new FileStatus[entries.size()];
  int index = 0;
  for (DirectoryEntry entry : entries) {
    FileStatus status = toFileStatus(entry, parent);
    // Fixed: the original compared with reference equality
    // (entry.name == ""), which is false for any non-interned empty
    // string; use content comparison instead.
    if (entry.name != null && !entry.name.isEmpty()) {
      status.setPath(
          new Path(parent.makeQualified(uri, workingDirectory), entry.name));
    }
    fileStatuses[index++] = status;
  }
  return fileStatuses;
}
/**
 * Applies the configured umask to the requested permission, substituting
 * the default permission when none was supplied.
 */
private FsPermission applyUMask(FsPermission permission) {
  FsPermission requested =
      (permission == null) ? FsPermission.getDefault() : permission;
  return requested.applyUMask(FsPermission.getUMask(getConf()));
}
/**
 * Converts a single SDK DirectoryEntry into a Hadoop FileStatus for path f.
 * Block size and replication are the fixed ADL constants; permission is
 * parsed from the SDK's octal string.
 */
private FileStatus toFileStatus(final DirectoryEntry entry, final Path f) {
  boolean isDirectory = entry.type == DirectoryEntryType.DIRECTORY;
  long lastModificationData = entry.lastModifiedTime.getTime();
  long lastAccessTime = entry.lastAccessTime.getTime();
  // aclBitStatus controls whether the ACL bit is surfaced via AdlPermission.
  FsPermission permission = new AdlPermission(aclBitStatus,
      Short.valueOf(entry.permission, 8));
  String user = entry.user;
  String group = entry.group;
  FileStatus status;
  if (overrideOwner) {
    // Debug mode: report the local Hadoop user as owner and "hdfs" as group
    // instead of the values returned by the store.
    status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR,
        ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission,
        userName, "hdfs", this.makeQualified(f));
  } else {
    status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR,
        ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission,
        user, group, this.makeQualified(f));
  }
  return status;
}
/**
 * Set owner of a path (i.e. a file or a directory).
 * The parameters owner and group cannot both be null.
 *
 * @param path The path
 * @param owner If it is null, the original username remains unchanged.
 * @param group If it is null, the original groupname remains unchanged.
 */
@Override
public void setOwner(final Path path, final String owner, final String group)
    throws IOException {
  statistics.incrementWriteOps(1);
  adlClient.setOwner(toRelativeFilePath(path), owner, group);
}
/**
 * Set permission of a path.
 *
 * @param path The path
 * @param permission Access permission
 */
@Override
public void setPermission(final Path path, final FsPermission permission)
    throws IOException {
  statistics.incrementWriteOps(1);
  // Unlike create/mkdirs, no umask is applied here: the caller's
  // permission is sent to the store verbatim (as an octal string).
  adlClient.setPermission(toRelativeFilePath(path),
      Integer.toOctalString(permission.toShort()));
}
/**
 * Modifies ACL entries of files and directories. This method can add new ACL
 * entries or modify the permissions on existing ACL entries. All existing
 * ACL entries that are not specified in this call are retained without
 * changes. (Modifications are merged into the current ACL.)
 *
 * @param path Path to modify
 * @param aclSpec List of AclEntry describing modifications
 * @throws IOException if an ACL could not be modified
 */
@Override
public void modifyAclEntries(final Path path, final List<AclEntry> aclSpec)
    throws IOException {
  statistics.incrementWriteOps(1);
  // Translate Hadoop AclEntry objects into the SDK's AclEntry type by
  // round-tripping through the textual ACL-spec form.
  List<com.microsoft.azure.datalake.store.acl.AclEntry> msAclEntries = new
      ArrayList<com.microsoft.azure.datalake.store.acl.AclEntry>();
  for (AclEntry aclEntry : aclSpec) {
    msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry
        .parseAclEntry(aclEntry.toString()));
  }
  adlClient.modifyAclEntries(toRelativeFilePath(path), msAclEntries);
}
/**
 * Removes ACL entries from files and directories. Other ACL entries are
 * retained.
 *
 * @param path Path to modify
 * @param aclSpec List of AclEntry describing entries to remove
 * @throws IOException if an ACL could not be modified
 */
@Override
public void removeAclEntries(final Path path, final List<AclEntry> aclSpec)
    throws IOException {
  statistics.incrementWriteOps(1);
  List<com.microsoft.azure.datalake.store.acl.AclEntry> msAclEntries = new
      ArrayList<com.microsoft.azure.datalake.store.acl.AclEntry>();
  for (AclEntry aclEntry : aclSpec) {
    // NOTE(review): the extra 'true' presumably marks the entry as a
    // removal spec (no permission part required) — confirm against the
    // ADL SDK parseAclEntry overload.
    msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry
        .parseAclEntry(aclEntry.toString(), true));
  }
  adlClient.removeAclEntries(toRelativeFilePath(path), msAclEntries);
}
/**
 * Removes all default ACL entries from files and directories.
 *
 * @param path Path to modify
 * @throws IOException if an ACL could not be modified
 */
@Override
public void removeDefaultAcl(final Path path) throws IOException {
  statistics.incrementWriteOps(1);
  final String relativePath = toRelativeFilePath(path);
  adlClient.removeDefaultAcls(relativePath);
}
/**
 * Removes all but the base ACL entries of files and directories. The entries
 * for user, group, and others are retained for compatibility with permission
 * bits.
 *
 * @param path Path to modify
 * @throws IOException if an ACL could not be removed
 */
@Override
public void removeAcl(final Path path) throws IOException {
  statistics.incrementWriteOps(1);
  final String relativePath = toRelativeFilePath(path);
  adlClient.removeAllAcls(relativePath);
}
/**
 * Fully replaces ACL of files and directories, discarding all existing
 * entries.
 *
 * @param path Path to modify
 * @param aclSpec List of AclEntry describing modifications, must include
 *                entries for user, group, and others for compatibility with
 *                permission bits.
 * @throws IOException if an ACL could not be modified
 */
@Override
public void setAcl(final Path path, final List<AclEntry> aclSpec)
    throws IOException {
  statistics.incrementWriteOps(1);
  // Convert the Hadoop ACL spec into the SDK's representation.
  final List<com.microsoft.azure.datalake.store.acl.AclEntry> sdkAclSpec =
      new ArrayList<com.microsoft.azure.datalake.store.acl.AclEntry>(
          aclSpec.size());
  for (AclEntry entry : aclSpec) {
    sdkAclSpec.add(com.microsoft.azure.datalake.store.acl.AclEntry
        .parseAclEntry(entry.toString()));
  }
  adlClient.setAcl(toRelativeFilePath(path), sdkAclSpec);
}
/**
 * Gets the ACL of a file or directory.
 *
 * @param path Path to get
 * @return AclStatus describing the ACL of the file or directory
 * @throws IOException if an ACL could not be read
 */
@Override
public AclStatus getAclStatus(final Path path) throws IOException {
  statistics.incrementReadOps(1);
  com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = adlClient
      .getAclStatus(toRelativeFilePath(path));
  AclStatus.Builder aclStatusBuilder = new AclStatus.Builder();
  aclStatusBuilder.owner(adlStatus.owner);
  aclStatusBuilder.group(adlStatus.group);
  // The SDK reports permissions as an octal string (e.g. "754");
  // parseShort avoids the boxing that Short.valueOf would introduce.
  aclStatusBuilder.setPermission(
      new FsPermission(Short.parseShort(adlStatus.octalPermissions, 8)));
  aclStatusBuilder.stickyBit(adlStatus.stickyBit);
  // Round-trip the SDK ACL list through its string form to reuse the
  // Hadoop-side parser; the boolean marks the spec as permission-bearing.
  String aclListString = com.microsoft.azure.datalake.store.acl.AclEntry
      .aclListToString(adlStatus.aclSpec);
  List<AclEntry> aclEntries = AclEntry.parseAclSpec(aclListString, true);
  aclStatusBuilder.addEntries(aclEntries);
  return aclStatusBuilder.build();
}
/**
 * Checks if the user can access a path. The mode specifies which access
 * checks to perform. If the requested permissions are granted, then the
 * method returns normally. If access is denied, then the method throws an
 * {@link AccessControlException}.
 *
 * @param path Path to check
 * @param mode type of access to check
 * @throws AccessControlException if access is denied
 * @throws java.io.FileNotFoundException if the path does not exist
 * @throws IOException see specific implementation
 */
@Override
public void access(final Path path, FsAction mode) throws IOException {
  statistics.incrementReadOps(1);
  // The SDK takes the symbolic form of the requested action, e.g. "r-x".
  final boolean granted =
      adlClient.checkAccess(toRelativeFilePath(path), mode.SYMBOL);
  if (!granted) {
    throw new AccessControlException("Access Denied : " + path.toString());
  }
}
/**
 * Return the {@link ContentSummary} of a given {@link Path}.
 *
 * @param f path to use
 * @return summary of length, file count, directory count and space consumed
 */
@Override
public ContentSummary getContentSummary(Path f) throws IOException {
  statistics.incrementReadOps(1);
  final com.microsoft.azure.datalake.store.ContentSummary summary =
      adlClient.getContentSummary(toRelativeFilePath(f));
  return new Builder()
      .length(summary.length)
      .fileCount(summary.fileCount)
      .directoryCount(summary.directoryCount)
      .spaceConsumed(summary.spaceConsumed)
      .build();
}
/**
 * Returns the transport scheme used when talking to the ADL back end.
 * Always the secure scheme here; overridable by tests.
 *
 * @return the transport scheme constant
 */
@VisibleForTesting
protected String getTransportScheme() {
  return SECURE_TRANSPORT_SCHEME;
}
/**
 * Converts a (possibly relative) path to the server-side path string by
 * qualifying it against this file system's URI and working directory and
 * taking the path component of the result.
 *
 * @param path input path
 * @return absolute path portion of the fully-qualified URI
 */
@VisibleForTesting
String toRelativeFilePath(Path path) {
  return path.makeQualified(uri, workingDirectory).toUri().getPath();
}
/**
 * Get the current working directory for the given file system.
 * Returns the cached value set by {@link #setWorkingDirectory(Path)}.
 *
 * @return the directory pathname
 */
@Override
public Path getWorkingDirectory() {
  return workingDirectory;
}
/**
 * Set the current working directory for the given file system. All relative
 * paths will be resolved relative to it.
 *
 * @param dir Working directory path.
 */
@Override
public void setWorkingDirectory(final Path dir) {
  if (dir == null) {
    throw new InvalidPathException("Working directory cannot be set to NULL");
  }
  /*
   * Do not validate the scheme and URI of the passed parameter. When Adls
   * runs as additional file system, working directory set has the default
   * file system scheme and uri.
   *
   * Found a problem during PIG execution in
   * https://github.com/apache/pig/blob/branch-0
   * .15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer
   * /PigInputFormat.java#L235
   * However similar problem would be present in other application so
   * defaulting to build working directory using relative path only.
   */
  this.workingDirectory = this.makeAbsolute(dir);
}
/**
 * Return the number of bytes that large input files should be optimally
 * be split into to minimize i/o time. ADL reports a fixed block size.
 *
 * @return the fixed ADL block size in bytes
 * @deprecated use {@link #getDefaultBlockSize(Path)} instead
 */
@Deprecated
public long getDefaultBlockSize() {
  return ADL_BLOCK_SIZE;
}
/**
 * Return the number of bytes that large input files should be optimally
 * be split into to minimize i/o time. The given path will be used to
 * locate the actual filesystem. The full path does not have to exist.
 *
 * @param f path of file (ignored here; ADL uses a fixed block size)
 * @return the default block size for the path's filesystem
 */
// NOTE(review): this appears to override FileSystem#getDefaultBlockSize(Path)
// and should carry @Override — confirm against the superclass signature.
public long getDefaultBlockSize(Path f) {
  return getDefaultBlockSize();
}
/**
 * Get the block size.
 *
 * @param f the filename (ignored; ADL uses a fixed block size)
 * @return the number of bytes in a block
 * @deprecated Use getFileStatus() instead
 */
@Deprecated
public long getBlockSize(Path f) throws IOException {
  return ADL_BLOCK_SIZE;
}
/**
 * Synthesizes block locations for a byte range of the given file. ADL does
 * not expose real block placement, so every block is reported as living on
 * "localhost" with the fixed ADL block size.
 *
 * @param status file status; a null status yields a null result
 * @param offset starting byte of the range, must be non-negative
 * @param length number of bytes in the range, must be non-negative
 * @return synthetic locations covering the range, or an empty array when
 *         the offset lies beyond the end of the file
 */
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus status,
    final long offset, final long length) throws IOException {
  if (status == null) {
    return null;
  }
  if (offset < 0 || length < 0) {
    throw new IllegalArgumentException("Invalid start or len parameter");
  }
  if (status.getLen() < offset) {
    return new BlockLocation[0];
  }
  final String[] names = {"localhost"};
  final String[] hosts = {"localhost"};
  final long blockSize = ADL_BLOCK_SIZE;
  // Ceiling division: one location per (partial) block in the range.
  final int blockCount =
      (int) (length / blockSize) + (length % blockSize == 0 ? 0 : 1);
  final BlockLocation[] locations = new BlockLocation[blockCount];
  for (int i = 0; i < blockCount; i++) {
    final long start = offset + i * blockSize;
    // Last block may be shorter than a full block.
    final long len = Math.min(blockSize, offset + length - start);
    locations[i] = new BlockLocation(names, hosts, start, len);
  }
  return locations;
}
/**
 * Resolves the file status for {@code p} and delegates to
 * {@link #getFileBlockLocations(FileStatus, long, long)}.
 */
@Override
public BlockLocation[] getFileBlockLocations(final Path p, final long offset,
    final long length) throws IOException {
  // read ops incremented in getFileStatus
  FileStatus fileStatus = getFileStatus(p);
  return getFileBlockLocations(fileStatus, offset, length);
}
/**
 * Get replication. ADL reports a fixed replication factor for all files.
 *
 * @param src file name (ignored)
 * @return file replication
 * @deprecated Use getFileStatus() instead
 */
@Deprecated
public short getReplication(Path src) {
  return ADL_REPLICATION_FACTOR;
}
/** Resolves a possibly-relative path against the current working directory. */
private Path makeAbsolute(Path path) {
  return path.isAbsolute() ? path : new Path(this.workingDirectory, path);
}
/**
 * Fetches a mandatory configuration value.
 *
 * @param conf configuration to read from
 * @param key configuration key that must carry a non-empty value
 * @return the configured value
 * @throws IllegalArgumentException if the key is absent or empty
 */
private static String getNonEmptyVal(Configuration conf, String key) {
  final String configuredValue = conf.get(key);
  if (!StringUtils.isEmpty(configuredValue)) {
    return configuredValue;
  }
  throw new IllegalArgumentException(
      "No value for " + key + " found in conf file.");
}
}

View File

@ -0,0 +1,149 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.microsoft.azure.datalake.store.ADLFileInputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem.Statistics;
import java.io.IOException;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_READ_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY;
/**
 * Hadoop-facing input stream that wraps the ADL SDK's
 * {@link ADLFileInputStream} and records bytes-read statistics.
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class AdlFsInputStream extends FSInputStream {
  /** Underlying SDK stream that performs the actual reads. */
  private final ADLFileInputStream in;
  /** FileSystem statistics to update on reads; may be null. */
  private final Statistics stat;
  /** When true, positional reads go straight to the SDK instead of the
   *  default FSInputStream seek-read implementation. */
  private final boolean enablePositionalReadExperiment;

  public AdlFsInputStream(ADLFileInputStream inputStream, Statistics statistics,
      Configuration conf) throws IOException {
    this.in = inputStream;
    this.stat = statistics;
    this.enablePositionalReadExperiment = conf.getBoolean(
        ADL_EXPERIMENT_POSITIONAL_READ_KEY,
        ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT);
    // Configure the SDK's read-ahead buffer from our configuration.
    this.in.setBufferSize(conf.getInt(READ_AHEAD_BUFFER_SIZE_KEY,
        DEFAULT_READ_AHEAD_BUFFER_SIZE));
  }

  @Override
  public synchronized void seek(long pos) throws IOException {
    in.seek(pos);
  }

  /**
   * Return the current offset from the start of the file.
   */
  @Override
  public synchronized long getPos() throws IOException {
    return in.getPos();
  }

  /** There is never an alternate source to switch to. */
  @Override
  public boolean seekToNewSource(long l) throws IOException {
    return false;
  }

  @Override
  public synchronized int read() throws IOException {
    final int value = in.read();
    if (value != -1 && stat != null) {
      stat.incrementBytesRead(1);
    }
    return value;
  }

  @Override
  public int read(long position, byte[] buffer, int offset, int length)
      throws IOException {
    final int bytesRead = enablePositionalReadExperiment
        ? in.read(position, buffer, offset, length)
        : super.read(position, buffer, offset, length);
    if (stat != null && bytesRead > 0) {
      stat.incrementBytesRead(bytesRead);
    }
    return bytesRead;
  }

  @Override
  public synchronized int read(byte[] buffer, int offset, int length)
      throws IOException {
    final int bytesRead = in.read(buffer, offset, length);
    if (stat != null && bytesRead > 0) {
      stat.incrementBytesRead(bytesRead);
    }
    return bytesRead;
  }

  /**
   * This method returns the remaining bytes in the stream, rather than the
   * expected Java interpretation of {@link java.io.InputStream#available()},
   * which expects the number of remaining bytes in the local buffer.
   * Moreover, it caps the value returned to a maximum of Integer.MAX_VALUE.
   * These changed behaviors are to ensure compatibility with the
   * expectations of the HBase WAL reader, which depends on available()
   * returning the number of bytes in the stream.
   *
   * Given all other FileSystems in the hadoop ecosystem (especially HDFS)
   * do this, other apps may also have picked up this expectation from the
   * HDFS implementation. Therefore this quirky behavior is kept here for
   * compatibility.
   *
   * @return remaining bytes in the stream, with maximum of Integer.MAX_VALUE.
   * @throws IOException If fails to get the position or file length from SDK.
   */
  @Override
  public synchronized int available() throws IOException {
    return (int) Math.min(in.length() - in.getPos(), Integer.MAX_VALUE);
  }

  @Override
  public synchronized void close() throws IOException {
    in.close();
  }

  @Override
  public synchronized long skip(long pos) throws IOException {
    return in.skip(pos);
  }
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.microsoft.azure.datalake.store.ADLFileOutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Syncable;
import java.io.IOException;
import java.io.OutputStream;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_WRITE_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY;
/**
 * Hadoop-facing output stream wrapping the ADL SDK's
 * {@link com.microsoft.azure.datalake.store.ADLFileOutputStream}.
 *
 * Flush semantics: flush() is inherited from OutputStream as a no-op, since
 * some parts of the hadoop ecosystem call flush() expecting it to have no
 * perf impact. In hadoop filesystems, flush() itself guarantees no
 * durability: that is achieved by calling hflush() or hsync().
 */
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class AdlFsOutputStream extends OutputStream implements Syncable {
  /** Underlying SDK stream that buffers and uploads the written bytes. */
  private final ADLFileOutputStream out;

  public AdlFsOutputStream(ADLFileOutputStream out, Configuration configuration)
      throws IOException {
    this.out = out;
    // Configure the SDK's write buffer from our configuration.
    final int bufferSize = configuration.getInt(WRITE_BUFFER_SIZE_KEY,
        DEFAULT_WRITE_AHEAD_BUFFER_SIZE);
    out.setBufferSize(bufferSize);
  }

  @Override
  public synchronized void write(int b) throws IOException {
    out.write(b);
  }

  @Override
  public synchronized void write(byte[] b, int off, int len)
      throws IOException {
    out.write(b, off, len);
  }

  @Override
  public synchronized void close() throws IOException {
    out.close();
  }

  /** Legacy Syncable method; delegates to the SDK flush. */
  public synchronized void sync() throws IOException {
    out.flush();
  }

  /** Flushes buffered data via the SDK. */
  public synchronized void hflush() throws IOException {
    out.flush();
  }

  /** Same implementation as hflush(): delegates to the SDK flush. */
  public synchronized void hsync() throws IOException {
    out.flush();
  }
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.fs.permission.FsPermission;
/**
 * Hadoop shell command -getfacl does not invoke getAclStatus if FsPermission
 * from getFileStatus has not set ACL bit to true. By default getAclBit
 * returns false.
 *
 * Making an additional call to getAclStatus would be redundant when adls is
 * running as an additional FS. To avoid this redundancy, a configuration is
 * provided to control the value returned by getAclBit.
 */
class AdlPermission extends FsPermission {
  // Value reported by getAclBit(); fixed at construction time.
  private final boolean aclBit;

  AdlPermission(boolean aclBitStatus, Short aShort) {
    super(aShort);
    this.aclBit = aclBitStatus;
  }

  /**
   * Returns the ACL-bit flag supplied at construction. Per the class
   * javadoc, callers derive it from the "adl.feature.support.acl.bit"
   * configuration.
   *
   * @return true if the ACL bit should be reported as set
   */
  public boolean getAclBit() {
    return aclBit;
  }

  /**
   * Equality deliberately ignores {@link #aclBit}: an AdlPermission compares
   * equal to any FsPermission with the same user/group/other actions and
   * sticky bit.
   */
  @Override
  public boolean equals(Object obj) {
    if (obj instanceof FsPermission) {
      FsPermission that = (FsPermission) obj;
      return this.getUserAction() == that.getUserAction()
          && this.getGroupAction() == that.getGroupAction()
          && this.getOtherAction() == that.getOtherAction()
          && this.getStickyBit() == that.getStickyBit();
    }
    return false;
  }

  /** Consistent with equals(): equal permissions share the same toShort(). */
  @Override
  public int hashCode() {
    return toShort();
  }
}

View File

@ -0,0 +1,41 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.AzureADToken;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import java.io.IOException;
final class SdkTokenProviderAdapter extends AccessTokenProvider {
private AzureADTokenProvider tokenProvider;
SdkTokenProviderAdapter(AzureADTokenProvider tp) {
this.tokenProvider = tp;
}
protected AzureADToken refreshToken() throws IOException {
AzureADToken azureADToken = new AzureADToken();
azureADToken.accessToken = tokenProvider.getAccessToken();
azureADToken.expiry = tokenProvider.getExpiryTime();
return azureADToken;
}
}

View File

@ -0,0 +1,25 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
/**
 * Token-acquisition mechanisms supported for Azure AD authentication,
 * selected via the dfs.adls.oauth2.access.token.provider.type setting.
 */
enum TokenProviderType {
  // Authenticate with a pre-obtained OAuth2 refresh token.
  RefreshToken,
  // Authenticate with client id + credential (service-principal style).
  ClientCredential,
  // Presumably selects a user-supplied AzureADTokenProvider
  // implementation — confirm against AdlFileSystem's provider wiring.
  Custom
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl.oauth2;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.util.Date;
/**
 * Provide an Azure Active Directory supported
 * OAuth2 access token to be used to authenticate REST calls against Azure data
 * lake file system {@link org.apache.hadoop.fs.adl.AdlFileSystem}.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class AzureADTokenProvider {
  /**
   * Initialize with supported configuration. This method is invoked when the
   * {@link org.apache.hadoop.fs.adl.AdlFileSystem#initialize
   * (URI, Configuration)} method is invoked.
   *
   * @param configuration Configuration object
   * @throws IOException if instance can not be configured.
   */
  public abstract void initialize(Configuration configuration)
      throws IOException;

  /**
   * Obtain the access token that should be added to the https connection's
   * header. Called whenever the expiry reported by {@link #getExpiryTime()}
   * requires a new token, so implementations should be performant.
   * Implementations are responsible for any refreshing of the token.
   *
   * @return String containing the access token
   * @throws IOException if there is an error fetching the token
   */
  public abstract String getAccessToken() throws IOException;

  /**
   * Obtain the expiry time of the token. If the implementation is performant
   * enough to maintain expiry and can tolerate a {@link #getAccessToken()}
   * call on every connection, it is safe to return the current or a past
   * time.
   *
   * However, it is recommended to use the token expiry time received from
   * Azure Active Directory.
   *
   * @return Date at which the access token retrieved from AAD expires.
   */
  public abstract Date getExpiryTime();
}

View File

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
 * Public interfaces exposing OAuth2 authentication related features.
 */
package org.apache.hadoop.fs.adl.oauth2;

View File

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
 * Supporting classes for the Azure Data Lake Store file system
 * implementation.
 */
package org.apache.hadoop.fs.adl;

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.hadoop.fs.adl.AdlFileSystem

View File

@ -0,0 +1,193 @@
<!---
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
# Hadoop Azure Data Lake Support
* [Introduction](#Introduction)
* [Features](#Features)
* [Limitations](#Limitations)
* [Usage](#Usage)
* [Concepts](#Concepts)
* [OAuth2 Support](#OAuth2_Support)
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
* [Using Refresh Token](#Refresh_Token)
* [Using Client Keys](#Client_Credential_Token)
* [Enabling ADL Filesystem](#Enabling_ADL)
* [Accessing adl URLs](#Accessing_adl_URLs)
* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
## <a name="Introduction" />Introduction
The hadoop-azure-datalake module provides support for integration with
[Azure Data Lake Store]( https://azure.microsoft.com/en-in/documentation/services/data-lake-store/).
The jar file is named azure-datalake-store.jar.
## <a name="Features" />Features
* Read and write data stored in an Azure Data Lake Storage account.
* Reference file system paths using URLs using the `adl` scheme for Secure Webhdfs i.e. SSL
encrypted access.
* Can act as a source of data in a MapReduce job, or a sink.
* Tested on both Linux and Windows.
* Tested for scale.
## <a name="Limitations" />Limitations
Partial or no support for the following operations :
* Operation on Symbolic Link
* Proxy Users
* File Truncate
* File Checksum
* File replication factor
* Home directory of the active user on the Hadoop cluster.
* Extended Attributes(XAttrs) Operations
* Snapshot Operations
* Delegation Token Operations
* User and group information returned as ListStatus and GetFileStatus is in form of GUID associated in Azure Active Directory.
## <a name="Usage" />Usage
### <a name="Concepts" />Concepts
Azure Data Lake Storage access path syntax is
adl://<Account Name>.azuredatalakestore.net/
Get started with azure data lake account with [https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/](https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/)
#### <a name="#OAuth2_Support" />OAuth2 Support
Usage of Azure Data Lake Storage requires OAuth2 bearer token to be present as part of the HTTPS header as per OAuth2 specification. Valid OAuth2 bearer token should be obtained from Azure Active Directory for valid users who have access to Azure Data Lake Storage Account.
Azure Active Directory (Azure AD) is Microsoft's multi-tenant cloud based directory and identity management service. See [https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/](https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/)
Following sections describes on OAuth2 configuration in core-site.xml.
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal).
### <a name="Refresh_Token" />Using Refresh Token
Add the following properties to your core-site.xml
<property>
<name>dfs.adls.oauth2.access.token.provider.type</name>
<value>RefreshToken</value>
</property>
Applications are required to set the client id and the OAuth2 refresh token from Azure Active Directory that is associated with the client id. See [https://github.com/AzureAD/azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java).
**Do not share client id and refresh token, it must be kept secret.**
<property>
<name>dfs.adls.oauth2.client.id</name>
<value></value>
</property>
<property>
<name>dfs.adls.oauth2.refresh.token</name>
<value></value>
</property>
### <a name="Client_Credential_Token" />Using Client Keys
#### Generating the Service Principal
1. Go to the portal (https://portal.azure.com)
2. Under "Browse", look for Active Directory and click on it.
3. Create "Web Application". Remember the name you create here - that is what you will add to your ADL account as authorized user.
4. Go through the wizard
5. Once app is created, Go to app configuration, and find the section on "keys"
6. Select a key duration and hit save. Save the generated keys.
7. Note down the properties you will need to auth:
- The client ID
- The key you just generated above
 - The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth 2.0 Token Endpoint value)
- Resource: Always https://management.core.windows.net/ , for all customers
#### Adding the service principal to your ADL Account
1. Go to the portal again, and open your ADL account
2. Select Users under Settings
3. Add your user name you created in Step 6 above (note that it does not show up in the list, but will be found if you searched for the name)
4. Add "Owner" role
#### Configure core-site.xml
Add the following properties to your core-site.xml
<property>
<name>dfs.adls.oauth2.refresh.url</name>
<value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
</property>
<property>
<name>dfs.adls.oauth2.client.id</name>
<value>CLIENT ID FROM STEP 7 ABOVE</value>
</property>
<property>
<name>dfs.adls.oauth2.credential</name>
<value>PASSWORD FROM STEP 7 ABOVE</value>
</property>
## <a name="Enabling_ADL" />Enabling ADL Filesystem
For the ADL FileSystem to take effect, update core-site.xml with
<property>
<name>fs.adl.impl</name>
<value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
</property>
<property>
<name>fs.AbstractFileSystem.adl.impl</name>
<value>org.apache.hadoop.fs.adl.Adl</value>
</property>
### <a name="Accessing_adl_URLs" />Accessing adl URLs
After credentials are configured in core-site.xml, any Hadoop component may
reference files in that Azure Data Lake Storage account by using URLs of the following
format:
adl://<Account Name>.azuredatalakestore.net/<path>
The scheme `adl` identifies a URL on a file system backed by Azure
Data Lake Storage. `adl` utilizes encrypted HTTPS access for all interaction with
the Azure Data Lake Storage API.
For example, the following
[FileSystem Shell](../hadoop-project-dist/hadoop-common/FileSystemShell.html)
commands demonstrate access to a storage account named `youraccount`.
> hadoop fs -mkdir adl://yourcontainer.azuredatalakestore.net/testDir
> hadoop fs -put testFile adl://yourcontainer.azuredatalakestore.net/testDir/testFile
> hadoop fs -cat adl://yourcontainer.azuredatalakestore.net/testDir/testFile
test file content
## <a name="Testing_the_hadoop-azure_Module" />Testing the azure-datalake-store Module
The hadoop-azure module includes a full suite of unit tests. Most of the tests will run without additional configuration by running mvn test. This includes tests against mocked storage, which is an in-memory emulation of Azure Data Lake Storage.
A selection of tests can run against Azure Data Lake Storage. To run tests against ADL storage, please configure contract-test-options.xml with the ADL account information mentioned in the above sections. Also turn on the contract test execution flag to trigger tests against Azure Data Lake Storage.
<property>
<name>dfs.adl.test.contract.enable</name>
<value>true</value>
</property>
<property>
<name>test.fs.adl.name</name>
<value>adl://yourcontainer.azuredatalakestore.net</value>
</property>

View File

@ -0,0 +1,99 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import com.squareup.okhttp.mockwebserver.MockWebServer;
import org.junit.After;
import org.junit.Before;
/**
 * Mock server used to simulate ADLS backend calls. Derived tests can enqueue
 * or override expected server responses to exercise specific functionality.
 * Token generation ahead of each request is also handled here through
 * {@link org.apache.hadoop.fs.adl.common.CustomMockTokenProvider}.
 */
public class AdlMockWebServer {
  // MockWebServer instances are lean enough to create fresh per test.
  private MockWebServer server = null;
  // File system under test, wired against the local mock server.
  private TestableAdlFileSystem fs = null;
  // Port the mock server bound to; assigned in preTestSetup().
  private int port = 0;
  private Configuration conf = new Configuration();

  public MockWebServer getMockServer() {
    return server;
  }

  public TestableAdlFileSystem getMockAdlFileSystem() {
    return fs;
  }

  public int getPort() {
    return port;
  }

  public Configuration getConf() {
    return conf;
  }

  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Starts the mock server and initializes a {@link TestableAdlFileSystem}
   * against it, authenticating through the mock token provider. Responses are
   * returned in the same order that tests enqueue them.
   */
  @Before
  public void preTestSetup() throws IOException, URISyntaxException {
    server = new MockWebServer();
    server.start();

    // The server picks a free port; remember it so the file system URI and
    // subsequent requests can target it.
    port = server.getUrl("").getPort();

    conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
        CustomMockTokenProvider.class, AzureADTokenProvider.class);
    fs = new TestableAdlFileSystem();
    fs.initialize(new URI("adl://localhost:" + port), conf);
  }

  /** Releases the file system and shuts the mock server down. */
  @After
  public void postTestSetup() throws IOException {
    fs.close();
    server.shutdown();
  }
}

View File

@ -0,0 +1,262 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclEntryScope;
import org.apache.hadoop.fs.permission.AclEntryType;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.junit.Assert;
import org.junit.Test;
/**
 * Stub adl server and test acl data conversion within SDK and Hadoop adl
 * client.
 */
public class TestACLFeatures extends AdlMockWebServer {

  /** Path used by every test; the mock server ignores the actual value. */
  private static final Path TEST_PATH = new Path("/test1/test2");

  /** Enqueues an empty HTTP 200 response on the mock server. */
  private void enqueueSuccess() {
    getMockServer().enqueue(new MockResponse().setResponseCode(200));
  }

  /** Enqueues an HTTP 403 response carrying an AccessControlException body. */
  private void enqueueAccessDenied() {
    getMockServer().enqueue(new MockResponse().setResponseCode(403)
        .setBody(TestADLResponseData.getAccessControlException()));
  }

  /**
   * Builds the two-entry ACL list shared by several tests: an ACCESS user
   * entry for "hadoop" and a DEFAULT group entry for "hdfs".
   */
  private List<AclEntry> buildStandardAclEntries() {
    List<AclEntry> aclEntries = new ArrayList<AclEntry>();
    AclEntry.Builder builder = new AclEntry.Builder();
    builder.setName("hadoop");
    builder.setType(AclEntryType.USER);
    builder.setPermission(FsAction.ALL);
    builder.setScope(AclEntryScope.ACCESS);
    aclEntries.add(builder.build());
    builder.setName("hdfs");
    builder.setType(AclEntryType.GROUP);
    builder.setPermission(FsAction.READ_WRITE);
    builder.setScope(AclEntryScope.DEFAULT);
    aclEntries.add(builder.build());
    return aclEntries;
  }

  @Test(expected = AccessControlException.class)
  public void testModifyAclEntries() throws URISyntaxException, IOException {
    List<AclEntry> entries = buildStandardAclEntries();
    // First call succeeds against a 200 response.
    enqueueSuccess();
    getMockAdlFileSystem().modifyAclEntries(TEST_PATH, entries);
    // Second call hits a 403 and must surface an AccessControlException.
    enqueueAccessDenied();
    getMockAdlFileSystem().modifyAclEntries(TEST_PATH, entries);
  }

  @Test(expected = AccessControlException.class)
  public void testRemoveAclEntriesWithOnlyUsers()
      throws URISyntaxException, IOException {
    // A single user entry without permission or scope.
    List<AclEntry> entries = new ArrayList<AclEntry>();
    entries.add(
        new AclEntry.Builder().setName("hadoop").setType(AclEntryType.USER)
            .build());
    enqueueSuccess();
    getMockAdlFileSystem().removeAclEntries(TEST_PATH, entries);
    enqueueAccessDenied();
    getMockAdlFileSystem().removeAclEntries(TEST_PATH, entries);
  }

  @Test(expected = AccessControlException.class)
  public void testRemoveAclEntries() throws URISyntaxException, IOException {
    List<AclEntry> entries = buildStandardAclEntries();
    enqueueSuccess();
    getMockAdlFileSystem().removeAclEntries(TEST_PATH, entries);
    enqueueAccessDenied();
    getMockAdlFileSystem().removeAclEntries(TEST_PATH, entries);
  }

  @Test(expected = AccessControlException.class)
  public void testRemoveDefaultAclEntries()
      throws URISyntaxException, IOException {
    enqueueSuccess();
    getMockAdlFileSystem().removeDefaultAcl(TEST_PATH);
    enqueueAccessDenied();
    getMockAdlFileSystem().removeDefaultAcl(TEST_PATH);
  }

  @Test(expected = AccessControlException.class)
  public void testRemoveAcl() throws URISyntaxException, IOException {
    enqueueSuccess();
    getMockAdlFileSystem().removeAcl(TEST_PATH);
    enqueueAccessDenied();
    getMockAdlFileSystem().removeAcl(TEST_PATH);
  }

  @Test(expected = AccessControlException.class)
  public void testSetAcl() throws URISyntaxException, IOException {
    List<AclEntry> entries = buildStandardAclEntries();
    enqueueSuccess();
    getMockAdlFileSystem().setAcl(TEST_PATH, entries);
    enqueueAccessDenied();
    getMockAdlFileSystem().setAcl(TEST_PATH, entries);
  }

  @Test(expected = AccessControlException.class)
  public void testCheckAccess() throws URISyntaxException, IOException {
    // Every probe succeeds until the final 403 triggers the exception.
    FsAction[] probes = {FsAction.ALL, FsAction.EXECUTE, FsAction.READ,
        FsAction.READ_EXECUTE, FsAction.READ_WRITE, FsAction.NONE,
        FsAction.WRITE, FsAction.WRITE_EXECUTE};
    for (FsAction action : probes) {
      enqueueSuccess();
      getMockAdlFileSystem().access(TEST_PATH, action);
    }
    enqueueAccessDenied();
    getMockAdlFileSystem().access(TEST_PATH, FsAction.WRITE_EXECUTE);
  }

  @Test(expected = AccessControlException.class)
  public void testSetPermission() throws URISyntaxException, IOException {
    enqueueSuccess();
    getMockAdlFileSystem()
        .setPermission(TEST_PATH, FsPermission.getDefault());
    enqueueAccessDenied();
    getMockAdlFileSystem()
        .setPermission(TEST_PATH, FsPermission.getDefault());
  }

  @Test(expected = AccessControlException.class)
  public void testSetOwner() throws URISyntaxException, IOException {
    enqueueSuccess();
    getMockAdlFileSystem().setOwner(TEST_PATH, "hadoop", "hdfs");
    enqueueAccessDenied();
    getMockAdlFileSystem().setOwner(TEST_PATH, "hadoop", "hdfs");
  }

  @Test
  public void getAclStatusAsExpected() throws URISyntaxException, IOException {
    getMockServer().enqueue(new MockResponse().setResponseCode(200)
        .setBody(TestADLResponseData.getGetAclStatusJSONResponse()));
    AclStatus aclStatus = getMockAdlFileSystem().getAclStatus(TEST_PATH);

    Assert.assertEquals(aclStatus.getGroup(), "supergroup");
    Assert.assertEquals(aclStatus.getOwner(), "hadoop");
    Assert.assertEquals((Short) aclStatus.getPermission().toShort(),
        Short.valueOf("775", 8));
    // Only the two entries from the canned JSON response are expected.
    for (AclEntry entry : aclStatus.getEntries()) {
      boolean known = entry.toString().equalsIgnoreCase("user:carla:rw-")
          || entry.toString().equalsIgnoreCase("group::r-x");
      if (!known) {
        Assert.fail("Unexpected entry : " + entry.toString());
      }
    }
  }

  @Test(expected = FileNotFoundException.class)
  public void getAclStatusNotExists() throws URISyntaxException, IOException {
    getMockServer().enqueue(new MockResponse().setResponseCode(404)
        .setBody(TestADLResponseData.getFileNotFoundException()));
    getMockAdlFileSystem().getAclStatus(TEST_PATH);
  }

  @Test(expected = AccessControlException.class)
  public void testAclStatusDenied() throws URISyntaxException, IOException {
    enqueueAccessDenied();
    getMockAdlFileSystem().getAclStatus(TEST_PATH);
  }
}

View File

@ -0,0 +1,147 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.fs.FileStatus;
import java.util.Random;
/**
 * Mock up response data returned from the Adl storage account, in the
 * webhdfs-compatible JSON format the ADL REST endpoint produces.
 */
public final class TestADLResponseData {

  private TestADLResponseData() {
  }

  /**
   * Builds a GETFILESTATUS JSON response mirroring the supplied status.
   *
   * @param status file status to serialize
   * @return FileStatus JSON document
   */
  public static String getGetFileStatusJSONResponse(FileStatus status) {
    return "{\"FileStatus\":{\"length\":" + status.getLen() + "," +
        "\"pathSuffix\":\"\",\"type\":\"" + (status.isDirectory() ?
        "DIRECTORY" :
        "FILE") + "\"" +
        ",\"blockSize\":" + status.getBlockSize() + ",\"accessTime\":" +
        status.getAccessTime() + ",\"modificationTime\":" + status
        .getModificationTime() + "" +
        ",\"replication\":" + status.getReplication() + ",\"permission\":\""
        + status.getPermission() + "\",\"owner\":\"" + status.getOwner()
        + "\",\"group\":\"" + status.getGroup() + "\"}}";
  }

  /** Returns a GETFILESTATUS response for a default 4 MB file. */
  public static String getGetFileStatusJSONResponse() {
    return getGetFileStatusJSONResponse(4194304);
  }

  /** Returns a canned GETACLSTATUS response with two ACL entries. */
  public static String getGetAclStatusJSONResponse() {
    return "{\n" + "    \"AclStatus\": {\n" + "        \"entries\": [\n"
        + "            \"user:carla:rw-\", \n" + "            \"group::r-x\"\n"
        + "        ], \n" + "        \"group\": \"supergroup\", \n"
        + "        \"owner\": \"hadoop\", \n"
        + "        \"permission\":\"775\",\n"
        + "        \"stickyBit\": false\n"
        + "    }\n" + "}";
  }

  /**
   * Returns a GETFILESTATUS response for a plain file of the given length.
   *
   * @param length value emitted in the {@code length} field
   * @return FileStatus JSON document
   */
  public static String getGetFileStatusJSONResponse(long length) {
    return "{\"FileStatus\":{\"length\":" + length + "," +
        "\"pathSuffix\":\"\",\"type\":\"FILE\",\"blockSize\":268435456," +
        "\"accessTime\":1452103827023,\"modificationTime\":1452103827023," +
        "\"replication\":0,\"permission\":\"777\"," +
        "\"owner\":\"NotSupportYet\",\"group\":\"NotSupportYet\"}}";
  }

  /**
   * Returns a LISTSTATUS response containing {@code dirSize} file entries
   * with random names.
   *
   * @param dirSize number of entries to emit; 0 yields an empty listing
   * @return FileStatuses JSON document
   */
  public static String getListFileStatusJSONResponse(int dirSize) {
    // StringBuilder avoids O(n^2) concatenation in the loop, and emitting the
    // separator up front removes the trailing-comma trim that threw
    // StringIndexOutOfBoundsException when dirSize == 0.
    StringBuilder list = new StringBuilder();
    for (int i = 0; i < dirSize; ++i) {
      if (i > 0) {
        list.append(',');
      }
      list.append("{\"length\":1024,\"pathSuffix\":\"")
          .append(java.util.UUID.randomUUID())
          .append("\",\"type\":\"FILE\",\"blockSize\":268435456,")
          .append("\"accessTime\":1452103878833,")
          .append("\"modificationTime\":1452103879190,\"replication\":0,")
          .append("\"permission\":\"777\",\"owner\":\"NotSupportYet\",")
          .append("\"group\":\"NotSupportYet\"}");
    }
    return "{\"FileStatuses\":{\"FileStatus\":[" + list + "]}}";
  }

  /** Returns a webhdfs boolean response, e.g. for DELETE/RENAME. */
  public static String getJSONResponse(boolean status) {
    return "{\"boolean\":" + status + "}";
  }

  /** Returns a RemoteException body for IllegalArgumentException. */
  public static String getErrorIllegalArgumentExceptionJSONResponse() {
    return "{\n" +
        "  \"RemoteException\":\n" +
        "  {\n" +
        "    \"exception\"    : \"IllegalArgumentException\",\n" +
        "    \"javaClassName\": \"java.lang.IllegalArgumentException\",\n" +
        "    \"message\"      : \"Invalid\"" +
        "  }\n" +
        "}";
  }

  /** Returns a RemoteException body for BadOffsetException. */
  public static String getErrorBadOffsetExceptionJSONResponse() {
    return "{\n" +
        "  \"RemoteException\":\n" +
        "  {\n" +
        "    \"exception\"    : \"BadOffsetException\",\n" +
        "    \"javaClassName\": \"org.apache.hadoop.fs.adl"
        + ".BadOffsetException\",\n" +
        "    \"message\"      : \"Invalid\"" +
        "  }\n" +
        "}";
  }

  /** Returns a RemoteException body for a server-side RuntimeException. */
  public static String getErrorInternalServerExceptionJSONResponse() {
    return "{\n" +
        "  \"RemoteException\":\n" +
        "  {\n" +
        "    \"exception\"    : \"RuntimeException\",\n" +
        "    \"javaClassName\": \"java.lang.RuntimeException\",\n" +
        "    \"message\"      : \"Internal Server Error\"" +
        "  }\n" +
        "}";
  }

  /** Returns a RemoteException body for AccessControlException. */
  public static String getAccessControlException() {
    return "{\n" + "  \"RemoteException\":\n" + "  {\n"
        + "    \"exception\"    : \"AccessControlException\",\n"
        + "    \"javaClassName\": \"org.apache.hadoop.security"
        + ".AccessControlException\",\n"
        + "    \"message\"      : \"Permission denied: ...\"\n" + "  }\n" + "}";
  }

  /** Returns a RemoteException body for FileNotFoundException. */
  public static String getFileNotFoundException() {
    return "{\n" + "  \"RemoteException\":\n" + "  {\n"
        + "    \"exception\"    : \"FileNotFoundException\",\n"
        + "    \"javaClassName\": \"java.io.FileNotFoundException\",\n"
        + "    \"message\"      : \"File does not exist\"\n" + "  }\n" + "}";
  }

  /** Returns 4 MB of random data. */
  public static byte[] getRandomByteArrayData() {
    return getRandomByteArrayData(4 * 1024 * 1024);
  }

  /**
   * Returns {@code size} bytes of random data.
   *
   * @param size number of bytes to generate
   * @return freshly allocated random byte array
   */
  public static byte[] getRandomByteArrayData(int size) {
    byte[] b = new byte[size];
    Random rand = new Random();
    rand.nextBytes(b);
    return b;
  }
}

View File

@ -0,0 +1,196 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.common.Parallelized;
import org.apache.hadoop.fs.adl.common.TestDataForRead;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Random;
import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY;
/**
 * Stress-tests positional reads against the number of network calls required
 * to fetch the data, and verifies that data integrity and ordering are
 * maintained. Each parameterized case replays a canned payload through the
 * mock ADL web server.
 */
@RunWith(Parallelized.class)
public class TestAdlRead extends AdlMockWebServer {

  // Payload plus expected network-call count for the current parameter set.
  private TestDataForRead testData;

  public TestAdlRead(TestDataForRead testData) {
    // Shrink the read-ahead buffer to 4 KB so even small payloads exercise
    // multiple server round trips.
    Configuration configuration = new Configuration();
    configuration.setInt(READ_AHEAD_BUFFER_SIZE_KEY, 4 * 1024);
    setConf(configuration);
    this.testData = testData;
  }

  @Parameterized.Parameters(name = "{index}")
  public static Collection testDataForReadOperation() {
    return Arrays.asList(new Object[][] {
        //--------------------------
        // Test Data
        //--------------------------
        {new TestDataForRead("Hello World".getBytes(), 2, 1000, true)},
        {new TestDataForRead(
            ("the problem you appear to be wrestling with is that this doesn't "
                + "display very well. ").getBytes(), 2, 1000, true)},
        {new TestDataForRead(("您的數據是寶貴的資產,以您的組織,並有當前和未來價值。由於這個原因,"
            + "所有的數據應存儲以供將來分析。今天,這往往是不這樣做," + "因為傳統的分析基礎架構的限制,"
            + "像模式的預定義,存儲大數據集和不同的數據筒倉的傳播的成本。"
            + "為了應對這一挑戰,數據湖面概念被引入作為一個企業級存儲庫來存儲所有"
            + "類型的在一個地方收集到的數據。對於運作和探索性分析的目的,所有類型的" + "數據可以定義需求或模式之前被存儲在數據湖。")
            .getBytes(), 2, 1000, true)}, {new TestDataForRead(
        TestADLResponseData.getRandomByteArrayData(4 * 1024), 2, 10, true)},
        {new TestDataForRead(TestADLResponseData.getRandomByteArrayData(100), 2,
            1000, true)}, {new TestDataForRead(
        TestADLResponseData.getRandomByteArrayData(1 * 1024), 2, 50, true)},
        {new TestDataForRead(
            TestADLResponseData.getRandomByteArrayData(8 * 1024), 3, 10,
            false)}, {new TestDataForRead(
        TestADLResponseData.getRandomByteArrayData(16 * 1024), 5, 10, false)},
        {new TestDataForRead(
            TestADLResponseData.getRandomByteArrayData(32 * 1024), 9, 10,
            false)}, {new TestDataForRead(
        TestADLResponseData.getRandomByteArrayData(64 * 1024), 17, 10,
        false)}});
  }

  /**
   * Reads the whole file via repeated read(byte[], off, len) calls and checks
   * content and, where enabled, the exact number of server round trips.
   */
  @Test
  public void testEntireBytes() throws IOException, InterruptedException {
    getMockServer().setDispatcher(testData.getDispatcher());
    FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test"));
    byte[] expectedData = new byte[testData.getActualData().length];
    int n = 0;
    int len = expectedData.length;
    int off = 0;
    // Standard read loop: read() may return fewer bytes than requested.
    while (n < len) {
      int count = in.read(expectedData, off + n, len - n);
      if (count < 0) {
        throw new EOFException();
      }
      n += count;
    }
    Assert.assertEquals(testData.getActualData().length, expectedData.length);
    Assert.assertArrayEquals(expectedData, testData.getActualData());
    in.close();
    if (testData.isCheckOfNoOfCalls()) {
      Assert.assertEquals(testData.getExpectedNoNetworkCall(),
          getMockServer().getRequestCount());
    }
  }

  /**
   * Seeks to 1000 random positions and verifies position and the byte read
   * at each position (masked to an unsigned value, as read() returns 0-255).
   */
  @Test
  public void testSeekOperation() throws IOException, InterruptedException {
    getMockServer().setDispatcher(testData.getDispatcher());
    FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test"));
    Random random = new Random();
    for (int i = 0; i < 1000; ++i) {
      int position = random.nextInt(testData.getActualData().length);
      in.seek(position);
      Assert.assertEquals(in.getPos(), position);
      Assert.assertEquals(in.read(), testData.getActualData()[position] & 0xFF);
    }
    in.close();
    if (testData.isCheckOfNoOfCalls()) {
      Assert.assertEquals(testData.getExpectedNoNetworkCall(),
          getMockServer().getRequestCount());
    }
  }

  /**
   * Verifies that a full readFully() issues exactly the expected number of
   * network calls for this payload size.
   */
  @Test
  public void testReadServerCalls() throws IOException, InterruptedException {
    getMockServer().setDispatcher(testData.getDispatcher());
    FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test"));
    byte[] expectedData = new byte[testData.getActualData().length];
    in.readFully(expectedData);
    Assert.assertArrayEquals(expectedData, testData.getActualData());
    Assert.assertEquals(testData.getExpectedNoNetworkCall(),
        getMockServer().getRequestCount());
    in.close();
  }

  /**
   * Exercises all three readFully overloads (stream-position, positional,
   * positional with offset/length) against the same payload.
   */
  @Test
  public void testReadFully() throws IOException, InterruptedException {
    getMockServer().setDispatcher(testData.getDispatcher());
    FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test"));
    byte[] expectedData = new byte[testData.getActualData().length];
    in.readFully(expectedData);
    Assert.assertArrayEquals(expectedData, testData.getActualData());

    in.readFully(0, expectedData);
    Assert.assertArrayEquals(expectedData, testData.getActualData());

    in.readFully(0, expectedData, 0, expectedData.length);
    Assert.assertArrayEquals(expectedData, testData.getActualData());
    in.close();
  }

  /**
   * Positional readFully at random offsets, first to end-of-file and then for
   * random lengths, compared against the same slice of the reference data.
   */
  @Test
  public void testRandomPositionalReadUsingReadFully()
      throws IOException, InterruptedException {
    getMockServer().setDispatcher(testData.getDispatcher());
    FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test"));
    ByteArrayInputStream actualData = new ByteArrayInputStream(
        testData.getActualData());
    Random random = new Random();
    // Pass 1: random offset, read everything up to end of data.
    for (int i = 0; i < testData.getIntensityOfTest(); ++i) {
      int offset = random.nextInt(testData.getActualData().length);
      int length = testData.getActualData().length - offset;
      byte[] expectedData = new byte[length];
      byte[] actualDataSubset = new byte[length];
      actualData.reset();
      actualData.skip(offset);
      actualData.read(actualDataSubset, 0, length);

      in.readFully(offset, expectedData, 0, length);
      Assert.assertArrayEquals(expectedData, actualDataSubset);
    }

    // Pass 2: random offset AND random length within the remaining data.
    for (int i = 0; i < testData.getIntensityOfTest(); ++i) {
      int offset = random.nextInt(testData.getActualData().length);
      int length = random.nextInt(testData.getActualData().length - offset);
      byte[] expectedData = new byte[length];
      byte[] actualDataSubset = new byte[length];
      actualData.reset();
      actualData.skip(offset);
      actualData.read(actualDataSubset, 0, length);

      in.readFully(offset, expectedData, 0, length);
      Assert.assertArrayEquals(expectedData, actualDataSubset);
    }

    in.close();
  }
}

View File

@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY;
import static org.apache.hadoop.fs.adl.TokenProviderType.*;
import org.junit.Assert;
import org.junit.Test;
/**
 * Verifies that the appropriate token provider implementation is loaded for
 * each supported configuration (refresh token, client credential, custom).
 */
public class TestAzureADTokenProvider {

  @Test
  public void testRefreshTokenProvider()
      throws URISyntaxException, IOException {
    // RefreshToken type plus client id / refresh token / refresh URL must
    // select the SDK's refresh-token-based provider.
    Configuration conf = new Configuration();
    conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken);
    conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID");
    conf.set(AZURE_AD_REFRESH_TOKEN_KEY, "XYZ");
    conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh");

    AdlFileSystem fileSystem = new AdlFileSystem();
    fileSystem.initialize(new URI("adl://localhost:8080"), conf);
    Assert.assertTrue(
        fileSystem.getTokenProvider() instanceof RefreshTokenBasedTokenProvider);
  }

  @Test
  public void testClientCredTokenProvider()
      throws IOException, URISyntaxException {
    // ClientCredential type plus client id / secret / refresh URL must select
    // the SDK's client-credentials provider.
    Configuration conf = new Configuration();
    conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential);
    conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID");
    conf.set(AZURE_AD_CLIENT_SECRET_KEY, "XYZ");
    conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh");

    AdlFileSystem fileSystem = new AdlFileSystem();
    fileSystem.initialize(new URI("adl://localhost:8080"), conf);
    Assert.assertTrue(
        fileSystem.getTokenProvider() instanceof ClientCredsTokenProvider);
  }

  @Test
  public void testCustomCredTokenProvider()
      throws URISyntaxException, IOException {
    // A user-supplied AzureADTokenProvider class is wrapped in the adapter.
    Configuration conf = new Configuration();
    conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
        CustomMockTokenProvider.class, AzureADTokenProvider.class);

    AdlFileSystem fileSystem = new AdlFileSystem();
    fileSystem.initialize(new URI("adl://localhost:8080"), conf);
    Assert.assertTrue(
        fileSystem.getTokenProvider() instanceof SdkTokenProviderAdapter);
  }

  @Test
  public void testInvalidProviderConfigurationForType()
      throws URISyntaxException, IOException {
    Configuration conf = new Configuration();
    URI uri = new URI("adl://localhost:8080");
    AdlFileSystem fileSystem = new AdlFileSystem();
    // With no provider configured at all, initialization must fail and the
    // message must name the missing configuration key.
    try {
      fileSystem.initialize(uri, conf);
      Assert.fail("Initialization should have failed due no token provider "
          + "configuration");
    } catch (IllegalArgumentException e) {
      Assert.assertTrue(
          e.getMessage().contains("dfs.adls.oauth2.access.token.provider"));
    }
    // Supplying a provider class afterwards makes initialization succeed.
    conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
        CustomMockTokenProvider.class, AzureADTokenProvider.class);
    fileSystem.initialize(uri, conf);
  }

  @Test
  public void testInvalidProviderConfigurationForClassPath()
      throws URISyntaxException, IOException {
    Configuration conf = new Configuration();
    URI uri = new URI("adl://localhost:8080");
    AdlFileSystem fileSystem = new AdlFileSystem();
    // A provider class that cannot be resolved must fail initialization with
    // a message naming the offending class path.
    conf.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
        "wrong.classpath.CustomMockTokenProvider");
    try {
      fileSystem.initialize(uri, conf);
      Assert.fail("Initialization should have failed due invalid provider "
          + "configuration");
    } catch (RuntimeException e) {
      Assert.assertTrue(
          e.getMessage().contains("wrong.classpath.CustomMockTokenProvider"));
    }
  }
}

View File

@ -0,0 +1,299 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.Dispatcher;
import com.squareup.okhttp.mockwebserver.MockResponse;
import com.squareup.okhttp.mockwebserver.RecordedRequest;
import okio.Buffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Random;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This class is responsible for testing multiple threads trying to access same
* or multiple files from the offset.
*/
@RunWith(Parameterized.class)
public class TestConcurrentDataReadOperations extends AdlMockWebServer {
  private static final Logger LOG = LoggerFactory
      .getLogger(TestConcurrentDataReadOperations.class);
  // Guards lazy creation of the shared stream in the same-stream tests.
  private static final Object LOCK = new Object();
  // Stream shared across reader threads; reset before every test so runs
  // stay independent.
  private static FSDataInputStream commonHandle = null;
  private final int concurrencyLevel;

  public TestConcurrentDataReadOperations(int concurrencyLevel) {
    this.concurrencyLevel = concurrencyLevel;
  }

  /** Concurrency levels exercised by each parameterized run. */
  @Parameterized.Parameters(name = "{index}")
  public static Collection<?> testDataNumberOfConcurrentRun() {
    return Arrays.asList(new Object[][] {{1}, {2}, {3}, {4}, {5}});
  }

  /**
   * Generates random file content for the mock server to serve.
   *
   * @param size number of bytes to produce
   * @return freshly generated random byte array of length {@code size}
   */
  public static byte[] getRandomByteArrayData(int size) {
    byte[] b = new byte[size];
    Random rand = new Random();
    rand.nextBytes(b);
    return b;
  }

  /**
   * Installs a dispatcher on the mock server that answers GETFILESTATUS and
   * OPEN requests from the supplied in-memory files, matched by request path.
   */
  private void setDispatcher(final ArrayList<CreateTestData> testData) {
    getMockServer().setDispatcher(new Dispatcher() {
      @Override
      public MockResponse dispatch(RecordedRequest recordedRequest)
          throws InterruptedException {
        CreateTestData currentRequest = null;
        for (CreateTestData local : testData) {
          if (recordedRequest.getPath().contains(local.path.toString())) {
            currentRequest = local;
            break;
          }
        }
        if (currentRequest == null) {
          // Bug fix: this response was previously constructed but never
          // returned, so execution fell through and dereferenced the null
          // currentRequest below instead of reporting a clean 501.
          return new MockResponse().setBody("Request data not found")
              .setResponseCode(501);
        }
        if (recordedRequest.getRequestLine().contains("op=GETFILESTATUS")) {
          return new MockResponse().setResponseCode(200).setBody(
              TestADLResponseData
                  .getGetFileStatusJSONResponse(currentRequest.data.length));
        }
        if (recordedRequest.getRequestLine().contains("op=OPEN")) {
          String request = recordedRequest.getRequestLine();
          int offset = 0;
          int byteCount = 0;
          Pattern pattern = Pattern.compile("offset=([0-9]+)");
          Matcher matcher = pattern.matcher(request);
          if (matcher.find()) {
            LOG.debug(matcher.group(1));
            offset = Integer.parseInt(matcher.group(1));
          }
          pattern = Pattern.compile("length=([0-9]+)");
          matcher = pattern.matcher(request);
          if (matcher.find()) {
            LOG.debug(matcher.group(1));
            byteCount = Integer.parseInt(matcher.group(1));
          }
          Buffer buf = new Buffer();
          // Clamp to the remaining bytes so reads near EOF do not overrun.
          buf.write(currentRequest.data, offset,
              Math.min(currentRequest.data.length - offset, byteCount));
          return new MockResponse().setResponseCode(200)
              .setChunkedBody(buf, 4 * 1024 * 1024);
        }
        return new MockResponse().setBody("NOT SUPPORTED").setResponseCode(501);
      }
    });
  }

  @Before
  public void resetHandle() {
    commonHandle = null;
  }

  /** Each task reads a distinct file through its own stream. */
  @Test
  public void testParallelReadOnDifferentStreams()
      throws IOException, InterruptedException, ExecutionException {
    ArrayList<CreateTestData> createTestData = new ArrayList<CreateTestData>();
    Random random = new Random();
    for (int i = 0; i < concurrencyLevel; i++) {
      CreateTestData testData = new CreateTestData();
      testData
          .set(new Path("/test/concurrentRead/" + UUID.randomUUID().toString()),
              getRandomByteArrayData(random.nextInt(1 * 1024 * 1024)));
      createTestData.add(testData);
    }
    setDispatcher(createTestData);
    ArrayList<ReadTestData> readTestData = new ArrayList<ReadTestData>();
    for (CreateTestData local : createTestData) {
      ReadTestData localReadData = new ReadTestData();
      localReadData.set(local.path, local.data, 0);
      readTestData.add(localReadData);
    }
    runReadTest(readTestData, false);
  }

  /**
   * Multiple tasks read different offsets of the same file through one
   * shared stream handle.
   */
  @Test
  public void testParallelReadOnSameStreams()
      throws IOException, InterruptedException, ExecutionException {
    ArrayList<CreateTestData> createTestData = new ArrayList<CreateTestData>();
    Random random = new Random();
    for (int i = 0; i < 1; i++) {
      CreateTestData testData = new CreateTestData();
      testData
          .set(new Path("/test/concurrentRead/" + UUID.randomUUID().toString()),
              getRandomByteArrayData(1024 * 1024));
      createTestData.add(testData);
    }
    setDispatcher(createTestData);
    ArrayList<ReadTestData> readTestData = new ArrayList<ReadTestData>();
    ByteArrayInputStream buffered = new ByteArrayInputStream(
        createTestData.get(0).data);
    ReadTestData readInitially = new ReadTestData();
    byte[] initialData = new byte[1024 * 1024];
    // ByteArrayInputStream fills the whole buffer while enough bytes remain,
    // so the return value is intentionally not checked here.
    buffered.read(initialData);
    readInitially.set(createTestData.get(0).path, initialData, 0);
    readTestData.add(readInitially);
    runReadTest(readTestData, false);
    readTestData.clear();
    for (int i = 0; i < concurrencyLevel * 5; i++) {
      ReadTestData localReadData = new ReadTestData();
      int offset = random.nextInt((1024 * 1024) - 1);
      int length = 1024 * 1024 - offset;
      byte[] expectedData = new byte[length];
      buffered.reset();
      buffered.skip(offset);
      buffered.read(expectedData);
      localReadData.set(createTestData.get(0).path, expectedData, offset);
      readTestData.add(localReadData);
    }
    runReadTest(readTestData, true);
  }

  /**
   * Submits one reader task per entry and asserts that every task succeeded.
   */
  void runReadTest(ArrayList<ReadTestData> testData, boolean useSameStream)
      throws InterruptedException, ExecutionException {
    ExecutorService executor = Executors.newFixedThreadPool(testData.size());
    // Typed list instead of a raw Future[] to avoid unchecked casts below.
    ArrayList<Future<Boolean>> subtasks =
        new ArrayList<Future<Boolean>>(testData.size());
    for (int i = 0; i < testData.size(); i++) {
      subtasks.add(executor.submit(
          new ReadConcurrentRunnable(testData.get(i).data, testData.get(i).path,
              testData.get(i).offset, useSameStream)));
    }
    executor.shutdown();
    // Wait until all tasks are finished; fail fast on timeout instead of
    // potentially blocking forever in Future.get below.
    Assert.assertTrue("Reader tasks did not finish within 120s",
        executor.awaitTermination(120, TimeUnit.SECONDS));
    for (Future<Boolean> subtask : subtasks) {
      Assert.assertTrue(subtask.get());
    }
  }

  /** Holder for a single read request: path, expected bytes, and offset. */
  class ReadTestData {
    private Path path;
    private byte[] data;
    private int offset;

    public void set(Path filePath, byte[] dataToBeRead, int fromOffset) {
      this.path = filePath;
      this.data = dataToBeRead;
      this.offset = fromOffset;
    }
  }

  /** Holder for one mock file: its path and content. */
  class CreateTestData {
    private Path path;
    private byte[] data;

    public void set(Path filePath, byte[] dataToBeWritten) {
      this.path = filePath;
      this.data = dataToBeWritten;
    }
  }

  /** Reads a file slice and compares it against the expected bytes. */
  class ReadConcurrentRunnable implements Callable<Boolean> {
    private final Path path;
    private final int offset;
    private final byte[] expectedData;
    private final boolean useSameStream;

    public ReadConcurrentRunnable(byte[] expectedData, Path path, int offset,
        boolean useSameStream) {
      this.path = path;
      this.offset = offset;
      this.expectedData = expectedData;
      this.useSameStream = useSameStream;
    }

    @Override
    public Boolean call() throws IOException {
      try {
        FSDataInputStream in;
        if (useSameStream) {
          synchronized (LOCK) {
            if (commonHandle == null) {
              commonHandle = getMockAdlFileSystem().open(path);
            }
            in = commonHandle;
          }
        } else {
          in = getMockAdlFileSystem().open(path);
        }
        byte[] actualData = new byte[expectedData.length];
        // Positioned read; NOTE(review): assumes positioned reads on the
        // shared handle are safe across threads -- confirm with the stream's
        // PositionedReadable contract.
        in.readFully(offset, actualData);
        Assert.assertArrayEquals("Path :" + path.toString() + " did not match.",
            expectedData, actualData);
        if (!useSameStream) {
          in.close();
        }
      } catch (IOException e) {
        // Log instead of printStackTrace; runReadTest asserts on the result.
        LOG.error("Read failed for path " + path, e);
        return false;
      }
      return true;
    }
  }
}

View File

@ -0,0 +1,136 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
/**
* Test access token provider behaviour with custom token provider and for token
* provider cache is enabled.
*/
@RunWith(Parameterized.class)
public class TestCustomTokenProvider extends AdlMockWebServer {
  private static final long TEN_MINUTES_IN_MILLIS = 600000;
  private final int backendCallCount;
  private final int expectedCallbackToAccessToken;
  private TestableAdlFileSystem[] fileSystems;
  // Class<?> rather than the raw Class type; instances are created via the
  // configured token-provider key in init().
  private final Class<?> typeOfTokenProviderClass;
  private final long expiryFromNow;
  private final int fsObjectCount;

  public TestCustomTokenProvider(Class<?> typeOfTokenProviderClass,
      long expiryFromNow, int fsObjectCount, int backendCallCount,
      int expectedCallbackToAccessToken)
      throws IllegalAccessException, InstantiationException, URISyntaxException,
      IOException {
    this.typeOfTokenProviderClass = typeOfTokenProviderClass;
    this.expiryFromNow = expiryFromNow;
    this.fsObjectCount = fsObjectCount;
    this.backendCallCount = backendCallCount;
    this.expectedCallbackToAccessToken = expectedCallbackToAccessToken;
  }

  @Parameterized.Parameters(name = "{index}")
  public static Collection<Object[]> testDataForTokenProvider() {
    return Arrays.asList(new Object[][] {
        // Data set in order
        // INPUT - CustomTokenProvider class to load
        // INPUT - expiry time in millis. Subtract from current time
        // INPUT - No. of FileSystem object
        // INPUT - No. of backend calls per FileSystem object
        // EXPECTED - Number of callbacks to get token after test finished.
        {CustomMockTokenProvider.class, 0, 1, 1, 1},
        {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILLIS, 1, 1, 1},
        {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILLIS, 2, 1, 2},
        {CustomMockTokenProvider.class, TEN_MINUTES_IN_MILLIS, 10, 10, 10}});
  }

  /**
   * Explicitly invoked init so that base class mock server is setup before
   * test data initialization is done.
   *
   * @throws IOException when a filesystem fails to initialize
   * @throws URISyntaxException when the mock URI is malformed
   */
  public void init() throws IOException, URISyntaxException {
    Configuration configuration = new Configuration();
    configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
        typeOfTokenProviderClass.getName());
    fileSystems = new TestableAdlFileSystem[fsObjectCount];
    URI uri = new URI("adl://localhost:" + getPort());
    for (int i = 0; i < fsObjectCount; ++i) {
      fileSystems[i] = new TestableAdlFileSystem();
      fileSystems[i].initialize(uri, configuration);
      ((CustomMockTokenProvider) fileSystems[i].getAzureTokenProvider())
          .setExpiryTimeInMillisAfter(expiryFromNow);
    }
  }

  /**
   * Exercises getFileStatus across the configured filesystem instances and
   * asserts the total number of access-token callbacks matches expectation.
   */
  @Test
  public void testCustomTokenManagement()
      throws IOException, URISyntaxException {
    int accessTokenCallbackDuringExec = 0;
    init();
    for (TestableAdlFileSystem tfs : fileSystems) {
      for (int i = 0; i < backendCallCount; ++i) {
        getMockServer().enqueue(new MockResponse().setResponseCode(200)
            .setBody(TestADLResponseData.getGetFileStatusJSONResponse()));
        FileStatus fileStatus = tfs.getFileStatus(new Path("/test1/test2"));
        Assert.assertTrue(fileStatus.isFile());
        Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" +
                getMockServer().getPort() + "/test1/test2",
            fileStatus.getPath().toString());
        Assert.assertEquals(4194304, fileStatus.getLen());
        Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize());
        Assert.assertEquals(1, fileStatus.getReplication());
        Assert
            .assertEquals(new FsPermission("777"), fileStatus.getPermission());
        Assert.assertEquals("NotSupportYet", fileStatus.getOwner());
        Assert.assertEquals("NotSupportYet", fileStatus.getGroup());
      }
      accessTokenCallbackDuringExec += ((CustomMockTokenProvider) tfs
          .getAzureTokenProvider()).getAccessTokenRequestCount();
    }
    Assert.assertEquals(expectedCallbackToAccessToken,
        accessTokenCallbackDuringExec);
  }
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Time;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URISyntaxException;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
/**
* This class is responsible for testing local getFileStatus implementation
* to cover correct parsing of successful and error JSON response
* from the server.
* Adls GetFileStatus operation is in detail covered in
* org.apache.hadoop.fs.adl.live testing package.
*/
public class TestGetFileStatus extends AdlMockWebServer {
  private static final Logger LOG = LoggerFactory
      .getLogger(TestGetFileStatus.class);

  /**
   * Verifies that a successful GETFILESTATUS JSON response is parsed into
   * the expected FileStatus fields (path, length, block size, replication,
   * permission, owner, group).
   */
  @Test
  public void getFileStatusReturnsAsExpected()
      throws URISyntaxException, IOException {
    getMockServer().enqueue(new MockResponse().setResponseCode(200)
        .setBody(TestADLResponseData.getGetFileStatusJSONResponse()));
    long startTime = Time.monotonicNow();
    FileStatus fileStatus = getMockAdlFileSystem()
        .getFileStatus(new Path("/test1/test2"));
    long endTime = Time.monotonicNow();
    // Parameterized logging avoids string concatenation when debug is off.
    LOG.debug("Time : {}", endTime - startTime);
    Assert.assertTrue(fileStatus.isFile());
    Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" +
            getMockServer().getPort() + "/test1/test2",
        fileStatus.getPath().toString());
    Assert.assertEquals(4194304, fileStatus.getLen());
    Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize());
    Assert.assertEquals(1, fileStatus.getReplication());
    Assert.assertEquals(new FsPermission("777"), fileStatus.getPermission());
    Assert.assertEquals("NotSupportYet", fileStatus.getOwner());
    Assert.assertEquals("NotSupportYet", fileStatus.getGroup());
  }
}

View File

@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.Time;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
* This class is responsible for testing local listStatus implementation to
* cover correct parsing of successful and error JSON response from the server.
* Adls ListStatus functionality is in detail covered in
* org.apache.hadoop.fs.adl.live testing package.
*/
public class TestListStatus extends AdlMockWebServer {
  private static final Logger LOG = LoggerFactory
      .getLogger(TestListStatus.class);

  /**
   * Verifies listStatus parses listings of several sizes (10, 200, 2048).
   */
  @Test
  public void listStatusReturnsAsExpected() throws IOException {
    getMockServer().enqueue(new MockResponse().setResponseCode(200)
        .setBody(TestADLResponseData.getListFileStatusJSONResponse(10)));
    long startTime = Time.monotonicNow();
    FileStatus[] ls = getMockAdlFileSystem()
        .listStatus(new Path("/test1/test2"));
    long endTime = Time.monotonicNow();
    LOG.debug("Time : {}", endTime - startTime);
    // JUnit convention: expected value first (was reversed in the original).
    Assert.assertEquals(10, ls.length);

    getMockServer().enqueue(new MockResponse().setResponseCode(200)
        .setBody(TestADLResponseData.getListFileStatusJSONResponse(200)));
    startTime = Time.monotonicNow();
    ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
    endTime = Time.monotonicNow();
    LOG.debug("Time : {}", endTime - startTime);
    Assert.assertEquals(200, ls.length);

    getMockServer().enqueue(new MockResponse().setResponseCode(200)
        .setBody(TestADLResponseData.getListFileStatusJSONResponse(2048)));
    startTime = Time.monotonicNow();
    ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
    endTime = Time.monotonicNow();
    LOG.debug("Time : {}", endTime - startTime);
    Assert.assertEquals(2048, ls.length);
  }

  /**
   * Verifies the error JSON payloads for 403 and 500 responses surface as
   * IOExceptions with the server-provided message.
   */
  @Test
  public void listStatusOnFailure() throws IOException {
    getMockServer().enqueue(new MockResponse().setResponseCode(403).setBody(
        TestADLResponseData.getErrorIllegalArgumentExceptionJSONResponse()));
    FileStatus[] ls = null;
    long startTime = Time.monotonicNow();
    try {
      ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
    } catch (IOException e) {
      Assert.assertTrue(e.getMessage().contains("Invalid"));
    }
    long endTime = Time.monotonicNow();
    LOG.debug("Time : {}", endTime - startTime);
    // SDK may increase number of retry attempts before error is propagated
    // to caller. Adding max 10 error responses in the queue to align with SDK.
    for (int i = 0; i < 10; ++i) {
      getMockServer().enqueue(new MockResponse().setResponseCode(500).setBody(
          TestADLResponseData.getErrorInternalServerExceptionJSONResponse()));
    }
    startTime = Time.monotonicNow();
    try {
      ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
    } catch (IOException e) {
      Assert.assertTrue(e.getMessage().contains("Internal Server Error"));
    }
    endTime = Time.monotonicNow();
    LOG.debug("Time : {}", endTime - startTime);
  }
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
/**
* This class verifies path conversion to SDK.
*/
public class TestRelativePathFormation {

  /**
   * Checks that absolute, relative, and fully-qualified paths are all
   * converted to the SDK-relative form, with and without a working directory.
   */
  @Test
  public void testToRelativePath() throws URISyntaxException, IOException {
    AdlFileSystem adlFs = new AdlFileSystem();
    Configuration conf = new Configuration();
    conf.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
        "org.apache.hadoop.fs.adl.common.CustomMockTokenProvider");
    adlFs.initialize(new URI("adl://temp.account.net"), conf);

    // Without a working directory set.
    Assert.assertEquals("/usr", adlFs.toRelativeFilePath(new Path("/usr")));
    Assert.assertEquals("/usr",
        adlFs.toRelativeFilePath(new Path("adl://temp.account.net/usr")));

    // When working directory is set, only relative paths resolve against it.
    adlFs.setWorkingDirectory(new Path("/a/b/"));
    Assert.assertEquals("/usr", adlFs.toRelativeFilePath(new Path("/usr")));
    Assert.assertEquals("/a/b/usr", adlFs.toRelativeFilePath(new Path("usr")));
    Assert.assertEquals("/usr",
        adlFs.toRelativeFilePath(new Path("adl://temp.account.net/usr")));
    // NOTE(review): a non-adl scheme appears to be stripped the same way --
    // confirm against toRelativeFilePath's implementation.
    Assert.assertEquals("/usr",
        adlFs.toRelativeFilePath(new Path("wasb://temp.account.net/usr")));
  }
}

View File

@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.junit.Assert;
import org.junit.Test;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_REPLICATION_FACTOR;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_READ_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_WRITE_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.TOKEN_PROVIDER_TYPE_CLIENT_CRED;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.TOKEN_PROVIDER_TYPE_REFRESH_TOKEN;
import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY;
/**
* Validate configuration keys defined for adl storage file system instance.
*/
public class TestValidateConfiguration {

  /**
   * Pins every public configuration key and default so an accidental rename
   * or value change breaks this test.
   */
  @Test
  public void validateConfigurationKeys() {
    // OAuth2 credential configuration keys.
    Assert.assertEquals("dfs.adls.oauth2.refresh.url",
        AZURE_AD_REFRESH_URL_KEY);
    Assert.assertEquals("dfs.adls.oauth2.access.token.provider",
        AZURE_AD_TOKEN_PROVIDER_CLASS_KEY);
    Assert.assertEquals("dfs.adls.oauth2.client.id", AZURE_AD_CLIENT_ID_KEY);
    Assert.assertEquals("dfs.adls.oauth2.refresh.token",
        AZURE_AD_REFRESH_TOKEN_KEY);
    Assert.assertEquals("dfs.adls.oauth2.credential",
        AZURE_AD_CLIENT_SECRET_KEY);
    Assert.assertEquals("adl.debug.override.localuserasfileowner",
        ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER);
    Assert.assertEquals("dfs.adls.oauth2.access.token.provider.type",
        AZURE_AD_TOKEN_PROVIDER_TYPE_KEY);

    // Client-side buffering keys.
    Assert.assertEquals("adl.feature.client.cache.readahead",
        READ_AHEAD_BUFFER_SIZE_KEY);
    Assert.assertEquals("adl.feature.client.cache.drop.behind.writes",
        WRITE_BUFFER_SIZE_KEY);

    // Token provider type identifiers.
    Assert.assertEquals("RefreshToken", TOKEN_PROVIDER_TYPE_REFRESH_TOKEN);
    Assert.assertEquals("ClientCredential", TOKEN_PROVIDER_TYPE_CLIENT_CRED);

    // Latency tracker and experimental positional-read settings.
    Assert.assertEquals("adl.dfs.enable.client.latency.tracker",
        LATENCY_TRACKER_KEY);
    Assert.assertEquals(true, LATENCY_TRACKER_DEFAULT);
    Assert.assertEquals(true, ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT);
    Assert.assertEquals("adl.feature.experiment.positional.read.enable",
        ADL_EXPERIMENT_POSITIONAL_READ_KEY);

    // Reported filesystem defaults.
    Assert.assertEquals(1, ADL_REPLICATION_FACTOR);
    Assert.assertEquals(256 * 1024 * 1024, ADL_BLOCK_SIZE);
    Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
    Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE);
    Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE);
  }
}

View File

@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
/**
* Mock adl file storage subclass to mock adl storage on local http service.
*/
public class TestableAdlFileSystem extends AdlFileSystem {
  /** Plain-HTTP scheme used to talk to the local mock server. */
  private static final String MOCK_TRANSPORT_SCHEME = "http";

  @Override
  protected String getTransportScheme() {
    return MOCK_TRANSPORT_SCHEME;
  }
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl.common;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import java.io.IOException;
import java.util.Date;
import java.util.Random;
/**
* Custom token management without cache enabled.
*/
public class CustomMockTokenProvider extends AzureADTokenProvider {
  // Source of pseudo-random token values.
  private Random random;
  // Absolute expiry instant in milliseconds since the epoch.
  private long expiryTime;
  // How many times a token has been requested; inspected by tests.
  private int accessTokenRequestCount = 0;

  @Override
  public void initialize(Configuration configuration) throws IOException {
    random = new Random();
  }

  @Override
  public String getAccessToken() throws IOException {
    accessTokenRequestCount++;
    return Integer.toString(random.nextInt());
  }

  @Override
  public Date getExpiryTime() {
    Date expiry = new Date();
    expiry.setTime(expiryTime);
    return expiry;
  }

  /** Sets the expiry to {@code timeInMillis} past the current clock time. */
  public void setExpiryTimeInMillisAfter(long timeInMillis) {
    expiryTime = System.currentTimeMillis() + timeInMillis;
  }

  /** Returns the number of getAccessToken calls observed so far. */
  public int getAccessTokenRequestCount() {
    return accessTokenRequestCount;
  }
}

View File

@ -0,0 +1,71 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.common;
import com.squareup.okhttp.mockwebserver.MockResponse;
import java.util.ArrayList;
/**
* Supporting class to hold expected MockResponse object along with parameters
* for validation in test methods.
*/
public class ExpectedResponse {
  // Canned response the mock server should return.
  private MockResponse response;
  // Query parameters the recorded request is expected to carry.
  private ArrayList<String> expectedQueryParameters = new ArrayList<String>();
  // Expected size of the request body in bytes.
  private int expectedBodySize;
  // Expected HTTP method (GET, PUT, ...).
  private String httpRequestType;

  // --- Fluent builders, package-private for test fixtures ---

  ExpectedResponse set(MockResponse mockResponse) {
    this.response = mockResponse;
    return this;
  }

  ExpectedResponse addExpectedQueryParam(String param) {
    expectedQueryParameters.add(param);
    return this;
  }

  ExpectedResponse addExpectedBodySize(int bodySize) {
    this.expectedBodySize = bodySize;
    return this;
  }

  ExpectedResponse addExpectedHttpRequestType(String expectedHttpRequestType) {
    this.httpRequestType = expectedHttpRequestType;
    return this;
  }

  // --- Accessors used during request validation ---

  public MockResponse getResponse() {
    return response;
  }

  public ArrayList<String> getExpectedQueryParameters() {
    return expectedQueryParameters;
  }

  public int getExpectedBodySize() {
    return expectedBodySize;
  }

  public String getHttpRequestType() {
    return httpRequestType;
  }
}

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.common;
import org.junit.runners.Parameterized;
import org.junit.runners.model.RunnerScheduler;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
* Provided for convenience to execute parametrized test cases concurrently.
*/
public class Parallelized extends Parameterized {
public Parallelized(Class classObj) throws Throwable {
super(classObj);
setScheduler(new ThreadPoolScheduler());
}
private static class ThreadPoolScheduler implements RunnerScheduler {
private ExecutorService executor;
public ThreadPoolScheduler() {
int numThreads = 10;
executor = Executors.newFixedThreadPool(numThreads);
}
public void finished() {
executor.shutdown();
try {
executor.awaitTermination(10, TimeUnit.MINUTES);
} catch (InterruptedException exc) {
throw new RuntimeException(exc);
}
}
public void schedule(Runnable childStatement) {
executor.submit(childStatement);
}
}
}

View File

@ -0,0 +1,122 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.common;
import com.squareup.okhttp.mockwebserver.Dispatcher;
import com.squareup.okhttp.mockwebserver.MockResponse;
import com.squareup.okhttp.mockwebserver.RecordedRequest;
import okio.Buffer;
import org.apache.hadoop.fs.adl.TestADLResponseData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Supporting class for mock test to validate Adls read operation.
 */
public class TestDataForRead {

  private static final Logger LOG = LoggerFactory
      .getLogger(TestDataForRead.class);

  // Backing data that the mock server serves back to read requests.
  private byte[] actualData;
  private ArrayList<ExpectedResponse> responses;
  private Dispatcher dispatcher;
  private int intensityOfTest;
  private boolean checkOfNoOfCalls;
  private int expectedNoNetworkCall;

  /**
   * Builds the test fixture and a mock-server dispatcher that answers
   * GETFILESTATUS and OPEN requests against {@code actualData}.
   *
   * @param actualData           file content served by the mock server
   * @param expectedNoNetworkCall expected number of network calls
   * @param intensityOfTest      repetition factor for the test
   * @param checkOfNoOfCalls     whether the call count should be verified
   */
  public TestDataForRead(final byte[] actualData, int expectedNoNetworkCall,
      int intensityOfTest, boolean checkOfNoOfCalls) {
    this.actualData = actualData;
    this.expectedNoNetworkCall = expectedNoNetworkCall;
    this.intensityOfTest = intensityOfTest;
    this.checkOfNoOfCalls = checkOfNoOfCalls;
    this.responses = new ArrayList<ExpectedResponse>();
    this.dispatcher = new Dispatcher() {
      @Override
      public MockResponse dispatch(RecordedRequest recordedRequest)
          throws InterruptedException {
        String requestLine = recordedRequest.getRequestLine();

        if (requestLine.contains("op=GETFILESTATUS")) {
          return new MockResponse().setResponseCode(200).setBody(
              TestADLResponseData
                  .getGetFileStatusJSONResponse(actualData.length));
        }

        if (requestLine.contains("op=OPEN")) {
          int offset = parseIntParameter(requestLine, "offset");
          int byteCount = parseIntParameter(requestLine, "length");

          // Serve at most the bytes remaining from the requested offset.
          Buffer body = new Buffer();
          body.write(actualData, offset,
              Math.min(actualData.length - offset, byteCount));
          return new MockResponse().setResponseCode(200)
              .setChunkedBody(body, 4 * 1024 * 1024);
        }

        return new MockResponse().setBody("NOT SUPPORTED").setResponseCode(501);
      }
    };
  }

  /**
   * Extracts an integer query parameter from the request line, returning 0
   * when the parameter is absent.
   */
  private static int parseIntParameter(String requestLine, String name) {
    Matcher matcher =
        Pattern.compile(name + "=([0-9]+)").matcher(requestLine);
    if (matcher.find()) {
      LOG.debug(matcher.group(1));
      return Integer.parseInt(matcher.group(1));
    }
    return 0;
  }

  public boolean isCheckOfNoOfCalls() {
    return checkOfNoOfCalls;
  }

  public int getExpectedNoNetworkCall() {
    return expectedNoNetworkCall;
  }

  public int getIntensityOfTest() {
    return intensityOfTest;
  }

  public byte[] getActualData() {
    return actualData;
  }

  public ArrayList<ExpectedResponse> getResponses() {
    return responses;
  }

  public Dispatcher getDispatcher() {
    return dispatcher;
  }
}

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Configure Adl storage file system.
 */
public final class AdlStorageConfiguration {
  private static final String CONTRACT_ENABLE_KEY =
      "dfs.adl.test.contract.enable";
  private static final String TEST_CONFIGURATION_FILE_NAME =
      "contract-test-options.xml";
  private static final String TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME =
      "adls.xml";
  private static final String KEY_FILE_SYSTEM_IMPL = "fs.contract.test.fs";
  private static final String KEY_FILE_SYSTEM = "test.fs.adl.name";

  private static boolean isContractTestEnabled = false;
  private static Configuration conf = null;

  // Utility class; not instantiable.
  private AdlStorageConfiguration() {
  }

  /**
   * Loads a fresh configuration from the contract test resource files.
   *
   * @return configuration with contract-test-options.xml and adls.xml applied
   */
  public synchronized static Configuration getConfiguration() {
    Configuration newConf = new Configuration();
    newConf.addResource(TEST_CONFIGURATION_FILE_NAME);
    newConf.addResource(TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME);
    return newConf;
  }

  /**
   * @return true when live contract tests are enabled through the
   * dfs.adl.test.contract.enable configuration key.
   */
  public synchronized static boolean isContractTestEnabled() {
    if (conf == null) {
      conf = getConfiguration();
    }

    isContractTestEnabled = conf.getBoolean(CONTRACT_ENABLE_KEY, false);
    return isContractTestEnabled;
  }

  /**
   * Instantiates and initializes the configured ADL file system.
   *
   * @return initialized file system, or null when contract tests are disabled
   * @throws IOException if the file system name or implementation class is
   * missing or cannot be instantiated
   * @throws URISyntaxException if the configured file system URI is invalid
   */
  public synchronized static FileSystem createStorageConnector()
      throws URISyntaxException, IOException {
    if (conf == null) {
      conf = getConfiguration();
    }

    if (!isContractTestEnabled()) {
      return null;
    }

    String fileSystem = conf.get(KEY_FILE_SYSTEM);
    if (fileSystem == null || fileSystem.trim().length() == 0) {
      throw new IOException("Default file system not configured.");
    }
    String fileSystemImpl = conf.get(KEY_FILE_SYSTEM_IMPL);
    if (fileSystemImpl == null || fileSystemImpl.trim().length() == 0) {
      // Fixed message: a space was missing before "does not exist."
      throw new IOException(
          "Configuration " + KEY_FILE_SYSTEM_IMPL + " does not exist.");
    }
    FileSystem fs = null;
    try {
      fs = (FileSystem) Class.forName(fileSystemImpl).newInstance();
    } catch (Exception e) {
      // Preserve the original failure as the cause instead of dropping it.
      throw new IOException("Could not instantiate the filesystem.", e);
    }

    fs.initialize(new URI(conf.get(KEY_FILE_SYSTEM)), conf);
    return fs;
  }
}

View File

@ -0,0 +1,66 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import java.io.IOException;
import java.net.URISyntaxException;
/**
 * File system contract backed by the live Adl storage connector.
 */
class AdlStorageContract extends AbstractFSContract {

  private FileSystem fs;

  protected AdlStorageContract(Configuration conf) {
    super(conf);
    try {
      fs = AdlStorageConfiguration.createStorageConnector();
    } catch (URISyntaxException e) {
      throw new IllegalStateException("Can not initialize ADL FileSystem. "
          + "Please check test.fs.adl.name property.", e);
    } catch (IOException e) {
      throw new IllegalStateException("Can not initialize ADL FileSystem.", e);
    }
    this.setConf(AdlStorageConfiguration.getConfiguration());
  }

  @Override
  public String getScheme() {
    return "adl";
  }

  @Override
  public FileSystem getTestFileSystem() throws IOException {
    return fs;
  }

  @Override
  public Path getTestPath() {
    return new Path("/test");
  }

  @Override
  public boolean isEnabled() {
    return AdlStorageConfiguration.isContractTestEnabled();
  }
}

View File

@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractAppendTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Before;
import org.junit.Test;
/**
 * Verify Adls APPEND semantics compliance with Hadoop.
 */
public class TestAdlContractAppendLive extends AbstractContractAppendTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }

  @Override
  @Test
  public void testRenameFileBeingAppended() throws Throwable {
    // Renaming a file that is open for append is not supported on ADL.
    ContractTestUtils.unsupported("Skipping since renaming file in append "
        + "mode not supported in Adl");
  }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractConcatTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Before;
import org.junit.Test;
/**
 * Verify Adls CONCAT semantics compliance with Hadoop.
 */
public class TestAdlContractConcatLive extends AbstractContractConcatTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }

  @Test
  public void testConcatMissingTarget() throws Throwable {
    // Disabled until ADL matches the expected handling of missing targets.
    ContractTestUtils.unsupported("BUG : Adl to support expectation from "
        + "concat on missing targets.");
  }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractCreateTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Before;
import org.junit.Test;
/**
 * Verify Adls CREATE semantics compliance with Hadoop.
 */
public class TestAdlContractCreateLive extends AbstractContractCreateTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }

  @Test
  public void testOverwriteEmptyDirectory() throws Throwable {
    // Disabled until ADL supports overwriting an empty directory.
    ContractTestUtils
        .unsupported("BUG : Adl to support override empty " + "directory.");
  }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractDeleteTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.junit.Before;
/**
 * Verify Adls DELETE semantics compliance with Hadoop.
 */
public class TestAdlContractDeleteLive extends AbstractContractDeleteTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }
}

View File

@ -0,0 +1,55 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractMkdirTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Before;
import org.junit.Test;
/**
 * Verify Adls MKDIR semantics compliance with Hadoop.
 */
public class TestAdlContractMkdirLive extends AbstractContractMkdirTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration conf) {
    return new AdlStorageContract(conf);
  }

  @Test
  public void testMkdirOverParentFile() throws Throwable {
    ContractTestUtils.unsupported("Not supported by Adl");
  }

  @Test
  public void testNoMkdirOverFile() throws Throwable {
    ContractTestUtils.unsupported("Not supported by Adl");
  }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractOpenTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.junit.Before;
/**
 * Verify Adls OPEN/READ semantics compliance with Hadoop.
 */
public class TestAdlContractOpenLive extends AbstractContractOpenTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }
}

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractRenameTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Before;
import org.junit.Test;
/**
 * Verify Adls RENAME semantics compliance with Hadoop.
 */
public class TestAdlContractRenameLive extends AbstractContractRenameTest {
  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume
        .assumeTrue(AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Test
  public void testRenameFileOverExistingFile() throws Throwable {
    // Fixed message: the previous concatenation produced "POSIXbehaviour"
    // (missing space between the two literals).
    ContractTestUtils
        .unsupported("BUG : Adl to support full complete POSIX " + "behaviour");
  }

  @Test
  public void testRenameFileNonexistentDir() throws Throwable {
    ContractTestUtils
        .unsupported("BUG : Adl to support create dir is not " + "exist");
  }

  @Test
  public void testRenameWithNonEmptySubDir() throws Throwable {
    ContractTestUtils.unsupported("BUG : Adl to support non empty dir move.");
  }
}

View File

@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractRootDirectoryTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.apache.hadoop.fs.contract.ContractTestUtils;
import org.junit.Before;
import org.junit.Test;
/**
 * Verify Adls root level operation support.
 */
public class TestAdlContractRootDirLive
    extends AbstractContractRootDirectoryTest {
  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume
        .assumeTrue(AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Test
  public void testRmNonEmptyRootDirNonRecursive() throws Throwable {
    // Fixed message typo: "instred" -> "instead".
    ContractTestUtils.unsupported(
        "BUG : Adl should throw exception instead " + "of returning false.");
  }
}

View File

@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.contract.AbstractContractSeekTest;
import org.apache.hadoop.fs.contract.AbstractFSContract;
import org.junit.Before;
/**
 * Verify Adls OPEN/READ seek operation support.
 */
public class TestAdlContractSeekLive extends AbstractContractSeekTest {

  @Before
  @Override
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume.assumeTrue(
        AdlStorageConfiguration.isContractTestEnabled());
    super.setup();
  }

  @Override
  protected AbstractFSContract createContract(Configuration configuration) {
    return new AdlStorageContract(configuration);
  }
}

View File

@ -0,0 +1,102 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;
/**
 * Verify different data segment size writes ensure the integrity and
 * order of the data.
 */
public class TestAdlDifferentSizeWritesLive {

  /**
   * Generates a byte array of the given size filled with random content.
   *
   * @param size number of bytes to generate
   * @return randomly populated byte array of length {@code size}
   */
  public static byte[] getRandomByteArrayData(int size) {
    byte[] b = new byte[size];
    Random rand = new Random();
    rand.nextBytes(b);
    return b;
  }

  @Before
  public void setup() throws Exception {
    // Skip the whole suite unless live ADL contract testing is enabled.
    org.junit.Assume
        .assumeTrue(AdlStorageConfiguration.isContractTestEnabled());
  }

  @Test
  public void testSmallDataWrites() throws IOException {
    testDataIntegrity(4 * 1024 * 1024, 1 * 1024);
    testDataIntegrity(4 * 1024 * 1024, 7 * 1024);
    testDataIntegrity(4 * 1024 * 1024, 10);
    testDataIntegrity(2 * 1024 * 1024, 10);
    testDataIntegrity(1 * 1024 * 1024, 10);
    testDataIntegrity(100, 1);
  }

  @Test
  public void testMediumDataWrites() throws IOException {
    testDataIntegrity(4 * 1024 * 1024, 1 * 1024 * 1024);
    testDataIntegrity(7 * 1024 * 1024, 2 * 1024 * 1024);
    testDataIntegrity(9 * 1024 * 1024, 2 * 1024 * 1024);
    testDataIntegrity(10 * 1024 * 1024, 3 * 1024 * 1024);
  }

  /**
   * Writes {@code totalSize} random bytes in {@code chunkSize} segments,
   * reads the file back and asserts that the content round-trips intact.
   *
   * @param totalSize total number of bytes written
   * @param chunkSize size of each write segment
   * @throws IOException on any file system failure
   */
  private void testDataIntegrity(int totalSize, int chunkSize)
      throws IOException {
    Path path = new Path("/test/dataIntegrityCheck");
    FileSystem fs = null;
    try {
      fs = AdlStorageConfiguration.createStorageConnector();
    } catch (URISyntaxException e) {
      throw new IllegalStateException("Can not initialize ADL FileSystem. "
          + "Please check fs.defaultFS property.", e);
    }

    byte[] expectedData = getRandomByteArrayData(totalSize);

    // try-with-resources ensures the stream is closed even when a write
    // fails (the original leaked the stream on exception).
    try (FSDataOutputStream out = fs.create(path, true)) {
      int fullChunks = totalSize / chunkSize;
      int remainder = totalSize % chunkSize;
      int offset = 0;
      for (int i = 0; i < fullChunks; ++i) {
        out.write(expectedData, offset, chunkSize);
        offset += chunkSize;
      }
      // Flush whatever is left over after the last full chunk.
      out.write(expectedData, offset, remainder);
    }

    byte[] actualData = new byte[totalSize];
    try (FSDataInputStream in = fs.open(path)) {
      in.readFully(0, actualData);
    }

    Assert.assertArrayEquals(expectedData, actualData);
    Assert.assertTrue(fs.delete(path, true));
  }
}

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileSystemContractBaseTest;
import org.apache.hadoop.fs.Path;
import org.junit.Test;
import java.io.IOException;
/**
 * Verify Adls adhere to Hadoop file system semantics.
 */
public class TestAdlFileSystemContractLive extends FileSystemContractBaseTest {

  private FileSystem adlStore;

  @Override
  protected void setUp() throws Exception {
    adlStore = AdlStorageConfiguration.createStorageConnector();
    if (AdlStorageConfiguration.isContractTestEnabled()) {
      fs = adlStore;
    }
  }

  @Override
  protected void tearDown() throws Exception {
    if (!AdlStorageConfiguration.isContractTestEnabled()) {
      return;
    }
    cleanup();
    adlStore = null;
    fs = null;
  }

  @Override
  protected void runTest() throws Throwable {
    // Each contract test is a no-op when live testing is disabled.
    if (AdlStorageConfiguration.isContractTestEnabled()) {
      super.runTest();
    }
  }

  // Removes everything the suite created under the shared test root.
  private void cleanup() throws IOException {
    adlStore.delete(new Path("/test"), true);
  }

  public void testGetFileStatus() throws IOException {
    if (!AdlStorageConfiguration.isContractTestEnabled()) {
      return;
    }

    Path testPath = new Path("/test/adltest");
    if (adlStore.exists(testPath)) {
      adlStore.delete(testPath, false);
    }
    adlStore.create(testPath).close();
    assertTrue(adlStore.delete(testPath, false));
  }

  /**
   * The following tests are failing on Azure Data Lake and the Azure Data Lake
   * file system code needs to be modified to make them pass.
   * A separate work item has been opened for this.
   */
  @Test
  @Override
  public void testMkdirsFailsForSubdirectoryOfExistingFile() throws Exception {
    // BUG : Adl should return exception instead of false.
  }

  @Test
  @Override
  public void testMkdirsWithUmask() throws Exception {
    // Support under implementation in Adl
  }
}

View File

@ -0,0 +1,140 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>fs.contract.test.root-tests-enabled</name>
<value>true</value>
</property>
<property>
<name>fs.contract.test.supports-concat</name>
<value>true</value>
</property>
<property>
<name>fs.contract.rename-returns-false-if-source-missing</name>
<value>true</value>
</property>
<property>
<name>fs.contract.test.random-seek-count</name>
<value>10</value>
</property>
<property>
<name>fs.contract.is-case-sensitive</name>
<value>true</value>
</property>
<property>
<name>fs.contract.rename-returns-true-if-dest-exists</name>
<value>false</value>
</property>
<property>
<name>fs.contract.rename-returns-true-if-source-missing</name>
<value>false</value>
</property>
<property>
<name>fs.contract.rename-creates-dest-dirs</name>
<value>false</value>
</property>
<property>
<name>fs.contract.rename-remove-dest-if-empty-dir</name>
<value>false</value>
</property>
<property>
<name>fs.contract.supports-settimes</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-append</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-atomic-directory-delete</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-atomic-rename</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-block-locality</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-concat</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-seek</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-seek-on-closed-file</name>
<value>true</value>
</property>
<property>
<name>fs.contract.rejects-seek-past-eof</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-available-on-closed-file</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-strict-exceptions</name>
<value>false</value>
</property>
<property>
<name>fs.contract.supports-unix-permissions</name>
<value>true</value>
</property>
<property>
<name>fs.contract.rename-overwrites-dest</name>
<value>false</value>
</property>
  <!-- NOTE(review): duplicate of the fs.contract.supports-append property
       declared earlier in this file; the values agree, but one of the two
       entries should be removed. -->
  <property>
    <name>fs.contract.supports-append</name>
    <value>true</value>
  </property>
<property>
<name>fs.azure.enable.append.support</name>
<value>true</value>
</property>
<property>
<name>fs.contract.supports-getfilestatus</name>
<value>true</value>
</property>
</configuration>

View File

@ -0,0 +1,61 @@
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<property>
<name>dfs.adls.oauth2.refresh.url</name>
<value>
</value>
</property>
<property>
<name>dfs.adls.oauth2.credential</name>
<value></value>
</property>
<property>
<name>dfs.adls.oauth2.client.id</name>
<value></value>
</property>
<property>
<name>dfs.adls.oauth2.access.token.provider.type</name>
<value>ClientCredential</value>
<description>
Supported provider type:
"ClientCredential" : Client id and client credentials(Provided
through configuration file) flow.
"RefreshToken" : Client id and refresh token(Provided
through configuration file)flow.
"Custom" : Custom AAD token management.
</description>
</property>
<property>
<name>dfs.adl.test.contract.enable</name>
<value>false</value>
</property>
<property>
<name>test.fs.adl.name</name>
<value></value>
</property>
<property>
<name>fs.contract.test.fs</name>
<value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
</property>
</configuration>

View File

@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
log4j.rootLogger=DEBUG,stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} - %m%n
log4j.logger.your.app=*
log4j.additivity.your.app=false
log4j.logger.yourApp=*
log4j.additivity.yourApp=false
log4j.appender.yourApp=org.apache.log4j.ConsoleAppender
log4j.appender.yourApp.layout=org.apache.log4j.PatternLayout
log4j.appender.yourApp.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} %m%n
log4j.appender.yourApp.ImmediateFlush=true

View File

@ -46,6 +46,7 @@
<module>hadoop-sls</module>
<module>hadoop-aws</module>
<module>hadoop-azure</module>
<module>hadoop-azure-datalake</module>
</modules>
<build>