HADOOP-13037. Refactor Azure Data Lake Store as an independent FileSystem. Contributed by Vishwajeet Dusane

This commit is contained in:
Chris Douglas 2016-11-10 21:55:55 -08:00
parent a9ad5d6c81
commit 5c61ad2488
61 changed files with 2540 additions and 3679 deletions

View File

@ -2382,53 +2382,6 @@
<!-- Azure Data Lake File System Configurations -->
<property>
<name>adl.feature.override.readahead</name>
<value>true</value>
<description>
Enables read-ahead in the ADL client to improve read throughput.
This works in conjunction with the value set in
adl.feature.override.readahead.max.buffersize.
When set to false, the read-ahead feature is turned off.
Default: true if not configured.
</description>
</property>
<property>
<name>adl.feature.override.readahead.max.buffersize</name>
<value>8388608</value>
<description>
Defines the maximum buffer size, allocated per process, used to cache
read-ahead data. Applicable only when
adl.feature.override.readahead is set to true.
Default: 8388608 bytes (8 MB) if not configured.
</description>
</property>
<property>
<name>adl.feature.override.readahead.max.concurrent.connection</name>
<value>2</value>
<description>
Defines the maximum number of concurrent connections that can be
established for read-ahead. If the data size is less than 4 MB, only
one read connection is used. If the data size is greater than 4 MB but
less than 8 MB, two read connections are used. For data greater than
8 MB, the value set under this property takes effect. Applicable only
when adl.feature.override.readahead is set to true and the buffer size
is greater than 8 MB.
It is recommended to adjust this property when
adl.feature.override.readahead.max.buffersize is less than 8 MB to
improve performance. The application also has to consider the
throttling limit for the account before configuring a large buffer
size.
</description>
</property>
<property>
<name>fs.adl.impl</name>
<value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
@ -2438,6 +2391,7 @@
<name>fs.AbstractFileSystem.adl.impl</name>
<value>org.apache.hadoop.fs.adl.Adl</value>
</property>
<!-- Azure Data Lake File System Configurations End Here -->
<property>
<name>hadoop.caller.context.enabled</name>

View File

@ -1,24 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<FindBugsFilter>
<!-- Buffer object is accessed within trusted code and intentionally assigned instead of array copy -->
<Match>
<Class name="org.apache.hadoop.hdfs.web.PrivateAzureDataLakeFileSystem$BatchAppendOutputStream$CommitTask"/>
<Bug pattern="EI_EXPOSE_REP2"/>
<Priority value="2"/>
</Match>
</FindBugsFilter>

View File

@ -35,21 +35,16 @@
<file.encoding>UTF-8</file.encoding>
<downloadSources>true</downloadSources>
</properties>
<repositories>
<repository>
<id>snapshots-repo</id>
<url>https://oss.sonatype.org/content/repositories/snapshots</url>
<releases><enabled>false</enabled></releases>
<snapshots><enabled>true</enabled></snapshots>
</repository>
</repositories>
<build>
<plugins>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>findbugs-maven-plugin</artifactId>
<configuration>
<findbugsXmlOutput>true</findbugsXmlOutput>
<xmlOutput>true</xmlOutput>
<excludeFilterFile>
${basedir}/dev-support/findbugs-exclude.xml
</excludeFilterFile>
<effort>Max</effort>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-project-info-reports-plugin</artifactId>
@ -130,20 +125,13 @@
</build>
<dependencies>
<!-- SDK Dependency -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<exclusions>
<exclusion>
<artifactId>javax.servlet-api</artifactId>
<groupId>javax.servlet</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-hdfs-client</artifactId>
<groupId>com.microsoft.azure</groupId>
<artifactId>azure-data-lake-store-sdk</artifactId>
<version>2.0.4-SNAPSHOT</version>
</dependency>
<!-- ENDS HERE-->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
@ -158,11 +146,6 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-all</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.eclipsesource.minimal-json</groupId>
<artifactId>minimal-json</artifactId>
@ -181,9 +164,5 @@
<version>2.4.0</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</dependency>
</dependencies>
</project>

View File

@ -19,6 +19,8 @@
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
@ -29,6 +31,8 @@ import java.net.URISyntaxException;
/**
* Expose adl:// scheme to access ADL file system.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class Adl extends DelegateToFileSystem {
Adl(URI theUri, Configuration conf) throws IOException, URISyntaxException {

View File

@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
* Constants.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public final class AdlConfKeys {
// OAuth2 Common Configuration
public static final String AZURE_AD_REFRESH_URL_KEY = "dfs.adls.oauth2"
+ ".refresh.url";
// Optional when the token provider type is RefreshToken or ClientCredential.
public static final String AZURE_AD_TOKEN_PROVIDER_CLASS_KEY =
"dfs.adls.oauth2.access.token.provider";
public static final String AZURE_AD_CLIENT_ID_KEY =
"dfs.adls.oauth2.client.id";
public static final String AZURE_AD_TOKEN_PROVIDER_TYPE_KEY =
"dfs.adls.oauth2.access.token.provider.type";
// OAuth Refresh Token Configuration
public static final String AZURE_AD_REFRESH_TOKEN_KEY =
"dfs.adls.oauth2.refresh.token";
public static final String TOKEN_PROVIDER_TYPE_REFRESH_TOKEN = "RefreshToken";
// OAuth Client Cred Token Configuration
public static final String AZURE_AD_CLIENT_SECRET_KEY =
"dfs.adls.oauth2.credential";
public static final String TOKEN_PROVIDER_TYPE_CLIENT_CRED =
"ClientCredential";
public static final String READ_AHEAD_BUFFER_SIZE_KEY =
"adl.feature.client.cache.readahead";
public static final String WRITE_BUFFER_SIZE_KEY =
"adl.feature.client.cache.drop.behind.writes";
static final String SECURE_TRANSPORT_SCHEME = "https";
static final String INSECURE_TRANSPORT_SCHEME = "http";
static final String ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER =
"adl.debug.override.localuserasfileowner";
static final boolean ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT = false;
static final long ADL_BLOCK_SIZE = 256 * 1024 * 1024;
static final int ADL_REPLICATION_FACTOR = 1;
static final String ADL_HADOOP_CLIENT_NAME = "hadoop-azure-datalake-";
static final String ADL_HADOOP_CLIENT_VERSION =
"2.0.0-SNAPSHOT";
static final String ADL_EVENTS_TRACKING_SOURCE = "adl.events.tracking.source";
static final String ADL_EVENTS_TRACKING_CLUSTERNAME =
"adl.events.tracking.clustername";
static final String ADL_EVENTS_TRACKING_CLUSTERTYPE =
"adl.events.tracking.clustertype";
static final int DEFAULT_READ_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024;
static final int DEFAULT_WRITE_AHEAD_BUFFER_SIZE = 4 * 1024 * 1024;
static final String LATENCY_TRACKER_KEY =
"adl.dfs.enable.client.latency.tracker";
static final boolean LATENCY_TRACKER_DEFAULT = true;
static final String ADL_EXPERIMENT_POSITIONAL_READ_KEY =
"adl.feature.experiment.positional.read.enable";
static final boolean ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT = true;
static final String ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION =
"adl.feature.support.acl.bit";
static final boolean ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT = true;
private AdlConfKeys() {
}
}
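
To make the relationship between these keys concrete, the following is a hedged sketch (all values are placeholders, not taken from this commit) of how a client could populate a Configuration programmatically for the two non-custom provider types defined by TokenProviderType:

import org.apache.hadoop.conf.Configuration;

public class AdlConfExample {

  // ClientCredential provider type: needs refresh URL, client id and secret.
  public static Configuration clientCredentialConf() {
    Configuration conf = new Configuration();
    conf.set("dfs.adls.oauth2.access.token.provider.type", "ClientCredential");
    conf.set("dfs.adls.oauth2.refresh.url",
        "https://example.invalid/oauth2/token"); // placeholder endpoint
    conf.set("dfs.adls.oauth2.client.id", "MY_CLIENT_ID");      // placeholder
    conf.set("dfs.adls.oauth2.credential", "MY_CLIENT_SECRET"); // placeholder
    return conf;
  }

  // RefreshToken provider type: needs client id and a refresh token.
  public static Configuration refreshTokenConf() {
    Configuration conf = new Configuration();
    conf.set("dfs.adls.oauth2.access.token.provider.type", "RefreshToken");
    conf.set("dfs.adls.oauth2.client.id", "MY_CLIENT_ID");          // placeholder
    conf.set("dfs.adls.oauth2.refresh.token", "MY_REFRESH_TOKEN"); // placeholder
    return conf;
  }
}

When neither type fits, dfs.adls.oauth2.access.token.provider.type can be left at its Custom default and dfs.adls.oauth2.access.token.provider set to an AzureADTokenProvider implementation, as sketched further below.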

View File

@ -19,23 +19,905 @@
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.hdfs.web.PrivateAzureDataLakeFileSystem;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
import com.google.common.annotations.VisibleForTesting;
import com.microsoft.azure.datalake.store.ADLStoreClient;
import com.microsoft.azure.datalake.store.ADLStoreOptions;
import com.microsoft.azure.datalake.store.DirectoryEntry;
import com.microsoft.azure.datalake.store.DirectoryEntryType;
import com.microsoft.azure.datalake.store.IfExists;
import com.microsoft.azure.datalake.store.LatencyTracker;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.ContentSummary.Builder;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.InvalidPathException;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.Options.Rename;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Progressable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.VersionInfo;
import static org.apache.hadoop.fs.adl.AdlConfKeys.*;
/**
* Expose adl:// scheme to access ADL file system.
* A FileSystem to access Azure Data Lake Store.
*/
public class AdlFileSystem extends PrivateAzureDataLakeFileSystem {
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class AdlFileSystem extends FileSystem {
static final String SCHEME = "adl";
static final int DEFAULT_PORT = 443;
private URI uri;
private String userName;
private boolean overrideOwner;
private ADLStoreClient adlClient;
private Path workingDirectory;
private boolean aclBitStatus;
public static final String SCHEME = "adl";
public static final int DEFAULT_PORT = 443;
// retained for tests
private AccessTokenProvider tokenProvider;
private AzureADTokenProvider azureTokenProvider;
@Override
public String getScheme() {
return SCHEME;
}
public URI getUri() {
return uri;
}
@Override
public int getDefaultPort() {
return DEFAULT_PORT;
}
@Override
public boolean supportsSymlinks() {
return false;
}
/**
* Called after a new FileSystem instance is constructed.
*
* @param storeUri a uri whose authority section names the host, port, etc.
* for this FileSystem
* @param conf the configuration
*/
@Override
public void initialize(URI storeUri, Configuration conf) throws IOException {
super.initialize(storeUri, conf);
this.setConf(conf);
this.uri = URI
.create(storeUri.getScheme() + "://" + storeUri.getAuthority());
try {
userName = UserGroupInformation.getCurrentUser().getShortUserName();
} catch (IOException e) {
userName = "hadoop";
}
this.setWorkingDirectory(getHomeDirectory());
overrideOwner = getConf().getBoolean(ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER,
ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
aclBitStatus = conf.getBoolean(ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION,
ADL_SUPPORT_ACL_BIT_IN_FSPERMISSION_DEFAULT);
String accountFQDN = null;
String mountPoint = null;
String hostname = storeUri.getHost();
if (!hostname.contains(".") && !hostname.equalsIgnoreCase(
"localhost")) { // this is a symbolic name. Resolve it.
String hostNameProperty = "dfs.adls." + hostname + ".hostname";
String mountPointProperty = "dfs.adls." + hostname + ".mountpoint";
accountFQDN = getNonEmptyVal(conf, hostNameProperty);
mountPoint = getNonEmptyVal(conf, mountPointProperty);
} else {
accountFQDN = hostname;
}
if (storeUri.getPort() > 0) {
accountFQDN = accountFQDN + ":" + storeUri.getPort();
}
adlClient = ADLStoreClient
.createClient(accountFQDN, getAccessTokenProvider(conf));
ADLStoreOptions options = new ADLStoreOptions();
options.enableThrowingRemoteExceptions();
if (getTransportScheme().equalsIgnoreCase(INSECURE_TRANSPORT_SCHEME)) {
options.setInsecureTransport();
}
if (mountPoint != null) {
options.setFilePathPrefix(mountPoint);
}
String clusterName = conf.get(ADL_EVENTS_TRACKING_CLUSTERNAME, "UNKNOWN");
String clusterType = conf.get(ADL_EVENTS_TRACKING_CLUSTERTYPE, "UNKNOWN");
String clientVersion = ADL_HADOOP_CLIENT_NAME + (StringUtils
.isEmpty(VersionInfo.getVersion().trim()) ?
ADL_HADOOP_CLIENT_VERSION.trim() :
VersionInfo.getVersion().trim());
options.setUserAgentSuffix(clientVersion + "/" +
VersionInfo.getVersion().trim() + "/" + clusterName + "/"
+ clusterType);
adlClient.setOptions(options);
boolean trackLatency = conf
.getBoolean(LATENCY_TRACKER_KEY, LATENCY_TRACKER_DEFAULT);
if (!trackLatency) {
LatencyTracker.disable();
}
}
/**
* This method is provided as a convenience so that derived classes can
* supply a custom {@link AzureADTokenProvider} instance.
*
* Loading an {@link AzureADTokenProvider} from configuration alone is not
* sufficient to preserve the secure Hadoop infrastructure and the user
* context for which the respective {@link AdlFileSystem} instance is
* initialized.
*
* The loading order is to first invoke
* {@link #getCustomAccessTokenProvider(Configuration)}. If that method
* returns null, meaning no implementation is provided by a derived class,
* then the configuration object is consulted to retrieve the token
* configuration as specified in the documentation.
*
* Custom token management takes precedence during initialization.
*
* @param conf Configuration object
* @return null if no custom {@link AzureADTokenProvider} token management
* is specified.
* @throws IOException if the token provider could not be initialized.
*/
protected synchronized AzureADTokenProvider getCustomAccessTokenProvider(
Configuration conf) throws IOException {
String className = getNonEmptyVal(conf, AZURE_AD_TOKEN_PROVIDER_CLASS_KEY);
Class<? extends AzureADTokenProvider> azureADTokenProviderClass =
conf.getClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY, null,
AzureADTokenProvider.class);
if (azureADTokenProviderClass == null) {
throw new IllegalArgumentException(
"Configuration " + className + " " + "not defined/accessible.");
}
azureTokenProvider = ReflectionUtils
.newInstance(azureADTokenProviderClass, conf);
if (azureTokenProvider == null) {
throw new IllegalArgumentException("Failed to initialize " + className);
}
azureTokenProvider.initialize(conf);
return azureTokenProvider;
}
private AccessTokenProvider getAccessTokenProvider(Configuration conf)
throws IOException {
TokenProviderType type = conf.getEnum(
AdlConfKeys.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, TokenProviderType.Custom);
switch (type) {
case RefreshToken:
tokenProvider = getConfRefreshTokenBasedTokenProvider(conf);
break;
case ClientCredential:
tokenProvider = getConfCredentialBasedTokenProvider(conf);
break;
case Custom:
default:
AzureADTokenProvider azureADTokenProvider = getCustomAccessTokenProvider(
conf);
tokenProvider = new SdkTokenProviderAdapter(azureADTokenProvider);
break;
}
return tokenProvider;
}
private AccessTokenProvider getConfCredentialBasedTokenProvider(
Configuration conf) {
String clientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY);
String refreshUrl = getNonEmptyVal(conf, AZURE_AD_REFRESH_URL_KEY);
String clientSecret = getNonEmptyVal(conf, AZURE_AD_CLIENT_SECRET_KEY);
return new ClientCredsTokenProvider(refreshUrl, clientId, clientSecret);
}
private AccessTokenProvider getConfRefreshTokenBasedTokenProvider(
Configuration conf) {
String clientId = getNonEmptyVal(conf, AZURE_AD_CLIENT_ID_KEY);
String refreshToken = getNonEmptyVal(conf, AZURE_AD_REFRESH_TOKEN_KEY);
return new RefreshTokenBasedTokenProvider(clientId, refreshToken);
}
@VisibleForTesting
AccessTokenProvider getTokenProvider() {
return tokenProvider;
}
@VisibleForTesting
AzureADTokenProvider getAzureTokenProvider() {
return azureTokenProvider;
}
/**
* Constructing the home directory locally is acceptable while the
* relationship between the Hadoop local user name and the ADL user name
* is not yet fully defined.
*
* @return Hadoop local user home directory.
*/
@Override
public Path getHomeDirectory() {
return makeQualified(new Path("/user/" + userName));
}
/**
* The create call is handled differently for ADL: create semantics are
* translated into Create/Append semantics.
* 1. No dedicated connection to the server.
* 2. Buffering is done locally. Once the buffer is full or flush is
* invoked by the caller, all pending data is pushed to ADL as an APPEND
* operation.
* 3. On close, an additional call is sent to the server to close the
* stream and release the lock on it.
*
* Create/Append semantics are necessary because:
* 1. The ADL backend server does not allow connections to stay idle for
* long. With slow writers, connection timeouts and connection resets were
* observed, causing occasional job failures.
* 2. Slow-writer jobs get a performance boost by avoiding network latency.
* 3. ADL performs equally well when appends are issued in multiples of
* 4 MB chunks.
*
* @param f File path
* @param permission Access permission for the newly created file
* @param overwrite Remove the existing file and create a new one if true,
* otherwise throw an error if the file exists
* @param bufferSize Buffer size, ADL backend does not honour
* @param replication Replication count, ADL backend does not honour
* @param blockSize Block size, ADL backend does not honour
* @param progress Progress indicator
* @return FSDataOutputStream OutputStream on which application can push
* stream of bytes
* @throws IOException when system error, internal server error or user error
*/
@Override
public FSDataOutputStream create(Path f, FsPermission permission,
boolean overwrite, int bufferSize, short replication, long blockSize,
Progressable progress) throws IOException {
statistics.incrementWriteOps(1);
IfExists overwriteRule = overwrite ? IfExists.OVERWRITE : IfExists.FAIL;
return new FSDataOutputStream(new AdlFsOutputStream(adlClient
.createFile(toRelativeFilePath(f), overwriteRule,
Integer.toOctalString(applyUMask(permission).toShort()), true),
getConf()), this.statistics);
}
/**
* Opens an FSDataOutputStream at the indicated Path with write-progress
* reporting. Same as create(), except fails if parent directory doesn't
* already exist.
*
* @param f the file name to open
* @param permission Access permission for the newly created file
* @param flags {@link CreateFlag}s to use for this stream.
* @param bufferSize the size of the buffer to be used. ADL backend does
* not honour
* @param replication required block replication for the file. ADL backend
* does not honour
* @param blockSize Block size, ADL backend does not honour
* @param progress Progress indicator
* @throws IOException when system error, internal server error or user error
* @see #setPermission(Path, FsPermission)
* @deprecated API only for 0.20-append
*/
@Deprecated
@Override
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication,
long blockSize, Progressable progress) throws IOException {
statistics.incrementWriteOps(1);
IfExists overwriteRule = IfExists.FAIL;
for (CreateFlag flag : flags) {
if (flag == CreateFlag.OVERWRITE) {
overwriteRule = IfExists.OVERWRITE;
break;
}
}
return new FSDataOutputStream(new AdlFsOutputStream(adlClient
.createFile(toRelativeFilePath(f), overwriteRule,
Integer.toOctalString(applyUMask(permission).toShort()), false),
getConf()), this.statistics);
}
/**
* Append to an existing file (optional operation).
*
* @param f the existing file to be appended.
* @param bufferSize the size of the buffer to be used. ADL backend does
* not honour
* @param progress Progress indicator
* @throws IOException when system error, internal server error or user error
*/
@Override
public FSDataOutputStream append(Path f, int bufferSize,
Progressable progress) throws IOException {
statistics.incrementWriteOps(1);
return new FSDataOutputStream(
new AdlFsOutputStream(adlClient.getAppendStream(toRelativeFilePath(f)),
getConf()), this.statistics);
}
/**
* Azure Data Lake does not support user configuration of data replication,
* so there is no point querying Azure Data Lake for it.
*
* Stub implementation.
*
* @param p Not honoured
* @param replication Not honoured
* @return true, hard coded, since the ADL file system does not support
* replication configuration
* @throws IOException never thrown in this case; declared to align with
* the parent API definition.
*/
@Override
public boolean setReplication(final Path p, final short replication)
throws IOException {
statistics.incrementWriteOps(1);
return true;
}
/**
* The open call is handled differently for ADL. Instead of returning a raw
* network stream to the user, an overridden FsInputStream is returned.
*
* @param f File path
* @param buffersize Buffer size, Not honoured
* @return FSDataInputStream InputStream on which application can read
* stream of bytes
* @throws IOException when system error, internal server error or user error
*/
@Override
public FSDataInputStream open(final Path f, final int buffersize)
throws IOException {
statistics.incrementReadOps(1);
return new FSDataInputStream(
new AdlFsInputStream(adlClient.getReadStream(toRelativeFilePath(f)),
statistics, getConf()));
}
/**
* Return a file status object that represents the path.
*
* @param f The path we want information from
* @return a FileStatus object
* @throws IOException when the path does not exist or any other error;
* IOException see specific implementation
*/
@Override
public FileStatus getFileStatus(final Path f) throws IOException {
statistics.incrementReadOps(1);
DirectoryEntry entry = adlClient.getDirectoryEntry(toRelativeFilePath(f));
return toFileStatus(entry, f);
}
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
*
* @param f given path
* @return the statuses of the files/directories in the given path
* @throws IOException when the path does not exist or any other error;
* IOException see specific implementation
*/
@Override
public FileStatus[] listStatus(final Path f) throws IOException {
statistics.incrementReadOps(1);
List<DirectoryEntry> entries =
adlClient.enumerateDirectory(toRelativeFilePath(f));
return toFileStatuses(entries, f);
}
/**
* Renames Path src to Path dst. Can take place on local fs
* or remote DFS.
*
* ADLS supports the POSIX standard for the rename operation.
*
* @param src path to be renamed
* @param dst new path after rename
* @return true if rename is successful
* @throws IOException on failure
*/
@Override
public boolean rename(final Path src, final Path dst) throws IOException {
statistics.incrementWriteOps(1);
return adlClient.rename(toRelativeFilePath(src), toRelativeFilePath(dst));
}
@Override
@Deprecated
public void rename(final Path src, final Path dst,
final Options.Rename... options) throws IOException {
statistics.incrementWriteOps(1);
boolean overwrite = false;
for (Rename renameOption : options) {
if (renameOption == Rename.OVERWRITE) {
overwrite = true;
break;
}
}
adlClient
.rename(toRelativeFilePath(src), toRelativeFilePath(dst), overwrite);
}
/**
* Concat existing files together.
*
* @param trg the path to the target destination.
* @param srcs the paths to the sources to use for the concatenation.
* @throws IOException when system error, internal server error or user error
*/
@Override
public void concat(final Path trg, final Path[] srcs) throws IOException {
statistics.incrementWriteOps(1);
List<String> sourcesList = new ArrayList<String>();
for (Path entry : srcs) {
sourcesList.add(toRelativeFilePath(entry));
}
adlClient.concatenateFiles(toRelativeFilePath(trg), sourcesList);
}
/**
* Delete a file.
*
* @param path the path to delete.
* @param recursive if path is a directory and set to
* true, the directory is deleted else throws an exception.
* In case of a file the recursive can be set to either
* true or false.
* @return true if delete is successful else false.
* @throws IOException when system error, internal server error or user error
*/
@Override
public boolean delete(final Path path, final boolean recursive)
throws IOException {
statistics.incrementWriteOps(1);
return recursive ?
adlClient.deleteRecursive(toRelativeFilePath(path)) :
adlClient.delete(toRelativeFilePath(path));
}
/**
* Make the given file and all non-existent parents into
* directories. Has the semantics of Unix 'mkdir -p'.
* Existence of the directory hierarchy is not an error.
*
* @param path path to create
* @param permission to apply to path
*/
@Override
public boolean mkdirs(final Path path, final FsPermission permission)
throws IOException {
statistics.incrementWriteOps(1);
return adlClient.createDirectory(toRelativeFilePath(path),
Integer.toOctalString(applyUMask(permission).toShort()));
}
private FileStatus[] toFileStatuses(final List<DirectoryEntry> entries,
final Path parent) {
FileStatus[] fileStatuses = new FileStatus[entries.size()];
int index = 0;
for (DirectoryEntry entry : entries) {
FileStatus status = toFileStatus(entry, parent);
if (!(entry.name == null || entry.name.isEmpty())) {
status.setPath(
new Path(parent.makeQualified(uri, workingDirectory), entry.name));
}
fileStatuses[index++] = status;
}
return fileStatuses;
}
private FsPermission applyUMask(FsPermission permission) {
if (permission == null) {
permission = FsPermission.getDefault();
}
return permission.applyUMask(FsPermission.getUMask(getConf()));
}
private FileStatus toFileStatus(final DirectoryEntry entry, final Path f) {
boolean isDirectory = entry.type == DirectoryEntryType.DIRECTORY;
long lastModificationData = entry.lastModifiedTime.getTime();
long lastAccessTime = entry.lastAccessTime.getTime();
FsPermission permission = new AdlPermission(aclBitStatus,
Short.valueOf(entry.permission, 8));
String user = entry.user;
String group = entry.group;
FileStatus status;
if (overrideOwner) {
status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR,
ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission,
userName, "hdfs", this.makeQualified(f));
} else {
status = new FileStatus(entry.length, isDirectory, ADL_REPLICATION_FACTOR,
ADL_BLOCK_SIZE, lastModificationData, lastAccessTime, permission,
user, group, this.makeQualified(f));
}
return status;
}
/**
* Set owner of a path (i.e. a file or a directory).
* The parameters owner and group cannot both be null.
*
* @param path The path
* @param owner If it is null, the original username remains unchanged.
* @param group If it is null, the original groupname remains unchanged.
*/
@Override
public void setOwner(final Path path, final String owner, final String group)
throws IOException {
statistics.incrementWriteOps(1);
adlClient.setOwner(toRelativeFilePath(path), owner, group);
}
/**
* Set permission of a path.
*
* @param path The path
* @param permission Access permission
*/
@Override
public void setPermission(final Path path, final FsPermission permission)
throws IOException {
statistics.incrementWriteOps(1);
adlClient.setPermission(toRelativeFilePath(path),
Integer.toOctalString(permission.toShort()));
}
/**
* Modifies ACL entries of files and directories. This method can add new ACL
* entries or modify the permissions on existing ACL entries. All existing
* ACL entries that are not specified in this call are retained without
* changes. (Modifications are merged into the current ACL.)
*
* @param path Path to modify
* @param aclSpec List of AclEntry describing modifications
* @throws IOException if an ACL could not be modified
*/
@Override
public void modifyAclEntries(final Path path, final List<AclEntry> aclSpec)
throws IOException {
statistics.incrementWriteOps(1);
List<com.microsoft.azure.datalake.store.acl.AclEntry> msAclEntries = new
ArrayList<com.microsoft.azure.datalake.store.acl.AclEntry>();
for (AclEntry aclEntry : aclSpec) {
msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry
.parseAclEntry(aclEntry.toString()));
}
adlClient.modifyAclEntries(toRelativeFilePath(path), msAclEntries);
}
/**
* Removes ACL entries from files and directories. Other ACL entries are
* retained.
*
* @param path Path to modify
* @param aclSpec List of AclEntry describing entries to remove
* @throws IOException if an ACL could not be modified
*/
@Override
public void removeAclEntries(final Path path, final List<AclEntry> aclSpec)
throws IOException {
statistics.incrementWriteOps(1);
List<com.microsoft.azure.datalake.store.acl.AclEntry> msAclEntries = new
ArrayList<com.microsoft.azure.datalake.store.acl.AclEntry>();
for (AclEntry aclEntry : aclSpec) {
msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry
.parseAclEntry(aclEntry.toString(), true));
}
adlClient.removeAclEntries(toRelativeFilePath(path), msAclEntries);
}
/**
* Removes all default ACL entries from files and directories.
*
* @param path Path to modify
* @throws IOException if an ACL could not be modified
*/
@Override
public void removeDefaultAcl(final Path path) throws IOException {
statistics.incrementWriteOps(1);
adlClient.removeDefaultAcls(toRelativeFilePath(path));
}
/**
* Removes all but the base ACL entries of files and directories. The entries
* for user, group, and others are retained for compatibility with permission
* bits.
*
* @param path Path to modify
* @throws IOException if an ACL could not be removed
*/
@Override
public void removeAcl(final Path path) throws IOException {
statistics.incrementWriteOps(1);
adlClient.removeAllAcls(toRelativeFilePath(path));
}
/**
* Fully replaces ACL of files and directories, discarding all existing
* entries.
*
* @param path Path to modify
* @param aclSpec List of AclEntry describing modifications, must include
* entries for user, group, and others for compatibility with
* permission bits.
* @throws IOException if an ACL could not be modified
*/
@Override
public void setAcl(final Path path, final List<AclEntry> aclSpec)
throws IOException {
statistics.incrementWriteOps(1);
List<com.microsoft.azure.datalake.store.acl.AclEntry> msAclEntries = new
ArrayList<com.microsoft.azure.datalake.store.acl.AclEntry>();
for (AclEntry aclEntry : aclSpec) {
msAclEntries.add(com.microsoft.azure.datalake.store.acl.AclEntry
.parseAclEntry(aclEntry.toString()));
}
adlClient.setAcl(toRelativeFilePath(path), msAclEntries);
}
/**
* Gets the ACL of a file or directory.
*
* @param path Path to get
* @return AclStatus describing the ACL of the file or directory
* @throws IOException if an ACL could not be read
*/
@Override
public AclStatus getAclStatus(final Path path) throws IOException {
statistics.incrementReadOps(1);
com.microsoft.azure.datalake.store.acl.AclStatus adlStatus = adlClient
.getAclStatus(toRelativeFilePath(path));
AclStatus.Builder aclStatusBuilder = new AclStatus.Builder();
aclStatusBuilder.owner(adlStatus.owner);
aclStatusBuilder.group(adlStatus.group);
aclStatusBuilder.setPermission(
new FsPermission(Short.valueOf(adlStatus.octalPermissions, 8)));
aclStatusBuilder.stickyBit(adlStatus.stickyBit);
String aclListString = com.microsoft.azure.datalake.store.acl.AclEntry
.aclListToString(adlStatus.aclSpec);
List<AclEntry> aclEntries = AclEntry.parseAclSpec(aclListString, true);
aclStatusBuilder.addEntries(aclEntries);
return aclStatusBuilder.build();
}
/**
* Checks if the user can access a path. The mode specifies which access
* checks to perform. If the requested permissions are granted, then the
* method returns normally. If access is denied, then the method throws an
* {@link AccessControlException}.
*
* @param path Path to check
* @param mode type of access to check
* @throws AccessControlException if access is denied
* @throws java.io.FileNotFoundException if the path does not exist
* @throws IOException see specific implementation
*/
@Override
public void access(final Path path, FsAction mode) throws IOException {
statistics.incrementReadOps(1);
if (!adlClient.checkAccess(toRelativeFilePath(path), mode.SYMBOL)) {
throw new AccessControlException("Access Denied : " + path.toString());
}
}
/**
* Return the {@link ContentSummary} of a given {@link Path}.
*
* @param f path to use
*/
@Override
public ContentSummary getContentSummary(Path f) throws IOException {
statistics.incrementReadOps(1);
com.microsoft.azure.datalake.store.ContentSummary msSummary = adlClient
.getContentSummary(toRelativeFilePath(f));
return new Builder().length(msSummary.length)
.directoryCount(msSummary.directoryCount).fileCount(msSummary.fileCount)
.spaceConsumed(msSummary.spaceConsumed).build();
}
@VisibleForTesting
protected String getTransportScheme() {
return SECURE_TRANSPORT_SCHEME;
}
@VisibleForTesting
String toRelativeFilePath(Path path) {
return path.makeQualified(uri, workingDirectory).toUri().getPath();
}
/**
* Get the current working directory for the given file system.
*
* @return the directory pathname
*/
@Override
public Path getWorkingDirectory() {
return workingDirectory;
}
/**
* Set the current working directory for the given file system. All relative
* paths will be resolved relative to it.
*
* @param dir Working directory path.
*/
@Override
public void setWorkingDirectory(final Path dir) {
if (dir == null) {
throw new InvalidPathException("Working directory cannot be set to NULL");
}
/**
* Do not validate the scheme and URI of the passed parameter. When ADLS
* runs as an additional file system, the working directory being set has
* the default file system's scheme and URI.
*
* A problem was found during Pig execution in
* https://github.com/apache/pig/blob/branch-0.15/src/org/apache/pig/backend/hadoop/executionengine/mapReduceLayer/PigInputFormat.java#L235
* A similar problem may be present in other applications, so the working
* directory is built using the relative path only.
*/
this.workingDirectory = this.makeAbsolute(dir);
}
/**
* Return the number of bytes that large input files should optimally
* be split into to minimize I/O time.
*
* @deprecated use {@link #getDefaultBlockSize(Path)} instead
*/
@Deprecated
public long getDefaultBlockSize() {
return ADL_BLOCK_SIZE;
}
/**
* Return the number of bytes that large input files should optimally
* be split into to minimize I/O time. The given path will be used to
* locate the actual filesystem. The full path does not have to exist.
*
* @param f path of file
* @return the default block size for the path's filesystem
*/
public long getDefaultBlockSize(Path f) {
return getDefaultBlockSize();
}
/**
* Get the block size.
* @param f the filename
* @return the number of bytes in a block
* @deprecated Use getFileStatus() instead
*/
@Deprecated
public long getBlockSize(Path f) throws IOException {
return ADL_BLOCK_SIZE;
}
@Override
public BlockLocation[] getFileBlockLocations(final FileStatus status,
final long offset, final long length) throws IOException {
if (status == null) {
return null;
}
if ((offset < 0) || (length < 0)) {
throw new IllegalArgumentException("Invalid start or len parameter");
}
if (status.getLen() < offset) {
return new BlockLocation[0];
}
final String[] name = {"localhost"};
final String[] host = {"localhost"};
long blockSize = ADL_BLOCK_SIZE;
int numberOfLocations =
(int) (length / blockSize) + ((length % blockSize == 0) ? 0 : 1);
BlockLocation[] locations = new BlockLocation[numberOfLocations];
for (int i = 0; i < locations.length; i++) {
long currentOffset = offset + (i * blockSize);
long currentLength = Math.min(blockSize, offset + length - currentOffset);
locations[i] = new BlockLocation(name, host, currentOffset,
currentLength);
}
return locations;
}
@Override
public BlockLocation[] getFileBlockLocations(final Path p, final long offset,
final long length) throws IOException {
// read ops incremented in getFileStatus
FileStatus fileStatus = getFileStatus(p);
return getFileBlockLocations(fileStatus, offset, length);
}
/**
* Get replication.
*
* @param src file name
* @return file replication
* @deprecated Use getFileStatus() instead
*/
@Deprecated
public short getReplication(Path src) {
return ADL_REPLICATION_FACTOR;
}
private Path makeAbsolute(Path path) {
return path.isAbsolute() ? path : new Path(this.workingDirectory, path);
}
private static String getNonEmptyVal(Configuration conf, String key) {
String value = conf.get(key);
if (StringUtils.isEmpty(value)) {
throw new IllegalArgumentException(
"No value for " + key + " found in conf file.");
}
return value;
}
}
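
For reference, here is a hedged usage sketch of the new file system through the standard FileSystem API; the adl:// account URI and all credential values are placeholders, not taken from this commit:

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class AdlFileSystemUsage {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // OAuth2 client-credential settings; all values are placeholders.
    conf.set("dfs.adls.oauth2.access.token.provider.type", "ClientCredential");
    conf.set("dfs.adls.oauth2.refresh.url", "https://example.invalid/oauth2/token");
    conf.set("dfs.adls.oauth2.client.id", "MY_CLIENT_ID");
    conf.set("dfs.adls.oauth2.credential", "MY_CLIENT_SECRET");

    // The adl:// scheme resolves to AdlFileSystem through fs.adl.impl.
    FileSystem fs = FileSystem.get(
        URI.create("adl://myaccount.azuredatalakestore.net"), conf);

    Path file = new Path("/tmp/hello.txt");
    try (FSDataOutputStream out = fs.create(file, true)) {
      out.writeUTF("hello adl"); // buffered locally, pushed to ADL as APPEND
    }
    try (FSDataInputStream in = fs.open(file)) {
      System.out.println(in.readUTF());
    }
    fs.close();
  }
}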

View File

@ -0,0 +1,149 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.microsoft.azure.datalake.store.ADLFileInputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSInputStream;
import org.apache.hadoop.fs.FileSystem.Statistics;
import java.io.IOException;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_READ_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY;
/**
* Wraps {@link ADLFileInputStream} implementation.
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class AdlFsInputStream extends FSInputStream {
private final ADLFileInputStream in;
private final Statistics stat;
private final boolean enablePositionalReadExperiment;
public AdlFsInputStream(ADLFileInputStream inputStream, Statistics statistics,
Configuration conf) throws IOException {
this.in = inputStream;
this.in.setBufferSize(conf.getInt(READ_AHEAD_BUFFER_SIZE_KEY,
DEFAULT_READ_AHEAD_BUFFER_SIZE));
enablePositionalReadExperiment = conf
.getBoolean(ADL_EXPERIMENT_POSITIONAL_READ_KEY,
ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT);
stat = statistics;
}
@Override
public synchronized void seek(long pos) throws IOException {
in.seek(pos);
}
/**
* Return the current offset from the start of the file.
*/
@Override
public synchronized long getPos() throws IOException {
return in.getPos();
}
@Override
public boolean seekToNewSource(long l) throws IOException {
return false;
}
@Override
public synchronized int read() throws IOException {
int ch = in.read();
if (stat != null && ch != -1) {
stat.incrementBytesRead(1);
}
return ch;
}
@Override
public int read(long position, byte[] buffer, int offset, int length)
throws IOException {
int numberOfByteRead = 0;
if (enablePositionalReadExperiment) {
numberOfByteRead = in.read(position, buffer, offset, length);
} else {
numberOfByteRead = super.read(position, buffer, offset, length);
}
if (stat != null && numberOfByteRead > 0) {
stat.incrementBytesRead(numberOfByteRead);
}
return numberOfByteRead;
}
@Override
public synchronized int read(byte[] buffer, int offset, int length)
throws IOException {
int numberOfByteRead = in.read(buffer, offset, length);
if (stat != null && numberOfByteRead > 0) {
stat.incrementBytesRead(numberOfByteRead);
}
return numberOfByteRead;
}
/**
* This method returns the remaining bytes in the stream, rather than the
* expected Java interpretation of {@link java.io.InputStream#available()},
* which expects the number of remaining bytes in the local buffer.
* Moreover, it caps the value returned to a maximum of Integer.MAX_VALUE.
* These changed behaviors are to ensure compatibility with the
* expectations of the HBase WAL reader, which depends on available()
* returning the number of bytes in the stream.
*
* Given that all other FileSystems in the Hadoop ecosystem (especially
* HDFS) do this, it is possible that applications other than HBase have
* also picked up this expectation from the HDFS implementation.
* Therefore this quirky behavior is kept here, to ensure compatibility.
*
* @return remaining bytes in the stream, with a maximum of Integer.MAX_VALUE.
* @throws IOException if the position or file length cannot be obtained
* from the SDK.
*/
@Override
public synchronized int available() throws IOException {
return (int) Math.min(in.length() - in.getPos(), Integer.MAX_VALUE);
}
@Override
public synchronized void close() throws IOException {
in.close();
}
@Override
public synchronized long skip(long pos) throws IOException {
return in.skip(pos);
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.microsoft.azure.datalake.store.ADLFileOutputStream;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Syncable;
import java.io.IOException;
import java.io.OutputStream;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_WRITE_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY;
/**
* Wraps {@link com.microsoft.azure.datalake.store.ADLFileOutputStream}
* implementation.
*
* Flush semantics: flush() is a no-op, since some parts of the Hadoop
* ecosystem call flush() expecting it to have no performance impact. In
* Hadoop filesystems, flush() itself guarantees no durability; that is
* achieved by calling hflush() or hsync().
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public final class AdlFsOutputStream extends OutputStream implements Syncable {
private final ADLFileOutputStream out;
public AdlFsOutputStream(ADLFileOutputStream out, Configuration configuration)
throws IOException {
this.out = out;
out.setBufferSize(configuration
.getInt(WRITE_BUFFER_SIZE_KEY, DEFAULT_WRITE_AHEAD_BUFFER_SIZE));
}
@Override
public synchronized void write(int b) throws IOException {
out.write(b);
}
@Override
public synchronized void write(byte[] b, int off, int len)
throws IOException {
out.write(b, off, len);
}
@Override
public synchronized void close() throws IOException {
out.close();
}
public synchronized void sync() throws IOException {
out.flush();
}
public synchronized void hflush() throws IOException {
out.flush();
}
public synchronized void hsync() throws IOException {
out.flush();
}
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.fs.permission.FsPermission;
/**
* The Hadoop shell command -getfacl does not invoke getAclStatus if the
* FsPermission from getFileStatus does not have the ACL bit set to true.
* By default getAclBit returns false.
*
* Making an additional call to getAclStatus would be redundant when ADLS
* runs as an additional FS. To avoid this redundancy, a configuration
* option controls whether getAclBit returns true or false.
*/
class AdlPermission extends FsPermission {
private final boolean aclBit;
AdlPermission(boolean aclBitStatus, Short aShort) {
super(aShort);
this.aclBit = aclBitStatus;
}
/**
* Returns true if the "adl.feature.support.acl.bit" configuration is set
* to true.
*
* If the configuration is not set, the default value is true.
*
* @return the configured ACL bit value; true if the configuration is not
* set.
*/
public boolean getAclBit() {
return aclBit;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof FsPermission) {
FsPermission that = (FsPermission) obj;
return this.getUserAction() == that.getUserAction()
&& this.getGroupAction() == that.getGroupAction()
&& this.getOtherAction() == that.getOtherAction()
&& this.getStickyBit() == that.getStickyBit();
}
return false;
}
@Override
public int hashCode() {
return toShort();
}
}

View File

@ -7,43 +7,35 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
package org.apache.hadoop.fs.adl;
/**
* Query parameter to notify backend server that the all the data has been
* pushed to over the stream.
*
* Used in operation code Create and Append.
*/
public class ADLFlush extends BooleanParam {
/**
* Parameter name.
*/
public static final String NAME = "flush";
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.AzureADToken;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
private static final Domain DOMAIN = new Domain(NAME);
import java.io.IOException;
/**
* Constructor.
*
* @param value the parameter value.
*/
public ADLFlush(final Boolean value) {
super(DOMAIN, value);
final class SdkTokenProviderAdapter extends AccessTokenProvider {
private AzureADTokenProvider tokenProvider;
SdkTokenProviderAdapter(AzureADTokenProvider tp) {
this.tokenProvider = tp;
}
@Override
public final String getName() {
return NAME;
protected AzureADToken refreshToken() throws IOException {
AzureADToken azureADToken = new AzureADToken();
azureADToken.accessToken = tokenProvider.getAccessToken();
azureADToken.expiry = tokenProvider.getExpiryTime();
return azureADToken;
}
}

View File

@ -7,18 +7,19 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
* A distributed implementation of {@link
* org.apache.hadoop.hdfs.web.oauth2} for oauth2 token management support.
*/
package org.apache.hadoop.hdfs.web.oauth2;
package org.apache.hadoop.fs.adl;
enum TokenProviderType {
RefreshToken,
ClientCredential,
Custom
}

View File

@ -0,0 +1,70 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl.oauth2;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import java.io.IOException;
import java.util.Date;
/**
* Provide an Azure Active Directory supported
* OAuth2 access token to be used to authenticate REST calls against Azure data
* lake file system {@link org.apache.hadoop.fs.adl.AdlFileSystem}.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public abstract class AzureADTokenProvider {
/**
* Initialize with supported configuration. This method is invoked when the
* {@link org.apache.hadoop.fs.adl.AdlFileSystem#initialize
* (URI, Configuration)} method is invoked.
*
* @param configuration Configuration object
* @throws IOException if the instance cannot be configured.
*/
public abstract void initialize(Configuration configuration)
throws IOException;
/**
* Obtain the access token that should be added to the HTTPS connection's
* header. Will be called depending on the expiry time set by
* {@link #getExpiryTime()}, so implementations should be performant.
* Implementations are responsible for any refreshing of the token.
*
* @return String containing the access token
* @throws IOException if there is an error fetching the token
*/
public abstract String getAccessToken() throws IOException;
/**
* Obtain the expiry time of the token. If the implementation is performant
* enough to maintain expiry and expects a {@link #getAccessToken()} call
* for every connection, then it is safe to return the current or a past
* time.
*
* However, it is recommended to use the token expiry time received from
* Azure Active Directory.
*
* @return the Date at which the access token retrieved from AAD expires.
*/
public abstract Date getExpiryTime();
}
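
As a sketch of how this extension point might be used (the class, package, and token values below are hypothetical, not part of this commit), a trivial provider could return a token obtained out of band; it would be wired in by setting dfs.adls.oauth2.access.token.provider to the implementing class and leaving the provider type at its Custom default:

package com.example.adl;

import java.io.IOException;
import java.util.Date;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;

/** Hypothetical provider that serves a pre-fetched token. */
public class StaticAzureADTokenProvider extends AzureADTokenProvider {
  private String token;
  private Date expiry;

  @Override
  public void initialize(Configuration configuration) throws IOException {
    // Placeholder: read a token acquired out of band from configuration.
    token = configuration.get("example.adl.static.token");
    if (token == null) {
      throw new IOException("example.adl.static.token is not set");
    }
    // Assume the token is valid for one hour from now.
    expiry = new Date(System.currentTimeMillis() + 60L * 60 * 1000);
  }

  @Override
  public String getAccessToken() throws IOException {
    return token;
  }

  @Override
  public Date getExpiryTime() {
    return expiry;
  }
}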

View File

@ -1,135 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.oauth2;
import java.io.IOException;
import java.util.Map;
import java.util.LinkedHashMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.web.oauth2.AccessTokenProvider;
import org.apache.hadoop.hdfs.web.oauth2.ConfRefreshTokenBasedAccessTokenProvider;
import org.apache.hadoop.hdfs.web.oauth2.PrivateCachedRefreshTokenBasedAccessTokenProvider;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY;
import static org.apache.hadoop.hdfs.web.oauth2.ConfRefreshTokenBasedAccessTokenProvider.OAUTH_REFRESH_TOKEN_KEY;
/**
* Share refresh tokens across all ADLS instances with a common client ID. The
* {@link AccessTokenProvider} can be shared across multiple instances,
* amortizing the cost of refreshing tokens.
*/
public class CachedRefreshTokenBasedAccessTokenProvider
extends PrivateCachedRefreshTokenBasedAccessTokenProvider {
public static final String FORCE_REFRESH = "adl.force.token.refresh";
private static final Logger LOG =
LoggerFactory.getLogger(CachedRefreshTokenBasedAccessTokenProvider.class);
/** Limit size of provider cache. */
static final int MAX_PROVIDERS = 10;
@SuppressWarnings("serial")
private static final Map<String, AccessTokenProvider> CACHE =
new LinkedHashMap<String, AccessTokenProvider>() {
@Override
public boolean removeEldestEntry(
Map.Entry<String, AccessTokenProvider> e) {
return size() > MAX_PROVIDERS;
}
};
private AccessTokenProvider instance = null;
/**
* Create handle for cached instance.
*/
public CachedRefreshTokenBasedAccessTokenProvider() {
}
/**
* Gets the access token from internally cached
* ConfRefreshTokenBasedAccessTokenProvider instance.
*
* @return Valid OAuth2 access token for the user.
* @throws IOException when system error, internal server error or user error
*/
@Override
public synchronized String getAccessToken() throws IOException {
return instance.getAccessToken();
}
/**
* @return A cached Configuration consistent with the parameters of this
* instance.
*/
@Override
public synchronized Configuration getConf() {
return instance.getConf();
}
/**
* Configure cached instance. Note that the Configuration instance returned
* from subsequent calls to {@link #getConf() getConf} may be from a
* previous, cached entry.
* @param conf Configuration instance
*/
@Override
public synchronized void setConf(Configuration conf) {
String id = conf.get(OAUTH_CLIENT_ID_KEY);
if (null == id) {
throw new IllegalArgumentException("Missing client ID");
}
synchronized (CACHE) {
instance = CACHE.get(id);
if (null == instance
|| conf.getBoolean(FORCE_REFRESH, false)
|| replace(instance, conf)) {
instance = newInstance();
// clone configuration
instance.setConf(new Configuration(conf));
CACHE.put(id, instance);
LOG.debug("Created new client {}", id);
}
}
}
AccessTokenProvider newInstance() {
return new ConfRefreshTokenBasedAccessTokenProvider();
}
private static boolean replace(AccessTokenProvider cached, Configuration c2) {
// ConfRefreshTokenBasedAccessTokenProvider::setConf asserts !null
final Configuration c1 = cached.getConf();
for (String key : new String[] {
OAUTH_REFRESH_TOKEN_KEY, OAUTH_REFRESH_URL_KEY }) {
if (!c1.get(key).equals(c2.get(key))) {
// replace cached instance for this clientID
return true;
}
}
return false;
}
}
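A minimal usage sketch (not part of this patch; the client ID, token and refresh URL values are made up) showing how two provider handles configured with the same client ID end up sharing one cached ConfRefreshTokenBasedAccessTokenProvider, so the refresh state is created only once per process:

    Configuration conf = new Configuration();
    conf.set(OAUTH_CLIENT_ID_KEY, "example-client-id");
    conf.set(OAUTH_REFRESH_TOKEN_KEY, "example-refresh-token");
    conf.set(OAUTH_REFRESH_URL_KEY, "https://login.windows.net/common/oauth2/token/");

    CachedRefreshTokenBasedAccessTokenProvider p1 =
        new CachedRefreshTokenBasedAccessTokenProvider();
    p1.setConf(conf);                    // creates and caches the provider for this client ID
    CachedRefreshTokenBasedAccessTokenProvider p2 =
        new CachedRefreshTokenBasedAccessTokenProvider();
    p2.setConf(conf);                    // reuses the cached provider; no new instance created
    String token = p2.getAccessToken();  // token served by the shared instance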

View File

@ -20,4 +20,4 @@
/**
* public interface to expose OAuth2 authentication related features.
*/
package org.apache.hadoop.fs.adl.oauth2;
package org.apache.hadoop.fs.adl.oauth2;

View File

@ -20,4 +20,4 @@
/**
* Supporting classes for metrics instrumentation.
*/
package org.apache.hadoop.fs.adl;
package org.apache.hadoop.fs.adl;

View File

@ -1,61 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web;
/**
* Constants.
*/
public final class ADLConfKeys {
public static final String
ADL_FEATURE_CONCURRENT_READ_AHEAD_MAX_CONCURRENT_CONN =
"adl.feature.override.readahead.max.concurrent.connection";
public static final int
ADL_FEATURE_CONCURRENT_READ_AHEAD_MAX_CONCURRENT_CONN_DEFAULT = 2;
public static final String ADL_WEBSDK_VERSION_KEY = "ADLFeatureSet";
static final String ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER =
"adl.debug.override.localuserasfileowner";
static final boolean ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT = false;
static final String ADL_FEATURE_REDIRECT_OFF =
"adl.feature.override.redirection.off";
static final boolean ADL_FEATURE_REDIRECT_OFF_DEFAULT = true;
static final String ADL_FEATURE_GET_BLOCK_LOCATION_LOCALLY_BUNDLED =
"adl.feature.override.getblocklocation.locally.bundled";
static final boolean ADL_FEATURE_GET_BLOCK_LOCATION_LOCALLY_BUNDLED_DEFAULT
= true;
static final String ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD =
"adl.feature.override.readahead";
static final boolean ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD_DEFAULT =
true;
static final String ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD_BUFFER_SIZE =
"adl.feature.override.readahead.max.buffersize";
static final int KB = 1024;
static final int MB = KB * KB;
static final int DEFAULT_BLOCK_SIZE = 4 * MB;
static final int DEFAULT_EXTENT_SIZE = 256 * MB;
static final int DEFAULT_TIMEOUT_IN_SECONDS = 120;
static final int
ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD_BUFFER_SIZE_DEFAULT =
8 * MB;
private ADLConfKeys() {
}
}

View File

@ -1,180 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web;
/**
 * Responsible for holding buffered data in the process. Holds one and only
 * one buffer block in memory. The buffer block records the file it belongs
 * to and the offset from which the block was fetched. If the same buffer
 * block can be reused across webhdfs instances, a backend round trip is
 * avoided. The buffer block matters because ADL fetches a large amount of
 * data from the backend per request (4MB by default, configurable through
 * core-site.xml).
 * In practice, for compressed formats such as ORC/Avro, the buffer block
 * may not avoid backend calls across webhdfs instances.
*/
final class BufferManager {
private static final BufferManager BUFFER_MANAGER_INSTANCE = new
BufferManager();
private static Object lock = new Object();
private Buffer buffer = null;
private String fileName;
/**
* Constructor.
*/
private BufferManager() {
}
public static Object getLock() {
return lock;
}
public static BufferManager getInstance() {
return BUFFER_MANAGER_INSTANCE;
}
/**
 * Validate whether the current buffer block belongs to the given stream.
*
* @param path ADL stream path
* @param offset Stream offset that caller is interested in
* @return True if the buffer block is available otherwise false
*/
boolean hasValidDataForOffset(String path, long offset) {
if (this.fileName == null) {
return false;
}
if (!this.fileName.equals(path)) {
return false;
}
if (buffer == null) {
return false;
}
if ((offset < buffer.offset) || (offset >= (buffer.offset
+ buffer.data.length))) {
return false;
}
return true;
}
/**
* Clean buffer block.
*/
void clear() {
buffer = null;
}
/**
 * Validate whether the current buffer block belongs to the given stream.
 * Partial availability is not supported; the data must be fully available
 * within the range defined by the offset and size parameters.
*
* @param path Stream path
* @param offset Offset of the stream
* @param size Size of the data from the offset of the stream caller
* interested in
* @return True if the data is available from the given offset and of the
* size caller is interested in.
*/
boolean hasData(String path, long offset, int size) {
if (!hasValidDataForOffset(path, offset)) {
return false;
}
if ((size + offset) > (buffer.data.length + buffer.offset)) {
return false;
}
return true;
}
/**
 * Copy data out of the buffer block from the requested offset. It is the
 * caller's responsibility to check that the buffer block is the one of
 * interest and that the offset is valid.
 *
 * @param data Byte array to be filled from the buffer block
 * @param offset Offset within the stream to copy from.
*/
void get(byte[] data, long offset) {
System.arraycopy(buffer.data, (int) (offset - buffer.offset), data, 0,
data.length);
}
/**
* Create new empty buffer block of the given size.
*
* @param len Size of the buffer block.
* @return Empty byte array.
*/
byte[] getEmpty(int len) {
return new byte[len];
}
/**
 * Allows the caller to register a new buffer block, pulled from the
 * backend, for the given stream.
 *
 * @param data Buffer
 * @param path Stream path to which the buffer belongs
 * @param offset Stream offset at which the buffer starts
*/
void add(byte[] data, String path, long offset) {
if (data == null) {
return;
}
buffer = new Buffer();
buffer.data = data;
buffer.offset = offset;
this.fileName = path;
}
/**
* @return Size of the buffer.
*/
int getBufferSize() {
return buffer.data.length;
}
/**
 * @return Stream offset at which the buffer starts
*/
long getBufferOffset() {
return buffer.offset;
}
/**
* Buffer container.
*/
static class Buffer {
private byte[] data;
private long offset;
}
}
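For illustration, a sketch of the read path this class supports (the stream path, offsets and sizes are made up; the methods are package-private, so this reflects how an in-package caller such as the ADL input stream would use it, holding the shared lock while touching the buffer):

    BufferManager bm = BufferManager.getInstance();
    byte[] out = new byte[1024];
    synchronized (BufferManager.getLock()) {
      if (bm.hasData("/tmp/sample.dat", 4096, out.length)) {
        bm.get(out, 4096);                       // served from the cached block
      } else {
        byte[] block = bm.getEmpty(4 * 1024 * 1024);
        // ... fill 'block' from the ADL backend starting at offset 4096 ...
        bm.add(block, "/tmp/sample.dat", 4096);
        bm.get(out, 4096);
      }
    }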

View File

@ -1,156 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hdfs.web.oauth2;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectReader;
import com.squareup.okhttp.OkHttpClient;
import com.squareup.okhttp.Request;
import com.squareup.okhttp.RequestBody;
import com.squareup.okhttp.Response;
import com.squareup.okhttp.MediaType;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.util.Timer;
import org.apache.http.HttpStatus;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull;
/**
* Obtain an access token via the credential-based OAuth2 workflow.
*/
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class AzureADClientCredentialBasedAccesTokenProvider
extends AccessTokenProvider {
private static final ObjectReader READER =
new ObjectMapper().reader(Map.class);
public static final String OAUTH_CREDENTIAL_KEY
= "dfs.webhdfs.oauth2.credential";
public static final String AAD_RESOURCE_KEY
= "fs.adls.oauth2.resource";
public static final String RESOURCE_PARAM_NAME
= "resource";
private static final String OAUTH_CLIENT_ID_KEY
= "dfs.webhdfs.oauth2.client.id";
private static final String OAUTH_REFRESH_URL_KEY
= "dfs.webhdfs.oauth2.refresh.url";
public static final String ACCESS_TOKEN = "access_token";
public static final String CLIENT_CREDENTIALS = "client_credentials";
public static final String CLIENT_ID = "client_id";
public static final String CLIENT_SECRET = "client_secret";
public static final String EXPIRES_IN = "expires_in";
public static final String GRANT_TYPE = "grant_type";
public static final MediaType URLENCODED
= MediaType.parse("application/x-www-form-urlencoded; charset=utf-8");
private AccessTokenTimer timer;
private String clientId;
private String refreshURL;
private String accessToken;
private String resource;
private String credential;
private boolean initialCredentialObtained = false;
AzureADClientCredentialBasedAccesTokenProvider() {
this.timer = new AccessTokenTimer();
}
AzureADClientCredentialBasedAccesTokenProvider(Timer timer) {
this.timer = new AccessTokenTimer(timer);
}
@Override
public void setConf(Configuration conf) {
super.setConf(conf);
clientId = notNull(conf, OAUTH_CLIENT_ID_KEY);
refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY);
resource = notNull(conf, AAD_RESOURCE_KEY);
credential = notNull(conf, OAUTH_CREDENTIAL_KEY);
}
@Override
public String getAccessToken() throws IOException {
if(timer.shouldRefresh() || !initialCredentialObtained) {
refresh();
initialCredentialObtained = true;
}
return accessToken;
}
void refresh() throws IOException {
try {
OkHttpClient client = new OkHttpClient();
client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
TimeUnit.MILLISECONDS);
client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
TimeUnit.MILLISECONDS);
String bodyString = Utils.postBody(CLIENT_SECRET, credential,
GRANT_TYPE, CLIENT_CREDENTIALS,
RESOURCE_PARAM_NAME, resource,
CLIENT_ID, clientId);
RequestBody body = RequestBody.create(URLENCODED, bodyString);
Request request = new Request.Builder()
.url(refreshURL)
.post(body)
.build();
Response responseBody = client.newCall(request).execute();
if (responseBody.code() != HttpStatus.SC_OK) {
throw new IllegalArgumentException("Received invalid http response: "
+ responseBody.code() + ", text = " + responseBody.toString());
}
Map<?, ?> response = READER.readValue(responseBody.body().string());
String newExpiresIn = response.get(EXPIRES_IN).toString();
timer.setExpiresIn(newExpiresIn);
accessToken = response.get(ACCESS_TOKEN).toString();
} catch (Exception e) {
throw new IOException("Unable to obtain access token from credential", e);
}
}
}
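A sketch of the configuration this provider expects before getAccessToken() can refresh a token. The keys are the ones defined above; the values are placeholders, and since the constructor is package-private this reflects in-package usage:

    Configuration conf = new Configuration();
    conf.set("dfs.webhdfs.oauth2.client.id", "YOUR_AAD_APPLICATION_ID");
    conf.set("dfs.webhdfs.oauth2.refresh.url",
        "https://login.windows.net/YOUR_TENANT/oauth2/token");
    conf.set("fs.adls.oauth2.resource", "https://management.core.windows.net/");
    conf.set("dfs.webhdfs.oauth2.credential", "YOUR_CLIENT_SECRET");

    AzureADClientCredentialBasedAccesTokenProvider provider =
        new AzureADClientCredentialBasedAccesTokenProvider();
    provider.setConf(conf);
    String token = provider.getAccessToken();  // triggers refresh() on first use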

View File

@ -1,37 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.oauth2;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
/**
 * Exposes AccessTokenProvider publicly so it can be extended in the
 * com.microsoft.azure.datalake package. The extended version caches the
 * token for the process to improve performance.
*/
@Private
@Unstable
public abstract class PrivateCachedRefreshTokenBasedAccessTokenProvider
extends AccessTokenProvider {
// visibility workaround
}

View File

@ -1,25 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
 * A distributed implementation of {@link org.apache.hadoop.hdfs.web} for
 * reading and writing files on the Azure Data Lake file system. This
 * implementation is derived from the webhdfs specification.
*/
package org.apache.hadoop.hdfs.web;

View File

@ -1,96 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
import java.net.HttpURLConnection;
/**
* Extended Webhdfs GetOpParam to avoid redirect operation for azure data
* lake storage.
*/
public class ADLGetOpParam extends HttpOpParam<ADLGetOpParam.Op> {
private static final Domain<Op> DOMAIN = new Domain<Op>(NAME, Op.class);
/**
* Constructor.
*
* @param str a string representation of the parameter value.
*/
public ADLGetOpParam(final String str) {
super(DOMAIN, DOMAIN.parse(str));
}
@Override
public final String getName() {
return NAME;
}
/**
* Get operations.
*/
public static enum Op implements HttpOpParam.Op {
OPEN(false, HttpURLConnection.HTTP_OK);
private final boolean redirect;
private final int expectedHttpResponseCode;
private final boolean requireAuth;
Op(final boolean doRedirect, final int expectHttpResponseCode) {
this(doRedirect, expectHttpResponseCode, false);
}
Op(final boolean doRedirect, final int expectHttpResponseCode,
final boolean doRequireAuth) {
this.redirect = doRedirect;
this.expectedHttpResponseCode = expectHttpResponseCode;
this.requireAuth = doRequireAuth;
}
@Override
public HttpOpParam.Type getType() {
return HttpOpParam.Type.GET;
}
@Override
public boolean getRequireAuth() {
return requireAuth;
}
@Override
public boolean getDoOutput() {
return false;
}
@Override
public boolean getRedirect() {
return redirect;
}
@Override
public int getExpectedHttpResponseCode() {
return expectedHttpResponseCode;
}
@Override
public String toQueryString() {
return NAME + "=" + this;
}
}
}

View File

@ -1,97 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
import java.net.HttpURLConnection;
/**
* Extended Webhdfs PostOpParam to avoid redirect during append operation for
* azure data lake storage.
*/
public class ADLPostOpParam extends HttpOpParam<ADLPostOpParam.Op> {
private static final Domain<Op> DOMAIN = new Domain<ADLPostOpParam.Op>(NAME,
Op.class);
/**
* Constructor.
*
* @param str a string representation of the parameter value.
*/
public ADLPostOpParam(final String str) {
super(DOMAIN, DOMAIN.parse(str));
}
@Override
public final String getName() {
return NAME;
}
/**
* Post operations.
*/
public static enum Op implements HttpOpParam.Op {
APPEND(true, false, HttpURLConnection.HTTP_OK);
private final boolean redirect;
private final boolean doOutput;
private final int expectedHttpResponseCode;
Op(final boolean doOut, final boolean doRedirect,
final int expectHttpResponseCode) {
this.doOutput = doOut;
this.redirect = doRedirect;
this.expectedHttpResponseCode = expectHttpResponseCode;
}
@Override
public Type getType() {
return Type.POST;
}
@Override
public boolean getRequireAuth() {
return false;
}
@Override
public boolean getDoOutput() {
return doOutput;
}
@Override
public boolean getRedirect() {
return redirect;
}
@Override
public int getExpectedHttpResponseCode() {
return expectedHttpResponseCode;
}
/**
* @return a URI query string.
*/
@Override
public String toQueryString() {
return NAME + "=" + this;
}
}
}

View File

@ -1,94 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
import java.net.HttpURLConnection;
/**
* Extended Webhdfs PutOpParam to avoid redirect during Create operation for
* azure data lake storage.
*/
public class ADLPutOpParam extends HttpOpParam<ADLPutOpParam.Op> {
private static final Domain<Op> DOMAIN = new Domain<Op>(NAME, Op.class);
/**
* Constructor.
*
* @param str a string representation of the parameter value.
*/
public ADLPutOpParam(final String str) {
super(DOMAIN, DOMAIN.parse(str));
}
@Override
public final String getName() {
return NAME;
}
/**
* Put operations.
*/
public static enum Op implements HttpOpParam.Op {
CREATE(true, false, HttpURLConnection.HTTP_CREATED);
private final boolean redirect;
private final boolean doOutput;
private final int expectedHttpResponseCode;
private final boolean requireAuth;
Op(final boolean doOut, final boolean doRedirect,
final int expectHttpResponseCode) {
this.doOutput = doOut;
this.redirect = doRedirect;
this.expectedHttpResponseCode = expectHttpResponseCode;
this.requireAuth = false;
}
@Override
public HttpOpParam.Type getType() {
return HttpOpParam.Type.PUT;
}
@Override
public boolean getRequireAuth() {
return requireAuth;
}
@Override
public boolean getDoOutput() {
return doOutput;
}
@Override
public boolean getRedirect() {
return redirect;
}
@Override
public int getExpectedHttpResponseCode() {
return expectedHttpResponseCode;
}
@Override
public String toQueryString() {
return NAME + "=" + this;
}
}
}
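As a small illustration of how these op parameters behave (assuming HttpOpParam.NAME is the standard webhdfs "op" query key), the CREATE op above is declared non-redirecting, so the client can expect the final HTTP 201 from the initial request:

    ADLPutOpParam.Op op = ADLPutOpParam.Op.CREATE;
    boolean redirects = op.getRedirect();            // false: no redirect round trip
    boolean sendsData = op.getDoOutput();            // true: request carries a body
    int expected = op.getExpectedHttpResponseCode(); // 201 (HTTP_CREATED)
    String query = op.toQueryString();               // "op=CREATE"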

View File

@ -1,51 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
import org.apache.hadoop.hdfs.web.ADLConfKeys;
import java.util.regex.Pattern;
/**
 * Captures ADL jar version information. Required for debugging and analysis
 * purposes in the backend.
*/
public class ADLVersionInfo extends StringParam {
/**
* Parameter name.
*/
public static final String NAME = ADLConfKeys.ADL_WEBSDK_VERSION_KEY;
private static final StringParam.Domain DOMAIN = new StringParam.Domain(NAME,
Pattern.compile(".+"));
/**
* Constructor.
 * @param featureSetVersion Enabled feature set information
*/
public ADLVersionInfo(String featureSetVersion) {
super(DOMAIN, featureSetVersion);
}
@Override
public final String getName() {
return NAME;
}
}

View File

@ -1,45 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
/**
 * Append no-redirect parameter.
*/
public class AppendADLNoRedirectParam extends BooleanParam {
/**
* Parameter name.
*/
public static final String NAME = "append";
private static final Domain DOMAIN = new Domain(NAME);
/**
* Constructor.
*
* @param value the parameter value.
*/
public AppendADLNoRedirectParam(final Boolean value) {
super(DOMAIN, value);
}
@Override
public final String getName() {
return NAME;
}
}

View File

@ -1,44 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.web.resources;
/**
 * Create (write) no-redirect parameter.
*/
public class CreateADLNoRedirectParam extends BooleanParam {
/**
* Parameter name.
*/
public static final String NAME = "write";
private static final Domain DOMAIN = new Domain(NAME);
/**
* Constructor.
*
* @param value the parameter value.
*/
public CreateADLNoRedirectParam(final Boolean value) {
super(DOMAIN, value);
}
@Override
public final String getName() {
return NAME;
}
}

View File

@ -1,53 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web.resources;
/**
 * Supports single-writer semantics. Notifies the ADL backend that the stream
 * needs to be locked in order to protect against concurrent write operations
 * on the same stream.
*
* Used in append operation.
*/
public class LeaseParam extends StringParam {
public static final String NAME = "leaseId";
/**
* Default parameter value.
*/
public static final String DEFAULT = NULL;
private static final StringParam.Domain DOMAIN = new StringParam.Domain(NAME,
null);
/**
* Constructor.
*
* @param str a string representation of the parameter value.
*/
public LeaseParam(final String str) {
super(DOMAIN, str == null || str.equals(DEFAULT) ? null : str);
}
@Override
public final String getName() {
return NAME;
}
}

View File

@ -1,44 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hdfs.web.resources;
/**
 * Read no-redirect parameter.
*/
public class ReadADLNoRedirectParam extends BooleanParam {
/**
* Parameter name.
*/
public static final String NAME = "read";
private static final Domain DOMAIN = new Domain(NAME);
/**
* Constructor.
*
* @param value the parameter value.
*/
public ReadADLNoRedirectParam(final Boolean value) {
super(DOMAIN, value);
}
@Override
public final String getName() {
return NAME;
}
}

View File

@ -1,27 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
/**
 * A distributed implementation of {@link
 * org.apache.hadoop.hdfs.web.resources} for reading or extending query
 * parameters for the webhdfs specification. ADL-specific query parameters
 * also go in the same package.
*/
package org.apache.hadoop.hdfs.web.resources;

View File

@ -0,0 +1,16 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
org.apache.hadoop.fs.adl.AdlFileSystem

View File

@ -19,9 +19,7 @@
* [Limitations](#Limitations)
* [Usage](#Usage)
* [Concepts](#Concepts)
* [Webhdfs Compliance](#Webhdfs_Specification_Compliance)
* [OAuth2 Support](#OAuth2_Support)
* [Read Ahead Buffer Management](#Read_Ahead_Buffer_Management)
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
* [Using Refresh Token](#Refresh_Token)
* [Using Client Keys](#Client_Credential_Token)
@ -38,7 +36,6 @@ The jar file is named azure-datalake-store.jar.
## <a name="Features" />Features
* Read and write data stored in an Azure Data Lake Storage account.
* Partial support for [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html)
* Reference file system paths using URLs with the `adl` scheme for secure Webhdfs, i.e. SSL
encrypted access.
* Can act as a source of data in a MapReduce job, or a sink.
@ -46,14 +43,14 @@ The jar file is named azure-datalake-store.jar.
* Tested for scale.
## <a name="Limitations" />Limitations
Partial or no support for the following operations in [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html):
Partial or no support for the following operations:
* Operation on Symbolic Link
* Proxy Users
* File Truncate
* File Checksum
* File replication factor
* Home Directory Partially supported, based on OAuth2 token information and not the active user on the Hadoop cluster.
* Home directory is based on the OAuth2 token information and not the active user on the Hadoop cluster.
* Extended Attributes(XAttrs) Operations
* Snapshot Operations
* Delegation Token Operations
@ -68,101 +65,23 @@ Azure Data Lake Storage access path syntax is
Get started with an Azure Data Lake account at [https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/](https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/)
#### <a name="Webhdfs_Specification_Compliance" />Webhdfs Compliance
Azure Data Lake Storage exposes a public REST endpoint as per [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html) to access the storage file system.
The syntax to access an Azure Data Lake Storage account over [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html) is
https://<Account Name>.azuredatalakestore.net/webhdfs/v1/<File System Path>?<Query parameters>
#### <a name="#OAuth2_Support" />OAuth2 Support
Usage of Azure Data Lake Storage requires an OAuth2 bearer token to be present as part of the HTTPS header, as per the OAuth2 specification. A valid OAuth2 bearer token should be obtained from Azure Active Directory for valid users who have access to the Azure Data Lake Storage account.
Azure Active Directory (Azure AD) is Microsoft's multi-tenant cloud based directory and identity management service. See [https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/](https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/)
Azure Active Directory (Azure AD) is Microsoft's multi-tenant cloud based directory and identity management service. See [https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/](https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/)
The following sections describe the OAuth2 configuration in core-site.xml.
#### <a name="#Read_Ahead_Buffer_Management" />Read Ahead Buffer Management
Azure Data Lake Storage offers high throughput. To maximize throughput, applications can use this feature to buffer data concurrently, in memory, during read operations. The data is cached in memory per process per stream.
To enable or disable the read ahead feature:
<property>
<name>adl.feature.override.readahead</name>
<value>true</value>
<description>
Enables read aheads in the ADL client; the feature is used to improve read throughput.
This works in conjunction with the value set in adl.feature.override.readahead.max.buffersize.
When set to false the read ahead feature is turned off.
Default : True if not configured.
</description>
</property>
To configure the read ahead buffer size:
<property>
<name>adl.feature.override.readahead.max.buffersize</name>
<value>8388608</value>
<description>
Defines the maximum buffer size used to cache read ahead data; this is allocated per process.
Applicable only when adl.feature.override.readahead is set to true.
Default : 8388608 Byte i.e. 8MB if not configured.
</description>
</property>
To configure the number of concurrent connections to the Azure Data Lake Storage account:
<property>
<name>adl.feature.override.readahead.max.concurrent.connection</name>
<value>2</value>
<description>
Defines the maximum number of concurrent connections that can be established for
read ahead. If the data size is < 4MB then only 1 read network connection
is used. If the data size is > 4MB but < 8MB then 2 read network connections
are used. For data > 8MB, the value set under this property takes
effect. Applicable only when adl.feature.override.readahead is set
to true and the buffer size is > 8MB.
It is recommended to reset this property if adl.feature.override.readahead.max.buffersize
is < 8MB to gain performance. The application also has to consider the
throttling limit for the account before configuring a large buffer size.
</description>
</property>
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal).
### <a name="Refresh_Token" />Using Refresh Token
Update core-site.xml for OAuth2 configuration
<property>
<name>dfs.webhdfs.oauth2.refresh.token.expires.ms.since.epoch</name>
<value>0</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.credential</name>
<value>bearer.and.refresh.token</value>
</property>
Add the following properties to your core-site.xml
<property>
<name>dfs.webhdfs.oauth2.access.token</name>
<value>NOT_SET</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.refresh.url</name>
<value>https://login.windows.net/common/oauth2/token/</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.access.token.provider</name>
<value>org.apache.hadoop.fs.adl.oauth2.CachedRefreshTokenBasedAccessTokenProvider</value>
<name>dfs.adls.oauth2.access.token.provider.type</name>
<value>RefreshToken</value>
</property>
The application is required to set the client id and the OAuth2 refresh token from Azure Active Directory associated with the client id. See [https://github.com/AzureAD/azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java).
@ -170,12 +89,12 @@ Application require to set Client id and OAuth2 refresh token from Azure Active
**Do not share the client id and refresh token; they must be kept secret.**
<property>
<name>dfs.webhdfs.oauth2.client.id</name>
<name>dfs.adls.oauth2.client.id</name>
<value></value>
</property>
<property>
<name>dfs.webhdfs.oauth2.refresh.token</name>
<name>dfs.adls.oauth2.refresh.token</name>
<value></value>
</property>
@ -205,30 +124,20 @@ Application require to set Client id and OAuth2 refresh token from Azure Active
Add the following properties to your core-site.xml
<property>
<name>dfs.webhdfs.oauth2.access.token.provider</name>
<value>org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.refresh.url</name>
<name>dfs.adls.oauth2.refresh.url</name>
<value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.client.id</name>
<name>dfs.adls.oauth2.client.id</name>
<value>CLIENT ID FROM STEP 7 ABOVE</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.credential</name>
<name>dfs.adls.oauth2.credential</name>
<value>PASSWORD FROM STEP 7 ABOVE</value>
</property>
<property>
<name>fs.adls.oauth2.resource</name>
<value>https://management.core.windows.net/</value>
</property>
## <a name="Enabling_ADL" />Enabling ADL Filesystem
@ -273,7 +182,12 @@ The hadoop-azure module includes a full suite of unit tests. Most of the tests w
A selection of tests can run against Azure Data Lake Storage. To run tests against Adl storage, configure contract-test-options.xml with the Adl account information mentioned in the above sections, and turn on the contract test execution flag to trigger tests against Azure Data Lake Storage.
<property>
<name>dfs.adl.test.contract.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.adl.test.contract.enable</name>
<value>true</value>
</property>
<property>
<name>test.fs.adl.name</name>
<value>adl://yourcontainer.azuredatalakestore.net</value>
</property>

View File

@ -7,43 +7,41 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.common;
import com.eclipsesource.json.JsonObject;
import com.squareup.okhttp.mockwebserver.MockResponse;
import com.squareup.okhttp.mockwebserver.MockWebServer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.TestableAdlFileSystem;
import org.apache.hadoop.hdfs.client.HdfsClientConfigKeys;
import org.apache.hadoop.hdfs.web.oauth2.ConfCredentialBasedAccessTokenProvider;
import org.apache.hadoop.hdfs.web.oauth2.CredentialBasedAccessTokenProvider;
import org.apache.hadoop.hdfs.web.oauth2.OAuth2Constants;
import org.junit.After;
import org.junit.Before;
package org.apache.hadoop.fs.adl;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import com.squareup.okhttp.mockwebserver.MockWebServer;
import org.junit.After;
import org.junit.Before;
/**
* Mock server to simulate Adls backend calls. This infrastructure can be extended
* to override the expected server response based on the derived test functionality.
* Common functionality to generate token information before a request is sent to the
* adls backend is also managed within AdlMockWebServer implementation.
* adls backend is also managed within AdlMockWebServer implementation using
* {@link org.apache.hadoop.fs.adl.common.CustomMockTokenProvider}.
*/
public class AdlMockWebServer {
// Create a MockWebServer. These are lean enough that you can create a new
// instance for every unit test.
private MockWebServer server = null;
@ -67,22 +65,13 @@ public class AdlMockWebServer {
return conf;
}
public static MockResponse getTokenResponse() {
JsonObject jsonObject = new JsonObject()
.set(OAuth2Constants.EXPIRES_IN, "0987654321")
.set("token_type", "bearer").set(OAuth2Constants.ACCESS_TOKEN, "123");
MockResponse oauth2Response = new MockResponse();
oauth2Response.addHeader("Content-Type", "application/json");
oauth2Response.setResponseCode(200);
oauth2Response.setBody(jsonObject.toString());
return oauth2Response;
public void setConf(Configuration conf) {
this.conf = conf;
}
@Before
public void preTestSetup() throws IOException, URISyntaxException {
server = new MockWebServer();
server.enqueue(getTokenResponse());
// Start the server.
server.start();
@ -95,14 +84,8 @@ public class AdlMockWebServer {
// Responses are returned in the same order that they are enqueued.
fs = new TestableAdlFileSystem();
conf.set(HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY, "MY_CLIENTID");
conf.set(HdfsClientConfigKeys.ACCESS_TOKEN_PROVIDER_KEY,
ConfCredentialBasedAccessTokenProvider.class.getName());
conf.set(HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_KEY, "true");
conf.set(HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY, "http://localhost:" +
port + "/refresh");
conf.set(CredentialBasedAccessTokenProvider.OAUTH_CREDENTIAL_KEY,
"credential");
conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
CustomMockTokenProvider.class, AzureADTokenProvider.class);
URI uri = new URI("adl://localhost:" + port);
fs.initialize(uri, conf);

View File

@ -0,0 +1,262 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.AclEntry;
import org.apache.hadoop.fs.permission.AclEntryScope;
import org.apache.hadoop.fs.permission.AclEntryType;
import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.security.AccessControlException;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.junit.Assert;
import org.junit.Test;
/**
* Stub adl server and test acl data conversion within SDK and Hadoop adl
* client.
*/
public class TestACLFeatures extends AdlMockWebServer {
@Test(expected=AccessControlException.class)
public void testModifyAclEntries() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
List<AclEntry> entries = new ArrayList<AclEntry>();
AclEntry.Builder aclEntryBuilder = new AclEntry.Builder();
aclEntryBuilder.setName("hadoop");
aclEntryBuilder.setType(AclEntryType.USER);
aclEntryBuilder.setPermission(FsAction.ALL);
aclEntryBuilder.setScope(AclEntryScope.ACCESS);
entries.add(aclEntryBuilder.build());
aclEntryBuilder.setName("hdfs");
aclEntryBuilder.setType(AclEntryType.GROUP);
aclEntryBuilder.setPermission(FsAction.READ_WRITE);
aclEntryBuilder.setScope(AclEntryScope.DEFAULT);
entries.add(aclEntryBuilder.build());
getMockAdlFileSystem().modifyAclEntries(new Path("/test1/test2"), entries);
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem()
.modifyAclEntries(new Path("/test1/test2"), entries);
}
@Test(expected=AccessControlException.class)
public void testRemoveAclEntriesWithOnlyUsers()
throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
List<AclEntry> entries = new ArrayList<AclEntry>();
AclEntry.Builder aclEntryBuilder = new AclEntry.Builder();
aclEntryBuilder.setName("hadoop");
aclEntryBuilder.setType(AclEntryType.USER);
entries.add(aclEntryBuilder.build());
getMockAdlFileSystem().removeAclEntries(new Path("/test1/test2"), entries);
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem()
.removeAclEntries(new Path("/test1/test2"), entries);
}
@Test(expected=AccessControlException.class)
public void testRemoveAclEntries() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
List<AclEntry> entries = new ArrayList<AclEntry>();
AclEntry.Builder aclEntryBuilder = new AclEntry.Builder();
aclEntryBuilder.setName("hadoop");
aclEntryBuilder.setType(AclEntryType.USER);
aclEntryBuilder.setPermission(FsAction.ALL);
aclEntryBuilder.setScope(AclEntryScope.ACCESS);
entries.add(aclEntryBuilder.build());
aclEntryBuilder.setName("hdfs");
aclEntryBuilder.setType(AclEntryType.GROUP);
aclEntryBuilder.setPermission(FsAction.READ_WRITE);
aclEntryBuilder.setScope(AclEntryScope.DEFAULT);
entries.add(aclEntryBuilder.build());
getMockAdlFileSystem().removeAclEntries(new Path("/test1/test2"), entries);
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem()
.removeAclEntries(new Path("/test1/test2"), entries);
}
@Test(expected=AccessControlException.class)
public void testRemoveDefaultAclEntries()
throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().removeDefaultAcl(new Path("/test1/test2"));
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem().removeDefaultAcl(new Path("/test1/test2"));
}
@Test(expected=AccessControlException.class)
public void testRemoveAcl() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().removeAcl(new Path("/test1/test2"));
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem().removeAcl(new Path("/test1/test2"));
}
@Test(expected=AccessControlException.class)
public void testSetAcl() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
List<AclEntry> entries = new ArrayList<AclEntry>();
AclEntry.Builder aclEntryBuilder = new AclEntry.Builder();
aclEntryBuilder.setName("hadoop");
aclEntryBuilder.setType(AclEntryType.USER);
aclEntryBuilder.setPermission(FsAction.ALL);
aclEntryBuilder.setScope(AclEntryScope.ACCESS);
entries.add(aclEntryBuilder.build());
aclEntryBuilder.setName("hdfs");
aclEntryBuilder.setType(AclEntryType.GROUP);
aclEntryBuilder.setPermission(FsAction.READ_WRITE);
aclEntryBuilder.setScope(AclEntryScope.DEFAULT);
entries.add(aclEntryBuilder.build());
getMockAdlFileSystem().setAcl(new Path("/test1/test2"), entries);
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem().setAcl(new Path("/test1/test2"), entries);
}
@Test(expected=AccessControlException.class)
public void testCheckAccess() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.ALL);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.EXECUTE);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.READ);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem()
.access(new Path("/test1/test2"), FsAction.READ_EXECUTE);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem()
.access(new Path("/test1/test2"), FsAction.READ_WRITE);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.NONE);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().access(new Path("/test1/test2"), FsAction.WRITE);
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem()
.access(new Path("/test1/test2"), FsAction.WRITE_EXECUTE);
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem()
.access(new Path("/test1/test2"), FsAction.WRITE_EXECUTE);
}
@Test(expected=AccessControlException.class)
public void testSetPermission() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem()
.setPermission(new Path("/test1/test2"), FsPermission.getDefault());
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem()
.setPermission(new Path("/test1/test2"), FsPermission.getDefault());
}
@Test(expected=AccessControlException.class)
public void testSetOwner() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200));
getMockAdlFileSystem().setOwner(new Path("/test1/test2"), "hadoop", "hdfs");
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem()
.setOwner(new Path("/test1/test2"), "hadoop", "hdfs");
}
@Test
public void getAclStatusAsExpected() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200)
.setBody(TestADLResponseData.getGetAclStatusJSONResponse()));
AclStatus aclStatus = getMockAdlFileSystem()
.getAclStatus(new Path("/test1/test2"));
Assert.assertEquals(aclStatus.getGroup(), "supergroup");
Assert.assertEquals(aclStatus.getOwner(), "hadoop");
Assert.assertEquals((Short) aclStatus.getPermission().toShort(),
Short.valueOf("775", 8));
for (AclEntry entry : aclStatus.getEntries()) {
if (!(entry.toString().equalsIgnoreCase("user:carla:rw-") || entry
.toString().equalsIgnoreCase("group::r-x"))) {
Assert.fail("Unexpected entry : " + entry.toString());
}
}
}
@Test(expected=FileNotFoundException.class)
public void getAclStatusNotExists() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(404)
.setBody(TestADLResponseData.getFileNotFoundException()));
getMockAdlFileSystem().getAclStatus(new Path("/test1/test2"));
}
@Test(expected=AccessControlException.class)
public void testAclStatusDenied() throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(403)
.setBody(TestADLResponseData.getAccessControlException()));
getMockAdlFileSystem().getAclStatus(new Path("/test1/test2"));
}
}

View File

@ -24,15 +24,16 @@ import org.apache.hadoop.fs.FileStatus;
import java.util.Random;
/**
* This class is responsible for providing generic test methods for mock-up tests
* to generate stub responses for a network request.
* Mock-up response data returned from the Adl storage account.
*/
public final class TestADLResponseData {
private TestADLResponseData() {}
private TestADLResponseData() {
}
public static String getGetFileStatusJSONResponse(FileStatus status) {
String str = "{\"FileStatus\":{\"length\":" + status.getLen() + "," +
return "{\"FileStatus\":{\"length\":" + status.getLen() + "," +
"\"pathSuffix\":\"\",\"type\":\"" + (status.isDirectory() ?
"DIRECTORY" :
"FILE") + "\"" +
@ -42,21 +43,27 @@ public final class TestADLResponseData {
",\"replication\":" + status.getReplication() + ",\"permission\":\""
+ status.getPermission() + "\",\"owner\":\"" + status.getOwner()
+ "\",\"group\":\"" + status.getGroup() + "\"}}";
return str;
}
public static String getGetFileStatusJSONResponse() {
return getGetFileStatusJSONResponse(4194304);
}
public static String getGetAclStatusJSONResponse() {
return "{\n" + " \"AclStatus\": {\n" + " \"entries\": [\n"
+ " \"user:carla:rw-\", \n" + " \"group::r-x\"\n"
+ " ], \n" + " \"group\": \"supergroup\", \n"
+ " \"owner\": \"hadoop\", \n"
+ " \"permission\":\"775\",\n" + " \"stickyBit\": false\n"
+ " }\n" + "}";
}
public static String getGetFileStatusJSONResponse(long length) {
String str = "{\"FileStatus\":{\"length\":" + length + "," +
return "{\"FileStatus\":{\"length\":" + length + "," +
"\"pathSuffix\":\"\",\"type\":\"FILE\",\"blockSize\":268435456," +
"\"accessTime\":1452103827023,\"modificationTime\":1452103827023," +
"\"replication\":0,\"permission\":\"777\"," +
"\"owner\":\"NotSupportYet\",\"group\":\"NotSupportYet\"}}";
return str;
}
public static String getListFileStatusJSONResponse(int dirSize) {
@ -71,40 +78,60 @@ public final class TestADLResponseData {
}
list = list.substring(0, list.length() - 1);
String str = "{\"FileStatuses\":{\"FileStatus\":[" + list + "]}}";
return str;
return "{\"FileStatuses\":{\"FileStatus\":[" + list + "]}}";
}
public static String getJSONResponse(boolean status) {
String str = "{\"boolean\":" + status + "}";
return str;
return "{\"boolean\":" + status + "}";
}
public static String getErrorIllegalArgumentExceptionJSONResponse() {
String str = "{\n" +
return "{\n" +
" \"RemoteException\":\n" +
" {\n" +
" \"exception\" : \"IllegalArgumentException\",\n" +
" \"javaClassName\": \"java.lang.IllegalArgumentException\",\n" +
" \"message\" : \"Bad Offset 0x83090015\"" +
" \"message\" : \"Invalid\"" +
" }\n" +
"}";
}
return str;
public static String getErrorBadOffsetExceptionJSONResponse() {
return "{\n" +
" \"RemoteException\":\n" +
" {\n" +
" \"exception\" : \"BadOffsetException\",\n" +
" \"javaClassName\": \"org.apache.hadoop.fs.adl"
+ ".BadOffsetException\",\n" +
" \"message\" : \"Invalid\"" +
" }\n" +
"}";
}
public static String getErrorInternalServerExceptionJSONResponse() {
String str = "{\n" +
return "{\n" +
" \"RemoteException\":\n" +
" {\n" +
" \"exception\" : \"RumtimeException\",\n" +
" \"javaClassName\": \"java.lang.RumtimeException\",\n" +
" \"exception\" : \"RuntimeException\",\n" +
" \"javaClassName\": \"java.lang.RuntimeException\",\n" +
" \"message\" : \"Internal Server Error\"" +
" }\n" +
"}";
}
return str;
public static String getAccessControlException() {
return "{\n" + " \"RemoteException\":\n" + " {\n"
+ " \"exception\" : \"AccessControlException\",\n"
+ " \"javaClassName\": \"org.apache.hadoop.security"
+ ".AccessControlException\",\n"
+ " \"message\" : \"Permission denied: ...\"\n" + " }\n" + "}";
}
public static String getFileNotFoundException() {
return "{\n" + " \"RemoteException\":\n" + " {\n"
+ " \"exception\" : \"FileNotFoundException\",\n"
+ " \"javaClassName\": \"java.io.FileNotFoundException\",\n"
+ " \"message\" : \"File does not exist\"\n" + " }\n" + "}";
}
public static byte[] getRandomByteArrayData() {

View File

@ -7,58 +7,51 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web;
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.TestADLResponseData;
import org.apache.hadoop.fs.common.AdlMockWebServer;
import org.apache.hadoop.fs.common.TestDataForRead;
import org.junit.After;
import org.apache.hadoop.fs.adl.common.Parallelized;
import org.apache.hadoop.fs.adl.common.TestDataForRead;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.Timeout;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.ByteArrayInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Random;
import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY;
/**
* This class stress tests positional reads against the number of network
* calls required to fetch the data. The test ensures that data
* integrity and order of the data is maintained. This tests are meant to test
* BufferManager.java and BatchByteArrayInputStream implementation.
* integrity and order of the data is maintained.
*/
@RunWith(Parameterized.class)
@RunWith(Parallelized.class)
public class TestAdlRead extends AdlMockWebServer {
// Keeping a timeout of 1 hour to ensure the test completes and does not
// terminate prematurely due to high backend latency.
@Rule
public Timeout globalTimeout = new Timeout(60 * 60000);
private TestDataForRead testData;
public TestAdlRead(TestDataForRead testData) {
Configuration configuration = new Configuration();
configuration.setInt(READ_AHEAD_BUFFER_SIZE_KEY, 4 * 1024);
setConf(configuration);
this.testData = testData;
getConf().set("adl.feature.override.readahead.max.buffersize", "8192");
getConf().set("adl.feature.override.readahead.max.concurrent.connection",
"1");
}
@Parameterized.Parameters(name = "{index}")
@ -68,32 +61,29 @@ public class TestAdlRead extends AdlMockWebServer {
//--------------------------
// Test Data
//--------------------------
{new TestDataForRead("Hello World".getBytes(), 3, 1000, true)},
{new TestDataForRead("Hello World".getBytes(), 2, 1000, true)},
{new TestDataForRead(
("the problem you appear to be wrestling with is that this doesn't "
+ "display very well. ").getBytes(), 3, 1000, true)},
+ "display very well. ").getBytes(), 2, 1000, true)},
{new TestDataForRead(("您的數據是寶貴的資產,以您的組織,並有當前和未來價值。由於這個原因,"
+ "所有的數據應存儲以供將來分析。今天,這往往是不這樣做," + "因為傳統的分析基礎架構的限制,"
+ "像模式的預定義,存儲大數據集和不同的數據筒倉的傳播的成本。"
+ "為了應對這一挑戰,數據湖面概念被引入作為一個企業級存儲庫來存儲所有"
+ "類型的在一個地方收集到的數據。對於運作和探索性分析的目的,所有類型的" + "數據可以定義需求或模式之前被存儲在數據湖。")
.getBytes(), 2, 1000, true)}, {new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(4 * 1024), 2, 10, true)},
{new TestDataForRead(TestADLResponseData.getRandomByteArrayData(100), 2,
1000, true)}, {new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(1 * 1024), 2, 50, true)},
{new TestDataForRead(
("Chinese Indonesians (Indonesian: Orang Tionghoa-Indonesia; "
+ "Chinese: "
+ "trad 印度尼西亞華人, simp 印度尼西亚华人, pin Yìndùníxīyà Huárén), are "
+ "Indonesians descended from various Chinese ethnic groups, "
+ "particularly Han.").getBytes(), 3, 1000, true)},
{new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(5 * 1024), 3, 1000,
true)}, {new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(1 * 1024), 3, 50, true)},
{new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(8 * 1024), 3, 10, true)},
{new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(32 * 1024), 6, 10,
TestADLResponseData.getRandomByteArrayData(8 * 1024), 3, 10,
false)}, {new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(48 * 1024), 8, 10, false)}});
}
@After
@Before
public void cleanReadBuffer() {
BufferManager.getInstance().clear();
TestADLResponseData.getRandomByteArrayData(16 * 1024), 5, 10, false)},
{new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(32 * 1024), 9, 10,
false)}, {new TestDataForRead(
TestADLResponseData.getRandomByteArrayData(64 * 1024), 17, 10,
false)}});
}
@Test
@ -101,7 +91,18 @@ public class TestAdlRead extends AdlMockWebServer {
getMockServer().setDispatcher(testData.getDispatcher());
FSDataInputStream in = getMockAdlFileSystem().open(new Path("/test"));
byte[] expectedData = new byte[testData.getActualData().length];
Assert.assertEquals(in.read(expectedData), expectedData.length);
int n = 0;
int len = expectedData.length;
int off = 0;
while (n < len) {
int count = in.read(expectedData, off + n, len - n);
if (count < 0) {
throw new EOFException();
}
n += count;
}
Assert.assertEquals(testData.getActualData().length, expectedData.length);
Assert.assertArrayEquals(expectedData, testData.getActualData());
in.close();
if (testData.isCheckOfNoOfCalls()) {
@ -151,15 +152,9 @@ public class TestAdlRead extends AdlMockWebServer {
in.readFully(0, expectedData);
Assert.assertArrayEquals(expectedData, testData.getActualData());
in.seek(0);
in.readFully(expectedData, 0, expectedData.length);
in.readFully(0, expectedData, 0, expectedData.length);
Assert.assertArrayEquals(expectedData, testData.getActualData());
in.close();
if (testData.isCheckOfNoOfCalls()) {
Assert.assertEquals(testData.getExpectedNoNetworkCall(),
getMockServer().getRequestCount());
}
}
@Test
@ -197,9 +192,5 @@ public class TestAdlRead extends AdlMockWebServer {
}
in.close();
if (testData.isCheckOfNoOfCalls()) {
Assert.assertEquals(testData.getExpectedNoNetworkCall(),
getMockServer().getRequestCount());
}
}
}
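
The explicit loop added in the test above reflects that InputStream.read(byte[], int, int) may return fewer bytes than requested; only a negative return value signals end of stream. A minimal standalone sketch of the same idiom (the helper name readAll is illustrative, not part of this patch):

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;

final class ReadAllSketch {
  // Keep reading until the buffer is full or the stream ends prematurely.
  static void readAll(InputStream in, byte[] buf) throws IOException {
    int n = 0;
    while (n < buf.length) {
      int count = in.read(buf, n, buf.length - n);
      if (count < 0) {
        throw new EOFException("Stream ended before the buffer was filled");
      }
      n += count;
    }
  }
}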

View File

@ -0,0 +1,133 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.AccessTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.ClientCredsTokenProvider;
import com.microsoft.azure.datalake.store.oauth2.RefreshTokenBasedTokenProvider;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY;
import static org.apache.hadoop.fs.adl.TokenProviderType.*;
import org.junit.Assert;
import org.junit.Test;
/**
* Test appropriate token provider is loaded as per configuration.
*/
public class TestAzureADTokenProvider {
@Test
public void testRefreshTokenProvider()
throws URISyntaxException, IOException {
Configuration conf = new Configuration();
conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID");
conf.set(AZURE_AD_REFRESH_TOKEN_KEY, "XYZ");
conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, RefreshToken);
conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh");
URI uri = new URI("adl://localhost:8080");
AdlFileSystem fileSystem = new AdlFileSystem();
fileSystem.initialize(uri, conf);
AccessTokenProvider tokenProvider = fileSystem.getTokenProvider();
Assert.assertTrue(tokenProvider instanceof RefreshTokenBasedTokenProvider);
}
@Test
public void testClientCredTokenProvider()
throws IOException, URISyntaxException {
Configuration conf = new Configuration();
conf.set(AZURE_AD_CLIENT_ID_KEY, "MY_CLIENTID");
conf.set(AZURE_AD_CLIENT_SECRET_KEY, "XYZ");
conf.setEnum(AZURE_AD_TOKEN_PROVIDER_TYPE_KEY, ClientCredential);
conf.set(AZURE_AD_REFRESH_URL_KEY, "http://localhost:8080/refresh");
URI uri = new URI("adl://localhost:8080");
AdlFileSystem fileSystem = new AdlFileSystem();
fileSystem.initialize(uri, conf);
AccessTokenProvider tokenProvider = fileSystem.getTokenProvider();
Assert.assertTrue(tokenProvider instanceof ClientCredsTokenProvider);
}
@Test
public void testCustomCredTokenProvider()
throws URISyntaxException, IOException {
Configuration conf = new Configuration();
conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
CustomMockTokenProvider.class, AzureADTokenProvider.class);
URI uri = new URI("adl://localhost:8080");
AdlFileSystem fileSystem = new AdlFileSystem();
fileSystem.initialize(uri, conf);
AccessTokenProvider tokenProvider = fileSystem.getTokenProvider();
Assert.assertTrue(tokenProvider instanceof SdkTokenProviderAdapter);
}
@Test
public void testInvalidProviderConfigurationForType()
throws URISyntaxException, IOException {
Configuration conf = new Configuration();
URI uri = new URI("adl://localhost:8080");
AdlFileSystem fileSystem = new AdlFileSystem();
try {
fileSystem.initialize(uri, conf);
Assert.fail("Initialization should have failed due to missing token provider "
+ "configuration");
} catch (IllegalArgumentException e) {
Assert.assertTrue(
e.getMessage().contains("dfs.adls.oauth2.access.token.provider"));
}
conf.setClass(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
CustomMockTokenProvider.class, AzureADTokenProvider.class);
fileSystem.initialize(uri, conf);
}
@Test
public void testInvalidProviderConfigurationForClassPath()
throws URISyntaxException, IOException {
Configuration conf = new Configuration();
URI uri = new URI("adl://localhost:8080");
AdlFileSystem fileSystem = new AdlFileSystem();
conf.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
"wrong.classpath.CustomMockTokenProvider");
try {
fileSystem.initialize(uri, conf);
Assert.fail("Initialization should have failed due to invalid provider "
+ "configuration");
} catch (RuntimeException e) {
Assert.assertTrue(
e.getMessage().contains("wrong.classpath.CustomMockTokenProvider"));
}
}
}
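
For reference, the configuration exercised by these tests corresponds to the following client-side setup. This is a minimal sketch, assuming only the dfs.adls.oauth2.* keys asserted in TestValidateConfiguration below; the account URI, client id, secret, and refresh URL values are placeholders.

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.AdlFileSystem;

public class ClientCredentialSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Select the ClientCredential token provider type.
    conf.set("dfs.adls.oauth2.access.token.provider.type", "ClientCredential");
    conf.set("dfs.adls.oauth2.client.id", "MY_CLIENTID");
    conf.set("dfs.adls.oauth2.credential", "MY_SECRET");
    conf.set("dfs.adls.oauth2.refresh.url", "https://login.example.com/token");

    AdlFileSystem fs = new AdlFileSystem();
    // As asserted in testClientCredTokenProvider(), initialize() ends up
    // with a ClientCredsTokenProvider behind the scenes.
    fs.initialize(new URI("adl://myaccount.azuredatalakestore.net"), conf);
    fs.close();
  }
}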

View File

@ -7,17 +7,16 @@
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web;
package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.Dispatcher;
import com.squareup.okhttp.mockwebserver.MockResponse;
@ -25,13 +24,13 @@ import com.squareup.okhttp.mockwebserver.RecordedRequest;
import okio.Buffer;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.TestADLResponseData;
import org.apache.hadoop.fs.common.AdlMockWebServer;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.ByteArrayInputStream;
import java.io.IOException;
@ -51,25 +50,22 @@ import java.util.regex.Pattern;
/**
* This class is responsible for testing multiple threads trying to access the same
* or multiple files from the offset. This tests are meant to test
* BufferManager.java and BatchByteArrayInputStream implementation.
* or multiple files from the offset.
*/
@RunWith(Parameterized.class)
public class TestConcurrentDataReadOperations extends AdlMockWebServer {
private static final Logger LOG = LoggerFactory
.getLogger(TestConcurrentDataReadOperations.class);
private static final Object LOCK = new Object();
private static FSDataInputStream commonHandle = null;
private static Object lock = new Object();
private int concurrencyLevel;
public TestConcurrentDataReadOperations(int concurrencyLevel) {
this.concurrencyLevel = concurrencyLevel;
getConf().set("adl.feature.override.readahead.max.buffersize", "102400");
getConf().set("adl.feature.override.readahead.max.concurrent.connection",
"1");
}
@Parameterized.Parameters(name = "{index}")
public static Collection testDataNumberOfConcurrentRun() {
public static Collection<?> testDataNumberOfConcurrentRun() {
return Arrays.asList(new Object[][] {{1}, {2}, {3}, {4}, {5}});
}
@ -85,10 +81,6 @@ public class TestConcurrentDataReadOperations extends AdlMockWebServer {
@Override
public MockResponse dispatch(RecordedRequest recordedRequest)
throws InterruptedException {
if (recordedRequest.getPath().equals("/refresh")) {
return AdlMockWebServer.getTokenResponse();
}
CreateTestData currentRequest = null;
for (CreateTestData local : testData) {
if (recordedRequest.getPath().contains(local.path.toString())) {
@ -116,19 +108,20 @@ public class TestConcurrentDataReadOperations extends AdlMockWebServer {
Pattern pattern = Pattern.compile("offset=([0-9]+)");
Matcher matcher = pattern.matcher(request);
if (matcher.find()) {
System.out.println(matcher.group(1));
LOG.debug(matcher.group(1));
offset = Integer.parseInt(matcher.group(1));
}
pattern = Pattern.compile("length=([0-9]+)");
matcher = pattern.matcher(request);
if (matcher.find()) {
System.out.println(matcher.group(1));
LOG.debug(matcher.group(1));
byteCount = Integer.parseInt(matcher.group(1));
}
Buffer buf = new Buffer();
buf.write(currentRequest.data, offset, byteCount);
buf.write(currentRequest.data, offset,
Math.min(currentRequest.data.length - offset, byteCount));
return new MockResponse().setResponseCode(200)
.setChunkedBody(buf, 4 * 1024 * 1024);
}
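
The Math.min clamp added to the mock dispatcher keeps the served byte range inside the backing array when the requested length runs past end of file. A reduced sketch of that range handling (the helper name is hypothetical):

final class RangeSliceSketch {
  // Serve at most the bytes that actually exist after 'offset'.
  static byte[] slice(byte[] data, int offset, int requestedLength) {
    int available = Math.max(0, data.length - offset);
    int count = Math.min(available, requestedLength);
    byte[] out = new byte[count];
    System.arraycopy(data, offset, out, 0, count);
    return out;
  }
}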
@ -204,7 +197,7 @@ public class TestConcurrentDataReadOperations extends AdlMockWebServer {
for (int i = 0; i < concurrencyLevel * 5; i++) {
ReadTestData localReadData = new ReadTestData();
int offset = random.nextInt((1024 * 1024)-1);
int offset = random.nextInt((1024 * 1024) - 1);
int length = 1024 * 1024 - offset;
byte[] expectedData = new byte[length];
buffered.reset();
@ -279,7 +272,7 @@ public class TestConcurrentDataReadOperations extends AdlMockWebServer {
try {
FSDataInputStream in;
if (useSameStream) {
synchronized (lock) {
synchronized (LOCK) {
if (commonHandle == null) {
commonHandle = getMockAdlFileSystem().open(path);
}

View File

@ -0,0 +1,136 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.permission.FsPermission;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Collection;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
/**
* Test access token provider behaviour with a custom token provider and with
* the token provider cache enabled.
*/
@RunWith(Parameterized.class)
public class TestCustomTokenProvider extends AdlMockWebServer {
private static final long TEN_MINUTES_IN_MILIS = 600000;
private int backendCallCount;
private int expectedCallbackToAccessToken;
private TestableAdlFileSystem[] fileSystems;
private Class typeOfTokenProviderClass;
private long expiryFromNow;
private int fsObjectCount;
public TestCustomTokenProvider(Class typeOfTokenProviderClass,
long expiryFromNow, int fsObjectCount, int backendCallCount,
int expectedCallbackToAccessToken)
throws IllegalAccessException, InstantiationException, URISyntaxException,
IOException {
this.typeOfTokenProviderClass = typeOfTokenProviderClass;
this.expiryFromNow = expiryFromNow;
this.fsObjectCount = fsObjectCount;
this.backendCallCount = backendCallCount;
this.expectedCallbackToAccessToken = expectedCallbackToAccessToken;
}
@Parameterized.Parameters(name = "{index}")
public static Collection testDataForTokenProvider() {
return Arrays.asList(new Object[][] {
// Data set in order
// INPUT - CustomTokenProvider class to load
// INPUT - expiry time in millis relative to the current time
// INPUT - No. of FileSystem object
// INPUT - No. of backend calls per FileSystem object
// EXPECTED - Number of callbacks to get token after test finished.
{CustomMockTokenProvider.class, 0, 1, 1, 1},
{CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 1, 1, 1},
{CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 2, 1, 2},
{CustomMockTokenProvider.class, TEN_MINUTES_IN_MILIS, 10, 10, 10}});
}
/**
* Init is invoked explicitly so that the base class mock server is set up
* before test data initialization is done.
*
* @throws IOException
* @throws URISyntaxException
*/
public void init() throws IOException, URISyntaxException {
Configuration configuration = new Configuration();
configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
typeOfTokenProviderClass.getName());
fileSystems = new TestableAdlFileSystem[fsObjectCount];
URI uri = new URI("adl://localhost:" + getPort());
for (int i = 0; i < fsObjectCount; ++i) {
fileSystems[i] = new TestableAdlFileSystem();
fileSystems[i].initialize(uri, configuration);
((CustomMockTokenProvider) fileSystems[i].getAzureTokenProvider())
.setExpiryTimeInMillisAfter(expiryFromNow);
}
}
@Test
public void testCustomTokenManagement()
throws IOException, URISyntaxException {
int accessTokenCallbackDuringExec = 0;
init();
for (TestableAdlFileSystem tfs : fileSystems) {
for (int i = 0; i < backendCallCount; ++i) {
getMockServer().enqueue(new MockResponse().setResponseCode(200)
.setBody(TestADLResponseData.getGetFileStatusJSONResponse()));
FileStatus fileStatus = tfs.getFileStatus(new Path("/test1/test2"));
Assert.assertTrue(fileStatus.isFile());
Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" +
getMockServer().getPort() + "/test1/test2",
fileStatus.getPath().toString());
Assert.assertEquals(4194304, fileStatus.getLen());
Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize());
Assert.assertEquals(1, fileStatus.getReplication());
Assert
.assertEquals(new FsPermission("777"), fileStatus.getPermission());
Assert.assertEquals("NotSupportYet", fileStatus.getOwner());
Assert.assertEquals("NotSupportYet", fileStatus.getGroup());
}
accessTokenCallbackDuringExec += ((CustomMockTokenProvider) tfs
.getAzureTokenProvider()).getAccessTokenRequestCount();
}
Assert.assertEquals(expectedCallbackToAccessToken,
accessTokenCallbackDuringExec);
}
}

View File

@ -22,15 +22,18 @@ package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.common.AdlMockWebServer;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.Time;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.URISyntaxException;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
/**
* This class is responsible for testing local getFileStatus implementation
* to cover correct parsing of successful and error JSON response
@ -39,6 +42,8 @@ import java.net.URISyntaxException;
* org.apache.hadoop.fs.adl.live testing package.
*/
public class TestGetFileStatus extends AdlMockWebServer {
private static final Logger LOG = LoggerFactory
.getLogger(TestGetFileStatus.class);
@Test
public void getFileStatusReturnsAsExpected()
@ -46,20 +51,20 @@ public class TestGetFileStatus extends AdlMockWebServer {
getMockServer().enqueue(new MockResponse().setResponseCode(200)
.setBody(TestADLResponseData.getGetFileStatusJSONResponse()));
long startTime = Time.monotonicNow();
FileStatus fileStatus = getMockAdlFileSystem().getFileStatus(
new Path("/test1/test2"));
FileStatus fileStatus = getMockAdlFileSystem()
.getFileStatus(new Path("/test1/test2"));
long endTime = Time.monotonicNow();
System.out.println("Time : " + (endTime - startTime));
LOG.debug("Time : " + (endTime - startTime));
Assert.assertTrue(fileStatus.isFile());
Assert.assertEquals(fileStatus.getPath().toString(),
"adl://" + getMockServer().getHostName() + ":"
+ getMockServer().getPort()
+ "/test1/test2");
Assert.assertEquals(fileStatus.getLen(), 4194304);
Assert.assertEquals(fileStatus.getBlockSize(), 268435456);
Assert.assertEquals(fileStatus.getReplication(), 0);
Assert.assertEquals(fileStatus.getPermission(), new FsPermission("777"));
Assert.assertEquals(fileStatus.getOwner(), "NotSupportYet");
Assert.assertEquals(fileStatus.getGroup(), "NotSupportYet");
Assert.assertEquals("adl://" + getMockServer().getHostName() + ":" +
getMockServer().getPort() + "/test1/test2",
fileStatus.getPath().toString());
Assert.assertEquals(4194304, fileStatus.getLen());
Assert.assertEquals(ADL_BLOCK_SIZE, fileStatus.getBlockSize());
Assert.assertEquals(1, fileStatus.getReplication());
Assert.assertEquals(new FsPermission("777"), fileStatus.getPermission());
Assert.assertEquals("NotSupportYet", fileStatus.getOwner());
Assert.assertEquals("NotSupportYet", fileStatus.getGroup());
}
}

View File

@ -22,10 +22,11 @@ package org.apache.hadoop.fs.adl;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.common.AdlMockWebServer;
import org.apache.hadoop.util.Time;
import org.junit.Assert;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
@ -37,15 +38,18 @@ import java.io.IOException;
*/
public class TestListStatus extends AdlMockWebServer {
private static final Logger LOG = LoggerFactory
.getLogger(TestListStatus.class);
@Test
public void listStatusReturnsAsExpected() throws IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200)
.setBody(TestADLResponseData.getListFileStatusJSONResponse(10)));
long startTime = Time.monotonicNow();
FileStatus[] ls = getMockAdlFileSystem().listStatus(
new Path("/test1/test2"));
FileStatus[] ls = getMockAdlFileSystem()
.listStatus(new Path("/test1/test2"));
long endTime = Time.monotonicNow();
System.out.println("Time : " + (endTime - startTime));
LOG.debug("Time : " + (endTime - startTime));
Assert.assertEquals(ls.length, 10);
getMockServer().enqueue(new MockResponse().setResponseCode(200)
@ -53,7 +57,7 @@ public class TestListStatus extends AdlMockWebServer {
startTime = Time.monotonicNow();
ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
endTime = Time.monotonicNow();
System.out.println("Time : " + (endTime - startTime));
LOG.debug("Time : " + (endTime - startTime));
Assert.assertEquals(ls.length, 200);
getMockServer().enqueue(new MockResponse().setResponseCode(200)
@ -61,12 +65,12 @@ public class TestListStatus extends AdlMockWebServer {
startTime = Time.monotonicNow();
ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
endTime = Time.monotonicNow();
System.out.println("Time : " + (endTime - startTime));
LOG.debug("Time : " + (endTime - startTime));
Assert.assertEquals(ls.length, 2048);
}
@Test
public void listStatusonFailure() throws IOException {
public void listStatusOnFailure() throws IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(403).setBody(
TestADLResponseData.getErrorIllegalArgumentExceptionJSONResponse()));
FileStatus[] ls = null;
@ -74,14 +78,18 @@ public class TestListStatus extends AdlMockWebServer {
try {
ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
} catch (IOException e) {
Assert.assertTrue(e.getMessage().contains("Bad Offset 0x83090015"));
Assert.assertTrue(e.getMessage().contains("Invalid"));
}
long endTime = Time.monotonicNow();
System.out.println("Time : " + (endTime - startTime));
LOG.debug("Time : " + (endTime - startTime));
// SDK may increase number of retry attempts before error is propagated
// to caller. Adding max 10 error responses in the queue to align with SDK.
for (int i = 0; i < 10; ++i) {
getMockServer().enqueue(new MockResponse().setResponseCode(500).setBody(
TestADLResponseData.getErrorInternalServerExceptionJSONResponse()));
}
getMockServer().enqueue(new MockResponse().setResponseCode(500)
.setBody(
TestADLResponseData.getErrorInternalServerExceptionJSONResponse()));
startTime = Time.monotonicNow();
try {
ls = getMockAdlFileSystem().listStatus(new Path("/test1/test2"));
@ -89,7 +97,7 @@ public class TestListStatus extends AdlMockWebServer {
Assert.assertTrue(e.getMessage().contains("Internal Server Error"));
}
endTime = Time.monotonicNow();
System.out.println("Time : " + (endTime - startTime));
LOG.debug("Time : " + (endTime - startTime));
}
}

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
/**
* This class verifies path conversion to SDK.
*/
public class TestRelativePathFormation {
@Test
public void testToRelativePath() throws URISyntaxException, IOException {
AdlFileSystem fs = new AdlFileSystem();
Configuration configuration = new Configuration();
configuration.set(AZURE_AD_TOKEN_PROVIDER_CLASS_KEY,
"org.apache.hadoop.fs.adl.common.CustomMockTokenProvider");
fs.initialize(new URI("adl://temp.account.net"), configuration);
Assert.assertEquals("/usr", fs.toRelativeFilePath(new Path("/usr")));
Assert.assertEquals("/usr",
fs.toRelativeFilePath(new Path("adl://temp.account.net/usr")));
// When working directory is set.
fs.setWorkingDirectory(new Path("/a/b/"));
Assert.assertEquals("/usr", fs.toRelativeFilePath(new Path("/usr")));
Assert.assertEquals("/a/b/usr", fs.toRelativeFilePath(new Path("usr")));
Assert.assertEquals("/usr",
fs.toRelativeFilePath(new Path("adl://temp.account.net/usr")));
Assert.assertEquals("/usr",
fs.toRelativeFilePath(new Path("wasb://temp.account.net/usr")));
}
}

View File

@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl;
import org.junit.Assert;
import org.junit.Test;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_BLOCK_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.ADL_EXPERIMENT_POSITIONAL_READ_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.ADL_REPLICATION_FACTOR;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_ID_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_CLIENT_SECRET_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_TOKEN_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.AZURE_AD_REFRESH_URL_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_CLASS_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.AZURE_AD_TOKEN_PROVIDER_TYPE_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_READ_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.DEFAULT_WRITE_AHEAD_BUFFER_SIZE;
import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_DEFAULT;
import static org.apache.hadoop.fs.adl.AdlConfKeys.LATENCY_TRACKER_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys.READ_AHEAD_BUFFER_SIZE_KEY;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.TOKEN_PROVIDER_TYPE_CLIENT_CRED;
import static org.apache.hadoop.fs.adl.AdlConfKeys
.TOKEN_PROVIDER_TYPE_REFRESH_TOKEN;
import static org.apache.hadoop.fs.adl.AdlConfKeys.WRITE_BUFFER_SIZE_KEY;
/**
* Validate configuration keys defined for adl storage file system instance.
*/
public class TestValidateConfiguration {
@Test
public void validateConfigurationKeys() {
Assert
.assertEquals("dfs.adls.oauth2.refresh.url", AZURE_AD_REFRESH_URL_KEY);
Assert.assertEquals("dfs.adls.oauth2.access.token.provider",
AZURE_AD_TOKEN_PROVIDER_CLASS_KEY);
Assert.assertEquals("dfs.adls.oauth2.client.id", AZURE_AD_CLIENT_ID_KEY);
Assert.assertEquals("dfs.adls.oauth2.refresh.token",
AZURE_AD_REFRESH_TOKEN_KEY);
Assert
.assertEquals("dfs.adls.oauth2.credential", AZURE_AD_CLIENT_SECRET_KEY);
Assert.assertEquals("adl.debug.override.localuserasfileowner",
ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER);
Assert.assertEquals("dfs.adls.oauth2.access.token.provider.type",
AZURE_AD_TOKEN_PROVIDER_TYPE_KEY);
Assert.assertEquals("adl.feature.client.cache.readahead",
READ_AHEAD_BUFFER_SIZE_KEY);
Assert.assertEquals("adl.feature.client.cache.drop.behind.writes",
WRITE_BUFFER_SIZE_KEY);
Assert.assertEquals("RefreshToken", TOKEN_PROVIDER_TYPE_REFRESH_TOKEN);
Assert.assertEquals("ClientCredential", TOKEN_PROVIDER_TYPE_CLIENT_CRED);
Assert.assertEquals("adl.dfs.enable.client.latency.tracker",
LATENCY_TRACKER_KEY);
Assert.assertEquals(true, LATENCY_TRACKER_DEFAULT);
Assert.assertEquals(true, ADL_EXPERIMENT_POSITIONAL_READ_DEFAULT);
Assert.assertEquals("adl.feature.experiment.positional.read.enable",
ADL_EXPERIMENT_POSITIONAL_READ_KEY);
Assert.assertEquals(1, ADL_REPLICATION_FACTOR);
Assert.assertEquals(256 * 1024 * 1024, ADL_BLOCK_SIZE);
Assert.assertEquals(false, ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT);
Assert.assertEquals(4 * 1024 * 1024, DEFAULT_READ_AHEAD_BUFFER_SIZE);
Assert.assertEquals(4 * 1024 * 1024, DEFAULT_WRITE_AHEAD_BUFFER_SIZE);
}
}
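
The keys and defaults pinned down above can be read back from a Configuration in the usual way. A small sketch, assuming only the key names and default values that this test asserts:

import org.apache.hadoop.conf.Configuration;

public class AdlConfDefaultsSketch {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // 4 MB read-ahead buffer unless adl.feature.client.cache.readahead is set.
    int readAhead = conf.getInt("adl.feature.client.cache.readahead",
        4 * 1024 * 1024);
    // 4 MB write buffer unless adl.feature.client.cache.drop.behind.writes
    // is overridden.
    int writeBuffer = conf.getInt("adl.feature.client.cache.drop.behind.writes",
        4 * 1024 * 1024);
    // Client-side latency tracking defaults to enabled.
    boolean latencyTracker = conf.getBoolean(
        "adl.dfs.enable.client.latency.tracker", true);
    System.out.println(readAhead + " " + writeBuffer + " " + latencyTracker);
  }
}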

View File

@ -20,8 +20,7 @@
package org.apache.hadoop.fs.adl;
/**
* This class overrides AdlFileSystem to change transport scheme to http instead
* of https to run against Mock Server.
* AdlFileSystem subclass that mocks Adl storage against a local HTTP service.
*/
public class TestableAdlFileSystem extends AdlFileSystem {
@Override

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.adl.common;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;
import java.io.IOException;
import java.util.Date;
import java.util.Random;
/**
* Custom token management without cache enabled.
*/
public class CustomMockTokenProvider extends AzureADTokenProvider {
private Random random;
private long expiryTime;
private int accessTokenRequestCount = 0;
@Override
public void initialize(Configuration configuration) throws IOException {
random = new Random();
}
@Override
public String getAccessToken() throws IOException {
accessTokenRequestCount++;
return String.valueOf(random.nextInt());
}
@Override
public Date getExpiryTime() {
Date before10Min = new Date();
before10Min.setTime(expiryTime);
return before10Min;
}
public void setExpiryTimeInMillisAfter(long timeInMillis) {
expiryTime = System.currentTimeMillis() + timeInMillis;
}
public int getAccessTokenRequestCount() {
return accessTokenRequestCount;
}
}
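
A provider such as the one above is wired in through dfs.adls.oauth2.access.token.provider, exactly as testCustomCredTokenProvider does. A minimal sketch; the account URI is a placeholder:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.AdlFileSystem;
import org.apache.hadoop.fs.adl.common.CustomMockTokenProvider;
import org.apache.hadoop.fs.adl.oauth2.AzureADTokenProvider;

public class CustomProviderWiringSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Register the custom provider class; AdlFileSystem instantiates it and
    // adapts it to the SDK's AccessTokenProvider interface.
    conf.setClass("dfs.adls.oauth2.access.token.provider",
        CustomMockTokenProvider.class, AzureADTokenProvider.class);
    AdlFileSystem fs = new AdlFileSystem();
    fs.initialize(new URI("adl://myaccount.azuredatalakestore.net"), conf);
    fs.close();
  }
}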

View File

@ -17,8 +17,7 @@
*
*/
package org.apache.hadoop.fs.common;
package org.apache.hadoop.fs.adl.common;
import com.squareup.okhttp.mockwebserver.MockResponse;

View File

@ -0,0 +1,60 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.common;
import org.junit.runners.Parameterized;
import org.junit.runners.model.RunnerScheduler;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
/**
* Provided for convenience to execute parametrized test cases concurrently.
*/
public class Parallelized extends Parameterized {
public Parallelized(Class classObj) throws Throwable {
super(classObj);
setScheduler(new ThreadPoolScheduler());
}
private static class ThreadPoolScheduler implements RunnerScheduler {
private ExecutorService executor;
public ThreadPoolScheduler() {
int numThreads = 10;
executor = Executors.newFixedThreadPool(numThreads);
}
public void finished() {
executor.shutdown();
try {
executor.awaitTermination(10, TimeUnit.MINUTES);
} catch (InterruptedException exc) {
throw new RuntimeException(exc);
}
}
public void schedule(Runnable childStatement) {
executor.submit(childStatement);
}
}
}
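
Usage is the same as a plain Parameterized test; only the runner changes, as in TestAdlRead above. A trivial sketch with hypothetical test data:

import java.util.Arrays;
import java.util.Collection;
import org.apache.hadoop.fs.adl.common.Parallelized;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;

@RunWith(Parallelized.class)
public class ParallelizedUsageSketch {
  private final int value;

  public ParallelizedUsageSketch(int value) {
    this.value = value;
  }

  @Parameterized.Parameters(name = "{index}")
  public static Collection<Object[]> data() {
    // Each row is scheduled on the runner's thread pool of 10 workers.
    return Arrays.asList(new Object[][] {{1}, {2}, {3}});
  }

  @Test
  public void isPositive() {
    Assert.assertTrue(value > 0);
  }
}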

View File

@ -17,23 +17,26 @@
*
*/
package org.apache.hadoop.fs.common;
package org.apache.hadoop.fs.adl.common;
import com.squareup.okhttp.mockwebserver.Dispatcher;
import com.squareup.okhttp.mockwebserver.MockResponse;
import com.squareup.okhttp.mockwebserver.RecordedRequest;
import okio.Buffer;
import org.apache.hadoop.fs.adl.TestADLResponseData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Supporting class for mock test to validate Adls read operation using
* BufferManager.java and BatchByteArrayInputStream implementation.
* Supporting class for mock test to validate Adls read operation.
*/
public class TestDataForRead {
private static final Logger LOG = LoggerFactory
.getLogger(TestDataForRead.class);
private byte[] actualData;
private ArrayList<ExpectedResponse> responses;
@ -44,6 +47,7 @@ public class TestDataForRead {
public TestDataForRead(final byte[] actualData, int expectedNoNetworkCall,
int intensityOfTest, boolean checkOfNoOfCalls) {
this.checkOfNoOfCalls = checkOfNoOfCalls;
this.actualData = actualData;
responses = new ArrayList<ExpectedResponse>();
@ -54,9 +58,6 @@ public class TestDataForRead {
@Override
public MockResponse dispatch(RecordedRequest recordedRequest)
throws InterruptedException {
if (recordedRequest.getPath().equals("/refresh")) {
return AdlMockWebServer.getTokenResponse();
}
if (recordedRequest.getRequestLine().contains("op=GETFILESTATUS")) {
return new MockResponse().setResponseCode(200).setBody(
@ -72,19 +73,20 @@ public class TestDataForRead {
Pattern pattern = Pattern.compile("offset=([0-9]+)");
Matcher matcher = pattern.matcher(request);
if (matcher.find()) {
System.out.println(matcher.group(1));
LOG.debug(matcher.group(1));
offset = Integer.parseInt(matcher.group(1));
}
pattern = Pattern.compile("length=([0-9]+)");
matcher = pattern.matcher(request);
if (matcher.find()) {
System.out.println(matcher.group(1));
LOG.debug(matcher.group(1));
byteCount = Integer.parseInt(matcher.group(1));
}
Buffer buf = new Buffer();
buf.write(actualData, offset, byteCount);
buf.write(actualData, offset,
Math.min(actualData.length - offset, byteCount));
return new MockResponse().setResponseCode(200)
.setChunkedBody(buf, 4 * 1024 * 1024);
}

View File

@ -21,36 +21,39 @@ package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.adl.AdlFileSystem;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
/**
* Utility class to configure real Adls storage to run Live test suite against.
* Configure Adl storage file system.
*/
public final class AdlStorageConfiguration {
private AdlStorageConfiguration() {}
private static final String CONTRACT_ENABLE_KEY =
"dfs.adl.test.contract" + ".enable";
"dfs.adl.test.contract.enable";
private static final String TEST_CONFIGURATION_FILE_NAME =
"contract-test-options.xml";
private static final String TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME =
"adls.xml";
private static final String KEY_FILE_SYSTEM_IMPL = "fs.contract.test.fs";
private static final String KEY_FILE_SYSTEM = "test.fs.adl.name";
private static boolean isContractTestEnabled = false;
private static Configuration conf = null;
public static Configuration getConfiguration() {
Configuration localConf = new Configuration();
localConf.addResource(TEST_CONFIGURATION_FILE_NAME);
localConf.addResource(TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME);
return localConf;
private AdlStorageConfiguration() {
}
public static boolean isContractTestEnabled() {
public synchronized static Configuration getConfiguration() {
Configuration newConf = new Configuration();
newConf.addResource(TEST_CONFIGURATION_FILE_NAME);
newConf.addResource(TEST_SUPPORTED_TEST_CONFIGURATION_FILE_NAME);
return newConf;
}
public synchronized static boolean isContractTestEnabled() {
if (conf == null) {
conf = getConfiguration();
}
@ -59,18 +62,33 @@ public final class AdlStorageConfiguration {
return isContractTestEnabled;
}
public static FileSystem createAdlStorageConnector()
public synchronized static FileSystem createStorageConnector()
throws URISyntaxException, IOException {
if (conf == null) {
conf = getConfiguration();
}
if(!isContractTestEnabled()) {
if (!isContractTestEnabled()) {
return null;
}
AdlFileSystem fileSystem = new AdlFileSystem();
fileSystem.initialize(new URI(conf.get("fs.defaultFS")), conf);
return fileSystem;
String fileSystem = conf.get(KEY_FILE_SYSTEM);
if (fileSystem == null || fileSystem.trim().length() == 0) {
throw new IOException("Default file system not configured.");
}
String fileSystemImpl = conf.get(KEY_FILE_SYSTEM_IMPL);
if (fileSystemImpl == null || fileSystemImpl.trim().length() == 0) {
throw new IOException(
"Configuration " + KEY_FILE_SYSTEM_IMPL + " does not exist.");
}
FileSystem fs = null;
try {
fs = (FileSystem) Class.forName(fileSystemImpl).newInstance();
} catch (Exception e) {
throw new IOException("Could not instantiate the filesystem.");
}
fs.initialize(new URI(conf.get(KEY_FILE_SYSTEM)), conf);
return fs;
}
}
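
A live run therefore needs contract-test-options.xml (or adls.xml) to supply the enable flag, the account URI, and the FileSystem implementation. A sketch of the equivalent programmatic setup; the account URI is a placeholder:

import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class LiveContractSetupSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.setBoolean("dfs.adl.test.contract.enable", true);
    conf.set("test.fs.adl.name", "adl://myaccount.azuredatalakestore.net");
    conf.set("fs.contract.test.fs", "org.apache.hadoop.fs.adl.AdlFileSystem");
    // Instantiate and initialize the configured FileSystem, mirroring
    // createStorageConnector() above.
    FileSystem fs = (FileSystem) Class
        .forName(conf.get("fs.contract.test.fs")).newInstance();
    fs.initialize(new URI(conf.get("test.fs.adl.name")), conf);
    System.out.println(fs.exists(new Path("/")));
    fs.close();
  }
}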

View File

@ -27,20 +27,16 @@ import org.apache.hadoop.fs.contract.AbstractFSContract;
import java.io.IOException;
import java.net.URISyntaxException;
/**
* Extension of AbstractFSContract representing a filesystem contract that
* an Adls filesystem implementation is expected to implement.
*/
public class AdlStorageContract extends AbstractFSContract {
class AdlStorageContract extends AbstractFSContract {
private FileSystem fs;
protected AdlStorageContract(Configuration conf) {
super(conf);
try {
fs = AdlStorageConfiguration.createAdlStorageConnector();
fs = AdlStorageConfiguration.createStorageConnector();
} catch (URISyntaxException e) {
throw new IllegalStateException("Can not initialize ADL FileSystem. "
+ "Please check fs.defaultFS property.", e);
+ "Please check test.fs.adl.name property.", e);
} catch (IOException e) {
throw new IllegalStateException("Can not initialize ADL FileSystem.", e);
}
@ -59,7 +55,12 @@ public class AdlStorageContract extends AbstractFSContract {
@Override
public Path getTestPath() {
Path path = new Path("/test");
return path;
return new Path("/test");
}
@Override
public boolean isEnabled() {
return AdlStorageConfiguration.isContractTestEnabled();
}
}

View File

@ -73,7 +73,7 @@ public class TestAdlDifferentSizeWritesLive {
Path path = new Path("/test/dataIntegrityCheck");
FileSystem fs = null;
try {
fs = AdlStorageConfiguration.createAdlStorageConnector();
fs = AdlStorageConfiguration.createStorageConnector();
} catch (URISyntaxException e) {
throw new IllegalStateException("Can not initialize ADL FileSystem. "
+ "Please check fs.defaultFS property.", e);

View File

@ -34,7 +34,7 @@ public class TestAdlFileSystemContractLive extends FileSystemContractBaseTest {
@Override
protected void setUp() throws Exception {
adlStore = AdlStorageConfiguration.createAdlStorageConnector();
adlStore = AdlStorageConfiguration.createStorageConnector();
if (AdlStorageConfiguration.isContractTestEnabled()) {
fs = adlStore;
}

View File

@ -1,342 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.Random;
import java.util.UUID;
/**
* Verify different data segment size read from the file to ensure the
* integrity and order of the data over
* BufferManger and BatchByteArrayInputStream implementation.
*/
public class TestAdlReadLive {
private String expectedData = "1234567890abcdefghijklmnopqrstuvwxyz";
@Before
public void setup() throws Exception {
org.junit.Assume
.assumeTrue(AdlStorageConfiguration.isContractTestEnabled());
}
private FileSystem getFileSystem() throws IOException, URISyntaxException {
return AdlStorageConfiguration.createAdlStorageConnector();
}
private void setupFile(Path path) throws IOException, URISyntaxException {
setupFile(path, expectedData);
}
private void setupFile(Path path, String data)
throws IOException, URISyntaxException {
expectedData = data;
FileSystem fs = getFileSystem();
fs.delete(path, true);
FSDataOutputStream fdis = fs.create(path);
fdis.writeBytes(expectedData);
fdis.close();
fs.listStatus(path.getParent());
long actualLen = fs.getFileStatus(path).getLen();
long expectedLen = expectedData.length();
System.out.println(
" Length of file : " + fs.getFileStatus(path).getLen() + " " + fs
.getUri());
Assert.assertEquals(expectedLen, actualLen);
}
@Test
public void
testOpenReadMoreThanAvailableBufferCrashFixIndexOutOfBoundsException()
throws Throwable {
Path path = new Path("/test1");
FileSystem fs = getFileSystem();
setupFile(path);
if (fs.exists(path)) {
Assert.assertTrue(fs.delete(path, true));
}
FSDataOutputStream outputStream = fs.create(path);
final byte[] data = new byte[24 * 1024 * 1024];
Random ran = new Random();
ran.nextBytes(data);
outputStream.write(data);
FSDataInputStream bb = fs.open(path);
byte[] expected = new byte[4 * 1024 * 1024];
bb.read();
bb.readFully(16711581, expected, 33,
65640); // BugFix : Was causing crash IndexOutOfBoundsException
bb.seek(16711581);
bb.readFully(16711576, expected, 33, 65640);
bb.readFully(16711578, expected, 33, 65640);
bb.readFully(16711580, expected, 33, 65640);
bb.readFully(16711576, expected, 0, expected.length);
bb.seek(0);
expected = new byte[134144];
while (bb.read() != -1){
continue;
}
bb.readFully(0, data, 0, data.length);
}
@Test
public void readNullData() throws IOException, URISyntaxException {
String data = "SPL \u0001Lorg.apache.hadoop.examples.terasort"
+ ".TeraGen$RangeInputFormat$RangeInputSplit \u008DLK@Lorg.apache"
+ ".hadoop.examples.terasort"
+ ".TeraGen$RangeInputFormat$RangeInputSplit\u008DLK@\u008DLK@";
Path path = new Path("/test4");
FileSystem fs = this.getFileSystem();
setupFile(path, data);
FSDataInputStream bb = fs.open(path);
int i = 0;
String actualData = new String();
System.out.println("Data Length :" + expectedData.length());
byte[] arr = new byte[data.length()];
bb.readFully(0, arr);
actualData = new String(arr);
System.out.println(" Data : " + actualData);
Assert.assertEquals(actualData.length(), expectedData.length());
arr = new byte[data.length() - 7];
bb.readFully(7, arr);
actualData = new String(arr);
Assert.assertEquals(actualData.length(), expectedData.length() - 7);
bb.close();
}
@Test
public void readTest() throws IOException, URISyntaxException {
Path path = new Path("/test4");
FileSystem fs = this.getFileSystem();
setupFile(path);
FSDataInputStream bb = fs.open(path);
int i = 0;
String actualData = new String();
while (true) {
int c = bb.read();
if (c < 0) {
break;
}
actualData += (char) c;
}
byte[] b = new byte[100];
System.out.println(bb.read(b, 9, 91));
System.out.println(bb.read());
System.out.println(bb.read());
System.out.println(bb.read());
System.out.println(bb.read());
System.out.println(bb.read());
System.out.println(bb.read());
bb.close();
Assert.assertEquals(actualData, expectedData);
for (int j = 0; j < 100; ++j) {
fs = this.getFileSystem();
fs.exists(new Path("/test" + j));
}
}
@Test
public void readByteTest() throws IOException, URISyntaxException {
Path path = new Path("/test3");
FileSystem fs = this.getFileSystem();
setupFile(path);
FSDataInputStream bb = fs.open(path);
int i = 0;
byte[] data = new byte[expectedData.length()];
int readByte = bb.read(data);
bb.close();
Assert.assertEquals(readByte, expectedData.length());
Assert.assertEquals(new String(data), expectedData);
}
@Test
public void readByteFullyTest() throws IOException, URISyntaxException {
Path path = new Path("/test2");
FileSystem fs = this.getFileSystem();
setupFile(path);
FSDataInputStream bb = fs.open(path);
int i = 0;
byte[] data = new byte[expectedData.length()];
bb.readFully(data);
bb.close();
Assert.assertEquals(new String(data), expectedData);
bb = fs.open(path);
bb.readFully(data, 0, data.length);
bb.close();
Assert.assertEquals(new String(data), expectedData);
}
@Test
public void readCombinationTest() throws IOException, URISyntaxException {
Path path = new Path("/test1");
FileSystem fs = this.getFileSystem();
setupFile(path);
FSDataInputStream bb = fs.open(path);
int i = 0;
byte[] data = new byte[5];
int readByte = bb.read(data);
Assert.assertEquals(new String(data), expectedData.substring(0, 5));
bb.readFully(data, 0, data.length);
Assert.assertEquals(new String(data), expectedData.substring(5, 10));
bb.close();
bb = fs.open(path);
bb.readFully(5, data, 0, data.length);
Assert.assertEquals(new String(data), expectedData.substring(5, 10));
bb.read(data);
Assert.assertEquals(new String(data), expectedData.substring(0, 5));
bb.close();
bb = fs.open(path);
bb.read(new byte[100]);
bb.close();
}
@Test
public void readMultiSeekTest() throws IOException, URISyntaxException {
final Path path = new Path(
"/delete14/" + UUID.randomUUID().toString().replaceAll("-", ""));
FileSystem fs = this.getFileSystem();
final byte[] actualData = new byte[3267397];
Random ran = new Random();
ran.nextBytes(actualData);
byte[] testData = null;
fs.delete(path, true);
FSDataOutputStream os = fs.create(path);
os.write(actualData);
os.close();
FSDataInputStream bb = fs.open(path);
byte[] data = new byte[16384];
bb.readFully(3251013, data, 0, 16384);
testData = new byte[16384];
System.arraycopy(actualData, 3251013, testData, 0, 16384);
Assert.assertArrayEquals(testData, data);
data = new byte[1921];
bb.readFully(3265476, data, 0, 1921);
testData = new byte[1921];
System.arraycopy(actualData, 3265476, testData, 0, 1921);
Assert.assertArrayEquals(testData, data);
data = new byte[3267394];
bb.readFully(3, data, 0, 3267394);
testData = new byte[3267394];
System.arraycopy(actualData, 3, testData, 0, 3267394);
Assert.assertArrayEquals(testData, data);
data = new byte[3266943];
bb.readFully(454, data, 0, 3266943);
testData = new byte[3266943];
System.arraycopy(actualData, 454, testData, 0, 3266943);
Assert.assertArrayEquals(testData, data);
data = new byte[3265320];
bb.readFully(2077, data, 0, 3265320);
testData = new byte[3265320];
System.arraycopy(actualData, 2077, testData, 0, 3265320);
Assert.assertArrayEquals(testData, data);
bb.close();
bb = fs.open(path);
data = new byte[3263262];
bb.readFully(4135, data, 0, 3263262);
testData = new byte[3263262];
System.arraycopy(actualData, 4135, testData, 0, 3263262);
Assert.assertArrayEquals(testData, data);
data = new byte[2992591];
bb.readFully(274806, data, 0, 2992591);
testData = new byte[2992591];
System.arraycopy(actualData, 274806, testData, 0, 2992591);
Assert.assertArrayEquals(testData, data);
data = new byte[1985665];
bb.readFully(1281732, data, 0, 1985665);
testData = new byte[1985665];
System.arraycopy(actualData, 1281732, testData, 0, 1985665);
Assert.assertArrayEquals(testData, data);
data = new byte[3267394];
try {
bb.readFully(2420207, data, 0, 3267394);
Assert.fail("EOF expected");
} catch (IOException e) {
}
bb.close();
}
@Test
public void allASCIICharTest() throws IOException, URISyntaxException {
final Path path = new Path(
"/delete14/" + UUID.randomUUID().toString().replaceAll("-", ""));
FileSystem fs = this.getFileSystem();
final byte[] actualData = new byte[127];
for (byte i = 0; i < 127; ++i) {
actualData[i] = i;
}
fs.delete(path, true);
FSDataOutputStream os = fs.create(path);
os.write(actualData);
os.close();
FSDataInputStream bb = fs.open(path);
byte[] data = new byte[127];
bb.readFully(0, data, 0, data.length);
bb.close();
Assert.assertArrayEquals(data, actualData);
bb = fs.open(path);
int byteRead = 1;
while (bb.read() != -1) {
byteRead++;
}
bb.seek(0);
byteRead = 1;
while (bb.read() != -1) {
byteRead++;
}
bb.close();
}
}

View File

@ -1,79 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileContextCreateMkdirBaseTest;
import org.apache.hadoop.fs.FileContextTestHelper;
import org.apache.hadoop.fs.FileSystem;
import org.junit.Assume;
import org.junit.BeforeClass;
import java.io.File;
import java.net.URI;
/**
* Verify Adls file system adhere to Hadoop file system contract using bunch of
* available test in FileContextCreateMkdirBaseTest.
*/
public class TestAdlWebHdfsFileContextCreateMkdirLive
extends FileContextCreateMkdirBaseTest {
private static final String KEY_FILE_SYSTEM = "fs.defaultFS";
@Override
public void setUp() throws Exception {
Configuration conf = AdlStorageConfiguration.getConfiguration();
String fileSystem = conf.get(KEY_FILE_SYSTEM);
if (fileSystem == null || fileSystem.trim().length() == 0) {
throw new Exception("Default file system not configured.");
}
URI uri = new URI(fileSystem);
FileSystem fs = AdlStorageConfiguration.createAdlStorageConnector();
fc = FileContext.getFileContext(
new DelegateToFileSystem(uri, fs, conf, fs.getScheme(), false) {
}, conf);
super.setUp();
}
/**
* Required to override since the getRandmizedTestDir on Windows generates
* absolute path of the local file path which contains ":" character.
* Example file system path generated is "adl://<FileSystem Path>/d:/a/b/c
*
* Adls does not support : character in the path hence overriding to remove
* unsupported character from the path.
*
* @return FileContextTestHelper
*/
@Override
protected FileContextTestHelper createFileContextHelper() {
return new FileContextTestHelper(new File(
RandomStringUtils.randomAlphanumeric(10))
.getAbsolutePath().replaceAll(":", ""));
}
@BeforeClass
public static void skipTestCheck() {
Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled());
}
}

View File

@ -1,104 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.live;
import org.apache.commons.lang.RandomStringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.DelegateToFileSystem;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileContextMainOperationsBaseTest;
import org.apache.hadoop.fs.FileContextTestHelper;
import org.apache.hadoop.fs.FileSystem;
import org.junit.Assume;
import org.junit.BeforeClass;
import java.io.File;
import java.io.IOException;
import java.net.URI;
/**
* Verify that the ADLS file system adheres to the Hadoop file system contract,
* using the tests available in FileContextMainOperationsBaseTest.
*/
public class TestAdlWebHdfsFileContextMainOperationsLive
extends FileContextMainOperationsBaseTest {
private static final String KEY_FILE_SYSTEM = "fs.defaultFS";
@BeforeClass
public static void skipTestCheck() {
Assume.assumeTrue(AdlStorageConfiguration.isContractTestEnabled());
}
@Override
public void setUp() throws Exception {
Configuration conf = AdlStorageConfiguration.getConfiguration();
String fileSystem = conf.get(KEY_FILE_SYSTEM);
if (fileSystem == null || fileSystem.trim().length() == 0) {
throw new Exception("Default file system not configured.");
}
URI uri = new URI(fileSystem);
FileSystem fs = AdlStorageConfiguration.createAdlStorageConnector();
fc = FileContext.getFileContext(
new DelegateToFileSystem(uri, fs, conf, fs.getScheme(), false) {
}, conf);
super.setUp();
}
/**
* Required to override since getRandmizedTestDir on Windows generates an
* absolute local file path that contains the ":" character.
* An example generated path is "adl://<FileSystem Path>/d:/a/b/c".
*
* ADLS does not support the ":" character in paths, hence the override to
* strip the unsupported character from the path.
*
* @return FileContextTestHelper
*/
@Override
protected FileContextTestHelper createFileContextHelper() {
return new FileContextTestHelper(
new File(RandomStringUtils.randomAlphanumeric(10)).getAbsolutePath()
.replaceAll(":", ""));
}
@Override
protected boolean listCorruptedBlocksSupported() {
return false;
}
@Override
public void testUnsupportedSymlink() throws IOException {
Assume.assumeTrue("Symbolic link are not supported by Adls", false);
}
/**
* If this test fails with
* java.lang.RuntimeException: java.io.FileNotFoundException: Hadoop bin
* directory does not exist: <path>\hadoop-common-project
* \hadoop-common\target\bin, see
* https://wiki.apache.org/hadoop/WindowsProblems and build the Hadoop
* dependencies; otherwise mark this test as skipped.
*/
@Override
public void testWorkingDirectory() throws Exception {
super.testWorkingDirectory();
}
}

View File

@ -1,149 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.fs.adl.oauth2;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.web.oauth2.AccessTokenProvider;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY;
import static org.apache.hadoop.hdfs.web.oauth2.ConfRefreshTokenBasedAccessTokenProvider.OAUTH_REFRESH_TOKEN_KEY;
import static org.junit.Assert.fail;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.verify;
import static org.mockito.Mockito.when;
/**
* Verify cache behavior of ConfRefreshTokenBasedAccessTokenProvider instances.
*/
public class TestCachedRefreshTokenBasedAccessTokenProvider {
private Configuration conf;
@Rule public TestName name = new TestName();
String clientId(int id) {
return name.getMethodName() + "_clientID" + id;
}
@Before
public void initConfig() {
conf = new Configuration(false);
conf.set(OAUTH_CLIENT_ID_KEY, clientId(0));
conf.set(OAUTH_REFRESH_TOKEN_KEY, "01234567890abcdef");
conf.set(OAUTH_REFRESH_URL_KEY, "http://dingo.invalid:80");
}
@Test
public void testCacheInstance() throws Exception {
final AccessTokenProvider inst0 = mock(AccessTokenProvider.class);
when(inst0.getConf()).thenReturn(conf);
// verify config
CachedRefreshTokenBasedAccessTokenProvider t1 = new MockProvider(inst0);
t1.setConf(conf);
verify(inst0).setConf(any(Configuration.class)); // cloned, not exact match
// verify cache hit
CachedRefreshTokenBasedAccessTokenProvider t2 =
new CachedRefreshTokenBasedAccessTokenProvider() {
@Override
AccessTokenProvider newInstance() {
fail("Failed to return cached instance");
return null;
}
};
t2.setConf(conf);
// verify force refresh
conf.setBoolean(
CachedRefreshTokenBasedAccessTokenProvider.FORCE_REFRESH, true);
final AccessTokenProvider inst1 = mock(AccessTokenProvider.class);
when(inst1.getConf()).thenReturn(conf);
CachedRefreshTokenBasedAccessTokenProvider t3 = new MockProvider(inst1);
t3.setConf(conf);
verify(inst1).setConf(any(Configuration.class));
// verify cache miss
conf.set(OAUTH_REFRESH_URL_KEY, "http://yak.invalid:80");
final AccessTokenProvider inst2 = mock(AccessTokenProvider.class);
when(inst2.getConf()).thenReturn(conf);
CachedRefreshTokenBasedAccessTokenProvider t4 = new MockProvider(inst2);
t4.setConf(conf);
verify(inst2).setConf(any(Configuration.class));
}
@Test
public void testCacheLimit() throws Exception {
final int iter = CachedRefreshTokenBasedAccessTokenProvider.MAX_PROVIDERS;
for (int i = 0; i < iter; ++i) {
conf.set(OAUTH_CLIENT_ID_KEY, clientId(i));
AccessTokenProvider inst = mock(AccessTokenProvider.class);
when(inst.getConf()).thenReturn(conf);
CachedRefreshTokenBasedAccessTokenProvider t = new MockProvider(inst);
t.setConf(conf);
verify(inst).setConf(any(Configuration.class));
}
// verify cache hit
for (int i = 0; i < iter; ++i) {
conf.set(OAUTH_CLIENT_ID_KEY, clientId(i));
CachedRefreshTokenBasedAccessTokenProvider t =
new CachedRefreshTokenBasedAccessTokenProvider() {
@Override
AccessTokenProvider newInstance() {
fail("Failed to return cached instance");
return null;
}
};
t.setConf(conf);
}
// verify miss, evict 0
conf.set(OAUTH_CLIENT_ID_KEY, clientId(iter));
final AccessTokenProvider inst = mock(AccessTokenProvider.class);
when(inst.getConf()).thenReturn(conf);
CachedRefreshTokenBasedAccessTokenProvider t = new MockProvider(inst);
t.setConf(conf);
verify(inst).setConf(any(Configuration.class));
// verify miss
conf.set(OAUTH_CLIENT_ID_KEY, clientId(0));
final AccessTokenProvider inst0 = mock(AccessTokenProvider.class);
when(inst0.getConf()).thenReturn(conf);
CachedRefreshTokenBasedAccessTokenProvider t0 = new MockProvider(inst0);
t0.setConf(conf);
verify(inst0).setConf(any(Configuration.class));
}
static class MockProvider extends CachedRefreshTokenBasedAccessTokenProvider {
private final AccessTokenProvider inst;
MockProvider(AccessTokenProvider inst) {
this.inst = inst;
}
@Override
AccessTokenProvider newInstance() {
return inst;
}
}
}
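
The cache hit, force-refresh, miss, and eviction cases exercised above suggest a
provider cache keyed by client id and refresh URL and bounded by MAX_PROVIDERS.
A minimal sketch of such a cache follows, assuming an LRU policy, a plain string
key, and a bound of 10; these are inferences from the assertions above, not the
actual CachedRefreshTokenBasedAccessTokenProvider implementation.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.function.Supplier;

/** Minimal sketch of a bounded provider cache keyed by (client id, refresh URL). */
class ProviderCacheSketch<P> {
  static final int MAX_PROVIDERS = 10; // assumed bound; the real constant may differ

  // An access-ordered LinkedHashMap gives simple LRU eviction once the bound is reached.
  private final Map<String, P> cache =
      new LinkedHashMap<String, P>(MAX_PROVIDERS, 0.75f, true) {
        @Override
        protected boolean removeEldestEntry(Map.Entry<String, P> eldest) {
          return size() > MAX_PROVIDERS;
        }
      };

  /** Return the cached provider for (clientId, refreshUrl); build a new one on a miss or forced refresh. */
  synchronized P get(String clientId, String refreshUrl, boolean forceRefresh,
      Supplier<P> factory) {
    String key = clientId + "\u0000" + refreshUrl;
    P provider = forceRefresh ? null : cache.get(key);
    if (provider == null) {
      provider = factory.get();   // cache miss: create a new instance
      cache.put(key, provider);   // and remember it for subsequent callers
    }
    return provider;
  }
}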

View File

@ -1,138 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.adl.TestableAdlFileSystem;
import org.apache.hadoop.hdfs.web.oauth2.AccessTokenProvider;
import org.apache.hadoop.hdfs.web.oauth2.ConfCredentialBasedAccessTokenProvider;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.ACCESS_TOKEN_PROVIDER_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.DFS_WEBHDFS_OAUTH_ENABLED_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY;
import static org.apache.hadoop.hdfs.web.oauth2.CredentialBasedAccessTokenProvider.OAUTH_CREDENTIAL_KEY;
import org.junit.Assert;
import org.junit.Test;
/**
* This class tests the ADL file system's required configuration keys and
* feature set keys.
*/
public class TestConfigurationSetting {
@Test
public void testAllConfiguration() throws URISyntaxException, IOException {
TestableAdlFileSystem fs = new TestableAdlFileSystem();
Configuration conf = new Configuration();
conf.set(OAUTH_REFRESH_URL_KEY, "http://localhost:1111/refresh");
conf.set(OAUTH_CREDENTIAL_KEY, "credential");
conf.set(OAUTH_CLIENT_ID_KEY, "MY_CLIENTID");
conf.setClass(ACCESS_TOKEN_PROVIDER_KEY,
ConfCredentialBasedAccessTokenProvider.class,
AccessTokenProvider.class);
conf.setBoolean(DFS_WEBHDFS_OAUTH_ENABLED_KEY, true);
URI uri = new URI("adl://localhost:1234");
fs.initialize(uri, conf);
// Default setting check
Assert.assertEquals(true, fs.isFeatureRedirectOff());
Assert.assertEquals(true, fs.isFeatureGetBlockLocationLocallyBundled());
Assert.assertEquals(true, fs.isFeatureConcurrentReadWithReadAhead());
Assert.assertEquals(false, fs.isOverrideOwnerFeatureOn());
Assert.assertEquals(8 * 1024 * 1024, fs.getMaxBufferSize());
Assert.assertEquals(2, fs.getMaxConcurrentConnection());
fs.close();
// Configuration toggle check
conf.set("adl.feature.override.redirection.off", "false");
fs.initialize(uri, conf);
Assert.assertEquals(false, fs.isFeatureRedirectOff());
fs.close();
conf.set("adl.feature.override.redirection.off", "true");
fs.initialize(uri, conf);
Assert.assertEquals(true, fs.isFeatureRedirectOff());
fs.close();
conf.set("adl.feature.override.getblocklocation.locally.bundled", "false");
fs.initialize(uri, conf);
Assert.assertEquals(false, fs.isFeatureGetBlockLocationLocallyBundled());
fs.close();
conf.set("adl.feature.override.getblocklocation.locally.bundled", "true");
fs.initialize(uri, conf);
Assert.assertEquals(true, fs.isFeatureGetBlockLocationLocallyBundled());
fs.close();
conf.set("adl.feature.override.readahead", "false");
fs.initialize(uri, conf);
Assert.assertEquals(false, fs.isFeatureConcurrentReadWithReadAhead());
fs.close();
conf.set("adl.feature.override.readahead", "true");
fs.initialize(uri, conf);
Assert.assertEquals(true, fs.isFeatureConcurrentReadWithReadAhead());
fs.close();
conf.set("adl.feature.override.readahead.max.buffersize", "101");
fs.initialize(uri, conf);
Assert.assertEquals(101, fs.getMaxBufferSize());
fs.close();
conf.set("adl.feature.override.readahead.max.buffersize", "12134565");
fs.initialize(uri, conf);
Assert.assertEquals(12134565, fs.getMaxBufferSize());
fs.close();
conf.set("adl.debug.override.localuserasfileowner", "true");
fs.initialize(uri, conf);
Assert.assertEquals(true, fs.isOverrideOwnerFeatureOn());
fs.close();
conf.set("adl.debug.override.localuserasfileowner", "false");
fs.initialize(uri, conf);
Assert.assertEquals(false, fs.isOverrideOwnerFeatureOn());
fs.close();
}
@Test
public void testOAuthEnable() throws Exception {
try (TestableAdlFileSystem fs = new TestableAdlFileSystem()) {
Configuration conf = new Configuration();
conf.set(OAUTH_REFRESH_URL_KEY, "http://localhost:1111/refresh");
conf.set(OAUTH_CREDENTIAL_KEY, "credential");
conf.set(OAUTH_CLIENT_ID_KEY, "MY_CLIENTID");
conf.setClass(ACCESS_TOKEN_PROVIDER_KEY,
ConfCredentialBasedAccessTokenProvider.class,
AccessTokenProvider.class);
// disable OAuth2 in configuration, verify overridden
conf.setBoolean(DFS_WEBHDFS_OAUTH_ENABLED_KEY, false);
URI uri = new URI("adl://localhost:1234");
fs.initialize(uri, conf);
Assert.assertFalse(conf.getBoolean(DFS_WEBHDFS_OAUTH_ENABLED_KEY, false));
Assert.assertTrue(fs.getConf().getBoolean(DFS_WEBHDFS_OAUTH_ENABLED_KEY,
false));
}
}
}
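
For context, a minimal client-side sketch of the configuration exercised above,
before opening an adl:// path. The endpoint, client id, credential, and account
name are placeholders; the read-ahead keys are the ones toggled in the test.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.web.oauth2.AccessTokenProvider;
import org.apache.hadoop.hdfs.web.oauth2.ConfCredentialBasedAccessTokenProvider;

import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.ACCESS_TOKEN_PROVIDER_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY;
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY;
import static org.apache.hadoop.hdfs.web.oauth2.CredentialBasedAccessTokenProvider.OAUTH_CREDENTIAL_KEY;

/** Hypothetical client-side setup mirroring the keys exercised by TestConfigurationSetting. */
public class AdlConfigurationSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // OAuth2 settings: placeholder values only.
    conf.set(OAUTH_REFRESH_URL_KEY, "https://example.invalid/oauth2/token");
    conf.set(OAUTH_CLIENT_ID_KEY, "YOUR_CLIENT_ID");
    conf.set(OAUTH_CREDENTIAL_KEY, "YOUR_CREDENTIAL");
    conf.setClass(ACCESS_TOKEN_PROVIDER_KEY,
        ConfCredentialBasedAccessTokenProvider.class, AccessTokenProvider.class);

    // Optional read-ahead tuning, as toggled in the test above.
    conf.set("adl.feature.override.readahead", "true");
    conf.set("adl.feature.override.readahead.max.buffersize",
        String.valueOf(8 * 1024 * 1024));

    // Placeholder account URI; the adl:// scheme selects the ADL file system.
    try (FileSystem fs = FileSystem.get(
        new URI("adl://youraccount.azuredatalakestore.net:443/"), conf)) {
      System.out.println(fs.exists(new Path("/")));
    }
  }
}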

View File

@ -1,123 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.hadoop.hdfs.web;
import com.squareup.okhttp.mockwebserver.MockResponse;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.adl.TestADLResponseData;
import org.apache.hadoop.fs.common.AdlMockWebServer;
import org.apache.hadoop.hdfs.web.PrivateAzureDataLakeFileSystem.BatchByteArrayInputStream;
import org.junit.Assert;
import org.junit.Test;
import java.io.IOException;
import java.net.URISyntaxException;
import java.net.URL;
/**
* This class tests the split size calculation performed when the read-ahead
* buffer is initialized, based on the data size and the configured buffer
* size and connection count.
*/
public class TestSplitSizeCalculation extends AdlMockWebServer {
@Test
public void testSplitSizeCalculations()
throws URISyntaxException, IOException {
getMockServer().enqueue(new MockResponse().setResponseCode(200).setBody(
TestADLResponseData.getGetFileStatusJSONResponse(128 * 1024 * 1024)));
getMockServer().enqueue(new MockResponse().setResponseCode(200).setBody(
TestADLResponseData.getGetFileStatusJSONResponse(128 * 1024 * 1024)));
getMockServer().enqueue(new MockResponse().setResponseCode(200).setBody(
TestADLResponseData.getGetFileStatusJSONResponse(128 * 1024 * 1024)));
getMockServer().enqueue(new MockResponse().setResponseCode(200).setBody(
TestADLResponseData.getGetFileStatusJSONResponse(128 * 1024 * 1024)));
URL url = getMockServer().getUrl("");
BatchByteArrayInputStream stream = getMockAdlFileSystem()
.new BatchByteArrayInputStream(url,
new Path("/test1/test2"), 16 * 1024 * 1024, 4);
Assert.assertEquals(1, stream.getSplitSize(1 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(2 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(3 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(4 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(5 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(6 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(7 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(8 * 1024 * 1024));
Assert.assertEquals(4, stream.getSplitSize(16 * 1024 * 1024));
Assert.assertEquals(3, stream.getSplitSize(12 * 1024 * 1024));
Assert.assertEquals(4, stream.getSplitSize(102 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(102));
stream.close();
stream = getMockAdlFileSystem().new BatchByteArrayInputStream(url,
new Path("/test1/test2"), 4 * 1024 * 1024, 4);
Assert.assertEquals(1, stream.getSplitSize(1 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(2 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(3 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(4 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(5 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(8 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(5 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(6 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(7 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(16 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(12 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(102 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(102));
stream.close();
stream = getMockAdlFileSystem().new BatchByteArrayInputStream(url,
new Path("/test1/test2"), 16 * 1024 * 1024, 2);
Assert.assertEquals(1, stream.getSplitSize(1 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(2 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(3 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(4 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(5 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(5 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(6 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(7 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(8 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(16 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(12 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(102 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(102));
stream.close();
stream = getMockAdlFileSystem().new BatchByteArrayInputStream(url,
new Path("/test1/test2"), 8 * 1024 * 1024, 2);
Assert.assertEquals(1, stream.getSplitSize(1 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(2 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(3 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(4 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(5 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(6 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(7 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(8 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(16 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(12 * 1024 * 1024));
Assert.assertEquals(2, stream.getSplitSize(102 * 1024 * 1024));
Assert.assertEquals(1, stream.getSplitSize(102));
stream.close();
}
}
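
The expected values above are consistent with a split-size rule of roughly
min(maxConcurrentConnections, min(dataSize, maxBufferSize) / 4 MB), clamped to
at least 1. A sketch of that inferred rule follows; the 4 MB chunk size and the
clamping are assumptions read off the assertions, not the actual
BatchByteArrayInputStream implementation.

/** Sketch of the split-size rule implied by the expected values above (inferred, not actual code). */
final class SplitSizeSketch {
  private static final long CHUNK = 4L * 1024 * 1024; // assumed 4 MB read-ahead chunk

  static int getSplitSize(long dataSize, long maxBufferSize, int maxConcurrentConnections) {
    long effective = Math.min(dataSize, maxBufferSize);   // never read ahead past the buffer
    int splits = (int) (effective / CHUNK);               // one split per full 4 MB chunk
    splits = Math.min(splits, maxConcurrentConnections);  // bounded by the connection count
    return Math.max(1, splits);                           // always at least one split
  }

  public static void main(String[] args) {
    // Reproduces a few of the cases above: 16 MB buffer, 4 connections.
    System.out.println(getSplitSize(7L * 1024 * 1024, 16L * 1024 * 1024, 4));   // 1
    System.out.println(getSplitSize(8L * 1024 * 1024, 16L * 1024 * 1024, 4));   // 2
    System.out.println(getSplitSize(12L * 1024 * 1024, 16L * 1024 * 1024, 4));  // 3
    System.out.println(getSplitSize(102L * 1024 * 1024, 16L * 1024 * 1024, 4)); // 4
  }
}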

View File

@ -39,22 +39,22 @@
<property>
<name>fs.contract.rename-returns-true-if-dest-exists</name>
<value>true</value>
<value>false</value>
</property>
<property>
<name>fs.contract.rename-returns-true-if-source-missing</name>
<value>true</value>
<value>false</value>
</property>
<property>
<name>fs.contract.rename-creates-dest-dirs</name>
<value>true</value>
<value>false</value>
</property>
<property>
<name>fs.contract.rename-remove-dest-if-empty-dir</name>
<value>true</value>
<value>false</value>
</property>
<property>
@ -119,7 +119,7 @@
<property>
<name>fs.contract.rename-overwrites-dest</name>
<value>true</value>
<value>false</value>
</property>
<property>
@ -136,4 +136,5 @@
<name>fs.contract.supports-getfilestatus</name>
<value>true</value>
</property>
</configuration>

View File

@ -13,45 +13,49 @@
-->
<configuration>
<property>
<name>dfs.webhdfs.oauth2.refresh.token.expires.ms.since.epoch</name>
<value>0</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.credential</name>
<value>bearer.and.refresh.token</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.refresh.url</name>
<value>https://login.windows.net/common/oauth2/token/</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.access.token.provider</name>
<name>dfs.adls.oauth2.refresh.url</name>
<value>
org.apache.hadoop.fs.adl.oauth2.CachedRefreshTokenBasedAccessTokenProvider
</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.enabled</name>
<value>true</value>
<name>dfs.adls.oauth2.credential</name>
<value></value>
</property>
<!--USER INPUT REQUIRED-->
<property>
<name>dfs.webhdfs.oauth2.client.id</name>
<value>ADD CLIENT ID</value>
<name>dfs.adls.oauth2.client.id</name>
<value></value>
</property>
<!--USER INPUT REQUIRED-->
<property>
<name>dfs.webhdfs.oauth2.refresh.token</name>
<value>ADD REFRESH TOKEN</value>
<name>dfs.adls.oauth2.access.token.provider.type</name>
<value>ClientCredential</value>
<description>
Supported provider types:
"ClientCredential" : Client id and client credential (provided
through the configuration file) flow.
"RefreshToken" : Client id and refresh token (provided
through the configuration file) flow.
"Custom" : Custom AAD token management.
</description>
</property>
<!--USER INPUT REQUIRED-->
<property>
<name>fs.defaultFS</name>
<value>adl://urAdlAccountGoesHere.azuredatalakestore.net:443/</value>
</property>
<!--USER INPUT REQUIRED-->
<property>
<name>dfs.adl.test.contract.enable</name>
<value>false</value>
</property>
</configuration>
<property>
<name>test.fs.adl.name</name>
<value></value>
</property>
<property>
<name>fs.contract.test.fs</name>
<value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
</property>
</configuration>
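
As a filled-in illustration of the ClientCredential flow described above, the
sketch below simply mirrors the dfs.adls.* properties defined in this file from
Java; every value is a placeholder, and the tenant token URL is an assumption.

import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/** Hypothetical ClientCredential setup using the keys defined above; values are placeholders. */
public class AdlClientCredentialSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("dfs.adls.oauth2.access.token.provider.type", "ClientCredential");
    conf.set("dfs.adls.oauth2.refresh.url",
        "https://login.windows.net/YOUR_TENANT_ID/oauth2/token"); // placeholder tenant endpoint
    conf.set("dfs.adls.oauth2.client.id", "YOUR_CLIENT_ID");
    conf.set("dfs.adls.oauth2.credential", "YOUR_CLIENT_SECRET");

    // Placeholder account, matching the fs.defaultFS pattern above.
    try (FileSystem fs = FileSystem.get(
        new URI("adl://urAdlAccountGoesHere.azuredatalakestore.net:443/"), conf)) {
      for (FileStatus status : fs.listStatus(new Path("/"))) {
        System.out.println(status.getPath());
      }
    }
  }
}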

View File

@ -0,0 +1,30 @@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
log4j.rootLogger=DEBUG,stdout
log4j.appender.stdout=org.apache.log4j.ConsoleAppender
log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
log4j.appender.stdout.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} - %m%n
log4j.logger.your.app=*
log4j.additivity.your.app=false
log4j.logger.yourApp=*
log4j.additivity.yourApp=false
log4j.appender.yourApp=org.apache.log4j.ConsoleAppender
log4j.appender.yourApp.layout=org.apache.log4j.PatternLayout
log4j.appender.yourApp.layout.ConversionPattern=%d [%t] %-5p %X{file} %c{1} %m%n
log4j.appender.yourApp.ImmediateFlush=true