HADOOP-12666. Support Microsoft Azure Data Lake - as a file system in Hadoop. Contributed by Vishwajeet Dusane.
This commit is contained in:
parent
e383b732c5
commit
9581fb715c
|
@ -2213,4 +2213,64 @@
|
|||
needs to be specified in net.topology.script.file.name.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
|
||||
<!-- Azure Data Lake File System Configurations -->
|
||||
|
||||
<property>
|
||||
<name>adl.feature.override.readahead</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
Enables read aheads in the ADL client, the feature is used to
|
||||
improve read throughput.
|
||||
This works in conjunction with the value set in
|
||||
adl.feature.override.readahead.max.buffersize.
|
||||
When set to false the read ahead feature is turned off.
|
||||
Default : True if not configured.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>adl.feature.override.readahead.max.buffersize</name>
|
||||
<value>8388608</value>
|
||||
<description>
|
||||
Define maximum buffer size to cache read ahead data, this is
|
||||
allocated per process to
|
||||
cache read ahead data. Applicable only when
|
||||
adl.feature.override.readahead is set to true.
|
||||
Default : 8388608 Byte i.e. 8MB if not configured.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>adl.feature.override.readahead.max.concurrent.connection</name>
|
||||
<value>2</value>
|
||||
<description>
|
||||
Defines the maximum number of concurrent connections that can be established to
|
||||
read ahead. If the data size is less than 4MB then only 1 read n/w
|
||||
connection
|
||||
is set. If the data size is greater than 4MB but less than 8MB then 2 read
|
||||
n/w connection
|
||||
is set. Data greater than 8MB then value set under the property would
|
||||
take
|
||||
effect. Applicable only when adl.feature.override.readahead is set
|
||||
to true and buffer size is greater than 8MB.
|
||||
It is recommended to reset this property if the
|
||||
adl.feature.override.readahead.max.buffersize
|
||||
is less than 8MB to gain performance. Application has to consider
|
||||
throttling limit for the account as well before configuring large
|
||||
buffer size.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.adl.impl</name>
|
||||
<value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.AbstractFileSystem.adl.impl</name>
|
||||
<value>org.apache.hadoop.fs.adl.Adl</value>
|
||||
</property>
|
||||
|
||||
</configuration>
|
||||
|
|
|
@ -102,6 +102,12 @@ public class TestCommonConfigurationFields extends TestConfigurationFieldsBase {
|
|||
xmlPrefixToSkipCompare.add("s3.");
|
||||
xmlPrefixToSkipCompare.add("s3native.");
|
||||
|
||||
// ADL properties are in a different subtree
|
||||
// - org.apache.hadoop.hdfs.web.ADLConfKeys
|
||||
xmlPrefixToSkipCompare.add("adl.");
|
||||
xmlPropsToSkipCompare.add("fs.adl.impl");
|
||||
xmlPropsToSkipCompare.add("fs.AbstractFileSystem.adl.impl");
|
||||
|
||||
// Deprecated properties. These should eventually be removed from the
|
||||
// class.
|
||||
configurationPropsToSkipCompare
|
||||
|
|
|
@ -146,6 +146,8 @@
|
|||
<menu name="Hadoop Compatible File Systems" inherit="top">
|
||||
<item name="Amazon S3" href="hadoop-aws/tools/hadoop-aws/index.html"/>
|
||||
<item name="Azure Blob Storage" href="hadoop-azure/index.html"/>
|
||||
<item name="Azure Data Lake Storage"
|
||||
href="hadoop-azure-datalake/index.html"/>
|
||||
<item name="OpenStack Swift" href="hadoop-openstack/index.html"/>
|
||||
</menu>
|
||||
|
||||
|
|
|
@ -0,0 +1,24 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<FindBugsFilter>
|
||||
<!-- Buffer object is accessed within trusted code and intentionally assigned instead of array copy -->
|
||||
<Match>
|
||||
<Class name="org.apache.hadoop.hdfs.web.PrivateAzureDataLakeFileSystem$BatchAppendOutputStream$CommitTask"/>
|
||||
<Bug pattern="EI_EXPOSE_REP2"/>
|
||||
<Priority value="2"/>
|
||||
</Match>
|
||||
</FindBugsFilter>
|
|
@ -0,0 +1,180 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-project</artifactId>
|
||||
<version>3.0.0-alpha1-SNAPSHOT</version>
|
||||
<relativePath>../../hadoop-project</relativePath>
|
||||
</parent>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-azure-datalake</artifactId>
|
||||
<name>Apache Hadoop Azure Data Lake support</name>
|
||||
<description>
|
||||
This module contains code to support integration with Azure Data Lake.
|
||||
</description>
|
||||
<packaging>jar</packaging>
|
||||
<properties>
|
||||
<okHttpVersion>2.4.0</okHttpVersion>
|
||||
<minimalJsonVersion>0.9.1</minimalJsonVersion>
|
||||
<file.encoding>UTF-8</file.encoding>
|
||||
<downloadSources>true</downloadSources>
|
||||
</properties>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>findbugs-maven-plugin</artifactId>
|
||||
<configuration>
|
||||
<findbugsXmlOutput>true</findbugsXmlOutput>
|
||||
<xmlOutput>true</xmlOutput>
|
||||
<excludeFilterFile>
|
||||
${basedir}/dev-support/findbugs-exclude.xml
|
||||
</excludeFilterFile>
|
||||
<effort>Max</effort>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-project-info-reports-plugin</artifactId>
|
||||
|
||||
<configuration>
|
||||
<dependencyDetailsEnabled>false</dependencyDetailsEnabled>
|
||||
<dependencyLocationsEnabled>false
|
||||
</dependencyLocationsEnabled>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-jar-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>test-jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-dependency-plugin</artifactId>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>deplist</id>
|
||||
<phase>compile</phase>
|
||||
<goals>
|
||||
<goal>list</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<!-- build a shellprofile -->
|
||||
<outputFile>${project.basedir}/target/hadoop-tools-deps/${project.artifactId}.tools-optional.txt</outputFile>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
|
||||
|
||||
<!--
|
||||
The following is to suppress a m2e warning in eclipse
|
||||
(m2e doesn't know how to handle maven-enforcer:enforce, so we have to tell m2e to ignore it)
|
||||
see: http://stackoverflow.com/questions/13040788/how-to-elimate-the-maven-enforcer-plugin-goal-enforce-is-ignored-by-m2e-wa
|
||||
-->
|
||||
<pluginManagement>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.eclipse.m2e</groupId>
|
||||
<artifactId>lifecycle-mapping</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<configuration>
|
||||
<lifecycleMappingMetadata>
|
||||
<pluginExecutions>
|
||||
<pluginExecution>
|
||||
<pluginExecutionFilter>
|
||||
<groupId>org.apache.maven.plugins
|
||||
</groupId>
|
||||
<artifactId>maven-enforcer-plugin
|
||||
</artifactId>
|
||||
<versionRange>[1.0.0,)</versionRange>
|
||||
<goals>
|
||||
<goal>enforce</goal>
|
||||
</goals>
|
||||
</pluginExecutionFilter>
|
||||
<action>
|
||||
<ignore/>
|
||||
</action>
|
||||
</pluginExecution>
|
||||
</pluginExecutions>
|
||||
</lifecycleMappingMetadata>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</pluginManagement>
|
||||
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-client</artifactId>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>servlet-api</artifactId>
|
||||
<groupId>javax.servlet</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-hdfs-client</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-all</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
  <groupId>com.eclipsesource.minimal-json</groupId>
  <artifactId>minimal-json</artifactId>
  <!-- Use the version property declared in <properties> instead of repeating the literal. -->
  <version>${minimalJsonVersion}</version>
  <scope>test</scope>
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
<scope>test</scope>
|
||||
<type>test-jar</type>
|
||||
</dependency>
|
||||
<dependency>
  <groupId>com.squareup.okhttp</groupId>
  <artifactId>mockwebserver</artifactId>
  <!-- Use the version property declared in <properties> instead of repeating the literal. -->
  <version>${okHttpVersion}</version>
  <scope>test</scope>
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,52 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.adl;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.fs.DelegateToFileSystem;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URI;
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
/**
|
||||
* Expose adl:// scheme to access ADL file system.
|
||||
*/
|
||||
public class Adl extends DelegateToFileSystem {
|
||||
|
||||
Adl(URI theUri, Configuration conf) throws IOException, URISyntaxException {
|
||||
super(theUri, createDataLakeFileSystem(conf), conf, AdlFileSystem.SCHEME,
|
||||
false);
|
||||
}
|
||||
|
||||
private static AdlFileSystem createDataLakeFileSystem(Configuration conf) {
|
||||
AdlFileSystem fs = new AdlFileSystem();
|
||||
fs.setConf(conf);
|
||||
return fs;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return Default port for ADL File system to communicate
|
||||
*/
|
||||
@Override
|
||||
public final int getUriDefaultPort() {
|
||||
return AdlFileSystem.DEFAULT_PORT;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.adl;
|
||||
|
||||
import org.apache.hadoop.hdfs.web.PrivateAzureDataLakeFileSystem;
|
||||
|
||||
/**
|
||||
* Expose adl:// scheme to access ADL file system.
|
||||
*/
|
||||
public class AdlFileSystem extends PrivateAzureDataLakeFileSystem {
|
||||
|
||||
public static final String SCHEME = "adl";
|
||||
public static final int DEFAULT_PORT = 443;
|
||||
|
||||
@Override
|
||||
public String getScheme() {
|
||||
return SCHEME;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getDefaultPort() {
|
||||
return DEFAULT_PORT;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.fs.adl.oauth2;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.LinkedHashMap;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.web.oauth2.AccessTokenProvider;
|
||||
import org.apache.hadoop.hdfs.web.oauth2.ConfRefreshTokenBasedAccessTokenProvider;
|
||||
import org.apache.hadoop.hdfs.web.oauth2.PrivateCachedRefreshTokenBasedAccessTokenProvider;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY;
|
||||
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY;
|
||||
import static org.apache.hadoop.hdfs.web.oauth2.ConfRefreshTokenBasedAccessTokenProvider.OAUTH_REFRESH_TOKEN_KEY;
|
||||
|
||||
/**
|
||||
* Share refresh tokens across all ADLS instances with a common client ID. The
|
||||
* {@link AccessTokenProvider} can be shared across multiple instances,
|
||||
* amortizing the cost of refreshing tokens.
|
||||
*/
|
||||
public class CachedRefreshTokenBasedAccessTokenProvider
|
||||
extends PrivateCachedRefreshTokenBasedAccessTokenProvider {
|
||||
|
||||
public static final String FORCE_REFRESH = "adl.force.token.refresh";
|
||||
|
||||
private static final Logger LOG =
|
||||
LoggerFactory.getLogger(CachedRefreshTokenBasedAccessTokenProvider.class);
|
||||
|
||||
/** Limit size of provider cache. */
|
||||
static final int MAX_PROVIDERS = 10;
|
||||
@SuppressWarnings("serial")
|
||||
private static final Map<String, AccessTokenProvider> CACHE =
|
||||
new LinkedHashMap<String, AccessTokenProvider>() {
|
||||
@Override
|
||||
public boolean removeEldestEntry(
|
||||
Map.Entry<String, AccessTokenProvider> e) {
|
||||
return size() > MAX_PROVIDERS;
|
||||
}
|
||||
};
|
||||
|
||||
private AccessTokenProvider instance = null;
|
||||
|
||||
/**
|
||||
* Create handle for cached instance.
|
||||
*/
|
||||
public CachedRefreshTokenBasedAccessTokenProvider() {
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the access token from internally cached
|
||||
* ConfRefreshTokenBasedAccessTokenProvider instance.
|
||||
*
|
||||
* @return Valid OAuth2 access token for the user.
|
||||
* @throws IOException when system error, internal server error or user error
|
||||
*/
|
||||
@Override
|
||||
public synchronized String getAccessToken() throws IOException {
|
||||
return instance.getAccessToken();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return A cached Configuration consistent with the parameters of this
|
||||
* instance.
|
||||
*/
|
||||
@Override
|
||||
public synchronized Configuration getConf() {
|
||||
return instance.getConf();
|
||||
}
|
||||
|
||||
/**
|
||||
* Configure cached instance. Note that the Configuration instance returned
|
||||
* from subsequent calls to {@link #getConf() getConf} may be from a
|
||||
* previous, cached entry.
|
||||
* @param conf Configuration instance
|
||||
*/
|
||||
@Override
|
||||
public synchronized void setConf(Configuration conf) {
|
||||
String id = conf.get(OAUTH_CLIENT_ID_KEY);
|
||||
if (null == id) {
|
||||
throw new IllegalArgumentException("Missing client ID");
|
||||
}
|
||||
synchronized (CACHE) {
|
||||
instance = CACHE.get(id);
|
||||
if (null == instance
|
||||
|| conf.getBoolean(FORCE_REFRESH, false)
|
||||
|| replace(instance, conf)) {
|
||||
instance = newInstance();
|
||||
// clone configuration
|
||||
instance.setConf(new Configuration(conf));
|
||||
CACHE.put(id, instance);
|
||||
LOG.debug("Created new client {}", id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
AccessTokenProvider newInstance() {
|
||||
return new ConfRefreshTokenBasedAccessTokenProvider();
|
||||
}
|
||||
|
||||
private static boolean replace(AccessTokenProvider cached, Configuration c2) {
|
||||
// ConfRefreshTokenBasedAccessTokenProvider::setConf asserts !null
|
||||
final Configuration c1 = cached.getConf();
|
||||
for (String key : new String[] {
|
||||
OAUTH_REFRESH_TOKEN_KEY, OAUTH_REFRESH_URL_KEY }) {
|
||||
if (!c1.get(key).equals(c2.get(key))) {
|
||||
// replace cached instance for this clientID
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* public interface to expose OAuth2 authentication related features.
|
||||
*/
|
||||
package org.apache.hadoop.fs.adl.oauth2;
|
|
@ -0,0 +1,23 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
 * Public classes exposing the adl:// scheme for the Azure Data Lake file system.
|
||||
*/
|
||||
package org.apache.hadoop.fs.adl;
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web;
|
||||
|
||||
/**
 * Configuration keys and default values used by the ADL file system client.
 */
public final class ADLConfKeys {
  /** Key for the maximum number of concurrent read-ahead connections. */
  public static final String
      ADL_FEATURE_CONCURRENT_READ_AHEAD_MAX_CONCURRENT_CONN =
      "adl.feature.override.readahead.max.concurrent.connection";
  /** Default number of concurrent read-ahead connections. */
  public static final int
      ADL_FEATURE_CONCURRENT_READ_AHEAD_MAX_CONCURRENT_CONN_DEFAULT = 2;
  public static final String ADL_WEBSDK_VERSION_KEY = "ADLFeatureSet";

  static final String ADL_DEBUG_OVERRIDE_LOCAL_USER_AS_OWNER =
      "adl.debug.override.localuserasfileowner";
  static final boolean ADL_DEBUG_SET_LOCAL_USER_AS_OWNER_DEFAULT = false;

  static final String ADL_FEATURE_REDIRECT_OFF =
      "adl.feature.override.redirection.off";
  static final boolean ADL_FEATURE_REDIRECT_OFF_DEFAULT = true;

  static final String ADL_FEATURE_GET_BLOCK_LOCATION_LOCALLY_BUNDLED =
      "adl.feature.override.getblocklocation.locally.bundled";
  static final boolean ADL_FEATURE_GET_BLOCK_LOCATION_LOCALLY_BUNDLED_DEFAULT
      = true;

  /** Key that switches the read-ahead feature on or off. */
  static final String ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD =
      "adl.feature.override.readahead";
  /** Read ahead is enabled unless configured otherwise. */
  static final boolean ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD_DEFAULT =
      true;
  /** Key for the maximum buffer size used to cache read-ahead data. */
  static final String ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD_BUFFER_SIZE =
      "adl.feature.override.readahead.max.buffersize";

  /** Bytes per kilobyte. */
  static final int KB = 1024;
  /** Bytes per megabyte. */
  static final int MB = 1024 * KB;
  static final int DEFAULT_BLOCK_SIZE = 4 * MB;
  static final int DEFAULT_EXTENT_SIZE = 256 * MB;
  static final int DEFAULT_TIMEOUT_IN_SECONDS = 120;
  /** Default read-ahead buffer size: 8MB. */
  static final int
      ADL_FEATURE_CONCURRENT_READ_WITH_READ_AHEAD_BUFFER_SIZE_DEFAULT =
      8 * MB;

  /** Constants holder; not instantiable. */
  private ADLConfKeys() {
  }

}
|
|
@ -0,0 +1,180 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web;
|
||||
|
||||
/**
 * Holds buffered data for the process. One and only one buffer block is kept
 * in memory; it is tagged with the path of the file it belongs to and the
 * stream offset from which it was fetched. When the same block is requested
 * again across webhdfs instances, a trip to the backend is avoided. This
 * matters because ADL fetches a large amount of data from the backend per
 * request (4MB by default, configurable through core-site.xml).
 *
 * <p>Observation: for compressed formats such as ORC/Avro the buffer block
 * does not avoid every backend call across webhdfs instances.
 *
 * <p>The methods of this class do no locking of their own.
 * NOTE(review): callers are presumably expected to synchronize on
 * {@link #getLock()} around compound operations - confirm against the users
 * of this class.
 */
final class BufferManager {
  private static final BufferManager BUFFER_MANAGER_INSTANCE =
      new BufferManager();
  // Final so that every caller always obtains the same monitor object.
  private static final Object LOCK = new Object();
  private Buffer buffer = null;
  private String fileName;

  /**
   * Constructor. Private: use {@link #getInstance()}.
   */
  private BufferManager() {
  }

  /**
   * @return the shared lock object associated with the buffer manager
   */
  public static Object getLock() {
    return LOCK;
  }

  /**
   * @return the single buffer manager instance of this process
   */
  public static BufferManager getInstance() {
    return BUFFER_MANAGER_INSTANCE;
  }

  /**
   * Validate if the current buffer block is of given stream.
   *
   * @param path   ADL stream path
   * @param offset Stream offset that caller is interested in
   * @return True if the buffer block is available otherwise false
   */
  boolean hasValidDataForOffset(String path, long offset) {
    if (this.fileName == null) {
      return false;
    }

    if (!this.fileName.equals(path)) {
      return false;
    }

    if (buffer == null) {
      return false;
    }

    // The offset must fall inside [buffer.offset, buffer.offset + length).
    if ((offset < buffer.offset) || (offset >= (buffer.offset
        + buffer.data.length))) {
      return false;
    }

    return true;
  }

  /**
   * Clean buffer block.
   */
  void clear() {
    buffer = null;
    // Drop the path as well so that no stale tag outlives the data it named.
    fileName = null;
  }

  /**
   * Validate if the current buffer block is of given stream. For now partial
   * data available is not supported.
   * Data must be available exactly or within the range of offset and size
   * passed as parameter.
   *
   * @param path   Stream path
   * @param offset Offset of the stream
   * @param size   Size of the data from the offset of the stream caller
   *               interested in
   * @return True if the data is available from the given offset and of the
   * size caller is interested in.
   */
  boolean hasData(String path, long offset, int size) {

    if (!hasValidDataForOffset(path, offset)) {
      return false;
    }

    // The requested range must not run past the end of the buffered block.
    if ((size + offset) > (buffer.data.length + buffer.offset)) {
      return false;
    }
    return true;
  }

  /**
   * Fill the given array from the buffer block, starting at the requested
   * stream offset. It is the caller's responsibility to check that the buffer
   * block is of their interest and that the offset is valid.
   *
   * @param data   Byte array to be filled from the buffer block
   * @param offset Data to be fetched from the offset.
   */
  void get(byte[] data, long offset) {
    System.arraycopy(buffer.data, (int) (offset - buffer.offset), data, 0,
        data.length);
  }

  /**
   * Create new empty buffer block of the given size.
   *
   * @param len Size of the buffer block.
   * @return Empty byte array.
   */
  byte[] getEmpty(int len) {
    return new byte[len];
  }

  /**
   * This function allows caller to specify new buffer block for the stream
   * which is pulled from the backend. The array is stored as is, not copied.
   * A null array is ignored and leaves the current block untouched.
   *
   * @param data   Buffer
   * @param path   Stream path to which buffer belongs to
   * @param offset Stream offset where buffer start with
   */
  void add(byte[] data, String path, long offset) {
    if (data == null) {
      return;
    }

    buffer = new Buffer();
    buffer.data = data;
    buffer.offset = offset;
    this.fileName = path;
  }

  /**
   * @return Size of the buffer.
   */
  int getBufferSize() {
    return buffer.data.length;
  }

  /**
   * @return Stream offset where buffer start with
   */
  long getBufferOffset() {
    return buffer.offset;
  }

  /**
   * Buffer container.
   */
  static class Buffer {
    private byte[] data;
    private long offset;
  }
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,37 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.oauth2;
|
||||
|
||||
import org.apache.hadoop.classification.InterfaceAudience.Private;
|
||||
import org.apache.hadoop.classification.InterfaceStability.Unstable;
|
||||
|
||||
/**
|
||||
* Exposing AccessTokenProvider publicly to extend in com.microsoft.azure
|
||||
* .datalake package. Extended version to cache
|
||||
* token for the process to gain performance gain.
|
||||
*/
|
||||
@Private
|
||||
@Unstable
|
||||
public abstract class PrivateCachedRefreshTokenBasedAccessTokenProvider
|
||||
extends AccessTokenProvider {
|
||||
|
||||
// visibility workaround
|
||||
|
||||
}
|
|
@ -0,0 +1,24 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* A distributed implementation of {@link
|
||||
* org.apache.hadoop.hdfs.web.oauth2} for oauth2 token management support.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web.oauth2;
|
|
@ -0,0 +1,25 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* A distributed implementation of {@link org.apache.hadoop.hdfs.web} for
|
||||
* reading and writing files on Azure data lake file system. This
|
||||
* implementation is derived from the webhdfs specification.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web;
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
/**
|
||||
* Query parameter to notify backend server that the all the data has been
|
||||
* pushed to over the stream.
|
||||
*
|
||||
* Used in operation code Create and Append.
|
||||
*/
|
||||
public class ADLFlush extends BooleanParam {
|
||||
/**
|
||||
* Parameter name.
|
||||
*/
|
||||
public static final String NAME = "flush";
|
||||
|
||||
private static final Domain DOMAIN = new Domain(NAME);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param value the parameter value.
|
||||
*/
|
||||
public ADLFlush(final Boolean value) {
|
||||
super(DOMAIN, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
import java.net.HttpURLConnection;
|
||||
|
||||
/**
|
||||
* Extended Webhdfs GetOpParam to avoid redirect operation for azure data
|
||||
* lake storage.
|
||||
*/
|
||||
public class ADLGetOpParam extends HttpOpParam<ADLGetOpParam.Op> {
|
||||
private static final Domain<Op> DOMAIN = new Domain<Op>(NAME, Op.class);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param str a string representation of the parameter value.
|
||||
*/
|
||||
public ADLGetOpParam(final String str) {
|
||||
super(DOMAIN, DOMAIN.parse(str));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get operations.
|
||||
*/
|
||||
public static enum Op implements HttpOpParam.Op {
|
||||
OPEN(false, HttpURLConnection.HTTP_OK);
|
||||
|
||||
private final boolean redirect;
|
||||
private final int expectedHttpResponseCode;
|
||||
private final boolean requireAuth;
|
||||
|
||||
Op(final boolean doRedirect, final int expectHttpResponseCode) {
|
||||
this(doRedirect, expectHttpResponseCode, false);
|
||||
}
|
||||
|
||||
Op(final boolean doRedirect, final int expectHttpResponseCode,
|
||||
final boolean doRequireAuth) {
|
||||
this.redirect = doRedirect;
|
||||
this.expectedHttpResponseCode = expectHttpResponseCode;
|
||||
this.requireAuth = doRequireAuth;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HttpOpParam.Type getType() {
|
||||
return HttpOpParam.Type.GET;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRequireAuth() {
|
||||
return requireAuth;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getDoOutput() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRedirect() {
|
||||
return redirect;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getExpectedHttpResponseCode() {
|
||||
return expectedHttpResponseCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toQueryString() {
|
||||
return NAME + "=" + this;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
import java.net.HttpURLConnection;
|
||||
|
||||
/**
|
||||
* Extended Webhdfs PostOpParam to avoid redirect during append operation for
|
||||
* azure data lake storage.
|
||||
*/
|
||||
|
||||
public class ADLPostOpParam extends HttpOpParam<ADLPostOpParam.Op> {
|
||||
private static final Domain<Op> DOMAIN = new Domain<ADLPostOpParam.Op>(NAME,
|
||||
Op.class);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param str a string representation of the parameter value.
|
||||
*/
|
||||
public ADLPostOpParam(final String str) {
|
||||
super(DOMAIN, DOMAIN.parse(str));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
/**
|
||||
* Post operations.
|
||||
*/
|
||||
public static enum Op implements HttpOpParam.Op {
|
||||
APPEND(true, false, HttpURLConnection.HTTP_OK);
|
||||
|
||||
private final boolean redirect;
|
||||
private final boolean doOutput;
|
||||
private final int expectedHttpResponseCode;
|
||||
|
||||
Op(final boolean doOut, final boolean doRedirect,
|
||||
final int expectHttpResponseCode) {
|
||||
this.doOutput = doOut;
|
||||
this.redirect = doRedirect;
|
||||
this.expectedHttpResponseCode = expectHttpResponseCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Type getType() {
|
||||
return Type.POST;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRequireAuth() {
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getDoOutput() {
|
||||
return doOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRedirect() {
|
||||
return redirect;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getExpectedHttpResponseCode() {
|
||||
return expectedHttpResponseCode;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return a URI query string.
|
||||
*/
|
||||
@Override
|
||||
public String toQueryString() {
|
||||
return NAME + "=" + this;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,94 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
import java.net.HttpURLConnection;
|
||||
|
||||
/**
|
||||
* Extended Webhdfs PutOpParam to avoid redirect during Create operation for
|
||||
* azure data lake storage.
|
||||
*/
|
||||
public class ADLPutOpParam extends HttpOpParam<ADLPutOpParam.Op> {
|
||||
private static final Domain<Op> DOMAIN = new Domain<Op>(NAME, Op.class);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param str a string representation of the parameter value.
|
||||
*/
|
||||
public ADLPutOpParam(final String str) {
|
||||
super(DOMAIN, DOMAIN.parse(str));
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
|
||||
/**
|
||||
* Put operations.
|
||||
*/
|
||||
public static enum Op implements HttpOpParam.Op {
|
||||
CREATE(true, false, HttpURLConnection.HTTP_CREATED);
|
||||
|
||||
private final boolean redirect;
|
||||
private final boolean doOutput;
|
||||
private final int expectedHttpResponseCode;
|
||||
private final boolean requireAuth;
|
||||
|
||||
Op(final boolean doOut, final boolean doRedirect,
|
||||
final int expectHttpResponseCode) {
|
||||
this.doOutput = doOut;
|
||||
this.redirect = doRedirect;
|
||||
this.expectedHttpResponseCode = expectHttpResponseCode;
|
||||
this.requireAuth = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public HttpOpParam.Type getType() {
|
||||
return HttpOpParam.Type.PUT;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRequireAuth() {
|
||||
return requireAuth;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getDoOutput() {
|
||||
return doOutput;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean getRedirect() {
|
||||
return redirect;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getExpectedHttpResponseCode() {
|
||||
return expectedHttpResponseCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toQueryString() {
|
||||
return NAME + "=" + this;
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
import org.apache.hadoop.hdfs.web.ADLConfKeys;
|
||||
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Capture ADL Jar version information. Require for debugging and analysis
|
||||
* purpose in the backend.
|
||||
*/
|
||||
public class ADLVersionInfo extends StringParam {
|
||||
/**
|
||||
* Parameter name.
|
||||
*/
|
||||
public static final String NAME = ADLConfKeys.ADL_WEBSDK_VERSION_KEY;
|
||||
|
||||
private static final StringParam.Domain DOMAIN = new StringParam.Domain(NAME,
|
||||
Pattern.compile(".+"));
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param featureSetVersion Enabled featured information
|
||||
*/
|
||||
public ADLVersionInfo(String featureSetVersion) {
|
||||
super(DOMAIN, featureSetVersion);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
/**
|
||||
* Overwrite parameter.
|
||||
*/
|
||||
public class AppendADLNoRedirectParam extends BooleanParam {
|
||||
/**
|
||||
* Parameter name.
|
||||
*/
|
||||
public static final String NAME = "append";
|
||||
|
||||
private static final Domain DOMAIN = new Domain(NAME);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param value the parameter value.
|
||||
*/
|
||||
public AppendADLNoRedirectParam(final Boolean value) {
|
||||
super(DOMAIN, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
/**
|
||||
* Overwrite parameter.
|
||||
*/
|
||||
public class CreateADLNoRedirectParam extends BooleanParam {
|
||||
/**
|
||||
* Parameter name.
|
||||
*/
|
||||
public static final String NAME = "write";
|
||||
|
||||
private static final Domain DOMAIN = new Domain(NAME);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param value the parameter value.
|
||||
*/
|
||||
public CreateADLNoRedirectParam(final Boolean value) {
|
||||
super(DOMAIN, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
/**
|
||||
* To support single writer semantics. Notify to ADL backend if the stream
|
||||
* needs to locked in order to protect
|
||||
* concurrent write operation on the same stream.
|
||||
*
|
||||
* Used in append operation.
|
||||
*/
|
||||
public class LeaseParam extends StringParam {
|
||||
|
||||
public static final String NAME = "leaseId";
|
||||
/**
|
||||
* Default parameter value.
|
||||
*/
|
||||
public static final String DEFAULT = NULL;
|
||||
|
||||
private static final StringParam.Domain DOMAIN = new StringParam.Domain(NAME,
|
||||
null);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param str a string representation of the parameter value.
|
||||
*/
|
||||
public LeaseParam(final String str) {
|
||||
super(DOMAIN, str == null || str.equals(DEFAULT) ? null : str);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
||||
|
||||
/**
|
||||
* Overwrite parameter.
|
||||
*/
|
||||
public class ReadADLNoRedirectParam extends BooleanParam {
|
||||
/**
|
||||
* Parameter name.
|
||||
*/
|
||||
public static final String NAME = "read";
|
||||
|
||||
private static final Domain DOMAIN = new Domain(NAME);
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
*
|
||||
* @param value the parameter value.
|
||||
*/
|
||||
public ReadADLNoRedirectParam(final Boolean value) {
|
||||
super(DOMAIN, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public final String getName() {
|
||||
return NAME;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* A distributed implementation of {@link
|
||||
* org.apache.hadoop.hdfs.web.resources} for reading or extending query
|
||||
* parameter for webhdfs specification. ADL
|
||||
* specific
|
||||
* query parameter also goes in the same package.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web.resources;
|
|
@ -0,0 +1,219 @@
|
|||
<!---
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License. See accompanying LICENSE file.
|
||||
-->
|
||||
|
||||
# Hadoop Azure Data Lake Support
|
||||
|
||||
* [Introduction](#Introduction)
|
||||
* [Features](#Features)
|
||||
* [Limitations](#Limitations)
|
||||
* [Usage](#Usage)
|
||||
* [Concepts](#Concepts)
|
||||
* [Webhdfs Compliance](#Webhdfs_Specification_Compliance)
|
||||
* [OAuth2 Support](#OAuth2_Support)
|
||||
* [Read Ahead Buffer Management](#Read_Ahead_Buffer_Management)
|
||||
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
|
||||
* [Accessing adl URLs](#Accessing_adl_URLs)
|
||||
* [Testing the azure-datalake-store Module](#Testing_the_hadoop-azure_Module)
|
||||
|
||||
## <a name="Introduction" />Introduction
|
||||
|
||||
The hadoop-azure-datalake module provides support for integration with
|
||||
[Azure Data Lake Store](https://azure.microsoft.com/en-in/documentation/services/data-lake-store/).
|
||||
The jar file is named azure-datalake-store.jar.
|
||||
|
||||
## <a name="Features" />Features
|
||||
|
||||
* Read and write data stored in an Azure Data Lake Storage account.
|
||||
* Partial support for [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html)
|
||||
* Reference file system paths using URLs using the `adl` scheme for Secure Webhdfs i.e. SSL
|
||||
encrypted access.
|
||||
* Can act as a source of data in a MapReduce job, or a sink.
|
||||
* Tested on both Linux and Windows.
|
||||
* Tested for scale.
|
||||
|
||||
## <a name="Limitations" />Limitations
|
||||
Partial or no support for the following operations in [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html):
|
||||
|
||||
* Operation on Symbolic Link
|
||||
* Proxy Users
|
||||
* File Truncate
|
||||
* File Checksum
|
||||
* File replication factor
|
||||
* Home Directory: partially supported, based on OAuth2 token information and not the active user on the Hadoop cluster.
|
||||
* Extended Attributes(XAttrs) Operations
|
||||
* Snapshot Operations
|
||||
* Delegation Token Operations
|
||||
* User and group information returned by ListStatus and GetFileStatus is in the form of GUIDs associated with Azure Active Directory.
|
||||
|
||||
## <a name="Usage" />Usage
|
||||
|
||||
### <a name="Concepts" />Concepts
|
||||
Azure Data Lake Storage access path syntax is
|
||||
|
||||
adl://<Account Name>.azuredatalakestore.net/
|
||||
|
||||
Get started with azure data lake account with [https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/](https://azure.microsoft.com/en-in/documentation/articles/data-lake-store-get-started-portal/)
|
||||
|
||||
#### <a name="Webhdfs_Specification_Compliance" />Webhdfs Compliance
|
||||
Azure Data Lake Storage exposes a public REST endpoint as per [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html) to access storage file system.
|
||||
|
||||
Syntax to access Azure data lake storage account over [Webhdfs Specification 2.7.0](https://hadoop.apache.org/docs/r2.7.0/hadoop-project-dist/hadoop-hdfs/WebHDFS.html) is
|
||||
|
||||
https://<Account Name>.azuredatalakestore.net/webhdfs/v1/<File System Path>?<Query parameters>
|
||||
|
||||
|
||||
#### <a name="OAuth2_Support" />OAuth2 Support
|
||||
Usage of Azure Data Lake Storage requires OAuth2 bearer token to be present as part of the HTTPS header as per OAuth2 specification. Valid OAuth2 bearer token should be obtained from Azure Active Directory for valid users who have access to Azure Data Lake Storage Account.
|
||||
|
||||
Azure Active Directory (Azure AD) is Microsoft’s multi-tenant cloud based directory and identity management service. See [https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/](https://azure.microsoft.com/en-in/documentation/articles/active-directory-whatis/)
|
||||
|
||||
The following sections describe the OAuth2 configuration in core-site.xml.
|
||||
|
||||
#### <a name="Read_Ahead_Buffer_Management" />Read Ahead Buffer Management
|
||||
Azure Data Lake Storage offers high throughput. To maximize throughput, applications can use this feature to buffer data concurrently, in memory during read operation. This data is cached in memory per process per stream.
|
||||
|
||||
|
||||
To Enable/Disable read ahead feature.
|
||||
|
||||
<property>
|
||||
<name>adl.feature.override.readahead</name>
|
||||
<value>true</value>
|
||||
<description>
|
||||
Enables read aheads in the ADL client, the feature is used to improve read throughput.
|
||||
This works in conjunction with the value set in adl.feature.override.readahead.max.buffersize.
|
||||
When set to false the read ahead feature is turned off.
|
||||
Default : True if not configured.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
To configure read ahead buffer size.
|
||||
|
||||
<property>
|
||||
<name>adl.feature.override.readahead.max.buffersize</name>
|
||||
<value>8388608</value>
|
||||
<description>
|
||||
Define maximum buffer size to cache read ahead data, this is allocated per process to
|
||||
cache read ahead data. Applicable only when adl.feature.override.readahead is set to true.
|
||||
Default : 8388608 Byte i.e. 8MB if not configured.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
To configure the number of concurrent connections to the Azure Data Lake Storage Account.
|
||||
|
||||
<property>
|
||||
<name>adl.feature.override.readahead.max.concurrent.connection</name>
|
||||
<value>2</value>
|
||||
<description>
|
||||
Define maximum concurrent connection can be established to
|
||||
read ahead. If the data size is < 4MB then only 1 read n/w connection
|
||||
is set. If the data size is > 4MB but < 8MB then 2 read n/w
|
||||
connection
|
||||
is set. Data >8MB then value set under the property would
|
||||
take
|
||||
effect. Applicable only when adl.feature.override.readahead is set
|
||||
to true and buffer size is >8MB.
|
||||
It is recommended to reset this property if the adl.feature.override.readahead.max.buffersize
|
||||
is < 8MB to gain performance. Application has to consider
|
||||
throttling
|
||||
limit for the account as well before configuring large buffer size.
|
||||
</description>
|
||||
</property>
|
||||
|
||||
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
|
||||
|
||||
Update core-site.xml for OAuth2 configuration
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.refresh.token.expires.ms.since.epoch</name>
|
||||
<value>0</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.credential</name>
|
||||
<value>bearer.and.refresh.token</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.access.token</name>
|
||||
<value>NOT_SET</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.refresh.url</name>
|
||||
<value>https://login.windows.net/common/oauth2/token/</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.access.token.provider</name>
|
||||
<value>org.apache.hadoop.fs.adl.oauth2.CachedRefreshTokenBasedAccessTokenProvider</value>
|
||||
</property>
|
||||
|
||||
Applications are required to set the client id and the OAuth2 refresh token, obtained from Azure Active Directory, that is associated with that client id. See [https://github.com/AzureAD/azure-activedirectory-library-for-java](https://github.com/AzureAD/azure-activedirectory-library-for-java).
|
||||
|
||||
**Do not share client id and refresh token, it must be kept secret.**
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.client.id</name>
|
||||
<value></value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.refresh.token</name>
|
||||
<value></value>
|
||||
</property>
|
||||
|
||||
For the ADL FileSystem to take effect, update core-site.xml with
|
||||
|
||||
<property>
|
||||
<name>fs.adl.impl</name>
|
||||
<value>org.apache.hadoop.fs.adl.AdlFileSystem</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.AbstractFileSystem.adl.impl</name>
|
||||
<value>org.apache.hadoop.fs.adl.Adl</value>
|
||||
</property>
|
||||
|
||||
|
||||
### <a name="Accessing_adl_URLs" />Accessing adl URLs
|
||||
|
||||
After credentials are configured in core-site.xml, any Hadoop component may
|
||||
reference files in that Azure Data Lake Storage account by using URLs of the following
|
||||
format:
|
||||
|
||||
adl://<Account Name>.azuredatalakestore.net/<path>
|
||||
|
||||
The `adl` scheme identifies a URL on a file system backed by Azure
|
||||
Data Lake Storage. `adl` utilizes encrypted HTTPS access for all interaction with
|
||||
the Azure Data Lake Storage API.
|
||||
|
||||
For example, the following
|
||||
[FileSystem Shell](../hadoop-project-dist/hadoop-common/FileSystemShell.html)
|
||||
commands demonstrate access to a storage account named `yourcontainer`.
|
||||
|
||||
> hadoop fs -mkdir adl://yourcontainer.azuredatalakestore.net/testDir
|
||||
|
||||
> hadoop fs -put testFile adl://yourcontainer.azuredatalakestore.net/testDir/testFile
|
||||
|
||||
> hadoop fs -cat adl://yourcontainer.azuredatalakestore.net/testDir/testFile
|
||||
test file content
|
||||
## <a name="Testing_the_hadoop-azure_Module" />Testing the azure-datalake-store Module
|
||||
The hadoop-azure-datalake module includes a full suite of unit tests. Most of the tests will run without additional configuration by running `mvn test`. This includes tests against mocked storage, which is an in-memory emulation of Azure Data Lake Storage.
|
||||
|
||||
A selection of tests can run against Azure Data Lake Storage. To run tests against ADL storage, please configure contract-test-options.xml with the ADL account information mentioned in the above sections. Also turn on the contract test execution flag to trigger tests against Azure Data Lake Storage.
|
||||
|
||||
<property>
|
||||
<name>dfs.adl.test.contract.enable</name>
|
||||
<value>true</value>
|
||||
</property>
|
|
@ -0,0 +1,147 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
package org.apache.hadoop.fs.adl.oauth2;
|
||||
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.web.oauth2.AccessTokenProvider;
|
||||
|
||||
import org.junit.Before;
|
||||
import org.junit.Rule;
|
||||
import org.junit.Test;
|
||||
import org.junit.rules.TestName;
|
||||
import static org.junit.Assert.*;
|
||||
import static org.mockito.Mockito.*;
|
||||
|
||||
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_CLIENT_ID_KEY;
|
||||
import static org.apache.hadoop.hdfs.client.HdfsClientConfigKeys.OAUTH_REFRESH_URL_KEY;
|
||||
import static org.apache.hadoop.hdfs.web.oauth2.ConfRefreshTokenBasedAccessTokenProvider.OAUTH_REFRESH_TOKEN_KEY;
|
||||
|
||||
/**
|
||||
* Verify cache behavior of ConfRefreshTokenBasedAccessTokenProvider instances.
|
||||
*/
|
||||
public class TestCachedRefreshTokenBasedAccessTokenProvider {
|
||||
|
||||
private Configuration conf;
|
||||
|
||||
@Rule public TestName name = new TestName();
|
||||
/**
 * Builds a client id string for the currently running test method.
 *
 * @param id numeric suffix that distinguishes ids within one test method
 * @return the current test method name, followed by "_clientID" and the id
 */
String clientId(int id) {
|
||||
return name.getMethodName() + "_clientID" + id;
|
||||
}
|
||||
|
||||
/**
 * Creates a fresh Configuration before each test and fills in the client id,
 * refresh token and refresh URL keys used by the tests in this class.
 */
@Before
|
||||
public void initConfig() {
|
||||
// NOTE(review): the `false` argument is expected to skip loading the
// default resources, so only the keys set below are present - confirm
// against the Configuration constructor documentation.
conf = new Configuration(false);
|
||||
conf.set(OAUTH_CLIENT_ID_KEY, clientId(0));
|
||||
conf.set(OAUTH_REFRESH_TOKEN_KEY, "01234567890abcdef");
|
||||
// Presumably an ".invalid" host is used so that no real endpoint can be
// contacted by accident.
conf.set(OAUTH_REFRESH_URL_KEY, "http://dingo.invalid:80");
|
||||
}
|
||||
|
||||
/**
 * Exercises four consecutive setConf calls against the provider cache:
 * an initial configuration, a repeat with the same configuration (which
 * must not create a new instance), a repeat with FORCE_REFRESH enabled,
 * and finally one with a different refresh URL.
 */
@Test
|
||||
public void testCacheInstance() throws Exception {
|
||||
final AccessTokenProvider inst0 = mock(AccessTokenProvider.class);
|
||||
when(inst0.getConf()).thenReturn(conf);
|
||||
|
||||
// verify config: the first setConf must configure the instance supplied
// by newInstance()
CachedRefreshTokenBasedAccessTokenProvider t1 = new MockProvider(inst0);
|
||||
t1.setConf(conf);
|
||||
verify(inst0).setConf(any(Configuration.class)); // cloned, not exact match
|
||||
|
||||
// verify cache hit: with an unchanged conf, newInstance() must not be
// called; the override below fails the test if it is
CachedRefreshTokenBasedAccessTokenProvider t2 =
|
||||
new CachedRefreshTokenBasedAccessTokenProvider() {
|
||||
@Override
|
||||
AccessTokenProvider newInstance() {
|
||||
fail("Failed to return cached instance");
|
||||
return null;
|
||||
}
|
||||
};
|
||||
t2.setConf(conf);
|
||||
|
||||
// verify force refresh: with FORCE_REFRESH set, a new instance is
// expected to be configured even though the other keys are unchanged
conf.setBoolean(
|
||||
CachedRefreshTokenBasedAccessTokenProvider.FORCE_REFRESH, true);
|
||||
final AccessTokenProvider inst1 = mock(AccessTokenProvider.class);
|
||||
when(inst1.getConf()).thenReturn(conf);
|
||||
CachedRefreshTokenBasedAccessTokenProvider t3 = new MockProvider(inst1);
|
||||
t3.setConf(conf);
|
||||
verify(inst1).setConf(any(Configuration.class));
|
||||
|
||||
// verify cache miss: a different refresh URL is expected to require a
// new instance
conf.set(OAUTH_REFRESH_URL_KEY, "http://yak.invalid:80");
|
||||
final AccessTokenProvider inst2 = mock(AccessTokenProvider.class);
|
||||
when(inst2.getConf()).thenReturn(conf);
|
||||
CachedRefreshTokenBasedAccessTokenProvider t4 = new MockProvider(inst2);
|
||||
t4.setConf(conf);
|
||||
verify(inst2).setConf(any(Configuration.class));
|
||||
}
|
||||
|
||||
/**
 * Fills the provider cache with MAX_PROVIDERS distinct client ids, checks
 * that each of them is then served without creating a new instance, and
 * finally checks that adding one more id, and then re-requesting id 0,
 * each configure a new instance.
 */
@Test
|
||||
public void testCacheLimit() throws Exception {
|
||||
final int iter = CachedRefreshTokenBasedAccessTokenProvider.MAX_PROVIDERS;
|
||||
// populate: every distinct client id must configure its own new instance
for (int i = 0; i < iter; ++i) {
|
||||
conf.set(OAUTH_CLIENT_ID_KEY, clientId(i));
|
||||
AccessTokenProvider inst = mock(AccessTokenProvider.class);
|
||||
when(inst.getConf()).thenReturn(conf);
|
||||
CachedRefreshTokenBasedAccessTokenProvider t = new MockProvider(inst);
|
||||
t.setConf(conf);
|
||||
verify(inst).setConf(any(Configuration.class));
|
||||
}
|
||||
// verify cache hit: the same ids again must not reach newInstance();
// the override below fails the test if it is called
for (int i = 0; i < iter; ++i) {
|
||||
conf.set(OAUTH_CLIENT_ID_KEY, clientId(i));
|
||||
CachedRefreshTokenBasedAccessTokenProvider t =
|
||||
new CachedRefreshTokenBasedAccessTokenProvider() {
|
||||
@Override
|
||||
AccessTokenProvider newInstance() {
|
||||
fail("Failed to return cached instance");
|
||||
return null;
|
||||
}
|
||||
};
|
||||
t.setConf(conf);
|
||||
}
|
||||
|
||||
// verify miss, evict 0: one id beyond the limit must configure a new
// instance (the entry for id 0 is expected to be the one dropped)
conf.set(OAUTH_CLIENT_ID_KEY, clientId(iter));
|
||||
final AccessTokenProvider inst = mock(AccessTokenProvider.class);
|
||||
when(inst.getConf()).thenReturn(conf);
|
||||
CachedRefreshTokenBasedAccessTokenProvider t = new MockProvider(inst);
|
||||
t.setConf(conf);
|
||||
verify(inst).setConf(any(Configuration.class));
|
||||
|
||||
// verify miss: id 0 is requested again and must configure a new instance
conf.set(OAUTH_CLIENT_ID_KEY, clientId(0));
|
||||
final AccessTokenProvider inst0 = mock(AccessTokenProvider.class);
|
||||
when(inst0.getConf()).thenReturn(conf);
|
||||
CachedRefreshTokenBasedAccessTokenProvider t0 = new MockProvider(inst0);
|
||||
t0.setConf(conf);
|
||||
verify(inst0).setConf(any(Configuration.class));
|
||||
}
|
||||
|
||||
/**
 * Test double whose newInstance() hands back the AccessTokenProvider passed
 * to its constructor, so a test can verify calls made on that instance.
 */
static class MockProvider extends CachedRefreshTokenBasedAccessTokenProvider {
|
||||
// instance returned from every newInstance() call
private final AccessTokenProvider inst;
|
||||
MockProvider(AccessTokenProvider inst) {
|
||||
this.inst = inst;
|
||||
}
|
||||
@Override
|
||||
AccessTokenProvider newInstance() {
|
||||
return inst;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -105,6 +105,12 @@
|
|||
<artifactId>hadoop-sls</artifactId>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-azure-datalake</artifactId>
|
||||
<scope>compile</scope>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
|
|
@ -46,6 +46,7 @@
|
|||
<module>hadoop-azure</module>
|
||||
<module>hadoop-aws</module>
|
||||
<module>hadoop-kafka</module>
|
||||
<module>hadoop-azure-datalake</module>
|
||||
</modules>
|
||||
|
||||
<build>
|
||||
|
|
Loading…
Reference in New Issue