HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria.
This commit is contained in:
parent
51d497fa93
commit
51d16e7b38
|
@ -147,7 +147,12 @@
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>org.apache.hadoop</groupId>
|
<groupId>org.apache.hadoop</groupId>
|
||||||
<artifactId>hadoop-common</artifactId>
|
<artifactId>hadoop-common</artifactId>
|
||||||
</dependency>
|
</dependency>
|
||||||
|
<dependency>
|
||||||
|
<groupId>com.squareup.okhttp</groupId>
|
||||||
|
<artifactId>okhttp</artifactId>
|
||||||
|
<version>2.4.0</version>
|
||||||
|
</dependency>
|
||||||
<dependency>
|
<dependency>
|
||||||
<groupId>junit</groupId>
|
<groupId>junit</groupId>
|
||||||
<artifactId>junit</artifactId>
|
<artifactId>junit</artifactId>
|
||||||
|
|
|
@ -0,0 +1,155 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.hadoop.hdfs.web.oauth2;
|
||||||
|
|
||||||
|
import com.squareup.okhttp.OkHttpClient;
|
||||||
|
import com.squareup.okhttp.Request;
|
||||||
|
import com.squareup.okhttp.RequestBody;
|
||||||
|
import com.squareup.okhttp.Response;
|
||||||
|
import com.squareup.okhttp.MediaType;
|
||||||
|
import org.apache.hadoop.classification.InterfaceAudience;
|
||||||
|
import org.apache.hadoop.classification.InterfaceStability;
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
|
||||||
|
import org.apache.hadoop.util.Timer;
|
||||||
|
import org.apache.http.HttpStatus;
|
||||||
|
import org.codehaus.jackson.map.ObjectMapper;
|
||||||
|
import org.codehaus.jackson.map.ObjectReader;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
|
import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Obtain an access token via the credential-based OAuth2 workflow.
|
||||||
|
*/
|
||||||
|
@InterfaceAudience.Public
|
||||||
|
@InterfaceStability.Evolving
|
||||||
|
public class AzureADClientCredentialBasedAccesTokenProvider
|
||||||
|
extends AccessTokenProvider {
|
||||||
|
private static final ObjectReader READER =
|
||||||
|
new ObjectMapper().reader(Map.class);
|
||||||
|
|
||||||
|
public static final String OAUTH_CREDENTIAL_KEY
|
||||||
|
= "dfs.webhdfs.oauth2.credential";
|
||||||
|
|
||||||
|
public static final String AAD_RESOURCE_KEY
|
||||||
|
= "fs.adls.oauth2.resource";
|
||||||
|
|
||||||
|
public static final String RESOURCE_PARAM_NAME
|
||||||
|
= "resource";
|
||||||
|
|
||||||
|
private static final String OAUTH_CLIENT_ID_KEY
|
||||||
|
= "dfs.webhdfs.oauth2.client.id";
|
||||||
|
|
||||||
|
private static final String OAUTH_REFRESH_URL_KEY
|
||||||
|
= "dfs.webhdfs.oauth2.refresh.url";
|
||||||
|
|
||||||
|
|
||||||
|
public static final String ACCESS_TOKEN = "access_token";
|
||||||
|
public static final String CLIENT_CREDENTIALS = "client_credentials";
|
||||||
|
public static final String CLIENT_ID = "client_id";
|
||||||
|
public static final String CLIENT_SECRET = "client_secret";
|
||||||
|
public static final String EXPIRES_IN = "expires_in";
|
||||||
|
public static final String GRANT_TYPE = "grant_type";
|
||||||
|
public static final MediaType URLENCODED
|
||||||
|
= MediaType.parse("application/x-www-form-urlencoded; charset=utf-8");
|
||||||
|
|
||||||
|
|
||||||
|
private AccessTokenTimer timer;
|
||||||
|
|
||||||
|
private String clientId;
|
||||||
|
|
||||||
|
private String refreshURL;
|
||||||
|
|
||||||
|
private String accessToken;
|
||||||
|
|
||||||
|
private String resource;
|
||||||
|
|
||||||
|
private String credential;
|
||||||
|
|
||||||
|
private boolean initialCredentialObtained = false;
|
||||||
|
|
||||||
|
AzureADClientCredentialBasedAccesTokenProvider() {
|
||||||
|
this.timer = new AccessTokenTimer();
|
||||||
|
}
|
||||||
|
|
||||||
|
AzureADClientCredentialBasedAccesTokenProvider(Timer timer) {
|
||||||
|
this.timer = new AccessTokenTimer(timer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setConf(Configuration conf) {
|
||||||
|
super.setConf(conf);
|
||||||
|
clientId = notNull(conf, OAUTH_CLIENT_ID_KEY);
|
||||||
|
refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY);
|
||||||
|
resource = notNull(conf, AAD_RESOURCE_KEY);
|
||||||
|
credential = notNull(conf, OAUTH_CREDENTIAL_KEY);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getAccessToken() throws IOException {
|
||||||
|
if(timer.shouldRefresh() || !initialCredentialObtained) {
|
||||||
|
refresh();
|
||||||
|
initialCredentialObtained = true;
|
||||||
|
}
|
||||||
|
return accessToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
void refresh() throws IOException {
|
||||||
|
try {
|
||||||
|
OkHttpClient client = new OkHttpClient();
|
||||||
|
client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
|
||||||
|
TimeUnit.MILLISECONDS);
|
||||||
|
|
||||||
|
String bodyString = Utils.postBody(CLIENT_SECRET, credential,
|
||||||
|
GRANT_TYPE, CLIENT_CREDENTIALS,
|
||||||
|
RESOURCE_PARAM_NAME, resource,
|
||||||
|
CLIENT_ID, clientId);
|
||||||
|
|
||||||
|
RequestBody body = RequestBody.create(URLENCODED, bodyString);
|
||||||
|
|
||||||
|
Request request = new Request.Builder()
|
||||||
|
.url(refreshURL)
|
||||||
|
.post(body)
|
||||||
|
.build();
|
||||||
|
Response responseBody = client.newCall(request).execute();
|
||||||
|
|
||||||
|
if (responseBody.code() != HttpStatus.SC_OK) {
|
||||||
|
throw new IllegalArgumentException("Received invalid http response: "
|
||||||
|
+ responseBody.code() + ", text = " + responseBody.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
Map<?, ?> response = READER.readValue(responseBody.body().string());
|
||||||
|
|
||||||
|
String newExpiresIn = response.get(EXPIRES_IN).toString();
|
||||||
|
timer.setExpiresIn(newExpiresIn);
|
||||||
|
|
||||||
|
accessToken = response.get(ACCESS_TOKEN).toString();
|
||||||
|
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new IOException("Unable to obtain access token from credential", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -23,6 +23,9 @@
|
||||||
* [OAuth2 Support](#OAuth2_Support)
|
* [OAuth2 Support](#OAuth2_Support)
|
||||||
* [Read Ahead Buffer Management](#Read_Ahead_Buffer_Management)
|
* [Read Ahead Buffer Management](#Read_Ahead_Buffer_Management)
|
||||||
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
|
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
|
||||||
|
* [Using Refresh Token](#Refresh_Token)
|
||||||
|
* [Using Client Keys](#Client_Credential_Token)
|
||||||
|
* [Enabling ADL Filesystem](#Enabling_ADL)
|
||||||
* [Accessing adl URLs](#Accessing_adl_URLs)
|
* [Accessing adl URLs](#Accessing_adl_URLs)
|
||||||
* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
|
* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
|
||||||
|
|
||||||
|
@ -131,6 +134,9 @@ To configure number of concurrent connection to Azure Data Lake Storage Account.
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
|
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
|
||||||
|
Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal).
|
||||||
|
|
||||||
|
### <a name="Refresh_Token" />Using Refresh Token
|
||||||
|
|
||||||
Update core-site.xml for OAuth2 configuration
|
Update core-site.xml for OAuth2 configuration
|
||||||
|
|
||||||
|
@ -173,6 +179,64 @@ Application require to set Client id and OAuth2 refresh token from Azure Active
|
||||||
<value></value>
|
<value></value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
### <a name="Client_Credential_Token" />Using Client Keys
|
||||||
|
|
||||||
|
#### Generating the Service Principal
|
||||||
|
1. Go to the portal (https://portal.azure.com)
|
||||||
|
2. Under "Browse", look for Active Directory and click on it.
|
||||||
|
3. Create a "Web Application". Remember the name you choose here - it is the name you will add to your ADL account as an authorized user.
|
||||||
|
4. Go through the wizard
|
||||||
|
5. Once the app is created, go to the app configuration and find the "keys" section
|
||||||
|
6. Select a key duration and hit save. Save the generated keys.
|
||||||
|
7. Note down the properties you will need to authenticate:
|
||||||
|
- The client ID
|
||||||
|
- The key you just generated above
|
||||||
|
- The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth 2.0 Token Endpoint value)
|
||||||
|
- Resource: always https://management.core.windows.net/ for all customers
|
||||||
|
|
||||||
|
#### Adding the service principal to your ADL Account
|
||||||
|
1. Go to the portal again, and open your ADL account
|
||||||
|
2. Select Users under Settings
|
||||||
|
3. Add the application name you created in Step 3 of "Generating the Service Principal" (note that it does not show up in the list, but will be found if you search for the name)
|
||||||
|
4. Add "Owner" role
|
||||||
|
|
||||||
|
#### Configure core-site.xml
|
||||||
|
Add the following properties to your core-site.xml
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.webhdfs.oauth2.access.token.provider</name>
|
||||||
|
<value>org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.webhdfs.oauth2.refresh.url</name>
|
||||||
|
<value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.webhdfs.oauth2.client.id</name>
|
||||||
|
<value>CLIENT ID FROM STEP 7 ABOVE</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>dfs.webhdfs.oauth2.credential</name>
|
||||||
|
<value>KEY FROM STEP 7 ABOVE</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.adls.oauth2.resource</name>
|
||||||
|
<value>https://management.core.windows.net/</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
<property>
|
||||||
|
<name>fs.defaultFS</name>
|
||||||
|
<value>YOUR ADL STORE URL (e.g., adl://example.azuredatalakestore.net)</value>
|
||||||
|
</property>
|
||||||
|
|
||||||
|
|
||||||
|
## <a name="Enabling_ADL" />Enabling ADL Filesystem
|
||||||
|
|
||||||
For the ADL FileSystem to take effect, update core-site.xml with
|
For the ADL FileSystem to take effect, update core-site.xml with
|
||||||
|
|
||||||
<property>
|
<property>
|
||||||
|
|
Loading…
Reference in New Issue