From 51d16e7b38d247f73b0ec2ffd8b2b02069c05a33 Mon Sep 17 00:00:00 2001 From: Chris Nauroth Date: Thu, 16 Jun 2016 23:35:20 -0700 Subject: [PATCH] HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria. --- hadoop-tools/hadoop-azure-datalake/pom.xml | 7 +- ...ientCredentialBasedAccesTokenProvider.java | 155 ++++++++++++++++++ .../src/site/markdown/index.md | 64 ++++++++ 3 files changed, 225 insertions(+), 1 deletion(-) create mode 100644 hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml index a4b1fe1fca5..d2161c73c7d 100644 --- a/hadoop-tools/hadoop-azure-datalake/pom.xml +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -147,7 +147,12 @@ org.apache.hadoop hadoop-common - + + + com.squareup.okhttp + okhttp + 2.4.0 + junit junit diff --git a/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java new file mode 100644 index 00000000000..6dfc593feaa --- /dev/null +++ b/hadoop-tools/hadoop-azure-datalake/src/main/java/org/apache/hadoop/hdfs/web/oauth2/AzureADClientCredentialBasedAccesTokenProvider.java @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hdfs.web.oauth2; + +import com.squareup.okhttp.OkHttpClient; +import com.squareup.okhttp.Request; +import com.squareup.okhttp.RequestBody; +import com.squareup.okhttp.Response; +import com.squareup.okhttp.MediaType; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hdfs.web.URLConnectionFactory; +import org.apache.hadoop.util.Timer; +import org.apache.http.HttpStatus; +import org.codehaus.jackson.map.ObjectMapper; +import org.codehaus.jackson.map.ObjectReader; + +import java.io.IOException; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull; + + +/** + * Obtain an access token via the credential-based OAuth2 workflow. 
+ */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public class AzureADClientCredentialBasedAccesTokenProvider + extends AccessTokenProvider { + private static final ObjectReader READER = + new ObjectMapper().reader(Map.class); + + public static final String OAUTH_CREDENTIAL_KEY + = "dfs.webhdfs.oauth2.credential"; + + public static final String AAD_RESOURCE_KEY + = "fs.adls.oauth2.resource"; + + public static final String RESOURCE_PARAM_NAME + = "resource"; + + private static final String OAUTH_CLIENT_ID_KEY + = "dfs.webhdfs.oauth2.client.id"; + + private static final String OAUTH_REFRESH_URL_KEY + = "dfs.webhdfs.oauth2.refresh.url"; + + + public static final String ACCESS_TOKEN = "access_token"; + public static final String CLIENT_CREDENTIALS = "client_credentials"; + public static final String CLIENT_ID = "client_id"; + public static final String CLIENT_SECRET = "client_secret"; + public static final String EXPIRES_IN = "expires_in"; + public static final String GRANT_TYPE = "grant_type"; + public static final MediaType URLENCODED + = MediaType.parse("application/x-www-form-urlencoded; charset=utf-8"); + + + private AccessTokenTimer timer; + + private String clientId; + + private String refreshURL; + + private String accessToken; + + private String resource; + + private String credential; + + private boolean initialCredentialObtained = false; + + AzureADClientCredentialBasedAccesTokenProvider() { + this.timer = new AccessTokenTimer(); + } + + AzureADClientCredentialBasedAccesTokenProvider(Timer timer) { + this.timer = new AccessTokenTimer(timer); + } + + @Override + public void setConf(Configuration conf) { + super.setConf(conf); + clientId = notNull(conf, OAUTH_CLIENT_ID_KEY); + refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY); + resource = notNull(conf, AAD_RESOURCE_KEY); + credential = notNull(conf, OAUTH_CREDENTIAL_KEY); + } + + @Override + public String getAccessToken() throws IOException { + if(timer.shouldRefresh() || 
!initialCredentialObtained) { + refresh(); + initialCredentialObtained = true; + } + return accessToken; + } + + void refresh() throws IOException { + try { + OkHttpClient client = new OkHttpClient(); + client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, + TimeUnit.MILLISECONDS); + client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT, + TimeUnit.MILLISECONDS); + + String bodyString = Utils.postBody(CLIENT_SECRET, credential, + GRANT_TYPE, CLIENT_CREDENTIALS, + RESOURCE_PARAM_NAME, resource, + CLIENT_ID, clientId); + + RequestBody body = RequestBody.create(URLENCODED, bodyString); + + Request request = new Request.Builder() + .url(refreshURL) + .post(body) + .build(); + Response responseBody = client.newCall(request).execute(); + + if (responseBody.code() != HttpStatus.SC_OK) { + throw new IllegalArgumentException("Received invalid http response: " + + responseBody.code() + ", text = " + responseBody.toString()); + } + + Map response = READER.readValue(responseBody.body().string()); + + String newExpiresIn = response.get(EXPIRES_IN).toString(); + timer.setExpiresIn(newExpiresIn); + + accessToken = response.get(ACCESS_TOKEN).toString(); + + } catch (Exception e) { + throw new IOException("Unable to obtain access token from credential", e); + } + } +} diff --git a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md index 4158c88aff7..3f03d41f583 100644 --- a/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md +++ b/hadoop-tools/hadoop-azure-datalake/src/site/markdown/index.md @@ -23,6 +23,9 @@ * [OAuth2 Support](#OAuth2_Support) * [Read Ahead Buffer Management](Read_Ahead_Buffer_Management) * [Configuring Credentials & FileSystem](#Configuring_Credentials) + * [Using Refresh Token](#Refresh_Token) + * [Using Client Keys](#Client_Credential_Token) + * [Enabling ADL Filesystem](#Enabling_ADL) * [Accessing adl URLs](#Accessing_adl_URLs) * [Testing the 
hadoop-azure Module](#Testing_the_hadoop-azure_Module) @@ -131,6 +134,9 @@ To configure number of concurrent connection to Azure Data Lake Storage Account. ## Configuring Credentials & FileSystem +Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal). + +### Using Refresh Token Update core-site.xml for OAuth2 configuration @@ -173,6 +179,64 @@ Application require to set Client id and OAuth2 refresh token from Azure Active + +### Using Client Keys + +#### Generating the Service Principal +1. Go to the portal (https://portal.azure.com) +2. Under "Browse", look for Active Directory and click on it. +3. Create "Web Application". Remember the name you create here - that is what you will add to your ADL account as an authorized user. +4. Go through the wizard +5. Once the app is created, go to the app configuration, and find the section on "keys" +6. Select a key duration and hit save. Save the generated keys. +7. Note down the properties you will need to authenticate: + - The client ID + - The key you just generated above + - The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth2.0 Token Endpoint value) + - Resource: Always https://management.core.windows.net/ , for all customers + +#### Adding the service principal to your ADL Account +1. Go to the portal again, and open your ADL account +2. Select Users under Settings +3. Add the user name you created in Step 3 above (note that it does not show up in the list, but will be found if you search for the name) +4. 
Add "Owner" role + +#### Configure core-site.xml +Add the following properties to your core-site.xml + + dfs.webhdfs.oauth2.access.token.provider + org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider + + + dfs.webhdfs.oauth2.refresh.url + TOKEN ENDPOINT FROM STEP 7 ABOVE + + + dfs.webhdfs.oauth2.client.id + CLIENT ID FROM STEP 7 ABOVE + + + dfs.webhdfs.oauth2.credential + KEY FROM STEP 7 ABOVE + + + fs.adls.oauth2.resource + https://management.core.windows.net/ + + + fs.defaultFS + YOUR ADL STORE URL (e.g., https://example.azuredatalakestore.net) + + + +## Enabling ADL Filesystem + For ADL FileSystem to take effect. Update core-site.xml with