HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria.

This commit is contained in:
Chris Nauroth 2016-06-16 23:35:20 -07:00
parent 51d497fa93
commit 51d16e7b38
3 changed files with 225 additions and 1 deletions

View File

@ -147,7 +147,12 @@
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
</dependency>
</dependency>
<!-- OkHttp HTTP client: used by AzureADClientCredentialBasedAccesTokenProvider
     to POST the OAuth2 client-credential token request. -->
<dependency>
<groupId>com.squareup.okhttp</groupId>
<artifactId>okhttp</artifactId>
<version>2.4.0</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>

View File

@ -0,0 +1,155 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hadoop.hdfs.web.oauth2;
import com.squareup.okhttp.OkHttpClient;
import com.squareup.okhttp.Request;
import com.squareup.okhttp.RequestBody;
import com.squareup.okhttp.Response;
import com.squareup.okhttp.MediaType;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
import org.apache.hadoop.util.Timer;
import org.apache.http.HttpStatus;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.map.ObjectReader;
import java.io.IOException;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull;
/**
 * Obtain an access token via the credential-based (client id + client key)
 * OAuth2 workflow.
 *
 * <p>The provider POSTs a form-encoded {@code client_credentials} grant to
 * the configured refresh URL and caches the returned {@code access_token}
 * until the {@link AccessTokenTimer} reports that it should be refreshed.
 *
 * <p>This class performs no synchronization of its own; callers sharing an
 * instance across threads must coordinate access externally.
 */
@InterfaceAudience.Public
@InterfaceStability.Evolving
public class AzureADClientCredentialBasedAccesTokenProvider
    extends AccessTokenProvider {
  /** Shared JSON reader that parses the token response into a Map. */
  private static final ObjectReader READER =
      new ObjectMapper().reader(Map.class);

  /** Configuration key holding the client key (secret). */
  public static final String OAUTH_CREDENTIAL_KEY
      = "dfs.webhdfs.oauth2.credential";

  /** Configuration key holding the resource the token is requested for. */
  public static final String AAD_RESOURCE_KEY
      = "fs.adls.oauth2.resource";

  /** Name of the form parameter that carries the resource. */
  public static final String RESOURCE_PARAM_NAME
      = "resource";

  /** Configuration key holding the client id. */
  private static final String OAUTH_CLIENT_ID_KEY
      = "dfs.webhdfs.oauth2.client.id";

  /** Configuration key holding the token endpoint URL. */
  private static final String OAUTH_REFRESH_URL_KEY
      = "dfs.webhdfs.oauth2.refresh.url";

  // Form parameter names/values and JSON field names of the token exchange.
  public static final String ACCESS_TOKEN = "access_token";
  public static final String CLIENT_CREDENTIALS = "client_credentials";
  public static final String CLIENT_ID = "client_id";
  public static final String CLIENT_SECRET = "client_secret";
  public static final String EXPIRES_IN = "expires_in";
  public static final String GRANT_TYPE = "grant_type";

  /** Media type of the form-encoded request body. */
  public static final MediaType URLENCODED
      = MediaType.parse("application/x-www-form-urlencoded; charset=utf-8");

  /** Tracks when the cached token should be refreshed. */
  private final AccessTokenTimer timer;

  private String clientId;

  private String refreshURL;

  /** Most recently obtained token; null until the first successful refresh. */
  private String accessToken;

  private String resource;

  private String credential;

  /** False until the first refresh has completed successfully. */
  private boolean initialCredentialObtained = false;

  /** Create a provider backed by a default {@link AccessTokenTimer}. */
  AzureADClientCredentialBasedAccesTokenProvider() {
    this.timer = new AccessTokenTimer();
  }

  /**
   * Create a provider whose expiry tracking is driven by the given timer.
   *
   * @param timer time source handed to the {@link AccessTokenTimer}
   */
  AzureADClientCredentialBasedAccesTokenProvider(Timer timer) {
    this.timer = new AccessTokenTimer(timer);
  }

  /**
   * Read the client id, refresh URL, resource and credential from the
   * configuration. Each value is fetched through {@code Utils.notNull},
   * which is presumably where a missing key is rejected (confirm against
   * Utils).
   *
   * @param conf configuration to read the OAuth2 settings from
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);
    clientId = notNull(conf, OAUTH_CLIENT_ID_KEY);
    refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY);
    resource = notNull(conf, AAD_RESOURCE_KEY);
    credential = notNull(conf, OAUTH_CREDENTIAL_KEY);
  }

  /**
   * Return the cached access token, fetching a new one first if none has
   * been obtained yet or the timer says the current one should be refreshed.
   *
   * @return the current access token
   * @throws IOException if a required refresh fails
   */
  @Override
  public String getAccessToken() throws IOException {
    if (timer.shouldRefresh() || !initialCredentialObtained) {
      refresh();
      initialCredentialObtained = true;
    }
    return accessToken;
  }

  /**
   * Request a new access token from the refresh URL and, on success, update
   * the cached token and the expiry timer.
   *
   * <p>The cached state is only modified after both {@code expires_in} and
   * {@code access_token} have been found in the response, so a malformed
   * response never extends the lifetime of a stale token. The HTTP response
   * body is always closed, including on the non-200 path.
   *
   * @throws IOException if the request fails, the endpoint answers with a
   *     status other than 200, or the response lacks a required field
   */
  void refresh() throws IOException {
    try {
      OkHttpClient client = new OkHttpClient();
      client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
          TimeUnit.MILLISECONDS);
      client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
          TimeUnit.MILLISECONDS);

      String bodyString = Utils.postBody(CLIENT_SECRET, credential,
          GRANT_TYPE, CLIENT_CREDENTIALS,
          RESOURCE_PARAM_NAME, resource,
          CLIENT_ID, clientId);

      RequestBody body = RequestBody.create(URLENCODED, bodyString);

      Request request = new Request.Builder()
          .url(refreshURL)
          .post(body)
          .build();
      Response responseBody = client.newCall(request).execute();
      try {
        if (responseBody.code() != HttpStatus.SC_OK) {
          throw new IllegalArgumentException("Received invalid http response: "
              + responseBody.code() + ", text = " + responseBody.toString());
        }

        Map<?, ?> response = READER.readValue(responseBody.body().string());

        // Validate both fields before touching any cached state.
        Object newExpiresIn = response.get(EXPIRES_IN);
        Object newAccessToken = response.get(ACCESS_TOKEN);
        if (newExpiresIn == null || newAccessToken == null) {
          // Deliberately do not echo the response: it may contain a token.
          throw new IllegalArgumentException("Token response is missing '"
              + EXPIRES_IN + "' or '" + ACCESS_TOKEN + "'");
        }

        timer.setExpiresIn(newExpiresIn.toString());
        accessToken = newAccessToken.toString();
      } finally {
        // Release the connection even when the status check or parsing fails.
        responseBody.body().close();
      }
    } catch (Exception e) {
      throw new IOException("Unable to obtain access token from credential", e);
    }
  }
}

View File

@ -23,6 +23,9 @@
* [OAuth2 Support](#OAuth2_Support)
* [Read Ahead Buffer Management](#Read_Ahead_Buffer_Management)
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
* [Using Refresh Token](#Refresh_Token)
* [Using Client Keys](#Client_Credential_Token)
* [Enabling ADL Filesystem](#Enabling_ADL)
* [Accessing adl URLs](#Accessing_adl_URLs)
* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
@ -131,6 +134,9 @@ To configure number of concurrent connection to Azure Data Lake Storage Account.
</property>
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal).
### <a name="Refresh_Token" />Using Refresh Token
Update core-site.xml for OAuth2 configuration
@ -173,6 +179,64 @@ Application require to set Client id and OAuth2 refresh token from Azure Active
<value></value>
</property>
### <a name="Client_Credential_Token" />Using Client Keys
#### Generating the Service Principal
1. Go to the portal (https://portal.azure.com)
2. Under "Browse", look for Active Directory and click on it.
3. Create a "Web Application". Remember the name you choose here - that is what you will add to your ADL account as an authorized user.
4. Go through the wizard
5. Once the app is created, go to the app configuration and find the section on "keys"
6. Select a key duration and hit save. Save the generated keys.
7. Note down the properties you will need to auth:
- The client ID
- The key you just generated above
- The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth 2.0 Token Endpoint value)
- Resource: Always https://management.core.windows.net/ , for all customers
#### Adding the service principal to your ADL Account
1. Go to the portal again, and open your ADL account
2. Select Users under Settings
3. Add the user name you created in Step 3 of "Generating the Service Principal" above (note that it does not show up in the list, but will be found if you search for the name)
4. Add "Owner" role
#### Configure core-site.xml
Add the following properties to your core-site.xml
<property>
<name>dfs.webhdfs.oauth2.access.token.provider</name>
<value>org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.refresh.url</name>
<value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.client.id</name>
<value>CLIENT ID FROM STEP 7 ABOVE</value>
</property>
<property>
<name>dfs.webhdfs.oauth2.credential</name>
<value>KEY (PASSWORD) FROM STEP 7 ABOVE</value>
</property>
<property>
<name>fs.adls.oauth2.resource</name>
<value>https://management.core.windows.net/</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>YOUR ADL STORE URL (e.g., https://example.azuredatalakestore.net) </value>
</property>
## <a name="Enabling_ADL" />Enabling ADL Filesystem
For the ADL FileSystem to take effect, update core-site.xml with
<property>