HADOOP-13242. Authenticate to Azure Data Lake using client ID and keys. Contributed by Atul Sikaria.
This commit is contained in:
parent
51d497fa93
commit
51d16e7b38
|
@ -147,7 +147,12 @@
|
|||
<dependency>
|
||||
<groupId>org.apache.hadoop</groupId>
|
||||
<artifactId>hadoop-common</artifactId>
|
||||
</dependency>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.squareup.okhttp</groupId>
|
||||
<artifactId>okhttp</artifactId>
|
||||
<version>2.4.0</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
|
|
|
@ -0,0 +1,155 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package org.apache.hadoop.hdfs.web.oauth2;
|
||||
|
||||
import com.squareup.okhttp.OkHttpClient;
|
||||
import com.squareup.okhttp.Request;
|
||||
import com.squareup.okhttp.RequestBody;
|
||||
import com.squareup.okhttp.Response;
|
||||
import com.squareup.okhttp.MediaType;
|
||||
import org.apache.hadoop.classification.InterfaceAudience;
|
||||
import org.apache.hadoop.classification.InterfaceStability;
|
||||
import org.apache.hadoop.conf.Configuration;
|
||||
import org.apache.hadoop.hdfs.web.URLConnectionFactory;
|
||||
import org.apache.hadoop.util.Timer;
|
||||
import org.apache.http.HttpStatus;
|
||||
import org.codehaus.jackson.map.ObjectMapper;
|
||||
import org.codehaus.jackson.map.ObjectReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.apache.hadoop.hdfs.web.oauth2.Utils.notNull;
|
||||
|
||||
|
||||
/**
|
||||
* Obtain an access token via the credential-based OAuth2 workflow.
|
||||
*/
|
||||
@InterfaceAudience.Public
|
||||
@InterfaceStability.Evolving
|
||||
public class AzureADClientCredentialBasedAccesTokenProvider
|
||||
extends AccessTokenProvider {
|
||||
private static final ObjectReader READER =
|
||||
new ObjectMapper().reader(Map.class);
|
||||
|
||||
public static final String OAUTH_CREDENTIAL_KEY
|
||||
= "dfs.webhdfs.oauth2.credential";
|
||||
|
||||
public static final String AAD_RESOURCE_KEY
|
||||
= "fs.adls.oauth2.resource";
|
||||
|
||||
public static final String RESOURCE_PARAM_NAME
|
||||
= "resource";
|
||||
|
||||
private static final String OAUTH_CLIENT_ID_KEY
|
||||
= "dfs.webhdfs.oauth2.client.id";
|
||||
|
||||
private static final String OAUTH_REFRESH_URL_KEY
|
||||
= "dfs.webhdfs.oauth2.refresh.url";
|
||||
|
||||
|
||||
public static final String ACCESS_TOKEN = "access_token";
|
||||
public static final String CLIENT_CREDENTIALS = "client_credentials";
|
||||
public static final String CLIENT_ID = "client_id";
|
||||
public static final String CLIENT_SECRET = "client_secret";
|
||||
public static final String EXPIRES_IN = "expires_in";
|
||||
public static final String GRANT_TYPE = "grant_type";
|
||||
public static final MediaType URLENCODED
|
||||
= MediaType.parse("application/x-www-form-urlencoded; charset=utf-8");
|
||||
|
||||
|
||||
private AccessTokenTimer timer;
|
||||
|
||||
private String clientId;
|
||||
|
||||
private String refreshURL;
|
||||
|
||||
private String accessToken;
|
||||
|
||||
private String resource;
|
||||
|
||||
private String credential;
|
||||
|
||||
private boolean initialCredentialObtained = false;
|
||||
|
||||
AzureADClientCredentialBasedAccesTokenProvider() {
|
||||
this.timer = new AccessTokenTimer();
|
||||
}
|
||||
|
||||
AzureADClientCredentialBasedAccesTokenProvider(Timer timer) {
|
||||
this.timer = new AccessTokenTimer(timer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setConf(Configuration conf) {
|
||||
super.setConf(conf);
|
||||
clientId = notNull(conf, OAUTH_CLIENT_ID_KEY);
|
||||
refreshURL = notNull(conf, OAUTH_REFRESH_URL_KEY);
|
||||
resource = notNull(conf, AAD_RESOURCE_KEY);
|
||||
credential = notNull(conf, OAUTH_CREDENTIAL_KEY);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getAccessToken() throws IOException {
|
||||
if(timer.shouldRefresh() || !initialCredentialObtained) {
|
||||
refresh();
|
||||
initialCredentialObtained = true;
|
||||
}
|
||||
return accessToken;
|
||||
}
|
||||
|
||||
void refresh() throws IOException {
|
||||
try {
|
||||
OkHttpClient client = new OkHttpClient();
|
||||
client.setConnectTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
|
||||
TimeUnit.MILLISECONDS);
|
||||
client.setReadTimeout(URLConnectionFactory.DEFAULT_SOCKET_TIMEOUT,
|
||||
TimeUnit.MILLISECONDS);
|
||||
|
||||
String bodyString = Utils.postBody(CLIENT_SECRET, credential,
|
||||
GRANT_TYPE, CLIENT_CREDENTIALS,
|
||||
RESOURCE_PARAM_NAME, resource,
|
||||
CLIENT_ID, clientId);
|
||||
|
||||
RequestBody body = RequestBody.create(URLENCODED, bodyString);
|
||||
|
||||
Request request = new Request.Builder()
|
||||
.url(refreshURL)
|
||||
.post(body)
|
||||
.build();
|
||||
Response responseBody = client.newCall(request).execute();
|
||||
|
||||
if (responseBody.code() != HttpStatus.SC_OK) {
|
||||
throw new IllegalArgumentException("Received invalid http response: "
|
||||
+ responseBody.code() + ", text = " + responseBody.toString());
|
||||
}
|
||||
|
||||
Map<?, ?> response = READER.readValue(responseBody.body().string());
|
||||
|
||||
String newExpiresIn = response.get(EXPIRES_IN).toString();
|
||||
timer.setExpiresIn(newExpiresIn);
|
||||
|
||||
accessToken = response.get(ACCESS_TOKEN).toString();
|
||||
|
||||
} catch (Exception e) {
|
||||
throw new IOException("Unable to obtain access token from credential", e);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -23,6 +23,9 @@
|
|||
* [OAuth2 Support](#OAuth2_Support)
|
||||
* [Read Ahead Buffer Management](#Read_Ahead_Buffer_Management)
|
||||
* [Configuring Credentials & FileSystem](#Configuring_Credentials)
|
||||
* [Using Refresh Token](#Refresh_Token)
|
||||
* [Using Client Keys](#Client_Credential_Token)
|
||||
* [Enabling ADL Filesystem](#Enabling_ADL)
|
||||
* [Accessing adl URLs](#Accessing_adl_URLs)
|
||||
* [Testing the hadoop-azure Module](#Testing_the_hadoop-azure_Module)
|
||||
|
||||
|
@ -131,6 +134,9 @@ To configure number of concurrent connection to Azure Data Lake Storage Account.
|
|||
</property>
|
||||
|
||||
## <a name="Configuring_Credentials" />Configuring Credentials & FileSystem
|
||||
Credentials can be configured using either a refresh token (associated with a user) or a client credential (analogous to a service principal).
|
||||
|
||||
### <a name="Refresh_Token" />Using Refresh Token
|
||||
|
||||
Update core-site.xml for OAuth2 configuration
|
||||
|
||||
|
@ -173,6 +179,64 @@ Application require to set Client id and OAuth2 refresh token from Azure Active
|
|||
<value></value>
|
||||
</property>
|
||||
|
||||
|
||||
### <a name="Client_Credential_Token" />Using Client Keys
|
||||
|
||||
#### Generating the Service Principal
|
||||
1. Go to the portal (https://portal.azure.com)
|
||||
2. Under "Browse", look for Active Directory and click on it.
|
||||
3. Create "Web Application". Remember the name you create here - that is what you will add to your ADL account as authorized user.
|
||||
4. Go through the wizard
|
||||
5. Once the app is created, go to the app configuration and find the section on "keys"
|
||||
6. Select a key duration and hit save. Save the generated keys.
|
||||
7. Note down the properties you will need to auth:
|
||||
- The client ID
|
||||
- The key you just generated above
|
||||
- The token endpoint (select "View endpoints" at the bottom of the page and copy/paste the OAuth 2.0 Token Endpoint value)
|
||||
- Resource: Always https://management.core.windows.net/ , for all customers
|
||||
|
||||
#### Adding the service principal to your ADL Account
|
||||
1. Go to the portal again, and open your ADL account
|
||||
2. Select Users under Settings
|
||||
3. Add the name of the application you created in Step 3 of "Generating the Service Principal" above (note that it does not show up in the list, but will be found if you search for the name)
|
||||
4. Add "Owner" role
|
||||
|
||||
#### Configure core-site.xml
|
||||
Add the following properties to your core-site.xml
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.access.token.provider</name>
|
||||
<value>org.apache.hadoop.hdfs.web.oauth2.AzureADClientCredentialBasedAccesTokenProvider</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.refresh.url</name>
|
||||
<value>TOKEN ENDPOINT FROM STEP 7 ABOVE</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.client.id</name>
|
||||
<value>CLIENT ID FROM STEP 7 ABOVE</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>dfs.webhdfs.oauth2.credential</name>
|
||||
<value>PASSWORD FROM STEP 7 ABOVE</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.adls.oauth2.resource</name>
|
||||
<value>https://management.core.windows.net/</value>
|
||||
</property>
|
||||
|
||||
<property>
|
||||
<name>fs.defaultFS</name>
|
||||
<value>YOUR ADL STORE URL (e.g., https://example.azuredatalakestore.net) </value>
|
||||
</property>
|
||||
|
||||
|
||||
## <a name="Enabling_ADL" />Enabling ADL Filesystem
|
||||
|
||||
For the ADL FileSystem to take effect, update core-site.xml with
|
||||
|
||||
<property>
|
||||
|
|
Loading…
Reference in New Issue