From 763f073f41e3eaa9ecd11c6ec0b76234739272aa Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Mon, 16 Dec 2013 21:57:47 +0000 Subject: [PATCH 01/32] HADOOP-10106. Incorrect thread name in RPC log messages. Contributed by Ming Ma. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551369 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 + .../java/org/apache/hadoop/ipc/Server.java | 44 +++++++++---------- 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 449beed3e5c..dc0c5d06233 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -470,6 +470,8 @@ Release 2.4.0 - UNRELEASED HADOOP-10058. TestMetricsSystemImpl#testInitFirstVerifyStopInvokedImmediately fails on trunk (Chen He via jeagles) + HADOOP-10106. Incorrect thread name in RPC log messages. (Ming Ma via jing9) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index 7f569408902..ac798595dde 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -551,14 +551,14 @@ public abstract class Server { @Override public void run() { - LOG.info("Starting " + getName()); + LOG.info("Starting " + Thread.currentThread().getName()); try { doRunLoop(); } finally { try { readSelector.close(); } catch (IOException ioe) { - LOG.error("Error closing read selector in " + this.getName(), ioe); + LOG.error("Error closing read selector in " + Thread.currentThread().getName(), ioe); } } } @@ -589,7 +589,7 @@ public abstract class Server { } } catch (InterruptedException e) { if (running) { // unexpected -- log it - LOG.info(getName() + " unexpectedly interrupted", e); + LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e); } } catch (IOException ex) { LOG.error("Error in Reader", ex); @@ -620,7 +620,7 @@ public abstract class Server { @Override public void run() { - LOG.info(getName() + ": starting"); + LOG.info(Thread.currentThread().getName() + ": starting"); SERVER.set(Server.this); connectionManager.startIdleScan(); while (running) { @@ -652,7 +652,7 @@ public abstract class Server { closeCurrentConnection(key, e); } } - LOG.info("Stopping " + this.getName()); + LOG.info("Stopping " + Thread.currentThread().getName()); synchronized (this) { try { @@ -710,14 +710,14 @@ public abstract class Server { try { count = c.readAndProcess(); } catch (InterruptedException ieo) { - LOG.info(getName() + ": readAndProcess caught InterruptedException", ieo); + LOG.info(Thread.currentThread().getName() + ": readAndProcess caught InterruptedException", ieo); throw ieo; } catch (Exception e) { // a WrappedRpcServerException is an exception that has been sent // to the client, so the stacktrace is unnecessary; any other // exceptions are unexpected internal server errors and thus the // stacktrace should be logged - LOG.info(getName() + ": readAndProcess from client " + + LOG.info(Thread.currentThread().getName() + ": readAndProcess from client " + c.getHostAddress() + " threw exception [" + e + "]", (e instanceof WrappedRpcServerException) ? 
null : e); count = -1; //so that the (count < 0) block is executed @@ -740,7 +740,7 @@ public abstract class Server { try { acceptChannel.socket().close(); } catch (IOException e) { - LOG.info(getName() + ":Exception in closing listener socket. " + e); + LOG.info(Thread.currentThread().getName() + ":Exception in closing listener socket. " + e); } } for (Reader r : readers) { @@ -773,16 +773,16 @@ public abstract class Server { @Override public void run() { - LOG.info(getName() + ": starting"); + LOG.info(Thread.currentThread().getName() + ": starting"); SERVER.set(Server.this); try { doRunLoop(); } finally { - LOG.info("Stopping " + this.getName()); + LOG.info("Stopping " + Thread.currentThread().getName()); try { writeSelector.close(); } catch (IOException ioe) { - LOG.error("Couldn't close write selector in " + this.getName(), ioe); + LOG.error("Couldn't close write selector in " + Thread.currentThread().getName(), ioe); } } } @@ -803,7 +803,7 @@ public abstract class Server { doAsyncWrite(key); } } catch (IOException e) { - LOG.info(getName() + ": doAsyncWrite threw exception " + e); + LOG.info(Thread.currentThread().getName() + ": doAsyncWrite threw exception " + e); } } long now = Time.now(); @@ -918,7 +918,7 @@ public abstract class Server { call = responseQueue.removeFirst(); SocketChannel channel = call.connection.channel; if (LOG.isDebugEnabled()) { - LOG.debug(getName() + ": responding to " + call); + LOG.debug(Thread.currentThread().getName() + ": responding to " + call); } // // Send as much data as we can in the non-blocking fashion @@ -937,7 +937,7 @@ public abstract class Server { done = false; // more calls pending to be sent. } if (LOG.isDebugEnabled()) { - LOG.debug(getName() + ": responding to " + call + LOG.debug(Thread.currentThread().getName() + ": responding to " + call + " Wrote " + numBytes + " bytes."); } } else { @@ -965,7 +965,7 @@ public abstract class Server { } } if (LOG.isDebugEnabled()) { - LOG.debug(getName() + ": responding to " + call + LOG.debug(Thread.currentThread().getName() + ": responding to " + call + " Wrote partial " + numBytes + " bytes."); } } @@ -973,7 +973,7 @@ public abstract class Server { } } finally { if (error && call != null) { - LOG.warn(getName()+", call " + call + ": output error"); + LOG.warn(Thread.currentThread().getName()+", call " + call + ": output error"); done = true; // error. no more data for this channel. 
closeConnection(call.connection); } @@ -2011,7 +2011,7 @@ public abstract class Server { @Override public void run() { - LOG.debug(getName() + ": starting"); + LOG.debug(Thread.currentThread().getName() + ": starting"); SERVER.set(Server.this); ByteArrayOutputStream buf = new ByteArrayOutputStream(INITIAL_RESP_BUF_SIZE); @@ -2019,7 +2019,7 @@ public abstract class Server { try { final Call call = callQueue.take(); // pop the queue; maybe blocked here if (LOG.isDebugEnabled()) { - LOG.debug(getName() + ": " + call + " for RpcKind " + call.rpcKind); + LOG.debug(Thread.currentThread().getName() + ": " + call + " for RpcKind " + call.rpcKind); } String errorClass = null; String error = null; @@ -2052,7 +2052,7 @@ public abstract class Server { if (e instanceof UndeclaredThrowableException) { e = e.getCause(); } - String logMsg = getName() + ", call " + call + ": error: " + e; + String logMsg = Thread.currentThread().getName() + ", call " + call + ": error: " + e; if (e instanceof RuntimeException || e instanceof Error) { // These exception types indicate something is probably wrong // on the server side, as opposed to just a normal exceptional @@ -2101,13 +2101,13 @@ public abstract class Server { } } catch (InterruptedException e) { if (running) { // unexpected -- log it - LOG.info(getName() + " unexpectedly interrupted", e); + LOG.info(Thread.currentThread().getName() + " unexpectedly interrupted", e); } } catch (Exception e) { - LOG.info(getName() + " caught an exception", e); + LOG.info(Thread.currentThread().getName() + " caught an exception", e); } } - LOG.debug(getName() + ": exiting"); + LOG.debug(Thread.currentThread().getName() + ": exiting"); } } From 0d7b6dfa974bfff3cbe01261b45aaae5dcbaf6f2 Mon Sep 17 00:00:00 2001 From: Hitesh Shah Date: Mon, 16 Dec 2013 22:10:26 +0000 Subject: [PATCH 02/32] Fix CHANGES.txt for HADOOP-8753. Moved entry to correct 2.4.0 section. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551374 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index dc0c5d06233..49567607447 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -282,9 +282,6 @@ Trunk (Unreleased) HADOOP-10044 Improve the javadoc of rpc code (sanjay Radia) - HADOOP-8753. LocalDirAllocator throws "ArithmeticException: / by zero" when - there is no available space on configured local dir. (Benoy Antony via hitesh) - OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) @@ -470,6 +467,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10058. TestMetricsSystemImpl#testInitFirstVerifyStopInvokedImmediately fails on trunk (Chen He via jeagles) + HADOOP-8753. LocalDirAllocator throws "ArithmeticException: / by zero" when + there is no available space on configured local dir. (Benoy Antony via hitesh) + HADOOP-10106. Incorrect thread name in RPC log messages. (Ming Ma via jing9) Release 2.3.0 - UNRELEASED From 49ad07af9782c2c2608799573e815a7cfc26851f Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Mon, 16 Dec 2013 23:36:16 +0000 Subject: [PATCH 03/32] YARN-312. Introduced ResourceManagerAdministrationProtocol changes to support changing resources on node. Contributed by Junping Du. 
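For orientation, a minimal caller-side sketch of the admin API this patch introduces follows. It is not part of the patch: the helper name, the adminProtocol proxy, and the host and resource values are illustrative assumptions; UpdateNodeResourceRequest.newInstance and updateNodeResource come from the new classes added below, while NodeId, Resource and ResourceOption are the existing YARN record factories.

  // Illustrative sketch only, not part of this patch. Assumes the usual imports
  // (java.util.*, java.io.IOException, org.apache.hadoop.yarn.api.records.*,
  //  org.apache.hadoop.yarn.exceptions.YarnException,
  //  org.apache.hadoop.yarn.server.api.* and ...server.api.protocolrecords.*)
  // and an already-created ResourceManagerAdministrationProtocol proxy.
  static void setNodeResource(ResourceManagerAdministrationProtocol adminProtocol)
      throws YarnException, IOException {
    Map<NodeId, ResourceOption> nodeResourceMap =
        new HashMap<NodeId, ResourceOption>();
    // Example values: give host1:1234 8 GB / 8 vcores, no over-commit timeout (-1).
    nodeResourceMap.put(NodeId.newInstance("host1", 1234),
        ResourceOption.newInstance(Resource.newInstance(8 * 1024, 8), -1));
    // Build the request from the NodeId -> ResourceOption map and send it to the RM.
    adminProtocol.updateNodeResource(
        UpdateNodeResourceRequest.newInstance(nodeResourceMap));
  }
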
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551403 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../yarn/api/records/ResourceOption.java | 6 + ...ResourceManagerAdministrationProtocol.java | 25 +++ .../UpdateNodeResourceRequest.java | 71 ++++++++ .../UpdateNodeResourceResponse.java | 37 ++++ ...ourcemanager_administration_protocol.proto | 1 + ...erver_resourcemanager_service_protos.proto | 7 + .../src/main/proto/yarn_protos.proto | 5 + ...gerAdministrationProtocolPBClientImpl.java | 10 ++ ...gerAdministrationProtocolPBClientImpl.java | 19 ++ ...erAdministrationProtocolPBServiceImpl.java | 20 +++ .../pb/UpdateNodeResourceRequestPBImpl.java | 166 ++++++++++++++++++ .../pb/UpdateNodeResourceResponsePBImpl.java | 66 +++++++ .../server/resourcemanager/AdminService.java | 51 ++++++ 14 files changed, 487 insertions(+) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceResponse.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceResponsePBImpl.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 07c9711f9d9..0a872dfccd0 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -49,6 +49,9 @@ Release 2.4.0 - UNRELEASED YARN-1448. AM-RM protocol changes to support container resizing (Wangda Tan via Sandy Ryza) + YARN-312. Introduced ResourceManagerAdministrationProtocol changes to support + changing resources on node. (Junping Du via vinodkv) + IMPROVEMENTS YARN-7. Support CPU resource for DistributedShell. 
(Junping Du via llu) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceOption.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceOption.java index 2844a952ada..d6393505f81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceOption.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ResourceOption.java @@ -62,4 +62,10 @@ public abstract class ResourceOption { protected abstract void build(); + @Override + public String toString() { + return "Resource:" + getResource().toString() + + ", overCommitTimeout:" + getOverCommitTimeout(); + } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java index dd3eb927304..e768847e850 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ResourceManagerAdministrationProtocol.java @@ -22,8 +22,11 @@ import java.io.IOException; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.tools.GetUserMappingsProtocol; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.exceptions.RMNotYetActiveException; import org.apache.hadoop.yarn.exceptions.YarnException; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; @@ -38,6 +41,8 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsC import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; @Private @Stable @@ -77,4 +82,24 @@ public interface ResourceManagerAdministrationProtocol extends GetUserMappingsPr public RefreshServiceAclsResponse refreshServiceAcls( RefreshServiceAclsRequest request) throws YarnException, IOException; + + /** + *

<p>The interface used by admin to update nodes' resources to the + * <code>ResourceManager</code></p>. + * + * <p>The admin client is required to provide details such as a map from + * {@link NodeId} to {@link ResourceOption} required to update resources on + * a list of <code>RMNode</code> in <code>ResourceManager</code> etc. + * via the {@link UpdateNodeResourceRequest}.</p>
+ * + * @param request request to update resource for a node in cluster. + * @return (empty) response on accepting update. + * @throws YarnException + * @throws IOException + */ + @Public + @Evolving + public UpdateNodeResourceResponse updateNodeResource( + UpdateNodeResourceRequest request) + throws YarnException, IOException; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java new file mode 100644 index 00000000000..d1ab781cda8 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceRequest.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.ResourceOption; +import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; +import org.apache.hadoop.yarn.util.Records; + +/** + *

<p>The request sent by admin to change a list of nodes' resource to the + * <code>ResourceManager</code>.</p> + * + * <p>
The request contains details such as a map from {@link NodeId} to + * {@link ResourceOption} for updating the RMNodes' resources in + * ResourceManager. + * + * @see ResourceManagerAdministrationProtocol#updateNodeResource( + * UpdateNodeResourceRequest) + */ +@Public +@Evolving +public abstract class UpdateNodeResourceRequest { + + + @Public + @Evolving + public static UpdateNodeResourceRequest newInstance( + Map nodeResourceMap) { + UpdateNodeResourceRequest request = + Records.newRecord(UpdateNodeResourceRequest.class); + request.setNodeResourceMap(nodeResourceMap); + return request; + } + + /** + * Get the map from NodeId to ResourceOption. + * @return the map of + */ + @Public + @Evolving + public abstract Map getNodeResourceMap(); + + /** + * Set the map from NodeId to ResourceOption. + * @param nodeResourceMap the map of + */ + @Public + @Evolving + public abstract void setNodeResourceMap(Map nodeResourceMap); + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceResponse.java new file mode 100644 index 00000000000..5155101d244 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/UpdateNodeResourceResponse.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.server.api.protocolrecords; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; +import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; + +/** + *

<p>The response sent by the <code>ResourceManager</code> to Admin client on + * node resource change.</p> + * + * <p>Currently, this is empty.</p>
+ * + * @see ResourceManagerAdministrationProtocol#updateNodeResource( + * UpdateNodeResourceRequest) + */ +@Public +@Evolving +public interface UpdateNodeResourceResponse { + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto index c59c6b61f1e..47a6cf75c86 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/resourcemanager_administration_protocol.proto @@ -38,4 +38,5 @@ service ResourceManagerAdministrationProtocolService { rpc refreshAdminAcls(RefreshAdminAclsRequestProto) returns (RefreshAdminAclsResponseProto); rpc refreshServiceAcls(RefreshServiceAclsRequestProto) returns (RefreshServiceAclsResponseProto); rpc getGroupsForUser(GetGroupsForUserRequestProto) returns (GetGroupsForUserResponseProto); + rpc updateNodeResource (UpdateNodeResourceRequestProto) returns (UpdateNodeResourceResponseProto); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto index df77486ca60..62bd649c209 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/server/yarn_server_resourcemanager_service_protos.proto @@ -68,6 +68,13 @@ message GetGroupsForUserResponseProto { repeated string groups = 1; } +message UpdateNodeResourceRequestProto { + repeated NodeResourceMapProto node_resource_map = 1; +} + +message UpdateNodeResourceResponseProto { +} + //////////////////////////////////////////////////////////////////////// ////// RM recovery related records ///////////////////////////////////// //////////////////////////////////////////////////////////////////////// diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 9c1c4c61e16..7dc62fc5dcd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -63,6 +63,11 @@ message ResourceOptionProto { optional int32 over_commit_timeout = 2; } +message NodeResourceMapProto { + optional NodeIdProto node_id = 1; + optional ResourceOptionProto resource_option = 2; +} + message PriorityProto { optional int32 priority = 1; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java index 47306693952..013e00e324d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestResourceManagerAdministrationProtocolPBClientImpl.java @@ -41,6 +41,8 @@ import 
org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsC import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -159,6 +161,14 @@ public class TestResourceManagerAdministrationProtocolPBClientImpl { RefreshAdminAclsResponse response = client.refreshAdminAcls(request); assertNotNull(response); } + + @Test + public void testUpdateNodeResource() throws Exception { + UpdateNodeResourceRequest request = recordFactory + .newRecordInstance(UpdateNodeResourceRequest.class); + UpdateNodeResourceResponse response = client.updateNodeResource(request); + assertNotNull(response); + } @Test public void testRefreshServiceAcls() throws Exception { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/client/ResourceManagerAdministrationProtocolPBClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/client/ResourceManagerAdministrationProtocolPBClientImpl.java index 50ee288df7d..ccffaed77f3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/client/ResourceManagerAdministrationProtocolPBClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/client/ResourceManagerAdministrationProtocolPBClientImpl.java @@ -37,6 +37,7 @@ import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.Refre import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshServiceAclsRequestProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshSuperUserGroupsConfigurationRequestProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshUserToGroupsMappingsRequestProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.UpdateNodeResourceRequestProto; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocolPB; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsRequest; @@ -51,6 +52,8 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsC import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshAdminAclsRequestPBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshAdminAclsResponsePBImpl; import 
org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshNodesRequestPBImpl; @@ -63,6 +66,8 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshSuperUse import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshSuperUserGroupsConfigurationResponsePBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshUserToGroupsMappingsRequestPBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshUserToGroupsMappingsResponsePBImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UpdateNodeResourceRequestPBImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UpdateNodeResourceResponsePBImpl; import com.google.protobuf.ServiceException; @@ -186,5 +191,19 @@ public class ResourceManagerAdministrationProtocolPBClientImpl implements Resour throw ProtobufHelper.getRemoteException(e); } } + + @Override + public UpdateNodeResourceResponse updateNodeResource( + UpdateNodeResourceRequest request) throws YarnException, IOException { + UpdateNodeResourceRequestProto requestProto = + ((UpdateNodeResourceRequestPBImpl) request).getProto(); + try { + return new UpdateNodeResourceResponsePBImpl(proxy.updateNodeResource(null, + requestProto)); + } catch (ServiceException e) { + RPCUtil.unwrapAndThrowException(e); + return null; + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/service/ResourceManagerAdministrationProtocolPBServiceImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/service/ResourceManagerAdministrationProtocolPBServiceImpl.java index 97caaa7d4f5..d1f71feb270 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/service/ResourceManagerAdministrationProtocolPBServiceImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/impl/pb/service/ResourceManagerAdministrationProtocolPBServiceImpl.java @@ -36,6 +36,8 @@ import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.Refre import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshSuperUserGroupsConfigurationResponseProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshUserToGroupsMappingsRequestProto; import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.RefreshUserToGroupsMappingsResponseProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.UpdateNodeResourceRequestProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.UpdateNodeResourceResponseProto; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocolPB; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshAdminAclsResponse; @@ -44,6 +46,7 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshQueuesResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshServiceAclsResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; import 
org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshAdminAclsRequestPBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshAdminAclsResponsePBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshNodesRequestPBImpl; @@ -56,6 +59,8 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshSuperUse import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshSuperUserGroupsConfigurationResponsePBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshUserToGroupsMappingsRequestPBImpl; import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.RefreshUserToGroupsMappingsResponsePBImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UpdateNodeResourceRequestPBImpl; +import org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb.UpdateNodeResourceResponsePBImpl; import com.google.protobuf.RpcController; import com.google.protobuf.ServiceException; @@ -183,5 +188,20 @@ public class ResourceManagerAdministrationProtocolPBServiceImpl implements Resou throw new ServiceException(e); } } + + @Override + public UpdateNodeResourceResponseProto updateNodeResource(RpcController controller, + UpdateNodeResourceRequestProto proto) throws ServiceException { + UpdateNodeResourceRequestPBImpl request = + new UpdateNodeResourceRequestPBImpl(proto); + try { + UpdateNodeResourceResponse response = real.updateNodeResource(request); + return ((UpdateNodeResourceResponsePBImpl)response).getProto(); + } catch (YarnException e) { + throw new ServiceException(e); + } catch (IOException e) { + throw new ServiceException(e); + } + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java new file mode 100644 index 00000000000..413e4a00c15 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceRequestPBImpl.java @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.ResourceOption; +import org.apache.hadoop.yarn.api.records.impl.pb.NodeIdPBImpl; +import org.apache.hadoop.yarn.api.records.impl.pb.ResourceOptionPBImpl; +import org.apache.hadoop.yarn.proto.YarnProtos.NodeIdProto; +import org.apache.hadoop.yarn.proto.YarnProtos.NodeResourceMapProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ResourceOptionProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.UpdateNodeResourceRequestProto; +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.UpdateNodeResourceRequestProtoOrBuilder; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; + +public class UpdateNodeResourceRequestPBImpl extends UpdateNodeResourceRequest { + + UpdateNodeResourceRequestProto proto = UpdateNodeResourceRequestProto.getDefaultInstance(); + UpdateNodeResourceRequestProto.Builder builder = null; + boolean viaProto = false; + + Map nodeResourceMap = null; + + public UpdateNodeResourceRequestPBImpl() { + builder = UpdateNodeResourceRequestProto.newBuilder(); + } + + public UpdateNodeResourceRequestPBImpl(UpdateNodeResourceRequestProto proto) { + this.proto = proto; + viaProto = true; + } + + @Override + public Map getNodeResourceMap() { + initNodeResourceMap(); + return this.nodeResourceMap; + } + + @Override + public void setNodeResourceMap(Map nodeResourceMap) { + if (nodeResourceMap == null) { + return; + } + initNodeResourceMap(); + this.nodeResourceMap.clear(); + this.nodeResourceMap.putAll(nodeResourceMap); + } + + public UpdateNodeResourceRequestProto getProto() { + mergeLocalToProto(); + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + private void mergeLocalToBuilder() { + if (this.nodeResourceMap != null) { + addNodeResourceMap(); + } + } + + private void mergeLocalToProto() { + if (viaProto) + maybeInitBuilder(); + mergeLocalToBuilder(); + proto = builder.build(); + viaProto = true; + } + + private void initNodeResourceMap() { + if (this.nodeResourceMap != null) { + return; + } + UpdateNodeResourceRequestProtoOrBuilder p = viaProto ? 
proto : builder; + List list = p.getNodeResourceMapList(); + this.nodeResourceMap = new HashMap(list + .size()); + for (NodeResourceMapProto nodeResourceProto : list) { + this.nodeResourceMap.put(convertFromProtoFormat(nodeResourceProto.getNodeId()), + convertFromProtoFormat(nodeResourceProto.getResourceOption())); + } + } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = UpdateNodeResourceRequestProto.newBuilder(proto); + } + viaProto = false; + } + + private NodeIdProto convertToProtoFormat(NodeId nodeId) { + return ((NodeIdPBImpl)nodeId).getProto(); + } + + private NodeId convertFromProtoFormat(NodeIdProto proto) { + return new NodeIdPBImpl(proto); + } + + private ResourceOptionPBImpl convertFromProtoFormat(ResourceOptionProto c) { + return new ResourceOptionPBImpl(c); + } + + private ResourceOptionProto convertToProtoFormat(ResourceOption c) { + return ((ResourceOptionPBImpl)c).getProto(); + } + + private void addNodeResourceMap() { + maybeInitBuilder(); + builder.clearNodeResourceMap(); + if (nodeResourceMap == null) { + return; + } + Iterable values + = new Iterable() { + + @Override + public Iterator iterator() { + return new Iterator() { + Iterator nodeIterator = nodeResourceMap + .keySet().iterator(); + + @Override + public boolean hasNext() { + return nodeIterator.hasNext(); + } + + @Override + public NodeResourceMapProto next() { + NodeId nodeId = nodeIterator.next(); + return NodeResourceMapProto.newBuilder().setNodeId( + convertToProtoFormat(nodeId)).setResourceOption( + convertToProtoFormat(nodeResourceMap.get(nodeId))).build(); + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + this.builder.addAllNodeResourceMap(values); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceResponsePBImpl.java new file mode 100644 index 00000000000..f314f861b65 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/server/api/protocolrecords/impl/pb/UpdateNodeResourceResponsePBImpl.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.server.api.protocolrecords.impl.pb; + +import org.apache.hadoop.yarn.proto.YarnServerResourceManagerServiceProtos.UpdateNodeResourceResponseProto; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; + +public class UpdateNodeResourceResponsePBImpl implements UpdateNodeResourceResponse { + + UpdateNodeResourceResponseProto proto = UpdateNodeResourceResponseProto.getDefaultInstance(); + UpdateNodeResourceResponseProto.Builder builder = null; + boolean viaProto = false; + + public UpdateNodeResourceResponsePBImpl() { + builder = UpdateNodeResourceResponseProto.newBuilder(); + } + + public UpdateNodeResourceResponsePBImpl( + UpdateNodeResourceResponseProto proto) { + this.proto = proto; + viaProto = true; + } + + public UpdateNodeResourceResponseProto getProto() { + proto = viaProto ? proto : builder.build(); + viaProto = true; + return proto; + } + + @Override + public int hashCode() { + return getProto().hashCode(); + } + + @Override + public boolean equals(Object other) { + if (other == null) + return false; + if (other.getClass().isAssignableFrom(this.getClass())) { + return this.getProto().equals(this.getClass().cast(other).getProto()); + } + return false; + } + + @Override + public String toString() { + return getProto().toString().replaceAll("\\n", ", ") + .replaceAll("\\s+", " "); + } + +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java index 18ffee595e1..0d0e5481be5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/AdminService.java @@ -20,6 +20,11 @@ package org.apache.hadoop.yarn.server.resourcemanager; import java.io.IOException; import java.net.InetSocketAddress; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -41,6 +46,8 @@ import org.apache.hadoop.security.authorize.AccessControlList; import org.apache.hadoop.security.authorize.PolicyProvider; import org.apache.hadoop.security.authorize.ProxyUsers; import org.apache.hadoop.service.AbstractService; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.ResourceOption; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.RMNotYetActiveException; import org.apache.hadoop.yarn.exceptions.YarnException; @@ -61,6 +68,9 @@ import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsC import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshSuperUserGroupsConfigurationResponse; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsRequest; import org.apache.hadoop.yarn.server.api.protocolrecords.RefreshUserToGroupsMappingsResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceRequest; +import 
org.apache.hadoop.yarn.server.api.protocolrecords.UpdateNodeResourceResponse; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; import org.apache.hadoop.yarn.server.resourcemanager.security.authorize.RMPolicyProvider; import com.google.protobuf.BlockingService; @@ -377,4 +387,45 @@ public class AdminService extends AbstractService implements public String[] getGroupsForUser(String user) throws IOException { return UserGroupInformation.createRemoteUser(user).getGroupNames(); } + + @Override + public UpdateNodeResourceResponse updateNodeResource( + UpdateNodeResourceRequest request) throws YarnException, IOException { + Map nodeResourceMap = request.getNodeResourceMap(); + Set nodeIds = nodeResourceMap.keySet(); + // verify nodes are all valid first. + // if any invalid nodes, throw exception instead of partially updating + // valid nodes. + for (NodeId nodeId : nodeIds) { + RMNode node = this.rmContext.getRMNodes().get(nodeId); + if (node == null) { + LOG.error("Resource update get failed on all nodes due to change " + + "resource on an unrecognized node: " + nodeId); + throw RPCUtil.getRemoteException( + "Resource update get failed on all nodes due to change resource " + + "on an unrecognized node: " + nodeId); + } + } + + // do resource update on each node. + // Notice: it is still possible to have invalid NodeIDs as nodes decommission + // may happen just at the same time. This time, only log and skip absent + // nodes without throwing any exceptions. + for (Map.Entry entry : nodeResourceMap.entrySet()) { + ResourceOption newResourceOption = entry.getValue(); + NodeId nodeId = entry.getKey(); + RMNode node = this.rmContext.getRMNodes().get(nodeId); + if (node == null) { + LOG.warn("Resource update get failed on an unrecognized node: " + nodeId); + } else { + node.setResourceOption(newResourceOption); + LOG.info("Update resource successfully on node(" + node.getNodeID() + +") with resource(" + newResourceOption.toString() + ")"); + } + } + UpdateNodeResourceResponse response = recordFactory.newRecordInstance( + UpdateNodeResourceResponse.class); + return response; + } + } From 0fc2929d13435a71d759f29579a7a171dc05990d Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Tue, 17 Dec 2013 00:30:06 +0000 Subject: [PATCH 04/32] HDFS-5350. Name Node should report fsimage transfer time as a metric. Contributed by Jimmy Xiang. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551415 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hdfs/server/namenode/GetImageServlet.java | 22 +++++++++++++++++++ .../namenode/metrics/NameNodeMetrics.java | 19 ++++++++++++++++ .../hdfs/server/namenode/TestCheckpoint.java | 13 +++++++++++ 4 files changed, 57 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index ab12482466f..a1024e7eada 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -741,6 +741,9 @@ Release 2.4.0 - UNRELEASED HDFS-5652. Refactor invalid block token exception handling in DFSInputStream. (Liang Xie via junping_du) + HDFS-5350. Name Node should report fsimage transfer time as a metric. + (Jimmy Xiang via wang) + OPTIMIZATIONS HDFS-5239. 
Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java index d2297b8656f..7ac69f80848 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/GetImageServlet.java @@ -17,6 +17,8 @@ */ package org.apache.hadoop.hdfs.server.namenode; +import static org.apache.hadoop.util.Time.now; + import java.security.PrivilegedExceptionAction; import java.util.*; import java.io.*; @@ -41,6 +43,7 @@ import org.apache.hadoop.hdfs.HAUtil; import org.apache.hadoop.hdfs.server.common.JspHelper; import org.apache.hadoop.hdfs.server.common.Storage; import org.apache.hadoop.hdfs.server.common.StorageInfo; +import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.util.DataTransferThrottler; import org.apache.hadoop.hdfs.util.MD5FileUtils; @@ -88,6 +91,7 @@ public class GetImageServlet extends HttpServlet { final GetImageParams parsedParams = new GetImageParams(request, response); final Configuration conf = (Configuration) context .getAttribute(JspHelper.CURRENT_CONF); + final NameNodeMetrics metrics = NameNode.getNameNodeMetrics(); if (UserGroupInformation.isSecurityEnabled() && !isValidRequestor(context, request.getUserPrincipal().getName(), conf)) { @@ -128,14 +132,26 @@ public class GetImageServlet extends HttpServlet { throw new IOException(errorMessage); } CheckpointFaultInjector.getInstance().beforeGetImageSetsHeaders(); + long start = now(); serveFile(imageFile); + + if (metrics != null) { // Metrics non-null only when used inside name node + long elapsed = now() - start; + metrics.addGetImage(elapsed); + } } else if (parsedParams.isGetEdit()) { long startTxId = parsedParams.getStartTxId(); long endTxId = parsedParams.getEndTxId(); File editFile = nnImage.getStorage() .findFinalizedEditsFile(startTxId, endTxId); + long start = now(); serveFile(editFile); + + if (metrics != null) { // Metrics non-null only when used inside name node + long elapsed = now() - start; + metrics.addGetEdit(elapsed); + } } else if (parsedParams.isPutImage()) { final long txid = parsedParams.getTxId(); @@ -159,12 +175,18 @@ public class GetImageServlet extends HttpServlet { UserGroupInformation.getCurrentUser().checkTGTAndReloginFromKeytab(); } + long start = now(); // issue a HTTP get request to download the new fsimage MD5Hash downloadImageDigest = TransferFsImage.downloadImageToStorage( parsedParams.getInfoServer(conf), txid, nnImage.getStorage(), true); nnImage.saveDigestAndRenameCheckpointImage(txid, downloadImageDigest); + + if (metrics != null) { // Metrics non-null only when used inside name node + long elapsed = now() - start; + metrics.addPutImage(elapsed); + } // Now that we have a new checkpoint, we might be able to // remove some old ones. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java index 2916da07993..a47eb73d23a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/metrics/NameNodeMetrics.java @@ -89,6 +89,13 @@ public class NameNodeMetrics { @Metric("Time loading FS Image at startup in msec") MutableGaugeInt fsImageLoadTime; + @Metric("GetImageServlet getEdit") + MutableRate getEdit; + @Metric("GetImageServlet getImage") + MutableRate getImage; + @Metric("GetImageServlet putImage") + MutableRate putImage; + NameNodeMetrics(String processName, String sessionId, int[] intervals) { registry.tag(ProcessName, processName).tag(SessionId, sessionId); @@ -251,4 +258,16 @@ public class NameNodeMetrics { public void setSafeModeTime(long elapsed) { safeModeTime.set((int) elapsed); } + + public void addGetEdit(long latency) { + getEdit.add(latency); + } + + public void addGetImage(long latency) { + getImage.add(latency); + } + + public void addPutImage(long latency) { + putImage.add(latency); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java index e99e00a4ed4..74a7df35781 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCheckpoint.java @@ -20,6 +20,9 @@ package org.apache.hadoop.hdfs.server.namenode; import static org.apache.hadoop.hdfs.server.common.Util.fileAsURI; import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.assertNNHasCheckpoints; import static org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil.getNameNodeCurrentDirs; +import static org.apache.hadoop.test.MetricsAsserts.assertCounterGt; +import static org.apache.hadoop.test.MetricsAsserts.assertGaugeGt; +import static org.apache.hadoop.test.MetricsAsserts.getMetrics; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; @@ -72,6 +75,7 @@ import org.apache.hadoop.hdfs.server.protocol.RemoteEditLog; import org.apache.hadoop.hdfs.server.protocol.RemoteEditLogManifest; import org.apache.hadoop.hdfs.tools.DFSAdmin; import org.apache.hadoop.io.Text; +import org.apache.hadoop.metrics2.MetricsRecordBuilder; import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.test.GenericTestUtils.DelayAnswer; import org.apache.hadoop.test.GenericTestUtils.LogCapturer; @@ -106,6 +110,7 @@ public class TestCheckpoint { } static final Log LOG = LogFactory.getLog(TestCheckpoint.class); + static final String NN_METRICS = "NameNodeActivity"; static final long seed = 0xDEADBEEFL; static final int blockSize = 4096; @@ -1048,6 +1053,14 @@ public class TestCheckpoint { // secondary = startSecondaryNameNode(conf); secondary.doCheckpoint(); + + MetricsRecordBuilder rb = getMetrics(NN_METRICS); + assertCounterGt("GetImageNumOps", 0, rb); + assertCounterGt("GetEditNumOps", 0, rb); + assertCounterGt("PutImageNumOps", 0, rb); + assertGaugeGt("GetImageAvgTime", 0.0, rb); + 
assertGaugeGt("GetEditAvgTime", 0.0, rb); + assertGaugeGt("PutImageAvgTime", 0.0, rb); } finally { fileSys.close(); cleanup(secondary); From a6754bbb812198b4842205b0498c31dd16382612 Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Tue, 17 Dec 2013 01:47:23 +0000 Subject: [PATCH 05/32] YARN-1451. TestResourceManager relies on the scheduler assigning multiple containers in a single node update. (Sandy Ryza via kasha) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551440 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 +++ .../server/resourcemanager/TestResourceManager.java | 11 +++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index 0a872dfccd0..d91297eefde 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -255,6 +255,9 @@ Release 2.4.0 - UNRELEASED YARN-1145. Fixed a potential file-handle leak in the web interface for displaying aggregated logs. (Rohith Sharma via vinodkv) + YARN-1451. TestResourceManager relies on the scheduler assigning multiple + containers in a single node update. (Sandy Ryza via kasha) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java index 3bcfba3abd5..1f9d1798aaa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestResourceManager.java @@ -79,22 +79,23 @@ public class TestResourceManager { @Test public void testResourceAllocation() throws IOException, - YarnException { + YarnException, InterruptedException { LOG.info("--- START: testResourceAllocation ---"); final int memory = 4 * 1024; + final int vcores = 4; // Register node1 String host1 = "host1"; org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm1 = registerNode(host1, 1234, 2345, NetworkTopology.DEFAULT_RACK, - Resources.createResource(memory, 1)); + Resources.createResource(memory, vcores)); // Register node2 String host2 = "host2"; org.apache.hadoop.yarn.server.resourcemanager.NodeManager nm2 = registerNode(host2, 1234, 2345, NetworkTopology.DEFAULT_RACK, - Resources.createResource(memory/2, 1)); + Resources.createResource(memory/2, vcores/2)); // Submit an application Application application = new Application("user1", resourceManager); @@ -142,8 +143,10 @@ public class TestResourceManager { application.schedule(); checkResourceUsage(nm1, nm2); - // Send a heartbeat to kick the tires on the Scheduler + // Send heartbeats to kick the tires on the Scheduler nodeUpdate(nm2); + nodeUpdate(nm2); + nodeUpdate(nm1); nodeUpdate(nm1); // Get allocations from the scheduler From b774d7b3de81cda4165a0e86bc2267fda8546cb5 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 17 Dec 2013 02:16:20 +0000 Subject: [PATCH 06/32] YARN-1446. Changed client API to retry killing application till RM acknowledges so as to account for RM crashes/failover. Contributed by Jian He. 
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551444 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../KillApplicationResponse.java | 36 +++++++-- .../hadoop/yarn/conf/YarnConfiguration.java | 13 +++- .../src/main/proto/yarn_service_protos.proto | 1 + .../yarn/client/api/impl/YarnClientImpl.java | 40 ++++++++-- .../yarn/client/api/impl/TestYarnClient.java | 29 +++++++ .../pb/KillApplicationResponsePBImpl.java | 21 +++++ .../src/main/resources/yarn-default.xml | 10 +-- .../ApplicationMasterService.java | 12 +-- .../resourcemanager/ClientRMService.java | 17 ++-- .../server/resourcemanager/rmapp/RMApp.java | 10 +-- .../resourcemanager/rmapp/RMAppEventType.java | 1 - .../resourcemanager/rmapp/RMAppImpl.java | 67 ++++++++++------ .../resourcemanager/rmapp/RMAppState.java | 1 + .../rmapp/attempt/RMAppAttemptImpl.java | 2 + .../yarn/server/resourcemanager/MockRM.java | 9 +-- .../server/resourcemanager/TestRMRestart.java | 77 +++++++++++++++++-- .../applicationsmanager/MockAsm.java | 2 +- .../resourcemanager/rmapp/MockRMApp.java | 2 +- .../rmapp/TestRMAppTransitions.java | 31 ++++---- 20 files changed, 290 insertions(+), 94 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index d91297eefde..e69ea1bda3a 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -178,6 +178,9 @@ Release 2.4.0 - UNRELEASED YARN-1435. Modified Distributed Shell to accept either the command or the custom script. (Xuan Gong via zjshen) + YARN-1446. Changed client API to retry killing application till RM + acknowledges so as to account for RM crashes/failover. (Jian He via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java index 71aa28b0ae0..77bb71d6796 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/KillApplicationResponse.java @@ -26,10 +26,21 @@ import org.apache.hadoop.yarn.api.ApplicationClientProtocol; import org.apache.hadoop.yarn.util.Records; /** - *

<p>The response sent by the <code>ResourceManager</code> to the client
- * aborting a submitted application.</p>
- *
- * <p>Currently it's empty.</p>
+ * <p>
+ * The response sent by the <code>ResourceManager</code> to the client aborting
+ * a submitted application.
+ * </p>
+ * <p>
+ * The response, includes:
+ * <ul>
+ * <li>A flag which indicates that the process of killing the application is
+ * completed or not.</li>
+ * </ul>
+ * Note: user is recommended to wait until this flag becomes true, otherwise if
+ * the <code>ResourceManager</code> crashes before the process of killing the
+ * application is completed, the <code>ResourceManager</code> may retry this
+ * application on recovery.
+ * </p>
* * @see ApplicationClientProtocol#forceKillApplication(KillApplicationRequest) */ @@ -38,9 +49,24 @@ import org.apache.hadoop.yarn.util.Records; public abstract class KillApplicationResponse { @Private @Unstable - public static KillApplicationResponse newInstance() { + public static KillApplicationResponse newInstance(boolean isKillCompleted) { KillApplicationResponse response = Records.newRecord(KillApplicationResponse.class); + response.setIsKillCompleted(isKillCompleted); return response; } + + /** + * Get the flag which indicates that the process of killing application is completed or not. + */ + @Public + @Stable + public abstract boolean getIsKillCompleted(); + + /** + * Set the flag which indicates that the process of killing application is completed or not. + */ + @Private + @Unstable + public abstract void setIsKillCompleted(boolean isKillCompleted); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index 12b5e2f5e77..de420b05e35 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -27,7 +27,6 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.http.HttpConfig; import org.apache.hadoop.net.NetUtils; @@ -882,14 +881,22 @@ public class YarnConfiguration extends Configuration { //////////////////////////////// /** + * Use YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS instead. * The interval of the yarn client's querying application state after * application submission. The unit is millisecond. */ + @Deprecated public static final String YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS = YARN_PREFIX + "client.app-submission.poll-interval"; - public static final long DEFAULT_YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS = - 1000; + /** + * The interval that the yarn client library uses to poll the completion + * status of the asynchronous API of application client protocol. 
+ */ + public static final String YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS = + YARN_PREFIX + "client.application-client-protocol.poll-interval-ms"; + public static final long DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS = + 200; /** * Max number of threads in NMClientAsync to process container management * events diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto index 332be813627..a4631d11b6e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_service_protos.proto @@ -116,6 +116,7 @@ message KillApplicationRequestProto { } message KillApplicationResponseProto { + optional bool is_kill_completed = 1 [default = false]; } message GetClusterMetricsRequestProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index d35e1a4300d..7c446045fe5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetNewApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.protocolrecords.SubmitApplicationRequest; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; @@ -79,7 +80,8 @@ public class YarnClientImpl extends YarnClient { protected ApplicationClientProtocol rmClient; protected InetSocketAddress rmAddress; - protected long statePollIntervalMillis; + protected long submitPollIntervalMillis; + private long asyncApiPollIntervalMillis; private static final String ROOT = "root"; @@ -92,12 +94,20 @@ public class YarnClientImpl extends YarnClient { YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); } + @SuppressWarnings("deprecation") @Override protected void serviceInit(Configuration conf) throws Exception { this.rmAddress = getRmAddress(conf); - statePollIntervalMillis = conf.getLong( + asyncApiPollIntervalMillis = + conf.getLong(YarnConfiguration.YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS, + YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS); + submitPollIntervalMillis = asyncApiPollIntervalMillis; + if (conf.get(YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS) + != null) { + submitPollIntervalMillis = conf.getLong( YarnConfiguration.YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS, - YarnConfiguration.DEFAULT_YARN_CLIENT_APP_SUBMISSION_POLL_INTERVAL_MS); + YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS); + } super.serviceInit(conf); } @@ -165,7 +175,7 @@ public class YarnClientImpl extends YarnClient { " is still in " + state); } try { - 
Thread.sleep(statePollIntervalMillis); + Thread.sleep(submitPollIntervalMillis); } catch (InterruptedException ie) { } } @@ -179,11 +189,29 @@ public class YarnClientImpl extends YarnClient { @Override public void killApplication(ApplicationId applicationId) throws YarnException, IOException { - LOG.info("Killing application " + applicationId); KillApplicationRequest request = Records.newRecord(KillApplicationRequest.class); request.setApplicationId(applicationId); - rmClient.forceKillApplication(request); + + try { + int pollCount = 0; + while (true) { + KillApplicationResponse response = + rmClient.forceKillApplication(request); + if (response.getIsKillCompleted()) { + break; + } + if (++pollCount % 10 == 0) { + LOG.info("Watiting for application " + applicationId + + " to be killed."); + } + Thread.sleep(asyncApiPollIntervalMillis); + } + } catch (InterruptedException e) { + LOG.error("Interrupted while waiting for application " + applicationId + + " to be killed."); + } + LOG.info("Killed application " + applicationId); } @Override diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java index 826433d5048..966995c99ce 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestYarnClient.java @@ -42,6 +42,8 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationReportResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetApplicationsResponse; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; @@ -89,6 +91,7 @@ public class TestYarnClient { rm.stop(); } + @SuppressWarnings("deprecation") @Test (timeout = 30000) public void testSubmitApplication() { Configuration conf = new Configuration(); @@ -128,6 +131,23 @@ public class TestYarnClient { client.stop(); } + @Test + public void testKillApplication() throws Exception { + MockRM rm = new MockRM(); + rm.start(); + RMApp app = rm.submitApp(2000); + + Configuration conf = new Configuration(); + @SuppressWarnings("resource") + final YarnClient client = new MockYarnClient(); + client.init(conf); + client.start(); + + client.killApplication(app.getApplicationId()); + verify(((MockYarnClient) client).getRMClient(), times(2)) + .forceKillApplication(any(KillApplicationRequest.class)); + } + @Test(timeout = 30000) public void testApplicationType() throws Exception { Logger rootLogger = LogManager.getRootLogger(); @@ -234,6 +254,11 @@ public class TestYarnClient { GetApplicationReportRequest.class))).thenReturn(mockResponse); when(rmClient.getApplications(any(GetApplicationsRequest.class))) .thenReturn(mockAppResponse); + // return false for 1st kill request, and true for the 2nd. 
+ when(rmClient.forceKillApplication(any( + KillApplicationRequest.class))) + .thenReturn(KillApplicationResponse.newInstance(false)).thenReturn( + KillApplicationResponse.newInstance(true)); } catch (YarnException e) { Assert.fail("Exception is not expected."); } catch (IOException e) { @@ -242,6 +267,10 @@ public class TestYarnClient { when(mockResponse.getApplicationReport()).thenReturn(mockReport); } + public ApplicationClientProtocol getRMClient() { + return rmClient; + } + @Override public List getApplications( Set applicationTypes, EnumSet applicationStates) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationResponsePBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationResponsePBImpl.java index 14e0c1f74af..1c937de2d39 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationResponsePBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/protocolrecords/impl/pb/KillApplicationResponsePBImpl.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProto; +import org.apache.hadoop.yarn.proto.YarnServiceProtos.KillApplicationResponseProtoOrBuilder; import com.google.protobuf.TextFormat; @@ -67,4 +68,24 @@ public class KillApplicationResponsePBImpl extends KillApplicationResponse { public String toString() { return TextFormat.shortDebugString(getProto()); } + + private void maybeInitBuilder() { + if (viaProto || builder == null) { + builder = KillApplicationResponseProto.newBuilder(proto); + } + viaProto = false; + } + + @Override + public boolean getIsKillCompleted() { + KillApplicationResponseProtoOrBuilder p = + viaProto ? proto : builder; + return p.getIsKillCompleted(); + } + + @Override + public void setIsKillCompleted(boolean isKillCompleted) { + maybeInitBuilder(); + builder.setIsKillCompleted(isKillCompleted); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index c43dc1a4446..9673826c2ae 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -945,10 +945,10 @@ - The interval of the yarn client's querying application state - after application submission. The unit is millisecond. - yarn.client.app-submission.poll-interval - 1000 + The interval that the yarn client library uses to poll the + completion status of the asynchronous API of application client protocol. 
+ + yarn.client.application-client-protocol.poll-interval-ms + 200 - diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java index f070f28f86f..787ed9fa656 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ApplicationMasterService.java @@ -292,15 +292,15 @@ public class ApplicationMasterService extends AbstractService implements this.amLivelinessMonitor.receivedPing(applicationAttemptId); - rmContext.getDispatcher().getEventHandler().handle( + if (rmContext.getRMApps().get(applicationAttemptId.getApplicationId()) + .isAppSafeToTerminate()) { + return FinishApplicationMasterResponse.newInstance(true); + } else { + // keep sending the unregister event as RM may crash in the meanwhile. + rmContext.getDispatcher().getEventHandler().handle( new RMAppAttemptUnregistrationEvent(applicationAttemptId, request .getTrackingUrl(), request.getFinalApplicationStatus(), request .getDiagnostics())); - - if (rmContext.getRMApps().get(applicationAttemptId.getApplicationId()) - .isAppSafeToUnregister()) { - return FinishApplicationMasterResponse.newInstance(true); - } else { return FinishApplicationMasterResponse.newInstance(false); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java index f0e85534900..cd2226fc3fa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/ClientRMService.java @@ -380,14 +380,15 @@ public class ClientRMService extends AbstractService implements + ApplicationAccessType.MODIFY_APP.name() + " on " + applicationId)); } - this.rmContext.getDispatcher().getEventHandler().handle( - new RMAppEvent(applicationId, RMAppEventType.KILL)); - - RMAuditLogger.logSuccess(callerUGI.getShortUserName(), - AuditConstants.KILL_APP_REQUEST, "ClientRMService" , applicationId); - KillApplicationResponse response = recordFactory - .newRecordInstance(KillApplicationResponse.class); - return response; + if (application.isAppSafeToTerminate()) { + RMAuditLogger.logSuccess(callerUGI.getShortUserName(), + AuditConstants.KILL_APP_REQUEST, "ClientRMService", applicationId); + return KillApplicationResponse.newInstance(true); + } else { + this.rmContext.getDispatcher().getEventHandler() + .handle(new RMAppEvent(applicationId, RMAppEventType.KILL)); + return KillApplicationResponse.newInstance(false); + } } @Override diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java index fadaa3b00e4..1809a4bb470 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMApp.java @@ -197,13 +197,13 @@ public interface RMApp extends EventHandler { String getApplicationType(); /** - * Check whether this application is safe to unregister. - * An application is deemed to be safe to unregister if it is an unmanaged - * AM or its state has been removed from state store. + * Check whether this application is safe to terminate. + * An application is deemed to be safe to terminate if it is an unmanaged + * AM or its state has been saved in state store. * @return the flag which indicates whether this application is safe to - * unregister. + * terminate. */ - boolean isAppSafeToUnregister(); + boolean isAppSafeToTerminate(); /** * Create the external user-facing state of ApplicationMaster from the diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java index a2fa0e24eb0..ad3f20d23d9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppEventType.java @@ -37,5 +37,4 @@ public enum RMAppEventType { // Source: RMStateStore APP_NEW_SAVED, APP_UPDATE_SAVED, - APP_REMOVED } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java index 5a70cc21165..0bf7c817454 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/RMAppImpl.java @@ -110,10 +110,14 @@ public class RMAppImpl implements RMApp, Recoverable { private static final FinalTransition FINAL_TRANSITION = new FinalTransition(); private static final AppFinishedTransition FINISHED_TRANSITION = new AppFinishedTransition(); + + // These states stored are only valid when app is at killing or final_saving. 
+ private RMAppState stateBeforeKilling; private RMAppState stateBeforeFinalSaving; private RMAppEvent eventCausingFinalSaving; private RMAppState targetedFinalState; private RMAppState recoveredFinalState; + Object transitionTodo; private static final StateMachineFactory= 1); + + rm1.waitForState(am1.getApplicationAttemptId(), RMAppAttemptState.KILLED); + rm1.waitForState(app1.getApplicationId(), RMAppState.KILLED); + Assert.assertEquals(1, ((TestMemoryRMStateStore) memStore).updateAttempt); + Assert.assertEquals(2, ((TestMemoryRMStateStore) memStore).updateApp); + } + + public class TestMemoryRMStateStore extends MemoryRMStateStore { + int count = 0; + public int updateApp = 0; + public int updateAttempt = 0; + + @Override + public void updateApplicationStateInternal(String appId, + ApplicationStateDataPBImpl appStateData) throws Exception { + updateApp = ++count; + super.updateApplicationStateInternal(appId, appStateData); + } + + @Override + public synchronized void + updateApplicationAttemptStateInternal(String attemptIdStr, + ApplicationAttemptStateDataPBImpl attemptStateData) + throws Exception { + updateAttempt = ++count; + super.updateApplicationAttemptStateInternal(attemptIdStr, + attemptStateData); + } + } + public static class TestSecurityMockRM extends MockRM { public TestSecurityMockRM(Configuration conf, RMStateStore store) { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java index b90c711c3db..aa116bf85b2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/applicationsmanager/MockAsm.java @@ -145,7 +145,7 @@ public abstract class MockAsm extends MockApps { } @Override - public boolean isAppSafeToUnregister() { + public boolean isAppSafeToTerminate() { throw new UnsupportedOperationException("Not supported yet."); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java index bcb2f6f111b..debcffe97dc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/MockRMApp.java @@ -218,7 +218,7 @@ public class MockRMApp implements RMApp { } @Override - public boolean isAppSafeToUnregister() { + public boolean isAppSafeToTerminate() { return true; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java index 5b687236a6f..ba255d339ef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/rmapp/TestRMAppTransitions.java @@ -301,12 +301,9 @@ public class TestRMAppTransitions { private void assertAppAndAttemptKilled(RMApp application) throws InterruptedException { + sendAttemptUpdateSavedEvent(application); sendAppUpdateSavedEvent(application); assertKilled(application); - // send attempt final state saved event. - application.getCurrentAppAttempt().handle( - new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt() - .getAppAttemptId(), null)); Assert.assertEquals(RMAppAttemptState.KILLED, application .getCurrentAppAttempt().getAppAttemptState()); assertAppFinalStateSaved(application); @@ -329,6 +326,12 @@ public class TestRMAppTransitions { rmDispatcher.await(); } + private void sendAttemptUpdateSavedEvent(RMApp application) { + application.getCurrentAppAttempt().handle( + new RMAppAttemptUpdateSavedEvent(application.getCurrentAppAttempt() + .getAppAttemptId(), null)); + } + protected RMApp testCreateAppNewSaving( ApplicationSubmissionContext submissionContext) throws IOException { RMApp application = createNewTestApp(submissionContext); @@ -624,11 +627,12 @@ public class TestRMAppTransitions { rmDispatcher.await(); // Ignore Attempt_Finished if we were supposed to go to Finished. 
- assertAppState(RMAppState.FINAL_SAVING, application); + assertAppState(RMAppState.KILLING, application); RMAppEvent finishEvent = new RMAppFinishedAttemptEvent(application.getApplicationId(), null); application.handle(finishEvent); - assertAppState(RMAppState.FINAL_SAVING, application); + assertAppState(RMAppState.KILLING, application); + sendAttemptUpdateSavedEvent(application); sendAppUpdateSavedEvent(application); assertKilled(application); } @@ -686,8 +690,8 @@ public class TestRMAppTransitions { } @Test - public void testAppFinishingKill() throws IOException { - LOG.info("--- START: testAppFinishedFinished ---"); + public void testAppAtFinishingIgnoreKill() throws IOException { + LOG.info("--- START: testAppAtFinishingIgnoreKill ---"); RMApp application = testCreateAppFinishing(null); // FINISHING => FINISHED event RMAppEventType.KILL @@ -695,7 +699,7 @@ public class TestRMAppTransitions { new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); rmDispatcher.await(); - assertAppState(RMAppState.FINISHED, application); + assertAppState(RMAppState.FINISHING, application); } // While App is at FINAL_SAVING, Attempt_Finished event may come before @@ -780,6 +784,7 @@ public class TestRMAppTransitions { new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); application.handle(event); rmDispatcher.await(); + sendAttemptUpdateSavedEvent(application); sendAppUpdateSavedEvent(application); assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); @@ -801,14 +806,6 @@ public class TestRMAppTransitions { assertTimesAtFinish(application); assertAppState(RMAppState.KILLED, application); - // KILLED => KILLED event RMAppEventType.ATTEMPT_KILLED - event = - new RMAppEvent(application.getApplicationId(), - RMAppEventType.ATTEMPT_KILLED); - application.handle(event); - rmDispatcher.await(); - assertTimesAtFinish(application); - assertAppState(RMAppState.KILLED, application); // KILLED => KILLED event RMAppEventType.KILL event = new RMAppEvent(application.getApplicationId(), RMAppEventType.KILL); From 44429c4bb3e9054648cbf02dcc0b5feb41d420f1 Mon Sep 17 00:00:00 2001 From: Jason Darrell Lowe Date: Tue, 17 Dec 2013 16:45:23 +0000 Subject: [PATCH 07/32] MAPREDUCE-5679. TestJobHistoryParsing has race condition. Contributed by Liyin Liang git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551616 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../v2/hs/TestJobHistoryParsing.java | 78 +++++++++---------- 2 files changed, 42 insertions(+), 39 deletions(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 08e1c2cc21d..39475c193aa 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -240,6 +240,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5623. TestJobCleanup fails because of RejectedExecutionException and NPE. (jlowe) + MAPREDUCE-5679. 
TestJobHistoryParsing has race condition (Liyin Liang via + jlowe) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java index 86d5016a67b..382c1971f6c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/src/test/java/org/apache/hadoop/mapreduce/v2/hs/TestJobHistoryParsing.java @@ -339,8 +339,11 @@ public class TestJobHistoryParsing { PrintStream stdps = System.out; try { System.setOut(new PrintStream(outContent)); - HistoryViewer viewer = new HistoryViewer(fc.makeQualified( - fileInfo.getHistoryFile()).toString(), conf, true); + HistoryViewer viewer; + synchronized (fileInfo) { + viewer = new HistoryViewer(fc.makeQualified( + fileInfo.getHistoryFile()).toString(), conf, true); + } viewer.print(); for (TaskInfo taskInfo : allTasks.values()) { @@ -397,29 +400,27 @@ public class TestJobHistoryParsing { // make sure all events are flushed app.waitForState(Service.STATE.STOPPED); - String jobhistoryDir = JobHistoryUtils - .getHistoryIntermediateDoneDirForUser(conf); JobHistory jobHistory = new JobHistory(); jobHistory.init(conf); + HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId); + + JobHistoryParser parser; + JobInfo jobInfo; + synchronized (fileInfo) { + Path historyFilePath = fileInfo.getHistoryFile(); + FSDataInputStream in = null; + FileContext fc = null; + try { + fc = FileContext.getFileContext(conf); + in = fc.open(fc.makeQualified(historyFilePath)); + } catch (IOException ioe) { + LOG.info("Can not open history file: " + historyFilePath, ioe); + throw (new Exception("Can not open History File")); + } - JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId) - .getJobIndexInfo(); - String jobhistoryFileName = FileNameIndexUtils - .getDoneFileName(jobIndexInfo); - - Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); - FSDataInputStream in = null; - FileContext fc = null; - try { - fc = FileContext.getFileContext(conf); - in = fc.open(fc.makeQualified(historyFilePath)); - } catch (IOException ioe) { - LOG.info("Can not open history file: " + historyFilePath, ioe); - throw (new Exception("Can not open History File")); + parser = new JobHistoryParser(in); + jobInfo = parser.parse(); } - - JobHistoryParser parser = new JobHistoryParser(in); - JobInfo jobInfo = parser.parse(); Exception parseException = parser.getParseException(); Assert.assertNull("Caught an expected exception " + parseException, parseException); @@ -464,29 +465,28 @@ public class TestJobHistoryParsing { // make sure all events are flushed app.waitForState(Service.STATE.STOPPED); - String jobhistoryDir = JobHistoryUtils - .getHistoryIntermediateDoneDirForUser(conf); JobHistory jobHistory = new JobHistory(); jobHistory.init(conf); - JobIndexInfo jobIndexInfo = jobHistory.getJobFileInfo(jobId) - .getJobIndexInfo(); - String jobhistoryFileName = FileNameIndexUtils - .getDoneFileName(jobIndexInfo); + HistoryFileInfo fileInfo = jobHistory.getJobFileInfo(jobId); + + JobHistoryParser parser; + JobInfo jobInfo; + synchronized (fileInfo) { + Path historyFilePath = fileInfo.getHistoryFile(); + 
FSDataInputStream in = null; + FileContext fc = null; + try { + fc = FileContext.getFileContext(conf); + in = fc.open(fc.makeQualified(historyFilePath)); + } catch (IOException ioe) { + LOG.info("Can not open history file: " + historyFilePath, ioe); + throw (new Exception("Can not open History File")); + } - Path historyFilePath = new Path(jobhistoryDir, jobhistoryFileName); - FSDataInputStream in = null; - FileContext fc = null; - try { - fc = FileContext.getFileContext(conf); - in = fc.open(fc.makeQualified(historyFilePath)); - } catch (IOException ioe) { - LOG.info("Can not open history file: " + historyFilePath, ioe); - throw (new Exception("Can not open History File")); + parser = new JobHistoryParser(in); + jobInfo = parser.parse(); } - - JobHistoryParser parser = new JobHistoryParser(in); - JobInfo jobInfo = parser.parse(); Exception parseException = parser.getParseException(); Assert.assertNull("Caught an expected exception " + parseException, parseException); From 926a86780d26e87b8919b527d114ed48855b37a7 Mon Sep 17 00:00:00 2001 From: Suresh Srinivas Date: Tue, 17 Dec 2013 18:18:15 +0000 Subject: [PATCH 08/32] HADOOP-10168. fix javadoc of ReflectionUtils#copy. Contributed by Thejas Nair. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551646 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 6 ++++-- .../main/java/org/apache/hadoop/util/ReflectionUtils.java | 5 +++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 49567607447..b31423bb5b2 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -397,12 +397,14 @@ Release 2.4.0 - UNRELEASED HADOOP-10102. Update commons IO from 2.1 to 2.4 (Akira Ajisaka via stevel) + HADOOP-10168. fix javadoc of ReflectionUtils#copy. (Thejas Nair via suresh) + OPTIMIZATIONS HADOOP-9748. Reduce blocking on UGI.ensureInitialized (daryn) - HADOOP-10047. Add a direct-buffer based apis for compression. (Gopal V - via acmurthy) + HADOOP-10047. Add a direct-buffer based apis for compression. (Gopal V + via acmurthy) BUG FIXES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java index be63c816201..3977e60287a 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/ReflectionUtils.java @@ -275,8 +275,9 @@ public class ReflectionUtils { /** * Make a copy of the writable object using serialization to a buffer - * @param dst the object to copy from - * @param src the object to copy into, which is destroyed + * @param src the object to copy from + * @param dst the object to copy into, which is destroyed + * @return dst param (the copy) * @throws IOException */ @SuppressWarnings("unchecked") From 991c453ca3ac141a3f286f74af8401f83c38b230 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Tue, 17 Dec 2013 18:47:04 +0000 Subject: [PATCH 09/32] HDFS-5431. Support cachepool-based limit management in path-based caching. 
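HDFS-5431 replaces the cache pool weight with a byte-based limit and threads an EnumSet of CacheFlag values through addCacheDirective and modifyCacheDirective, where CacheFlag.FORCE skips the pool limit check. A minimal sketch of client code against the new API might look like this (the pool name, path, and limit are made-up values, and it assumes fs.defaultFS points at an HDFS cluster):

// Minimal sketch (not part of the patch): create a pool with a byte limit and
// add a directive, using CacheFlag.FORCE to bypass the pool limit check.
// Pool name, path and limit are made-up example values.
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CacheFlag;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;

public class CacheLimitExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumes the default filesystem is an HDFS cluster.
    DistributedFileSystem dfs = (DistributedFileSystem) FileSystem.get(conf);

    // A pool allowed to cache at most 1 GB (the limit replaces the old weight).
    dfs.addCachePool(new CachePoolInfo("reports").setLimit(1024L * 1024 * 1024));

    CacheDirectiveInfo directive = new CacheDirectiveInfo.Builder()
        .setPath(new Path("/data/reports/2013-12"))
        .setPool("reports")
        .build();

    // Without FORCE the NameNode rejects a directive that would exceed the
    // pool's limit; FORCE tells it to skip that check.
    long id = dfs.addCacheDirective(directive, EnumSet.of(CacheFlag.FORCE));
    System.out.println("Added cache directive " + id);
  }
}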
(awang via cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551651 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../dev-support/findbugsExcludeFile.xml | 9 +- .../java/org/apache/hadoop/fs/CacheFlag.java | 44 ++ .../org/apache/hadoop/hdfs/DFSClient.java | 9 +- .../hadoop/hdfs/DistributedFileSystem.java | 33 +- .../apache/hadoop/hdfs/client/HdfsAdmin.java | 21 +- .../hadoop/hdfs/protocol/CachePoolInfo.java | 92 +-- .../hadoop/hdfs/protocol/CachePoolStats.java | 21 +- .../hadoop/hdfs/protocol/ClientProtocol.java | 15 +- ...amenodeProtocolServerSideTranslatorPB.java | 11 +- .../ClientNamenodeProtocolTranslatorPB.java | 28 +- .../hadoop/hdfs/protocolPB/PBHelper.java | 32 +- .../CacheReplicationMonitor.java | 227 ++++-- .../hdfs/server/namenode/CacheManager.java | 285 +++++--- .../hdfs/server/namenode/CachePool.java | 42 +- .../hdfs/server/namenode/FSEditLogLoader.java | 4 +- .../hdfs/server/namenode/FSEditLogOp.java | 192 +----- .../server/namenode/FSImageSerialization.java | 205 ++++++ .../hdfs/server/namenode/FSNamesystem.java | 13 +- .../server/namenode/NameNodeRpcServer.java | 10 +- .../apache/hadoop/hdfs/tools/CacheAdmin.java | 117 ++-- .../main/proto/ClientNamenodeProtocol.proto | 13 +- .../org/apache/hadoop/hdfs/DFSTestUtil.java | 6 +- .../namenode/OfflineEditsViewerHelper.java | 2 +- .../server/namenode/TestCacheDirectives.java | 652 +++++++++--------- .../namenode/ha/TestRetryCacheWithHA.java | 17 +- .../src/test/resources/editsStored | Bin 4627 -> 4599 bytes .../src/test/resources/editsStored.xml | 186 +++-- .../src/test/resources/testCacheAdminConf.xml | 16 +- 29 files changed, 1367 insertions(+), 938 deletions(-) create mode 100644 hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/CacheFlag.java diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index a1024e7eada..264cc411fca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -253,6 +253,9 @@ Trunk (Unreleased) INodeDirectoryWithSnapshot with DirectoryWithSnapshotFeature. (jing9 via szetszwo) + HDFS-5431. Support cachepool-based limit management in path-based caching + (awang via cmccabe) + OPTIMIZATIONS HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe) diff --git a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml index 1245ebba61c..f97110705ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/dev-support/findbugsExcludeFile.xml @@ -357,16 +357,9 @@ - - - - - - - - + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/CacheFlag.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/CacheFlag.java new file mode 100644 index 00000000000..f76fcaa23e7 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/CacheFlag.java @@ -0,0 +1,44 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.fs; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * Specifies semantics for CacheDirective operations. Multiple flags can + * be combined in an EnumSet. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public enum CacheFlag { + + /** + * Ignore cache pool resource limits when performing this operation. + */ + FORCE((short) 0x01); + private final short mode; + + private CacheFlag(short mode) { + this.mode = mode; + } + + short getMode() { + return mode; + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index f008878e48e..49d197f74bc 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -85,6 +85,7 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStorageLocation; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; @@ -2295,20 +2296,20 @@ public class DFSClient implements java.io.Closeable { } public long addCacheDirective( - CacheDirectiveInfo info) throws IOException { + CacheDirectiveInfo info, EnumSet flags) throws IOException { checkOpen(); try { - return namenode.addCacheDirective(info); + return namenode.addCacheDirective(info, flags); } catch (RemoteException re) { throw re.unwrapRemoteException(); } } public void modifyCacheDirective( - CacheDirectiveInfo info) throws IOException { + CacheDirectiveInfo info, EnumSet flags) throws IOException { checkOpen(); try { - namenode.modifyCacheDirective(info); + namenode.modifyCacheDirective(info, flags); } catch (RemoteException re) { throw re.unwrapRemoteException(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java index be44c13aa52..45c10b99ad7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DistributedFileSystem.java @@ -31,6 +31,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; import org.apache.hadoop.fs.BlockStorageLocation; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataInputStream; @@ -1585,40 +1586,56 @@ public class DistributedFileSystem extends FileSystem { }.resolve(this, absF); } + /** + * @see {@link #addCacheDirective(CacheDirectiveInfo, EnumSet)} + */ + 
public long addCacheDirective(CacheDirectiveInfo info) throws IOException { + return addCacheDirective(info, EnumSet.noneOf(CacheFlag.class)); + } + /** * Add a new CacheDirective. * * @param info Information about a directive to add. + * @param flags {@link CacheFlag}s to use for this operation. * @return the ID of the directive that was created. * @throws IOException if the directive could not be added */ public long addCacheDirective( - CacheDirectiveInfo info) throws IOException { + CacheDirectiveInfo info, EnumSet flags) throws IOException { Preconditions.checkNotNull(info.getPath()); Path path = new Path(getPathName(fixRelativePart(info.getPath()))). makeQualified(getUri(), getWorkingDirectory()); return dfs.addCacheDirective( new CacheDirectiveInfo.Builder(info). setPath(path). - build()); + build(), + flags); } - + + /** + * @see {@link #modifyCacheDirective(CacheDirectiveInfo, EnumSet)} + */ + public void modifyCacheDirective(CacheDirectiveInfo info) throws IOException { + modifyCacheDirective(info, EnumSet.noneOf(CacheFlag.class)); + } + /** * Modify a CacheDirective. * - * @param info Information about the directive to modify. - * You must set the ID to indicate which CacheDirective you want - * to modify. + * @param info Information about the directive to modify. You must set the ID + * to indicate which CacheDirective you want to modify. + * @param flags {@link CacheFlag}s to use for this operation. * @throws IOException if the directive could not be modified */ public void modifyCacheDirective( - CacheDirectiveInfo info) throws IOException { + CacheDirectiveInfo info, EnumSet flags) throws IOException { if (info.getPath() != null) { info = new CacheDirectiveInfo.Builder(info). setPath(new Path(getPathName(fixRelativePart(info.getPath()))). makeQualified(getUri(), getWorkingDirectory())).build(); } - dfs.modifyCacheDirective(info); + dfs.modifyCacheDirective(info, flags); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java index da6fa9c0de7..0f0769e302c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/client/HdfsAdmin.java @@ -19,10 +19,12 @@ package org.apache.hadoop.hdfs.client; import java.io.IOException; import java.net.URI; +import java.util.EnumSet; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -131,25 +133,26 @@ public class HdfsAdmin { * Add a new CacheDirectiveInfo. * * @param info Information about a directive to add. + * @param flags {@link CacheFlag}s to use for this operation. * @return the ID of the directive that was created. * @throws IOException if the directive could not be added */ - public long addCacheDirective(CacheDirectiveInfo info) - throws IOException { - return dfs.addCacheDirective(info); + public long addCacheDirective(CacheDirectiveInfo info, + EnumSet flags) throws IOException { + return dfs.addCacheDirective(info, flags); } /** * Modify a CacheDirective. * - * @param info Information about the directive to modify. - * You must set the ID to indicate which CacheDirective you want - * to modify. 
+ * @param info Information about the directive to modify. You must set the ID + * to indicate which CacheDirective you want to modify. + * @param flags {@link CacheFlag}s to use for this operation. * @throws IOException if the directive could not be modified */ - public void modifyCacheDirective(CacheDirectiveInfo info) - throws IOException { - dfs.modifyCacheDirective(info); + public void modifyCacheDirective(CacheDirectiveInfo info, + EnumSet flags) throws IOException { + dfs.modifyCacheDirective(info, flags); } /** diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolInfo.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolInfo.java index a7c15ecfb22..98a7dd8e202 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolInfo.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolInfo.java @@ -18,8 +18,6 @@ package org.apache.hadoop.hdfs.protocol; -import java.io.DataInput; -import java.io.DataOutput; import java.io.IOException; import javax.annotation.Nullable; @@ -32,14 +30,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.InvalidRequestException; import org.apache.hadoop.fs.permission.FsPermission; -import org.apache.hadoop.fs.permission.PermissionStatus; -import org.apache.hadoop.hdfs.server.namenode.FSEditLogOp; -import org.apache.hadoop.hdfs.util.XMLUtils; -import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException; -import org.apache.hadoop.hdfs.util.XMLUtils.Stanza; -import org.apache.hadoop.io.Text; -import org.xml.sax.ContentHandler; -import org.xml.sax.SAXException; /** * CachePoolInfo describes a cache pool. @@ -64,7 +54,7 @@ public class CachePoolInfo { FsPermission mode; @Nullable - Integer weight; + Long limit; public CachePoolInfo(String poolName) { this.poolName = poolName; @@ -101,12 +91,12 @@ public class CachePoolInfo { return this; } - public Integer getWeight() { - return weight; + public Long getLimit() { + return limit; } - public CachePoolInfo setWeight(Integer weight) { - this.weight = weight; + public CachePoolInfo setLimit(Long bytes) { + this.limit = bytes; return this; } @@ -117,7 +107,7 @@ public class CachePoolInfo { append(", groupName:").append(groupName). append(", mode:").append((mode == null) ? "null" : String.format("0%03o", mode.toShort())). - append(", weight:").append(weight). + append(", limit:").append(limit). append("}").toString(); } @@ -134,7 +124,7 @@ public class CachePoolInfo { append(ownerName, other.ownerName). append(groupName, other.groupName). append(mode, other.mode). - append(weight, other.weight). + append(limit, other.limit). isEquals(); } @@ -145,7 +135,7 @@ public class CachePoolInfo { append(ownerName). append(groupName). append(mode). - append(weight). + append(limit). 
hashCode(); } @@ -153,8 +143,8 @@ public class CachePoolInfo { if (info == null) { throw new InvalidRequestException("CachePoolInfo is null"); } - if ((info.getWeight() != null) && (info.getWeight() < 0)) { - throw new InvalidRequestException("CachePool weight is negative."); + if ((info.getLimit() != null) && (info.getLimit() < 0)) { + throw new InvalidRequestException("Limit is negative."); } validateName(info.poolName); } @@ -167,66 +157,4 @@ public class CachePoolInfo { throw new IOException("invalid empty cache pool name"); } } - - public static CachePoolInfo readFrom(DataInput in) throws IOException { - String poolName = Text.readString(in); - CachePoolInfo info = new CachePoolInfo(poolName); - if (in.readBoolean()) { - info.setOwnerName(Text.readString(in)); - } - if (in.readBoolean()) { - info.setGroupName(Text.readString(in)); - } - if (in.readBoolean()) { - info.setMode(FsPermission.read(in)); - } - if (in.readBoolean()) { - info.setWeight(in.readInt()); - } - return info; - } - - public void writeTo(DataOutput out) throws IOException { - Text.writeString(out, poolName); - boolean hasOwner, hasGroup, hasMode, hasWeight; - hasOwner = ownerName != null; - hasGroup = groupName != null; - hasMode = mode != null; - hasWeight = weight != null; - out.writeBoolean(hasOwner); - if (hasOwner) { - Text.writeString(out, ownerName); - } - out.writeBoolean(hasGroup); - if (hasGroup) { - Text.writeString(out, groupName); - } - out.writeBoolean(hasMode); - if (hasMode) { - mode.write(out); - } - out.writeBoolean(hasWeight); - if (hasWeight) { - out.writeInt(weight); - } - } - - public void writeXmlTo(ContentHandler contentHandler) throws SAXException { - XMLUtils.addSaxString(contentHandler, "POOLNAME", poolName); - PermissionStatus perm = new PermissionStatus(ownerName, - groupName, mode); - FSEditLogOp.permissionStatusToXml(contentHandler, perm); - XMLUtils.addSaxString(contentHandler, "WEIGHT", Integer.toString(weight)); - } - - public static CachePoolInfo readXmlFrom(Stanza st) throws InvalidXmlException { - String poolName = st.getValue("POOLNAME"); - PermissionStatus perm = FSEditLogOp.permissionStatusFromXml(st); - int weight = Integer.parseInt(st.getValue("WEIGHT")); - return new CachePoolInfo(poolName). - setOwnerName(perm.getUserName()). - setGroupName(perm.getGroupName()). - setMode(perm.getPermission()). 
- setWeight(weight); - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolStats.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolStats.java index c205c15be00..c552652ceb1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolStats.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/CachePoolStats.java @@ -30,6 +30,7 @@ public class CachePoolStats { public static class Builder { private long bytesNeeded; private long bytesCached; + private long bytesOverlimit; private long filesNeeded; private long filesCached; @@ -46,6 +47,11 @@ public class CachePoolStats { return this; } + public Builder setBytesOverlimit(long bytesOverlimit) { + this.bytesOverlimit = bytesOverlimit; + return this; + } + public Builder setFilesNeeded(long filesNeeded) { this.filesNeeded = filesNeeded; return this; @@ -57,20 +63,22 @@ public class CachePoolStats { } public CachePoolStats build() { - return new CachePoolStats(bytesNeeded, bytesCached, filesNeeded, - filesCached); + return new CachePoolStats(bytesNeeded, bytesCached, bytesOverlimit, + filesNeeded, filesCached); } }; private final long bytesNeeded; private final long bytesCached; + private final long bytesOverlimit; private final long filesNeeded; private final long filesCached; - private CachePoolStats(long bytesNeeded, long bytesCached, long filesNeeded, - long filesCached) { + private CachePoolStats(long bytesNeeded, long bytesCached, + long bytesOverlimit, long filesNeeded, long filesCached) { this.bytesNeeded = bytesNeeded; this.bytesCached = bytesCached; + this.bytesOverlimit = bytesOverlimit; this.filesNeeded = filesNeeded; this.filesCached = filesCached; } @@ -83,6 +91,10 @@ public class CachePoolStats { return bytesCached; } + public long getBytesOverlimit() { + return bytesOverlimit; + } + public long getFilesNeeded() { return filesNeeded; } @@ -95,6 +107,7 @@ public class CachePoolStats { return new StringBuilder().append("{"). append("bytesNeeded:").append(bytesNeeded). append(", bytesCached:").append(bytesCached). + append(", bytesOverlimit:").append(bytesOverlimit). append(", filesNeeded:").append(filesNeeded). append(", filesCached:").append(filesCached). append("}").toString(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java index 512ca2a4969..18751a2246a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocol/ClientProtocol.java @@ -19,9 +19,11 @@ package org.apache.hadoop.hdfs.protocol; import java.io.FileNotFoundException; import java.io.IOException; +import java.util.EnumSet; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -1100,23 +1102,24 @@ public interface ClientProtocol { * Add a CacheDirective to the CacheManager. * * @param directive A CacheDirectiveInfo to be added + * @param flags {@link CacheFlag}s to use for this operation. 
* @return A CacheDirectiveInfo associated with the added directive * @throws IOException if the directive could not be added */ @AtMostOnce - public long addCacheDirective( - CacheDirectiveInfo directive) throws IOException; + public long addCacheDirective(CacheDirectiveInfo directive, + EnumSet flags) throws IOException; /** * Modify a CacheDirective in the CacheManager. * - * @return directive The directive to modify. Must contain - * a directive ID. + * @return directive The directive to modify. Must contain a directive ID. + * @param flags {@link CacheFlag}s to use for this operation. * @throws IOException if the directive could not be modified */ @AtMostOnce - public void modifyCacheDirective( - CacheDirectiveInfo directive) throws IOException; + public void modifyCacheDirective(CacheDirectiveInfo directive, + EnumSet flags) throws IOException; /** * Remove a CacheDirectiveInfo from the CacheManager. diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java index 79f2c1f4ba2..c8ab938044d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolServerSideTranslatorPB.java @@ -320,7 +320,7 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements try { HdfsFileStatus result = server.create(req.getSrc(), PBHelper.convert(req.getMasked()), req.getClientName(), - PBHelper.convert(req.getCreateFlag()), req.getCreateParent(), + PBHelper.convertCreateFlag(req.getCreateFlag()), req.getCreateParent(), (short) req.getReplication(), req.getBlockSize()); if (result != null) { @@ -1034,9 +1034,11 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements RpcController controller, AddCacheDirectiveRequestProto request) throws ServiceException { try { + long id = server.addCacheDirective( + PBHelper.convert(request.getInfo()), + PBHelper.convertCacheFlags(request.getCacheFlags())); return AddCacheDirectiveResponseProto.newBuilder(). 
- setId(server.addCacheDirective( - PBHelper.convert(request.getInfo()))).build(); + setId(id).build(); } catch (IOException e) { throw new ServiceException(e); } @@ -1048,7 +1050,8 @@ public class ClientNamenodeProtocolServerSideTranslatorPB implements throws ServiceException { try { server.modifyCacheDirective( - PBHelper.convert(request.getInfo())); + PBHelper.convert(request.getInfo()), + PBHelper.convertCacheFlags(request.getCacheFlags())); return ModifyCacheDirectiveResponseProto.newBuilder().build(); } catch (IOException e) { throw new ServiceException(e); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java index 773f6fdd80b..9b303f22cdf 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/ClientNamenodeProtocolTranslatorPB.java @@ -21,10 +21,12 @@ import java.io.Closeable; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; +import java.util.EnumSet; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedEntries; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileAlreadyExistsException; @@ -1003,24 +1005,32 @@ public class ClientNamenodeProtocolTranslatorPB implements } @Override - public long addCacheDirective( - CacheDirectiveInfo directive) throws IOException { + public long addCacheDirective(CacheDirectiveInfo directive, + EnumSet flags) throws IOException { try { - return rpcProxy.addCacheDirective(null, - AddCacheDirectiveRequestProto.newBuilder(). - setInfo(PBHelper.convert(directive)).build()).getId(); + AddCacheDirectiveRequestProto.Builder builder = + AddCacheDirectiveRequestProto.newBuilder(). + setInfo(PBHelper.convert(directive)); + if (!flags.isEmpty()) { + builder.setCacheFlags(PBHelper.convertCacheFlags(flags)); + } + return rpcProxy.addCacheDirective(null, builder.build()).getId(); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } } @Override - public void modifyCacheDirective( - CacheDirectiveInfo directive) throws IOException { + public void modifyCacheDirective(CacheDirectiveInfo directive, + EnumSet flags) throws IOException { try { - rpcProxy.modifyCacheDirective(null, + ModifyCacheDirectiveRequestProto.Builder builder = ModifyCacheDirectiveRequestProto.newBuilder(). 
- setInfo(PBHelper.convert(directive)).build()); + setInfo(PBHelper.convert(directive)); + if (!flags.isEmpty()) { + builder.setCacheFlags(PBHelper.convertCacheFlags(flags)); + } + rpcProxy.modifyCacheDirective(null, builder.build()); } catch (ServiceException e) { throw ProtobufHelper.getRemoteException(e); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index 544836a9cb4..b7898da1e44 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -27,6 +27,7 @@ import java.util.Arrays; import java.util.EnumSet; import java.util.List; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FsServerDefaults; @@ -64,6 +65,7 @@ import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveEntryProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveInfoExpirationProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheDirectiveStatsProto; +import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CacheFlagProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolEntryProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolInfoProto; import org.apache.hadoop.hdfs.protocol.proto.ClientNamenodeProtocolProtos.CachePoolStatsProto; @@ -1182,7 +1184,7 @@ public class PBHelper { return value; } - public static EnumSetWritable convert(int flag) { + public static EnumSetWritable convertCreateFlag(int flag) { EnumSet result = EnumSet.noneOf(CreateFlag.class); if ((flag & CreateFlagProto.APPEND_VALUE) == CreateFlagProto.APPEND_VALUE) { @@ -1197,7 +1199,23 @@ public class PBHelper { } return new EnumSetWritable(result); } - + + public static int convertCacheFlags(EnumSet flags) { + int value = 0; + if (flags.contains(CacheFlag.FORCE)) { + value |= CacheFlagProto.FORCE.getNumber(); + } + return value; + } + + public static EnumSet convertCacheFlags(int flags) { + EnumSet result = EnumSet.noneOf(CacheFlag.class); + if ((flags & CacheFlagProto.FORCE_VALUE) == CacheFlagProto.FORCE_VALUE) { + result.add(CacheFlag.FORCE); + } + return result; + } + public static HdfsFileStatus convert(HdfsFileStatusProto fs) { if (fs == null) return null; @@ -1795,8 +1813,8 @@ public class PBHelper { if (info.getMode() != null) { builder.setMode(info.getMode().toShort()); } - if (info.getWeight() != null) { - builder.setWeight(info.getWeight()); + if (info.getLimit() != null) { + builder.setLimit(info.getLimit()); } return builder.build(); } @@ -1814,8 +1832,8 @@ public class PBHelper { if (proto.hasMode()) { info.setMode(new FsPermission((short)proto.getMode())); } - if (proto.hasWeight()) { - info.setWeight(proto.getWeight()); + if (proto.hasLimit()) { + info.setLimit(proto.getLimit()); } return info; } @@ -1824,6 +1842,7 @@ public class PBHelper { CachePoolStatsProto.Builder builder = CachePoolStatsProto.newBuilder(); builder.setBytesNeeded(stats.getBytesNeeded()); builder.setBytesCached(stats.getBytesCached()); + builder.setBytesOverlimit(stats.getBytesOverlimit()); 
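
The new PBHelper.convertCacheFlags helpers translate between the Java-side EnumSet<CacheFlag> and the int bitmask carried in the protobuf request (built from CacheFlagProto values). A minimal, self-contained sketch of that round-trip pattern, using illustrative names rather than the real CacheFlag/CacheFlagProto types:

    import java.util.EnumSet;

    public class FlagBits {
      // Illustrative stand-in for CacheFlag / CacheFlagProto; FORCE maps to bit 0x1.
      enum Flag {
        FORCE(0x1);
        final int bit;
        Flag(int bit) { this.bit = bit; }
      }

      // EnumSet -> int bitmask, the direction used when building the RPC request.
      static int toBits(EnumSet<Flag> flags) {
        int value = 0;
        for (Flag f : flags) {
          value |= f.bit;
        }
        return value;
      }

      // int bitmask -> EnumSet, the direction used when decoding it on the server.
      static EnumSet<Flag> fromBits(int bits) {
        EnumSet<Flag> result = EnumSet.noneOf(Flag.class);
        for (Flag f : Flag.values()) {
          if ((bits & f.bit) == f.bit) {
            result.add(f);
          }
        }
        return result;
      }

      public static void main(String[] args) {
        int wire = toBits(EnumSet.of(Flag.FORCE));
        System.out.println(wire + " -> " + fromBits(wire)); // prints "1 -> [FORCE]"
      }
    }

Because the cacheFlags field is optional, the client-side translator only calls setCacheFlags when the EnumSet is non-empty, and an absent field decodes to an empty set on the server.
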
builder.setFilesNeeded(stats.getFilesNeeded()); builder.setFilesCached(stats.getFilesCached()); return builder.build(); @@ -1833,6 +1852,7 @@ public class PBHelper { CachePoolStats.Builder builder = new CachePoolStats.Builder(); builder.setBytesNeeded(proto.getBytesNeeded()); builder.setBytesCached(proto.getBytesCached()); + builder.setBytesOverlimit(proto.getBytesOverlimit()); builder.setFilesNeeded(proto.getFilesNeeded()); builder.setFilesCached(proto.getFilesCached()); return builder.build(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java index a36dc84e741..c3ae8881c3f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/CacheReplicationMonitor.java @@ -27,6 +27,9 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Random; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -48,6 +51,8 @@ import org.apache.hadoop.hdfs.util.ReadOnlyList; import org.apache.hadoop.util.GSet; import org.apache.hadoop.util.Time; +import com.google.common.base.Preconditions; + /** * Scans the namesystem, scheduling blocks to be cached as appropriate. * @@ -79,26 +84,53 @@ public class CacheReplicationMonitor extends Thread implements Closeable { private final long intervalMs; /** - * True if we should rescan immediately, regardless of how much time - * elapsed since the previous scan. + * The CacheReplicationMonitor (CRM) lock. Used to synchronize starting and + * waiting for rescan operations. */ - private boolean rescanImmediately; + private final ReentrantLock lock = new ReentrantLock(); + + /** + * Notifies the scan thread that an immediate rescan is needed. + */ + private final Condition doRescan = lock.newCondition(); + + /** + * Notifies waiting threads that a rescan has finished. + */ + private final Condition scanFinished = lock.newCondition(); + + /** + * Whether there are pending CacheManager operations that necessitate a + * CacheReplicationMonitor rescan. Protected by the CRM lock. + */ + private boolean needsRescan = true; + + /** + * Whether we are currently doing a rescan. Protected by the CRM lock. + */ + private boolean isScanning = false; + + /** + * The number of rescans completed. Used to wait for scans to finish. + * Protected by the CacheReplicationMonitor lock. + */ + private long scanCount = 0; + + /** + * True if this monitor should terminate. Protected by the CRM lock. + */ + private boolean shutdown = false; /** * The monotonic time at which the current scan started. */ - private long scanTimeMs; + private long startTimeMs; /** * Mark status of the current scan. */ private boolean mark = false; - /** - * True if this monitor should terminate. - */ - private boolean shutdown; - /** * Cache directives found in the previous scan. */ @@ -108,7 +140,7 @@ public class CacheReplicationMonitor extends Thread implements Closeable { * Blocks found in the previous scan. 
*/ private long scannedBlocks; - + public CacheReplicationMonitor(FSNamesystem namesystem, CacheManager cacheManager, long intervalMs) { this.namesystem = namesystem; @@ -120,41 +152,60 @@ public class CacheReplicationMonitor extends Thread implements Closeable { @Override public void run() { - shutdown = false; - rescanImmediately = true; - scanTimeMs = 0; + startTimeMs = 0; LOG.info("Starting CacheReplicationMonitor with interval " + intervalMs + " milliseconds"); try { long curTimeMs = Time.monotonicNow(); while (true) { - synchronized(this) { + // Not all of the variables accessed here need the CRM lock, but take + // it anyway for simplicity + lock.lock(); + try { while (true) { if (shutdown) { LOG.info("Shutting down CacheReplicationMonitor"); return; } - if (rescanImmediately) { - LOG.info("Rescanning on request"); - rescanImmediately = false; + if (needsRescan) { + LOG.info("Rescanning because of pending operations"); break; } - long delta = (scanTimeMs + intervalMs) - curTimeMs; + long delta = (startTimeMs + intervalMs) - curTimeMs; if (delta <= 0) { - LOG.info("Rescanning after " + (curTimeMs - scanTimeMs) + + LOG.info("Rescanning after " + (curTimeMs - startTimeMs) + " milliseconds"); break; } - this.wait(delta); + doRescan.await(delta, TimeUnit.MILLISECONDS); curTimeMs = Time.monotonicNow(); } + } finally { + lock.unlock(); } - scanTimeMs = curTimeMs; + // Mark scan as started, clear needsRescan + lock.lock(); + try { + isScanning = true; + needsRescan = false; + } finally { + lock.unlock(); + } + startTimeMs = curTimeMs; mark = !mark; rescan(); curTimeMs = Time.monotonicNow(); + // Retake the CRM lock to update synchronization-related variables + lock.lock(); + try { + isScanning = false; + scanCount++; + scanFinished.signalAll(); + } finally { + lock.unlock(); + } LOG.info("Scanned " + scannedDirectives + " directive(s) and " + - scannedBlocks + " block(s) in " + (curTimeMs - scanTimeMs) + " " + + scannedBlocks + " block(s) in " + (curTimeMs - startTimeMs) + " " + "millisecond(s)."); } } catch (Throwable t) { @@ -164,15 +215,91 @@ public class CacheReplicationMonitor extends Thread implements Closeable { } /** - * Kick the monitor thread. - * - * If it is sleeping, it will wake up and start scanning. - * If it is currently scanning, it will finish the scan and immediately do - * another one. + * Similar to {@link CacheReplicationMonitor#waitForRescan()}, except it only + * waits if there are pending operations that necessitate a rescan as + * indicated by {@link #setNeedsRescan()}. + *
<p>
+ * Note that this call may release the FSN lock, so operations before and + * after are not necessarily atomic. */ - public synchronized void kick() { - rescanImmediately = true; - this.notifyAll(); + public void waitForRescanIfNeeded() { + lock.lock(); + try { + if (!needsRescan) { + return; + } + } finally { + lock.unlock(); + } + waitForRescan(); + } + + /** + * Waits for a rescan to complete. This doesn't guarantee consistency with + * pending operations, only relative recency, since it will not force a new + * rescan if a rescan is already underway. + *
<p>
+ * Note that this call will release the FSN lock, so operations before and + * after are not atomic. + */ + public void waitForRescan() { + // Drop the FSN lock temporarily and retake it after we finish waiting + // Need to handle both the read lock and the write lock + boolean retakeWriteLock = false; + if (namesystem.hasWriteLock()) { + namesystem.writeUnlock(); + retakeWriteLock = true; + } else if (namesystem.hasReadLock()) { + namesystem.readUnlock(); + } else { + // Expected to have at least one of the locks + Preconditions.checkState(false, + "Need to be holding either the read or write lock"); + } + // try/finally for retaking FSN lock + try { + lock.lock(); + // try/finally for releasing CRM lock + try { + // If no scan is already ongoing, mark the CRM as dirty and kick + if (!isScanning) { + needsRescan = true; + doRescan.signal(); + } + // Wait until the scan finishes and the count advances + final long startCount = scanCount; + while (startCount >= scanCount) { + try { + scanFinished.await(); + } catch (InterruptedException e) { + LOG.warn("Interrupted while waiting for CacheReplicationMonitor" + + " rescan", e); + break; + } + } + } finally { + lock.unlock(); + } + } finally { + if (retakeWriteLock) { + namesystem.writeLock(); + } else { + namesystem.readLock(); + } + } + } + + /** + * Indicates to the CacheReplicationMonitor that there have been CacheManager + * changes that require a rescan. + */ + public void setNeedsRescan() { + lock.lock(); + try { + this.needsRescan = true; + } finally { + lock.unlock(); + } } /** @@ -180,10 +307,14 @@ public class CacheReplicationMonitor extends Thread implements Closeable { */ @Override public void close() throws IOException { - synchronized(this) { + lock.lock(); + try { if (shutdown) return; shutdown = true; - this.notifyAll(); + doRescan.signalAll(); + scanFinished.signalAll(); + } finally { + lock.unlock(); } try { if (this.isAlive()) { @@ -228,12 +359,14 @@ public class CacheReplicationMonitor extends Thread implements Closeable { // Reset the directive's statistics directive.resetStatistics(); // Skip processing this entry if it has expired - LOG.info("Directive expiry is at " + directive.getExpiryTime()); + if (LOG.isTraceEnabled()) { + LOG.trace("Directive expiry is at " + directive.getExpiryTime()); + } if (directive.getExpiryTime() > 0 && directive.getExpiryTime() <= now) { if (LOG.isDebugEnabled()) { LOG.debug("Skipping directive id " + directive.getId() + " because it has expired (" + directive.getExpiryTime() + ">=" - + now); + + now + ")"); } continue; } @@ -280,15 +413,27 @@ public class CacheReplicationMonitor extends Thread implements Closeable { // Increment the "needed" statistics directive.addFilesNeeded(1); - long neededTotal = 0; - for (BlockInfo blockInfo : blockInfos) { - long neededByBlock = - directive.getReplication() * blockInfo.getNumBytes(); - neededTotal += neededByBlock; - } + // We don't cache UC blocks, don't add them to the total here + long neededTotal = file.computeFileSizeNotIncludingLastUcBlock() * + directive.getReplication(); directive.addBytesNeeded(neededTotal); - // TODO: Enforce per-pool quotas + // The pool's bytesNeeded is incremented as we scan. If the demand + // thus far plus the demand of this file would exceed the pool's limit, + // do not cache this file. 
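
The monitor's old synchronized kick()/wait() scheme is replaced by a ReentrantLock with two conditions: doRescan wakes the scan thread early, while scanFinished together with the monotonically increasing scanCount lets callers block until a whole rescan has completed. A condensed, self-contained sketch of that handshake (simplified: no isScanning short-circuit and no FSN lock juggling; the class and method names are illustrative):

    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.locks.Condition;
    import java.util.concurrent.locks.ReentrantLock;

    public class RescanHandshake {
      private final ReentrantLock lock = new ReentrantLock();
      private final Condition doRescan = lock.newCondition();     // wakes the scan loop
      private final Condition scanFinished = lock.newCondition(); // wakes waiters
      private boolean needsRescan = true; // pending operations require a scan
      private boolean shutdown = false;
      private long scanCount = 0;         // completed scans, used by waiters

      // Scan loop: sleep until kicked or until the interval elapses, then scan.
      public void run(long intervalMs) throws InterruptedException {
        while (true) {
          lock.lock();
          try {
            if (shutdown) {
              return;
            }
            if (!needsRescan) {
              doRescan.await(intervalMs, TimeUnit.MILLISECONDS);
            }
            if (shutdown) {
              return;
            }
            needsRescan = false;
          } finally {
            lock.unlock();
          }
          // ... perform the actual rescan here, outside the lock ...
          lock.lock();
          try {
            scanCount++;
            scanFinished.signalAll();
          } finally {
            lock.unlock();
          }
        }
      }

      // Request a rescan and block until at least one full scan completes.
      public void waitForRescan() throws InterruptedException {
        lock.lock();
        try {
          needsRescan = true;
          doRescan.signal();
          long startCount = scanCount;
          while (scanCount <= startCount) {
            scanFinished.await();
          }
        } finally {
          lock.unlock();
        }
      }
    }

Waiting for scanCount to advance, rather than on a simple boolean, ensures a waiter observes at least one scan completion after its request, which matches the "relative recency" guarantee described in the waitForRescan javadoc.
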
+ CachePool pool = directive.getPool(); + if (pool.getBytesNeeded() > pool.getLimit()) { + if (LOG.isDebugEnabled()) { + LOG.debug(String.format("Skipping directive id %d file %s because " + + "limit of pool %s would be exceeded (%d > %d)", + directive.getId(), + file.getFullPathName(), + pool.getPoolName(), + pool.getBytesNeeded(), + pool.getLimit())); + } + return; + } long cachedTotal = 0; for (BlockInfo blockInfo : blockInfos) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java index 94c62a9cbc1..82bb4e8f6a7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CacheManager.java @@ -27,11 +27,12 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS_DEFAULT; import java.io.DataInput; -import java.io.DataOutput; +import java.io.DataOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.EnumSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -45,13 +46,16 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.InvalidRequestException; +import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirective; import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveStats; import org.apache.hadoop.hdfs.protocol.CachePoolEntry; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.DatanodeID; @@ -68,7 +72,7 @@ import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StartupProgress.Counter; import org.apache.hadoop.hdfs.server.namenode.startupprogress.Step; import org.apache.hadoop.hdfs.server.namenode.startupprogress.StepType; -import org.apache.hadoop.io.Text; +import org.apache.hadoop.hdfs.util.ReadOnlyList; import org.apache.hadoop.security.AccessControlException; import org.apache.hadoop.util.GSet; import org.apache.hadoop.util.LightWeightGSet; @@ -341,6 +345,67 @@ public final class CacheManager { return expiryTime; } + /** + * Throws an exception if the CachePool does not have enough capacity to + * cache the given path at the replication factor. 
+ * + * @param pool CachePool where the path is being cached + * @param path Path that is being cached + * @param replication Replication factor of the path + * @throws InvalidRequestException if the pool does not have enough capacity + */ + private void checkLimit(CachePool pool, String path, + short replication) throws InvalidRequestException { + CacheDirectiveStats stats = computeNeeded(path, replication); + if (pool.getBytesNeeded() + (stats.getBytesNeeded() * replication) > pool + .getLimit()) { + throw new InvalidRequestException("Caching path " + path + " of size " + + stats.getBytesNeeded() / replication + " bytes at replication " + + replication + " would exceed pool " + pool.getPoolName() + + "'s remaining capacity of " + + (pool.getLimit() - pool.getBytesNeeded()) + " bytes."); + } + } + + /** + * Computes the needed number of bytes and files for a path. + * @return CacheDirectiveStats describing the needed stats for this path + */ + private CacheDirectiveStats computeNeeded(String path, short replication) { + FSDirectory fsDir = namesystem.getFSDirectory(); + INode node; + long requestedBytes = 0; + long requestedFiles = 0; + CacheDirectiveStats.Builder builder = new CacheDirectiveStats.Builder(); + try { + node = fsDir.getINode(path); + } catch (UnresolvedLinkException e) { + // We don't cache through symlinks + return builder.build(); + } + if (node == null) { + return builder.build(); + } + if (node.isFile()) { + requestedFiles = 1; + INodeFile file = node.asFile(); + requestedBytes = file.computeFileSize(); + } else if (node.isDirectory()) { + INodeDirectory dir = node.asDirectory(); + ReadOnlyList children = dir.getChildrenList(null); + requestedFiles = children.size(); + for (INode child : children) { + if (child.isFile()) { + requestedBytes += child.asFile().computeFileSize(); + } + } + } + return new CacheDirectiveStats.Builder() + .setBytesNeeded(requestedBytes) + .setFilesCached(requestedFiles) + .build(); + } + /** * Get a CacheDirective by ID, validating the ID and that the directive * exists. @@ -384,6 +449,15 @@ public final class CacheManager { directivesByPath.put(path, directives); } directives.add(directive); + // Fix up pool stats + CacheDirectiveStats stats = + computeNeeded(directive.getPath(), directive.getReplication()); + directive.addBytesNeeded(stats.getBytesNeeded()); + directive.addFilesNeeded(directive.getFilesNeeded()); + + if (monitor != null) { + monitor.setNeedsRescan(); + } } /** @@ -407,7 +481,7 @@ public final class CacheManager { } public CacheDirectiveInfo addDirective( - CacheDirectiveInfo info, FSPermissionChecker pc) + CacheDirectiveInfo info, FSPermissionChecker pc, EnumSet flags) throws IOException { assert namesystem.hasWriteLock(); CacheDirective directive; @@ -418,6 +492,14 @@ public final class CacheManager { short replication = validateReplication(info, (short)1); long expiryTime = validateExpiryTime(info, CacheDirectiveInfo.Expiration.EXPIRY_NEVER); + // Do quota validation if required + if (!flags.contains(CacheFlag.FORCE)) { + // Can't kick and wait if caching is disabled + if (monitor != null) { + monitor.waitForRescan(); + } + checkLimit(pool, path, replication); + } // All validation passed // Add a new entry with the next available ID. 
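
Unless the caller passes CacheFlag.FORCE, addDirective now waits for a rescan (so the pool's bytesNeeded counter is up to date) and then rejects a directive whose additional demand would push the pool past its byte limit. A stripped-down sketch of that gate, using plain long values in place of CachePool and CacheDirectiveStats (all names here are illustrative):

    import java.io.IOException;
    import java.util.EnumSet;

    public class PoolLimitCheck {
      enum CacheFlag { FORCE } // stand-in for org.apache.hadoop.fs.CacheFlag

      /**
       * Reject the request if adding newBytesNeeded to the pool's current
       * demand would exceed its byte limit, unless the caller passed FORCE.
       */
      static void checkLimit(long poolBytesNeeded, long poolLimit,
          long newBytesNeeded, EnumSet<CacheFlag> flags) throws IOException {
        if (flags.contains(CacheFlag.FORCE)) {
          return; // -force skips cache pool resource limit checking
        }
        if (poolBytesNeeded + newBytesNeeded > poolLimit) {
          throw new IOException("Caching " + newBytesNeeded + " more bytes would"
              + " exceed the pool's remaining capacity of "
              + (poolLimit - poolBytesNeeded) + " bytes");
        }
      }

      public static void main(String[] args) throws IOException {
        checkLimit(100, 1000, 500, EnumSet.noneOf(CacheFlag.class)); // fits
        checkLimit(900, 1000, 500, EnumSet.of(CacheFlag.FORCE));     // forced through
        try {
          checkLimit(900, 1000, 500, EnumSet.noneOf(CacheFlag.class));
        } catch (IOException e) {
          System.out.println("rejected: " + e.getMessage());
        }
      }
    }

The same pre-check runs in modifyDirective when the request sets a target pool.
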
long id = getNextDirectiveId(); @@ -428,14 +510,11 @@ public final class CacheManager { throw e; } LOG.info("addDirective of " + info + " successful."); - if (monitor != null) { - monitor.kick(); - } return directive.toInfo(); } public void modifyDirective(CacheDirectiveInfo info, - FSPermissionChecker pc) throws IOException { + FSPermissionChecker pc, EnumSet flags) throws IOException { assert namesystem.hasWriteLock(); String idString = (info.getId() == null) ? @@ -463,6 +542,13 @@ public final class CacheManager { if (info.getPool() != null) { pool = getCachePool(validatePoolName(info)); checkWritePermission(pc, pool); + if (!flags.contains(CacheFlag.FORCE)) { + // Can't kick and wait if caching is disabled + if (monitor != null) { + monitor.waitForRescan(); + } + checkLimit(pool, path, replication); + } } removeInternal(prevEntry); CacheDirective newEntry = @@ -489,9 +575,18 @@ public final class CacheManager { if (directives.size() == 0) { directivesByPath.remove(path); } + // Fix up the stats from removing the pool + final CachePool pool = directive.getPool(); + directive.addBytesNeeded(-directive.getBytesNeeded()); + directive.addFilesNeeded(-directive.getFilesNeeded()); + directivesById.remove(directive.getId()); - directive.getPool().getDirectiveList().remove(directive); + pool.getDirectiveList().remove(directive); assert directive.getPool() == null; + + if (monitor != null) { + monitor.setNeedsRescan(); + } } public void removeDirective(long id, FSPermissionChecker pc) @@ -505,9 +600,6 @@ public final class CacheManager { LOG.warn("removeDirective of " + id + " failed: ", e); throw e; } - if (monitor != null) { - monitor.kick(); - } LOG.info("removeDirective of " + id + " successful."); } @@ -527,6 +619,9 @@ public final class CacheManager { if (filter.getReplication() != null) { throw new IOException("Filtering by replication is unsupported."); } + if (monitor != null) { + monitor.waitForRescanIfNeeded(); + } ArrayList replies = new ArrayList(NUM_PRE_ALLOCATED_ENTRIES); int numReplies = 0; @@ -573,16 +668,22 @@ public final class CacheManager { public CachePoolInfo addCachePool(CachePoolInfo info) throws IOException { assert namesystem.hasWriteLock(); - CachePoolInfo.validate(info); - String poolName = info.getPoolName(); - CachePool pool = cachePools.get(poolName); - if (pool != null) { - throw new InvalidRequestException("Cache pool " + poolName - + " already exists."); + CachePool pool; + try { + CachePoolInfo.validate(info); + String poolName = info.getPoolName(); + pool = cachePools.get(poolName); + if (pool != null) { + throw new InvalidRequestException("Cache pool " + poolName + + " already exists."); + } + pool = CachePool.createFromInfoAndDefaults(info); + cachePools.put(pool.getPoolName(), pool); + } catch (IOException e) { + LOG.info("addCachePool of " + info + " failed: ", e); + throw e; } - pool = CachePool.createFromInfoAndDefaults(info); - cachePools.put(pool.getPoolName(), pool); - LOG.info("Created new cache pool " + pool); + LOG.info("addCachePool of " + info + " successful."); return pool.getInfo(true); } @@ -597,42 +698,51 @@ public final class CacheManager { public void modifyCachePool(CachePoolInfo info) throws IOException { assert namesystem.hasWriteLock(); - CachePoolInfo.validate(info); - String poolName = info.getPoolName(); - CachePool pool = cachePools.get(poolName); - if (pool == null) { - throw new InvalidRequestException("Cache pool " + poolName - + " does not exist."); - } StringBuilder bld = new StringBuilder(); - String prefix = ""; - if 
(info.getOwnerName() != null) { - pool.setOwnerName(info.getOwnerName()); - bld.append(prefix). - append("set owner to ").append(info.getOwnerName()); - prefix = "; "; + try { + CachePoolInfo.validate(info); + String poolName = info.getPoolName(); + CachePool pool = cachePools.get(poolName); + if (pool == null) { + throw new InvalidRequestException("Cache pool " + poolName + + " does not exist."); + } + String prefix = ""; + if (info.getOwnerName() != null) { + pool.setOwnerName(info.getOwnerName()); + bld.append(prefix). + append("set owner to ").append(info.getOwnerName()); + prefix = "; "; + } + if (info.getGroupName() != null) { + pool.setGroupName(info.getGroupName()); + bld.append(prefix). + append("set group to ").append(info.getGroupName()); + prefix = "; "; + } + if (info.getMode() != null) { + pool.setMode(info.getMode()); + bld.append(prefix).append("set mode to " + info.getMode()); + prefix = "; "; + } + if (info.getLimit() != null) { + pool.setLimit(info.getLimit()); + bld.append(prefix).append("set limit to " + info.getLimit()); + prefix = "; "; + // New limit changes stats, need to set needs refresh + if (monitor != null) { + monitor.setNeedsRescan(); + } + } + if (prefix.isEmpty()) { + bld.append("no changes."); + } + } catch (IOException e) { + LOG.info("modifyCachePool of " + info + " failed: ", e); + throw e; } - if (info.getGroupName() != null) { - pool.setGroupName(info.getGroupName()); - bld.append(prefix). - append("set group to ").append(info.getGroupName()); - prefix = "; "; - } - if (info.getMode() != null) { - pool.setMode(info.getMode()); - bld.append(prefix).append("set mode to " + info.getMode()); - prefix = "; "; - } - if (info.getWeight() != null) { - pool.setWeight(info.getWeight()); - bld.append(prefix). - append("set weight to ").append(info.getWeight()); - prefix = "; "; - } - if (prefix.isEmpty()) { - bld.append("no changes."); - } - LOG.info("modified " + poolName + "; " + bld.toString()); + LOG.info("modifyCachePool of " + info.getPoolName() + " successful; " + + bld.toString()); } /** @@ -646,28 +756,37 @@ public final class CacheManager { public void removeCachePool(String poolName) throws IOException { assert namesystem.hasWriteLock(); - CachePoolInfo.validateName(poolName); - CachePool pool = cachePools.remove(poolName); - if (pool == null) { - throw new InvalidRequestException( - "Cannot remove non-existent cache pool " + poolName); - } - // Remove all directives in this pool. - Iterator iter = pool.getDirectiveList().iterator(); - while (iter.hasNext()) { - CacheDirective directive = iter.next(); - directivesByPath.remove(directive.getPath()); - directivesById.remove(directive.getId()); - iter.remove(); - } - if (monitor != null) { - monitor.kick(); + try { + CachePoolInfo.validateName(poolName); + CachePool pool = cachePools.remove(poolName); + if (pool == null) { + throw new InvalidRequestException( + "Cannot remove non-existent cache pool " + poolName); + } + // Remove all directives in this pool. 
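
modifyCachePool updates only the fields present in the CachePoolInfo, accumulates a human-readable change summary for the log, and calls setNeedsRescan() when the limit changes, since a new limit can change which files still fit in the pool. The optional-field/summary pattern in isolation (a sketch with plain parameters instead of CachePoolInfo; names are illustrative):

    public class PoolUpdateLog {
      // Build a "set X to Y; set Z to W" summary, or "no changes." if nothing was set.
      static String describeChanges(String newOwner, String newGroup, Long newLimit) {
        StringBuilder bld = new StringBuilder();
        String prefix = "";
        if (newOwner != null) {
          bld.append(prefix).append("set owner to ").append(newOwner);
          prefix = "; ";
        }
        if (newGroup != null) {
          bld.append(prefix).append("set group to ").append(newGroup);
          prefix = "; ";
        }
        if (newLimit != null) {
          bld.append(prefix).append("set limit to ").append(newLimit);
          prefix = "; ";
        }
        if (prefix.isEmpty()) {
          bld.append("no changes.");
        }
        return bld.toString();
      }

      public static void main(String[] args) {
        System.out.println(describeChanges("alice", null, 1024L)); // set owner to alice; set limit to 1024
        System.out.println(describeChanges(null, null, null));     // no changes.
      }
    }
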
+ Iterator iter = pool.getDirectiveList().iterator(); + while (iter.hasNext()) { + CacheDirective directive = iter.next(); + directivesByPath.remove(directive.getPath()); + directivesById.remove(directive.getId()); + iter.remove(); + } + if (monitor != null) { + monitor.setNeedsRescan(); + } + } catch (IOException e) { + LOG.info("removeCachePool of " + poolName + " failed: ", e); + throw e; } + LOG.info("removeCachePool of " + poolName + " successful."); } public BatchedListEntries listCachePools(FSPermissionChecker pc, String prevKey) { assert namesystem.hasReadLock(); + if (monitor != null) { + monitor.waitForRescanIfNeeded(); + } final int NUM_PRE_ALLOCATED_ENTRIES = 16; ArrayList results = new ArrayList(NUM_PRE_ALLOCATED_ENTRIES); @@ -782,7 +901,7 @@ public final class CacheManager { * @param sdPath path of the storage directory * @throws IOException */ - public void saveState(DataOutput out, String sdPath) + public void saveState(DataOutputStream out, String sdPath) throws IOException { out.writeLong(nextDirectiveId); savePools(out, sdPath); @@ -805,7 +924,7 @@ public final class CacheManager { /** * Save cache pools to fsimage */ - private void savePools(DataOutput out, + private void savePools(DataOutputStream out, String sdPath) throws IOException { StartupProgress prog = NameNode.getStartupProgress(); Step step = new Step(StepType.CACHE_POOLS, sdPath); @@ -814,7 +933,7 @@ public final class CacheManager { Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); out.writeInt(cachePools.size()); for (CachePool pool: cachePools.values()) { - pool.getInfo(true).writeTo(out); + FSImageSerialization.writeCachePoolInfo(out, pool.getInfo(true)); counter.increment(); } prog.endStep(Phase.SAVING_CHECKPOINT, step); @@ -823,7 +942,7 @@ public final class CacheManager { /* * Save cache entries to fsimage */ - private void saveDirectives(DataOutput out, String sdPath) + private void saveDirectives(DataOutputStream out, String sdPath) throws IOException { StartupProgress prog = NameNode.getStartupProgress(); Step step = new Step(StepType.CACHE_ENTRIES, sdPath); @@ -832,11 +951,7 @@ public final class CacheManager { Counter counter = prog.getCounter(Phase.SAVING_CHECKPOINT, step); out.writeInt(directivesById.size()); for (CacheDirective directive : directivesById.values()) { - out.writeLong(directive.getId()); - Text.writeString(out, directive.getPath()); - out.writeShort(directive.getReplication()); - Text.writeString(out, directive.getPool().getPoolName()); - out.writeLong(directive.getExpiryTime()); + FSImageSerialization.writeCacheDirectiveInfo(out, directive.toInfo()); counter.increment(); } prog.endStep(Phase.SAVING_CHECKPOINT, step); @@ -854,7 +969,7 @@ public final class CacheManager { prog.setTotal(Phase.LOADING_FSIMAGE, step, numberOfPools); Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); for (int i = 0; i < numberOfPools; i++) { - addCachePool(CachePoolInfo.readFrom(in)); + addCachePool(FSImageSerialization.readCachePoolInfo(in)); counter.increment(); } prog.endStep(Phase.LOADING_FSIMAGE, step); @@ -871,19 +986,17 @@ public final class CacheManager { prog.setTotal(Phase.LOADING_FSIMAGE, step, numDirectives); Counter counter = prog.getCounter(Phase.LOADING_FSIMAGE, step); for (int i = 0; i < numDirectives; i++) { - long directiveId = in.readLong(); - String path = Text.readString(in); - short replication = in.readShort(); - String poolName = Text.readString(in); - long expiryTime = in.readLong(); + CacheDirectiveInfo info = 
FSImageSerialization.readCacheDirectiveInfo(in); // Get pool reference by looking it up in the map + final String poolName = info.getPool(); CachePool pool = cachePools.get(poolName); if (pool == null) { throw new IOException("Directive refers to pool " + poolName + ", which does not exist."); } CacheDirective directive = - new CacheDirective(directiveId, path, replication, expiryTime); + new CacheDirective(info.getId(), info.getPath().toUri().getPath(), + info.getReplication(), info.getExpiration().getAbsoluteMillis()); boolean addedDirective = pool.getDirectiveList().add(directive); assert addedDirective; if (directivesById.put(directive.getId(), directive) != null) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java index 249ea66b1d1..3da7437acc9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/CachePool.java @@ -49,8 +49,8 @@ import com.google.common.base.Preconditions; public final class CachePool { public static final Log LOG = LogFactory.getLog(CachePool.class); - public static final int DEFAULT_WEIGHT = 100; - + public static final long DEFAULT_LIMIT = Long.MAX_VALUE; + @Nonnull private final String poolName; @@ -71,7 +71,10 @@ public final class CachePool { @Nonnull private FsPermission mode; - private int weight; + /** + * Maximum number of bytes that can be cached in this pool. + */ + private long limit; private long bytesNeeded; private long bytesCached; @@ -118,10 +121,10 @@ public final class CachePool { } FsPermission mode = (info.getMode() == null) ? FsPermission.getCachePoolDefault() : info.getMode(); - Integer weight = (info.getWeight() == null) ? - DEFAULT_WEIGHT : info.getWeight(); + long limit = info.getLimit() == null ? + DEFAULT_LIMIT : info.getLimit(); return new CachePool(info.getPoolName(), - ownerName, groupName, mode, weight); + ownerName, groupName, mode, limit); } /** @@ -131,11 +134,11 @@ public final class CachePool { static CachePool createFromInfo(CachePoolInfo info) { return new CachePool(info.getPoolName(), info.getOwnerName(), info.getGroupName(), - info.getMode(), info.getWeight()); + info.getMode(), info.getLimit()); } CachePool(String poolName, String ownerName, String groupName, - FsPermission mode, int weight) { + FsPermission mode, long limit) { Preconditions.checkNotNull(poolName); Preconditions.checkNotNull(ownerName); Preconditions.checkNotNull(groupName); @@ -144,7 +147,7 @@ public final class CachePool { this.ownerName = ownerName; this.groupName = groupName; this.mode = new FsPermission(mode); - this.weight = weight; + this.limit = limit; } public String getPoolName() { @@ -177,16 +180,16 @@ public final class CachePool { this.mode = new FsPermission(mode); return this; } - - public int getWeight() { - return weight; + + public long getLimit() { + return limit; } - public CachePool setWeight(int weight) { - this.weight = weight; + public CachePool setLimit(long bytes) { + this.limit = bytes; return this; } - + /** * Get either full or partial information about this CachePool. * @@ -204,7 +207,7 @@ public final class CachePool { return info.setOwnerName(ownerName). setGroupName(groupName). setMode(new FsPermission(mode)). 
- setWeight(weight); + setLimit(limit); } /** @@ -241,6 +244,10 @@ public final class CachePool { return bytesCached; } + public long getBytesOverlimit() { + return Math.max(bytesNeeded-limit, 0); + } + public long getFilesNeeded() { return filesNeeded; } @@ -258,6 +265,7 @@ public final class CachePool { return new CachePoolStats.Builder(). setBytesNeeded(bytesNeeded). setBytesCached(bytesCached). + setBytesOverlimit(getBytesOverlimit()). setFilesNeeded(filesNeeded). setFilesCached(filesCached). build(); @@ -291,7 +299,7 @@ public final class CachePool { append(", ownerName:").append(ownerName). append(", groupName:").append(groupName). append(", mode:").append(mode). - append(", weight:").append(weight). + append(", limit:").append(limit). append(" }").toString(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index d9c091ba748..be328f71772 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -24,12 +24,14 @@ import java.io.IOException; import java.io.InputStream; import java.util.Arrays; import java.util.EnumMap; +import java.util.EnumSet; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @@ -650,7 +652,7 @@ public class FSEditLogLoader { ModifyCacheDirectiveInfoOp modifyOp = (ModifyCacheDirectiveInfoOp) op; fsNamesys.getCacheManager().modifyDirective( - modifyOp.directive, null); + modifyOp.directive, null, EnumSet.of(CacheFlag.FORCE)); if (toAddRetryCache) { fsNamesys.addCacheEntry(op.rpcClientId, op.rpcCallId); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 5b81d3a7f7b..6a852c43678 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -64,7 +64,6 @@ import java.io.EOFException; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Date; import java.util.EnumMap; import java.util.List; import java.util.zip.CheckedInputStream; @@ -76,7 +75,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.Options.Rename; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -2895,56 +2893,25 @@ public abstract class FSEditLogOp { @Override void readFields(DataInputStream in, int logVersion) throws IOException { - long id = FSImageSerialization.readLong(in); - String path = 
FSImageSerialization.readString(in); - short replication = FSImageSerialization.readShort(in); - String pool = FSImageSerialization.readString(in); - long expiryTime = FSImageSerialization.readLong(in); - directive = new CacheDirectiveInfo.Builder(). - setId(id). - setPath(new Path(path)). - setReplication(replication). - setPool(pool). - setExpiration(CacheDirectiveInfo.Expiration.newAbsolute(expiryTime)). - build(); + directive = FSImageSerialization.readCacheDirectiveInfo(in); readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { - FSImageSerialization.writeLong(directive.getId(), out); - FSImageSerialization.writeString(directive.getPath().toUri().getPath(), out); - FSImageSerialization.writeShort(directive.getReplication(), out); - FSImageSerialization.writeString(directive.getPool(), out); - FSImageSerialization.writeLong( - directive.getExpiration().getMillis(), out); + FSImageSerialization.writeCacheDirectiveInfo(out, directive); writeRpcIds(rpcClientId, rpcCallId, out); } @Override protected void toXml(ContentHandler contentHandler) throws SAXException { - XMLUtils.addSaxString(contentHandler, "ID", - directive.getId().toString()); - XMLUtils.addSaxString(contentHandler, "PATH", - directive.getPath().toUri().getPath()); - XMLUtils.addSaxString(contentHandler, "REPLICATION", - Short.toString(directive.getReplication())); - XMLUtils.addSaxString(contentHandler, "POOL", directive.getPool()); - XMLUtils.addSaxString(contentHandler, "EXPIRATION", - "" + directive.getExpiration().getMillis()); + FSImageSerialization.writeCacheDirectiveInfo(contentHandler, directive); appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); } @Override void fromXml(Stanza st) throws InvalidXmlException { - directive = new CacheDirectiveInfo.Builder(). - setId(Long.parseLong(st.getValue("ID"))). - setPath(new Path(st.getValue("PATH"))). - setReplication(Short.parseShort(st.getValue("REPLICATION"))). - setPool(st.getValue("POOL")). - setExpiration(CacheDirectiveInfo.Expiration.newAbsolute( - Long.parseLong(st.getValue("EXPIRATION")))). - build(); + directive = FSImageSerialization.readCacheDirectiveInfo(st); readRpcIdsFromXml(st); } @@ -2988,104 +2955,25 @@ public abstract class FSEditLogOp { @Override void readFields(DataInputStream in, int logVersion) throws IOException { - CacheDirectiveInfo.Builder builder = - new CacheDirectiveInfo.Builder(); - builder.setId(FSImageSerialization.readLong(in)); - byte flags = in.readByte(); - if ((flags & 0x1) != 0) { - builder.setPath(new Path(FSImageSerialization.readString(in))); - } - if ((flags & 0x2) != 0) { - builder.setReplication(FSImageSerialization.readShort(in)); - } - if ((flags & 0x4) != 0) { - builder.setPool(FSImageSerialization.readString(in)); - } - if ((flags & 0x8) != 0) { - builder.setExpiration( - CacheDirectiveInfo.Expiration.newAbsolute( - FSImageSerialization.readLong(in))); - } - if ((flags & ~0xF) != 0) { - throw new IOException("unknown flags set in " + - "ModifyCacheDirectiveInfoOp: " + flags); - } - this.directive = builder.build(); + this.directive = FSImageSerialization.readCacheDirectiveInfo(in); readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { - FSImageSerialization.writeLong(directive.getId(), out); - byte flags = (byte)( - ((directive.getPath() != null) ? 0x1 : 0) | - ((directive.getReplication() != null) ? 0x2 : 0) | - ((directive.getPool() != null) ? 0x4 : 0) | - ((directive.getExpiration() != null) ? 
0x8 : 0) - ); - out.writeByte(flags); - if (directive.getPath() != null) { - FSImageSerialization.writeString( - directive.getPath().toUri().getPath(), out); - } - if (directive.getReplication() != null) { - FSImageSerialization.writeShort(directive.getReplication(), out); - } - if (directive.getPool() != null) { - FSImageSerialization.writeString(directive.getPool(), out); - } - if (directive.getExpiration() != null) { - FSImageSerialization.writeLong(directive.getExpiration().getMillis(), - out); - } + FSImageSerialization.writeCacheDirectiveInfo(out, directive); writeRpcIds(rpcClientId, rpcCallId, out); } @Override protected void toXml(ContentHandler contentHandler) throws SAXException { - XMLUtils.addSaxString(contentHandler, "ID", - Long.toString(directive.getId())); - if (directive.getPath() != null) { - XMLUtils.addSaxString(contentHandler, "PATH", - directive.getPath().toUri().getPath()); - } - if (directive.getReplication() != null) { - XMLUtils.addSaxString(contentHandler, "REPLICATION", - Short.toString(directive.getReplication())); - } - if (directive.getPool() != null) { - XMLUtils.addSaxString(contentHandler, "POOL", directive.getPool()); - } - if (directive.getExpiration() != null) { - XMLUtils.addSaxString(contentHandler, "EXPIRATION", - "" + directive.getExpiration().getMillis()); - } + FSImageSerialization.writeCacheDirectiveInfo(contentHandler, directive); appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); } @Override void fromXml(Stanza st) throws InvalidXmlException { - CacheDirectiveInfo.Builder builder = - new CacheDirectiveInfo.Builder(); - builder.setId(Long.parseLong(st.getValue("ID"))); - String path = st.getValueOrNull("PATH"); - if (path != null) { - builder.setPath(new Path(path)); - } - String replicationString = st.getValueOrNull("REPLICATION"); - if (replicationString != null) { - builder.setReplication(Short.parseShort(replicationString)); - } - String pool = st.getValueOrNull("POOL"); - if (pool != null) { - builder.setPool(pool); - } - String expiryTime = st.getValueOrNull("EXPIRATION"); - if (expiryTime != null) { - builder.setExpiration(CacheDirectiveInfo.Expiration.newAbsolute( - Long.parseLong(expiryTime))); - } - this.directive = builder.build(); + this.directive = FSImageSerialization.readCacheDirectiveInfo(st); readRpcIdsFromXml(st); } @@ -3184,30 +3072,35 @@ public abstract class FSEditLogOp { public AddCachePoolOp setPool(CachePoolInfo info) { this.info = info; + assert(info.getPoolName() != null); + assert(info.getOwnerName() != null); + assert(info.getGroupName() != null); + assert(info.getMode() != null); + assert(info.getLimit() != null); return this; } @Override void readFields(DataInputStream in, int logVersion) throws IOException { - info = CachePoolInfo.readFrom(in); + info = FSImageSerialization.readCachePoolInfo(in); readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { - info.writeTo(out); + FSImageSerialization.writeCachePoolInfo(out, info); writeRpcIds(rpcClientId, rpcCallId, out); } @Override protected void toXml(ContentHandler contentHandler) throws SAXException { - info.writeXmlTo(contentHandler); + FSImageSerialization.writeCachePoolInfo(contentHandler, info); appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); } @Override void fromXml(Stanza st) throws InvalidXmlException { - this.info = CachePoolInfo.readXmlFrom(st); + this.info = FSImageSerialization.readCachePoolInfo(st); readRpcIdsFromXml(st); } @@ -3219,7 +3112,7 @@ public abstract class FSEditLogOp { 
builder.append("ownerName=" + info.getOwnerName() + ","); builder.append("groupName=" + info.getGroupName() + ","); builder.append("mode=" + Short.toString(info.getMode().toShort()) + ","); - builder.append("weight=" + Integer.toString(info.getWeight())); + builder.append("limit=" + Long.toString(info.getLimit())); appendRpcIdsToString(builder, rpcClientId, rpcCallId); builder.append("]"); return builder.toString(); @@ -3245,25 +3138,25 @@ public abstract class FSEditLogOp { @Override void readFields(DataInputStream in, int logVersion) throws IOException { - info = CachePoolInfo.readFrom(in); + info = FSImageSerialization.readCachePoolInfo(in); readRpcIds(in, logVersion); } @Override public void writeFields(DataOutputStream out) throws IOException { - info.writeTo(out); + FSImageSerialization.writeCachePoolInfo(out, info); writeRpcIds(rpcClientId, rpcCallId, out); } @Override protected void toXml(ContentHandler contentHandler) throws SAXException { - cachePoolInfoToXml(contentHandler, info); + FSImageSerialization.writeCachePoolInfo(contentHandler, info); appendRpcIdsToXml(contentHandler, rpcClientId, rpcCallId); } @Override void fromXml(Stanza st) throws InvalidXmlException { - this.info = cachePoolInfoFromXml(st); + this.info = FSImageSerialization.readCachePoolInfo(st); readRpcIdsFromXml(st); } @@ -3284,8 +3177,8 @@ public abstract class FSEditLogOp { if (info.getMode() != null) { fields.add("mode=" + info.getMode().toString()); } - if (info.getWeight() != null) { - fields.add("weight=" + info.getWeight()); + if (info.getLimit() != null) { + fields.add("limit=" + info.getLimit()); } builder.append(Joiner.on(",").join(fields)); appendRpcIdsToString(builder, rpcClientId, rpcCallId); @@ -3757,41 +3650,4 @@ public abstract class FSEditLogOp { short mode = Short.valueOf(st.getValue("MODE")); return new FsPermission(mode); } - - public static void cachePoolInfoToXml(ContentHandler contentHandler, - CachePoolInfo info) throws SAXException { - XMLUtils.addSaxString(contentHandler, "POOLNAME", info.getPoolName()); - if (info.getOwnerName() != null) { - XMLUtils.addSaxString(contentHandler, "OWNERNAME", info.getOwnerName()); - } - if (info.getGroupName() != null) { - XMLUtils.addSaxString(contentHandler, "GROUPNAME", info.getGroupName()); - } - if (info.getMode() != null) { - fsPermissionToXml(contentHandler, info.getMode()); - } - if (info.getWeight() != null) { - XMLUtils.addSaxString(contentHandler, "WEIGHT", - Integer.toString(info.getWeight())); - } - } - - public static CachePoolInfo cachePoolInfoFromXml(Stanza st) - throws InvalidXmlException { - String poolName = st.getValue("POOLNAME"); - CachePoolInfo info = new CachePoolInfo(poolName); - if (st.hasChildren("OWNERNAME")) { - info.setOwnerName(st.getValue("OWNERNAME")); - } - if (st.hasChildren("GROUPNAME")) { - info.setGroupName(st.getValue("GROUPNAME")); - } - if (st.hasChildren("MODE")) { - info.setMode(FSEditLogOp.fsPermissionFromXml(st)); - } - if (st.hasChildren("WEIGHT")) { - info.setWeight(Integer.parseInt(st.getValue("WEIGHT"))); - } - return info; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index 2166b780d84..9d3fbcb6f7e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -30,6 +30,8 @@ import org.apache.hadoop.fs.permission.PermissionStatus; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.DeprecatedUTF8; import org.apache.hadoop.hdfs.protocol.Block; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; +import org.apache.hadoop.hdfs.protocol.CachePoolInfo; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.protocol.LayoutVersion.Feature; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; @@ -38,11 +40,16 @@ import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.BlockUCState; import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeDirectorySnapshottable; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat; import org.apache.hadoop.hdfs.server.namenode.snapshot.SnapshotFSImageFormat.ReferenceMap; +import org.apache.hadoop.hdfs.util.XMLUtils; +import org.apache.hadoop.hdfs.util.XMLUtils.InvalidXmlException; +import org.apache.hadoop.hdfs.util.XMLUtils.Stanza; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.ShortWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableUtils; +import org.xml.sax.ContentHandler; +import org.xml.sax.SAXException; import com.google.common.base.Preconditions; @@ -476,4 +483,202 @@ public class FSImageSerialization { } return ret; } + + public static void writeCacheDirectiveInfo(DataOutputStream out, + CacheDirectiveInfo directive) throws IOException { + writeLong(directive.getId(), out); + int flags = + ((directive.getPath() != null) ? 0x1 : 0) | + ((directive.getReplication() != null) ? 0x2 : 0) | + ((directive.getPool() != null) ? 0x4 : 0) | + ((directive.getExpiration() != null) ? 
0x8 : 0); + out.writeInt(flags); + if (directive.getPath() != null) { + writeString(directive.getPath().toUri().getPath(), out); + } + if (directive.getReplication() != null) { + writeShort(directive.getReplication(), out); + } + if (directive.getPool() != null) { + writeString(directive.getPool(), out); + } + if (directive.getExpiration() != null) { + writeLong(directive.getExpiration().getMillis(), out); + } + } + + public static CacheDirectiveInfo readCacheDirectiveInfo(DataInput in) + throws IOException { + CacheDirectiveInfo.Builder builder = + new CacheDirectiveInfo.Builder(); + builder.setId(readLong(in)); + int flags = in.readInt(); + if ((flags & 0x1) != 0) { + builder.setPath(new Path(readString(in))); + } + if ((flags & 0x2) != 0) { + builder.setReplication(readShort(in)); + } + if ((flags & 0x4) != 0) { + builder.setPool(readString(in)); + } + if ((flags & 0x8) != 0) { + builder.setExpiration( + CacheDirectiveInfo.Expiration.newAbsolute(readLong(in))); + } + if ((flags & ~0xF) != 0) { + throw new IOException("unknown flags set in " + + "ModifyCacheDirectiveInfoOp: " + flags); + } + return builder.build(); + } + + public static CacheDirectiveInfo readCacheDirectiveInfo(Stanza st) + throws InvalidXmlException { + CacheDirectiveInfo.Builder builder = + new CacheDirectiveInfo.Builder(); + builder.setId(Long.parseLong(st.getValue("ID"))); + String path = st.getValueOrNull("PATH"); + if (path != null) { + builder.setPath(new Path(path)); + } + String replicationString = st.getValueOrNull("REPLICATION"); + if (replicationString != null) { + builder.setReplication(Short.parseShort(replicationString)); + } + String pool = st.getValueOrNull("POOL"); + if (pool != null) { + builder.setPool(pool); + } + String expiryTime = st.getValueOrNull("EXPIRATION"); + if (expiryTime != null) { + builder.setExpiration(CacheDirectiveInfo.Expiration.newAbsolute( + Long.parseLong(expiryTime))); + } + return builder.build(); + } + + public static void writeCacheDirectiveInfo(ContentHandler contentHandler, + CacheDirectiveInfo directive) throws SAXException { + XMLUtils.addSaxString(contentHandler, "ID", + Long.toString(directive.getId())); + if (directive.getPath() != null) { + XMLUtils.addSaxString(contentHandler, "PATH", + directive.getPath().toUri().getPath()); + } + if (directive.getReplication() != null) { + XMLUtils.addSaxString(contentHandler, "REPLICATION", + Short.toString(directive.getReplication())); + } + if (directive.getPool() != null) { + XMLUtils.addSaxString(contentHandler, "POOL", directive.getPool()); + } + if (directive.getExpiration() != null) { + XMLUtils.addSaxString(contentHandler, "EXPIRATION", + "" + directive.getExpiration().getMillis()); + } + } + + public static void writeCachePoolInfo(DataOutputStream out, CachePoolInfo info) + throws IOException { + writeString(info.getPoolName(), out); + + final String ownerName = info.getOwnerName(); + final String groupName = info.getGroupName(); + final Long limit = info.getLimit(); + final FsPermission mode = info.getMode(); + + boolean hasOwner, hasGroup, hasMode, hasLimit; + hasOwner = ownerName != null; + hasGroup = groupName != null; + hasMode = mode != null; + hasLimit = limit != null; + + int flags = + (hasOwner ? 0x1 : 0) | + (hasGroup ? 0x2 : 0) | + (hasMode ? 0x4 : 0) | + (hasLimit ? 
0x8 : 0); + writeInt(flags, out); + + if (hasOwner) { + writeString(ownerName, out); + } + if (hasGroup) { + writeString(groupName, out); + } + if (hasMode) { + mode.write(out); + } + if (hasLimit) { + writeLong(limit, out); + } + } + + public static CachePoolInfo readCachePoolInfo(DataInput in) + throws IOException { + String poolName = readString(in); + CachePoolInfo info = new CachePoolInfo(poolName); + int flags = readInt(in); + if ((flags & 0x1) != 0) { + info.setOwnerName(readString(in)); + } + if ((flags & 0x2) != 0) { + info.setGroupName(readString(in)); + } + if ((flags & 0x4) != 0) { + info.setMode(FsPermission.read(in)); + } + if ((flags & 0x8) != 0) { + info.setLimit(readLong(in)); + } + if ((flags & ~0xF) != 0) { + throw new IOException("Unknown flag in CachePoolInfo: " + flags); + } + return info; + } + + public static void writeCachePoolInfo(ContentHandler contentHandler, + CachePoolInfo info) throws SAXException { + XMLUtils.addSaxString(contentHandler, "POOLNAME", info.getPoolName()); + + final String ownerName = info.getOwnerName(); + final String groupName = info.getGroupName(); + final Long limit = info.getLimit(); + final FsPermission mode = info.getMode(); + + if (ownerName != null) { + XMLUtils.addSaxString(contentHandler, "OWNERNAME", ownerName); + } + if (groupName != null) { + XMLUtils.addSaxString(contentHandler, "GROUPNAME", groupName); + } + if (mode != null) { + FSEditLogOp.fsPermissionToXml(contentHandler, mode); + } + if (limit != null) { + XMLUtils.addSaxString(contentHandler, "LIMIT", + Long.toString(limit)); + } + } + + public static CachePoolInfo readCachePoolInfo(Stanza st) + throws InvalidXmlException { + String poolName = st.getValue("POOLNAME"); + CachePoolInfo info = new CachePoolInfo(poolName); + if (st.hasChildren("OWNERNAME")) { + info.setOwnerName(st.getValue("OWNERNAME")); + } + if (st.hasChildren("GROUPNAME")) { + info.setGroupName(st.getValue("GROUPNAME")); + } + if (st.hasChildren("MODE")) { + info.setMode(FSEditLogOp.fsPermissionFromXml(st)); + } + if (st.hasChildren("LIMIT")) { + info.setLimit(Long.parseLong(st.getValue("LIMIT"))); + } + return info; + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index fd3d06b2da9..d293006d97d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -126,6 +126,7 @@ import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BatchedRemoteIterator.BatchedListEntries; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.DirectoryListingStartAfterNotFoundException; @@ -7052,8 +7053,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - long addCacheDirective( - CacheDirectiveInfo directive) throws IOException { + long addCacheDirective(CacheDirectiveInfo directive, EnumSet flags) + throws IOException { checkOperation(OperationCategory.WRITE); final FSPermissionChecker pc = isPermissionEnabled ? 
getPermissionChecker() : null; @@ -7076,7 +7077,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, "for this operation."); } CacheDirectiveInfo effectiveDirective = - cacheManager.addDirective(directive, pc); + cacheManager.addDirective(directive, pc, flags); getEditLog().logAddCacheDirectiveInfo(effectiveDirective, cacheEntry != null); result = effectiveDirective.getId(); @@ -7094,8 +7095,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return result; } - void modifyCacheDirective( - CacheDirectiveInfo directive) throws IOException { + void modifyCacheDirective(CacheDirectiveInfo directive, + EnumSet flags) throws IOException { checkOperation(OperationCategory.WRITE); final FSPermissionChecker pc = isPermissionEnabled ? getPermissionChecker() : null; @@ -7111,7 +7112,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new SafeModeException( "Cannot add cache directive", safeMode); } - cacheManager.modifyDirective(directive, pc); + cacheManager.modifyDirective(directive, pc, flags); getEditLog().logModifyCacheDirectiveInfo(directive, cacheEntry != null); success = true; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java index 84360e5eb42..af7262605ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNodeRpcServer.java @@ -29,6 +29,7 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.util.Arrays; import java.util.Collection; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -36,6 +37,7 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.hadoop.HadoopIllegalArgumentException; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.CommonConfigurationKeys; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; @@ -1239,14 +1241,14 @@ class NameNodeRpcServer implements NamenodeProtocols { @Override public long addCacheDirective( - CacheDirectiveInfo path) throws IOException { - return namesystem.addCacheDirective(path); + CacheDirectiveInfo path, EnumSet flags) throws IOException { + return namesystem.addCacheDirective(path, flags); } @Override public void modifyCacheDirective( - CacheDirectiveInfo directive) throws IOException { - namesystem.modifyCacheDirective(directive); + CacheDirectiveInfo directive, EnumSet flags) throws IOException { + namesystem.modifyCacheDirective(directive, flags); } @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java index c6dc09360b4..0e34db3c0bd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/tools/CacheAdmin.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hdfs.tools; import java.io.IOException; +import java.util.EnumSet; import java.util.LinkedList; import java.util.List; @@ -25,6 +26,7 @@ import org.apache.commons.lang.WordUtils; import 
org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; @@ -135,6 +137,7 @@ public class CacheAdmin extends Configured implements Tool { public String getShortUsage() { return "[" + getName() + " -path -pool " + + "[-force] " + "[-replication ] [-ttl ]]\n"; } @@ -146,6 +149,8 @@ public class CacheAdmin extends Configured implements Tool { listing.addRow("", "The pool to which the directive will be " + "added. You must have write permission on the cache pool " + "in order to add new directives."); + listing.addRow("-force", + "Skips checking of cache pool resource limits."); listing.addRow("", "The cache replication factor to use. " + "Defaults to 1."); listing.addRow("", "How long the directive is " + @@ -174,7 +179,7 @@ public class CacheAdmin extends Configured implements Tool { return 1; } builder.setPool(poolName); - + boolean force = StringUtils.popOption("-force", args); String replicationString = StringUtils.popOptionWithArgument("-replication", args); if (replicationString != null) { @@ -201,8 +206,12 @@ public class CacheAdmin extends Configured implements Tool { DistributedFileSystem dfs = getDFS(conf); CacheDirectiveInfo directive = builder.build(); + EnumSet flags = EnumSet.noneOf(CacheFlag.class); + if (force) { + flags.add(CacheFlag.FORCE); + } try { - long id = dfs.addCacheDirective(directive); + long id = dfs.addCacheDirective(directive, flags); System.out.println("Added cache directive " + id); } catch (IOException e) { System.err.println(prettifyException(e)); @@ -282,7 +291,7 @@ public class CacheAdmin extends Configured implements Tool { @Override public String getShortUsage() { return "[" + getName() + - " -id [-path ] [-replication ] " + + " -id [-path ] [-force] [-replication ] " + "[-pool ] [-ttl ]]\n"; } @@ -292,6 +301,8 @@ public class CacheAdmin extends Configured implements Tool { listing.addRow("", "The ID of the directive to modify (required)"); listing.addRow("", "A path to cache. The path can be " + "a directory or a file. (optional)"); + listing.addRow("-force", + "Skips checking of cache pool resource limits."); listing.addRow("", "The cache replication factor to use. 
" + "(optional)"); listing.addRow("", "The pool to which the directive will be " + @@ -322,6 +333,7 @@ public class CacheAdmin extends Configured implements Tool { builder.setPath(new Path(path)); modified = true; } + boolean force = StringUtils.popOption("-force", args); String replicationString = StringUtils.popOptionWithArgument("-replication", args); if (replicationString != null) { @@ -357,8 +369,12 @@ public class CacheAdmin extends Configured implements Tool { return 1; } DistributedFileSystem dfs = getDFS(conf); + EnumSet flags = EnumSet.noneOf(CacheFlag.class); + if (force) { + flags.add(CacheFlag.FORCE); + } try { - dfs.modifyCacheDirective(builder.build()); + dfs.modifyCacheDirective(builder.build(), flags); System.out.println("Modified cache directive " + idString); } catch (IOException e) { System.err.println(prettifyException(e)); @@ -536,7 +552,7 @@ public class CacheAdmin extends Configured implements Tool { @Override public String getShortUsage() { return "[" + NAME + " [-owner ] " + - "[-group ] [-mode ] [-weight ]]\n"; + "[-group ] [-mode ] [-limit ]]\n"; } @Override @@ -551,11 +567,10 @@ public class CacheAdmin extends Configured implements Tool { listing.addRow("", "UNIX-style permissions for the pool. " + "Permissions are specified in octal, e.g. 0755. " + "By default, this is set to " + String.format("0%03o", - FsPermission.getCachePoolDefault().toShort())); - listing.addRow("", "Weight of the pool. " + - "This is a relative measure of the importance of the pool used " + - "during cache resource management. By default, it is set to " + - CachePool.DEFAULT_WEIGHT); + FsPermission.getCachePoolDefault().toShort()) + "."); + listing.addRow("", "The maximum number of bytes that can be " + + "cached by directives in this pool, in aggregate. 
By default, " + + "no limit is set."); return getShortUsage() + "\n" + "Add a new cache pool.\n\n" + @@ -564,34 +579,32 @@ public class CacheAdmin extends Configured implements Tool { @Override public int run(Configuration conf, List args) throws IOException { - String owner = StringUtils.popOptionWithArgument("-owner", args); - if (owner == null) { - owner = UserGroupInformation.getCurrentUser().getShortUserName(); - } - String group = StringUtils.popOptionWithArgument("-group", args); - if (group == null) { - group = UserGroupInformation.getCurrentUser().getGroupNames()[0]; - } - String modeString = StringUtils.popOptionWithArgument("-mode", args); - int mode; - if (modeString == null) { - mode = FsPermission.getCachePoolDefault().toShort(); - } else { - mode = Integer.parseInt(modeString, 8); - } - String weightString = StringUtils.popOptionWithArgument("-weight", args); - int weight; - if (weightString == null) { - weight = CachePool.DEFAULT_WEIGHT; - } else { - weight = Integer.parseInt(weightString); - } String name = StringUtils.popFirstNonOption(args); if (name == null) { System.err.println("You must specify a name when creating a " + "cache pool."); return 1; } + CachePoolInfo info = new CachePoolInfo(name); + + String owner = StringUtils.popOptionWithArgument("-owner", args); + if (owner != null) { + info.setOwnerName(owner); + } + String group = StringUtils.popOptionWithArgument("-group", args); + if (group != null) { + info.setGroupName(group); + } + String modeString = StringUtils.popOptionWithArgument("-mode", args); + if (modeString != null) { + short mode = Short.parseShort(modeString, 8); + info.setMode(new FsPermission(mode)); + } + String limitString = StringUtils.popOptionWithArgument("-limit", args); + if (limitString != null) { + long limit = Long.parseLong(limitString); + info.setLimit(limit); + } if (!args.isEmpty()) { System.err.print("Can't understand arguments: " + Joiner.on(" ").join(args) + "\n"); @@ -599,11 +612,6 @@ public class CacheAdmin extends Configured implements Tool { return 1; } DistributedFileSystem dfs = getDFS(conf); - CachePoolInfo info = new CachePoolInfo(name). - setOwnerName(owner). - setGroupName(group). - setMode(new FsPermission((short)mode)). - setWeight(weight); try { dfs.addCachePool(info); } catch (IOException e) { @@ -624,7 +632,7 @@ public class CacheAdmin extends Configured implements Tool { @Override public String getShortUsage() { return "[" + getName() + " [-owner ] " + - "[-group ] [-mode ] [-weight ]]\n"; + "[-group ] [-mode ] [-limit ]]\n"; } @Override @@ -635,11 +643,12 @@ public class CacheAdmin extends Configured implements Tool { listing.addRow("", "Username of the owner of the pool"); listing.addRow("", "Groupname of the group of the pool."); listing.addRow("", "Unix-style permissions of the pool in octal."); - listing.addRow("", "Weight of the pool."); + listing.addRow("", "Maximum number of bytes that can be cached " + + "by this pool."); return getShortUsage() + "\n" + WordUtils.wrap("Modifies the metadata of an existing cache pool. " + - "See usage of " + AddCachePoolCommand.NAME + " for more details", + "See usage of " + AddCachePoolCommand.NAME + " for more details.", MAX_LINE_WIDTH) + "\n\n" + listing.toString(); } @@ -651,9 +660,9 @@ public class CacheAdmin extends Configured implements Tool { String modeString = StringUtils.popOptionWithArgument("-mode", args); Integer mode = (modeString == null) ? 
null : Integer.parseInt(modeString, 8); - String weightString = StringUtils.popOptionWithArgument("-weight", args); - Integer weight = (weightString == null) ? - null : Integer.parseInt(weightString); + String limitString = StringUtils.popOptionWithArgument("-limit", args); + Long limit = (limitString == null) ? + null : Long.parseLong(limitString); String name = StringUtils.popFirstNonOption(args); if (name == null) { System.err.println("You must specify a name when creating a " + @@ -680,8 +689,8 @@ public class CacheAdmin extends Configured implements Tool { info.setMode(new FsPermission(mode.shortValue())); changed = true; } - if (weight != null) { - info.setWeight(weight); + if (limit != null) { + info.setLimit(limit); changed = true; } if (!changed) { @@ -709,8 +718,8 @@ public class CacheAdmin extends Configured implements Tool { System.out.print(prefix + "mode " + new FsPermission(mode.shortValue())); prefix = " and "; } - if (weight != null) { - System.out.print(prefix + "weight " + weight); + if (limit != null) { + System.out.print(prefix + "limit " + limit); prefix = " and "; } System.out.print("\n"); @@ -804,11 +813,12 @@ public class CacheAdmin extends Configured implements Tool { addField("OWNER", Justification.LEFT). addField("GROUP", Justification.LEFT). addField("MODE", Justification.LEFT). - addField("WEIGHT", Justification.RIGHT); + addField("LIMIT", Justification.RIGHT); if (printStats) { builder. addField("BYTES_NEEDED", Justification.RIGHT). addField("BYTES_CACHED", Justification.RIGHT). + addField("BYTES_OVERLIMIT", Justification.RIGHT). addField("FILES_NEEDED", Justification.RIGHT). addField("FILES_CACHED", Justification.RIGHT); } @@ -825,12 +835,19 @@ public class CacheAdmin extends Configured implements Tool { row.add(info.getOwnerName()); row.add(info.getGroupName()); row.add(info.getMode() != null ? info.getMode().toString() : null); - row.add( - info.getWeight() != null ? 
info.getWeight().toString() : null); + Long limit = info.getLimit(); + String limitString; + if (limit != null && limit.equals(CachePool.DEFAULT_LIMIT)) { + limitString = "unlimited"; + } else { + limitString = "" + limit; + } + row.add(limitString); if (printStats) { CachePoolStats stats = entry.getStats(); row.add(Long.toString(stats.getBytesNeeded())); row.add(Long.toString(stats.getBytesCached())); + row.add(Long.toString(stats.getBytesOverlimit())); row.add(Long.toString(stats.getFilesNeeded())); row.add(Long.toString(stats.getFilesCached())); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto index 11eddc326fd..ee1d10415b9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/proto/ClientNamenodeProtocol.proto @@ -385,8 +385,13 @@ message CacheDirectiveStatsProto { required bool hasExpired = 5; } +enum CacheFlagProto { + FORCE = 0x01; // Ignore pool resource limits +} + message AddCacheDirectiveRequestProto { required CacheDirectiveInfoProto info = 1; + optional uint32 cacheFlags = 2; // bits set using CacheFlag } message AddCacheDirectiveResponseProto { @@ -395,6 +400,7 @@ message AddCacheDirectiveResponseProto { message ModifyCacheDirectiveRequestProto { required CacheDirectiveInfoProto info = 1; + optional uint32 cacheFlags = 2; // bits set using CacheFlag } message ModifyCacheDirectiveResponseProto { @@ -427,14 +433,15 @@ message CachePoolInfoProto { optional string ownerName = 2; optional string groupName = 3; optional int32 mode = 4; - optional int32 weight = 5; + optional int64 limit = 5; } message CachePoolStatsProto { required int64 bytesNeeded = 1; required int64 bytesCached = 2; - required int64 filesNeeded = 3; - required int64 filesCached = 4; + required int64 bytesOverlimit = 3; + required int64 filesNeeded = 4; + required int64 filesCached = 5; } message AddCachePoolRequestProto { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index ad452f97330..74152e27795 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -1036,20 +1036,20 @@ public class DFSTestUtil { // OP_ADD_CACHE_POOL filesystem.addCachePool(new CachePoolInfo("pool1")); // OP_MODIFY_CACHE_POOL - filesystem.modifyCachePool(new CachePoolInfo("pool1").setWeight(99)); + filesystem.modifyCachePool(new CachePoolInfo("pool1").setLimit(99l)); // OP_ADD_PATH_BASED_CACHE_DIRECTIVE long id = filesystem.addCacheDirective( new CacheDirectiveInfo.Builder(). setPath(new Path("/path")). setReplication((short)1). setPool("pool1"). - build()); + build(), EnumSet.of(CacheFlag.FORCE)); // OP_MODIFY_PATH_BASED_CACHE_DIRECTIVE filesystem.modifyCacheDirective( new CacheDirectiveInfo.Builder(). setId(id). setReplication((short)2). 
- build()); + build(), EnumSet.of(CacheFlag.FORCE)); // OP_REMOVE_PATH_BASED_CACHE_DIRECTIVE filesystem.removeCacheDirective(id); // OP_REMOVE_CACHE_POOL diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java index 9a3572b5ba9..43a4af1fe81 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/OfflineEditsViewerHelper.java @@ -239,7 +239,7 @@ public class OfflineEditsViewerHelper { .setOwnerName("carlton") .setGroupName("party") .setMode(new FsPermission((short)0700)) - .setWeight(1989)); + .setLimit(1989l)); // OP_ADD_PATH_BASED_CACHE_DIRECTIVE 33 long id = dfs.addCacheDirective( new CacheDirectiveInfo.Builder(). diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java index eb5f7a0d57f..6dbbb8363e0 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java @@ -34,6 +34,7 @@ import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.Date; +import java.util.EnumSet; import java.util.Iterator; import java.util.LinkedList; import java.util.List; @@ -43,6 +44,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.BlockLocation; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; @@ -92,25 +94,48 @@ public class TestCacheDirectives { static private MiniDFSCluster cluster; static private DistributedFileSystem dfs; static private NamenodeProtocols proto; + static private NameNode namenode; static private CacheManipulator prevCacheManipulator; static { EditLogFileOutputStream.setShouldSkipFsyncForTesting(false); } - @Before - public void setup() throws Exception { - conf = new HdfsConfiguration(); + private static final long BLOCK_SIZE = 512; + private static final int NUM_DATANODES = 4; + // Most Linux installs will allow non-root users to lock 64KB. + // In this test though, we stub out mlock so this doesn't matter. 
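// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: one way a client could exercise
// the byte-based pool limits and the CacheFlag.FORCE option added by this
// change. It assumes an already-running cluster reachable through a
// DistributedFileSystem handle named "dfs"; the pool name, path, and 1 MB
// limit are invented for the example.
// ---------------------------------------------------------------------------
import java.io.IOException;
import java.util.EnumSet;
import org.apache.hadoop.fs.CacheFlag;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;

class CachePoolLimitExample {
  static long cacheWithLimit(DistributedFileSystem dfs) throws IOException {
    // Create a pool that may cache at most 1 MB, aggregated over its directives.
    dfs.addCachePool(new CachePoolInfo("examplePool").setLimit(1024L * 1024L));
    // Without FORCE, the NameNode rejects a directive that would exceed the
    // pool's remaining capacity.
    CacheDirectiveInfo directive = new CacheDirectiveInfo.Builder()
        .setPath(new Path("/example/file"))
        .setReplication((short) 1)
        .setPool("examplePool")
        .build();
    long id = dfs.addCacheDirective(directive, EnumSet.noneOf(CacheFlag.class));
    // FORCE skips the limit check, matching the new "-force" CacheAdmin option.
    dfs.modifyCacheDirective(
        new CacheDirectiveInfo.Builder().setId(id).setReplication((short) 2).build(),
        EnumSet.of(CacheFlag.FORCE));
    return id;
  }
}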
+ private static final long CACHE_CAPACITY = 64 * 1024 / NUM_DATANODES; + + private static HdfsConfiguration createCachingConf() { + HdfsConfiguration conf = new HdfsConfiguration(); + conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); + conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY); + conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1); + conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, true); + conf.setLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 1000); + conf.setLong(DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1000); // set low limits here for testing purposes conf.setInt(DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_POOLS_NUM_RESPONSES, 2); - conf.setInt(DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES, 2); - cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).build(); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_LIST_CACHE_DIRECTIVES_NUM_RESPONSES, + 2); + + return conf; + } + + @Before + public void setup() throws Exception { + conf = createCachingConf(); + cluster = + new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); cluster.waitActive(); dfs = cluster.getFileSystem(); proto = cluster.getNameNodeRpc(); + namenode = cluster.getNameNode(); prevCacheManipulator = NativeIO.POSIX.getCacheManipulator(); NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator()); - LogManager.getLogger(CacheReplicationMonitor.class).setLevel(Level.TRACE); + LogManager.getLogger(CacheReplicationMonitor.class.getName()).setLevel( + Level.TRACE); } @After @@ -127,7 +152,7 @@ public class TestCacheDirectives { final String poolName = "pool1"; CachePoolInfo info = new CachePoolInfo(poolName). setOwnerName("bob").setGroupName("bobgroup"). - setMode(new FsPermission((short)0755)).setWeight(150); + setMode(new FsPermission((short)0755)).setLimit(150l); // Add a pool dfs.addCachePool(info); @@ -168,7 +193,7 @@ public class TestCacheDirectives { // Modify the pool info.setOwnerName("jane").setGroupName("janegroup") - .setMode(new FsPermission((short)0700)).setWeight(314); + .setMode(new FsPermission((short)0700)).setLimit(314l); dfs.modifyCachePool(info); // Do some invalid modify pools @@ -263,10 +288,10 @@ public class TestCacheDirectives { String ownerName = "abc"; String groupName = "123"; FsPermission mode = new FsPermission((short)0755); - int weight = 150; + long limit = 150; dfs.addCachePool(new CachePoolInfo(poolName). setOwnerName(ownerName).setGroupName(groupName). - setMode(mode).setWeight(weight)); + setMode(mode).setLimit(limit)); RemoteIterator iter = dfs.listCachePools(); CachePoolInfo info = iter.next().getInfo(); @@ -277,10 +302,10 @@ public class TestCacheDirectives { ownerName = "def"; groupName = "456"; mode = new FsPermission((short)0700); - weight = 151; + limit = 151; dfs.modifyCachePool(new CachePoolInfo(poolName). setOwnerName(ownerName).setGroupName(groupName). 
- setMode(mode).setWeight(weight)); + setMode(mode).setLimit(limit)); iter = dfs.listCachePools(); info = iter.next().getInfo(); @@ -288,7 +313,7 @@ public class TestCacheDirectives { assertEquals(ownerName, info.getOwnerName()); assertEquals(groupName, info.getGroupName()); assertEquals(mode, info.getMode()); - assertEquals(Integer.valueOf(weight), info.getWeight()); + assertEquals(limit, (long)info.getLimit()); dfs.removeCachePool(poolName); iter = dfs.listCachePools(); @@ -495,30 +520,22 @@ public class TestCacheDirectives { @Test(timeout=60000) public void testCacheManagerRestart() throws Exception { - cluster.shutdown(); - cluster = null; - HdfsConfiguration conf = createCachingConf(); - cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0).build(); - - cluster.waitActive(); - DistributedFileSystem dfs = cluster.getFileSystem(); - // Create and validate a pool final String pool = "poolparty"; String groupName = "partygroup"; FsPermission mode = new FsPermission((short)0777); - int weight = 747; + long limit = 747; dfs.addCachePool(new CachePoolInfo(pool) .setGroupName(groupName) .setMode(mode) - .setWeight(weight)); + .setLimit(limit)); RemoteIterator pit = dfs.listCachePools(); assertTrue("No cache pools found", pit.hasNext()); CachePoolInfo info = pit.next().getInfo(); assertEquals(pool, info.getPoolName()); assertEquals(groupName, info.getGroupName()); assertEquals(mode, info.getMode()); - assertEquals(weight, (int)info.getWeight()); + assertEquals(limit, (long)info.getLimit()); assertFalse("Unexpected # of cache pools found", pit.hasNext()); // Create some cache entries @@ -556,7 +573,7 @@ public class TestCacheDirectives { assertEquals(pool, info.getPoolName()); assertEquals(groupName, info.getGroupName()); assertEquals(mode, info.getMode()); - assertEquals(weight, (int)info.getWeight()); + assertEquals(limit, (long)info.getLimit()); assertFalse("Unexpected # of cache pools found", pit.hasNext()); dit = dfs.listCacheDirectives(null); @@ -762,91 +779,64 @@ public class TestCacheDirectives { numCachedReplicas); } - private static final long BLOCK_SIZE = 512; - private static final int NUM_DATANODES = 4; - - // Most Linux installs will allow non-root users to lock 64KB. 
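// ---------------------------------------------------------------------------
// Illustrative sketch, not part of the patch: reading the new limit and
// over-limit statistics back through listCachePools(), much as the assertions
// in the surrounding test hunks do. "dfs" is assumed to be a
// DistributedFileSystem for a cluster that already has cache pools; the
// CachePoolEntry element type is the one this patch works with.
// ---------------------------------------------------------------------------
import java.io.IOException;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.CachePoolEntry;
import org.apache.hadoop.hdfs.protocol.CachePoolInfo;
import org.apache.hadoop.hdfs.protocol.CachePoolStats;

class CachePoolStatsExample {
  static void printPoolLimits(DistributedFileSystem dfs) throws IOException {
    RemoteIterator<CachePoolEntry> it = dfs.listCachePools();
    while (it.hasNext()) {
      CachePoolEntry entry = it.next();
      CachePoolInfo info = entry.getInfo();
      CachePoolStats stats = entry.getStats();
      // getLimit() can be null when the caller only sees a restricted view of
      // the pool; CacheAdmin prints "unlimited" when it equals the pool default.
      Long limit = info.getLimit();
      System.out.println(info.getPoolName()
          + " limit=" + (limit == null ? "n/a" : limit)
          + " bytesCached=" + stats.getBytesCached()
          + " bytesOverlimit=" + stats.getBytesOverlimit());
    }
  }
}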
- private static final long CACHE_CAPACITY = 64 * 1024 / NUM_DATANODES; - - private static HdfsConfiguration createCachingConf() { - HdfsConfiguration conf = new HdfsConfiguration(); - conf.setLong(DFS_BLOCK_SIZE_KEY, BLOCK_SIZE); - conf.setLong(DFS_DATANODE_MAX_LOCKED_MEMORY_KEY, CACHE_CAPACITY); - conf.setLong(DFS_HEARTBEAT_INTERVAL_KEY, 1); - conf.setBoolean(DFS_NAMENODE_CACHING_ENABLED_KEY, true); - conf.setLong(DFS_CACHEREPORT_INTERVAL_MSEC_KEY, 1000); - conf.setLong(DFS_NAMENODE_PATH_BASED_CACHE_REFRESH_INTERVAL_MS, 1000); - return conf; - } - @Test(timeout=120000) public void testWaitForCachedReplicas() throws Exception { - HdfsConfiguration conf = createCachingConf(); FileSystemTestHelper helper = new FileSystemTestHelper(); - MiniDFSCluster cluster = - new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); + GenericTestUtils.waitFor(new Supplier() { + @Override + public Boolean get() { + return ((namenode.getNamesystem().getCacheCapacity() == + (NUM_DATANODES * CACHE_CAPACITY)) && + (namenode.getNamesystem().getCacheUsed() == 0)); + } + }, 500, 60000); - try { - cluster.waitActive(); - DistributedFileSystem dfs = cluster.getFileSystem(); - final NameNode namenode = cluster.getNameNode(); - GenericTestUtils.waitFor(new Supplier() { - @Override - public Boolean get() { - return ((namenode.getNamesystem().getCacheCapacity() == - (NUM_DATANODES * CACHE_CAPACITY)) && - (namenode.getNamesystem().getCacheUsed() == 0)); - } - }, 500, 60000); - - NamenodeProtocols nnRpc = namenode.getRpcServer(); - Path rootDir = helper.getDefaultWorkingDirectory(dfs); - // Create the pool - final String pool = "friendlyPool"; - nnRpc.addCachePool(new CachePoolInfo("friendlyPool")); - // Create some test files - final int numFiles = 2; - final int numBlocksPerFile = 2; - final List paths = new ArrayList(numFiles); - for (int i=0; i entries = - new CacheDirectiveIterator(nnRpc, null); - for (int i=0; i paths = new ArrayList(numFiles); + for (int i=0; i entries = + new CacheDirectiveIterator(nnRpc, null); + for (int i=0; i paths = new LinkedList(); - paths.add(new Path("/foo/bar")); - paths.add(new Path("/foo/baz")); - paths.add(new Path("/foo2/bar2")); - paths.add(new Path("/foo2/baz2")); - dfs.mkdir(new Path("/foo"), FsPermission.getDirDefault()); - dfs.mkdir(new Path("/foo2"), FsPermission.getDirDefault()); - final int numBlocksPerFile = 2; - for (Path path : paths) { - FileSystemTestHelper.createFile(dfs, path, numBlocksPerFile, - (int)BLOCK_SIZE, (short)3, false); - } - waitForCachedBlocks(namenode, 0, 0, - "testWaitForCachedReplicasInDirectory:0"); - - // cache entire directory - long id = dfs.addCacheDirective( - new CacheDirectiveInfo.Builder(). - setPath(new Path("/foo")). - setReplication((short)2). - setPool(pool). - build()); - waitForCachedBlocks(namenode, 4, 8, - "testWaitForCachedReplicasInDirectory:1:blocks"); - // Verify that listDirectives gives the stats we want. - waitForCacheDirectiveStats(dfs, - 4 * numBlocksPerFile * BLOCK_SIZE, 4 * numBlocksPerFile * BLOCK_SIZE, - 2, 2, - new CacheDirectiveInfo.Builder(). - setPath(new Path("/foo")). - build(), - "testWaitForCachedReplicasInDirectory:1:directive"); - waitForCachePoolStats(dfs, - 4 * numBlocksPerFile * BLOCK_SIZE, 4 * numBlocksPerFile * BLOCK_SIZE, - 2, 2, - poolInfo, "testWaitForCachedReplicasInDirectory:1:pool"); - - long id2 = dfs.addCacheDirective( - new CacheDirectiveInfo.Builder(). - setPath(new Path("/foo/bar")). - setReplication((short)4). - setPool(pool). 
- build()); - // wait for an additional 2 cached replicas to come up - waitForCachedBlocks(namenode, 4, 10, - "testWaitForCachedReplicasInDirectory:2:blocks"); - // the directory directive's stats are unchanged - waitForCacheDirectiveStats(dfs, - 4 * numBlocksPerFile * BLOCK_SIZE, 4 * numBlocksPerFile * BLOCK_SIZE, - 2, 2, - new CacheDirectiveInfo.Builder(). - setPath(new Path("/foo")). - build(), - "testWaitForCachedReplicasInDirectory:2:directive-1"); - // verify /foo/bar's stats - waitForCacheDirectiveStats(dfs, - 4 * numBlocksPerFile * BLOCK_SIZE, - // only 3 because the file only has 3 replicas, not 4 as requested. - 3 * numBlocksPerFile * BLOCK_SIZE, - 1, - // only 0 because the file can't be fully cached - 0, - new CacheDirectiveInfo.Builder(). - setPath(new Path("/foo/bar")). - build(), - "testWaitForCachedReplicasInDirectory:2:directive-2"); - waitForCachePoolStats(dfs, - (4+4) * numBlocksPerFile * BLOCK_SIZE, - (4+3) * numBlocksPerFile * BLOCK_SIZE, - 3, 2, - poolInfo, "testWaitForCachedReplicasInDirectory:2:pool"); - - // remove and watch numCached go to 0 - dfs.removeCacheDirective(id); - dfs.removeCacheDirective(id2); - waitForCachedBlocks(namenode, 0, 0, - "testWaitForCachedReplicasInDirectory:3:blocks"); - waitForCachePoolStats(dfs, - 0, 0, - 0, 0, - poolInfo, "testWaitForCachedReplicasInDirectory:3:pool"); - } finally { - cluster.shutdown(); + // Create the pool + final String pool = "friendlyPool"; + final CachePoolInfo poolInfo = new CachePoolInfo(pool); + dfs.addCachePool(poolInfo); + // Create some test files + final List paths = new LinkedList(); + paths.add(new Path("/foo/bar")); + paths.add(new Path("/foo/baz")); + paths.add(new Path("/foo2/bar2")); + paths.add(new Path("/foo2/baz2")); + dfs.mkdir(new Path("/foo"), FsPermission.getDirDefault()); + dfs.mkdir(new Path("/foo2"), FsPermission.getDirDefault()); + final int numBlocksPerFile = 2; + for (Path path : paths) { + FileSystemTestHelper.createFile(dfs, path, numBlocksPerFile, + (int)BLOCK_SIZE, (short)3, false); } + waitForCachedBlocks(namenode, 0, 0, + "testWaitForCachedReplicasInDirectory:0"); + + // cache entire directory + long id = dfs.addCacheDirective( + new CacheDirectiveInfo.Builder(). + setPath(new Path("/foo")). + setReplication((short)2). + setPool(pool). + build()); + waitForCachedBlocks(namenode, 4, 8, + "testWaitForCachedReplicasInDirectory:1:blocks"); + // Verify that listDirectives gives the stats we want. + waitForCacheDirectiveStats(dfs, + 4 * numBlocksPerFile * BLOCK_SIZE, 4 * numBlocksPerFile * BLOCK_SIZE, + 2, 2, + new CacheDirectiveInfo.Builder(). + setPath(new Path("/foo")). + build(), + "testWaitForCachedReplicasInDirectory:1:directive"); + waitForCachePoolStats(dfs, + 4 * numBlocksPerFile * BLOCK_SIZE, 4 * numBlocksPerFile * BLOCK_SIZE, + 2, 2, + poolInfo, "testWaitForCachedReplicasInDirectory:1:pool"); + + long id2 = dfs.addCacheDirective( + new CacheDirectiveInfo.Builder(). + setPath(new Path("/foo/bar")). + setReplication((short)4). + setPool(pool). + build()); + // wait for an additional 2 cached replicas to come up + waitForCachedBlocks(namenode, 4, 10, + "testWaitForCachedReplicasInDirectory:2:blocks"); + // the directory directive's stats are unchanged + waitForCacheDirectiveStats(dfs, + 4 * numBlocksPerFile * BLOCK_SIZE, 4 * numBlocksPerFile * BLOCK_SIZE, + 2, 2, + new CacheDirectiveInfo.Builder(). + setPath(new Path("/foo")). 
+ build(), + "testWaitForCachedReplicasInDirectory:2:directive-1"); + // verify /foo/bar's stats + waitForCacheDirectiveStats(dfs, + 4 * numBlocksPerFile * BLOCK_SIZE, + // only 3 because the file only has 3 replicas, not 4 as requested. + 3 * numBlocksPerFile * BLOCK_SIZE, + 1, + // only 0 because the file can't be fully cached + 0, + new CacheDirectiveInfo.Builder(). + setPath(new Path("/foo/bar")). + build(), + "testWaitForCachedReplicasInDirectory:2:directive-2"); + waitForCachePoolStats(dfs, + (4+4) * numBlocksPerFile * BLOCK_SIZE, + (4+3) * numBlocksPerFile * BLOCK_SIZE, + 3, 2, + poolInfo, "testWaitForCachedReplicasInDirectory:2:pool"); + + // remove and watch numCached go to 0 + dfs.removeCacheDirective(id); + dfs.removeCacheDirective(id2); + waitForCachedBlocks(namenode, 0, 0, + "testWaitForCachedReplicasInDirectory:3:blocks"); + waitForCachePoolStats(dfs, + 0, 0, + 0, 0, + poolInfo, "testWaitForCachedReplicasInDirectory:3:pool"); } /** @@ -1000,68 +979,57 @@ public class TestCacheDirectives { */ @Test(timeout=120000) public void testReplicationFactor() throws Exception { - HdfsConfiguration conf = createCachingConf(); - MiniDFSCluster cluster = - new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); - - try { - cluster.waitActive(); - DistributedFileSystem dfs = cluster.getFileSystem(); - NameNode namenode = cluster.getNameNode(); - // Create the pool - final String pool = "friendlyPool"; - dfs.addCachePool(new CachePoolInfo(pool)); - // Create some test files - final List paths = new LinkedList(); - paths.add(new Path("/foo/bar")); - paths.add(new Path("/foo/baz")); - paths.add(new Path("/foo2/bar2")); - paths.add(new Path("/foo2/baz2")); - dfs.mkdir(new Path("/foo"), FsPermission.getDirDefault()); - dfs.mkdir(new Path("/foo2"), FsPermission.getDirDefault()); - final int numBlocksPerFile = 2; - for (Path path : paths) { - FileSystemTestHelper.createFile(dfs, path, numBlocksPerFile, - (int)BLOCK_SIZE, (short)3, false); - } - waitForCachedBlocks(namenode, 0, 0, "testReplicationFactor:0"); - checkNumCachedReplicas(dfs, paths, 0, 0); - // cache directory - long id = dfs.addCacheDirective( - new CacheDirectiveInfo.Builder(). - setPath(new Path("/foo")). - setReplication((short)1). - setPool(pool). - build()); - waitForCachedBlocks(namenode, 4, 4, "testReplicationFactor:1"); - checkNumCachedReplicas(dfs, paths, 4, 4); - // step up the replication factor - for (int i=2; i<=3; i++) { - dfs.modifyCacheDirective( - new CacheDirectiveInfo.Builder(). - setId(id). - setReplication((short)i). - build()); - waitForCachedBlocks(namenode, 4, 4*i, "testReplicationFactor:2"); - checkNumCachedReplicas(dfs, paths, 4, 4*i); - } - // step it down - for (int i=2; i>=1; i--) { - dfs.modifyCacheDirective( - new CacheDirectiveInfo.Builder(). - setId(id). - setReplication((short)i). 
- build()); - waitForCachedBlocks(namenode, 4, 4*i, "testReplicationFactor:3"); - checkNumCachedReplicas(dfs, paths, 4, 4*i); - } - // remove and watch numCached go to 0 - dfs.removeCacheDirective(id); - waitForCachedBlocks(namenode, 0, 0, "testReplicationFactor:4"); - checkNumCachedReplicas(dfs, paths, 0, 0); - } finally { - cluster.shutdown(); + // Create the pool + final String pool = "friendlyPool"; + dfs.addCachePool(new CachePoolInfo(pool)); + // Create some test files + final List paths = new LinkedList(); + paths.add(new Path("/foo/bar")); + paths.add(new Path("/foo/baz")); + paths.add(new Path("/foo2/bar2")); + paths.add(new Path("/foo2/baz2")); + dfs.mkdir(new Path("/foo"), FsPermission.getDirDefault()); + dfs.mkdir(new Path("/foo2"), FsPermission.getDirDefault()); + final int numBlocksPerFile = 2; + for (Path path : paths) { + FileSystemTestHelper.createFile(dfs, path, numBlocksPerFile, + (int)BLOCK_SIZE, (short)3, false); } + waitForCachedBlocks(namenode, 0, 0, "testReplicationFactor:0"); + checkNumCachedReplicas(dfs, paths, 0, 0); + // cache directory + long id = dfs.addCacheDirective( + new CacheDirectiveInfo.Builder(). + setPath(new Path("/foo")). + setReplication((short)1). + setPool(pool). + build()); + waitForCachedBlocks(namenode, 4, 4, "testReplicationFactor:1"); + checkNumCachedReplicas(dfs, paths, 4, 4); + // step up the replication factor + for (int i=2; i<=3; i++) { + dfs.modifyCacheDirective( + new CacheDirectiveInfo.Builder(). + setId(id). + setReplication((short)i). + build()); + waitForCachedBlocks(namenode, 4, 4*i, "testReplicationFactor:2"); + checkNumCachedReplicas(dfs, paths, 4, 4*i); + } + // step it down + for (int i=2; i>=1; i--) { + dfs.modifyCacheDirective( + new CacheDirectiveInfo.Builder(). + setId(id). + setReplication((short)i). 
+ build()); + waitForCachedBlocks(namenode, 4, 4*i, "testReplicationFactor:3"); + checkNumCachedReplicas(dfs, paths, 4, 4*i); + } + // remove and watch numCached go to 0 + dfs.removeCacheDirective(id); + waitForCachedBlocks(namenode, 0, 0, "testReplicationFactor:4"); + checkNumCachedReplicas(dfs, paths, 0, 0); } @Test(timeout=60000) @@ -1081,11 +1049,12 @@ public class TestCacheDirectives { assertNull("Unexpected owner name", info.getOwnerName()); assertNull("Unexpected group name", info.getGroupName()); assertNull("Unexpected mode", info.getMode()); - assertNull("Unexpected weight", info.getWeight()); + assertNull("Unexpected limit", info.getLimit()); // Modify the pool so myuser is now the owner + final long limit = 99; dfs.modifyCachePool(new CachePoolInfo(poolName) .setOwnerName(myUser.getShortUserName()) - .setWeight(99)); + .setLimit(limit)); // Should see full info it = myDfs.listCachePools(); info = it.next().getInfo(); @@ -1096,60 +1065,127 @@ public class TestCacheDirectives { assertNotNull("Expected group name", info.getGroupName()); assertEquals("Mismatched mode", (short) 0700, info.getMode().toShort()); - assertEquals("Mismatched weight", 99, (int)info.getWeight()); + assertEquals("Mismatched limit", limit, (long)info.getLimit()); } - @Test(timeout=60000) + @Test(timeout=120000) public void testExpiry() throws Exception { - HdfsConfiguration conf = createCachingConf(); - MiniDFSCluster cluster = - new MiniDFSCluster.Builder(conf).numDataNodes(NUM_DATANODES).build(); + String pool = "pool1"; + dfs.addCachePool(new CachePoolInfo(pool)); + Path p = new Path("/mypath"); + DFSTestUtil.createFile(dfs, p, BLOCK_SIZE*2, (short)2, 0x999); + // Expire after test timeout + Date start = new Date(); + Date expiry = DateUtils.addSeconds(start, 120); + final long id = dfs.addCacheDirective(new CacheDirectiveInfo.Builder() + .setPath(p) + .setPool(pool) + .setExpiration(CacheDirectiveInfo.Expiration.newAbsolute(expiry)) + .setReplication((short)2) + .build()); + waitForCachedBlocks(cluster.getNameNode(), 2, 4, "testExpiry:1"); + // Change it to expire sooner + dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder().setId(id) + .setExpiration(Expiration.newRelative(0)).build()); + waitForCachedBlocks(cluster.getNameNode(), 0, 0, "testExpiry:2"); + RemoteIterator it = dfs.listCacheDirectives(null); + CacheDirectiveEntry ent = it.next(); + assertFalse(it.hasNext()); + Date entryExpiry = new Date(ent.getInfo().getExpiration().getMillis()); + assertTrue("Directive should have expired", + entryExpiry.before(new Date())); + // Change it back to expire later + dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder().setId(id) + .setExpiration(Expiration.newRelative(120000)).build()); + waitForCachedBlocks(cluster.getNameNode(), 2, 4, "testExpiry:3"); + it = dfs.listCacheDirectives(null); + ent = it.next(); + assertFalse(it.hasNext()); + entryExpiry = new Date(ent.getInfo().getExpiration().getMillis()); + assertTrue("Directive should not have expired", + entryExpiry.after(new Date())); + // Verify that setting a negative TTL throws an error try { - DistributedFileSystem dfs = cluster.getFileSystem(); - String pool = "pool1"; - dfs.addCachePool(new CachePoolInfo(pool)); - Path p = new Path("/mypath"); - DFSTestUtil.createFile(dfs, p, BLOCK_SIZE*2, (short)2, 0x999); - // Expire after test timeout - Date start = new Date(); - Date expiry = DateUtils.addSeconds(start, 120); - final long id = dfs.addCacheDirective(new CacheDirectiveInfo.Builder() - .setPath(p) - .setPool(pool) - 
.setExpiration(CacheDirectiveInfo.Expiration.newAbsolute(expiry)) - .setReplication((short)2) - .build()); - waitForCachedBlocks(cluster.getNameNode(), 2, 4, "testExpiry:1"); - // Change it to expire sooner dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder().setId(id) - .setExpiration(Expiration.newRelative(0)).build()); - waitForCachedBlocks(cluster.getNameNode(), 0, 0, "testExpiry:2"); - RemoteIterator it = dfs.listCacheDirectives(null); - CacheDirectiveEntry ent = it.next(); - assertFalse(it.hasNext()); - Date entryExpiry = new Date(ent.getInfo().getExpiration().getMillis()); - assertTrue("Directive should have expired", - entryExpiry.before(new Date())); - // Change it back to expire later - dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder().setId(id) - .setExpiration(Expiration.newRelative(120000)).build()); - waitForCachedBlocks(cluster.getNameNode(), 2, 4, "testExpiry:3"); - it = dfs.listCacheDirectives(null); - ent = it.next(); - assertFalse(it.hasNext()); - entryExpiry = new Date(ent.getInfo().getExpiration().getMillis()); - assertTrue("Directive should not have expired", - entryExpiry.after(new Date())); - // Verify that setting a negative TTL throws an error - try { - dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder().setId(id) - .setExpiration(Expiration.newRelative(-1)).build()); - } catch (InvalidRequestException e) { - GenericTestUtils - .assertExceptionContains("Cannot set a negative expiration", e); - } - } finally { - cluster.shutdown(); + .setExpiration(Expiration.newRelative(-1)).build()); + } catch (InvalidRequestException e) { + GenericTestUtils + .assertExceptionContains("Cannot set a negative expiration", e); } } + + @Test(timeout=120000) + public void testLimit() throws Exception { + try { + dfs.addCachePool(new CachePoolInfo("poolofnegativity").setLimit(-99l)); + fail("Should not be able to set a negative limit"); + } catch (InvalidRequestException e) { + GenericTestUtils.assertExceptionContains("negative", e); + } + final String destiny = "poolofdestiny"; + final Path path1 = new Path("/destiny"); + DFSTestUtil.createFile(dfs, path1, 2*BLOCK_SIZE, (short)1, 0x9494); + // Start off with a limit that is too small + final CachePoolInfo poolInfo = new CachePoolInfo(destiny) + .setLimit(2*BLOCK_SIZE-1); + dfs.addCachePool(poolInfo); + final CacheDirectiveInfo info1 = new CacheDirectiveInfo.Builder() + .setPool(destiny).setPath(path1).build(); + try { + dfs.addCacheDirective(info1); + fail("Should not be able to cache when there is no more limit"); + } catch (InvalidRequestException e) { + GenericTestUtils.assertExceptionContains("remaining capacity", e); + } + // Raise the limit up to fit and it should work this time + poolInfo.setLimit(2*BLOCK_SIZE); + dfs.modifyCachePool(poolInfo); + long id1 = dfs.addCacheDirective(info1); + waitForCachePoolStats(dfs, + 2*BLOCK_SIZE, 2*BLOCK_SIZE, + 1, 1, + poolInfo, "testLimit:1"); + // Adding another file, it shouldn't be cached + final Path path2 = new Path("/failure"); + DFSTestUtil.createFile(dfs, path2, BLOCK_SIZE, (short)1, 0x9495); + try { + dfs.addCacheDirective(new CacheDirectiveInfo.Builder() + .setPool(destiny).setPath(path2).build(), + EnumSet.noneOf(CacheFlag.class)); + fail("Should not be able to add another cached file"); + } catch (InvalidRequestException e) { + GenericTestUtils.assertExceptionContains("remaining capacity", e); + } + // Bring the limit down, the first file should get uncached + poolInfo.setLimit(BLOCK_SIZE); + dfs.modifyCachePool(poolInfo); + waitForCachePoolStats(dfs, + 
2*BLOCK_SIZE, 0, + 1, 0, + poolInfo, "testLimit:2"); + RemoteIterator it = dfs.listCachePools(); + assertTrue("Expected a cache pool", it.hasNext()); + CachePoolStats stats = it.next().getStats(); + assertEquals("Overlimit bytes should be difference of needed and limit", + BLOCK_SIZE, stats.getBytesOverlimit()); + // Moving a directive to a pool without enough limit should fail + CachePoolInfo inadequate = + new CachePoolInfo("poolofinadequacy").setLimit(BLOCK_SIZE); + dfs.addCachePool(inadequate); + try { + dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder(info1) + .setId(id1).setPool(inadequate.getPoolName()).build(), + EnumSet.noneOf(CacheFlag.class)); + } catch(InvalidRequestException e) { + GenericTestUtils.assertExceptionContains("remaining capacity", e); + } + // Succeeds when force=true + dfs.modifyCacheDirective(new CacheDirectiveInfo.Builder(info1).setId(id1) + .setPool(inadequate.getPoolName()).build(), + EnumSet.of(CacheFlag.FORCE)); + // Also can add with force=true + dfs.addCacheDirective( + new CacheDirectiveInfo.Builder().setPool(inadequate.getPoolName()) + .setPath(path1).build(), EnumSet.of(CacheFlag.FORCE)); + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java index a477b7107e3..2f36da11d47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestRetryCacheWithHA.java @@ -38,6 +38,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CacheFlag; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileStatus; @@ -760,7 +761,7 @@ public class TestRetryCacheWithHA { @Override void invoke() throws Exception { - result = client.addCacheDirective(directive); + result = client.addCacheDirective(directive, EnumSet.of(CacheFlag.FORCE)); } @Override @@ -802,7 +803,7 @@ public class TestRetryCacheWithHA { @Override void prepare() throws Exception { dfs.addCachePool(new CachePoolInfo(directive.getPool())); - id = client.addCacheDirective(directive); + id = client.addCacheDirective(directive, EnumSet.of(CacheFlag.FORCE)); } @Override @@ -811,7 +812,7 @@ public class TestRetryCacheWithHA { new CacheDirectiveInfo.Builder(). setId(id). setReplication(newReplication). 
- build()); + build(), EnumSet.of(CacheFlag.FORCE)); } @Override @@ -858,7 +859,7 @@ public class TestRetryCacheWithHA { @Override void prepare() throws Exception { dfs.addCachePool(new CachePoolInfo(directive.getPool())); - id = dfs.addCacheDirective(directive); + id = dfs.addCacheDirective(directive, EnumSet.of(CacheFlag.FORCE)); } @Override @@ -936,19 +937,19 @@ public class TestRetryCacheWithHA { @Override void prepare() throws Exception { - client.addCachePool(new CachePoolInfo(pool).setWeight(10)); + client.addCachePool(new CachePoolInfo(pool).setLimit(10l)); } @Override void invoke() throws Exception { - client.modifyCachePool(new CachePoolInfo(pool).setWeight(99)); + client.modifyCachePool(new CachePoolInfo(pool).setLimit(99l)); } @Override boolean checkNamenodeBeforeReturn() throws Exception { for (int i = 0; i < CHECKTIMES; i++) { RemoteIterator iter = dfs.listCachePools(); - if (iter.hasNext() && iter.next().getInfo().getWeight() == 99) { + if (iter.hasNext() && (long)iter.next().getInfo().getLimit() == 99) { return true; } Thread.sleep(1000); @@ -1216,7 +1217,7 @@ public class TestRetryCacheWithHA { CacheDirectiveInfo directiveInfo = new CacheDirectiveInfo.Builder().setPool(poolName).setPath(path).build(); dfs.addCachePool(new CachePoolInfo(poolName)); - dfs.addCacheDirective(directiveInfo); + dfs.addCacheDirective(directiveInfo, EnumSet.of(CacheFlag.FORCE)); poolNames.add(poolName); } listCacheDirectives(poolNames, 0); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/editsStored index 3c7fcbbf9809c28b49a88f037af110511ca54464..dc60e3424bd9bfe74f33d61b79a8fe23ad55aef6 100644 GIT binary patch literal 4599 zcmcIoYfMx}6h3$Fvalcz@rlnZ6ssbSMIM4yukwZpRI1fBVO`)VE3k`qccDmAUnwd- zh*temQyUc@p=nEPYa1{|Q;lg8rMCLRq_#2oLl8uskS4TDzWC-OdSeh;g{-K?$ghH$zs+qr*l=jwt-{_9*r;eAqu&G_ zKX%kxp0=Yo??_i1g@Tb>NCudYxWQ2?+bR{=?vZU?Ava~A6$Aib#l!5>6u-p)gNicQ zomGn580_}e%gRQ@<*gUmAv!y^EUVTbJ3Y4IlH!%iOIPJ&ug=P`#iu4GS*=Od44W0q z*5verB=l#6h|(RE-HwFpj9rU-d%o|?h(XMO{U5C^GOH2*0o20i4%Uv(kd>%&HON(zwt?%Y+5GvLC1fp-*TBK;2WDd zwz?X0O~v;z%USRNV`=imnUu;rlA?IiuIsbtaPY|C@>2`WF?7ht;mqH+$4;Z@aU+Mp z>~`Ag-PJA+Kak-ElWvF2p!m?y@fuY!cgZ4-?7k~cr&M91QStHPVekG+6b|9!LNfB> z&yxNMU3X-0d$~y;d2MU6i6wmmQAEF^?#k<9b?~sVb3NVZF#b859p&*2fmAn=`;6dw zZ+S&G%~zPfeFlP@a$`xOQ%1>qc8ck8C}o_;e;S0i#h<#@qIsxxO<(6vVH7cm6J&Kr zyyv6Ma`>I8G?p-#FEDaXrs0J^A?8({s3|=|Yp6aAU2lu6xfXxMP}hdECfJ>AV>Uqtn1b5eK7Y+m{o!2?8-3&{jw^c78fOwv-Y znYxE8ZiJdg`y$@Xb1TBuS6fsefEa8g0OR?B+W{pM@<}?>&o*5hX`I zNr80KIrq4)paHxK;Am|r7<;y_4dQxjvJt7lf{kDx|1@ens5@@K%4>v35Y z+>P2i{38LFx*fsaF`fCi6UFjD<@-Gcm`0HN6R^rU$Ltq=n2cY1%w>^J};i_ zoOo(t1g*{yyQoc1*FS*oB{VNx{{voL9BOyGL7oGMCl^wn&OPlgnD!ZU4_VxZD?Hj4 zk!~r6T8&(eHoL%^%M@oOo(t1WjU$ zT-2tg>wk#vB><|bJ^zT87agbFz3hmyh+EDPnL+38h=@|J2f1(ZWgp+!w{1Lj3bhPk zq#y_-71Z5SSL<-rJV#nB^-n4NQ_ci@TuG_bu{JT1NI@nrT%oJtEyZ40gHA-Pm^mGP zk4-G?HV9~ugcZY^_8Y2~7A>1Vfs6kKFg9Y#hIJDuaLHJJSBJ~)#!$fPV6Jz$YU}Na zrwMh1Gd#1xz*c(B;!ZbvxFGyzTIyk`o-44@uGD&5P6$x>h4Vx-@Di6R$9sQc340eW zqyFI?8xF?A3cG@G-0v%OiXVhrZ`aw~>bnD>rDvD~2*6DIuY2&=cj zsNN2ddi(Xl$NL#HZ!FLd4I14c3e1&!>dIqfFxWMj+(5yY-Zi7FduTrT$AzufUMCm3 us^m$n4K1vI7E%cq#btIBliE08^DhNq3SkrAh0|ah8m3pGJj- zY>BsR><)cBk*Y!fd65kelUP&du-mFsyW+OnnuYw-6e|b-LiVwo>s>yH6%3k|QBi6Y zwN-JLUCm8)wN`aDHwo=9JvVV@ew&_m2%!g;@3ibRX*2+VRDxe4J!m>J7AyEO zQe9NjjeOHu5Fa&!#G}F4Vy`B^o(Qg~rhSg>g@g>*4II5#uAaGf_jes4HNr@wEJ9>r zb)9Ojay!*FTSGm1r{u3N$qoBJ*poxgCQ~reNRX2CA9NuEt(iv-R~(pohG~b49M1T4 zbHW6w9XfItOs+d`dgu%jL{pp`e(^Q*F8i% z2lZfur6rg$M|0;0LmL)Xo}zUNG2A&|Y_zwQwl>;P5uY9xbSaD?j^WP+VNT{3u2skr 
zO_uj|UJ0ifV>zkThQvEVv`KqckEa=hSUv$~ZN7m}P@;KzP+ZUXcD80<95F-=^ddVO zs}#4*t*EtjH`^#ZWi$;);`Z`uWnx?LGN+`G0cgjAj7*qRQ2hQB+8IZrc(=E`L!v)m z0@n>~OT_xHL%a)s0{f4p-(Zy}lD6*9u;KLi8BZ8IUDsl7dd87%20xh#uSEmH?=TPo z0F1U(7(&FDtZ={KoCKt4k(U9)%_~++V`Nl{eXX5mwlXq_OQ!9tNMA>WGo{grKH7qi z-sNn$O~u1nW6O=3XA~WL*5G{Org`RN9M*?kaU_YOy8d3;PtO2SD`J5u0H$Fn0|cC1@>?KF-gGi3 z_UCmr&t}O>(0jo06M#yjpOnSP+qa4&q6{;zl+m5wnP*oILw=PEiUT1TZEXyBrXE>~ z0a6Xn$`4@^AYUCk?wXS$;cnDs;fD+mbgIt$4dbEl7w=WQcWoErnWN`nb)e(%R$h>g z@nog(F{`|}m{Dub$>|N`fL3(FbLu4{s+y1J6^D+vr|Tb}{Xx3^d;Hqi|Mj+uWF~-x zyvV^ihrQRt8|YNQ24aaDF@_KPVu~)cJ1y>zq>%y4!%{|fumce*@3557o31!m(SLF& zOIecM16FjwsOSbs(Pa*{{3u~W)U5a+0~m`hU-B^IWZa1&eSb%N^j?OXqDR(ZAS0u+ zqC?mO$X5cPo}6tG?nW&YKV$$?%iBkQ@yy4ai08rQVWsC7&jLLUs{2%YF*N345 zS1GE)?Q9eR394`wfdqfg1$6%`e?50za5xhVO;Ya>=`M+ZWX z8Hxab7>56)Q1H{-l~?2G)WuANi(dFyQ~oarbV~Lxo4B)=guincrIoZCB4iW9C9YG{ zYKz0Jxa=160f@dk)i#UU?($wPZ``6aha_$U#G8_r@D*OvbnkDoq>%ySVkrZdr#zCJ zY;(#ZgW|(K_uo9sHm7_)&fs#!ltOCCBX!X1 zj3qx{$tcemKhX~>ME}sRl_(AN5@)r&yyy5)ws{s&67XX*-hlCvzWtb!JYO&fQV4T_ UpVY~bA8w-GAy9m+2 1 - 1386695013416 - 360a10c6ecac725e + 1387701670577 + 7bb5467995769b59 @@ -24,8 +24,8 @@ 3 2 - 1386695013425 - 9b110c0b83225f7d + 1387701670580 + a5a3a2755e36827b @@ -37,17 +37,17 @@ 16386 /file_create_u\0001;F431 1 - 1386003814612 - 1386003814612 + 1387010471220 + 1387010471220 512 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 127.0.0.1 - aagarwal + andrew supergroup 420 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 7 @@ -59,13 +59,13 @@ 0 /file_create_u\0001;F431 1 - 1386003814665 - 1386003814612 + 1387010471276 + 1387010471220 512 - aagarwal + andrew supergroup 420 @@ -78,8 +78,8 @@ 0 /file_create_u\0001;F431 /file_moved - 1386003814671 - f583267a-ef8c-4f3f-9014-b067b83945ad + 1387010471286 + 508263bb-692e-4439-8738-ff89b8b03923 9 @@ -89,8 +89,8 @@ 7 0 /file_moved - 1386003814678 - f583267a-ef8c-4f3f-9014-b067b83945ad + 1387010471299 + 508263bb-692e-4439-8738-ff89b8b03923 10 @@ -101,9 +101,9 @@ 0 16387 /directory_mkdir - 1386003814686 + 1387010471312 - aagarwal + andrew supergroup 493 @@ -136,7 +136,7 @@ 12 /directory_mkdir snapshot1 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 15 @@ -147,7 +147,7 @@ /directory_mkdir snapshot1 snapshot2 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 16 @@ -157,7 +157,7 @@ 14 /directory_mkdir snapshot2 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 17 @@ -169,17 +169,17 @@ 16388 /file_create_u\0001;F431 1 - 1386003814712 - 1386003814712 + 1387010471373 + 1387010471373 512 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 127.0.0.1 - aagarwal + andrew supergroup 420 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 18 @@ -191,13 +191,13 @@ 0 /file_create_u\0001;F431 1 - 1386003814714 - 1386003814712 + 1387010471380 + 1387010471373 512 - aagarwal + andrew supergroup 420 @@ -253,9 +253,9 @@ 0 /file_create_u\0001;F431 /file_moved - 1386003814732 + 1387010471428 NONE - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 25 @@ -267,17 +267,17 @@ 16389 /file_concat_target 1 - 1386003814737 - 1386003814737 + 1387010471438 + 1387010471438 512 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 127.0.0.1 - aagarwal + andrew supergroup 420 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 27 @@ -388,8 +388,8 @@ 0 /file_concat_target 1 - 1386003814889 - 
1386003814737 + 1387010471540 + 1387010471438 512 @@ -409,7 +409,7 @@ 1003 - aagarwal + andrew supergroup 420 @@ -423,17 +423,17 @@ 16390 /file_concat_0 1 - 1386003814891 - 1386003814891 + 1387010471547 + 1387010471547 512 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 127.0.0.1 - aagarwal + andrew supergroup 420 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 40 @@ -544,8 +544,8 @@ 0 /file_concat_0 1 - 1386003814914 - 1386003814891 + 1387010471588 + 1387010471547 512 @@ -565,7 +565,7 @@ 1006 - aagarwal + andrew supergroup 420 @@ -579,17 +579,17 @@ 16391 /file_concat_1 1 - 1386003814916 - 1386003814916 + 1387010471595 + 1387010471595 512 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 127.0.0.1 - aagarwal + andrew supergroup 420 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 52 @@ -700,8 +700,8 @@ 0 /file_concat_1 1 - 1386003814938 - 1386003814916 + 1387010471651 + 1387010471595 512 @@ -721,7 +721,7 @@ 1009 - aagarwal + andrew supergroup 420 @@ -733,12 +733,12 @@ 56 0 /file_concat_target - 1386003814940 + 1387010471663 /file_concat_0 /file_concat_1 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 63 @@ -750,14 +750,14 @@ 16392 /file_symlink /file_concat_target - 1386003814956 - 1386003814956 + 1387010471674 + 1387010471674 - aagarwal + andrew supergroup 511 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 64 @@ -768,14 +768,14 @@ HDFS_DELEGATION_TOKEN 1 - aagarwal + andrew JobTracker - 1386003814961 - 1386608614961 + 1387010471682 + 1387615271682 2 - 1386090214961 + 1387096871682 @@ -785,14 +785,14 @@ HDFS_DELEGATION_TOKEN 1 - aagarwal + andrew JobTracker - 1386003814961 - 1386608614961 + 1387010471682 + 1387615271682 2 - 1386090215078 + 1387096871717 @@ -802,11 +802,11 @@ HDFS_DELEGATION_TOKEN 1 - aagarwal + andrew JobTracker - 1386003814961 - 1386608614961 + 1387010471682 + 1387615271682 2 @@ -816,13 +816,11 @@ 61 poolparty - - aagarwal - staff - 493 - - 100 - f583267a-ef8c-4f3f-9014-b067b83945ad + andrew + andrew + 493 + 9223372036854775807 + 508263bb-692e-4439-8738-ff89b8b03923 68 @@ -834,8 +832,8 @@ carlton party 448 - 1989 - f583267a-ef8c-4f3f-9014-b067b83945ad + 1989 + 508263bb-692e-4439-8738-ff89b8b03923 69 @@ -848,7 +846,7 @@ 1 poolparty -1 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 70 @@ -858,7 +856,7 @@ 64 1 /bar2 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 71 @@ -867,7 +865,7 @@ 65 1 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 72 @@ -876,7 +874,7 @@ 66 poolparty - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 73 @@ -888,17 +886,17 @@ 16393 /hard-lease-recovery-test 1 - 1386003815135 - 1386003815135 + 1387010471802 + 1387010471802 512 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 127.0.0.1 - aagarwal + andrew supergroup 420 - f583267a-ef8c-4f3f-9014-b067b83945ad + 508263bb-692e-4439-8738-ff89b8b03923 74 @@ -955,7 +953,7 @@ OP_REASSIGN_LEASE 73 - DFSClient_NONMAPREDUCE_-1253204429_1 + DFSClient_NONMAPREDUCE_-52011019_1 /hard-lease-recovery-test HDFS_NameNode @@ -968,8 +966,8 @@ 0 /hard-lease-recovery-test 1 - 1386003817462 - 1386003815135 + 1387010474126 + 1387010471802 512 @@ -979,7 +977,7 @@ 1011 - aagarwal + andrew supergroup 420 diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testCacheAdminConf.xml 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testCacheAdminConf.xml index c793bf964ae..77f8671748f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testCacheAdminConf.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/resources/testCacheAdminConf.xml @@ -80,8 +80,8 @@ Testing modifying a cache pool - -addPool poolparty -owner alice -group alicegroup -mode 0000 -weight 50 - -modifyPool poolparty -owner bob -group bobgroup -mode 0777 -weight 51 + -addPool poolparty -owner alice -group alicegroup -mode 0000 -limit 50 + -modifyPool poolparty -owner bob -group bobgroup -mode 0777 -limit 51 -listPools @@ -90,7 +90,7 @@ SubstringComparator - poolparty bob bobgroup rwxrwxrwx 51 + poolparty bob bobgroup rwxrwxrwx 51 @@ -129,11 +129,11 @@ SubstringComparator - bar alice alicegroup rwxr-xr-x 100 + bar alice alicegroup rwxr-xr-x unlimited SubstringComparator - foo bob bob rw-rw-r-- 100 + foo bob bob rw-rw-r-- unlimited @@ -156,7 +156,7 @@ SubstringComparator - foo bob bob rw-rw-r-- 100 + foo bob bob rw-rw-r-- unlimited @@ -417,11 +417,11 @@ SubstringComparator - bar alice alicegroup rwxr-xr-x 100 0 0 0 0 + bar alice alicegroup rwxr-xr-x unlimited 0 0 0 0 0 SubstringComparator - foo bob bob rw-rw-r-- 100 0 0 0 0 + foo bob bob rw-rw-r-- unlimited 0 0 0 0 0 From 5792d59da390842caec86ccaa8472d5be7933837 Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Tue, 17 Dec 2013 20:40:15 +0000 Subject: [PATCH 10/32] HDFS-5657. race condition causes writeback state error in NFS gateway. Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551691 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop/hdfs/nfs/nfs3/OpenFileCtx.java | 23 +++++++++++++++---- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 2 ++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java index 219660c6fa7..6d2a7441ef2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/src/main/java/org/apache/hadoop/hdfs/nfs/nfs3/OpenFileCtx.java @@ -85,6 +85,7 @@ class OpenFileCtx { private volatile boolean activeState; // The stream write-back status. True means one thread is doing write back. private volatile boolean asyncStatus; + private volatile long asyncWriteBackStartOffset; /** * The current offset of the file in HDFS. 
All the content before this offset @@ -209,6 +210,7 @@ class OpenFileCtx { updateLastAccessTime(); activeState = true; asyncStatus = false; + asyncWriteBackStartOffset = 0; dumpOut = null; raf = null; nonSequentialWriteInMemory = new AtomicLong(0); @@ -580,6 +582,7 @@ class OpenFileCtx { + nextOffset.get()); } asyncStatus = true; + asyncWriteBackStartOffset = writeCtx.getOffset(); asyncDataService.execute(new AsyncDataService.WriteBackTask(this)); } else { if (LOG.isDebugEnabled()) { @@ -903,9 +906,11 @@ class OpenFileCtx { /** Invoked by AsynDataService to write back to HDFS */ void executeWriteBack() { Preconditions.checkState(asyncStatus, - "The openFileCtx has false async status"); + "openFileCtx has false asyncStatus, fileId:" + latestAttr.getFileid()); + final long startOffset = asyncWriteBackStartOffset; try { while (activeState) { + // asyncStatus could be changed to false in offerNextToWrite() WriteCtx toWrite = offerNextToWrite(); if (toWrite != null) { // Do the write @@ -921,8 +926,18 @@ class OpenFileCtx { + latestAttr.getFileId()); } } finally { - // make sure we reset asyncStatus to false - asyncStatus = false; + // Make sure to reset asyncStatus to false unless a race happens + synchronized (this) { + if (startOffset == asyncWriteBackStartOffset) { + asyncStatus = false; + } else { + LOG.info("Another asyn task is already started before this one" + + " is finalized. fileId:" + latestAttr.getFileid() + + " asyncStatus:" + asyncStatus + " original startOffset:" + + startOffset + " new startOffset:" + asyncWriteBackStartOffset + + ". Won't change asyncStatus here."); + } + } } } @@ -1177,4 +1192,4 @@ class OpenFileCtx { return String.format("activeState: %b asyncStatus: %b nextOffset: %d", activeState, asyncStatus, nextOffset.get()); } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 264cc411fca..92e85d9cc14 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -976,6 +976,8 @@ Release 2.3.0 - UNRELEASED HDFS-4201. NPE in BPServiceActor#sendHeartBeat. (jxiang via cmccabe) HDFS-5666. Fix inconsistent synchronization in BPOfferService (jxiang via cmccabe) + + HDFS-5657. race condition causes writeback state error in NFS gateway (brandonli) Release 2.2.0 - 2013-10-13 From 124e507674c0d396f8494585e64226957199097b Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Tue, 17 Dec 2013 20:57:00 +0000 Subject: [PATCH 11/32] HDFS-5634. 
Allow BlockReaderLocal to switch between checksumming and not (cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551701 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../org/apache/hadoop/hdfs/BlockReader.java | 6 +- .../hadoop/hdfs/BlockReaderFactory.java | 24 +- .../apache/hadoop/hdfs/BlockReaderLocal.java | 1013 ++++++++++------- .../hadoop/hdfs/BlockReaderLocalLegacy.java | 6 +- .../apache/hadoop/hdfs/DFSInputStream.java | 35 +- .../apache/hadoop/hdfs/RemoteBlockReader.java | 6 +- .../hadoop/hdfs/RemoteBlockReader2.java | 6 +- .../server/datanode/BlockMetadataHeader.java | 27 +- .../src/main/resources/hdfs-default.xml | 5 +- .../org/apache/hadoop/hdfs/DFSTestUtil.java | 7 + .../hadoop/hdfs/TestBlockReaderLocal.java | 356 +++++- .../hdfs/TestShortCircuitLocalRead.java | 1 - 13 files changed, 989 insertions(+), 506 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 92e85d9cc14..0e68fb0828a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -256,6 +256,9 @@ Trunk (Unreleased) HDFS-5431. Support cachepool-based limit management in path-based caching (awang via cmccabe) + HDFS-5634. Allow BlockReaderLocal to switch between checksumming and not + (cmccabe) + OPTIMIZATIONS HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java index 2f0686a9beb..b957f00914f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReader.java @@ -18,8 +18,10 @@ package org.apache.hadoop.hdfs; import java.io.IOException; +import java.util.EnumSet; import org.apache.hadoop.fs.ByteBufferReadable; +import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.hdfs.client.ClientMmap; import org.apache.hadoop.hdfs.client.ClientMmapManager; import org.apache.hadoop.hdfs.protocol.LocatedBlock; @@ -89,10 +91,10 @@ public interface BlockReader extends ByteBufferReadable { /** * Get a ClientMmap object for this BlockReader. * - * @param curBlock The current block. + * @param opts The read options to use. * @return The ClientMmap object, or null if mmap is not * supported. 
*/ - ClientMmap getClientMmap(LocatedBlock curBlock, + ClientMmap getClientMmap(EnumSet opts, ClientMmapManager mmapManager); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java index 9f11327f758..ae98e573c06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderFactory.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.protocolPB.PBHelper; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants; +import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; @@ -98,7 +99,7 @@ public class BlockReaderFactory { // enabled, try to set up a BlockReaderLocal. BlockReader reader = newShortCircuitBlockReader(conf, file, block, blockToken, startOffset, len, peer, datanodeID, - domSockFactory, verifyChecksum, fisCache); + domSockFactory, verifyChecksum, fisCache, cachingStrategy); if (reader != null) { // One we've constructed the short-circuit block reader, we don't // need the socket any more. So let's return it to the cache. @@ -160,7 +161,8 @@ public class BlockReaderFactory { * @param verifyChecksum True if we should verify the checksums. * Note: even if this is true, when * DFS_CLIENT_READ_CHECKSUM_SKIP_CHECKSUM_KEY is - * set, we will skip checksums. + * set or the block is mlocked, we will skip + * checksums. * * @return The BlockReaderLocal, or null if the * DataNode declined to provide short-circuit @@ -172,7 +174,8 @@ public class BlockReaderFactory { Token blockToken, long startOffset, long len, Peer peer, DatanodeID datanodeID, DomainSocketFactory domSockFactory, boolean verifyChecksum, - FileInputStreamCache fisCache) throws IOException { + FileInputStreamCache fisCache, + CachingStrategy cachingStrategy) throws IOException { final DataOutputStream out = new DataOutputStream(new BufferedOutputStream( peer.getOutputStream())); @@ -189,9 +192,18 @@ public class BlockReaderFactory { FileInputStream fis[] = new FileInputStream[2]; sock.recvFileInputStreams(fis, buf, 0, buf.length); try { - reader = new BlockReaderLocal(conf, file, block, - startOffset, len, fis[0], fis[1], datanodeID, verifyChecksum, - fisCache); + reader = new BlockReaderLocal.Builder(conf). + setFilename(file). + setBlock(block). + setStartOffset(startOffset). + setStreams(fis). + setDatanodeID(datanodeID). + setVerifyChecksum(verifyChecksum). + setBlockMetadataHeader( + BlockMetadataHeader.preadHeader(fis[1].getChannel())). + setFileInputStreamCache(fisCache). + setCachingStrategy(cachingStrategy). 
+ build(); } finally { if (reader == null) { IOUtils.cleanup(DFSClient.LOG, fis[0], fis[1]); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java index aeac1757976..f702e9b5c77 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocal.java @@ -17,25 +17,30 @@ */ package org.apache.hadoop.hdfs; -import java.io.BufferedInputStream; -import java.io.DataInputStream; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteBuffer; -import org.apache.hadoop.conf.Configuration; +import java.nio.channels.FileChannel; +import java.util.EnumSet; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.hdfs.client.ClientMmap; +import org.apache.hadoop.hdfs.DFSClient.Conf; import org.apache.hadoop.hdfs.client.ClientMmapManager; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; -import org.apache.hadoop.hdfs.protocol.LocatedBlock; import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; +import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.util.DirectBufferPool; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.DataChecksum; +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.Preconditions; + /** * BlockReaderLocal enables local short circuited reads. If the DFS client is on * the same machine as the datanode, then the client can read files directly @@ -55,480 +60,566 @@ import org.apache.hadoop.util.DataChecksum; class BlockReaderLocal implements BlockReader { static final Log LOG = LogFactory.getLog(BlockReaderLocal.class); - private final FileInputStream dataIn; // reader for the data file - private final FileInputStream checksumIn; // reader for the checksum file + private static DirectBufferPool bufferPool = new DirectBufferPool(); + + public static class Builder { + private int bufferSize; + private boolean verifyChecksum; + private int maxReadahead; + private String filename; + private FileInputStream streams[]; + private long dataPos; + private DatanodeID datanodeID; + private FileInputStreamCache fisCache; + private boolean mlocked; + private BlockMetadataHeader header; + private ExtendedBlock block; + + public Builder(Conf conf) { + this.maxReadahead = Integer.MAX_VALUE; + this.verifyChecksum = !conf.skipShortCircuitChecksums; + this.bufferSize = conf.shortCircuitBufferSize; + } + + public Builder setVerifyChecksum(boolean verifyChecksum) { + this.verifyChecksum = verifyChecksum; + return this; + } + + public Builder setCachingStrategy(CachingStrategy cachingStrategy) { + long readahead = cachingStrategy.getReadahead() != null ? 
+ cachingStrategy.getReadahead() : + DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT; + this.maxReadahead = (int)Math.min(Integer.MAX_VALUE, readahead); + return this; + } + + public Builder setFilename(String filename) { + this.filename = filename; + return this; + } + + public Builder setStreams(FileInputStream streams[]) { + this.streams = streams; + return this; + } + + public Builder setStartOffset(long startOffset) { + this.dataPos = Math.max(0, startOffset); + return this; + } + + public Builder setDatanodeID(DatanodeID datanodeID) { + this.datanodeID = datanodeID; + return this; + } + + public Builder setFileInputStreamCache(FileInputStreamCache fisCache) { + this.fisCache = fisCache; + return this; + } + + public Builder setMlocked(boolean mlocked) { + this.mlocked = mlocked; + return this; + } + + public Builder setBlockMetadataHeader(BlockMetadataHeader header) { + this.header = header; + return this; + } + + public Builder setBlock(ExtendedBlock block) { + this.block = block; + return this; + } + + public BlockReaderLocal build() { + Preconditions.checkNotNull(streams); + Preconditions.checkArgument(streams.length == 2); + Preconditions.checkNotNull(header); + return new BlockReaderLocal(this); + } + } + + private boolean closed = false; + + /** + * Pair of streams for this block. + */ + private final FileInputStream streams[]; + + /** + * The data FileChannel. + */ + private final FileChannel dataIn; + + /** + * The next place we'll read from in the block data FileChannel. + * + * If data is buffered in dataBuf, this offset will be larger than the + * offset of the next byte which a read() operation will give us. + */ + private long dataPos; + + /** + * The Checksum FileChannel. + */ + private final FileChannel checksumIn; + + /** + * Checksum type and size. + */ + private final DataChecksum checksum; + + /** + * If false, we will always skip the checksum. + */ private final boolean verifyChecksum; /** - * Offset from the most recent chunk boundary at which the next read should - * take place. Is only set to non-zero at construction time, and is - * decremented (usually to 0) by subsequent reads. This avoids having to do a - * checksum read at construction to position the read cursor correctly. + * If true, this block is mlocked on the DataNode. */ - private int offsetFromChunkBoundary; - - private byte[] skipBuf = null; + private final AtomicBoolean mlocked; /** - * Used for checksummed reads that need to be staged before copying to their - * output buffer because they are either a) smaller than the checksum chunk - * size or b) issued by the slower read(byte[]...) path + * Name of the block, for logging purposes. */ - private ByteBuffer slowReadBuff = null; - private ByteBuffer checksumBuff = null; - private DataChecksum checksum; - - private static DirectBufferPool bufferPool = new DirectBufferPool(); - - private final int bytesPerChecksum; - private final int checksumSize; - - /** offset in block where reader wants to actually read */ - private long startOffset; private final String filename; + /** + * DataNode which contained this block. + */ private final DatanodeID datanodeID; + + /** + * Block ID and Block Pool ID. + */ private final ExtendedBlock block; + /** + * Cache of Checksum#bytesPerChecksum. + */ + private int bytesPerChecksum; + + /** + * Cache of Checksum#checksumSize. + */ + private int checksumSize; + + /** + * FileInputStream cache to return the streams to upon closing, + * or null if we should just close them unconditionally. 
+ */ private final FileInputStreamCache fisCache; + + /** + * Maximum number of chunks to allocate. + * + * This is used to allocate dataBuf and checksumBuf, in the event that + * we need them. + */ + private final int maxAllocatedChunks; + + /** + * True if zero readahead was requested. + */ + private final boolean zeroReadaheadRequested; + + /** + * Maximum amount of readahead we'll do. This will always be at least the, + * size of a single chunk, even if {@link zeroReadaheadRequested} is true. + * The reason is because we need to do a certain amount of buffering in order + * to do checksumming. + * + * This determines how many bytes we'll use out of dataBuf and checksumBuf. + * Why do we allocate buffers, and then (potentially) only use part of them? + * The rationale is that allocating a lot of buffers of different sizes would + * make it very difficult for the DirectBufferPool to re-use buffers. + */ + private int maxReadaheadLength; + private ClientMmap clientMmap; - private boolean mmapDisabled; - - private static int getSlowReadBufferNumChunks(int bufSize, - int bytesPerChecksum) { - if (bufSize < bytesPerChecksum) { - throw new IllegalArgumentException("Configured BlockReaderLocal buffer size (" + - bufSize + ") is not large enough to hold a single chunk (" + - bytesPerChecksum + "). Please configure " + - DFSConfigKeys.DFS_CLIENT_READ_SHORTCIRCUIT_BUFFER_SIZE_KEY + " appropriately"); - } - // Round down to nearest chunk size - return bufSize / bytesPerChecksum; - } + /** + * Buffers data starting at the current dataPos and extending on + * for dataBuf.limit(). + * + * This may be null if we don't need it. + */ + private ByteBuffer dataBuf; - public BlockReaderLocal(DFSClient.Conf conf, String filename, - ExtendedBlock block, long startOffset, long length, - FileInputStream dataIn, FileInputStream checksumIn, - DatanodeID datanodeID, boolean verifyChecksum, - FileInputStreamCache fisCache) throws IOException { - this.dataIn = dataIn; - this.checksumIn = checksumIn; - this.startOffset = Math.max(startOffset, 0); - this.filename = filename; - this.datanodeID = datanodeID; - this.block = block; - this.fisCache = fisCache; - this.clientMmap = null; - this.mmapDisabled = false; + /** + * Buffers checksums starting at the current checksumPos and extending on + * for checksumBuf.limit(). + * + * This may be null if we don't need it. + */ + private ByteBuffer checksumBuf; - // read and handle the common header here. 
For now just a version - checksumIn.getChannel().position(0); - BlockMetadataHeader header = BlockMetadataHeader - .readHeader(new DataInputStream( - new BufferedInputStream(checksumIn, - BlockMetadataHeader.getHeaderSize()))); - short version = header.getVersion(); - if (version != BlockMetadataHeader.VERSION) { - throw new IOException("Wrong version (" + version + ") of the " + - "metadata file for " + filename + "."); - } - this.verifyChecksum = verifyChecksum && !conf.skipShortCircuitChecksums; - long firstChunkOffset; - if (this.verifyChecksum) { - this.checksum = header.getChecksum(); - this.bytesPerChecksum = this.checksum.getBytesPerChecksum(); - this.checksumSize = this.checksum.getChecksumSize(); - firstChunkOffset = startOffset - - (startOffset % checksum.getBytesPerChecksum()); - this.offsetFromChunkBoundary = (int) (startOffset - firstChunkOffset); + private boolean mmapDisabled = false; - int chunksPerChecksumRead = getSlowReadBufferNumChunks( - conf.shortCircuitBufferSize, bytesPerChecksum); - slowReadBuff = bufferPool.getBuffer(bytesPerChecksum * chunksPerChecksumRead); - checksumBuff = bufferPool.getBuffer(checksumSize * chunksPerChecksumRead); - // Initially the buffers have nothing to read. - slowReadBuff.flip(); - checksumBuff.flip(); - long checkSumOffset = (firstChunkOffset / bytesPerChecksum) * checksumSize; - IOUtils.skipFully(checksumIn, checkSumOffset); + private BlockReaderLocal(Builder builder) { + this.streams = builder.streams; + this.dataIn = builder.streams[0].getChannel(); + this.dataPos = builder.dataPos; + this.checksumIn = builder.streams[1].getChannel(); + this.checksum = builder.header.getChecksum(); + this.verifyChecksum = builder.verifyChecksum && + (this.checksum.getChecksumType().id != DataChecksum.CHECKSUM_NULL); + this.mlocked = new AtomicBoolean(builder.mlocked); + this.filename = builder.filename; + this.datanodeID = builder.datanodeID; + this.fisCache = builder.fisCache; + this.block = builder.block; + this.bytesPerChecksum = checksum.getBytesPerChecksum(); + this.checksumSize = checksum.getChecksumSize(); + + this.maxAllocatedChunks = (bytesPerChecksum == 0) ? 0 : + ((builder.bufferSize + bytesPerChecksum - 1) / bytesPerChecksum); + // Calculate the effective maximum readahead. + // We can't do more readahead than there is space in the buffer. + int maxReadaheadChunks = (bytesPerChecksum == 0) ? 
0 : + ((Math.min(builder.bufferSize, builder.maxReadahead) + + bytesPerChecksum - 1) / bytesPerChecksum); + if (maxReadaheadChunks == 0) { + this.zeroReadaheadRequested = true; + maxReadaheadChunks = 1; } else { - firstChunkOffset = startOffset; - this.checksum = null; - this.bytesPerChecksum = 0; - this.checksumSize = 0; - this.offsetFromChunkBoundary = 0; + this.zeroReadaheadRequested = false; } - - boolean success = false; - try { - // Reposition both input streams to the beginning of the chunk - // containing startOffset - this.dataIn.getChannel().position(firstChunkOffset); - success = true; - } finally { - if (success) { - if (LOG.isDebugEnabled()) { - LOG.debug("Created BlockReaderLocal for file " + filename - + " block " + block + " in datanode " + datanodeID); - } - } else { - if (slowReadBuff != null) bufferPool.returnBuffer(slowReadBuff); - if (checksumBuff != null) bufferPool.returnBuffer(checksumBuff); - } + this.maxReadaheadLength = maxReadaheadChunks * bytesPerChecksum; + } + + private synchronized void createDataBufIfNeeded() { + if (dataBuf == null) { + dataBuf = bufferPool.getBuffer(maxAllocatedChunks * bytesPerChecksum); + dataBuf.position(0); + dataBuf.limit(0); } } - /** - * Reads bytes into a buffer until EOF or the buffer's limit is reached - */ - private int fillBuffer(FileInputStream stream, ByteBuffer buf) + private synchronized void freeDataBufIfExists() { + if (dataBuf != null) { + // When disposing of a dataBuf, we have to move our stored file index + // backwards. + dataPos -= dataBuf.remaining(); + dataBuf.clear(); + bufferPool.returnBuffer(dataBuf); + dataBuf = null; + } + } + + private synchronized void createChecksumBufIfNeeded() { + if (checksumBuf == null) { + checksumBuf = bufferPool.getBuffer(maxAllocatedChunks * checksumSize); + checksumBuf.position(0); + checksumBuf.limit(0); + } + } + + private synchronized void freeChecksumBufIfExists() { + if (checksumBuf != null) { + checksumBuf.clear(); + bufferPool.returnBuffer(checksumBuf); + checksumBuf = null; + } + } + + private synchronized int drainDataBuf(ByteBuffer buf) throws IOException { - int bytesRead = stream.getChannel().read(buf); - if (bytesRead < 0) { - //EOF - return bytesRead; + if (dataBuf == null) return 0; + int oldLimit = dataBuf.limit(); + int nRead = Math.min(dataBuf.remaining(), buf.remaining()); + if (nRead == 0) return 0; + try { + dataBuf.limit(dataBuf.position() + nRead); + buf.put(dataBuf); + } finally { + dataBuf.limit(oldLimit); } - while (buf.remaining() > 0) { - int n = stream.getChannel().read(buf); - if (n < 0) { - //EOF - return bytesRead; + return nRead; + } + + /** + * Read from the block file into a buffer. + * + * This function overwrites checksumBuf. It will increment dataPos. + * + * @param buf The buffer to read into. May be dataBuf. + * The position and limit of this buffer should be set to + * multiples of the checksum size. + * @param canSkipChecksum True if we can skip checksumming. + * + * @return Total bytes read. 0 on EOF. 
+ */ + private synchronized int fillBuffer(ByteBuffer buf, boolean canSkipChecksum) + throws IOException { + int total = 0; + long startDataPos = dataPos; + int startBufPos = buf.position(); + while (buf.hasRemaining()) { + int nRead = dataIn.read(buf, dataPos); + if (nRead < 0) { + break; } - bytesRead += n; + dataPos += nRead; + total += nRead; } - return bytesRead; + if (canSkipChecksum) { + freeChecksumBufIfExists(); + return total; + } + if (total > 0) { + try { + buf.limit(buf.position()); + buf.position(startBufPos); + createChecksumBufIfNeeded(); + int checksumsNeeded = (total + bytesPerChecksum - 1) / bytesPerChecksum; + checksumBuf.clear(); + checksumBuf.limit(checksumsNeeded * checksumSize); + long checksumPos = + 7 + ((startDataPos / bytesPerChecksum) * checksumSize); + while (checksumBuf.hasRemaining()) { + int nRead = checksumIn.read(checksumBuf, checksumPos); + if (nRead < 0) { + throw new IOException("Got unexpected checksum file EOF at " + + checksumPos + ", block file position " + startDataPos + " for " + + "block " + block + " of file " + filename); + } + checksumPos += nRead; + } + checksumBuf.flip(); + + checksum.verifyChunkedSums(buf, checksumBuf, filename, startDataPos); + } finally { + buf.position(buf.limit()); + } + } + return total; + } + + private boolean getCanSkipChecksum() { + return (!verifyChecksum) || mlocked.get(); } - /** - * Utility method used by read(ByteBuffer) to partially copy a ByteBuffer into - * another. - */ - private void writeSlice(ByteBuffer from, ByteBuffer to, int length) { - int oldLimit = from.limit(); - from.limit(from.position() + length); - try { - to.put(from); - } finally { - from.limit(oldLimit); - } - } - @Override public synchronized int read(ByteBuffer buf) throws IOException { - int nRead = 0; - if (verifyChecksum) { - // A 'direct' read actually has three phases. The first drains any - // remaining bytes from the slow read buffer. After this the read is - // guaranteed to be on a checksum chunk boundary. If there are still bytes - // to read, the fast direct path is used for as many remaining bytes as - // possible, up to a multiple of the checksum chunk size. Finally, any - // 'odd' bytes remaining at the end of the read cause another slow read to - // be issued, which involves an extra copy. - - // Every 'slow' read tries to fill the slow read buffer in one go for - // efficiency's sake. As described above, all non-checksum-chunk-aligned - // reads will be served from the slower read path. - - if (slowReadBuff.hasRemaining()) { - // There are remaining bytes from a small read available. This usually - // means this read is unaligned, which falls back to the slow path. - int fromSlowReadBuff = Math.min(buf.remaining(), slowReadBuff.remaining()); - writeSlice(slowReadBuff, buf, fromSlowReadBuff); - nRead += fromSlowReadBuff; + boolean canSkipChecksum = getCanSkipChecksum(); + + String traceString = null; + if (LOG.isTraceEnabled()) { + traceString = new StringBuilder(). + append("read("). + append("buf.remaining=").append(buf.remaining()). + append(", block=").append(block). + append(", filename=").append(filename). + append(", canSkipChecksum=").append(canSkipChecksum). 
+ append(")").toString(); + LOG.info(traceString + ": starting"); + } + int nRead; + try { + if (canSkipChecksum && zeroReadaheadRequested) { + nRead = readWithoutBounceBuffer(buf); + } else { + nRead = readWithBounceBuffer(buf, canSkipChecksum); } + } catch (IOException e) { + if (LOG.isTraceEnabled()) { + LOG.info(traceString + ": I/O error", e); + } + throw e; + } + if (LOG.isTraceEnabled()) { + LOG.info(traceString + ": returning " + nRead); + } + return nRead; + } - if (buf.remaining() >= bytesPerChecksum && offsetFromChunkBoundary == 0) { - // Since we have drained the 'small read' buffer, we are guaranteed to - // be chunk-aligned - int len = buf.remaining() - (buf.remaining() % bytesPerChecksum); + private synchronized int readWithoutBounceBuffer(ByteBuffer buf) + throws IOException { + freeDataBufIfExists(); + freeChecksumBufIfExists(); + int total = 0; + while (buf.hasRemaining()) { + int nRead = dataIn.read(buf, dataPos); + if (nRead < 0) { + break; + } + dataPos += nRead; + total += nRead; + } + return (total == 0) ? -1 : total; + } - // There's only enough checksum buffer space available to checksum one - // entire slow read buffer. This saves keeping the number of checksum - // chunks around. - len = Math.min(len, slowReadBuff.capacity()); - int oldlimit = buf.limit(); - buf.limit(buf.position() + len); - int readResult = 0; + /** + * Fill the data buffer. If necessary, validate the data against the + * checksums. + * + * We always want the offsets of the data contained in dataBuf to be + * aligned to the chunk boundary. If we are validating checksums, we + * accomplish this by seeking backwards in the file until we're on a + * chunk boundary. (This is necessary because we can't checksum a + * partial chunk.) If we are not validating checksums, we simply only + * fill the latter part of dataBuf. + * + * @param canSkipChecksum true if we can skip checksumming. + * @return true if we hit EOF. + * @throws IOException + */ + private synchronized boolean fillDataBuf(boolean canSkipChecksum) + throws IOException { + createDataBufIfNeeded(); + final int slop = (int)(dataPos % bytesPerChecksum); + final long oldDataPos = dataPos; + dataBuf.limit(maxReadaheadLength); + if (canSkipChecksum) { + dataBuf.position(slop); + fillBuffer(dataBuf, canSkipChecksum); + } else { + dataPos -= slop; + dataBuf.position(0); + fillBuffer(dataBuf, canSkipChecksum); + } + dataBuf.limit(dataBuf.position()); + dataBuf.position(Math.min(dataBuf.position(), slop)); + if (LOG.isTraceEnabled()) { + LOG.trace("loaded " + dataBuf.remaining() + " bytes into bounce " + + "buffer from offset " + oldDataPos + " of " + block); + } + return dataBuf.limit() != maxReadaheadLength; + } + + /** + * Read using the bounce buffer. + * + * A 'direct' read actually has three phases. The first drains any + * remaining bytes from the slow read buffer. After this the read is + * guaranteed to be on a checksum chunk boundary. If there are still bytes + * to read, the fast direct path is used for as many remaining bytes as + * possible, up to a multiple of the checksum chunk size. Finally, any + * 'odd' bytes remaining at the end of the read cause another slow read to + * be issued, which involves an extra copy. + * + * Every 'slow' read tries to fill the slow read buffer in one go for + * efficiency's sake. As described above, all non-checksum-chunk-aligned + * reads will be served from the slower read path. + * + * @param buf The buffer to read into. + * @param canSkipChecksum True if we can skip checksums. 
+ */ + private synchronized int readWithBounceBuffer(ByteBuffer buf, + boolean canSkipChecksum) throws IOException { + int total = 0; + boolean eof = false; + while (true) { + int bb = drainDataBuf(buf); // drain bounce buffer if possible + total += bb; + int needed = buf.remaining(); + if (eof || (needed == 0)) { + break; + } else if (buf.isDirect() && (needed >= maxReadaheadLength) + && ((dataPos % bytesPerChecksum) == 0)) { + // Fast lane: try to read directly into user-supplied buffer, bypassing + // bounce buffer. + int oldLimit = buf.limit(); + int nRead; try { - readResult = doByteBufferRead(buf); + buf.limit(buf.position() + maxReadaheadLength); + nRead = fillBuffer(buf, canSkipChecksum); } finally { - buf.limit(oldlimit); + buf.limit(oldLimit); } - if (readResult == -1) { - return nRead; - } else { - nRead += readResult; - buf.position(buf.position() + readResult); + if (nRead < maxReadaheadLength) { + eof = true; + } + total += nRead; + } else { + // Slow lane: refill bounce buffer. + if (fillDataBuf(canSkipChecksum)) { + eof = true; } } - - // offsetFromChunkBoundary > 0 => unaligned read, use slow path to read - // until chunk boundary - if ((buf.remaining() > 0 && buf.remaining() < bytesPerChecksum) || offsetFromChunkBoundary > 0) { - int toRead = Math.min(buf.remaining(), bytesPerChecksum - offsetFromChunkBoundary); - int readResult = fillSlowReadBuffer(toRead); - if (readResult == -1) { - return nRead; - } else { - int fromSlowReadBuff = Math.min(readResult, buf.remaining()); - writeSlice(slowReadBuff, buf, fromSlowReadBuff); - nRead += fromSlowReadBuff; - } - } - } else { - // Non-checksummed reads are much easier; we can just fill the buffer directly. - nRead = doByteBufferRead(buf); - if (nRead > 0) { - buf.position(buf.position() + nRead); - } } - return nRead; - } - - /** - * Tries to read as many bytes as possible into supplied buffer, checksumming - * each chunk if needed. - * - * Preconditions: - *

- * <ul> - * <li> - * If checksumming is enabled, buf.remaining must be a multiple of - * bytesPerChecksum. Note that this is not a requirement for clients of - * read(ByteBuffer) - in the case of non-checksum-sized read requests, - * read(ByteBuffer) will substitute a suitably sized buffer to pass to this - * method. - * </li> - * </ul> - * Postconditions: - * <ul> - * <li>buf.limit and buf.mark are unchanged.</li> - * <li>buf.position += min(offsetFromChunkBoundary, totalBytesRead) - so the - * requested bytes can be read straight from the buffer</li> - * </ul>
- * - * @param buf - * byte buffer to write bytes to. If checksums are not required, buf - * can have any number of bytes remaining, otherwise there must be a - * multiple of the checksum chunk size remaining. - * @return max(min(totalBytesRead, len) - offsetFromChunkBoundary, 0) - * that is, the the number of useful bytes (up to the amount - * requested) readable from the buffer by the client. - */ - private synchronized int doByteBufferRead(ByteBuffer buf) throws IOException { - if (verifyChecksum) { - assert buf.remaining() % bytesPerChecksum == 0; - } - int dataRead = -1; - - int oldpos = buf.position(); - // Read as much as we can into the buffer. - dataRead = fillBuffer(dataIn, buf); - - if (dataRead == -1) { - return -1; - } - - if (verifyChecksum) { - ByteBuffer toChecksum = buf.duplicate(); - toChecksum.position(oldpos); - toChecksum.limit(oldpos + dataRead); - - checksumBuff.clear(); - // Equivalent to (int)Math.ceil(toChecksum.remaining() * 1.0 / bytesPerChecksum ); - int numChunks = - (toChecksum.remaining() + bytesPerChecksum - 1) / bytesPerChecksum; - checksumBuff.limit(checksumSize * numChunks); - - fillBuffer(checksumIn, checksumBuff); - checksumBuff.flip(); - - checksum.verifyChunkedSums(toChecksum, checksumBuff, filename, - this.startOffset); - } - - if (dataRead >= 0) { - buf.position(oldpos + Math.min(offsetFromChunkBoundary, dataRead)); - } - - if (dataRead < offsetFromChunkBoundary) { - // yikes, didn't even get enough bytes to honour offset. This can happen - // even if we are verifying checksums if we are at EOF. - offsetFromChunkBoundary -= dataRead; - dataRead = 0; - } else { - dataRead -= offsetFromChunkBoundary; - offsetFromChunkBoundary = 0; - } - - return dataRead; - } - - /** - * Ensures that up to len bytes are available and checksummed in the slow read - * buffer. The number of bytes available to read is returned. If the buffer is - * not already empty, the number of remaining bytes is returned and no actual - * read happens. - * - * @param len - * the maximum number of bytes to make available. After len bytes - * are read, the underlying bytestream must be at a checksum - * boundary, or EOF. That is, (len + currentPosition) % - * bytesPerChecksum == 0. - * @return the number of bytes available to read, or -1 if EOF. - */ - private synchronized int fillSlowReadBuffer(int len) throws IOException { - int nRead = -1; - if (slowReadBuff.hasRemaining()) { - // Already got data, good to go. - nRead = Math.min(len, slowReadBuff.remaining()); - } else { - // Round a complete read of len bytes (plus any implicit offset) to the - // next chunk boundary, since we try and read in multiples of a chunk - int nextChunk = len + offsetFromChunkBoundary + - (bytesPerChecksum - ((len + offsetFromChunkBoundary) % bytesPerChecksum)); - int limit = Math.min(nextChunk, slowReadBuff.capacity()); - assert limit % bytesPerChecksum == 0; - - slowReadBuff.clear(); - slowReadBuff.limit(limit); - - nRead = doByteBufferRead(slowReadBuff); - - if (nRead > 0) { - // So that next time we call slowReadBuff.hasRemaining(), we don't get a - // false positive. - slowReadBuff.limit(nRead + slowReadBuff.position()); - } - } - return nRead; + return total == 0 ? 
-1 : total; } @Override - public synchronized int read(byte[] buf, int off, int len) throws IOException { + public synchronized int read(byte[] arr, int off, int len) + throws IOException { + boolean canSkipChecksum = getCanSkipChecksum(); + String traceString = null; if (LOG.isTraceEnabled()) { - LOG.trace("read off " + off + " len " + len); + traceString = new StringBuilder(). + append("read(arr.length=").append(arr.length). + append(", off=").append(off). + append(", len=").append(len). + append(", filename=").append(filename). + append(", block=").append(block). + append(", canSkipChecksum=").append(canSkipChecksum). + append(")").toString(); + LOG.trace(traceString + ": starting"); } - if (!verifyChecksum) { - return dataIn.read(buf, off, len); + int nRead; + try { + if (canSkipChecksum && zeroReadaheadRequested) { + nRead = readWithoutBounceBuffer(arr, off, len); + } else { + nRead = readWithBounceBuffer(arr, off, len, canSkipChecksum); + } + } catch (IOException e) { + if (LOG.isTraceEnabled()) { + LOG.trace(traceString + ": I/O error", e); + } + throw e; } - - int nRead = fillSlowReadBuffer(slowReadBuff.capacity()); - - if (nRead > 0) { - // Possible that buffer is filled with a larger read than we need, since - // we tried to read as much as possible at once - nRead = Math.min(len, nRead); - slowReadBuff.get(buf, off, nRead); + if (LOG.isTraceEnabled()) { + LOG.trace(traceString + ": returning " + nRead); } - return nRead; } + private synchronized int readWithoutBounceBuffer(byte arr[], int off, + int len) throws IOException { + freeDataBufIfExists(); + freeChecksumBufIfExists(); + int nRead = dataIn.read(ByteBuffer.wrap(arr, off, len), dataPos); + if (nRead > 0) { + dataPos += nRead; + } + return nRead == 0 ? -1 : nRead; + } + + private synchronized int readWithBounceBuffer(byte arr[], int off, int len, + boolean canSkipChecksum) throws IOException { + createDataBufIfNeeded(); + if (!dataBuf.hasRemaining()) { + dataBuf.position(0); + dataBuf.limit(maxReadaheadLength); + fillDataBuf(canSkipChecksum); + } + int toRead = Math.min(dataBuf.remaining(), len); + dataBuf.get(arr, off, toRead); + return toRead == 0 ? -1 : toRead; + } + @Override public synchronized long skip(long n) throws IOException { - if (LOG.isDebugEnabled()) { - LOG.debug("skip " + n); + int discardedFromBuf = 0; + long remaining = n; + if ((dataBuf != null) && dataBuf.hasRemaining()) { + discardedFromBuf = (int)Math.min(dataBuf.remaining(), n); + dataBuf.position(dataBuf.position() + discardedFromBuf); + remaining -= discardedFromBuf; } - if (n <= 0) { - return 0; + if (LOG.isTraceEnabled()) { + LOG.trace("skip(n=" + n + ", block=" + block + ", filename=" + + filename + "): discarded " + discardedFromBuf + " bytes from " + + "dataBuf and advanced dataPos by " + remaining); } - if (!verifyChecksum) { - return dataIn.skip(n); - } - - // caller made sure newPosition is not beyond EOF. 
- int remaining = slowReadBuff.remaining(); - int position = slowReadBuff.position(); - int newPosition = position + (int)n; - - // if the new offset is already read into dataBuff, just reposition - if (n <= remaining) { - assert offsetFromChunkBoundary == 0; - slowReadBuff.position(newPosition); - return n; - } - - // for small gap, read through to keep the data/checksum in sync - if (n - remaining <= bytesPerChecksum) { - slowReadBuff.position(position + remaining); - if (skipBuf == null) { - skipBuf = new byte[bytesPerChecksum]; - } - int ret = read(skipBuf, 0, (int)(n - remaining)); - return ret; - } - - // optimize for big gap: discard the current buffer, skip to - // the beginning of the appropriate checksum chunk and then - // read to the middle of that chunk to be in sync with checksums. - - // We can't use this.offsetFromChunkBoundary because we need to know how - // many bytes of the offset were really read. Calling read(..) with a - // positive this.offsetFromChunkBoundary causes that many bytes to get - // silently skipped. - int myOffsetFromChunkBoundary = newPosition % bytesPerChecksum; - long toskip = n - remaining - myOffsetFromChunkBoundary; - - slowReadBuff.position(slowReadBuff.limit()); - checksumBuff.position(checksumBuff.limit()); - - IOUtils.skipFully(dataIn, toskip); - long checkSumOffset = (toskip / bytesPerChecksum) * checksumSize; - IOUtils.skipFully(checksumIn, checkSumOffset); - - // read into the middle of the chunk - if (skipBuf == null) { - skipBuf = new byte[bytesPerChecksum]; - } - assert skipBuf.length == bytesPerChecksum; - assert myOffsetFromChunkBoundary < bytesPerChecksum; - - int ret = read(skipBuf, 0, myOffsetFromChunkBoundary); - - if (ret == -1) { // EOS - return toskip; - } else { - return (toskip + ret); - } - } - - @Override - public synchronized void close() throws IOException { - if (clientMmap != null) { - clientMmap.unref(); - clientMmap = null; - } - if (fisCache != null) { - if (LOG.isDebugEnabled()) { - LOG.debug("putting FileInputStream for " + filename + - " back into FileInputStreamCache"); - } - fisCache.put(datanodeID, block, new FileInputStream[] {dataIn, checksumIn}); - } else { - LOG.debug("closing FileInputStream for " + filename); - IOUtils.cleanup(LOG, dataIn, checksumIn); - } - if (slowReadBuff != null) { - bufferPool.returnBuffer(slowReadBuff); - slowReadBuff = null; - } - if (checksumBuff != null) { - bufferPool.returnBuffer(checksumBuff); - checksumBuff = null; - } - startOffset = -1; - checksum = null; - } - - @Override - public int readAll(byte[] buf, int offset, int len) throws IOException { - return BlockReaderUtil.readAll(this, buf, offset, len); - } - - @Override - public void readFully(byte[] buf, int off, int len) throws IOException { - BlockReaderUtil.readFully(this, buf, off, len); + dataPos += remaining; + return n; } @Override @@ -537,25 +628,71 @@ class BlockReaderLocal implements BlockReader { return Integer.MAX_VALUE; } + @Override + public synchronized void close() throws IOException { + if (closed) return; + closed = true; + if (LOG.isTraceEnabled()) { + LOG.trace("close(filename=" + filename + ", block=" + block + ")"); + } + if (clientMmap != null) { + clientMmap.unref(); + clientMmap = null; + } + if (fisCache != null) { + if (LOG.isDebugEnabled()) { + LOG.debug("putting FileInputStream for " + filename + + " back into FileInputStreamCache"); + } + fisCache.put(datanodeID, block, streams); + } else { + LOG.debug("closing FileInputStream for " + filename); + IOUtils.cleanup(LOG, dataIn, checksumIn); + } 
+ freeDataBufIfExists(); + freeChecksumBufIfExists(); + } + + @Override + public synchronized void readFully(byte[] arr, int off, int len) + throws IOException { + BlockReaderUtil.readFully(this, arr, off, len); + } + + @Override + public synchronized int readAll(byte[] buf, int off, int len) + throws IOException { + return BlockReaderUtil.readAll(this, buf, off, len); + } + @Override public boolean isLocal() { return true; } - + @Override public boolean isShortCircuit() { return true; } @Override - public ClientMmap getClientMmap(LocatedBlock curBlock, - ClientMmapManager mmapManager) { + public synchronized ClientMmap getClientMmap(EnumSet opts, + ClientMmapManager mmapManager) { + if ((!opts.contains(ReadOption.SKIP_CHECKSUMS)) && + verifyChecksum && (!mlocked.get())) { + if (LOG.isTraceEnabled()) { + LOG.trace("can't get an mmap for " + block + " of " + filename + + " since SKIP_CHECKSUMS was not given, " + + "we aren't skipping checksums, and the block is not mlocked."); + } + return null; + } if (clientMmap == null) { if (mmapDisabled) { return null; } try { - clientMmap = mmapManager.fetch(datanodeID, block, dataIn); + clientMmap = mmapManager.fetch(datanodeID, block, streams[0]); if (clientMmap == null) { mmapDisabled = true; return null; @@ -572,4 +709,24 @@ class BlockReaderLocal implements BlockReader { } return clientMmap; } + + /** + * Set the mlocked state of the BlockReader. + * This method does NOT need to be synchronized because mlocked is atomic. + * + * @param mlocked the new mlocked state of the BlockReader. + */ + public void setMlocked(boolean mlocked) { + this.mlocked.set(mlocked); + } + + @VisibleForTesting + boolean getVerifyChecksum() { + return this.verifyChecksum; + } + + @VisibleForTesting + int getMaxReadaheadLength() { + return this.maxReadaheadLength; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java index 85ee41b6305..2f661933619 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockReaderLocalLegacy.java @@ -24,10 +24,12 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; import java.util.Collections; +import java.util.EnumSet; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.Map; +import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.hdfs.client.ClientMmap; import org.apache.hadoop.hdfs.client.ClientMmapManager; import org.apache.commons.logging.Log; @@ -706,8 +708,8 @@ class BlockReaderLocalLegacy implements BlockReader { } @Override - public ClientMmap getClientMmap(LocatedBlock curBlock, - ClientMmapManager mmapManager) { + public ClientMmap getClientMmap(EnumSet opts, + ClientMmapManager mmapManager) { return null; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index bb9a066afe0..0ab51c7e716 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -57,6 +57,7 @@ import org.apache.hadoop.hdfs.protocol.LocatedBlocks; import 
org.apache.hadoop.hdfs.protocol.datatransfer.InvalidEncryptionKeyException; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; import org.apache.hadoop.hdfs.security.token.block.InvalidBlockTokenException; +import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.server.datanode.ReplicaNotFoundException; import org.apache.hadoop.io.ByteBufferPool; @@ -1073,9 +1074,18 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, DFSClient.LOG.debug("got FileInputStreams for " + block + " from " + "the FileInputStreamCache."); } - return new BlockReaderLocal(dfsClient.getConf(), file, - block, startOffset, len, fis[0], fis[1], chosenNode, verifyChecksum, - fileInputStreamCache); + return new BlockReaderLocal.Builder(dfsClient.getConf()). + setFilename(file). + setBlock(block). + setStartOffset(startOffset). + setStreams(fis). + setDatanodeID(chosenNode). + setVerifyChecksum(verifyChecksum). + setBlockMetadataHeader(BlockMetadataHeader. + preadHeader(fis[1].getChannel())). + setFileInputStreamCache(fileInputStreamCache). + setCachingStrategy(cachingStrategy). + build(); } // If the legacy local block reader is enabled and we are reading a local @@ -1479,23 +1489,19 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, "at position " + pos); } } - boolean canSkipChecksums = opts.contains(ReadOption.SKIP_CHECKSUMS); - if (canSkipChecksums) { - ByteBuffer buffer = tryReadZeroCopy(maxLength); - if (buffer != null) { - return buffer; - } + ByteBuffer buffer = tryReadZeroCopy(maxLength, opts); + if (buffer != null) { + return buffer; } - ByteBuffer buffer = ByteBufferUtil. - fallbackRead(this, bufferPool, maxLength); + buffer = ByteBufferUtil.fallbackRead(this, bufferPool, maxLength); if (buffer != null) { extendedReadBuffers.put(buffer, bufferPool); } return buffer; } - private synchronized ByteBuffer tryReadZeroCopy(int maxLength) - throws IOException { + private synchronized ByteBuffer tryReadZeroCopy(int maxLength, + EnumSet opts) throws IOException { // Java ByteBuffers can't be longer than 2 GB, because they use // 4-byte signed integers to represent capacity, etc. // So we can't mmap the parts of the block higher than the 2 GB offset. 
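The two DFSInputStream hunks above change how the zero-copy read path treats ReadOption.SKIP_CHECKSUMS: instead of refusing to attempt tryReadZeroCopy unless the caller skips checksums, the stream now forwards the full option set down to BlockReader#getClientMmap and lets BlockReaderLocal decide whether an mmap is safe (it still declines when checksums are being verified and the block is not mlocked). Below is a minimal, illustrative client-side sketch of the enhanced byte-buffer read API that feeds this code path; it assumes a 2.3+ client with these changes applied, and the file path and request size are made up for the example.

import java.nio.ByteBuffer;
import java.util.EnumSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.io.ElasticByteBufferPool;

public class ZeroCopyReadSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    ElasticByteBufferPool pool = new ElasticByteBufferPool();
    FSDataInputStream in = fs.open(new Path("/tmp/zcr-demo")); // hypothetical test file
    try {
      // Ask for up to 1 MB. SKIP_CHECKSUMS widens the cases where an mmap can
      // be handed out; without it, BlockReaderLocal only mmaps mlocked blocks
      // or blocks whose checksums it is not verifying.
      ByteBuffer buf = in.read(pool, 1024 * 1024,
          EnumSet.of(ReadOption.SKIP_CHECKSUMS));
      if (buf != null) {          // null means EOF
        System.out.println("read " + buf.remaining() + " bytes");
        in.releaseBuffer(buf);    // always return the buffer when done
      }
    } finally {
      in.close();
    }
  }
}

When no mmap can be produced, the same call transparently falls back to ByteBufferUtil.fallbackRead (as shown in the hunk above), so callers do not need to special-case short-circuit versus remote reads.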
@@ -1518,8 +1524,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, long blockPos = curPos - blockStartInFile; long limit = blockPos + length; ClientMmap clientMmap = - blockReader.getClientMmap(currentLocatedBlock, - dfsClient.getMmapManager()); + blockReader.getClientMmap(opts, dfsClient.getMmapManager()); if (clientMmap == null) { if (DFSClient.LOG.isDebugEnabled()) { DFSClient.LOG.debug("unable to perform a zero-copy read from offset " + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java index f587c3b5d58..94a00ccc580 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader.java @@ -23,10 +23,12 @@ import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.IOException; import java.nio.ByteBuffer; +import java.util.EnumSet; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.fs.FSInputChecker; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.hdfs.client.ClientMmap; import org.apache.hadoop.hdfs.client.ClientMmapManager; import org.apache.hadoop.hdfs.net.Peer; @@ -490,8 +492,8 @@ public class RemoteBlockReader extends FSInputChecker implements BlockReader { } @Override - public ClientMmap getClientMmap(LocatedBlock curBlock, - ClientMmapManager mmapManager) { + public ClientMmap getClientMmap(EnumSet opts, + ClientMmapManager mmapManager) { return null; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java index 521fb70aa38..e76a65a40b7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/RemoteBlockReader2.java @@ -25,10 +25,12 @@ import java.io.OutputStream; import java.net.InetSocketAddress; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; +import java.util.EnumSet; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.fs.ReadOption; import org.apache.hadoop.hdfs.client.ClientMmap; import org.apache.hadoop.hdfs.client.ClientMmapManager; import org.apache.hadoop.hdfs.net.Peer; @@ -455,8 +457,8 @@ public class RemoteBlockReader2 implements BlockReader { } @Override - public ClientMmap getClientMmap(LocatedBlock curBlock, - ClientMmapManager manager) { + public ClientMmap getClientMmap(EnumSet opts, + ClientMmapManager mmapManager) { return null; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java index 870f8e58839..6bb9227883b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockMetadataHeader.java @@ -21,10 +21,13 @@ import java.io.BufferedInputStream; import java.io.ByteArrayInputStream; import java.io.DataInputStream; 
import java.io.DataOutputStream; +import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.RandomAccessFile; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.DataChecksum; @@ -67,7 +70,29 @@ public class BlockMetadataHeader { return checksum; } - + /** + * Read the header without changing the position of the FileChannel. + * + * @param fc The FileChannel to read. + * @return the Metadata Header. + * @throws IOException on error. + */ + public static BlockMetadataHeader preadHeader(FileChannel fc) + throws IOException { + byte arr[] = new byte[2 + DataChecksum.HEADER_LEN]; + ByteBuffer buf = ByteBuffer.wrap(arr); + + while (buf.hasRemaining()) { + if (fc.read(buf, 0) <= 0) { + throw new EOFException("unexpected EOF while reading " + + "metadata file header"); + } + } + short version = (short)((arr[0] << 8) | (arr[1] & 0xff)); + DataChecksum dataChecksum = DataChecksum.newDataChecksum(arr, 2); + return new BlockMetadataHeader(version, dataChecksum); + } + /** * This reads all the fields till the beginning of checksum. * @param in diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 78c14bb861f..b961c32bb72 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1394,12 +1394,15 @@ dfs.client.cache.readahead - Just like dfs.datanode.readahead.bytes, this setting causes the datanode to + When using remote reads, this setting causes the datanode to read ahead in the block file using posix_fadvise, potentially decreasing I/O wait times. Unlike dfs.datanode.readahead.bytes, this is a client-side setting rather than a setting for the entire datanode. If present, this setting will override the DataNode default. + When using local reads, this setting determines how much readahead we do in + BlockReaderLocal. + If the native libraries are not available to the DataNode, this configuration has no effect. 
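The reworded dfs.client.cache.readahead description above now covers both read paths: for remote reads it overrides the datanode's posix_fadvise readahead on behalf of this client, and for short-circuit reads it bounds how far ahead BlockReaderLocal buffers. A small sketch of setting it programmatically, using only the DFSConfigKeys constant that the test changes below also use; the 4 MB value is purely illustrative.

import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.hadoop.hdfs.HdfsConfiguration;

public class ClientReadaheadSketch {
  public static void main(String[] args) {
    HdfsConfiguration conf = new HdfsConfiguration();

    // Cap client-side readahead at 4 MB (illustrative). For local reads this
    // bounds BlockReaderLocal's bounce buffer; for remote reads it overrides
    // the datanode-wide dfs.datanode.readahead.bytes for this client.
    conf.setLong(DFSConfigKeys.DFS_CLIENT_CACHE_READAHEAD, 4L * 1024 * 1024);

    // A value of 0 requests no readahead; checksummed local reads still buffer
    // at least one checksum chunk so that a full chunk can be verified.
    // conf.setLong(DFSConfigKeys.DFS_CLIENT_CACHE_READAHEAD, 0L);

    // Pass conf to FileSystem.get(...) so the DFSClient picks the value up.
  }
}

As the Builder code earlier in this patch shows, BlockReaderLocal rounds the effective readahead to whole checksum chunks and clamps it against the short-circuit read buffer size, which is why a readahead of zero still allocates a one-chunk bounce buffer on the checksummed path.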
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index 74152e27795..8c29b1c0208 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -58,6 +58,7 @@ import org.apache.hadoop.util.VersionInfo; import java.io.*; import java.net.*; +import java.nio.ByteBuffer; import java.security.PrivilegedExceptionAction; import java.util.*; import java.util.concurrent.TimeoutException; @@ -1059,4 +1060,10 @@ public class DFSTestUtil { public static void abortStream(DFSOutputStream out) throws IOException { out.abort(); } + + public static byte[] asArray(ByteBuffer buf) { + byte arr[] = new byte[buf.remaining()]; + buf.duplicate().get(arr); + return arr; + } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java index 57f5ce979ad..aab4df849c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestBlockReaderLocal.java @@ -32,6 +32,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; import org.apache.hadoop.hdfs.protocol.DatanodeID; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; +import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader; +import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.unix.DomainSocket; import org.apache.hadoop.net.unix.TemporarySocketDirectory; @@ -92,22 +94,35 @@ public class TestBlockReaderLocal { } } - private static interface BlockReaderLocalTest { - final int TEST_LENGTH = 12345; + private static class BlockReaderLocalTest { + final static int TEST_LENGTH = 12345; + final static int BYTES_PER_CHECKSUM = 512; + + public void setConfiguration(HdfsConfiguration conf) { + // default: no-op + } public void setup(File blockFile, boolean usingChecksums) - throws IOException; + throws IOException { + // default: no-op + } public void doTest(BlockReaderLocal reader, byte original[]) - throws IOException; + throws IOException { + // default: no-op + } } public void runBlockReaderLocalTest(BlockReaderLocalTest test, - boolean checksum) throws IOException { + boolean checksum, long readahead) throws IOException { MiniDFSCluster cluster = null; HdfsConfiguration conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys. 
DFS_CLIENT_READ_SHORTCIRCUIT_SKIP_CHECKSUM_KEY, !checksum); + conf.setLong(DFSConfigKeys.DFS_BYTES_PER_CHECKSUM_KEY, + BlockReaderLocalTest.BYTES_PER_CHECKSUM); conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "CRC32C"); - FileInputStream dataIn = null, checkIn = null; + conf.setLong(DFSConfigKeys.DFS_CLIENT_CACHE_READAHEAD, readahead); + test.setConfiguration(conf); + FileInputStream dataIn = null, metaIn = null; final Path TEST_PATH = new Path("/a"); final long RANDOM_SEED = 4567L; BlockReaderLocal blockReaderLocal = null; @@ -143,45 +158,51 @@ public class TestBlockReaderLocal { cluster.shutdown(); cluster = null; test.setup(dataFile, checksum); - dataIn = new FileInputStream(dataFile); - checkIn = new FileInputStream(metaFile); - blockReaderLocal = new BlockReaderLocal(new DFSClient.Conf(conf), - TEST_PATH.getName(), block, 0, -1, - dataIn, checkIn, datanodeID, checksum, null); + FileInputStream streams[] = { + new FileInputStream(dataFile), + new FileInputStream(metaFile) + }; + dataIn = streams[0]; + metaIn = streams[1]; + blockReaderLocal = new BlockReaderLocal.Builder( + new DFSClient.Conf(conf)). + setFilename(TEST_PATH.getName()). + setBlock(block). + setStreams(streams). + setDatanodeID(datanodeID). + setCachingStrategy(new CachingStrategy(false, readahead)). + setVerifyChecksum(checksum). + setBlockMetadataHeader(BlockMetadataHeader.preadHeader( + metaIn.getChannel())). + build(); dataIn = null; - checkIn = null; + metaIn = null; test.doTest(blockReaderLocal, original); + // BlockReaderLocal should not alter the file position. + Assert.assertEquals(0, streams[0].getChannel().position()); + Assert.assertEquals(0, streams[1].getChannel().position()); } finally { if (fsIn != null) fsIn.close(); if (fs != null) fs.close(); if (cluster != null) cluster.shutdown(); if (dataIn != null) dataIn.close(); - if (checkIn != null) checkIn.close(); + if (metaIn != null) metaIn.close(); if (blockReaderLocal != null) blockReaderLocal.close(); } } private static class TestBlockReaderLocalImmediateClose - implements BlockReaderLocalTest { - @Override - public void setup(File blockFile, boolean usingChecksums) - throws IOException { } - @Override - public void doTest(BlockReaderLocal reader, byte original[]) - throws IOException { } + extends BlockReaderLocalTest { } @Test public void testBlockReaderLocalImmediateClose() throws IOException { - runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), true); - runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), false); + runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), true, 0); + runBlockReaderLocalTest(new TestBlockReaderLocalImmediateClose(), false, 0); } private static class TestBlockReaderSimpleReads - implements BlockReaderLocalTest { - @Override - public void setup(File blockFile, boolean usingChecksums) - throws IOException { } + extends BlockReaderLocalTest { @Override public void doTest(BlockReaderLocal reader, byte original[]) throws IOException { @@ -194,24 +215,43 @@ public class TestBlockReaderLocal { assertArrayRegionsEqual(original, 1024, buf, 1024, 513); reader.readFully(buf, 1537, 514); assertArrayRegionsEqual(original, 1537, buf, 1537, 514); + // Readahead is always at least the size of one chunk in this test. 
+ Assert.assertTrue(reader.getMaxReadaheadLength() >= + BlockReaderLocalTest.BYTES_PER_CHECKSUM); } } @Test public void testBlockReaderSimpleReads() throws IOException { - runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true); + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true, + DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderSimpleReadsShortReadahead() throws IOException { + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true, + BlockReaderLocalTest.BYTES_PER_CHECKSUM - 1); } @Test public void testBlockReaderSimpleReadsNoChecksum() throws IOException { - runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), false); + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), false, + DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderSimpleReadsNoReadahead() throws IOException { + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), true, 0); + } + + @Test + public void testBlockReaderSimpleReadsNoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderSimpleReads(), false, 0); } private static class TestBlockReaderLocalArrayReads2 - implements BlockReaderLocalTest { - @Override - public void setup(File blockFile, boolean usingChecksums) - throws IOException { } + extends BlockReaderLocalTest { @Override public void doTest(BlockReaderLocal reader, byte original[]) throws IOException { @@ -234,21 +274,30 @@ public class TestBlockReaderLocal { @Test public void testBlockReaderLocalArrayReads2() throws IOException { runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(), - true); + true, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); } @Test public void testBlockReaderLocalArrayReads2NoChecksum() throws IOException { runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(), - false); + false, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalArrayReads2NoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(), true, 0); + } + + @Test + public void testBlockReaderLocalArrayReads2NoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalArrayReads2(), false, 0); } private static class TestBlockReaderLocalByteBufferReads - implements BlockReaderLocalTest { - @Override - public void setup(File blockFile, boolean usingChecksums) - throws IOException { } + extends BlockReaderLocalTest { @Override public void doTest(BlockReaderLocal reader, byte original[]) throws IOException { @@ -268,19 +317,105 @@ public class TestBlockReaderLocal { @Test public void testBlockReaderLocalByteBufferReads() throws IOException { - runBlockReaderLocalTest( - new TestBlockReaderLocalByteBufferReads(), true); + runBlockReaderLocalTest(new TestBlockReaderLocalByteBufferReads(), + true, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); } @Test public void testBlockReaderLocalByteBufferReadsNoChecksum() throws IOException { runBlockReaderLocalTest( - new TestBlockReaderLocalByteBufferReads(), false); + new TestBlockReaderLocalByteBufferReads(), + false, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); } + @Test + public void testBlockReaderLocalByteBufferReadsNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalByteBufferReads(), + true, 0); + } + + @Test + public void testBlockReaderLocalByteBufferReadsNoChecksumNoReadahead() + throws IOException { + 
runBlockReaderLocalTest(new TestBlockReaderLocalByteBufferReads(), + false, 0); + } + + /** + * Test reads that bypass the bounce buffer (because they are aligned + * and bigger than the readahead). + */ + private static class TestBlockReaderLocalByteBufferFastLaneReads + extends BlockReaderLocalTest { + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + ByteBuffer buf = ByteBuffer.allocateDirect(TEST_LENGTH); + readFully(reader, buf, 0, 5120); + buf.flip(); + assertArrayRegionsEqual(original, 0, + DFSTestUtil.asArray(buf), 0, + 5120); + reader.skip(1537); + readFully(reader, buf, 0, 1); + buf.flip(); + assertArrayRegionsEqual(original, 6657, + DFSTestUtil.asArray(buf), 0, + 1); + reader.setMlocked(true); + readFully(reader, buf, 0, 5120); + buf.flip(); + assertArrayRegionsEqual(original, 6658, + DFSTestUtil.asArray(buf), 0, + 5120); + reader.setMlocked(false); + readFully(reader, buf, 0, 513); + buf.flip(); + assertArrayRegionsEqual(original, 11778, + DFSTestUtil.asArray(buf), 0, + 513); + reader.skip(3); + readFully(reader, buf, 0, 50); + buf.flip(); + assertArrayRegionsEqual(original, 12294, + DFSTestUtil.asArray(buf), 0, + 50); + } + } + + @Test + public void testBlockReaderLocalByteBufferFastLaneReads() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalByteBufferFastLaneReads(), + true, 2 * BlockReaderLocalTest.BYTES_PER_CHECKSUM); + } + + @Test + public void testBlockReaderLocalByteBufferFastLaneReadsNoChecksum() + throws IOException { + runBlockReaderLocalTest( + new TestBlockReaderLocalByteBufferFastLaneReads(), + false, 2 * BlockReaderLocalTest.BYTES_PER_CHECKSUM); + } + + @Test + public void testBlockReaderLocalByteBufferFastLaneReadsNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalByteBufferFastLaneReads(), + true, 0); + } + + @Test + public void testBlockReaderLocalByteBufferFastLaneReadsNoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalByteBufferFastLaneReads(), + false, 0); + } + private static class TestBlockReaderLocalReadCorruptStart - implements BlockReaderLocalTest { + extends BlockReaderLocalTest { boolean usingChecksums = false; @Override public void setup(File blockFile, boolean usingChecksums) @@ -314,11 +449,12 @@ public class TestBlockReaderLocal { @Test public void testBlockReaderLocalReadCorruptStart() throws IOException { - runBlockReaderLocalTest(new TestBlockReaderLocalReadCorruptStart(), true); + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorruptStart(), true, + DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); } private static class TestBlockReaderLocalReadCorrupt - implements BlockReaderLocalTest { + extends BlockReaderLocalTest { boolean usingChecksums = false; @Override public void setup(File blockFile, boolean usingChecksums) @@ -364,8 +500,136 @@ public class TestBlockReaderLocal { @Test public void testBlockReaderLocalReadCorrupt() throws IOException { - runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), true); - runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), false); + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), true, + DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalReadCorruptNoChecksum() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), false, + DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void 
testBlockReaderLocalReadCorruptNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), true, 0); + } + + @Test + public void testBlockReaderLocalReadCorruptNoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalReadCorrupt(), false, 0); + } + + private static class TestBlockReaderLocalWithMlockChanges + extends BlockReaderLocalTest { + @Override + public void setup(File blockFile, boolean usingChecksums) + throws IOException { + } + + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + ByteBuffer buf = ByteBuffer.wrap(new byte[TEST_LENGTH]); + reader.skip(1); + readFully(reader, buf, 1, 9); + assertArrayRegionsEqual(original, 1, buf.array(), 1, 9); + readFully(reader, buf, 10, 100); + assertArrayRegionsEqual(original, 10, buf.array(), 10, 100); + reader.setMlocked(true); + readFully(reader, buf, 110, 700); + assertArrayRegionsEqual(original, 110, buf.array(), 110, 700); + reader.setMlocked(false); + reader.skip(1); // skip from offset 810 to offset 811 + readFully(reader, buf, 811, 5); + assertArrayRegionsEqual(original, 811, buf.array(), 811, 5); + } + } + + @Test + public void testBlockReaderLocalWithMlockChanges() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalWithMlockChanges(), + true, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalWithMlockChangesNoChecksum() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalWithMlockChanges(), + false, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalWithMlockChangesNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalWithMlockChanges(), + true, 0); + } + + @Test + public void testBlockReaderLocalWithMlockChangesNoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalWithMlockChanges(), + false, 0); + } + + private static class TestBlockReaderLocalOnFileWithoutChecksum + extends BlockReaderLocalTest { + @Override + public void setConfiguration(HdfsConfiguration conf) { + conf.set(DFSConfigKeys.DFS_CHECKSUM_TYPE_KEY, "NULL"); + } + + @Override + public void doTest(BlockReaderLocal reader, byte original[]) + throws IOException { + Assert.assertTrue(!reader.getVerifyChecksum()); + ByteBuffer buf = ByteBuffer.wrap(new byte[TEST_LENGTH]); + reader.skip(1); + readFully(reader, buf, 1, 9); + assertArrayRegionsEqual(original, 1, buf.array(), 1, 9); + readFully(reader, buf, 10, 100); + assertArrayRegionsEqual(original, 10, buf.array(), 10, 100); + reader.setMlocked(true); + readFully(reader, buf, 110, 700); + assertArrayRegionsEqual(original, 110, buf.array(), 110, 700); + reader.setMlocked(false); + reader.skip(1); // skip from offset 810 to offset 811 + readFully(reader, buf, 811, 5); + assertArrayRegionsEqual(original, 811, buf.array(), 811, 5); + } + } + + @Test + public void testBlockReaderLocalOnFileWithoutChecksum() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalOnFileWithoutChecksum(), + true, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalOnFileWithoutChecksumNoChecksum() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalOnFileWithoutChecksum(), + false, DFSConfigKeys.DFS_DATANODE_READAHEAD_BYTES_DEFAULT); + } + + @Test + public void testBlockReaderLocalOnFileWithoutChecksumNoReadahead() + 
throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalOnFileWithoutChecksum(), + true, 0); + } + + @Test + public void testBlockReaderLocalOnFileWithoutChecksumNoChecksumNoReadahead() + throws IOException { + runBlockReaderLocalTest(new TestBlockReaderLocalOnFileWithoutChecksum(), + false, 0); } @Test(timeout=60000) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java index ed6fd745a54..57f1c117b46 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestShortCircuitLocalRead.java @@ -259,7 +259,6 @@ public class TestShortCircuitLocalRead { assertTrue("/ should be a directory", fs.getFileStatus(path) .isDirectory() == true); - // create a new file in home directory. Do not close it. byte[] fileData = AppendTestUtil.randomBytes(seed, size); Path file1 = fs.makeQualified(new Path("filelocal.dat")); FSDataOutputStream stm = createFile(fs, file1, 1); From 7f53e90fd19f644a4cbadcfecd14a7a00a53b3d9 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Tue, 17 Dec 2013 21:36:30 +0000 Subject: [PATCH 12/32] Move HDFS-5538, HDFS-5545, HDFS-5536, HDFS-5312, and HDFS-5629 from trunk to 2.4.0 section. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551724 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 30 ++++++++++----------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 0e68fb0828a..15980dd4dc1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -216,36 +216,21 @@ Trunk (Unreleased) and INodeFileUnderConstructionWithSnapshot with FileUnderContructionFeature. (jing9 via szetszwo) - HDFS-5538. URLConnectionFactory should pick up the SSL related configuration - by default. (Haohui Mai via jing9) - HDFS-5286. Flatten INodeDirectory hierarchy: Replace INodeDirectoryWithQuota with DirectoryWithQuotaFeature. (szetszwo) HDFS-5556. Add some more NameNode cache statistics, cache pool stats (cmccabe) - HDFS-5545. Allow specifying endpoints for listeners in HttpServer. (Haohui - Mai via jing9) - HDFS-5537. Remove FileWithSnapshot interface. (jing9 via szetszwo) HDFS-5430. Support TTL on CacheDirectives. (wang) - HDFS-5536. Implement HTTP policy for Namenode and DataNode. (Haohui Mai via - jing9) - HDFS-5630. Hook up cache directive and pool usage statistics. (wang) - HDFS-5312. Generate HTTP / HTTPS URL in DFSUtil#getInfoServer() based on the - configured http policy. (Haohui Mai via jing9) - HDFS-5554. Flatten INodeFile hierarchy: Replace INodeFileWithSnapshot with FileWithSnapshotFeature. (jing9 via szetszwo) - HDFS-5629. Support HTTPS in JournalNode and SecondaryNameNode. - (Haohui Mai via jing9) - HDFS-5647. Merge INodeDirectory.Feature and INodeFile.Feature. (Haohui Mai via jing9) @@ -750,6 +735,21 @@ Release 2.4.0 - UNRELEASED HDFS-5350. Name Node should report fsimage transfer time as a metric. (Jimmy Xiang via wang) + HDFS-5538. URLConnectionFactory should pick up the SSL related configuration + by default. (Haohui Mai via jing9) + + HDFS-5545. Allow specifying endpoints for listeners in HttpServer. (Haohui + Mai via jing9) + + HDFS-5536. 
Implement HTTP policy for Namenode and DataNode. (Haohui Mai via + jing9) + + HDFS-5312. Generate HTTP / HTTPS URL in DFSUtil#getInfoServer() based on the + configured http policy. (Haohui Mai via jing9) + + HDFS-5629. Support HTTPS in JournalNode and SecondaryNameNode. + (Haohui Mai via jing9) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) From 9218b4aec1be7b73bd321c4e71d0a371a66ee5ad Mon Sep 17 00:00:00 2001 From: Christopher Douglas Date: Tue, 17 Dec 2013 21:37:25 +0000 Subject: [PATCH 13/32] MAPREDUCE-5197. Add a service for checkpointing task state. Contributed by Carlo Curino git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551726 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../mapreduce/checkpoint/CheckpointID.java | 30 +++ .../checkpoint/CheckpointNamingService.java | 31 +++ .../checkpoint/CheckpointService.java | 100 +++++++++ .../mapreduce/checkpoint/FSCheckpointID.java | 72 +++++++ .../checkpoint/FSCheckpointService.java | 193 ++++++++++++++++++ .../mapreduce/checkpoint/RandomNameCNS.java | 32 +++ .../checkpoint/SimpleNamingService.java | 39 ++++ .../checkpoint/TestFSCheckpointID.java | 48 +++++ .../checkpoint/TestFSCheckpointService.java | 102 +++++++++ 10 files changed, 650 insertions(+) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointID.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointNamingService.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointService.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointID.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointService.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/RandomNameCNS.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/SimpleNamingService.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointID.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointService.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 39475c193aa..2a80119b0f9 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -71,6 +71,9 @@ Trunk (Unreleased) MAPREDUCE-5014. Extend Distcp to accept a custom CopyListing. (Srikanth Sundarrajan via amareshwari) + MAPREDUCE-5197. Add a service for checkpointing task state. + (Carlo Curino via cdouglas) + BUG FIXES MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. 
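As a rough illustration of the API added by the files below (CheckpointID, CheckpointNamingService, CheckpointService and the HDFS-backed FSCheckpointService), a hypothetical caller could drive the create/commit/open/delete life cycle as sketched here. The snippet is not part of the patch: the class name, the /checkpoints base path, the replication factor and the payload are invented for the example, while the checkpoint types and method signatures are the ones introduced in this change.

import java.nio.ByteBuffer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointID;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointService;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointService.CheckpointReadChannel;
import org.apache.hadoop.mapreduce.checkpoint.CheckpointService.CheckpointWriteChannel;
import org.apache.hadoop.mapreduce.checkpoint.FSCheckpointService;
import org.apache.hadoop.mapreduce.checkpoint.RandomNameCNS;

public class CheckpointUsageSketch {
  public static void main(String[] args) throws Exception {
    FileSystem fs = FileSystem.get(new Configuration());
    // Store checkpoints under /checkpoints with replication 3; names are
    // generated by RandomNameCNS, and the CheckpointID is only handed out
    // on commit, which is what makes the checkpoint write atomic.
    CheckpointService cs = new FSCheckpointService(
        fs, new Path("/checkpoints"), new RandomNameCNS(), (short) 3);

    // Write-once: data goes to a temporary file until commit renames it.
    CheckpointWriteChannel out = cs.create();
    out.write(ByteBuffer.wrap("task state".getBytes("UTF-8")));
    CheckpointID id = cs.commit(out);

    // Read-many: any holder of the (Writable) CheckpointID can re-open it.
    CheckpointReadChannel in = cs.open(id);
    ByteBuffer buf = ByteBuffer.allocate(4096);
    while (in.read(buf) != -1) {
      buf.flip();
      // ... restore task state from buf ...
      buf.clear();
    }
    in.close();

    // Discard the checkpoint once it is no longer needed.
    cs.delete(id);
  }
}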
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointID.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointID.java new file mode 100644 index 00000000000..4e3c3d66fa6 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointID.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import org.apache.hadoop.io.Writable; + +/** + * This class represent the identified (memento) for a checkpoint. It is allowed + * to contain small amount of metadata about a checkpoint and must provide + * sufficient information to the corresponding CheckpointService to locate and + * retrieve the data contained in the checkpoint. + */ +public interface CheckpointID extends Writable { + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointNamingService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointNamingService.java new file mode 100644 index 00000000000..0bb99a49bf2 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointNamingService.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +/** + * This class represent a naming service for checkpoints. 
+ */ +public interface CheckpointNamingService { + + /** + * Generate a new checkpoint Name + * @return the checkpoint name + */ + public String getNewName(); + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointService.java new file mode 100644 index 00000000000..7fc4d689ff3 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/CheckpointService.java @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import java.io.IOException; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; + +/** + * The CheckpointService provides a simple API to store and retrieve the state of a task. + * + * Checkpoints are atomic, single-writer, write-once, multiple-readers, + * ready-many type of objects. This is provided by releasing the CheckpointID + * for a checkpoint only upon commit of the checkpoint, and by preventing a + * checkpoint to be re-opened for writes. + * + * Non-functional properties such as durability, availability, compression, + * garbage collection, quotas are left to the implementation. + * + * This API is envisioned as the basic building block for a checkpoint service, + * on top of which richer interfaces can be layered (e.g., frameworks providing + * object-serialization, checkpoint metadata and provenance, etc.) + * + */ +public interface CheckpointService { + + public interface CheckpointWriteChannel extends WritableByteChannel { } + public interface CheckpointReadChannel extends ReadableByteChannel { } + + /** + * This method creates a checkpoint and provide a channel to write to it. The + * name/location of the checkpoint are unknown to the user as of this time, in + * fact, the CheckpointID is not released to the user until commit is called. + * This makes enforcing atomicity of writes easy. + * @return a channel that can be used to write to the checkpoint + * @throws IOException + * @throws InterruptedException + */ + public CheckpointWriteChannel create() + throws IOException, InterruptedException; + + /** + * Used to finalize and existing checkpoint. It returns the CheckpointID that + * can be later used to access (read-only) this checkpoint. This guarantees + * atomicity of the checkpoint. 
+ * @param ch the CheckpointWriteChannel to commit + * @return a CheckpointID + * @throws IOException + * @throws InterruptedException + */ + public CheckpointID commit(CheckpointWriteChannel ch) + throws IOException, InterruptedException; + + /** + * Dual to commit, it aborts the current checkpoint. Garbage collection + * choices are left to the implementation. The CheckpointID is not generated + * nor released to the user so the checkpoint is not accessible. + * @param ch the CheckpointWriteChannel to abort + * @throws IOException + * @throws InterruptedException + */ + public void abort(CheckpointWriteChannel ch) + throws IOException, InterruptedException; + + /** + * Given a CheckpointID returns a reading channel. + * @param id CheckpointID for the checkpoint to be opened + * @return a CheckpointReadChannel + * @throws IOException + * @throws InterruptedException + */ + public CheckpointReadChannel open(CheckpointID id) + throws IOException, InterruptedException; + + /** + * It discards an existing checkpoint identified by its CheckpointID. + * @param id CheckpointID for the checkpoint to be deleted + * @return a boolean confirming success of the deletion + * @throws IOException + * @throws InterruptedException + */ + public boolean delete(CheckpointID id) + throws IOException, InterruptedException; + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointID.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointID.java new file mode 100644 index 00000000000..196146c0035 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointID.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; + +/** + * A FileSystem based checkpoint ID contains reference to the Path + * where the checkpoint has been saved. 
+ */ +public class FSCheckpointID implements CheckpointID { + + private Path path; + + public FSCheckpointID(){ + } + + public FSCheckpointID(Path path) { + this.path = path; + } + + public Path getPath() { + return path; + } + + @Override + public String toString() { + return path.toString(); + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, path.toString()); + } + + @Override + public void readFields(DataInput in) throws IOException { + this.path = new Path(Text.readString(in)); + } + + @Override + public boolean equals(Object other) { + return other instanceof FSCheckpointID + && path.equals(((FSCheckpointID)other).path); + } + + @Override + public int hashCode() { + return path.hashCode(); + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointService.java new file mode 100644 index 00000000000..18a92561ac4 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/FSCheckpointService.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.Channels; +import java.nio.channels.ReadableByteChannel; +import java.nio.channels.WritableByteChannel; + +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +/** + * A FileSystem based CheckpointService. 
+ */ +public class FSCheckpointService implements CheckpointService { + + private final Path base; + private final FileSystem fs; + private final CheckpointNamingService namingPolicy; + private final short replication; + + public FSCheckpointService(FileSystem fs, Path base, + CheckpointNamingService namingPolicy, short replication) { + this.fs = fs; + this.base = base; + this.namingPolicy = namingPolicy; + this.replication = replication; + } + + public CheckpointWriteChannel create() + throws IOException { + + String name = namingPolicy.getNewName(); + + Path p = new Path(name); + if (p.isUriPathAbsolute()) { + throw new IOException("Checkpoint cannot be an absolute path"); + } + return createInternal(new Path(base, p)); + } + + CheckpointWriteChannel createInternal(Path name) throws IOException { + + //create a temp file, fail if file exists + return new FSCheckpointWriteChannel(name, fs.create(tmpfile(name), + replication)); + } + + private static class FSCheckpointWriteChannel + implements CheckpointWriteChannel { + private boolean isOpen = true; + private final Path finalDst; + private final WritableByteChannel out; + + FSCheckpointWriteChannel(Path finalDst, FSDataOutputStream out) { + this.finalDst = finalDst; + this.out = Channels.newChannel(out); + } + + public int write(ByteBuffer b) throws IOException { + return out.write(b); + } + + public Path getDestination() { + return finalDst; + } + + @Override + public void close() throws IOException { + isOpen=false; + out.close(); + } + + @Override + public boolean isOpen() { + return isOpen; + } + + } + + @Override + public CheckpointReadChannel open(CheckpointID id) + throws IOException, InterruptedException { + if (!(id instanceof FSCheckpointID)) { + throw new IllegalArgumentException( + "Mismatched checkpoint type: " + id.getClass()); + } + return new FSCheckpointReadChannel( + fs.open(((FSCheckpointID) id).getPath())); + } + + private static class FSCheckpointReadChannel + implements CheckpointReadChannel { + + private boolean isOpen = true; + private final ReadableByteChannel in; + + FSCheckpointReadChannel(FSDataInputStream in){ + this.in = Channels.newChannel(in); + } + + @Override + public int read(ByteBuffer bb) throws IOException { + return in.read(bb); + } + + @Override + public void close() throws IOException { + isOpen = false; + in.close(); + } + + @Override + public boolean isOpen() { + return isOpen; + } + + } + + @Override + public CheckpointID commit(CheckpointWriteChannel ch) + throws IOException, InterruptedException { + if (ch.isOpen()) { + ch.close(); + } + FSCheckpointWriteChannel hch = (FSCheckpointWriteChannel)ch; + Path dst = hch.getDestination(); + if (!fs.rename(tmpfile(dst), dst)) { + // attempt to clean up + abort(ch); + throw new IOException("Failed to promote checkpoint" + + tmpfile(dst) + " -> " + dst); + } + return new FSCheckpointID(hch.getDestination()); + } + + @Override + public void abort(CheckpointWriteChannel ch) throws IOException { + if (ch.isOpen()) { + ch.close(); + } + FSCheckpointWriteChannel hch = (FSCheckpointWriteChannel)ch; + Path tmp = tmpfile(hch.getDestination()); + try { + if (!fs.delete(tmp, false)) { + throw new IOException("Failed to delete checkpoint during abort"); + } + } catch (FileNotFoundException e) { + // IGNORE + } + } + + @Override + public boolean delete(CheckpointID id) throws IOException, + InterruptedException { + if (!(id instanceof FSCheckpointID)) { + throw new IllegalArgumentException( + "Mismatched checkpoint type: " + id.getClass()); + } + Path tmp = 
((FSCheckpointID)id).getPath(); + try { + return fs.delete(tmp, false); + } catch (FileNotFoundException e) { + // IGNORE + } + return true; + } + + static final Path tmpfile(Path p) { + return new Path(p.getParent(), p.getName() + ".tmp"); + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/RandomNameCNS.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/RandomNameCNS.java new file mode 100644 index 00000000000..7387c1c4915 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/RandomNameCNS.java @@ -0,0 +1,32 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import org.apache.commons.lang.RandomStringUtils; + +/** + * Simple naming service that generates a random checkpoint name. + */ +public class RandomNameCNS implements CheckpointNamingService { + + @Override + public String getNewName() { + return "checkpoint_" + RandomStringUtils.randomAlphanumeric(8); + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/SimpleNamingService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/SimpleNamingService.java new file mode 100644 index 00000000000..85630ad5309 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/SimpleNamingService.java @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +/** + * A naming service that simply returns the name it has been initialized with. 
+ */ +public class SimpleNamingService implements CheckpointNamingService{ + + final String name; + + public SimpleNamingService(String name){ + this.name = name; + } + + /** + * Generate a new checkpoint Name + * @return the checkpoint name + */ + public String getNewName(){ + return "checkpoint_" + name; + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointID.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointID.java new file mode 100644 index 00000000000..58abb32a28b --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointID.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import java.io.IOException; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.DataInputBuffer; +import org.apache.hadoop.io.DataOutputBuffer; +import org.junit.Test; + +public class TestFSCheckpointID { + + @Test + public void testFSCheckpointIDSerialization() throws IOException { + + Path inpath = new Path("/tmp/blah"); + FSCheckpointID cidin = new FSCheckpointID(inpath); + DataOutputBuffer out = new DataOutputBuffer(); + cidin.write(out); + out.close(); + + FSCheckpointID cidout = new FSCheckpointID(null); + DataInputBuffer in = new DataInputBuffer(); + in.reset(out.getData(), 0, out.getLength()); + cidout.readFields(in); + in.close(); + + assert cidin.equals(cidout); + + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointService.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointService.java new file mode 100644 index 00000000000..d60c908b15a --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/checkpoint/TestFSCheckpointService.java @@ -0,0 +1,102 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import java.nio.ByteBuffer; + +import java.util.Arrays; +import java.util.Random; + +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.DataOutputBuffer; +import org.apache.hadoop.mapreduce.checkpoint.CheckpointService.CheckpointWriteChannel; +import org.junit.Test; +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; +import org.mockito.*; + +public class TestFSCheckpointService { + + private final int BUFSIZE = 1024; + + @Test + public void testCheckpointCreate() throws Exception { + checkpointCreate(ByteBuffer.allocate(BUFSIZE)); + } + + @Test + public void testCheckpointCreateDirect() throws Exception { + checkpointCreate(ByteBuffer.allocateDirect(BUFSIZE)); + } + + public void checkpointCreate(ByteBuffer b) throws Exception { + int WRITES = 128; + FileSystem fs = mock(FileSystem.class); + DataOutputBuffer dob = new DataOutputBuffer(); + FSDataOutputStream hdfs = spy(new FSDataOutputStream(dob, null)); + @SuppressWarnings("resource") // backed by array + DataOutputBuffer verif = new DataOutputBuffer(); + when(fs.create(isA(Path.class), eq((short)1))).thenReturn(hdfs); + when(fs.rename(isA(Path.class), isA(Path.class))).thenReturn(true); + + Path base = new Path("/chk"); + Path finalLoc = new Path("/chk/checkpoint_chk0"); + Path tmp = FSCheckpointService.tmpfile(finalLoc); + + FSCheckpointService chk = new FSCheckpointService(fs, base, + new SimpleNamingService("chk0"), (short) 1); + CheckpointWriteChannel out = chk.create(); + + Random r = new Random(); + + final byte[] randBytes = new byte[BUFSIZE]; + for (int i = 0; i < WRITES; ++i) { + r.nextBytes(randBytes); + int s = r.nextInt(BUFSIZE - 1); + int e = r.nextInt(BUFSIZE - s) + 1; + verif.write(randBytes, s, e); + b.clear(); + b.put(randBytes).flip(); + b.position(s).limit(b.position() + e); + out.write(b); + } + verify(fs, never()).rename(any(Path.class), eq(finalLoc)); + CheckpointID cid = chk.commit(out); + verify(hdfs).close(); + verify(fs).rename(eq(tmp), eq(finalLoc)); + + assertArrayEquals(Arrays.copyOfRange(verif.getData(), 0, verif.getLength()), + Arrays.copyOfRange(dob.getData(), 0, dob.getLength())); + } + + @Test + public void testDelete() throws Exception { + FileSystem fs = mock(FileSystem.class); + Path chkloc = new Path("/chk/chk0"); + when(fs.delete(eq(chkloc), eq(false))).thenReturn(true); + Path base = new Path("/otherchk"); + FSCheckpointID id = new FSCheckpointID(chkloc); + FSCheckpointService chk = new FSCheckpointService(fs, base, + new SimpleNamingService("chk0"), (short) 1); + assertTrue(chk.delete(id)); + verify(fs).delete(eq(chkloc), eq(false)); + } + +} From ca125153b319a8f5d75585d25cb0f37ae717be01 Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Tue, 17 Dec 2013 22:32:38 +0000 Subject: [PATCH 14/32] YARN-1028. Added FailoverProxyProvider capability to ResourceManager to help with RM failover. Contributed by Karthik Kambatla. 
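As a rough sketch (not part of this patch) of what the change enables on the client side: once RM HA is turned on, ordinary clients reach the ResourceManager through the new failover-aware proxy path (RMProxy plus ConfiguredRMFailoverProxyProvider), so a failover is handled by retrying against the other rm id instead of surfacing to the caller. The snippet mirrors what TestRMFailover below exercises; the class name, the rm ids and the retry count are invented for the example, and the per-RM RPC addresses are assumed to be configured in yarn-site.xml.

import org.apache.hadoop.yarn.client.api.YarnClient;
import org.apache.hadoop.yarn.conf.YarnConfiguration;

public class RMHAClientSketch {
  public static void main(String[] args) throws Exception {
    YarnConfiguration conf = new YarnConfiguration();
    // Enable RM HA and name the two ResourceManagers.
    conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
    conf.set(YarnConfiguration.RM_HA_IDS, "rm1,rm2");
    // New in this patch: bound the number of failover attempts.
    conf.setInt(YarnConfiguration.CLIENT_FAILOVER_MAX_ATTEMPTS, 10);

    YarnClient client = YarnClient.createYarnClient();
    client.init(conf);
    client.start();
    try {
      // This RPC goes through the failover proxy provider; if the active RM
      // is down, the call should be retried against the standby once it
      // becomes active, rather than failing immediately.
      System.out.println("Applications: " + client.getApplications().size());
    } finally {
      client.stop();
    }
  }
}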
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551739 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/hadoop/io/retry/RetryPolicies.java | 17 +- hadoop-yarn-project/CHANGES.txt | 3 + .../dev-support/findbugs-exclude.xml | 8 + .../hadoop/yarn/conf/YarnConfiguration.java | 42 ++++- .../hadoop/yarn/client/ClientRMProxy.java | 45 ++++- .../yarn/client/api/impl/YarnClientImpl.java | 12 +- .../hadoop/yarn/client/TestRMFailover.java | 155 ++++++++++++++++ .../ConfiguredRMFailoverProxyProvider.java | 122 ++++++++++++ .../yarn/client/RMFailoverProxyProvider.java | 35 ++++ .../apache/hadoop/yarn/client/RMProxy.java | 174 ++++++++++++++++-- .../src/main/resources/yarn-default.xml | 55 ++++++ .../hadoop/yarn/server/api/ServerRMProxy.java | 46 ++++- .../hadoop/yarn/server/MiniYARNCluster.java | 102 +++++++--- .../yarn/server/TestMiniYARNClusterForHA.java | 14 +- 14 files changed, 742 insertions(+), 88 deletions(-) create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ConfiguredRMFailoverProxyProvider.java create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMFailoverProxyProvider.java diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java index a248f22cfc5..e2770030b11 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/retry/RetryPolicies.java @@ -68,7 +68,14 @@ public class RetryPolicies { *

*/ public static final RetryPolicy RETRY_FOREVER = new RetryForever(); - + + /** + * <p>
+ * Keep failing over forever + * </p>
+ */ + public static final RetryPolicy FAILOVER_FOREVER = new FailoverForever(); + /** * <p>

* Keep trying a limited number of times, waiting a fixed time between attempts, @@ -166,6 +173,14 @@ public class RetryPolicies { return RetryAction.RETRY; } } + + static class FailoverForever implements RetryPolicy { + @Override + public RetryAction shouldRetry(Exception e, int retries, int failovers, + boolean isIdempotentOrAtMostOnce) throws Exception { + return RetryAction.FAILOVER_AND_RETRY; + } + } /** * Retry up to maxRetries. diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index e69ea1bda3a..e8e262caaab 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -52,6 +52,9 @@ Release 2.4.0 - UNRELEASED YARN-312. Introduced ResourceManagerAdministrationProtocol changes to support changing resources on node. (Junping Du via vinodkv) + YARN-1028. Added FailoverProxyProvider capability to ResourceManager to help + with RM failover. (Karthik Kambatla via vinodkv) + IMPROVEMENTS YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu) diff --git a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml index 80598a43bb8..486bebfec50 100644 --- a/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml +++ b/hadoop-yarn-project/hadoop-yarn/dev-support/findbugs-exclude.xml @@ -310,4 +310,12 @@ + + + + + + + + diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java index de420b05e35..e96c217b8cc 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/conf/YarnConfiguration.java @@ -296,6 +296,31 @@ public class YarnConfiguration extends Configuration { HttpConfig.isSecure() ? RM_WEBAPP_HTTPS_ADDRESS : RM_WEBAPP_ADDRESS)); + public static final String CLIENT_FAILOVER_PREFIX = + YARN_PREFIX + "client.failover-"; + public static final String CLIENT_FAILOVER_PROXY_PROVIDER = + CLIENT_FAILOVER_PREFIX + "proxy-provider"; + public static final String DEFAULT_CLIENT_FAILOVER_PROXY_PROVIDER = + "org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider"; + + public static final String CLIENT_FAILOVER_MAX_ATTEMPTS = + CLIENT_FAILOVER_PREFIX + "max-attempts"; + + public static final String CLIENT_FAILOVER_SLEEPTIME_BASE_MS = + CLIENT_FAILOVER_PREFIX + "sleep-base-ms"; + + public static final String CLIENT_FAILOVER_SLEEPTIME_MAX_MS = + CLIENT_FAILOVER_PREFIX + "sleep-max-ms"; + + public static final String CLIENT_FAILOVER_RETRIES = + CLIENT_FAILOVER_PREFIX + "retries"; + public static final int DEFAULT_CLIENT_FAILOVER_RETRIES = 0; + + public static final String CLIENT_FAILOVER_RETRIES_ON_SOCKET_TIMEOUTS = + CLIENT_FAILOVER_PREFIX + "retries-on-socket-timeouts"; + public static final int + DEFAULT_CLIENT_FAILOVER_RETRIES_ON_SOCKET_TIMEOUTS = 0; + //////////////////////////////// // RM state store configs //////////////////////////////// @@ -850,22 +875,31 @@ public class YarnConfiguration extends Configuration { public static final String IS_MINI_YARN_CLUSTER = YARN_PREFIX + "is.minicluster"; + public static final String YARN_MC_PREFIX = YARN_PREFIX + "minicluster."; + /** Whether to use fixed ports with the minicluster. 
*/ - public static final String YARN_MINICLUSTER_FIXED_PORTS = YARN_PREFIX - + "minicluster.fixed.ports"; + public static final String YARN_MINICLUSTER_FIXED_PORTS = + YARN_MC_PREFIX + "fixed.ports"; /** * Default is false to be able to run tests concurrently without port * conflicts. */ - public static boolean DEFAULT_YARN_MINICLUSTER_FIXED_PORTS = false; + public static final boolean DEFAULT_YARN_MINICLUSTER_FIXED_PORTS = false; + + /** + * Whether the NM should use RPC to connect to the RM. Default is false. + * Can be set to true only when using fixed ports. + */ + public static final String YARN_MINICLUSTER_USE_RPC = YARN_MC_PREFIX + "use-rpc"; + public static final boolean DEFAULT_YARN_MINICLUSTER_USE_RPC = false; /** * Whether users are explicitly trying to control resource monitoring * configuration for the MiniYARNCluster. Disabled by default. */ public static final String YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING = - YARN_PREFIX + "minicluster.control-resource-monitoring"; + YARN_MC_PREFIX + "control-resource-monitoring"; public static final boolean DEFAULT_YARN_MINICLUSTER_CONTROL_RESOURCE_MONITORING = false; diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java index 049f4cc8266..06bbc3555c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/ClientRMProxy.java @@ -23,6 +23,7 @@ import java.net.InetSocketAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.SecurityUtil; import org.apache.hadoop.security.UserGroupInformation; @@ -34,17 +35,37 @@ import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.security.AMRMTokenIdentifier; import org.apache.hadoop.yarn.server.api.ResourceManagerAdministrationProtocol; -public class ClientRMProxy extends RMProxy { +import com.google.common.base.Preconditions; +public class ClientRMProxy extends RMProxy { private static final Log LOG = LogFactory.getLog(ClientRMProxy.class); + private interface ClientRMProtocols extends ApplicationClientProtocol, + ApplicationMasterProtocol, ResourceManagerAdministrationProtocol { + // Add nothing + } + + static { + INSTANCE = new ClientRMProxy(); + } + + private ClientRMProxy(){ + super(); + } + + /** + * Create a proxy to the ResourceManager for the specified protocol. + * @param configuration Configuration with all the required information. + * @param protocol Client protocol for which proxy is being requested. + * @param Type of proxy. + * @return Proxy to the ResourceManager for the specified client protocol. + * @throws IOException + */ public static T createRMProxy(final Configuration configuration, final Class protocol) throws IOException { - YarnConfiguration conf = (configuration instanceof YarnConfiguration) - ? (YarnConfiguration) configuration - : new YarnConfiguration(configuration); - InetSocketAddress rmAddress = getRMAddress(conf, protocol); - return createRMProxy(conf, protocol, rmAddress); + // This method exists only to initiate this class' static INSTANCE. 
TODO: + // FIX if possible + return RMProxy.createRMProxy(configuration, protocol); } private static void setupTokens(InetSocketAddress resourceManagerAddress) @@ -63,7 +84,9 @@ public class ClientRMProxy extends RMProxy { } } - private static InetSocketAddress getRMAddress(YarnConfiguration conf, + @InterfaceAudience.Private + @Override + protected InetSocketAddress getRMAddress(YarnConfiguration conf, Class protocol) throws IOException { if (protocol == ApplicationClientProtocol.class) { return conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, @@ -89,4 +112,12 @@ public class ClientRMProxy extends RMProxy { throw new IllegalStateException(message); } } + + @InterfaceAudience.Private + @Override + protected void checkAllowedProtocols(Class protocol) { + Preconditions.checkArgument( + protocol.isAssignableFrom(ClientRMProtocols.class), + "RM does not support this client protocol"); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java index 7c446045fe5..a5ff9f67dc9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/main/java/org/apache/hadoop/yarn/client/api/impl/YarnClientImpl.java @@ -19,7 +19,6 @@ package org.apache.hadoop.yarn.client.api.impl; import java.io.IOException; -import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.EnumSet; import java.util.List; @@ -79,7 +78,6 @@ public class YarnClientImpl extends YarnClient { private static final Log LOG = LogFactory.getLog(YarnClientImpl.class); protected ApplicationClientProtocol rmClient; - protected InetSocketAddress rmAddress; protected long submitPollIntervalMillis; private long asyncApiPollIntervalMillis; @@ -89,15 +87,9 @@ public class YarnClientImpl extends YarnClient { super(YarnClientImpl.class.getName()); } - private static InetSocketAddress getRmAddress(Configuration conf) { - return conf.getSocketAddr(YarnConfiguration.RM_ADDRESS, - YarnConfiguration.DEFAULT_RM_ADDRESS, YarnConfiguration.DEFAULT_RM_PORT); - } - @SuppressWarnings("deprecation") @Override protected void serviceInit(Configuration conf) throws Exception { - this.rmAddress = getRmAddress(conf); asyncApiPollIntervalMillis = conf.getLong(YarnConfiguration.YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS, YarnConfiguration.DEFAULT_YARN_CLIENT_APPLICATION_CLIENT_PROTOCOL_POLL_INTERVAL_MS); @@ -180,9 +172,7 @@ public class YarnClientImpl extends YarnClient { } } - - LOG.info("Submitted application " + applicationId + " to ResourceManager" - + " at " + rmAddress); + LOG.info("Submitted application " + applicationId); return applicationId; } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java new file mode 100644 index 00000000000..8545a1a2839 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/src/test/java/org/apache/hadoop/yarn/client/TestRMFailover.java @@ -0,0 +1,155 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.client; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol; +import org.apache.hadoop.yarn.client.api.YarnClient; +import org.apache.hadoop.yarn.conf.HAUtil; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.exceptions.YarnException; +import org.apache.hadoop.yarn.server.MiniYARNCluster; +import org.apache.hadoop.yarn.server.resourcemanager.AdminService; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +public class TestRMFailover { + private static final Log LOG = + LogFactory.getLog(TestRMFailover.class.getName()); + + private static final String RM1_NODE_ID = "rm1"; + private static final int RM1_PORT_BASE = 10000; + private static final String RM2_NODE_ID = "rm2"; + private static final int RM2_PORT_BASE = 20000; + private static final HAServiceProtocol.StateChangeRequestInfo req = + new HAServiceProtocol.StateChangeRequestInfo( + HAServiceProtocol.RequestSource.REQUEST_BY_USER_FORCED); + + private static Configuration conf; + private static MiniYARNCluster cluster; + + private static void setConfForRM(String rmId, String prefix, String value) { + conf.set(HAUtil.addSuffix(prefix, rmId), value); + } + + private static void setRpcAddressForRM(String rmId, int base) { + setConfForRM(rmId, YarnConfiguration.RM_ADDRESS, "0.0.0.0:" + + (base + YarnConfiguration.DEFAULT_RM_PORT)); + setConfForRM(rmId, YarnConfiguration.RM_SCHEDULER_ADDRESS, "0.0.0.0:" + + (base + YarnConfiguration.DEFAULT_RM_SCHEDULER_PORT)); + setConfForRM(rmId, YarnConfiguration.RM_ADMIN_ADDRESS, "0.0.0.0:" + + (base + YarnConfiguration.DEFAULT_RM_ADMIN_PORT)); + setConfForRM(rmId, YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, "0.0.0.0:" + + (base + YarnConfiguration.DEFAULT_RM_RESOURCE_TRACKER_PORT)); + setConfForRM(rmId, YarnConfiguration.RM_WEBAPP_ADDRESS, "0.0.0.0:" + + (base + YarnConfiguration.DEFAULT_RM_WEBAPP_PORT)); + setConfForRM(rmId, YarnConfiguration.RM_WEBAPP_HTTPS_ADDRESS, "0.0.0.0:" + + (base + YarnConfiguration.DEFAULT_RM_WEBAPP_HTTPS_PORT)); + } + + private static AdminService getRMAdminService(int index) { + return + cluster.getResourceManager(index).getRMContext().getRMAdminService(); + } + + @BeforeClass + public static void setup() throws IOException { + conf = new YarnConfiguration(); + conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); + conf.set(YarnConfiguration.RM_HA_IDS, RM1_NODE_ID + "," + RM2_NODE_ID); + setRpcAddressForRM(RM1_NODE_ID, RM1_PORT_BASE); + 
setRpcAddressForRM(RM2_NODE_ID, RM2_PORT_BASE); + + conf.setInt(YarnConfiguration.CLIENT_FAILOVER_MAX_ATTEMPTS, 100); + conf.setLong(YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_BASE_MS, 100L); + conf.setLong(YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_MAX_MS, 1000L); + conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, true); + conf.setBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, true); + + cluster = new MiniYARNCluster(TestRMFailover.class.getName(), 2, 1, 1, 1); + cluster.init(conf); + cluster.start(); + + cluster.getResourceManager(0).getRMContext().getRMAdminService() + .transitionToActive(req); + assertFalse("RM never turned active", -1 == cluster.getActiveRMIndex()); + } + + @AfterClass + public static void teardown() { + cluster.stop(); + } + + private void verifyClientConnection() { + int numRetries = 3; + while(numRetries-- > 0) { + Configuration conf = new YarnConfiguration(TestRMFailover.conf); + YarnClient client = YarnClient.createYarnClient(); + client.init(conf); + client.start(); + try { + client.getApplications(); + return; + } catch (Exception e) { + LOG.error(e); + } finally { + client.stop(); + } + } + fail("Client couldn't connect to the Active RM"); + } + + @Test + public void testExplicitFailover() + throws YarnException, InterruptedException, IOException { + assertTrue("NMs failed to connect to the RM", + cluster.waitForNodeManagersToConnect(5000)); + verifyClientConnection(); + + // Failover to the second RM + getRMAdminService(0).transitionToStandby(req); + getRMAdminService(1).transitionToActive(req); + assertEquals("Wrong ResourceManager is active", + HAServiceProtocol.HAServiceState.ACTIVE, + getRMAdminService(1).getServiceStatus().getState()); + assertTrue("NMs failed to connect to the RM", + cluster.waitForNodeManagersToConnect(5000)); + verifyClientConnection(); + + // Failover back to the first RM + getRMAdminService(1).transitionToStandby(req); + getRMAdminService(0).transitionToActive(req); + assertEquals("Wrong ResourceManager is active", + HAServiceProtocol.HAServiceState.ACTIVE, + getRMAdminService(0).getServiceStatus().getState()); + assertTrue("NMs failed to connect to the RM", + cluster.waitForNodeManagersToConnect(5000)); + verifyClientConnection(); + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ConfiguredRMFailoverProxyProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ConfiguredRMFailoverProxyProvider.java new file mode 100644 index 00000000000..ef56edd4293 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/ConfiguredRMFailoverProxyProvider.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.client; + +import java.io.Closeable; +import java.io.IOException; +import java.net.InetSocketAddress; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.CommonConfigurationKeysPublic; +import org.apache.hadoop.ipc.RPC; +import org.apache.hadoop.yarn.conf.HAUtil; +import org.apache.hadoop.yarn.conf.YarnConfiguration; + +@InterfaceAudience.Private +@InterfaceStability.Unstable +public class ConfiguredRMFailoverProxyProvider + implements RMFailoverProxyProvider { + private static final Log LOG = + LogFactory.getLog(ConfiguredRMFailoverProxyProvider.class); + + private int currentProxyIndex = 0; + Map proxies = new HashMap(); + + private RMProxy rmProxy; + private Class protocol; + protected YarnConfiguration conf; + protected String[] rmServiceIds; + + @Override + public void init(Configuration configuration, RMProxy rmProxy, + Class protocol) { + this.rmProxy = rmProxy; + this.protocol = protocol; + this.rmProxy.checkAllowedProtocols(this.protocol); + this.conf = new YarnConfiguration(configuration); + Collection rmIds = HAUtil.getRMHAIds(conf); + this.rmServiceIds = rmIds.toArray(new String[rmIds.size()]); + conf.set(YarnConfiguration.RM_HA_ID, rmServiceIds[currentProxyIndex]); + + conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECT_MAX_RETRIES_KEY, + conf.getInt(YarnConfiguration.CLIENT_FAILOVER_RETRIES, + YarnConfiguration.DEFAULT_CLIENT_FAILOVER_RETRIES)); + + conf.setInt(CommonConfigurationKeysPublic. + IPC_CLIENT_CONNECT_MAX_RETRIES_ON_SOCKET_TIMEOUTS_KEY, + conf.getInt(YarnConfiguration.CLIENT_FAILOVER_RETRIES_ON_SOCKET_TIMEOUTS, + YarnConfiguration.DEFAULT_CLIENT_FAILOVER_RETRIES_ON_SOCKET_TIMEOUTS)); + } + + private T getProxyInternal() { + try { + final InetSocketAddress rmAddress = rmProxy.getRMAddress(conf, protocol); + return RMProxy.getProxy(conf, protocol, rmAddress); + } catch (IOException ioe) { + LOG.error("Unable to create proxy to the ResourceManager " + + rmServiceIds[currentProxyIndex], ioe); + return null; + } + } + + @Override + public synchronized T getProxy() { + String rmId = rmServiceIds[currentProxyIndex]; + T current = proxies.get(rmId); + if (current == null) { + current = getProxyInternal(); + proxies.put(rmId, current); + } + return current; + } + + @Override + public synchronized void performFailover(T currentProxy) { + currentProxyIndex = (currentProxyIndex + 1) % rmServiceIds.length; + conf.set(YarnConfiguration.RM_HA_ID, rmServiceIds[currentProxyIndex]); + LOG.info("Failing over to " + rmServiceIds[currentProxyIndex]); + } + + @Override + public Class getInterface() { + return protocol; + } + + /** + * Close all the proxy objects which have been opened over the lifetime of + * this proxy provider. 
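
ConfiguredRMFailoverProxyProvider caches one proxy per RM id and, on performFailover, simply advances currentProxyIndex round-robin and rewrites yarn.resourcemanager.ha.id. The class is marked @Private, but it is not final, so a sketch of the extension point is still instructive; the subclass below only adds failover accounting and relies solely on members the hunk above declares protected or public (the generic parameters, which this excerpt renders as raw types, are assumed to be <T>):

    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider;

    /** Hypothetical provider: same rotation as the parent, plus a counter. */
    public class CountingRMFailoverProxyProvider<T>
        extends ConfiguredRMFailoverProxyProvider<T> {
      private static final Log LOG =
          LogFactory.getLog(CountingRMFailoverProxyProvider.class);
      private long failovers = 0;

      @Override
      public synchronized void performFailover(T currentProxy) {
        failovers++;
        LOG.info("Failover #" + failovers + " across "
            + rmServiceIds.length + " configured ResourceManagers");
        super.performFailover(currentProxy);
      }
    }

Such a provider would be selected through the yarn.client.failover-proxy-provider property that this patch documents in yarn-default.xml further below.
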
+ */ + @Override + public synchronized void close() throws IOException { + for (T proxy : proxies.values()) { + if (proxy instanceof Closeable) { + ((Closeable)proxy).close(); + } else { + RPC.stopProxy(proxy); + } + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMFailoverProxyProvider.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMFailoverProxyProvider.java new file mode 100644 index 00000000000..63b4764ab5a --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMFailoverProxyProvider.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.client; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.retry.FailoverProxyProvider; + +@InterfaceAudience.Private +public interface RMFailoverProxyProvider extends FailoverProxyProvider { + /** + * Initialize internal data structures, invoked right after instantiation. + * + * @param conf Configuration to use + * @param proxy The {@link RMProxy} instance to use + * @param protocol The communication protocol to use + */ + public void init(Configuration conf, RMProxy proxy, Class protocol); +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java index 5fff760eb2d..1651c13100c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/RMProxy.java @@ -36,6 +36,8 @@ import org.apache.hadoop.io.retry.RetryPolicies; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.io.retry.RetryProxy; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; import org.apache.hadoop.yarn.ipc.YarnRPC; @@ -48,7 +50,68 @@ import com.google.common.annotations.VisibleForTesting; public class RMProxy { private static final Log LOG = LogFactory.getLog(RMProxy.class); + protected static RMProxy INSTANCE; + protected RMProxy() {} + + /** + * Verify the passed protocol is supported. 
+ */ + @Private + protected void checkAllowedProtocols(Class protocol) {} + + /** + * Get the ResourceManager address from the provided Configuration for the + * given protocol. + */ + @Private + protected InetSocketAddress getRMAddress( + YarnConfiguration conf, Class protocol) throws IOException { + throw new UnsupportedOperationException("This method should be invoked " + + "from an instance of ClientRMProxy or ServerRMProxy"); + } + + /** + * Create a proxy for the specified protocol. For non-HA, + * this is a direct connection to the ResourceManager address. When HA is + * enabled, the proxy handles the failover between the ResourceManagers as + * well. + */ + @Private + protected static T createRMProxy(final Configuration configuration, + final Class protocol) throws IOException { + YarnConfiguration conf = (configuration instanceof YarnConfiguration) + ? (YarnConfiguration) configuration + : new YarnConfiguration(configuration); + RetryPolicy retryPolicy = createRetryPolicy(conf); + if (HAUtil.isHAEnabled(conf)) { + RMFailoverProxyProvider provider = + INSTANCE.createRMFailoverProxyProvider(conf, protocol); + return (T) RetryProxy.create(protocol, provider, retryPolicy); + } else { + InetSocketAddress rmAddress = INSTANCE.getRMAddress(conf, protocol); + LOG.info("Connecting to ResourceManager at " + rmAddress); + T proxy = RMProxy.getProxy(conf, protocol, rmAddress); + return (T) RetryProxy.create(protocol, proxy, retryPolicy); + } + } + + /** + * @deprecated + * This method is deprecated and is not used by YARN internally any more. + * To create a proxy to the RM, use ClientRMProxy#createRMProxy or + * ServerRMProxy#createRMProxy. + * + * Create a proxy to the ResourceManager at the specified address. + * + * @param conf Configuration to generate retry policy + * @param protocol Protocol for the proxy + * @param rmAddress Address of the ResourceManager + * @param Type information of the proxy + * @return Proxy to the RM + * @throws IOException + */ + @Deprecated public static T createRMProxy(final Configuration conf, final Class protocol, InetSocketAddress rmAddress) throws IOException { RetryPolicy retryPolicy = createRetryPolicy(conf); @@ -57,12 +120,16 @@ public class RMProxy { return (T) RetryProxy.create(protocol, proxy, retryPolicy); } - private static T getProxy(final Configuration conf, + /** + * Get a proxy to the RM at the specified address. To be used to create a + * RetryProxy. + */ + @Private + static T getProxy(final Configuration conf, final Class protocol, final InetSocketAddress rmAddress) throws IOException { return UserGroupInformation.getCurrentUser().doAs( new PrivilegedAction() { - @Override public T run() { return (T) YarnRPC.create(conf).getProxy(protocol, rmAddress, conf); @@ -70,6 +137,50 @@ public class RMProxy { }); } + /** + * Helper method to create FailoverProxyProvider. 
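
The deprecation note above redirects callers to ClientRMProxy#createRMProxy or ServerRMProxy#createRMProxy, which resolve the RM address (or, under HA, the failover proxy provider) from the configuration themselves. A hedged usage sketch; ClientRMProxy is not part of the hunks shown here, so its exact signature is an assumption based on that note:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
    import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest;
    import org.apache.hadoop.yarn.client.ClientRMProxy;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class ClientProxySketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new YarnConfiguration();
        // No explicit address: the factory reads it (or the HA id list) from
        // the configuration and wraps the proxy in the policy built by
        // RMProxy.createRetryPolicy.
        ApplicationClientProtocol rm =
            ClientRMProxy.createRMProxy(conf, ApplicationClientProtocol.class);
        int nms = rm.getClusterMetrics(GetClusterMetricsRequest.newInstance())
            .getClusterMetrics().getNumNodeManagers();
        System.out.println(nms + " NodeManagers registered");
      }
    }
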
+ */ + private RMFailoverProxyProvider createRMFailoverProxyProvider( + Configuration conf, Class protocol) { + Class> defaultProviderClass; + try { + defaultProviderClass = (Class>) + Class.forName( + YarnConfiguration.DEFAULT_CLIENT_FAILOVER_PROXY_PROVIDER); + } catch (Exception e) { + throw new YarnRuntimeException("Invalid default failover provider class" + + YarnConfiguration.DEFAULT_CLIENT_FAILOVER_PROXY_PROVIDER, e); + } + + RMFailoverProxyProvider provider = ReflectionUtils.newInstance( + conf.getClass(YarnConfiguration.CLIENT_FAILOVER_PROXY_PROVIDER, + defaultProviderClass, RMFailoverProxyProvider.class), conf); + provider.init(conf, (RMProxy) this, protocol); + return provider; + } + + /** + * A RetryPolicy to allow failing over upto the specified maximum time. + */ + private static class FailoverUptoMaximumTimePolicy implements RetryPolicy { + private long maxTime; + + FailoverUptoMaximumTimePolicy(long maxTime) { + this.maxTime = maxTime; + } + + @Override + public RetryAction shouldRetry(Exception e, int retries, int failovers, + boolean isIdempotentOrAtMostOnce) throws Exception { + return System.currentTimeMillis() < maxTime + ? RetryAction.FAILOVER_AND_RETRY + : RetryAction.FAIL; + } + } + + /** + * Fetch retry policy from Configuration + */ @Private @VisibleForTesting public static RetryPolicy createRetryPolicy(Configuration conf) { @@ -81,19 +192,10 @@ public class RMProxy { conf.getLong( YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS, YarnConfiguration - .DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); - - if (rmConnectionRetryIntervalMS < 0) { - throw new YarnRuntimeException("Invalid Configuration. " + - YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + - " should not be negative."); - } + .DEFAULT_RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS); boolean waitForEver = (rmConnectWaitMS == -1); - - if (waitForEver) { - return RetryPolicies.RETRY_FOREVER; - } else { + if (!waitForEver) { if (rmConnectWaitMS < 0) { throw new YarnRuntimeException("Invalid Configuration. " + YarnConfiguration.RESOURCEMANAGER_CONNECT_MAX_WAIT_MS @@ -110,18 +212,54 @@ public class RMProxy { } } + // Handle HA case first + if (HAUtil.isHAEnabled(conf)) { + final long failoverSleepBaseMs = conf.getLong( + YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_BASE_MS, + rmConnectionRetryIntervalMS); + + final long failoverSleepMaxMs = conf.getLong( + YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_MAX_MS, + rmConnectionRetryIntervalMS); + + int maxFailoverAttempts = conf.getInt( + YarnConfiguration.CLIENT_FAILOVER_MAX_ATTEMPTS, -1); + + RetryPolicy basePolicy = RetryPolicies.TRY_ONCE_THEN_FAIL; + if (maxFailoverAttempts == -1) { + if (waitForEver) { + basePolicy = RetryPolicies.FAILOVER_FOREVER; + } else { + basePolicy = new FailoverUptoMaximumTimePolicy( + System.currentTimeMillis() + rmConnectWaitMS); + } + maxFailoverAttempts = 0; + } + + return RetryPolicies.failoverOnNetworkException(basePolicy, + maxFailoverAttempts, failoverSleepBaseMs, failoverSleepMaxMs); + } + + if (waitForEver) { + return RetryPolicies.RETRY_FOREVER; + } + + if (rmConnectionRetryIntervalMS < 0) { + throw new YarnRuntimeException("Invalid Configuration. 
" + + YarnConfiguration.RESOURCEMANAGER_CONNECT_RETRY_INTERVAL_MS + + " should not be negative."); + } + RetryPolicy retryPolicy = RetryPolicies.retryUpToMaximumTimeWithFixedSleep(rmConnectWaitMS, - rmConnectionRetryIntervalMS, - TimeUnit.MILLISECONDS); + rmConnectionRetryIntervalMS, TimeUnit.MILLISECONDS); Map, RetryPolicy> exceptionToPolicyMap = new HashMap, RetryPolicy>(); exceptionToPolicyMap.put(ConnectException.class, retryPolicy); //TO DO: after HADOOP-9576, IOException can be changed to EOFException exceptionToPolicyMap.put(IOException.class, retryPolicy); - - return RetryPolicies.retryByException(RetryPolicies.TRY_ONCE_THEN_FAIL, - exceptionToPolicyMap); + return RetryPolicies.retryByException( + RetryPolicies.TRY_ONCE_THEN_FAIL, exceptionToPolicyMap); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml index 9673826c2ae..f13d92be1c4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/yarn-default.xml @@ -424,6 +424,61 @@ + + When HA is enabled, the class to be used by Clients, AMs and + NMs to failover to the Active RM. It should extend + org.apache.hadoop.yarn.client.RMFailoverProxyProvider + yarn.client.failover-proxy-provider + org.apache.hadoop.yarn.client.ConfiguredRMFailoverProxyProvider + + + + When HA is enabled, the max number of times + FailoverProxyProvider should attempt failover. When set, + this overrides the yarn.resourcemanager.connect.max-wait.ms. When + not set, this is inferred from + yarn.resourcemanager.connect.max-wait.ms. + yarn.client.failover-max-attempts + + + + + When HA is enabled, the sleep base (in milliseconds) to be + used for calculating the exponential delay between failovers. When set, + this overrides the yarn.resourcemanager.connect.* settings. When + not set, yarn.resourcemanager.connect.retry-interval.ms is used instead. + + yarn.client.failover-sleep-base-ms + + + + + When HA is enabled, the maximum sleep time (in milliseconds) + between failovers. When set, this overrides the + yarn.resourcemanager.connect.* settings. When not set, + yarn.resourcemanager.connect.retry-interval.ms is used instead. + yarn.client.failover-sleep-max-ms + + + + + When HA is enabled, the number of retries per + attempt to connect to a ResourceManager. In other words, + it is the ipc.client.connect.max.retries to be used during + failover attempts + yarn.client.failover-retries + 0 + + + + When HA is enabled, the number of retries per + attempt to connect to a ResourceManager on socket timeouts. In other + words, it is the ipc.client.connect.max.retries.on.timeouts to be used + during failover attempts + yarn.client.failover-retries-on-socket-timeouts + 0 + + The maximum number of completed applications RM keeps. 
yarn.resourcemanager.max-completed-applications diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java index c25c5977b80..15a26e51260 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/src/main/java/org/apache/hadoop/yarn/server/api/ServerRMProxy.java @@ -23,25 +23,43 @@ import java.net.InetSocketAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.yarn.client.RMProxy; import org.apache.hadoop.yarn.conf.YarnConfiguration; -public class ServerRMProxy extends RMProxy { +import com.google.common.base.Preconditions; +public class ServerRMProxy extends RMProxy { private static final Log LOG = LogFactory.getLog(ServerRMProxy.class); - public static T createRMProxy(final Configuration configuration, - final Class protocol) throws IOException { - YarnConfiguration conf = (configuration instanceof YarnConfiguration) - ? (YarnConfiguration) configuration - : new YarnConfiguration(configuration); - InetSocketAddress rmAddress = getRMAddress(conf, protocol); - return createRMProxy(conf, protocol, rmAddress); + static { + INSTANCE = new ServerRMProxy(); } - private static InetSocketAddress getRMAddress(YarnConfiguration conf, - Class protocol) { + private ServerRMProxy() { + super(); + } + + /** + * Create a proxy to the ResourceManager for the specified protocol. + * @param configuration Configuration with all the required information. + * @param protocol Server protocol for which proxy is being requested. + * @param Type of proxy. + * @return Proxy to the ResourceManager for the specified server protocol. + * @throws IOException + */ + public static T createRMProxy(final Configuration configuration, + final Class protocol) throws IOException { + // This method exists only to initiate this class' static INSTANCE. 
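
The yarn.client.failover-* properties above feed straight into RMProxy#createRetryPolicy from the earlier hunk: with HA enabled the policy fails over a fixed number of times, or, when yarn.client.failover-max-attempts is unset, for as long as yarn.resourcemanager.connect.max-wait.ms permits (via FailoverUptoMaximumTimePolicy). A small illustrative sketch of tuning those knobs; the values are arbitrary:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.retry.RetryPolicy;
    import org.apache.hadoop.yarn.client.RMProxy;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;

    public class FailoverPolicySketch {
      public static void main(String[] args) {
        Configuration conf = new YarnConfiguration();
        conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true);
        // Cap the number of failovers explicitly ...
        conf.setInt(YarnConfiguration.CLIENT_FAILOVER_MAX_ATTEMPTS, 15);
        // ... with exponential back-off between 500 ms and 15 s.
        conf.setLong(YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_BASE_MS, 500L);
        conf.setLong(YarnConfiguration.CLIENT_FAILOVER_SLEEPTIME_MAX_MS, 15000L);

        // createRetryPolicy is @Private/@VisibleForTesting; production code
        // reaches it indirectly through ClientRMProxy/ServerRMProxy. Called
        // here only to show which policy the settings produce.
        RetryPolicy policy = RMProxy.createRetryPolicy(conf);
        System.out.println(policy.getClass().getName());
      }
    }
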
TODO: + // FIX if possible + return RMProxy.createRMProxy(configuration, protocol); + } + + @InterfaceAudience.Private + @Override + protected InetSocketAddress getRMAddress(YarnConfiguration conf, + Class protocol) { if (protocol == ResourceTracker.class) { return conf.getSocketAddr( YarnConfiguration.RM_RESOURCE_TRACKER_ADDRESS, @@ -55,4 +73,12 @@ public class ServerRMProxy extends RMProxy { throw new IllegalStateException(message); } } + + @InterfaceAudience.Private + @Override + protected void checkAllowedProtocols(Class protocol) { + Preconditions.checkArgument( + protocol.isAssignableFrom(ResourceTracker.class), + "ResourceManager does not support this protocol"); + } } \ No newline at end of file diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java index 54de419a63d..78bbea43852 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/MiniYARNCluster.java @@ -22,6 +22,7 @@ import java.io.File; import java.io.IOException; import java.net.InetAddress; import java.net.UnknownHostException; +import java.util.Collection; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; @@ -38,6 +39,7 @@ import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.service.CompositeService; import org.apache.hadoop.util.Shell; import org.apache.hadoop.util.Shell.ShellCommandExecutor; +import org.apache.hadoop.yarn.api.protocolrecords.GetClusterMetricsRequest; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.conf.HAUtil; import org.apache.hadoop.yarn.conf.YarnConfiguration; @@ -65,6 +67,8 @@ import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAt import org.apache.hadoop.yarn.server.resourcemanager.rmapp.attempt.event.RMAppAttemptUnregistrationEvent; import org.apache.hadoop.yarn.webapp.util.WebAppUtils; +import static org.junit.Assert.fail; + /** * Embedded Yarn minicluster for testcases that need to interact with a cluster. *
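
ServerRMProxy above supports only ResourceTracker (enforced by the Preconditions check on the HA path) and exists mainly so the static INSTANCE is initialised before delegating to RMProxy.createRMProxy. A hedged sketch of how a NodeManager-side component would obtain its tracker proxy through it; only the class and protocol names come from the hunk above, the rest is illustrative:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.yarn.conf.YarnConfiguration;
    import org.apache.hadoop.yarn.server.api.ResourceTracker;
    import org.apache.hadoop.yarn.server.api.ServerRMProxy;

    public class TrackerProxySketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new YarnConfiguration();
        // Resolves yarn.resourcemanager.resource-tracker.address (suffixed
        // with the RM id when HA is enabled) and applies the shared retry /
        // failover policy before handing back the proxy.
        ResourceTracker tracker =
            ServerRMProxy.createRMProxy(conf, ResourceTracker.class);
        System.out.println("Obtained proxy: " + tracker);
      }
    }
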

@@ -91,9 +95,11 @@ public class MiniYARNCluster extends CompositeService { private NodeManager[] nodeManagers; private ResourceManager[] resourceManagers; + private String[] rmIds; + + private boolean useFixedPorts; + private boolean useRpc = false; - private ResourceManagerWrapper resourceManagerWrapper; - private ConcurrentMap appMasters = new ConcurrentHashMap(16, 0.75f, 2); @@ -163,15 +169,7 @@ public class MiniYARNCluster extends CompositeService { } resourceManagers = new ResourceManager[numResourceManagers]; - for (int i = 0; i < numResourceManagers; i++) { - resourceManagers[i] = new ResourceManager(); - addService(new ResourceManagerWrapper(i)); - } - nodeManagers = new CustomNodeManager[numNodeManagers]; - for(int index = 0; index < numNodeManagers; index++) { - addService(new NodeManagerWrapper(index)); - nodeManagers[index] = new CustomNodeManager(); - } + nodeManagers = new NodeManager[numNodeManagers]; } /** @@ -185,20 +183,45 @@ public class MiniYARNCluster extends CompositeService { this(testName, 1, numNodeManagers, numLocalDirs, numLogDirs); } - @Override + @Override public void serviceInit(Configuration conf) throws Exception { + useFixedPorts = conf.getBoolean( + YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, + YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS); + useRpc = conf.getBoolean(YarnConfiguration.YARN_MINICLUSTER_USE_RPC, + YarnConfiguration.DEFAULT_YARN_MINICLUSTER_USE_RPC); + + if (useRpc && !useFixedPorts) { + throw new YarnRuntimeException("Invalid configuration!" + + " Minicluster can use rpc only when configured to use fixed ports"); + } + if (resourceManagers.length > 1) { conf.setBoolean(YarnConfiguration.RM_HA_ENABLED, true); - - StringBuilder rmIds = new StringBuilder(); - for (int i = 0; i < resourceManagers.length; i++) { - if (i != 0) { - rmIds.append(","); + if (conf.get(YarnConfiguration.RM_HA_IDS) == null) { + StringBuilder rmIds = new StringBuilder(); + for (int i = 0; i < resourceManagers.length; i++) { + if (i != 0) { + rmIds.append(","); + } + rmIds.append("rm" + i); } - rmIds.append("rm" + i); + conf.set(YarnConfiguration.RM_HA_IDS, rmIds.toString()); } - conf.set(YarnConfiguration.RM_HA_IDS, rmIds.toString()); + Collection rmIdsCollection = HAUtil.getRMHAIds(conf); + rmIds = rmIdsCollection.toArray(new String[rmIdsCollection.size()]); } + + for (int i = 0; i < resourceManagers.length; i++) { + resourceManagers[i] = new ResourceManager(); + addService(new ResourceManagerWrapper(i)); + } + for(int index = 0; index < nodeManagers.length; index++) { + nodeManagers[index] = + useRpc ? new CustomNodeManager() : new ShortCircuitedNodeManager(); + addService(new NodeManagerWrapper(index)); + } + super.serviceInit( conf instanceof YarnConfiguration ? conf : new YarnConfiguration(conf)); } @@ -213,11 +236,12 @@ public class MiniYARNCluster extends CompositeService { * * In an non-HA cluster, return the index of the only RM. 
* - * @return index of the active RM + * @return index of the active RM or -1 if none of them transition to + * active even after 5 seconds of waiting */ @InterfaceAudience.Private @VisibleForTesting - int getActiveRMIndex() { + public int getActiveRMIndex() { if (resourceManagers.length == 1) { return 0; } @@ -292,9 +316,7 @@ public class MiniYARNCluster extends CompositeService { } private void setHARMConfiguration(Configuration conf) { - String rmId = "rm" + index; String hostname = MiniYARNCluster.getHostname(); - conf.set(YarnConfiguration.RM_HA_ID, rmId); for (String confKey : YarnConfiguration.RM_SERVICES_ADDRESS_CONF_KEYS) { for (String id : HAUtil.getRMHAIds(conf)) { conf.set(HAUtil.addSuffix(confKey, id), hostname + ":0"); @@ -306,15 +328,17 @@ public class MiniYARNCluster extends CompositeService { protected synchronized void serviceInit(Configuration conf) throws Exception { conf.setBoolean(YarnConfiguration.IS_MINI_YARN_CLUSTER, true); - if (!conf.getBoolean( - YarnConfiguration.YARN_MINICLUSTER_FIXED_PORTS, - YarnConfiguration.DEFAULT_YARN_MINICLUSTER_FIXED_PORTS)) { + + if (!useFixedPorts) { if (HAUtil.isHAEnabled(conf)) { setHARMConfiguration(conf); } else { setNonHARMConfiguration(conf); } } + if (HAUtil.isHAEnabled(conf)) { + conf.set(YarnConfiguration.RM_HA_ID, rmIds[index]); + } resourceManagers[index].init(conf); resourceManagers[index].getRMContext().getDispatcher().register (RMAppAttemptEventType.class, @@ -500,7 +524,9 @@ public class MiniYARNCluster extends CompositeService { protected void doSecureLogin() throws IOException { // Don't try to login using keytab in the testcase. } + } + private class ShortCircuitedNodeManager extends CustomNodeManager { @Override protected NodeStatusUpdater createNodeStatusUpdater(Context context, Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { @@ -553,4 +579,28 @@ public class MiniYARNCluster extends CompositeService { }; } } + + /** + * Wait for all the NodeManagers to connect to the ResourceManager. + * + * @param timeout Time to wait (sleeps in 100 ms intervals) in milliseconds. + * @return true if all NodeManagers connect to the (Active) + * ResourceManager, false otherwise. 
+ * @throws YarnException + * @throws InterruptedException + */ + public boolean waitForNodeManagersToConnect(long timeout) + throws YarnException, InterruptedException { + ResourceManager rm = getResourceManager(); + GetClusterMetricsRequest req = GetClusterMetricsRequest.newInstance(); + + for (int i = 0; i < timeout / 100; i++) { + if (nodeManagers.length == rm.getClientRMService().getClusterMetrics(req) + .getClusterMetrics().getNumNodeManagers()) { + return true; + } + Thread.sleep(100); + } + return false; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestMiniYARNClusterForHA.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestMiniYARNClusterForHA.java index f62124e5d39..05266858a22 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestMiniYARNClusterForHA.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/src/test/java/org/apache/hadoop/yarn/server/TestMiniYARNClusterForHA.java @@ -33,6 +33,7 @@ import java.io.IOException; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotSame; +import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class TestMiniYARNClusterForHA { @@ -56,16 +57,7 @@ public class TestMiniYARNClusterForHA { @Test public void testClusterWorks() throws YarnException, InterruptedException { - ResourceManager rm = cluster.getResourceManager(0); - GetClusterMetricsRequest req = GetClusterMetricsRequest.newInstance(); - - for (int i = 0; i < 600; i++) { - if (1 == rm.getClientRMService().getClusterMetrics(req) - .getClusterMetrics().getNumNodeManagers()) { - return; - } - Thread.sleep(100); - } - fail("NodeManager never registered with the RM"); + assertTrue("NMs fail to connect to the RM", + cluster.waitForNodeManagersToConnect(5000)); } } From 9ca394d54dd24e67867c845a58150f6b51761512 Mon Sep 17 00:00:00 2001 From: Christopher Douglas Date: Tue, 17 Dec 2013 22:54:31 +0000 Subject: [PATCH 15/32] MAPREDUCE-5189. Add policies and wiring to respond to preemption requests from YARN. Contributed by Carlo Curino. 
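
The MiniYARNCluster changes above (a public getActiveRMIndex, waitForNodeManagersToConnect, and a per-index yarn.resourcemanager.ha.id) are what TestRMFailover and TestMiniYARNClusterForHA build on. A compressed, hypothetical test fragment reusing only methods shown in those hunks; cluster and req are assumed to be set up exactly as in TestRMFailover#setup:

    // Which RM came up active, and are all NMs registered with it?
    int active = cluster.getActiveRMIndex();
    assertFalse("RM never turned active", active == -1);
    assertTrue("NMs failed to connect to the RM",
        cluster.waitForNodeManagersToConnect(5000));

    // Force a failover and wait for the NMs to re-register with the new RM.
    cluster.getResourceManager(active).getRMContext().getRMAdminService()
        .transitionToStandby(req);
    cluster.getResourceManager(1 - active).getRMContext().getRMAdminService()
        .transitionToActive(req);
    assertTrue("NMs failed to reconnect after failover",
        cluster.waitForNodeManagersToConnect(5000));
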
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551748 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 + .../mapred/TaskAttemptListenerImpl.java | 6 +- .../hadoop/mapreduce/v2/app/MRAppMaster.java | 26 +++- .../v2/app/rm/RMContainerAllocator.java | 51 ++++++- .../app/rm/preemption/AMPreemptionPolicy.java | 117 ++++++++++++++++ .../rm/preemption/KillAMPreemptionPolicy.java | 111 +++++++++++++++ .../rm/preemption/NoopAMPreemptionPolicy.java | 72 ++++++++++ .../mapred/TestTaskAttemptListenerImpl.java | 6 +- .../apache/hadoop/mapreduce/v2/app/MRApp.java | 4 +- .../mapreduce/v2/app/MRAppBenchmark.java | 9 +- .../hadoop/mapreduce/v2/app/TestFail.java | 6 +- .../v2/app/TestRMContainerAllocator.java | 16 ++- .../apache/hadoop/mapreduce/JobCounter.java | 6 +- .../apache/hadoop/mapreduce/MRJobConfig.java | 11 +- .../mapreduce/checkpoint/EnumCounter.java | 26 ++++ .../checkpoint/TaskCheckpointID.java | 126 ++++++++++++++++++ .../hadoop/mapreduce/JobCounter.properties | 4 + 17 files changed, 575 insertions(+), 25 deletions(-) create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/KillAMPreemptionPolicy.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/NoopAMPreemptionPolicy.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/EnumCounter.java create mode 100644 hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/TaskCheckpointID.java diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 2a80119b0f9..63daf66a3ec 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -74,6 +74,9 @@ Trunk (Unreleased) MAPREDUCE-5197. Add a service for checkpointing task state. (Carlo Curino via cdouglas) + MAPREDUCE-5189. Add policies and wiring to respond to preemption requests + from YARN. (Carlo Curino via cdouglas) + BUG FIXES MAPREDUCE-4272. SortedRanges.Range#compareTo is not spec compliant. 
diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java index aae95d47fe1..8af7e379873 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapred/TaskAttemptListenerImpl.java @@ -48,6 +48,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptStatusUpdateEvent.TaskAttemptStatus; import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; import org.apache.hadoop.mapreduce.v2.app.security.authorize.MRAMPolicyProvider; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.security.authorize.PolicyProvider; @@ -84,14 +85,17 @@ public class TaskAttemptListenerImpl extends CompositeService .newSetFromMap(new ConcurrentHashMap()); private JobTokenSecretManager jobTokenSecretManager = null; + private AMPreemptionPolicy preemptionPolicy; public TaskAttemptListenerImpl(AppContext context, JobTokenSecretManager jobTokenSecretManager, - RMHeartbeatHandler rmHeartbeatHandler) { + RMHeartbeatHandler rmHeartbeatHandler, + AMPreemptionPolicy preemptionPolicy) { super(TaskAttemptListenerImpl.class.getName()); this.context = context; this.jobTokenSecretManager = jobTokenSecretManager; this.rmHeartbeatHandler = rmHeartbeatHandler; + this.preemptionPolicy = preemptionPolicy; } @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java index b60b64764a2..ca6aadfb1cc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/MRAppMaster.java @@ -102,6 +102,8 @@ import org.apache.hadoop.mapreduce.v2.app.rm.RMCommunicator; import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerRequestor; import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy; import org.apache.hadoop.mapreduce.v2.app.speculate.DefaultSpeculator; import org.apache.hadoop.mapreduce.v2.app.speculate.Speculator; import org.apache.hadoop.mapreduce.v2.app.speculate.SpeculatorEvent; @@ -188,8 +190,8 @@ public class MRAppMaster extends CompositeService { private ContainerLauncher containerLauncher; private EventHandler committerEventHandler; private Speculator speculator; - private TaskAttemptListener taskAttemptListener; - private JobTokenSecretManager jobTokenSecretManager = + protected TaskAttemptListener taskAttemptListener; + 
protected JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager(); private JobId jobId; private boolean newApiCommitter; @@ -197,6 +199,7 @@ public class MRAppMaster extends CompositeService { private JobEventDispatcher jobEventDispatcher; private JobHistoryEventHandler jobHistoryEventHandler; private SpeculatorEventDispatcher speculatorEventDispatcher; + private AMPreemptionPolicy preemptionPolicy; private Job job; private Credentials jobCredentials = new Credentials(); // Filled during init @@ -383,8 +386,12 @@ public class MRAppMaster extends CompositeService { committerEventHandler = createCommitterEventHandler(context, committer); addIfService(committerEventHandler); + //policy handling preemption requests from RM + preemptionPolicy = createPreemptionPolicy(conf); + preemptionPolicy.init(context); + //service to handle requests to TaskUmbilicalProtocol - taskAttemptListener = createTaskAttemptListener(context); + taskAttemptListener = createTaskAttemptListener(context, preemptionPolicy); addIfService(taskAttemptListener); //service to log job history events @@ -475,6 +482,12 @@ public class MRAppMaster extends CompositeService { return committer; } + protected AMPreemptionPolicy createPreemptionPolicy(Configuration conf) { + return ReflectionUtils.newInstance(conf.getClass( + MRJobConfig.MR_AM_PREEMPTION_POLICY, + NoopAMPreemptionPolicy.class, AMPreemptionPolicy.class), conf); + } + protected boolean keepJobFiles(JobConf conf) { return (conf.getKeepTaskFilesPattern() != null || conf .getKeepFailedTaskFiles()); @@ -692,10 +705,11 @@ public class MRAppMaster extends CompositeService { } } - protected TaskAttemptListener createTaskAttemptListener(AppContext context) { + protected TaskAttemptListener createTaskAttemptListener(AppContext context, + AMPreemptionPolicy preemptionPolicy) { TaskAttemptListener lis = new TaskAttemptListenerImpl(context, jobTokenSecretManager, - getRMHeartbeatHandler()); + getRMHeartbeatHandler(), preemptionPolicy); return lis; } @@ -805,7 +819,7 @@ public class MRAppMaster extends CompositeService { , containerID); } else { this.containerAllocator = new RMContainerAllocator( - this.clientService, this.context); + this.clientService, this.context, preemptionPolicy); } ((Service)this.containerAllocator).init(getConfig()); ((Service)this.containerAllocator).start(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java index d6e45931632..dd739f2b7c3 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/RMContainerAllocator.java @@ -57,6 +57,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptDiagnosticsUpdate import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptKillEvent; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; import org.apache.hadoop.util.StringInterner; import org.apache.hadoop.yarn.api.protocolrecords.AllocateResponse; import 
org.apache.hadoop.yarn.api.records.Container; @@ -67,6 +68,7 @@ import org.apache.hadoop.yarn.api.records.NMToken; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.NodeReport; import org.apache.hadoop.yarn.api.records.NodeState; +import org.apache.hadoop.yarn.api.records.PreemptionMessage; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.client.api.NMTokenCache; import org.apache.hadoop.yarn.exceptions.YarnRuntimeException; @@ -147,13 +149,17 @@ public class RMContainerAllocator extends RMContainerRequestor private long retryInterval; private long retrystartTime; + private final AMPreemptionPolicy preemptionPolicy; + BlockingQueue eventQueue = new LinkedBlockingQueue(); private ScheduleStats scheduleStats = new ScheduleStats(); - public RMContainerAllocator(ClientService clientService, AppContext context) { + public RMContainerAllocator(ClientService clientService, AppContext context, + AMPreemptionPolicy preemptionPolicy) { super(clientService, context); + this.preemptionPolicy = preemptionPolicy; this.stopped = new AtomicBoolean(false); } @@ -361,11 +367,15 @@ public class RMContainerAllocator extends RMContainerRequestor LOG.error("Could not deallocate container for task attemptId " + aId); } + preemptionPolicy.handleCompletedContainer(event.getAttemptID()); } else if ( event.getType() == ContainerAllocator.EventType.CONTAINER_FAILED) { ContainerFailedEvent fEv = (ContainerFailedEvent) event; String host = getHost(fEv.getContMgrAddress()); containerFailedOnHost(host); + // propagate failures to preemption policy to discard checkpoints for + // failed tasks + preemptionPolicy.handleFailedContainer(event.getAttemptID()); } } @@ -399,7 +409,7 @@ public class RMContainerAllocator extends RMContainerRequestor } scheduledRequests.reduces.clear(); - //preempt for making space for atleast one map + //preempt for making space for at least one map int premeptionLimit = Math.max(mapResourceReqt, (int) (maxReducePreemptionLimit * memLimit)); @@ -409,7 +419,7 @@ public class RMContainerAllocator extends RMContainerRequestor int toPreempt = (int) Math.ceil((float) preemptMem/reduceResourceReqt); toPreempt = Math.min(toPreempt, assignedRequests.reduces.size()); - LOG.info("Going to preempt " + toPreempt); + LOG.info("Going to preempt " + toPreempt + " due to lack of space for maps"); assignedRequests.preemptReduce(toPreempt); } } @@ -595,6 +605,14 @@ public class RMContainerAllocator extends RMContainerRequestor } List finishedContainers = response.getCompletedContainersStatuses(); + + // propagate preemption requests + final PreemptionMessage preemptReq = response.getPreemptionMessage(); + if (preemptReq != null) { + preemptionPolicy.preempt( + new PreemptionContext(assignedRequests), preemptReq); + } + if (newContainers.size() + finishedContainers.size() > 0 || headRoom != newHeadRoom) { //something changed recalculateReduceSchedule = true; @@ -630,7 +648,9 @@ public class RMContainerAllocator extends RMContainerRequestor String diagnostics = StringInterner.weakIntern(cont.getDiagnostics()); eventHandler.handle(new TaskAttemptDiagnosticsUpdateEvent(attemptID, diagnostics)); - } + + preemptionPolicy.handleCompletedContainer(attemptID); + } } return newContainers; } @@ -1232,4 +1252,27 @@ public class RMContainerAllocator extends RMContainerRequestor " RackLocal:" + rackLocalAssigned); } } + + static class PreemptionContext extends AMPreemptionPolicy.Context { + final AssignedRequests reqs; + + 
PreemptionContext(AssignedRequests reqs) { + this.reqs = reqs; + } + @Override + public TaskAttemptId getTaskAttempt(ContainerId container) { + return reqs.get(container); + } + + @Override + public List getContainers(TaskType t){ + if(TaskType.REDUCE.equals(t)) + return new ArrayList(reqs.reduces.values()); + if(TaskType.MAP.equals(t)) + return new ArrayList(reqs.maps.values()); + return null; + } + + } + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java new file mode 100644 index 00000000000..0bbe75bdea3 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/AMPreemptionPolicy.java @@ -0,0 +1,117 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.hadoop.mapreduce.v2.app.rm.preemption; + +import java.util.List; + +import org.apache.hadoop.mapred.TaskAttemptID; +import org.apache.hadoop.mapred.TaskID; +import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; +import org.apache.hadoop.mapreduce.v2.api.records.TaskType; +import org.apache.hadoop.mapreduce.v2.app.AppContext; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.PreemptionMessage; + +/** + * Policy encoding the {@link org.apache.hadoop.mapreduce.v2.app.MRAppMaster} + * response to preemption requests from the ResourceManager. + * @see org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator + */ +public interface AMPreemptionPolicy { + + public abstract class Context { + + /** + * @param container ID of container to preempt + * @return Task associated with the running container or null + * if no task is bound to that container. + */ + public abstract TaskAttemptId getTaskAttempt(ContainerId container); + + /** + * Method provides the complete list of containers running task of type t + * for this AM. + * @param t the type of containers + * @return a map containing + */ + public abstract List getContainers(TaskType t); + + } + + public void init(AppContext context); + + /** + * Callback informing the policy of ResourceManager. requests for resources + * to return to the cluster. The policy may take arbitrary action to satisfy + * requests by checkpointing task state, returning containers, or ignoring + * requests. 
The RM may elect to enforce these requests by forcibly killing + * containers not returned after some duration. + * @param context Handle to the current state of running containers + * @param preemptionRequests Request from RM for resources to return. + */ + public void preempt(Context context, PreemptionMessage preemptionRequests); + + /** + * This method is invoked by components interested to learn whether a certain + * task is being preempted. + * @param attemptID Task attempt to query + * @return true if this attempt is being preempted + */ + public boolean isPreempted(TaskAttemptId attemptID); + + /** + * This method is used to report to the policy that a certain task has been + * successfully preempted (for bookeeping, counters, etc..) + * @param attemptID Task attempt that preempted + */ + public void reportSuccessfulPreemption(TaskAttemptID attemptID); + + /** + * Callback informing the policy of containers exiting with a failure. This + * allows the policy to implemnt cleanup/compensating actions. + * @param attemptID Task attempt that failed + */ + public void handleFailedContainer(TaskAttemptId attemptID); + + /** + * Callback informing the policy of containers exiting cleanly. This is + * reported to the policy for bookeeping purposes. + * @param attemptID Task attempt that completed + */ + public void handleCompletedContainer(TaskAttemptId attemptID); + + /** + * Method to retrieve the latest checkpoint for a given {@link TaskID} + * @param taskId TaskID + * @return CheckpointID associated with this task or null + */ + public TaskCheckpointID getCheckpointID(TaskID taskId); + + /** + * Method to store the latest {@link + * org.apache.hadoop.mapreduce.checkpoint.CheckpointID} for a given {@link + * TaskID}. Assigning a null is akin to remove all previous checkpoints for + * this task. + * @param taskId TaskID + * @param cid Checkpoint to assign or null to remove it. + */ + public void setCheckpointID(TaskID taskId, TaskCheckpointID cid); + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/KillAMPreemptionPolicy.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/KillAMPreemptionPolicy.java new file mode 100644 index 00000000000..100ef4f7af4 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/KillAMPreemptionPolicy.java @@ -0,0 +1,111 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
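
The AMPreemptionPolicy contract above is resolved reflectively by MRAppMaster#createPreemptionPolicy in the earlier hunk, so a job can opt into the kill-based policy introduced just below (or any custom implementation) purely through configuration. A hedged sketch using the MRJobConfig constant named in that method (its literal key string is not shown in this excerpt); it assumes the policy classes are on the submitting client's classpath, otherwise the equivalent string-valued property would be set instead:

    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.MRJobConfig;
    import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy;
    import org.apache.hadoop.mapreduce.v2.app.rm.preemption.KillAMPreemptionPolicy;

    public class PreemptionPolicySketch {
      public static void main(String[] args) throws Exception {
        Job job = Job.getInstance();
        // Have the AM answer RM preemption requests by killing the targeted
        // task attempts; the default resolved by createPreemptionPolicy is
        // NoopAMPreemptionPolicy, which ignores them.
        job.getConfiguration().setClass(MRJobConfig.MR_AM_PREEMPTION_POLICY,
            KillAMPreemptionPolicy.class, AMPreemptionPolicy.class);
        // ... configure input/output and submit as usual ...
      }
    }
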
+*/ +package org.apache.hadoop.mapreduce.v2.app.rm.preemption; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.mapred.TaskAttemptID; +import org.apache.hadoop.mapred.TaskID; +import org.apache.hadoop.mapreduce.JobCounter; +import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; +import org.apache.hadoop.mapreduce.v2.app.AppContext; +import org.apache.hadoop.mapreduce.v2.app.job.event.JobCounterUpdateEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEvent; +import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptEventType; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.PreemptionContainer; +import org.apache.hadoop.yarn.api.records.PreemptionMessage; +import org.apache.hadoop.yarn.event.EventHandler; + +/** + * Sample policy that aggressively kills tasks when requested. + */ +public class KillAMPreemptionPolicy implements AMPreemptionPolicy { + + private static final Log LOG = + LogFactory.getLog(KillAMPreemptionPolicy.class); + + @SuppressWarnings("rawtypes") + private EventHandler dispatcher = null; + + @Override + public void init(AppContext context) { + dispatcher = context.getEventHandler(); + } + + @Override + public void preempt(Context ctxt, PreemptionMessage preemptionRequests) { + // for both strict and negotiable preemption requests kill the + // container + for (PreemptionContainer c : + preemptionRequests.getStrictContract().getContainers()) { + killContainer(ctxt, c); + } + for (PreemptionContainer c : + preemptionRequests.getContract().getContainers()) { + killContainer(ctxt, c); + } + } + + @SuppressWarnings("unchecked") + private void killContainer(Context ctxt, PreemptionContainer c){ + ContainerId reqCont = c.getId(); + TaskAttemptId reqTask = ctxt.getTaskAttempt(reqCont); + LOG.info("Evicting " + reqTask); + dispatcher.handle(new TaskAttemptEvent(reqTask, + TaskAttemptEventType.TA_KILL)); + + // add preemption to counters + JobCounterUpdateEvent jce = new JobCounterUpdateEvent(reqTask + .getTaskId().getJobId()); + jce.addCounterUpdate(JobCounter.TASKS_REQ_PREEMPT, 1); + dispatcher.handle(jce); + } + + @Override + public void handleFailedContainer(TaskAttemptId attemptID) { + // ignore + } + + @Override + public boolean isPreempted(TaskAttemptId yarnAttemptID) { + return false; + } + + @Override + public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) { + // ignore + } + + @Override + public TaskCheckpointID getCheckpointID(TaskID taskId) { + return null; + } + + @Override + public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) { + // ignore + } + + @Override + public void handleCompletedContainer(TaskAttemptId attemptID) { + // ignore + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/NoopAMPreemptionPolicy.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/NoopAMPreemptionPolicy.java new file mode 100644 index 00000000000..0c020aca22b --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/rm/preemption/NoopAMPreemptionPolicy.java @@ -0,0 +1,72 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor 
license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ +package org.apache.hadoop.mapreduce.v2.app.rm.preemption; + +import org.apache.hadoop.mapred.TaskAttemptID; +import org.apache.hadoop.mapred.TaskID; +import org.apache.hadoop.mapreduce.checkpoint.TaskCheckpointID; +import org.apache.hadoop.mapreduce.v2.api.records.TaskAttemptId; +import org.apache.hadoop.mapreduce.v2.app.AppContext; +import org.apache.hadoop.yarn.api.records.PreemptionMessage; + +/** + * NoOp policy that ignores all the requests for preemption. + */ +public class NoopAMPreemptionPolicy implements AMPreemptionPolicy { + + @Override + public void init(AppContext context){ + // do nothing + } + + @Override + public void preempt(Context ctxt, PreemptionMessage preemptionRequests) { + // do nothing, ignore all requeusts + } + + @Override + public void handleFailedContainer(TaskAttemptId attemptID) { + // do nothing + } + + @Override + public boolean isPreempted(TaskAttemptId yarnAttemptID) { + return false; + } + + @Override + public void reportSuccessfulPreemption(TaskAttemptID taskAttemptID) { + // ignore + } + + @Override + public TaskCheckpointID getCheckpointID(TaskID taskId) { + return null; + } + + @Override + public void setCheckpointID(TaskID taskId, TaskCheckpointID cid) { + // ignore + } + + @Override + public void handleCompletedContainer(TaskAttemptId attemptID) { + // ignore + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java index 506523db76b..ba8e3d30261 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapred/TestTaskAttemptListenerImpl.java @@ -60,7 +60,7 @@ public class TestTaskAttemptListenerImpl { JobTokenSecretManager jobTokenSecretManager, RMHeartbeatHandler rmHeartbeatHandler, TaskHeartbeatHandler hbHandler) { - super(context, jobTokenSecretManager, rmHeartbeatHandler); + super(context, jobTokenSecretManager, rmHeartbeatHandler, null); this.taskHeartbeatHandler = hbHandler; } @@ -191,7 +191,7 @@ public class TestTaskAttemptListenerImpl { mock(RMHeartbeatHandler.class); final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class); TaskAttemptListenerImpl listener = - new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler) { + new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) { @Override protected void registerHeartbeatHandler(Configuration conf) { taskHeartbeatHandler = hbHandler; @@ -245,7 +245,7 @@ public class 
TestTaskAttemptListenerImpl { mock(RMHeartbeatHandler.class); final TaskHeartbeatHandler hbHandler = mock(TaskHeartbeatHandler.class); TaskAttemptListenerImpl listener = - new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler) { + new TaskAttemptListenerImpl(appCtx, secret, rmHeartbeatHandler, null) { @Override protected void registerHeartbeatHandler(Configuration conf) { taskHeartbeatHandler = hbHandler; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java index de573fe3007..7f698c7c035 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRApp.java @@ -79,6 +79,7 @@ import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMHeartbeatHandler; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; import org.apache.hadoop.mapreduce.v2.util.MRApps; import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem; import org.apache.hadoop.net.NetUtils; @@ -467,7 +468,8 @@ public class MRApp extends MRAppMaster { } @Override - protected TaskAttemptListener createTaskAttemptListener(AppContext context) { + protected TaskAttemptListener createTaskAttemptListener( + AppContext context, AMPreemptionPolicy policy) { return new TaskAttemptListener(){ @Override public InetSocketAddress getAddress() { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java index a6496d4e96e..baff0c069f4 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/MRAppBenchmark.java @@ -33,6 +33,8 @@ import org.apache.hadoop.mapreduce.v2.app.job.event.TaskAttemptContainerAssigned import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocator; import org.apache.hadoop.mapreduce.v2.app.rm.ContainerAllocatorEvent; import org.apache.hadoop.mapreduce.v2.app.rm.RMContainerAllocator; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy; import org.apache.hadoop.service.AbstractService; import org.apache.hadoop.yarn.api.ApplicationMasterProtocol; import org.apache.hadoop.yarn.api.protocolrecords.AllocateRequest; @@ -61,6 +63,8 @@ public class MRAppBenchmark { /** * Runs memory and time benchmark with Mock MRApp. 
+ * @param app Application to submit + * @throws Exception On application failure */ public void run(MRApp app) throws Exception { Logger rootLogger = LogManager.getRootLogger(); @@ -133,6 +137,7 @@ public class MRAppBenchmark { protected void serviceStart() throws Exception { thread = new Thread(new Runnable() { @Override + @SuppressWarnings("unchecked") public void run() { ContainerAllocatorEvent event = null; while (!Thread.currentThread().isInterrupted()) { @@ -192,7 +197,9 @@ public class MRAppBenchmark { @Override protected ContainerAllocator createContainerAllocator( ClientService clientService, AppContext context) { - return new RMContainerAllocator(clientService, context) { + + AMPreemptionPolicy policy = new NoopAMPreemptionPolicy(); + return new RMContainerAllocator(clientService, context, policy) { @Override protected ApplicationMasterProtocol createSchedulerProxy() { return new ApplicationMasterProtocol() { diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java index 5d5af9435c6..0fabb207f27 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestFail.java @@ -43,6 +43,7 @@ import org.apache.hadoop.mapreduce.v2.app.job.impl.TaskAttemptImpl; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncher; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherEvent; import org.apache.hadoop.mapreduce.v2.app.launcher.ContainerLauncherImpl; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.AMPreemptionPolicy; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.yarn.api.ContainerManagementProtocol; import org.apache.hadoop.yarn.api.records.ContainerId; @@ -247,13 +248,14 @@ public class TestFail { super(maps, reduces, false, "TimeOutTaskMRApp", true); } @Override - protected TaskAttemptListener createTaskAttemptListener(AppContext context) { + protected TaskAttemptListener createTaskAttemptListener( + AppContext context, AMPreemptionPolicy policy) { //This will create the TaskAttemptListener with TaskHeartbeatHandler //RPC servers are not started //task time out is reduced //when attempt times out, heartbeat handler will send the lost event //leading to Attempt failure - return new TaskAttemptListenerImpl(getContext(), null, null) { + return new TaskAttemptListenerImpl(getContext(), null, null, policy) { @Override public void startRpcServer(){}; @Override diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java index 9a962364e92..3a6644e4349 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/TestRMContainerAllocator.java @@ -18,6 +18,8 @@ package 
org.apache.hadoop.mapreduce.v2.app; +import org.apache.hadoop.mapreduce.v2.app.rm.preemption.NoopAMPreemptionPolicy; + import static org.mockito.Matchers.anyFloat; import static org.mockito.Matchers.anyInt; import static org.mockito.Matchers.isA; @@ -1428,14 +1430,15 @@ public class TestRMContainerAllocator { // Use this constructor when using a real job. MyContainerAllocator(MyResourceManager rm, ApplicationAttemptId appAttemptId, AppContext context) { - super(createMockClientService(), context); + super(createMockClientService(), context, new NoopAMPreemptionPolicy()); this.rm = rm; } // Use this constructor when you are using a mocked job. public MyContainerAllocator(MyResourceManager rm, Configuration conf, ApplicationAttemptId appAttemptId, Job job) { - super(createMockClientService(), createAppContext(appAttemptId, job)); + super(createMockClientService(), createAppContext(appAttemptId, job), + new NoopAMPreemptionPolicy()); this.rm = rm; super.init(conf); super.start(); @@ -1444,7 +1447,8 @@ public class TestRMContainerAllocator { public MyContainerAllocator(MyResourceManager rm, Configuration conf, ApplicationAttemptId appAttemptId, Job job, Clock clock) { super(createMockClientService(), - createAppContext(appAttemptId, job, clock)); + createAppContext(appAttemptId, job, clock), + new NoopAMPreemptionPolicy()); this.rm = rm; super.init(conf); super.start(); @@ -1671,7 +1675,8 @@ public class TestRMContainerAllocator { ApplicationId.newInstance(1, 1)); RMContainerAllocator allocator = new RMContainerAllocator( - mock(ClientService.class), appContext) { + mock(ClientService.class), appContext, + new NoopAMPreemptionPolicy()) { @Override protected void register() { } @@ -1721,7 +1726,8 @@ public class TestRMContainerAllocator { @Test public void testCompletedContainerEvent() { RMContainerAllocator allocator = new RMContainerAllocator( - mock(ClientService.class), mock(AppContext.class)); + mock(ClientService.class), mock(AppContext.class), + new NoopAMPreemptionPolicy()); TaskAttemptId attemptId = MRBuilderUtils.newTaskAttemptId( MRBuilderUtils.newTaskId( diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java index 85e0267b041..f7a87d1ab88 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobCounter.java @@ -45,5 +45,9 @@ public enum JobCounter { TOTAL_LAUNCHED_UBERTASKS, NUM_UBER_SUBMAPS, NUM_UBER_SUBREDUCES, - NUM_FAILED_UBERTASKS + NUM_FAILED_UBERTASKS, + TASKS_REQ_PREEMPT, + CHECKPOINTS, + CHECKPOINT_BYTES, + CHECKPOINT_TIME } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 2622ec5da66..e696b865533 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ 
-459,7 +459,13 @@ public interface MRJobConfig { public static final String MR_AM_JOB_REDUCE_PREEMPTION_LIMIT = MR_AM_PREFIX + "job.reduce.preemption.limit"; public static final float DEFAULT_MR_AM_JOB_REDUCE_PREEMPTION_LIMIT = 0.5f; - + + /** + * Policy class encoding responses to preemption requests. + */ + public static final String MR_AM_PREEMPTION_POLICY = + MR_AM_PREFIX + "preemption.policy"; + /** AM ACL disabled. **/ public static final String JOB_AM_ACCESS_DISABLED = "mapreduce.job.am-access-disabled"; @@ -708,4 +714,7 @@ public interface MRJobConfig { public static final String MR_APPLICATION_TYPE = "MAPREDUCE"; + public static final String TASK_PREEMPTION = + "mapreduce.job.preemption"; + } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/EnumCounter.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/EnumCounter.java new file mode 100644 index 00000000000..d2ff26d6b4d --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/EnumCounter.java @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +public enum EnumCounter { + INPUTKEY, + INPUTVALUE, + OUTPUTRECORDS, + CHECKPOINT_BYTES, + CHECKPOINT_MS +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/TaskCheckpointID.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/TaskCheckpointID.java new file mode 100644 index 00000000000..102b84f2483 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/checkpoint/TaskCheckpointID.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.checkpoint; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableUtils; +import org.apache.hadoop.mapred.Counters; + +/** + * Implementation of CheckpointID used in MR. It contains a reference to an + * underlying FileSsytem based checkpoint, and various metadata about the + * cost of checkpoints and other counters. This is sent by the task to the AM + * to be stored and provided to the next execution of the same task. + */ +public class TaskCheckpointID implements CheckpointID{ + + FSCheckpointID rawId; + private List partialOutput; + private Counters counters; + + public TaskCheckpointID() { + this.rawId = new FSCheckpointID(); + this.partialOutput = new ArrayList(); + } + + public TaskCheckpointID(FSCheckpointID rawId, List partialOutput, + Counters counters) { + this.rawId = rawId; + this.counters = counters; + if(partialOutput == null) + this.partialOutput = new ArrayList(); + else + this.partialOutput = partialOutput; + } + + @Override + public void write(DataOutput out) throws IOException { + counters.write(out); + if (partialOutput == null) { + WritableUtils.writeVLong(out, 0L); + } else { + WritableUtils.writeVLong(out, partialOutput.size()); + for(Path p:partialOutput){ + Text.writeString(out, p.toString()); + } + } + rawId.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + partialOutput.clear(); + counters.readFields(in); + long numPout = WritableUtils.readVLong(in); + for(int i=0;i getPartialCommittedOutput() { + return partialOutput; + } + + public Counters getCounters() { + return counters; + } + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties index 59dff060571..42539a097b2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/org/apache/hadoop/mapreduce/JobCounter.properties @@ -27,3 +27,7 @@ SLOTS_MILLIS_MAPS.name= Total time spent by all maps in occupied slot SLOTS_MILLIS_REDUCES.name= Total time spent by all reduces in occupied slots (ms) FALLOW_SLOTS_MILLIS_MAPS.name= Total time spent by all maps waiting after reserving slots (ms) FALLOW_SLOTS_MILLIS_REDUCES.name= Total time spent by all reduces waiting after reserving slots (ms) +TASKS_REQ_PREEMPT.name= Tasks that have been asked to preempt +CHECKPOINTS.name= Number of checkpoints reported +CHECKPOINT_BYTES.name= Total amount of bytes in checkpoints +CHECKPOINT_TIME.name= Total time spent checkpointing (ms) \ No newline at end of file From d52bfdef952f294322728b293af32bb090ded0fd Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Wed, 18 Dec 2013 00:13:58 +0000 Subject: [PATCH 16/32] MAPREDUCE-5687. Fixed failure in TestYARNRunner caused by YARN-1446. Contributed by Jian He. 
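The TaskCheckpointID class added above travels from the task to the AM as a Hadoop Writable, so its write(DataOutput) and readFields(DataInput) must mirror each other field for field. The sketch below only illustrates that contract and the usual in-memory round-trip used to exercise it; ExampleCheckpointInfo and its fields are hypothetical stand-ins, and the only real APIs assumed are Writable, Text, WritableUtils, DataOutputBuffer and DataInputBuffer from hadoop-common.

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;

/** Hypothetical checkpoint metadata; illustrates the Writable contract only. */
class ExampleCheckpointInfo implements Writable {

  private String checkpointPath = "";
  private long checkpointBytes;

  ExampleCheckpointInfo() {
    // no-arg constructor required so readFields() can repopulate an instance
  }

  ExampleCheckpointInfo(String path, long bytes) {
    this.checkpointPath = path;
    this.checkpointBytes = bytes;
  }

  @Override
  public void write(DataOutput out) throws IOException {
    Text.writeString(out, checkpointPath);      // same helper TaskCheckpointID uses for paths
    WritableUtils.writeVLong(out, checkpointBytes);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    checkpointPath = Text.readString(in);       // must read in exactly the order written
    checkpointBytes = WritableUtils.readVLong(in);
  }

  /** Serializes src to a byte buffer and reads it back, as an RPC transfer would. */
  static ExampleCheckpointInfo roundTrip(ExampleCheckpointInfo src) throws IOException {
    DataOutputBuffer out = new DataOutputBuffer();
    src.write(out);
    DataInputBuffer in = new DataInputBuffer();
    in.reset(out.getData(), out.getLength());
    ExampleCheckpointInfo copy = new ExampleCheckpointInfo();
    copy.readFields(in);
    return copy;
  }
}

Keeping write() and readFields() symmetric (same field order, same varint helpers) is what lets the receiving side reconstruct the object without any per-field length prefix, which is also why TaskCheckpointID writes an explicit element count before its partial-output paths.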
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551774 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-mapreduce-project/CHANGES.txt | 3 +++ .../src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 63daf66a3ec..e3b559eaf32 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -249,6 +249,9 @@ Release 2.4.0 - UNRELEASED MAPREDUCE-5679. TestJobHistoryParsing has race condition (Liyin Liang via jlowe) + MAPREDUCE-5687. Fixed failure in TestYARNRunner caused by YARN-1446. (Jian He + via vinodkv) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java index 0c08b813aef..025a10f9124 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestYARNRunner.java @@ -75,6 +75,7 @@ import org.apache.hadoop.yarn.api.protocolrecords.GetQueueInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoRequest; import org.apache.hadoop.yarn.api.protocolrecords.GetQueueUserAclsInfoResponse; import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest; +import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ApplicationReport; import org.apache.hadoop.yarn.api.records.ApplicationSubmissionContext; @@ -208,7 +209,7 @@ public class TestYARNRunner extends TestCase { }; /* make sure kill calls finish application master */ when(clientRMProtocol.forceKillApplication(any(KillApplicationRequest.class))) - .thenReturn(null); + .thenReturn(KillApplicationResponse.newInstance(true)); delegate.killApplication(appId); verify(clientRMProtocol).forceKillApplication(any(KillApplicationRequest.class)); From 504bd0bca3ebed1941bbf5407fac0636447e745b Mon Sep 17 00:00:00 2001 From: Tsz-wo Sze Date: Wed, 18 Dec 2013 02:46:00 +0000 Subject: [PATCH 17/32] HDFS-5674. Editlog code cleanup: remove @SuppressWarnings("deprecation") in FSEditLogOp; change FSEditLogOpCodes.fromByte(..) to be more efficient; and change Some fields in FSEditLog to final. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551812 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 4 +++ .../hdfs/server/namenode/FSEditLog.java | 12 +++----- .../hdfs/server/namenode/FSEditLogOp.java | 27 ++++++----------- .../server/namenode/FSEditLogOpCodes.java | 28 ++++++++--------- .../server/namenode/TestFSEditLogLoader.java | 30 +++++++++++++++++++ 5 files changed, 60 insertions(+), 41 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 15980dd4dc1..12b21802a07 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -750,6 +750,10 @@ Release 2.4.0 - UNRELEASED HDFS-5629. Support HTTPS in JournalNode and SecondaryNameNode. 
(Haohui Mai via jing9) + HDFS-5674. Editlog code cleanup: remove @SuppressWarnings("deprecation") in + FSEditLogOp; change FSEditLogOpCodes.fromByte(..) to be more efficient; and + change Some fields in FSEditLog to final. (szetszwo) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 1abfcdfb5e7..098d4574d8a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -160,10 +160,10 @@ public class FSEditLog implements LogsPurgeable { private long totalTimeTransactions; // total time for all transactions private NameNodeMetrics metrics; - private NNStorage storage; - private Configuration conf; + private final NNStorage storage; + private final Configuration conf; - private List editsDirs; + private final List editsDirs; private ThreadLocal cache = new ThreadLocal() { @@ -176,7 +176,7 @@ public class FSEditLog implements LogsPurgeable { /** * The edit directories that are shared between primary and secondary. */ - private List sharedEditsDirs; + private final List sharedEditsDirs; private static class TransactionId { public long txid; @@ -203,10 +203,6 @@ public class FSEditLog implements LogsPurgeable { * @param editsDirs List of journals to use */ FSEditLog(Configuration conf, NNStorage storage, List editsDirs) { - init(conf, storage, editsDirs); - } - - private void init(Configuration conf, NNStorage storage, List editsDirs) { isSyncRunning = false; this.conf = conf; this.storage = storage; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java index 6a852c43678..5828c3e29a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOp.java @@ -118,11 +118,10 @@ import com.google.common.base.Preconditions; @InterfaceStability.Unstable public abstract class FSEditLogOp { public final FSEditLogOpCodes opCode; - long txid; + long txid = HdfsConstants.INVALID_TXID; byte[] rpcClientId = RpcConstants.DUMMY_CLIENT_ID; int rpcCallId = RpcConstants.INVALID_CALL_ID; - @SuppressWarnings("deprecation") final public static class OpInstanceCache { private EnumMap inst = new EnumMap(FSEditLogOpCodes.class); @@ -147,13 +146,10 @@ public abstract class FSEditLogOp { inst.put(OP_REASSIGN_LEASE, new ReassignLeaseOp()); inst.put(OP_GET_DELEGATION_TOKEN, new GetDelegationTokenOp()); inst.put(OP_RENEW_DELEGATION_TOKEN, new RenewDelegationTokenOp()); - inst.put(OP_CANCEL_DELEGATION_TOKEN, - new CancelDelegationTokenOp()); + inst.put(OP_CANCEL_DELEGATION_TOKEN, new CancelDelegationTokenOp()); inst.put(OP_UPDATE_MASTER_KEY, new UpdateMasterKeyOp()); - inst.put(OP_START_LOG_SEGMENT, - new LogSegmentOp(OP_START_LOG_SEGMENT)); - inst.put(OP_END_LOG_SEGMENT, - new LogSegmentOp(OP_END_LOG_SEGMENT)); + inst.put(OP_START_LOG_SEGMENT, new LogSegmentOp(OP_START_LOG_SEGMENT)); + inst.put(OP_END_LOG_SEGMENT, new LogSegmentOp(OP_END_LOG_SEGMENT)); inst.put(OP_UPDATE_BLOCKS, new UpdateBlocksOp()); 
inst.put(OP_ALLOW_SNAPSHOT, new AllowSnapshotOp()); @@ -163,12 +159,10 @@ public abstract class FSEditLogOp { inst.put(OP_RENAME_SNAPSHOT, new RenameSnapshotOp()); inst.put(OP_SET_GENSTAMP_V2, new SetGenstampV2Op()); inst.put(OP_ALLOCATE_BLOCK_ID, new AllocateBlockIdOp()); - inst.put(OP_ADD_CACHE_DIRECTIVE, - new AddCacheDirectiveInfoOp()); - inst.put(OP_MODIFY_CACHE_DIRECTIVE, - new ModifyCacheDirectiveInfoOp()); - inst.put(OP_REMOVE_CACHE_DIRECTIVE, - new RemoveCacheDirectiveInfoOp()); + + inst.put(OP_ADD_CACHE_DIRECTIVE, new AddCacheDirectiveInfoOp()); + inst.put(OP_MODIFY_CACHE_DIRECTIVE, new ModifyCacheDirectiveInfoOp()); + inst.put(OP_REMOVE_CACHE_DIRECTIVE, new RemoveCacheDirectiveInfoOp()); inst.put(OP_ADD_CACHE_POOL, new AddCachePoolOp()); inst.put(OP_MODIFY_CACHE_POOL, new ModifyCachePoolOp()); inst.put(OP_REMOVE_CACHE_POOL, new RemoveCachePoolOp()); @@ -185,7 +179,6 @@ public abstract class FSEditLogOp { */ private FSEditLogOp(FSEditLogOpCodes opCode) { this.opCode = opCode; - this.txid = HdfsConstants.INVALID_TXID; } public long getTransactionId() { @@ -3332,9 +3325,7 @@ public abstract class FSEditLogOp { * @param in The stream to read from. * @param logVersion The version of the data coming from the stream. */ - @SuppressWarnings("deprecation") - public Reader(DataInputStream in, StreamLimiter limiter, - int logVersion) { + public Reader(DataInputStream in, StreamLimiter limiter, int logVersion) { this.logVersion = logVersion; if (LayoutVersion.supports(Feature.EDITS_CHESKUM, logVersion)) { this.checksum = new PureJavaCrc32(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java index 26252c198c0..42cb6d7d4ed 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogOpCodes.java @@ -17,9 +17,6 @@ */ package org.apache.hadoop.hdfs.server.namenode; -import java.util.Map; -import java.util.HashMap; - import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; @@ -30,7 +27,6 @@ import org.apache.hadoop.classification.InterfaceStability; @InterfaceStability.Unstable public enum FSEditLogOpCodes { // last op code in file - OP_INVALID ((byte) -1), OP_ADD ((byte) 0), OP_RENAME_OLD ((byte) 1), // deprecated operation OP_DELETE ((byte) 2), @@ -69,9 +65,12 @@ public enum FSEditLogOpCodes { OP_ADD_CACHE_POOL ((byte) 35), OP_MODIFY_CACHE_POOL ((byte) 36), OP_REMOVE_CACHE_POOL ((byte) 37), - OP_MODIFY_CACHE_DIRECTIVE ((byte) 38); + OP_MODIFY_CACHE_DIRECTIVE ((byte) 38), - private byte opCode; + // Note that fromByte(..) depends on OP_INVALID being at the last position. 
+ OP_INVALID ((byte) -1); + + private final byte opCode; /** * Constructor @@ -91,14 +90,7 @@ public enum FSEditLogOpCodes { return opCode; } - private static final Map byteToEnum = - new HashMap(); - - static { - // initialize byte to enum map - for(FSEditLogOpCodes opCode : values()) - byteToEnum.put(opCode.getOpCode(), opCode); - } + private static final FSEditLogOpCodes[] VALUES = FSEditLogOpCodes.values(); /** * Converts byte to FSEditLogOpCodes enum value @@ -107,6 +99,12 @@ public enum FSEditLogOpCodes { * @return enum with byte value of opCode */ public static FSEditLogOpCodes fromByte(byte opCode) { - return byteToEnum.get(opCode); + if (opCode == -1) { + return OP_INVALID; + } + if (opCode >= 0 && opCode < OP_INVALID.ordinal()) { + return VALUES[opCode]; + } + return null; } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java index 4db7e6a7dfa..77293119085 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestFSEditLogLoader.java @@ -31,6 +31,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; import java.nio.channels.FileChannel; +import java.util.HashMap; import java.util.Map; import java.util.SortedMap; @@ -383,4 +384,33 @@ public class TestFSEditLogLoader { assertTrue(!validation.hasCorruptHeader()); assertEquals(HdfsConstants.INVALID_TXID, validation.getEndTxId()); } + + private static final Map byteToEnum = + new HashMap(); + static { + for(FSEditLogOpCodes opCode : FSEditLogOpCodes.values()) { + byteToEnum.put(opCode.getOpCode(), opCode); + } + } + + private static FSEditLogOpCodes fromByte(byte opCode) { + return byteToEnum.get(opCode); + } + + @Test + public void testFSEditLogOpCodes() throws IOException { + //try all codes + for(FSEditLogOpCodes c : FSEditLogOpCodes.values()) { + final byte code = c.getOpCode(); + assertEquals("c=" + c + ", code=" + code, + c, FSEditLogOpCodes.fromByte(code)); + } + + //try all byte values + for(int b = 0; b < (1 << Byte.SIZE); b++) { + final byte code = (byte)b; + assertEquals("b=" + b + ", code=" + code, + fromByte(code), FSEditLogOpCodes.fromByte(code)); + } + } } From af367d140993a423139b200d08d2400fa72267c9 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 18 Dec 2013 11:16:57 +0000 Subject: [PATCH 18/32] HADOOP-9611 mvn-rpmbuild against google-guice > 3.0 yields missing cglib dependency git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1551916 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 3 +++ hadoop-project/pom.xml | 6 ++++++ hadoop-tools/hadoop-extras/pom.xml | 5 +++++ hadoop-tools/hadoop-streaming/pom.xml | 5 +++++ hadoop-yarn-project/hadoop-yarn/pom.xml | 5 +++++ hadoop-yarn-project/pom.xml | 4 ++++ 6 files changed, 28 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b31423bb5b2..9a65867de87 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -474,6 +474,9 @@ Release 2.4.0 - UNRELEASED HADOOP-10106. Incorrect thread name in RPC log messages. 
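The FSEditLogOpCodes.fromByte(..) rewrite in HDFS-5674 above replaces a HashMap<Byte, FSEditLogOpCodes> lookup with direct indexing into a cached values() array, which is only safe because the real opcodes are contiguous from 0 and the single negative sentinel, OP_INVALID, is kept in the last position. A stripped-down, hypothetical enum showing the same technique (the constants here are illustrative, not the real HDFS opcode set):

/** Demo opcode enum; mirrors the array-lookup idea from HDFS-5674. */
enum DemoOpCode {
  OP_ADD((byte) 0),
  OP_DELETE((byte) 1),
  OP_MKDIR((byte) 2),
  // The sentinel must stay last so each real code's ordinal() equals its byte value.
  OP_INVALID((byte) -1);

  private final byte code;

  DemoOpCode(byte code) {
    this.code = code;
  }

  byte getCode() {
    return code;
  }

  // Enum.values() clones its backing array on every call, so cache it once.
  private static final DemoOpCode[] VALUES = values();

  /** Constant-time lookup with no boxing or hashing; unknown codes map to null. */
  static DemoOpCode fromByte(byte code) {
    if (code == -1) {
      return OP_INVALID;
    }
    if (code >= 0 && code < OP_INVALID.ordinal()) {
      return VALUES[code];
    }
    return null;
  }
}

The accompanying TestFSEditLogLoader change keeps a private HashMap-based fromByte() and checks that both implementations agree for every possible byte value, which is a cheap guard on the contiguity assumption if new opcodes are ever added out of order.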
(Ming Ma via jing9) + HADOOP-9611 mvn-rpmbuild against google-guice > 3.0 yields missing cglib + dependency (Timothy St. Clair via stevel) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 1a0c7eb11f3..ade8afa5f50 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -433,6 +433,12 @@ guice 3.0 + + + cglib + cglib + 2.2 + com.google.inject.extensions diff --git a/hadoop-tools/hadoop-extras/pom.xml b/hadoop-tools/hadoop-extras/pom.xml index eb9aad8451e..429f4a303a9 100644 --- a/hadoop-tools/hadoop-extras/pom.xml +++ b/hadoop-tools/hadoop-extras/pom.xml @@ -94,6 +94,11 @@ test-jar test + + cglib + cglib + test + diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index b6e4931fc71..9acc43b06a6 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -100,6 +100,11 @@ test-jar test + + cglib + cglib + test + diff --git a/hadoop-yarn-project/hadoop-yarn/pom.xml b/hadoop-yarn-project/hadoop-yarn/pom.xml index d9d9b9c945b..f534fb59630 100644 --- a/hadoop-yarn-project/hadoop-yarn/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/pom.xml @@ -111,6 +111,11 @@ com.google.inject guice + + cglib + cglib + provided + com.sun.jersey.jersey-test-framework jersey-test-framework-core diff --git a/hadoop-yarn-project/pom.xml b/hadoop-yarn-project/pom.xml index 610091182ba..5696220dfe1 100644 --- a/hadoop-yarn-project/pom.xml +++ b/hadoop-yarn-project/pom.xml @@ -132,6 +132,10 @@ com.google.inject guice + + cglib + cglib + com.sun.jersey jersey-server From a5ec6df324f259f7e0bbab6144a804c3a85620f4 Mon Sep 17 00:00:00 2001 From: Jonathan Turner Eagles Date: Wed, 18 Dec 2013 17:02:58 +0000 Subject: [PATCH 19/32] HADOOP-10171. TestRPC fails intermittently on jkd7 (Mit Desai via jeagles) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552024 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-common-project/hadoop-common/CHANGES.txt | 2 ++ .../src/test/java/org/apache/hadoop/ipc/TestRPC.java | 1 + 2 files changed, 3 insertions(+) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9a65867de87..9baee82ef0e 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -477,6 +477,8 @@ Release 2.4.0 - UNRELEASED HADOOP-9611 mvn-rpmbuild against google-guice > 3.0 yields missing cglib dependency (Timothy St. Clair via stevel) + HADOOP-10171. TestRPC fails intermittently on jkd7 (Mit Desai via jeagles) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java index f6917d2159a..33d1f68d4ad 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestRPC.java @@ -957,6 +957,7 @@ public class TestRPC { proxy.sleep(pingInterval*4); } finally { if (proxy != null) RPC.stopProxy(proxy); + server.stop(); } } From 9184c4d1794c9a2d02a7ae7807a00626ac35f8ec Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Wed, 18 Dec 2013 19:27:43 +0000 Subject: [PATCH 20/32] MAPREDUCE-5687. Correcting the previous commit by ushing the right patch. 
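The TestClientRedirect hunk that follows applies the same fix as the TestYARNRunner change in patch 16: after YARN-1446 the kill path expects a concrete KillApplicationResponse, so stubs and fake RMs must return KillApplicationResponse.newInstance(true) rather than null or an empty record. A minimal Mockito sketch of that stubbing, assuming ApplicationClientProtocol is the client-to-RM protocol interface being mocked (as in TestYARNRunner):

import static org.mockito.Matchers.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import org.apache.hadoop.yarn.api.ApplicationClientProtocol;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationRequest;
import org.apache.hadoop.yarn.api.protocolrecords.KillApplicationResponse;

public class KillApplicationStubExample {

  /**
   * Returns a mocked RM client protocol whose forceKillApplication reports a
   * completed kill, so callers that inspect the response do not fall over on null.
   */
  static ApplicationClientProtocol mockRmClientProtocol() throws Exception {
    ApplicationClientProtocol rmClient = mock(ApplicationClientProtocol.class);
    when(rmClient.forceKillApplication(any(KillApplicationRequest.class)))
        .thenReturn(KillApplicationResponse.newInstance(true));
    return rmClient;
  }
}

Returning real protocol records from stubs keeps mock-based tests close to what the RM actually sends, so a behavioural change like YARN-1446 surfaces at the stub instead of as a NullPointerException deep in the code under test.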
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552069 13f79535-47bb-0310-9956-ffa450edef68 --- .../test/java/org/apache/hadoop/mapred/TestClientRedirect.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java index 1f0b89f00ac..d85a23d6179 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/src/test/java/org/apache/hadoop/mapred/TestClientRedirect.java @@ -304,7 +304,7 @@ public class TestClientRedirect { @Override public KillApplicationResponse forceKillApplication( KillApplicationRequest request) throws IOException { - return recordFactory.newRecordInstance(KillApplicationResponse.class); + return KillApplicationResponse.newInstance(true); } @Override From c9d74139bc63a9144a5aab8909be5ebf47445269 Mon Sep 17 00:00:00 2001 From: Robert Joseph Evans Date: Wed, 18 Dec 2013 21:20:56 +0000 Subject: [PATCH 21/32] HADOOP-10164. Allow UGI to login with a known Subject (bobby) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552104 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 + .../hadoop/security/UserGroupInformation.java | 89 +++++++++++-------- 2 files changed, 54 insertions(+), 37 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 9baee82ef0e..c78bd83ec4b 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -399,6 +399,8 @@ Release 2.4.0 - UNRELEASED HADOOP-10168. fix javadoc of ReflectionUtils#copy. (Thejas Nair via suresh) + HADOOP-10164. Allow UGI to login with a known Subject (bobby) + OPTIMIZATIONS HADOOP-9748. 
Reduce blocking on UGI.ensureInitialized (daryn) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index 5f82250583b..972fc780fcf 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -477,7 +477,7 @@ public class UserGroupInformation { private static final AppConfigurationEntry[] SIMPLE_CONF = new AppConfigurationEntry[]{OS_SPECIFIC_LOGIN, HADOOP_LOGIN}; - + private static final AppConfigurationEntry[] USER_KERBEROS_CONF = new AppConfigurationEntry[]{OS_SPECIFIC_LOGIN, USER_KERBEROS_LOGIN, HADOOP_LOGIN}; @@ -682,45 +682,60 @@ public class UserGroupInformation { public synchronized static UserGroupInformation getLoginUser() throws IOException { if (loginUser == null) { - ensureInitialized(); - try { - Subject subject = new Subject(); - LoginContext login = - newLoginContext(authenticationMethod.getLoginAppName(), - subject, new HadoopConfiguration()); - login.login(); - UserGroupInformation realUser = new UserGroupInformation(subject); - realUser.setLogin(login); - realUser.setAuthenticationMethod(authenticationMethod); - realUser = new UserGroupInformation(login.getSubject()); - // If the HADOOP_PROXY_USER environment variable or property - // is specified, create a proxy user as the logged in user. - String proxyUser = System.getenv(HADOOP_PROXY_USER); - if (proxyUser == null) { - proxyUser = System.getProperty(HADOOP_PROXY_USER); - } - loginUser = proxyUser == null ? realUser : createProxyUser(proxyUser, realUser); - - String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION); - if (fileLocation != null) { - // Load the token storage file and put all of the tokens into the - // user. Don't use the FileSystem API for reading since it has a lock - // cycle (HADOOP-9212). - Credentials cred = Credentials.readTokenStorageFile( - new File(fileLocation), conf); - loginUser.addCredentials(cred); - } - loginUser.spawnAutoRenewalThreadForUserCreds(); - } catch (LoginException le) { - LOG.debug("failure to login", le); - throw new IOException("failure to login", le); - } - if (LOG.isDebugEnabled()) { - LOG.debug("UGI loginUser:"+loginUser); - } + loginUserFromSubject(null); } return loginUser; } + + /** + * Log in a user using the given subject + * @parma subject the subject to use when logging in a user, or null to + * create a new subject. + * @throws IOException if login fails + */ + @InterfaceAudience.Public + @InterfaceStability.Evolving + public synchronized + static void loginUserFromSubject(Subject subject) throws IOException { + ensureInitialized(); + try { + if (subject == null) { + subject = new Subject(); + } + LoginContext login = + newLoginContext(authenticationMethod.getLoginAppName(), + subject, new HadoopConfiguration()); + login.login(); + UserGroupInformation realUser = new UserGroupInformation(subject); + realUser.setLogin(login); + realUser.setAuthenticationMethod(authenticationMethod); + realUser = new UserGroupInformation(login.getSubject()); + // If the HADOOP_PROXY_USER environment variable or property + // is specified, create a proxy user as the logged in user. 
+ String proxyUser = System.getenv(HADOOP_PROXY_USER); + if (proxyUser == null) { + proxyUser = System.getProperty(HADOOP_PROXY_USER); + } + loginUser = proxyUser == null ? realUser : createProxyUser(proxyUser, realUser); + + String fileLocation = System.getenv(HADOOP_TOKEN_FILE_LOCATION); + if (fileLocation != null) { + // Load the token storage file and put all of the tokens into the + // user. Don't use the FileSystem API for reading since it has a lock + // cycle (HADOOP-9212). + Credentials cred = Credentials.readTokenStorageFile( + new File(fileLocation), conf); + loginUser.addCredentials(cred); + } + loginUser.spawnAutoRenewalThreadForUserCreds(); + } catch (LoginException le) { + LOG.debug("failure to login", le); + throw new IOException("failure to login", le); + } + if (LOG.isDebugEnabled()) { + LOG.debug("UGI loginUser:"+loginUser); + } + } @InterfaceAudience.Private @InterfaceStability.Unstable From fc966461e03f595b46f3adf7debfd8ef2e92f99f Mon Sep 17 00:00:00 2001 From: Brandon Li Date: Wed, 18 Dec 2013 22:26:33 +0000 Subject: [PATCH 22/32] HDFS-5662. Can't decommission a DataNode due to file's replication factor larger than the rest of the cluster size. Contributed by Brandon Li git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552131 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 + .../server/blockmanagement/BlockManager.java | 12 ++- .../apache/hadoop/hdfs/TestDecommission.java | 84 ++++++++++++++++++- 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 12b21802a07..991526c8b00 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -839,6 +839,9 @@ Release 2.3.0 - UNRELEASED HDFS-5592. statechangeLog of completeFile should be logged only in case of success. (Vinayakumar via umamahesh) + HDFS-5662. Can't decommission a DataNode due to file's replication factor + larger than the rest of the cluster size. 
(brandonli) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java index 6c11ab00a50..56c501f576c 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockManager.java @@ -2893,6 +2893,7 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block */ boolean isReplicationInProgress(DatanodeDescriptor srcNode) { boolean status = false; + boolean firstReplicationLog = true; int underReplicatedBlocks = 0; int decommissionOnlyReplicas = 0; int underReplicatedInOpenFiles = 0; @@ -2907,10 +2908,17 @@ assert storedBlock.findDatanode(dn) < 0 : "Block " + block int curExpectedReplicas = getReplication(block); if (isNeededReplication(block, curExpectedReplicas, curReplicas)) { if (curExpectedReplicas > curReplicas) { - //Log info about one block for this node which needs replication + // Log info about one block for this node which needs replication if (!status) { status = true; - logBlockReplicationInfo(block, srcNode, num); + if (firstReplicationLog) { + logBlockReplicationInfo(block, srcNode, num); + } + // Allowing decommission as long as default replication is met + if (curReplicas >= defaultReplication) { + status = false; + firstReplicationLog = false; + } } underReplicatedBlocks++; if ((curReplicas == 0) && (num.decommissionedReplicas() > 0)) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java index 39088620c84..ac04dd2117a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDecommission.java @@ -336,6 +336,58 @@ public class TestDecommission { testDecommission(1, 6); } + /** + * Tests decommission with replicas on the target datanode cannot be migrated + * to other datanodes and satisfy the replication factor. Make sure the + * datanode won't get stuck in decommissioning state. + */ + @Test(timeout = 360000) + public void testDecommission2() throws IOException { + LOG.info("Starting test testDecommission"); + int numNamenodes = 1; + int numDatanodes = 4; + conf.setInt(DFSConfigKeys.DFS_REPLICATION_KEY, 3); + startCluster(numNamenodes, numDatanodes, conf); + + ArrayList> namenodeDecomList = new ArrayList>( + numNamenodes); + namenodeDecomList.add(0, new ArrayList(numDatanodes)); + + Path file1 = new Path("testDecommission2.dat"); + int replicas = 4; + + // Start decommissioning one namenode at a time + ArrayList decommissionedNodes = namenodeDecomList.get(0); + FileSystem fileSys = cluster.getFileSystem(0); + FSNamesystem ns = cluster.getNamesystem(0); + + writeFile(fileSys, file1, replicas); + + int deadDecomissioned = ns.getNumDecomDeadDataNodes(); + int liveDecomissioned = ns.getNumDecomLiveDataNodes(); + + // Decommission one node. Verify that node is decommissioned. 
+ DatanodeInfo decomNode = decommissionNode(0, decommissionedNodes, + AdminStates.DECOMMISSIONED); + decommissionedNodes.add(decomNode); + assertEquals(deadDecomissioned, ns.getNumDecomDeadDataNodes()); + assertEquals(liveDecomissioned + 1, ns.getNumDecomLiveDataNodes()); + + // Ensure decommissioned datanode is not automatically shutdown + DFSClient client = getDfsClient(cluster.getNameNode(0), conf); + assertEquals("All datanodes must be alive", numDatanodes, + client.datanodeReport(DatanodeReportType.LIVE).length); + assertNull(checkFile(fileSys, file1, replicas, decomNode.getXferAddr(), + numDatanodes)); + cleanupFile(fileSys, file1); + + // Restart the cluster and ensure recommissioned datanodes + // are allowed to register with the namenode + cluster.shutdown(); + startCluster(1, 4, conf); + cluster.shutdown(); + } + /** * Tests recommission for non federated cluster */ @@ -388,7 +440,20 @@ public class TestDecommission { DFSClient client = getDfsClient(cluster.getNameNode(i), conf); assertEquals("All datanodes must be alive", numDatanodes, client.datanodeReport(DatanodeReportType.LIVE).length); - assertNull(checkFile(fileSys, file1, replicas, decomNode.getXferAddr(), numDatanodes)); + // wait for the block to be replicated + int tries = 0; + while (tries++ < 20) { + try { + Thread.sleep(1000); + if (checkFile(fileSys, file1, replicas, decomNode.getXferAddr(), + numDatanodes) == null) { + break; + } + } catch (InterruptedException ie) { + } + } + assertTrue("Checked if block was replicated after decommission, tried " + + tries + " times.", tries < 20); cleanupFile(fileSys, file1); } } @@ -429,12 +494,25 @@ public class TestDecommission { DFSClient client = getDfsClient(cluster.getNameNode(i), conf); assertEquals("All datanodes must be alive", numDatanodes, client.datanodeReport(DatanodeReportType.LIVE).length); - assertNull(checkFile(fileSys, file1, replicas, decomNode.getXferAddr(), numDatanodes)); + int tries =0; + // wait for the block to be replicated + while (tries++ < 20) { + try { + Thread.sleep(1000); + if (checkFile(fileSys, file1, replicas, decomNode.getXferAddr(), + numDatanodes) == null) { + break; + } + } catch (InterruptedException ie) { + } + } + assertTrue("Checked if block was replicated after decommission, tried " + + tries + " times.", tries < 20); // stop decommission and check if the new replicas are removed recomissionNode(decomNode); // wait for the block to be deleted - int tries = 0; + tries = 0; while (tries++ < 20) { try { Thread.sleep(1000); From 90122f25e142ff5ae9e2610b6b8968ac5fee8f79 Mon Sep 17 00:00:00 2001 From: Colin McCabe Date: Wed, 18 Dec 2013 23:29:05 +0000 Subject: [PATCH 23/32] HDFS-5676. fix inconsistent synchronization of CachingStrategy (cmccabe) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552162 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 8 +++-- .../apache/hadoop/hdfs/DFSInputStream.java | 34 +++++++++++------- .../apache/hadoop/hdfs/DFSOutputStream.java | 17 ++++++--- .../hdfs/server/datanode/CachingStrategy.java | 36 ++++++++++++------- .../org/apache/hadoop/hdfs/TestConnCache.java | 4 ++- 5 files changed, 66 insertions(+), 33 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 991526c8b00..deb1a55114a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -241,9 +241,6 @@ Trunk (Unreleased) HDFS-5431. 
Support cachepool-based limit management in path-based caching (awang via cmccabe) - HDFS-5634. Allow BlockReaderLocal to switch between checksumming and not - (cmccabe) - OPTIMIZATIONS HDFS-5349. DNA_CACHE and DNA_UNCACHE should be by blockId only. (cmccabe) @@ -754,6 +751,9 @@ Release 2.4.0 - UNRELEASED FSEditLogOp; change FSEditLogOpCodes.fromByte(..) to be more efficient; and change Some fields in FSEditLog to final. (szetszwo) + HDFS-5634. Allow BlockReaderLocal to switch between checksumming and not + (cmccabe) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) @@ -809,6 +809,8 @@ Release 2.4.0 - UNRELEASED HDFS-5580. Fix infinite loop in Balancer.waitForMoveCompletion. (Binglin Chang via junping_du) + HDFS-5676. fix inconsistent synchronization of CachingStrategy (cmccabe) + Release 2.3.0 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index 0ab51c7e716..bdc660d484a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -228,7 +228,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, dfsClient.getConf().shortCircuitStreamsCacheSize, dfsClient.getConf().shortCircuitStreamsCacheExpiryMs); this.cachingStrategy = - dfsClient.getDefaultReadCachingStrategy().duplicate(); + dfsClient.getDefaultReadCachingStrategy(); openInfo(); } @@ -574,7 +574,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, Token accessToken = targetBlock.getBlockToken(); blockReader = getBlockReader(targetAddr, chosenNode, src, blk, accessToken, offsetIntoBlock, blk.getNumBytes() - offsetIntoBlock, - buffersize, verifyChecksum, dfsClient.clientName); + buffersize, verifyChecksum, dfsClient.clientName, cachingStrategy); if(connectFailedOnce) { DFSClient.LOG.info("Successfully connected to " + targetAddr + " for " + blk); @@ -928,7 +928,11 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, // cached block locations may have been updated by chooseDataNode() // or fetchBlockAt(). Always get the latest list of locations at the // start of the loop. 
- block = getBlockAt(block.getStartOffset(), false); + CachingStrategy curCachingStrategy; + synchronized (this) { + block = getBlockAt(block.getStartOffset(), false); + curCachingStrategy = cachingStrategy; + } DNAddrPair retval = chooseDataNode(block); DatanodeInfo chosenNode = retval.info; InetSocketAddress targetAddr = retval.addr; @@ -940,7 +944,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, int len = (int) (end - start + 1); reader = getBlockReader(targetAddr, chosenNode, src, block.getBlock(), blockToken, start, len, buffersize, verifyChecksum, - dfsClient.clientName); + dfsClient.clientName, curCachingStrategy); int nread = reader.readAll(buf, offset, len); if (nread != len) { throw new IOException("truncated return from reader.read(): " + @@ -1053,6 +1057,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, * @param bufferSize The IO buffer size (not the client buffer size) * @param verifyChecksum Whether to verify checksum * @param clientName Client name + * @param CachingStrategy caching strategy to use * @return New BlockReader instance */ protected BlockReader getBlockReader(InetSocketAddress dnAddr, @@ -1064,7 +1069,8 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, long len, int bufferSize, boolean verifyChecksum, - String clientName) + String clientName, + CachingStrategy curCachingStrategy) throws IOException { // Firstly, we check to see if we have cached any file descriptors for // local blocks. If so, we can just re-use those file descriptors. @@ -1084,7 +1090,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, setBlockMetadataHeader(BlockMetadataHeader. preadHeader(fis[1].getChannel())). setFileInputStreamCache(fileInputStreamCache). - setCachingStrategy(cachingStrategy). + setCachingStrategy(curCachingStrategy). build(); } @@ -1119,7 +1125,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, dfsClient.getConf(), file, block, blockToken, startOffset, len, verifyChecksum, clientName, peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, - allowShortCircuitLocalReads, cachingStrategy); + allowShortCircuitLocalReads, curCachingStrategy); return reader; } catch (IOException ex) { DFSClient.LOG.debug("Error making BlockReader with DomainSocket. " + @@ -1142,7 +1148,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, dfsClient.getConf(), file, block, blockToken, startOffset, len, verifyChecksum, clientName, peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, - allowShortCircuitLocalReads, cachingStrategy); + allowShortCircuitLocalReads, curCachingStrategy); return reader; } catch (IOException e) { DFSClient.LOG.warn("failed to connect to " + domSock, e); @@ -1166,7 +1172,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, dfsClient.getConf(), file, block, blockToken, startOffset, len, verifyChecksum, clientName, peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false, - cachingStrategy); + curCachingStrategy); return reader; } catch (IOException ex) { DFSClient.LOG.debug("Error making BlockReader. 
Closing stale " + @@ -1186,7 +1192,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, dfsClient.getConf(), file, block, blockToken, startOffset, len, verifyChecksum, clientName, peer, chosenNode, dsFactory, peerCache, fileInputStreamCache, false, - cachingStrategy); + curCachingStrategy); } @@ -1460,14 +1466,18 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, @Override public synchronized void setReadahead(Long readahead) throws IOException { - this.cachingStrategy.setReadahead(readahead); + this.cachingStrategy = + new CachingStrategy.Builder(this.cachingStrategy). + setReadahead(readahead).build(); closeCurrentBlockReader(); } @Override public synchronized void setDropBehind(Boolean dropBehind) throws IOException { - this.cachingStrategy.setDropBehind(dropBehind); + this.cachingStrategy = + new CachingStrategy.Builder(this.cachingStrategy). + setDropBehind(dropBehind).build(); closeCurrentBlockReader(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java index 26e2fc2247e..4f4e71a944d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSOutputStream.java @@ -150,7 +150,7 @@ public class DFSOutputStream extends FSOutputSummer private Progressable progress; private final short blockReplication; // replication factor of file private boolean shouldSyncBlock = false; // force blocks to disk upon close - private CachingStrategy cachingStrategy; + private AtomicReference cachingStrategy; private boolean failPacket = false; private static class Packet { @@ -1183,7 +1183,7 @@ public class DFSOutputStream extends FSOutputSummer new Sender(out).writeBlock(block, accessToken, dfsClient.clientName, nodes, null, recoveryFlag? stage.getRecoveryStage() : stage, nodes.length, block.getNumBytes(), bytesSent, newGS, checksum, - cachingStrategy); + cachingStrategy.get()); // receive ack for connect BlockOpResponseProto resp = BlockOpResponseProto.parseFrom( @@ -1378,8 +1378,8 @@ public class DFSOutputStream extends FSOutputSummer this.blockSize = stat.getBlockSize(); this.blockReplication = stat.getReplication(); this.progress = progress; - this.cachingStrategy = - dfsClient.getDefaultWriteCachingStrategy().duplicate(); + this.cachingStrategy = new AtomicReference( + dfsClient.getDefaultWriteCachingStrategy()); if ((progress != null) && DFSClient.LOG.isDebugEnabled()) { DFSClient.LOG.debug( "Set non-null progress callback on DFSOutputStream " + src); @@ -1993,7 +1993,14 @@ public class DFSOutputStream extends FSOutputSummer @Override public void setDropBehind(Boolean dropBehind) throws IOException { - this.cachingStrategy.setDropBehind(dropBehind); + CachingStrategy prevStrategy, nextStrategy; + // CachingStrategy is immutable. So build a new CachingStrategy with the + // modifications we want, and compare-and-swap it in. + do { + prevStrategy = this.cachingStrategy.get(); + nextStrategy = new CachingStrategy.Builder(prevStrategy). 
+ setDropBehind(dropBehind).build(); + } while (!this.cachingStrategy.compareAndSet(prevStrategy, nextStrategy)); } @VisibleForTesting diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/CachingStrategy.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/CachingStrategy.java index 3795dbba3cb..215df134669 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/CachingStrategy.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/CachingStrategy.java @@ -21,8 +21,8 @@ package org.apache.hadoop.hdfs.server.datanode; * The caching strategy we should use for an HDFS read or write operation. */ public class CachingStrategy { - private Boolean dropBehind; // null = use server defaults - private Long readahead; // null = use server defaults + private final Boolean dropBehind; // null = use server defaults + private final Long readahead; // null = use server defaults public static CachingStrategy newDefaultStrategy() { return new CachingStrategy(null, null); @@ -32,8 +32,28 @@ public class CachingStrategy { return new CachingStrategy(true, null); } - public CachingStrategy duplicate() { - return new CachingStrategy(this.dropBehind, this.readahead); + public static class Builder { + private Boolean dropBehind; + private Long readahead; + + public Builder(CachingStrategy prev) { + this.dropBehind = prev.dropBehind; + this.readahead = prev.readahead; + } + + public Builder setDropBehind(Boolean dropBehind) { + this.dropBehind = dropBehind; + return this; + } + + public Builder setReadahead(Long readahead) { + this.readahead = readahead; + return this; + } + + public CachingStrategy build() { + return new CachingStrategy(dropBehind, readahead); + } } public CachingStrategy(Boolean dropBehind, Long readahead) { @@ -45,18 +65,10 @@ public class CachingStrategy { return dropBehind; } - public void setDropBehind(Boolean dropBehind) { - this.dropBehind = dropBehind; - } - public Long getReadahead() { return readahead; } - public void setReadahead(Long readahead) { - this.readahead = readahead; - } - public String toString() { return "CachingStrategy(dropBehind=" + dropBehind + ", readahead=" + readahead + ")"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java index 837e5523210..cffd91dfa47 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestConnCache.java @@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier; +import org.apache.hadoop.hdfs.server.datanode.CachingStrategy; import org.apache.hadoop.hdfs.net.Peer; import org.apache.hadoop.security.token.Token; import org.junit.Assert; @@ -138,7 +139,8 @@ public class TestConnCache { Matchers.anyLong(), Matchers.anyInt(), Matchers.anyBoolean(), - Matchers.anyString()); + Matchers.anyString(), + (CachingStrategy)Matchers.anyObject()); // Initial read pread(in, 0, dataBuf, 0, dataBuf.length, authenticData); From e07f3c87cd3542760efcf713f909375b69eec00f Mon Sep 17 00:00:00 2001 From: Christopher Douglas Date: Thu, 19 Dec 2013 00:07:59 +0000 Subject: [PATCH 
24/32] YARN-1471. Preserve scheduler typeinfo in simulator to work with resource monitors. Contributed by Carlo Curino. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552173 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/hadoop/yarn/sls/SLSRunner.java | 26 +- .../yarn/sls/appmaster/AMSimulator.java | 8 +- .../scheduler/ResourceSchedulerWrapper.java | 10 +- .../sls/scheduler/SLSCapacityScheduler.java | 808 ++++++++++++++++++ .../yarn/sls/scheduler/SchedulerWrapper.java | 43 + .../apache/hadoop/yarn/sls/web/SLSWebApp.java | 6 +- .../src/test/resources/capacity-scheduler.xml | 60 ++ .../src/test/resources/yarn-site.xml | 13 +- 8 files changed, 955 insertions(+), 19 deletions(-) create mode 100644 hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java create mode 100644 hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerWrapper.java create mode 100644 hadoop-tools/hadoop-sls/src/test/resources/capacity-scheduler.xml diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java index 79437f98938..501d11e0ccd 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/SLSRunner.java @@ -40,12 +40,15 @@ import org.apache.hadoop.tools.rumen.LoggedTaskAttempt; import org.apache.hadoop.yarn.api.records.NodeState; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.server.resourcemanager.rmnode.RMNode; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler; import org.apache.hadoop.yarn.sls.appmaster.AMSimulator; import org.apache.hadoop.yarn.sls.conf.SLSConfiguration; import org.apache.hadoop.yarn.sls.nodemanager.NMSimulator; import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator; import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper; +import org.apache.hadoop.yarn.sls.scheduler.SLSCapacityScheduler; import org.apache.hadoop.yarn.sls.scheduler.TaskRunner; +import org.apache.hadoop.yarn.sls.scheduler.SchedulerWrapper; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; @@ -140,9 +143,9 @@ public class SLSRunner { // start application masters startAM(); // set queue & tracked apps information - ((ResourceSchedulerWrapper) rm.getResourceScheduler()) + ((SchedulerWrapper) rm.getResourceScheduler()) .setQueueSet(this.queueAppNumMap.keySet()); - ((ResourceSchedulerWrapper) rm.getResourceScheduler()) + ((SchedulerWrapper) rm.getResourceScheduler()) .setTrackedAppSet(this.trackedApps); // print out simulation info printSimulationInfo(); @@ -151,13 +154,24 @@ public class SLSRunner { // starting the runner once everything is ready to go, runner.start(); } - + private void startRM() throws IOException, ClassNotFoundException { Configuration rmConf = new YarnConfiguration(); String schedulerClass = rmConf.get(YarnConfiguration.RM_SCHEDULER); - rmConf.set(SLSConfiguration.RM_SCHEDULER, schedulerClass); - rmConf.set(YarnConfiguration.RM_SCHEDULER, - ResourceSchedulerWrapper.class.getName()); + + // For CapacityScheduler we use a sub-classing instead of wrapping + // to allow scheduler-specific invocations from monitors to work + // this can be used for other schedulers as well if we care to + // exercise/track behaviors that are not common to the scheduler api + 
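      // Aside (illustrative, not lines from the patch): why the sub-classing
      // route mentioned above matters. The class names are from this patch;
      // the instanceof checks are a minimal sketch of what a resource monitor
      // effectively relies on:
      //
      //   ResourceScheduler wrapped    = new ResourceSchedulerWrapper();  // delegates to the real scheduler
      //   ResourceScheduler subclassed = new SLSCapacityScheduler();      // extends CapacityScheduler
      //
      //   wrapped    instanceof CapacityScheduler   // false: the wrapper hides the concrete type, so a
      //                                             // monitor such as the ProportionalCapacityPreemptionPolicy
      //                                             // enabled in the test yarn-site.xml below cannot use it
      //   subclassed instanceof CapacityScheduler   // true: scheduler type information is preserved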
if(Class.forName(schedulerClass) == CapacityScheduler.class) { + rmConf.set(YarnConfiguration.RM_SCHEDULER, + SLSCapacityScheduler.class.getName()); + } else { + rmConf.set(YarnConfiguration.RM_SCHEDULER, + ResourceSchedulerWrapper.class.getName()); + rmConf.set(SLSConfiguration.RM_SCHEDULER, schedulerClass); + } + rmConf.set(SLSConfiguration.METRICS_OUTPUT_DIR, metricsOutputDir); rm = new ResourceManager(); rm.init(rmConf); diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java index c6b994e5400..67c09940120 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/appmaster/AMSimulator.java @@ -65,7 +65,7 @@ import org.apache.hadoop.yarn.util.Records; import org.apache.log4j.Logger; import org.apache.hadoop.yarn.sls.scheduler.ContainerSimulator; -import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper; +import org.apache.hadoop.yarn.sls.scheduler.SchedulerWrapper; import org.apache.hadoop.yarn.sls.SLSRunner; import org.apache.hadoop.yarn.sls.scheduler.TaskRunner; import org.apache.hadoop.yarn.sls.utils.SLSUtils; @@ -193,7 +193,7 @@ public abstract class AMSimulator extends TaskRunner.Task { simulateFinishTimeMS = System.currentTimeMillis() - SLSRunner.getRunner().getStartTimeMS(); // record job running information - ((ResourceSchedulerWrapper)rm.getResourceScheduler()) + ((SchedulerWrapper)rm.getResourceScheduler()) .addAMRuntime(appId, traceStartTimeMS, traceFinishTimeMS, simulateStartTimeMS, simulateFinishTimeMS); @@ -314,13 +314,13 @@ public abstract class AMSimulator extends TaskRunner.Task { private void trackApp() { if (isTracked) { - ((ResourceSchedulerWrapper) rm.getResourceScheduler()) + ((SchedulerWrapper) rm.getResourceScheduler()) .addTrackedApp(appAttemptId, oldAppId); } } public void untrackApp() { if (isTracked) { - ((ResourceSchedulerWrapper) rm.getResourceScheduler()) + ((SchedulerWrapper) rm.getResourceScheduler()) .removeTrackedApp(appAttemptId, oldAppId); } } diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java index ad066e75d28..bc7f7a086ad 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/ResourceSchedulerWrapper.java @@ -85,8 +85,8 @@ import com.codahale.metrics.MetricRegistry; import com.codahale.metrics.SlidingWindowReservoir; import com.codahale.metrics.Timer; -public class ResourceSchedulerWrapper implements ResourceScheduler, - Configurable { +public class ResourceSchedulerWrapper implements + SchedulerWrapper,ResourceScheduler,Configurable { private static final String EOL = System.getProperty("line.separator"); private static final int SAMPLING_SIZE = 60; private ScheduledExecutorService pool; @@ -150,9 +150,8 @@ public class ResourceSchedulerWrapper implements ResourceScheduler, public void setConf(Configuration conf) { this.conf = conf; // set scheduler - Class klass = - conf.getClass(SLSConfiguration.RM_SCHEDULER, null, - ResourceScheduler.class); + Class klass = conf.getClass( + SLSConfiguration.RM_SCHEDULER, null, ResourceScheduler.class); scheduler = 
ReflectionUtils.newInstance(klass, conf); // start metrics @@ -861,4 +860,3 @@ public class ResourceSchedulerWrapper implements ResourceScheduler, return scheduler.getAppsInQueue(queue); } } - diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java new file mode 100644 index 00000000000..1b304de79af --- /dev/null +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SLSCapacityScheduler.java @@ -0,0 +1,808 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.yarn.sls.scheduler; + +import org.apache.hadoop.util.ShutdownHookManager; +import org.apache.hadoop.yarn.sls.SLSRunner; +import org.apache.hadoop.yarn.sls.conf.SLSConfiguration; +import org.apache.hadoop.yarn.sls.web.SLSWebApp; +import com.codahale.metrics.Counter; +import com.codahale.metrics.CsvReporter; +import com.codahale.metrics.Gauge; +import com.codahale.metrics.Histogram; +import com.codahale.metrics.MetricRegistry; +import com.codahale.metrics.SlidingWindowReservoir; +import com.codahale.metrics.Timer; + +import org.apache.hadoop.security.AccessControlException; +import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; +import org.apache.hadoop.yarn.api.records.Container; +import org.apache.hadoop.yarn.api.records.ContainerExitStatus; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.NodeId; +import org.apache.hadoop.yarn.api.records.QueueInfo; +import org.apache.hadoop.yarn.api.records.QueueUserACLInfo; +import org.apache.hadoop.yarn.api.records.QueueACL; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.ResourceRequest; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; +import org.apache.hadoop.yarn.server.resourcemanager.recovery.RMStateStore; +import org.apache.hadoop.yarn.server.resourcemanager.rmcontainer.RMContainer; +import org.apache.hadoop.yarn.server.resourcemanager.rmnode + .UpdatedContainerInfo; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Allocation; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.PreemptableResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.Queue; +import 
org.apache.hadoop.yarn.server.resourcemanager.scheduler.QueueMetrics; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .ResourceScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .SchedulerAppReport; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.SchedulerApplication; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler + .SchedulerNodeReport; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity + .CapacityScheduler; + +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptAddedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event.AppAttemptRemovedSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .NodeUpdateSchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .SchedulerEvent; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.event + .SchedulerEventType; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair + .FairScheduler; +import org.apache.hadoop.yarn.server.resourcemanager.scheduler.fifo + .FifoScheduler; +import org.apache.hadoop.yarn.util.resource.Resources; +import org.apache.log4j.Logger; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.SortedMap; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.ScheduledThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +public class SLSCapacityScheduler extends CapacityScheduler implements + SchedulerWrapper,Configurable { + private static final String EOL = System.getProperty("line.separator"); + private static final int SAMPLING_SIZE = 60; + private ScheduledExecutorService pool; + // counters for scheduler allocate/handle operations + private Counter schedulerAllocateCounter; + private Counter schedulerHandleCounter; + private Map schedulerHandleCounterMap; + // Timers for scheduler allocate/handle operations + private Timer schedulerAllocateTimer; + private Timer schedulerHandleTimer; + private Map schedulerHandleTimerMap; + private List schedulerHistogramList; + private Map histogramTimerMap; + private Lock samplerLock; + private Lock queueLock; + + private Configuration conf; + + private Map appQueueMap = + new ConcurrentHashMap(); + private BufferedWriter jobRuntimeLogBW; + + // Priority of the ResourceSchedulerWrapper shutdown hook. 
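+  // Aside: the fields above and the metric registration code further down follow
+  // the stock metrics-core (Codahale) idioms. A minimal sketch with the generic
+  // parameters written out; the variable names here are examples, not members of
+  // this class:
+  //
+  //   MetricRegistry registry = new MetricRegistry();
+  //   Counter allocCounter = registry.counter("counter.scheduler.operation.allocate");
+  //   allocCounter.inc();
+  //   registry.register("variable.jvm.free.memory", new Gauge<Long>() {
+  //     @Override
+  //     public Long getValue() { return Runtime.getRuntime().freeMemory(); }
+  //   });
+  //   Timer handleTimer = new Timer(new SlidingWindowReservoir(60));
+  //   Timer.Context ctx = handleTimer.time();
+  //   try { /* timed work */ } finally { ctx.stop(); }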
+ public static final int SHUTDOWN_HOOK_PRIORITY = 30; + + // web app + private SLSWebApp web; + + private Map preemptionContainerMap = + new ConcurrentHashMap(); + + // metrics + private MetricRegistry metrics; + private SchedulerMetrics schedulerMetrics; + private boolean metricsON; + private String metricsOutputDir; + private BufferedWriter metricsLogBW; + private boolean running = false; + private static Map defaultSchedulerMetricsMap = + new HashMap(); + static { + defaultSchedulerMetricsMap.put(FairScheduler.class, + FairSchedulerMetrics.class); + defaultSchedulerMetricsMap.put(FifoScheduler.class, + FifoSchedulerMetrics.class); + defaultSchedulerMetricsMap.put(CapacityScheduler.class, + CapacitySchedulerMetrics.class); + } + // must set by outside + private Set queueSet; + private Set trackedAppSet; + + public final Logger LOG = Logger.getLogger(SLSCapacityScheduler.class); + + public SLSCapacityScheduler() { + samplerLock = new ReentrantLock(); + queueLock = new ReentrantLock(); + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + super.setConf(conf); + // start metrics + metricsON = conf.getBoolean(SLSConfiguration.METRICS_SWITCH, true); + if (metricsON) { + try { + initMetrics(); + } catch (Exception e) { + e.printStackTrace(); + } + } + + ShutdownHookManager.get().addShutdownHook(new Runnable() { + @Override + public void run() { + try { + if (metricsLogBW != null) { + metricsLogBW.write("]"); + metricsLogBW.close(); + } + if (web != null) { + web.stop(); + } + tearDown(); + } catch (Exception e) { + e.printStackTrace(); + } + } + }, SHUTDOWN_HOOK_PRIORITY); + } + + @Override + public Allocation allocate(ApplicationAttemptId attemptId, + List resourceRequests, + List containerIds, + List strings, List strings2) { + if (metricsON) { + final Timer.Context context = schedulerAllocateTimer.time(); + Allocation allocation = null; + try { + allocation = super.allocate(attemptId, resourceRequests, + containerIds, strings, strings2); + return allocation; + } finally { + context.stop(); + schedulerAllocateCounter.inc(); + try { + updateQueueWithAllocateRequest(allocation, attemptId, + resourceRequests, containerIds); + } catch (IOException e) { + e.printStackTrace(); + } + } + } else { + return super.allocate(attemptId, + resourceRequests, containerIds, strings, strings2); + } + } + + @Override + public void handle(SchedulerEvent schedulerEvent) { + // metrics off + if (! 
metricsON) { + super.handle(schedulerEvent); + return; + } + if(!running) running = true; + + // metrics on + Timer.Context handlerTimer = null; + Timer.Context operationTimer = null; + + NodeUpdateSchedulerEventWrapper eventWrapper; + try { + //if (schedulerEvent instanceof NodeUpdateSchedulerEvent) { + if (schedulerEvent.getType() == SchedulerEventType.NODE_UPDATE + && schedulerEvent instanceof NodeUpdateSchedulerEvent) { + eventWrapper = new NodeUpdateSchedulerEventWrapper( + (NodeUpdateSchedulerEvent)schedulerEvent); + schedulerEvent = eventWrapper; + updateQueueWithNodeUpdate(eventWrapper); + } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED + && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) { + // check if having AM Container, update resource usage information + AppAttemptRemovedSchedulerEvent appRemoveEvent = + (AppAttemptRemovedSchedulerEvent) schedulerEvent; + ApplicationAttemptId appAttemptId = + appRemoveEvent.getApplicationAttemptID(); + String queue = appQueueMap.get(appAttemptId); + SchedulerAppReport app = super.getSchedulerAppInfo(appAttemptId); + if (! app.getLiveContainers().isEmpty()) { // have 0 or 1 + // should have one container which is AM container + RMContainer rmc = app.getLiveContainers().iterator().next(); + updateQueueMetrics(queue, + rmc.getContainer().getResource().getMemory(), + rmc.getContainer().getResource().getVirtualCores()); + } + } + + handlerTimer = schedulerHandleTimer.time(); + operationTimer = schedulerHandleTimerMap + .get(schedulerEvent.getType()).time(); + + super.handle(schedulerEvent); + } finally { + if (handlerTimer != null) handlerTimer.stop(); + if (operationTimer != null) operationTimer.stop(); + schedulerHandleCounter.inc(); + schedulerHandleCounterMap.get(schedulerEvent.getType()).inc(); + + if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_REMOVED + && schedulerEvent instanceof AppAttemptRemovedSchedulerEvent) { + SLSRunner.decreaseRemainingApps(); + AppAttemptRemovedSchedulerEvent appRemoveEvent = + (AppAttemptRemovedSchedulerEvent) schedulerEvent; + ApplicationAttemptId appAttemptId = + appRemoveEvent.getApplicationAttemptID(); + appQueueMap.remove(appRemoveEvent.getApplicationAttemptID()); + } else if (schedulerEvent.getType() == SchedulerEventType.APP_ATTEMPT_ADDED + && schedulerEvent instanceof AppAttemptAddedSchedulerEvent) { + AppAttemptAddedSchedulerEvent appAddEvent = + (AppAttemptAddedSchedulerEvent) schedulerEvent; + String queueName = appAddEvent.getQueue(); + appQueueMap.put(appAddEvent.getApplicationAttemptId(), queueName); + } + } + } + + private void updateQueueWithNodeUpdate( + NodeUpdateSchedulerEventWrapper eventWrapper) { + RMNodeWrapper node = (RMNodeWrapper) eventWrapper.getRMNode(); + List containerList = node.getContainerUpdates(); + for (UpdatedContainerInfo info : containerList) { + for (ContainerStatus status : info.getCompletedContainers()) { + ContainerId containerId = status.getContainerId(); + SchedulerAppReport app = super.getSchedulerAppInfo( + containerId.getApplicationAttemptId()); + + if (app == null) { + // this happens for the AM container + // The app have already removed when the NM sends the release + // information. 
+ continue; + } + + String queue = appQueueMap.get(containerId.getApplicationAttemptId()); + int releasedMemory = 0, releasedVCores = 0; + if (status.getExitStatus() == ContainerExitStatus.SUCCESS) { + for (RMContainer rmc : app.getLiveContainers()) { + if (rmc.getContainerId() == containerId) { + releasedMemory += rmc.getContainer().getResource().getMemory(); + releasedVCores += rmc.getContainer() + .getResource().getVirtualCores(); + break; + } + } + } else if (status.getExitStatus() == ContainerExitStatus.ABORTED) { + if (preemptionContainerMap.containsKey(containerId)) { + Resource preResource = preemptionContainerMap.get(containerId); + releasedMemory += preResource.getMemory(); + releasedVCores += preResource.getVirtualCores(); + preemptionContainerMap.remove(containerId); + } + } + // update queue counters + updateQueueMetrics(queue, releasedMemory, releasedVCores); + } + } + } + + private void updateQueueWithAllocateRequest(Allocation allocation, + ApplicationAttemptId attemptId, + List resourceRequests, + List containerIds) throws IOException { + // update queue information + Resource pendingResource = Resources.createResource(0, 0); + Resource allocatedResource = Resources.createResource(0, 0); + String queueName = appQueueMap.get(attemptId); + // container requested + for (ResourceRequest request : resourceRequests) { + if (request.getResourceName().equals(ResourceRequest.ANY)) { + Resources.addTo(pendingResource, + Resources.multiply(request.getCapability(), + request.getNumContainers())); + } + } + // container allocated + for (Container container : allocation.getContainers()) { + Resources.addTo(allocatedResource, container.getResource()); + Resources.subtractFrom(pendingResource, container.getResource()); + } + // container released from AM + SchedulerAppReport report = super.getSchedulerAppInfo(attemptId); + for (ContainerId containerId : containerIds) { + Container container = null; + for (RMContainer c : report.getLiveContainers()) { + if (c.getContainerId().equals(containerId)) { + container = c.getContainer(); + break; + } + } + if (container != null) { + // released allocated containers + Resources.subtractFrom(allocatedResource, container.getResource()); + } else { + for (RMContainer c : report.getReservedContainers()) { + if (c.getContainerId().equals(containerId)) { + container = c.getContainer(); + break; + } + } + if (container != null) { + // released reserved containers + Resources.subtractFrom(pendingResource, container.getResource()); + } + } + } + // containers released/preemption from scheduler + Set preemptionContainers = new HashSet(); + if (allocation.getContainerPreemptions() != null) { + preemptionContainers.addAll(allocation.getContainerPreemptions()); + } + if (allocation.getStrictContainerPreemptions() != null) { + preemptionContainers.addAll(allocation.getStrictContainerPreemptions()); + } + if (! preemptionContainers.isEmpty()) { + for (ContainerId containerId : preemptionContainers) { + if (! preemptionContainerMap.containsKey(containerId)) { + Container container = null; + for (RMContainer c : report.getLiveContainers()) { + if (c.getContainerId().equals(containerId)) { + container = c.getContainer(); + break; + } + } + if (container != null) { + preemptionContainerMap.put(containerId, container.getResource()); + } + } + + } + } + + // update metrics + SortedMap counterMap = metrics.getCounters(); + String names[] = new String[]{ + "counter.queue." + queueName + ".pending.memory", + "counter.queue." 
+ queueName + ".pending.cores", + "counter.queue." + queueName + ".allocated.memory", + "counter.queue." + queueName + ".allocated.cores"}; + int values[] = new int[]{pendingResource.getMemory(), + pendingResource.getVirtualCores(), + allocatedResource.getMemory(), allocatedResource.getVirtualCores()}; + for (int i = names.length - 1; i >= 0; i --) { + if (! counterMap.containsKey(names[i])) { + metrics.counter(names[i]); + counterMap = metrics.getCounters(); + } + counterMap.get(names[i]).inc(values[i]); + } + + queueLock.lock(); + try { + if (! schedulerMetrics.isTracked(queueName)) { + schedulerMetrics.trackQueue(queueName); + } + } finally { + queueLock.unlock(); + } + } + + private void tearDown() throws IOException { + // close job runtime writer + if (jobRuntimeLogBW != null) { + jobRuntimeLogBW.close(); + } + // shut pool + if (pool != null) pool.shutdown(); + } + + @SuppressWarnings("unchecked") + private void initMetrics() throws Exception { + metrics = new MetricRegistry(); + // configuration + metricsOutputDir = conf.get(SLSConfiguration.METRICS_OUTPUT_DIR); + int metricsWebAddressPort = conf.getInt( + SLSConfiguration.METRICS_WEB_ADDRESS_PORT, + SLSConfiguration.METRICS_WEB_ADDRESS_PORT_DEFAULT); + // create SchedulerMetrics for current scheduler + String schedulerMetricsType = conf.get(CapacityScheduler.class.getName()); + Class schedulerMetricsClass = schedulerMetricsType == null? + defaultSchedulerMetricsMap.get(CapacityScheduler.class) : + Class.forName(schedulerMetricsType); + schedulerMetrics = (SchedulerMetrics)ReflectionUtils + .newInstance(schedulerMetricsClass, new Configuration()); + schedulerMetrics.init(this, metrics); + + // register various metrics + registerJvmMetrics(); + registerClusterResourceMetrics(); + registerContainerAppNumMetrics(); + registerSchedulerMetrics(); + + // .csv output + initMetricsCSVOutput(); + + // start web app to provide real-time tracking + web = new SLSWebApp(this, metricsWebAddressPort); + web.start(); + + // a thread to update histogram timer + pool = new ScheduledThreadPoolExecutor(2); + pool.scheduleAtFixedRate(new HistogramsRunnable(), 0, 1000, + TimeUnit.MILLISECONDS); + + // a thread to output metrics for real-tiem tracking + pool.scheduleAtFixedRate(new MetricsLogRunnable(), 0, 1000, + TimeUnit.MILLISECONDS); + + // application running information + jobRuntimeLogBW = new BufferedWriter( + new FileWriter(metricsOutputDir + "/jobruntime.csv")); + jobRuntimeLogBW.write("JobID,real_start_time,real_end_time," + + "simulate_start_time,simulate_end_time" + EOL); + jobRuntimeLogBW.flush(); + } + + private void registerJvmMetrics() { + // add JVM gauges + metrics.register("variable.jvm.free.memory", + new Gauge() { + @Override + public Long getValue() { + return Runtime.getRuntime().freeMemory(); + } + } + ); + metrics.register("variable.jvm.max.memory", + new Gauge() { + @Override + public Long getValue() { + return Runtime.getRuntime().maxMemory(); + } + } + ); + metrics.register("variable.jvm.total.memory", + new Gauge() { + @Override + public Long getValue() { + return Runtime.getRuntime().totalMemory(); + } + } + ); + } + + private void registerClusterResourceMetrics() { + metrics.register("variable.cluster.allocated.memory", + new Gauge() { + @Override + public Integer getValue() { + if( getRootQueueMetrics() == null) { + return 0; + } else { + return getRootQueueMetrics().getAllocatedMB(); + } + } + } + ); + metrics.register("variable.cluster.allocated.vcores", + new Gauge() { + @Override + public Integer getValue() { + 
if(getRootQueueMetrics() == null) { + return 0; + } else { + return getRootQueueMetrics().getAllocatedVirtualCores(); + } + } + } + ); + metrics.register("variable.cluster.available.memory", + new Gauge() { + @Override + public Integer getValue() { + if(getRootQueueMetrics() == null) { + return 0; + } else { + return getRootQueueMetrics().getAvailableMB(); + } + } + } + ); + metrics.register("variable.cluster.available.vcores", + new Gauge() { + @Override + public Integer getValue() { + if(getRootQueueMetrics() == null) { + return 0; + } else { + return getRootQueueMetrics().getAvailableVirtualCores(); + } + } + } + ); + } + + private void registerContainerAppNumMetrics() { + metrics.register("variable.running.application", + new Gauge() { + @Override + public Integer getValue() { + if(getRootQueueMetrics() == null) { + return 0; + } else { + return getRootQueueMetrics().getAppsRunning(); + } + } + } + ); + metrics.register("variable.running.container", + new Gauge() { + @Override + public Integer getValue() { + if(getRootQueueMetrics() == null) { + return 0; + } else { + return getRootQueueMetrics().getAllocatedContainers(); + } + } + } + ); + } + + private void registerSchedulerMetrics() { + samplerLock.lock(); + try { + // counters for scheduler operations + schedulerAllocateCounter = metrics.counter( + "counter.scheduler.operation.allocate"); + schedulerHandleCounter = metrics.counter( + "counter.scheduler.operation.handle"); + schedulerHandleCounterMap = new HashMap(); + for (SchedulerEventType e : SchedulerEventType.values()) { + Counter counter = metrics.counter( + "counter.scheduler.operation.handle." + e); + schedulerHandleCounterMap.put(e, counter); + } + // timers for scheduler operations + int timeWindowSize = conf.getInt( + SLSConfiguration.METRICS_TIMER_WINDOW_SIZE, + SLSConfiguration.METRICS_TIMER_WINDOW_SIZE_DEFAULT); + schedulerAllocateTimer = new Timer( + new SlidingWindowReservoir(timeWindowSize)); + schedulerHandleTimer = new Timer( + new SlidingWindowReservoir(timeWindowSize)); + schedulerHandleTimerMap = new HashMap(); + for (SchedulerEventType e : SchedulerEventType.values()) { + Timer timer = new Timer(new SlidingWindowReservoir(timeWindowSize)); + schedulerHandleTimerMap.put(e, timer); + } + // histogram for scheduler operations (Samplers) + schedulerHistogramList = new ArrayList(); + histogramTimerMap = new HashMap(); + Histogram schedulerAllocateHistogram = new Histogram( + new SlidingWindowReservoir(SAMPLING_SIZE)); + metrics.register("sampler.scheduler.operation.allocate.timecost", + schedulerAllocateHistogram); + schedulerHistogramList.add(schedulerAllocateHistogram); + histogramTimerMap.put(schedulerAllocateHistogram, schedulerAllocateTimer); + Histogram schedulerHandleHistogram = new Histogram( + new SlidingWindowReservoir(SAMPLING_SIZE)); + metrics.register("sampler.scheduler.operation.handle.timecost", + schedulerHandleHistogram); + schedulerHistogramList.add(schedulerHandleHistogram); + histogramTimerMap.put(schedulerHandleHistogram, schedulerHandleTimer); + for (SchedulerEventType e : SchedulerEventType.values()) { + Histogram histogram = new Histogram( + new SlidingWindowReservoir(SAMPLING_SIZE)); + metrics.register( + "sampler.scheduler.operation.handle." 
+ e + ".timecost", + histogram); + schedulerHistogramList.add(histogram); + histogramTimerMap.put(histogram, schedulerHandleTimerMap.get(e)); + } + } finally { + samplerLock.unlock(); + } + } + + private void initMetricsCSVOutput() { + int timeIntervalMS = conf.getInt( + SLSConfiguration.METRICS_RECORD_INTERVAL_MS, + SLSConfiguration.METRICS_RECORD_INTERVAL_MS_DEFAULT); + File dir = new File(metricsOutputDir + "/metrics"); + if(! dir.exists() + && ! dir.mkdirs()) { + LOG.error("Cannot create directory " + dir.getAbsoluteFile()); + } + final CsvReporter reporter = CsvReporter.forRegistry(metrics) + .formatFor(Locale.US) + .convertRatesTo(TimeUnit.SECONDS) + .convertDurationsTo(TimeUnit.MILLISECONDS) + .build(new File(metricsOutputDir + "/metrics")); + reporter.start(timeIntervalMS, TimeUnit.MILLISECONDS); + } + + class HistogramsRunnable implements Runnable { + @Override + public void run() { + samplerLock.lock(); + try { + for (Histogram histogram : schedulerHistogramList) { + Timer timer = histogramTimerMap.get(histogram); + histogram.update((int) timer.getSnapshot().getMean()); + } + } finally { + samplerLock.unlock(); + } + } + } + + class MetricsLogRunnable implements Runnable { + private boolean firstLine = true; + public MetricsLogRunnable() { + try { + metricsLogBW = new BufferedWriter( + new FileWriter(metricsOutputDir + "/realtimetrack.json")); + metricsLogBW.write("["); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public void run() { + if(running) { + // all WebApp to get real tracking json + String metrics = web.generateRealTimeTrackingMetrics(); + // output + try { + if(firstLine) { + metricsLogBW.write(metrics + EOL); + firstLine = false; + } else { + metricsLogBW.write("," + metrics + EOL); + } + metricsLogBW.flush(); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + } + + // the following functions are used by AMSimulator + public void addAMRuntime(ApplicationId appId, + long traceStartTimeMS, long traceEndTimeMS, + long simulateStartTimeMS, long simulateEndTimeMS) { + + try { + // write job runtime information + StringBuilder sb = new StringBuilder(); + sb.append(appId).append(",").append(traceStartTimeMS).append(",") + .append(traceEndTimeMS).append(",").append(simulateStartTimeMS) + .append(",").append(simulateEndTimeMS); + jobRuntimeLogBW.write(sb.toString() + EOL); + jobRuntimeLogBW.flush(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + private void updateQueueMetrics(String queue, + int releasedMemory, int releasedVCores) { + // update queue counters + SortedMap counterMap = metrics.getCounters(); + if (releasedMemory != 0) { + String name = "counter.queue." + queue + ".allocated.memory"; + if (! counterMap.containsKey(name)) { + metrics.counter(name); + counterMap = metrics.getCounters(); + } + counterMap.get(name).inc(-releasedMemory); + } + if (releasedVCores != 0) { + String name = "counter.queue." + queue + ".allocated.cores"; + if (! 
counterMap.containsKey(name)) { + metrics.counter(name); + counterMap = metrics.getCounters(); + } + counterMap.get(name).inc(-releasedVCores); + } + } + + public void setQueueSet(Set queues) { + this.queueSet = queues; + } + + public Set getQueueSet() { + return this.queueSet; + } + + public void setTrackedAppSet(Set apps) { + this.trackedAppSet = apps; + } + + public Set getTrackedAppSet() { + return this.trackedAppSet; + } + + public MetricRegistry getMetrics() { + return metrics; + } + + public SchedulerMetrics getSchedulerMetrics() { + return schedulerMetrics; + } + + // API open to out classes + public void addTrackedApp(ApplicationAttemptId appAttemptId, + String oldAppId) { + if (metricsON) { + schedulerMetrics.trackApp(appAttemptId, oldAppId); + } + } + + public void removeTrackedApp(ApplicationAttemptId appAttemptId, + String oldAppId) { + if (metricsON) { + schedulerMetrics.untrackApp(appAttemptId, oldAppId); + } + } + + @Override + public Configuration getConf() { + return conf; + } + + + + +} + diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerWrapper.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerWrapper.java new file mode 100644 index 00000000000..44629f5347f --- /dev/null +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/scheduler/SchedulerWrapper.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.yarn.sls.scheduler; + +import java.util.Set; + +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; + +import com.codahale.metrics.MetricRegistry; + +public interface SchedulerWrapper { + + public MetricRegistry getMetrics(); + public SchedulerMetrics getSchedulerMetrics(); + public Set getQueueSet(); + public void setQueueSet(Set queues); + public Set getTrackedAppSet(); + public void setTrackedAppSet(Set apps); + public void addTrackedApp(ApplicationAttemptId appAttemptId, + String oldAppId); + public void removeTrackedApp(ApplicationAttemptId appAttemptId, + String oldAppId); + public void addAMRuntime(ApplicationId appId, + long traceStartTimeMS, long traceEndTimeMS, + long simulateStartTimeMS, long simulateEndTimeMS); + +} diff --git a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/web/SLSWebApp.java b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/web/SLSWebApp.java index 123ccea718a..e6dd8467898 100644 --- a/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/web/SLSWebApp.java +++ b/hadoop-tools/hadoop-sls/src/main/java/org/apache/hadoop/yarn/sls/web/SLSWebApp.java @@ -41,6 +41,8 @@ import org.apache.hadoop.yarn.sls.SLSRunner; import org.apache.hadoop.yarn.sls.scheduler.FairSchedulerMetrics; import org.apache.hadoop.yarn.sls.scheduler.ResourceSchedulerWrapper; import org.apache.hadoop.yarn.sls.scheduler.SchedulerMetrics; +import org.apache.hadoop.yarn.sls.scheduler.SchedulerWrapper; + import com.codahale.metrics.Counter; import com.codahale.metrics.Gauge; import com.codahale.metrics.Histogram; @@ -50,7 +52,7 @@ import org.mortbay.jetty.handler.ResourceHandler; public class SLSWebApp extends HttpServlet { private static final long serialVersionUID = 1905162041950251407L; private transient Server server; - private transient ResourceSchedulerWrapper wrapper; + private transient SchedulerWrapper wrapper; private transient MetricRegistry metrics; private transient SchedulerMetrics schedulerMetrics; // metrics objects @@ -90,7 +92,7 @@ public class SLSWebApp extends HttpServlet { } } - public SLSWebApp(ResourceSchedulerWrapper wrapper, int metricsAddressPort) { + public SLSWebApp(SchedulerWrapper wrapper, int metricsAddressPort) { this.wrapper = wrapper; metrics = wrapper.getMetrics(); handleOperTimecostHistogramMap = diff --git a/hadoop-tools/hadoop-sls/src/test/resources/capacity-scheduler.xml b/hadoop-tools/hadoop-sls/src/test/resources/capacity-scheduler.xml new file mode 100644 index 00000000000..61be96ae6d4 --- /dev/null +++ b/hadoop-tools/hadoop-sls/src/test/resources/capacity-scheduler.xml @@ -0,0 +1,60 @@ + + + + + + + + yarn.scheduler.capacity.root.queues + sls_queue_1,sls_queue_2,sls_queue_3 + The queues at the this level (root is the root queue). 
+ + + + + yarn.scheduler.capacity.root.sls_queue_1.capacity + 25 + + + + yarn.scheduler.capacity.root.sls_queue_1.maximum-capacity + 100 + + + + yarn.scheduler.capacity.root.sls_queue_2.capacity + 25 + + + + yarn.scheduler.capacity.root.sls_queue_2.maximum-capacity + 100 + + + + yarn.scheduler.capacity.root.sls_queue_3.capacity + 50 + + + + yarn.scheduler.capacity.root.sls_queue_3.maximum-capacity + 100 + + diff --git a/hadoop-tools/hadoop-sls/src/test/resources/yarn-site.xml b/hadoop-tools/hadoop-sls/src/test/resources/yarn-site.xml index f6c6a4a983d..78aa6f2dd7a 100644 --- a/hadoop-tools/hadoop-sls/src/test/resources/yarn-site.xml +++ b/hadoop-tools/hadoop-sls/src/test/resources/yarn-site.xml @@ -17,7 +17,18 @@ yarn.resourcemanager.scheduler.class - org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler + org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler + + + + + yarn.resourcemanager.scheduler.monitor.enable + true + + + + yarn.resourcemanager.scheduler.monitor.policies + org.apache.hadoop.yarn.server.resourcemanager.monitor.capacity.ProportionalCapacityPreemptionPolicy From a9d80ae59d15dc35ae5975e07251bdbabe083d29 Mon Sep 17 00:00:00 2001 From: Jing Zhao Date: Thu, 19 Dec 2013 00:27:16 +0000 Subject: [PATCH 25/32] HDFS-5661. Browsing FileSystem via web ui, should use datanode's fqdn instead of ip address. Contributed by Benoy Antony. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552177 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/server/common/JspHelper.java | 17 +++++++++++++++-- .../hdfs/server/datanode/DatanodeJspHelper.java | 16 ++-------------- 3 files changed, 20 insertions(+), 16 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index deb1a55114a..36c50dc498b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -991,6 +991,9 @@ Release 2.3.0 - UNRELEASED HDFS-5657. race condition causes writeback state error in NFS gateway (brandonli) + HDFS-5661. Browsing FileSystem via web ui, should use datanode's fqdn instead of ip + address. (Benoy Antony via jing9) + Release 2.2.0 - 2013-10-13 INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java index a34f2cf217a..a147c0fb189 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/common/JspHelper.java @@ -117,6 +117,18 @@ public class JspHelper { return 0; } } + + /** + * convenience method for canonicalizing host name. + * @param addr name:port or name + * @return canonicalized host name + */ + public static String canonicalize(String addr) { + // default port 1 is supplied to allow addr without port. + // the port will be ignored. + return NetUtils.createSocketAddr(addr, 1).getAddress() + .getCanonicalHostName(); + } /** * A helper class that generates the correct URL for different schema. 
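   // Aside: an illustrative trace of the new helper; the address and host name
   // below are hypothetical, only the call shapes come from the patch:
   //
   //   JspHelper.canonicalize("10.1.2.3")        // reverse-resolves to e.g. "dn1.example.com"
   //   JspHelper.canonicalize("10.1.2.3:50010")  // same result; the dummy port 1 means any supplied port is ignored
   //
   // Url.authority(...) below then appends the datanode's info port (or secure
   // info port) to that FQDN, so browse links carry the host name rather than a
   // raw IP address.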
@@ -124,10 +136,11 @@ public class JspHelper { */ public static final class Url { public static String authority(String scheme, DatanodeID d) { + String fqdn = canonicalize(d.getIpAddr()); if (scheme.equals("http")) { - return d.getInfoAddr(); + return fqdn + ":" + d.getInfoPort(); } else if (scheme.equals("https")) { - return d.getInfoSecureAddr(); + return fqdn + ":" + d.getInfoSecurePort(); } else { throw new IllegalArgumentException("Unknown scheme:" + scheme); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java index c931698a32a..c9e548b4d06 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DatanodeJspHelper.java @@ -77,18 +77,6 @@ public class DatanodeJspHelper { }); } - /** - * Internal convenience method for canonicalizing host name. - * @param addr name:port or name - * @return canonicalized host name - */ - private static String canonicalize(String addr) { - // default port 1 is supplied to allow addr without port. - // the port will be ignored. - return NetUtils.createSocketAddr(addr, 1).getAddress() - .getCanonicalHostName(); - } - /** * Get the default chunk size. * @param conf the configuration @@ -228,7 +216,7 @@ public class DatanodeJspHelper { } } out.print("
Go back to DFS home"); dfs.close(); } @@ -359,7 +347,7 @@ public class DatanodeJspHelper { // generate a table and dump the info out.println("\n"); - String nnCanonicalName = canonicalize(nnAddr); + String nnCanonicalName = JspHelper.canonicalize(nnAddr); for (LocatedBlock cur : blocks) { out.print(""); final String blockidstring = Long.toString(cur.getBlock().getBlockId()); From 93907baa0b033c1431dc7055116746fc9db508cc Mon Sep 17 00:00:00 2001 From: Vinod Kumar Vavilapalli Date: Thu, 19 Dec 2013 02:33:05 +0000 Subject: [PATCH 26/32] YARN-1307. Redesign znode structure for Zookeeper based RM state-store for better organization and scalability. Contributed by Tsuyoshi OZAWA. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552209 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-yarn-project/CHANGES.txt | 3 + .../recovery/FileSystemRMStateStore.java | 36 +-- .../recovery/MemoryRMStateStore.java | 31 +- .../recovery/NullRMStateStore.java | 10 +- .../recovery/RMStateStore.java | 22 +- .../recovery/ZKRMStateStore.java | 281 ++++++++++++------ .../server/resourcemanager/TestRMRestart.java | 11 +- .../recovery/RMStateStoreTestBase.java | 29 ++ .../recovery/TestFSRMStateStore.java | 10 +- 9 files changed, 277 insertions(+), 156 deletions(-) diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index e8e262caaab..8e5c4636138 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -184,6 +184,9 @@ Release 2.4.0 - UNRELEASED YARN-1446. Changed client API to retry killing application till RM acknowledges so as to account for RM crashes/failover. (Jian He via vinodkv) + YARN-1307. Redesign znode structure for Zookeeper based RM state-store for + better organization and scalability. (Tsuyoshi OZAWA via vinodkv) + OPTIMIZATIONS BUG FIXES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java index 88b1a90bf6f..d60e8ada086 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/FileSystemRMStateStore.java @@ -287,11 +287,12 @@ public class FileSystemRMStateStore extends RMStateStore { } @Override - public synchronized void storeApplicationStateInternal(String appId, + public synchronized void storeApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateDataPB) throws Exception { - Path appDirPath = getAppDir(rmAppRoot, appId); + String appIdStr = appId.toString(); + Path appDirPath = getAppDir(rmAppRoot, appIdStr); fs.mkdirs(appDirPath); - Path nodeCreatePath = getNodePath(appDirPath, appId); + Path nodeCreatePath = getNodePath(appDirPath, appIdStr); LOG.info("Storing info for app: " + appId + " at: " + nodeCreatePath); byte[] appStateData = appStateDataPB.getProto().toByteArray(); @@ -306,10 +307,11 @@ public class FileSystemRMStateStore extends RMStateStore { } @Override - public synchronized void updateApplicationStateInternal(String appId, + public synchronized void 
updateApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateDataPB) throws Exception { - Path appDirPath = getAppDir(rmAppRoot, appId); - Path nodeCreatePath = getNodePath(appDirPath, appId); + String appIdStr = appId.toString(); + Path appDirPath = getAppDir(rmAppRoot, appIdStr); + Path nodeCreatePath = getNodePath(appDirPath, appIdStr); LOG.info("Updating info for app: " + appId + " at: " + nodeCreatePath); byte[] appStateData = appStateDataPB.getProto().toByteArray(); @@ -325,14 +327,13 @@ public class FileSystemRMStateStore extends RMStateStore { @Override public synchronized void storeApplicationAttemptStateInternal( - String attemptId, ApplicationAttemptStateDataPBImpl attemptStateDataPB) + ApplicationAttemptId appAttemptId, + ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception { - ApplicationAttemptId appAttemptId = - ConverterUtils.toApplicationAttemptId(attemptId); Path appDirPath = getAppDir(rmAppRoot, appAttemptId.getApplicationId().toString()); - Path nodeCreatePath = getNodePath(appDirPath, attemptId); - LOG.info("Storing info for attempt: " + attemptId + " at: " + Path nodeCreatePath = getNodePath(appDirPath, appAttemptId.toString()); + LOG.info("Storing info for attempt: " + appAttemptId + " at: " + nodeCreatePath); byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); try { @@ -340,21 +341,20 @@ public class FileSystemRMStateStore extends RMStateStore { // based on whether we have lost the right to write to FS writeFile(nodeCreatePath, attemptStateData); } catch (Exception e) { - LOG.info("Error storing info for attempt: " + attemptId, e); + LOG.info("Error storing info for attempt: " + appAttemptId, e); throw e; } } @Override public synchronized void updateApplicationAttemptStateInternal( - String attemptId, ApplicationAttemptStateDataPBImpl attemptStateDataPB) + ApplicationAttemptId appAttemptId, + ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception { - ApplicationAttemptId appAttemptId = - ConverterUtils.toApplicationAttemptId(attemptId); Path appDirPath = getAppDir(rmAppRoot, appAttemptId.getApplicationId().toString()); - Path nodeCreatePath = getNodePath(appDirPath, attemptId); - LOG.info("Updating info for attempt: " + attemptId + " at: " + Path nodeCreatePath = getNodePath(appDirPath, appAttemptId.toString()); + LOG.info("Updating info for attempt: " + appAttemptId + " at: " + nodeCreatePath); byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); try { @@ -362,7 +362,7 @@ public class FileSystemRMStateStore extends RMStateStore { // based on whether we have lost the right to write to FS updateFile(nodeCreatePath, attemptStateData); } catch (Exception e) { - LOG.info("Error updating info for attempt: " + attemptId, e); + LOG.info("Error updating info for attempt: " + appAttemptId, e); throw e; } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java index 961bec3165a..5a20ff28b95 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/MemoryRMStateStore.java @@ -80,7 +80,7 @@ public class MemoryRMStateStore extends RMStateStore { } @Override - public void storeApplicationStateInternal(String appId, + public void storeApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception { ApplicationState appState = @@ -88,11 +88,11 @@ public class MemoryRMStateStore extends RMStateStore { appStateData.getStartTime(), appStateData.getApplicationSubmissionContext(), appStateData.getUser()); - state.appState.put(appState.getAppId(), appState); + state.appState.put(appId, appState); } @Override - public void updateApplicationStateInternal(String appId, + public void updateApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception { ApplicationState updatedAppState = new ApplicationState(appStateData.getSubmitTime(), @@ -102,21 +102,19 @@ public class MemoryRMStateStore extends RMStateStore { appStateData.getDiagnostics(), appStateData.getFinishTime()); LOG.info("Updating final state " + appStateData.getState() + " for app: " + appId); - ApplicationId applicationId = updatedAppState.getAppId(); - if (state.appState.get(applicationId) != null) { + if (state.appState.get(appId) != null) { // add the earlier attempts back updatedAppState.attempts - .putAll(state.appState.get(applicationId).attempts); + .putAll(state.appState.get(appId).attempts); } - state.appState.put(applicationId, updatedAppState); + state.appState.put(appId, updatedAppState); } @Override - public synchronized void storeApplicationAttemptStateInternal(String attemptIdStr, - ApplicationAttemptStateDataPBImpl attemptStateData) - throws Exception { - ApplicationAttemptId attemptId = ConverterUtils - .toApplicationAttemptId(attemptIdStr); + public synchronized void storeApplicationAttemptStateInternal( + ApplicationAttemptId appAttemptId, + ApplicationAttemptStateDataPBImpl attemptStateData) + throws Exception { Credentials credentials = null; if(attemptStateData.getAppAttemptTokens() != null){ DataInputByteBuffer dibb = new DataInputByteBuffer(); @@ -125,7 +123,7 @@ public class MemoryRMStateStore extends RMStateStore { credentials.readTokenStorageStream(dibb); } ApplicationAttemptState attemptState = - new ApplicationAttemptState(attemptId, + new ApplicationAttemptState(appAttemptId, attemptStateData.getMasterContainer(), credentials, attemptStateData.getStartTime()); @@ -139,10 +137,9 @@ public class MemoryRMStateStore extends RMStateStore { @Override public synchronized void updateApplicationAttemptStateInternal( - String attemptIdStr, ApplicationAttemptStateDataPBImpl attemptStateData) + ApplicationAttemptId appAttemptId, + ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { - ApplicationAttemptId attemptId = - ConverterUtils.toApplicationAttemptId(attemptIdStr); Credentials credentials = null; if (attemptStateData.getAppAttemptTokens() != null) { DataInputByteBuffer dibb = new DataInputByteBuffer(); @@ -151,7 +148,7 @@ public class MemoryRMStateStore extends RMStateStore { credentials.readTokenStorageStream(dibb); } ApplicationAttemptState updatedAttemptState = - new ApplicationAttemptState(attemptId, + new ApplicationAttemptState(appAttemptId, attemptStateData.getMasterContainer(), credentials, attemptStateData.getStartTime(), attemptStateData.getState(), attemptStateData.getFinalTrackingUrl(), diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java index 3098b260b7b..af28a0152ef 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/NullRMStateStore.java @@ -22,6 +22,8 @@ package org.apache.hadoop.yarn.server.resourcemanager.recovery; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.security.token.delegation.DelegationKey; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.security.client.RMDelegationTokenIdentifier; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationAttemptStateDataPBImpl; @@ -51,13 +53,13 @@ public class NullRMStateStore extends RMStateStore { } @Override - protected void storeApplicationStateInternal(String appId, + protected void storeApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception { // Do nothing } @Override - protected void storeApplicationAttemptStateInternal(String attemptId, + protected void storeApplicationAttemptStateInternal(ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { // Do nothing } @@ -92,13 +94,13 @@ public class NullRMStateStore extends RMStateStore { } @Override - protected void updateApplicationStateInternal(String appId, + protected void updateApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception { // Do nothing } @Override - protected void updateApplicationAttemptStateInternal(String attemptId, + protected void updateApplicationAttemptStateInternal(ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java index a8452642ce5..dcfdad37033 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStore.java @@ -387,10 +387,10 @@ public abstract class RMStateStore extends AbstractService { * Derived classes must implement this method to store the state of an * application. 
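   // Aside: the practical effect of moving these signatures from String to the
   // typed ApplicationId/ApplicationAttemptId, paraphrasing the MemoryRMStateStore
   // and FileSystemRMStateStore hunks above (no new API is introduced here):
   //
   //   before: ApplicationAttemptId attemptId =
   //               ConverterUtils.toApplicationAttemptId(attemptIdStr);   // re-parse the string form
   //   after : the ApplicationAttemptId arrives directly as the parameter, and a
   //           string form is derived only where a path or znode name is needed,
   //           e.g. getNodePath(appDirPath, appAttemptId.toString());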
*/ - protected abstract void storeApplicationStateInternal(String appId, + protected abstract void storeApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception; - protected abstract void updateApplicationStateInternal(String appId, + protected abstract void updateApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception; @SuppressWarnings("unchecked") @@ -424,10 +424,12 @@ public abstract class RMStateStore extends AbstractService { * Derived classes must implement this method to store the state of an * application attempt */ - protected abstract void storeApplicationAttemptStateInternal(String attemptId, + protected abstract void storeApplicationAttemptStateInternal( + ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception; - protected abstract void updateApplicationAttemptStateInternal(String attemptId, + protected abstract void updateApplicationAttemptStateInternal( + ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception; /** @@ -592,11 +594,11 @@ public abstract class RMStateStore extends AbstractService { LOG.info("Storing info for app: " + appId); try { if (event.getType().equals(RMStateStoreEventType.STORE_APP)) { - storeApplicationStateInternal(appId.toString(), appStateData); + storeApplicationStateInternal(appId, appStateData); notifyDoneStoringApplication(appId, storedException); } else { assert event.getType().equals(RMStateStoreEventType.UPDATE_APP); - updateApplicationStateInternal(appId.toString(), appStateData); + updateApplicationStateInternal(appId, appStateData); notifyDoneUpdatingApplication(appId, storedException); } } catch (Exception e) { @@ -637,15 +639,15 @@ public abstract class RMStateStore extends AbstractService { LOG.debug("Storing info for attempt: " + attemptState.getAttemptId()); } if (event.getType().equals(RMStateStoreEventType.STORE_APP_ATTEMPT)) { - storeApplicationAttemptStateInternal(attemptState.getAttemptId() - .toString(), attemptStateData); + storeApplicationAttemptStateInternal(attemptState.getAttemptId(), + attemptStateData); notifyDoneStoringApplicationAttempt(attemptState.getAttemptId(), storedException); } else { assert event.getType().equals( RMStateStoreEventType.UPDATE_APP_ATTEMPT); - updateApplicationAttemptStateInternal(attemptState.getAttemptId() - .toString(), attemptStateData); + updateApplicationAttemptStateInternal(attemptState.getAttemptId(), + attemptStateData); notifyDoneUpdatingApplicationAttempt(attemptState.getAttemptId(), storedException); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java index 87377814c8a..b8b3d1e6fc7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/ZKRMStateStore.java @@ -78,16 +78,51 @@ public class ZKRMStateStore extends RMStateStore { protected static final String ROOT_ZNODE_NAME = "ZKRMStateRoot"; protected static 
final RMStateVersion CURRENT_VERSION_INFO = RMStateVersion .newInstance(1, 0); + private static final String RM_DELEGATION_TOKENS_ROOT_ZNODE_NAME = + "RMDelegationTokensRoot"; + private static final String RM_DT_SEQUENTIAL_NUMBER_ZNODE_NAME = + "RMDTSequentialNumber"; + private static final String RM_DT_MASTER_KEYS_ROOT_ZNODE_NAME = + "RMDTMasterKeysRoot"; private int numRetries; private String zkHostPort = null; private int zkSessionTimeout; private long zkRetryInterval; private List zkAcl; + + /** + * + * ROOT_DIR_PATH + * |--- VERSION_INFO + * |--- RM_ZK_FENCING_LOCK + * |--- RM_APP_ROOT + * | |----- (#ApplicationId1) + * | | |----- (#ApplicationAttemptIds) + * | | + * | |----- (#ApplicationId2) + * | | |----- (#ApplicationAttemptIds) + * | .... + * | + * |--- RM_DT_SECRET_MANAGER_ROOT + * |----- RM_DT_SEQUENTIAL_NUMBER_ZNODE_NAME + * |----- RM_DELEGATION_TOKENS_ROOT_ZNODE_NAME + * | |----- Token_1 + * | |----- Token_2 + * | .... + * | + * |----- RM_DT_MASTER_KEYS_ROOT_ZNODE_NAME + * | |----- Key_1 + * | |----- Key_2 + * .... + * + */ private String zkRootNodePath; - private String rmDTSecretManagerRoot; private String rmAppRoot; - private String dtSequenceNumberPath = null; + private String rmDTSecretManagerRoot; + private String dtMasterKeysRootPath; + private String delegationTokensRootPath; + private String dtSequenceNumberPath; @VisibleForTesting protected String znodeWorkingPath; @@ -178,12 +213,11 @@ public class ZKRMStateStore extends RMStateStore { throw bafe; } - zkRootNodePath = znodeWorkingPath + "/" + ROOT_ZNODE_NAME; - rmDTSecretManagerRoot = zkRootNodePath + "/" + RM_DT_SECRET_MANAGER_ROOT; - rmAppRoot = zkRootNodePath + "/" + RM_APP_ROOT; + zkRootNodePath = getNodePath(znodeWorkingPath, ROOT_ZNODE_NAME); + rmAppRoot = getNodePath(zkRootNodePath, RM_APP_ROOT); /* Initialize fencing related paths, acls, and ops */ - fencingNodePath = zkRootNodePath + "/" + FENCING_LOCK; + fencingNodePath = getNodePath(zkRootNodePath, FENCING_LOCK); createFencingNodePathOp = Op.create(fencingNodePath, new byte[0], zkAcl, CreateMode.PERSISTENT); deleteFencingNodePathOp = Op.delete(fencingNodePath, -1); @@ -204,6 +238,15 @@ public class ZKRMStateStore extends RMStateStore { zkRootNodeAcl = constructZkRootNodeACL(conf, zkAcl); } } + + rmDTSecretManagerRoot = + getNodePath(zkRootNodePath, RM_DT_SECRET_MANAGER_ROOT); + dtMasterKeysRootPath = getNodePath(rmDTSecretManagerRoot, + RM_DT_MASTER_KEYS_ROOT_ZNODE_NAME); + delegationTokensRootPath = getNodePath(rmDTSecretManagerRoot, + RM_DELEGATION_TOKENS_ROOT_ZNODE_NAME); + dtSequenceNumberPath = getNodePath(rmDTSecretManagerRoot, + RM_DT_SEQUENTIAL_NUMBER_ZNODE_NAME); } @Override @@ -217,8 +260,11 @@ public class ZKRMStateStore extends RMStateStore { if (HAUtil.isHAEnabled(getConfig())){ fence(); } - createRootDir(rmDTSecretManagerRoot); createRootDir(rmAppRoot); + createRootDir(rmDTSecretManagerRoot); + createRootDir(dtMasterKeysRootPath); + createRootDir(delegationTokensRootPath); + createRootDir(dtSequenceNumberPath); } private void createRootDir(final String rootPath) throws Exception { @@ -350,26 +396,69 @@ public class ZKRMStateStore extends RMStateStore { private synchronized void loadRMDTSecretManagerState(RMState rmState) throws Exception { - List childNodes = - getChildrenWithRetries(rmDTSecretManagerRoot, true); + loadRMDelegationKeyState(rmState); + loadRMSequentialNumberState(rmState); + loadRMDelegationTokenState(rmState); + } + private void loadRMDelegationKeyState(RMState rmState) throws Exception { + List childNodes = + 
getChildrenWithRetries(dtMasterKeysRootPath, true); for (String childNodeName : childNodes) { - if (childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) { - rmState.rmSecretManagerState.dtSequenceNumber = - Integer.parseInt(childNodeName.split("_")[1]); + String childNodePath = getNodePath(dtMasterKeysRootPath, childNodeName); + byte[] childData = getDataWithRetries(childNodePath, true); + + if (childData == null) { + LOG.warn("Content of " + childNodePath + " is broken."); continue; } - String childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName); - byte[] childData = getDataWithRetries(childNodePath, true); ByteArrayInputStream is = new ByteArrayInputStream(childData); DataInputStream fsIn = new DataInputStream(is); + try { if (childNodeName.startsWith(DELEGATION_KEY_PREFIX)) { DelegationKey key = new DelegationKey(); key.readFields(fsIn); rmState.rmSecretManagerState.masterKeyState.add(key); - } else if (childNodeName.startsWith(DELEGATION_TOKEN_PREFIX)) { + } + } finally { + is.close(); + } + } + } + + private void loadRMSequentialNumberState(RMState rmState) throws Exception { + byte[] seqData = getDataWithRetries(dtSequenceNumberPath, false); + if (seqData != null) { + ByteArrayInputStream seqIs = new ByteArrayInputStream(seqData); + DataInputStream seqIn = new DataInputStream(seqIs); + + try { + rmState.rmSecretManagerState.dtSequenceNumber = seqIn.readInt(); + } finally { + seqIn.close(); + } + } + } + + private void loadRMDelegationTokenState(RMState rmState) throws Exception { + List childNodes = zkClient.getChildren(delegationTokensRootPath, true); + for (String childNodeName : childNodes) { + String childNodePath = + getNodePath(delegationTokensRootPath, childNodeName); + byte[] childData = getDataWithRetries(childNodePath, true); + + if (childData == null) { + LOG.warn("Content of " + childNodePath + " is broken."); + continue; + } + + ByteArrayInputStream is = new ByteArrayInputStream(childData); + DataInputStream fsIn = new DataInputStream(is); + + try { + if (childNodeName.startsWith(DELEGATION_TOKEN_PREFIX)) { RMDelegationTokenIdentifier identifier = new RMDelegationTokenIdentifier(); identifier.readFields(fsIn); @@ -385,8 +474,6 @@ public class ZKRMStateStore extends RMStateStore { private synchronized void loadRMAppState(RMState rmState) throws Exception { List childNodes = getChildrenWithRetries(rmAppRoot, true); - List attempts = - new ArrayList(); for (String childNodeName : childNodes) { String childNodePath = getNodePath(rmAppRoot, childNodeName); byte[] childData = getDataWithRetries(childNodePath, true); @@ -411,17 +498,28 @@ public class ZKRMStateStore extends RMStateStore { "from the application id"); } rmState.appState.put(appId, appState); - } else if (childNodeName - .startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) { - // attempt - if (LOG.isDebugEnabled()) { - LOG.debug("Loading application attempt from znode: " + childNodeName); - } + loadApplicationAttemptState(appState, appId); + } else { + LOG.info("Unknown child node with name: " + childNodeName); + } + } + } + + private void loadApplicationAttemptState(ApplicationState appState, + ApplicationId appId) + throws Exception { + String appPath = getNodePath(rmAppRoot, appId.toString()); + List attempts = getChildrenWithRetries(appPath, false); + for (String attemptIDStr : attempts) { + if (attemptIDStr.startsWith(ApplicationAttemptId.appAttemptIdStrPrefix)) { + String attemptPath = getNodePath(appPath, attemptIDStr); + byte[] attemptData = getDataWithRetries(attemptPath, 
true); + ApplicationAttemptId attemptId = - ConverterUtils.toApplicationAttemptId(childNodeName); + ConverterUtils.toApplicationAttemptId(attemptIDStr); ApplicationAttemptStateDataPBImpl attemptStateData = new ApplicationAttemptStateDataPBImpl( - ApplicationAttemptStateDataProto.parseFrom(childData)); + ApplicationAttemptStateDataProto.parseFrom(attemptData)); Credentials credentials = null; if (attemptStateData.getAppAttemptTokens() != null) { credentials = new Credentials(); @@ -429,47 +527,26 @@ public class ZKRMStateStore extends RMStateStore { dibb.reset(attemptStateData.getAppAttemptTokens()); credentials.readTokenStorageStream(dibb); } + ApplicationAttemptState attemptState = new ApplicationAttemptState(attemptId, - attemptStateData.getMasterContainer(), credentials, - attemptStateData.getStartTime(), - attemptStateData.getState(), - attemptStateData.getFinalTrackingUrl(), - attemptStateData.getDiagnostics(), - attemptStateData.getFinalApplicationStatus()); - if (!attemptId.equals(attemptState.getAttemptId())) { - throw new YarnRuntimeException("The child node name is different " + - "from the application attempt id"); - } - attempts.add(attemptState); - } else { - LOG.info("Unknown child node with name: " + childNodeName); - } - } + attemptStateData.getMasterContainer(), credentials, + attemptStateData.getStartTime(), + attemptStateData.getState(), + attemptStateData.getFinalTrackingUrl(), + attemptStateData.getDiagnostics(), + attemptStateData.getFinalApplicationStatus()); - // go through all attempts and add them to their apps - for (ApplicationAttemptState attemptState : attempts) { - ApplicationId appId = attemptState.getAttemptId().getApplicationId(); - ApplicationState appState = rmState.appState.get(appId); - if (appState != null) { appState.attempts.put(attemptState.getAttemptId(), attemptState); - } else { - // the application znode may have been removed when the application - // completed but the RM might have stopped before it could remove the - // application attempt znodes - LOG.info("Application node not found for attempt: " - + attemptState.getAttemptId()); - deleteWithRetries( - getNodePath(rmAppRoot, attemptState.getAttemptId().toString()), -1); } } LOG.info("Done Loading applications from ZK state store"); } @Override - public synchronized void storeApplicationStateInternal(String appId, + public synchronized void storeApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateDataPB) throws Exception { - String nodeCreatePath = getNodePath(rmAppRoot, appId); + String nodeCreatePath = getNodePath(rmAppRoot, appId.toString()); if (LOG.isDebugEnabled()) { LOG.debug("Storing info for app: " + appId + " at: " + nodeCreatePath); @@ -481,25 +558,29 @@ public class ZKRMStateStore extends RMStateStore { } @Override - public synchronized void updateApplicationStateInternal(String appId, + public synchronized void updateApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateDataPB) throws Exception { - String nodeCreatePath = getNodePath(rmAppRoot, appId); + String nodeUpdatePath = getNodePath(rmAppRoot, appId.toString()); if (LOG.isDebugEnabled()) { LOG.debug("Storing final state info for app: " + appId + " at: " - + nodeCreatePath); + + nodeUpdatePath); } byte[] appStateData = appStateDataPB.getProto().toByteArray(); - setDataWithRetries(nodeCreatePath, appStateData, 0); + setDataWithRetries(nodeUpdatePath, appStateData, 0); } @Override public synchronized void storeApplicationAttemptStateInternal( - String attemptId, 
ApplicationAttemptStateDataPBImpl attemptStateDataPB) + ApplicationAttemptId appAttemptId, + ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception { - String nodeCreatePath = getNodePath(rmAppRoot, attemptId); + String appDirPath = getNodePath(rmAppRoot, + appAttemptId.getApplicationId().toString()); + String nodeCreatePath = getNodePath(appDirPath, appAttemptId.toString()); + if (LOG.isDebugEnabled()) { - LOG.debug("Storing info for attempt: " + attemptId + " at: " + LOG.debug("Storing info for attempt: " + appAttemptId + " at: " + nodeCreatePath); } byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); @@ -509,31 +590,36 @@ public class ZKRMStateStore extends RMStateStore { @Override public synchronized void updateApplicationAttemptStateInternal( - String attemptId, ApplicationAttemptStateDataPBImpl attemptStateDataPB) + ApplicationAttemptId appAttemptId, + ApplicationAttemptStateDataPBImpl attemptStateDataPB) throws Exception { - String nodeCreatePath = getNodePath(rmAppRoot, attemptId); + String appIdStr = appAttemptId.getApplicationId().toString(); + String appAttemptIdStr = appAttemptId.toString(); + String appDirPath = getNodePath(rmAppRoot, appIdStr); + String nodeUpdatePath = getNodePath(appDirPath, appAttemptIdStr); if (LOG.isDebugEnabled()) { - LOG.debug("Storing final state info for attempt: " + attemptId + " at: " - + nodeCreatePath); + LOG.debug("Storing final state info for attempt: " + appAttemptIdStr + + " at: " + nodeUpdatePath); } byte[] attemptStateData = attemptStateDataPB.getProto().toByteArray(); - setDataWithRetries(nodeCreatePath, attemptStateData, 0); + setDataWithRetries(nodeUpdatePath, attemptStateData, 0); } @Override public synchronized void removeApplicationStateInternal(ApplicationState appState) throws Exception { String appId = appState.getAppId().toString(); - String nodeRemovePath = getNodePath(rmAppRoot, appId); + String appIdRemovePath = getNodePath(rmAppRoot, appId); ArrayList opList = new ArrayList(); - opList.add(Op.delete(nodeRemovePath, -1)); for (ApplicationAttemptId attemptId : appState.attempts.keySet()) { - String attemptRemovePath = getNodePath(rmAppRoot, attemptId.toString()); + String attemptRemovePath = getNodePath(appIdRemovePath, attemptId.toString()); opList.add(Op.delete(attemptRemovePath, -1)); } + opList.add(Op.delete(appIdRemovePath, -1)); + if (LOG.isDebugEnabled()) { - LOG.debug("Removing info for app: " + appId + " at: " + nodeRemovePath + LOG.debug("Removing info for app: " + appId + " at: " + appIdRemovePath + " and its attempts."); } doMultiWithRetries(opList); @@ -546,38 +632,37 @@ public class ZKRMStateStore extends RMStateStore { ArrayList opList = new ArrayList(); // store RM delegation token String nodeCreatePath = - getNodePath(rmDTSecretManagerRoot, DELEGATION_TOKEN_PREFIX + getNodePath(delegationTokensRootPath, DELEGATION_TOKEN_PREFIX + rmDTIdentifier.getSequenceNumber()); - ByteArrayOutputStream os = new ByteArrayOutputStream(); - DataOutputStream fsOut = new DataOutputStream(os); + ByteArrayOutputStream tokenOs = new ByteArrayOutputStream(); + DataOutputStream tokenOut = new DataOutputStream(tokenOs); + ByteArrayOutputStream seqOs = new ByteArrayOutputStream(); + DataOutputStream seqOut = new DataOutputStream(seqOs); + try { - rmDTIdentifier.write(fsOut); - fsOut.writeLong(renewDate); + rmDTIdentifier.write(tokenOut); + tokenOut.writeLong(renewDate); if (LOG.isDebugEnabled()) { LOG.debug("Storing RMDelegationToken_" + rmDTIdentifier.getSequenceNumber()); } - 
opList.add(Op.create(nodeCreatePath, os.toByteArray(), zkAcl, + + opList.add(Op.create(nodeCreatePath, tokenOs.toByteArray(), zkAcl, CreateMode.PERSISTENT)); + + + seqOut.writeInt(latestSequenceNumber); + if (LOG.isDebugEnabled()) { + LOG.debug("Storing " + dtSequenceNumberPath + + ". SequenceNumber: " + latestSequenceNumber); + } + + opList.add(Op.setData(dtSequenceNumberPath, seqOs.toByteArray(), -1)); } finally { - os.close(); + tokenOs.close(); + seqOs.close(); } - // store sequence number - String latestSequenceNumberPath = - getNodePath(rmDTSecretManagerRoot, - DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX + latestSequenceNumber); - if (LOG.isDebugEnabled()) { - LOG.debug("Storing " + DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX + - latestSequenceNumber); - } - - if (dtSequenceNumberPath != null) { - opList.add(Op.delete(dtSequenceNumberPath, -1)); - } - opList.add(Op.create(latestSequenceNumberPath, null, zkAcl, - CreateMode.PERSISTENT)); - dtSequenceNumberPath = latestSequenceNumberPath; doMultiWithRetries(opList); } @@ -585,7 +670,7 @@ public class ZKRMStateStore extends RMStateStore { protected synchronized void removeRMDelegationTokenState( RMDelegationTokenIdentifier rmDTIdentifier) throws Exception { String nodeRemovePath = - getNodePath(rmDTSecretManagerRoot, DELEGATION_TOKEN_PREFIX + getNodePath(delegationTokensRootPath, DELEGATION_TOKEN_PREFIX + rmDTIdentifier.getSequenceNumber()); if (LOG.isDebugEnabled()) { LOG.debug("Removing RMDelegationToken_" @@ -598,7 +683,7 @@ public class ZKRMStateStore extends RMStateStore { protected synchronized void storeRMDTMasterKeyState( DelegationKey delegationKey) throws Exception { String nodeCreatePath = - getNodePath(rmDTSecretManagerRoot, DELEGATION_KEY_PREFIX + getNodePath(dtMasterKeysRootPath, DELEGATION_KEY_PREFIX + delegationKey.getKeyId()); ByteArrayOutputStream os = new ByteArrayOutputStream(); DataOutputStream fsOut = new DataOutputStream(os); @@ -618,7 +703,7 @@ public class ZKRMStateStore extends RMStateStore { protected synchronized void removeRMDTMasterKeyState( DelegationKey delegationKey) throws Exception { String nodeRemovePath = - getNodePath(rmDTSecretManagerRoot, DELEGATION_KEY_PREFIX + getNodePath(dtMasterKeysRootPath, DELEGATION_KEY_PREFIX + delegationKey.getKeyId()); if (LOG.isDebugEnabled()) { LOG.debug("Removing RMDelegationKey_" + delegationKey.getKeyId()); @@ -757,8 +842,7 @@ public class ZKRMStateStore extends RMStateStore { return new ZKAction() { @Override public byte[] run() throws KeeperException, InterruptedException { - Stat stat = new Stat(); - return zkClient.getData(path, watch, stat); + return zkClient.getData(path, watch, null); } }.runWithRetries(); } @@ -865,4 +949,5 @@ public class ZKRMStateStore extends RMStateStore { zk.register(new ForwardingWatcher()); return zk; } + } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java index d273cab3e6e..fe220c07ebd 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/TestRMRestart.java @@ -683,14 +683,14 @@ 
public class TestRMRestart { MemoryRMStateStore memStore = new MemoryRMStateStore() { @Override public synchronized void storeApplicationAttemptStateInternal( - String attemptIdStr, + ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { // ignore attempt saving request. } @Override public synchronized void updateApplicationAttemptStateInternal( - String attemptIdStr, + ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { // ignore attempt saving request. } @@ -1540,7 +1540,7 @@ public class TestRMRestart { public int updateAttempt = 0; @Override - public void updateApplicationStateInternal(String appId, + public void updateApplicationStateInternal(ApplicationId appId, ApplicationStateDataPBImpl appStateData) throws Exception { updateApp = ++count; super.updateApplicationStateInternal(appId, appStateData); @@ -1548,11 +1548,12 @@ public class TestRMRestart { @Override public synchronized void - updateApplicationAttemptStateInternal(String attemptIdStr, + updateApplicationAttemptStateInternal( + ApplicationAttemptId attemptId, ApplicationAttemptStateDataPBImpl attemptStateData) throws Exception { updateAttempt = ++count; - super.updateApplicationAttemptStateInternal(attemptIdStr, + super.updateApplicationAttemptStateInternal(attemptId, attemptStateData); } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java index ff110b31f2f..30cdbc157ff 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/RMStateStoreTestBase.java @@ -234,6 +234,12 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{ attempts.put(attemptIdRemoved, mockRemovedAttempt); store.removeApplication(mockRemovedApp); + // remove application directory recursively. 
+ storeApp(store, appIdRemoved, submitTime, startTime); + storeAttempt(store, attemptIdRemoved, + "container_1352994193343_0002_01_000001", null, null, dispatcher); + store.removeApplication(mockRemovedApp); + // let things settle down Thread.sleep(1000); store.close(); @@ -373,7 +379,30 @@ public class RMStateStoreTestBase extends ClientBaseWithFixes{ Assert.assertEquals(keySet, secretManagerState.getMasterKeyState()); Assert.assertEquals(sequenceNumber, secretManagerState.getDTSequenceNumber()); + + // check to delete delegationKey + store.removeRMDTMasterKey(key); + keySet.clear(); + RMDTSecretManagerState noKeySecretManagerState = + store.loadState().getRMDTSecretManagerState(); + Assert.assertEquals(token1, noKeySecretManagerState.getTokenState()); + Assert.assertEquals(keySet, noKeySecretManagerState.getMasterKeyState()); + Assert.assertEquals(sequenceNumber, + noKeySecretManagerState.getDTSequenceNumber()); + + // check to delete delegationToken + store.removeRMDelegationToken(dtId1, sequenceNumber); + RMDTSecretManagerState noKeyAndTokenSecretManagerState = + store.loadState().getRMDTSecretManagerState(); + token1.clear(); + Assert.assertEquals(token1, + noKeyAndTokenSecretManagerState.getTokenState()); + Assert.assertEquals(keySet, + noKeyAndTokenSecretManagerState.getMasterKeyState()); + Assert.assertEquals(sequenceNumber, + noKeySecretManagerState.getDTSequenceNumber()); store.close(); + } private Token generateAMRMToken( diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java index 27e8411cc1e..84275523188 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/test/java/org/apache/hadoop/yarn/server/resourcemanager/recovery/TestFSRMStateStore.java @@ -35,6 +35,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.conf.YarnConfiguration; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.RMStateVersion; import org.apache.hadoop.yarn.server.resourcemanager.recovery.records.impl.pb.ApplicationStateDataPBImpl; @@ -178,10 +179,11 @@ public class TestFSRMStateStore extends RMStateStoreTestBase { @Override public void run() { try { - store.storeApplicationStateInternal("application1", - (ApplicationStateDataPBImpl) ApplicationStateDataPBImpl - .newApplicationStateData(111, 111, "user", null, - RMAppState.ACCEPTED, "diagnostics", 333)); + store.storeApplicationStateInternal( + ApplicationId.newInstance(100L, 1), + (ApplicationStateDataPBImpl) ApplicationStateDataPBImpl + .newApplicationStateData(111, 111, "user", null, + RMAppState.ACCEPTED, "diagnostics", 333)); } catch (Exception e) { // TODO 0 datanode exception will not be retried by dfs client, fix // that separately. 
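A minimal sketch (not part of the patch) of the hierarchical znode layout the ZKRMStateStore change above introduces: application attempts now live under their application's znode rather than directly under RM_APP_ROOT, so an application and all of its attempts can be removed in one atomic ZooKeeper multi(). Here rmAppRoot, appId, appState and the ZooKeeper handle zk are assumed to be initialized as in ZKRMStateStore; the store itself issues the same ops through doMultiWithRetries().

    // .../ZKRMStateRoot/RMAppRoot/<applicationId>/<applicationAttemptId>
    String appPath = rmAppRoot + "/" + appId.toString();
    List<Op> ops = new ArrayList<Op>();
    for (ApplicationAttemptId attemptId : appState.attempts.keySet()) {
      // delete the attempt children first ...
      ops.add(Op.delete(appPath + "/" + attemptId.toString(), -1));
    }
    // ... then the application znode itself, all in one atomic call
    ops.add(Op.delete(appPath, -1));
    zk.multi(ops);
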
From b0947f93056939d419882c13d4acf3e935a66e62 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 19 Dec 2013 05:21:46 +0000 Subject: [PATCH 27/32] HDFS-5663 make the retry time and interval value configurable in openInfo() git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552232 13f79535-47bb-0310-9956-ffa450edef68 --- .../main/java/org/apache/hadoop/hdfs/DFSClient.java | 12 ++++++++++-- .../java/org/apache/hadoop/hdfs/DFSConfigKeys.java | 4 ++++ .../java/org/apache/hadoop/hdfs/DFSInputStream.java | 4 ++-- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java index 49d197f74bc..ab98fa07a4d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSClient.java @@ -99,10 +99,10 @@ import org.apache.hadoop.fs.MD5MD5CRC32CastagnoliFileChecksum; import org.apache.hadoop.fs.MD5MD5CRC32FileChecksum; import org.apache.hadoop.fs.MD5MD5CRC32GzipFileChecksum; import org.apache.hadoop.fs.Options; -import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.Options.ChecksumOpt; import org.apache.hadoop.fs.ParentNotDirectoryException; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.VolumeId; import org.apache.hadoop.fs.permission.FsPermission; @@ -110,6 +110,7 @@ import org.apache.hadoop.hdfs.client.ClientMmapManager; import org.apache.hadoop.hdfs.client.HdfsDataInputStream; import org.apache.hadoop.hdfs.client.HdfsDataOutputStream; import org.apache.hadoop.hdfs.protocol.CacheDirectiveEntry; +import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.CacheDirectiveIterator; import org.apache.hadoop.hdfs.protocol.CachePoolEntry; import org.apache.hadoop.hdfs.protocol.CachePoolInfo; @@ -122,7 +123,6 @@ import org.apache.hadoop.hdfs.protocol.DirectoryListing; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; import org.apache.hadoop.hdfs.protocol.HdfsBlocksMetadata; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.CacheDirectiveInfo; import org.apache.hadoop.hdfs.protocol.HdfsConstants.DatanodeReportType; import org.apache.hadoop.hdfs.protocol.HdfsConstants.SafeModeAction; import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; @@ -283,6 +283,8 @@ public class DFSClient implements java.io.Closeable { final boolean getHdfsBlocksMetadataEnabled; final int getFileBlockStorageLocationsNumThreads; final int getFileBlockStorageLocationsTimeout; + final int retryTimesForGetLastBlockLength; + final int retryIntervalForGetLastBlockLength; final boolean useLegacyBlockReader; final boolean useLegacyBlockReaderLocal; @@ -356,6 +358,12 @@ public class DFSClient implements java.io.Closeable { getFileBlockStorageLocationsTimeout = conf.getInt( DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT, DFSConfigKeys.DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT); + retryTimesForGetLastBlockLength = conf.getInt( + DFSConfigKeys.DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH, + DFSConfigKeys.DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH_DEFAULT); + retryIntervalForGetLastBlockLength = conf.getInt( + DFSConfigKeys.DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH, + 
DFSConfigKeys.DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH_DEFAULT); useLegacyBlockReader = conf.getBoolean( DFSConfigKeys.DFS_CLIENT_USE_LEGACY_BLOCKREADER, diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index ab170058808..dce98d54e69 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -65,6 +65,10 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final int DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_NUM_THREADS_DEFAULT = 10; public static final String DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT = "dfs.client.file-block-storage-locations.timeout"; public static final int DFS_CLIENT_FILE_BLOCK_STORAGE_LOCATIONS_TIMEOUT_DEFAULT = 60; + public static final String DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH = "dfs.client.retry.times.get-last-block-length"; + public static final int DFS_CLIENT_RETRY_TIMES_GET_LAST_BLOCK_LENGTH_DEFAULT = 3; + public static final String DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH = "dfs.client.retry.interval-ms.get-last-block-length"; + public static final int DFS_CLIENT_RETRY_INTERVAL_GET_LAST_BLOCK_LENGTH_DEFAULT = 4000; // HA related configuration public static final String DFS_CLIENT_FAILOVER_PROXY_PROVIDER_KEY_PREFIX = "dfs.client.failover.proxy.provider"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java index bdc660d484a..3d26a98b56d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSInputStream.java @@ -237,7 +237,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, */ synchronized void openInfo() throws IOException, UnresolvedLinkException { lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength(); - int retriesForLastBlockLength = 3; + int retriesForLastBlockLength = dfsClient.getConf().retryTimesForGetLastBlockLength; while (retriesForLastBlockLength > 0) { // Getting last block length as -1 is a special case. When cluster // restarts, DNs may not report immediately. At this time partial block @@ -247,7 +247,7 @@ implements ByteBufferReadable, CanSetDropBehind, CanSetReadahead, DFSClient.LOG.warn("Last block locations not available. " + "Datanodes might not have reported blocks completely." 
+ " Will retry for " + retriesForLastBlockLength + " times"); - waitFor(4000); + waitFor(dfsClient.getConf().retryIntervalForGetLastBlockLength); lastBlockBeingWrittenLength = fetchLocatedBlocksAndGetLastBlockLength(); } else { break; From d4e108156699e0c8e31b45e0aebab150a173c727 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Thu, 19 Dec 2013 06:15:09 +0000 Subject: [PATCH 28/32] HDFS-5663 make the retry time and interval value configurable in openInfo() git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552237 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 36c50dc498b..1034fff1dd1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -754,6 +754,9 @@ Release 2.4.0 - UNRELEASED HDFS-5634. Allow BlockReaderLocal to switch between checksumming and not (cmccabe) + HDFS-5663 make the retry time and interval value configurable in openInfo() + (Liang Xie via stack) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) From 42e7446add04966350823fa36dc81dd89f0e1cf8 Mon Sep 17 00:00:00 2001 From: Junping Du Date: Thu, 19 Dec 2013 09:21:19 +0000 Subject: [PATCH 29/32] HDFS-5540. Fix intermittent failure in TestBlocksWithNotEnoughRacks. (Binglin Chang via junping_du) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552256 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../test/java/org/apache/hadoop/hdfs/DFSTestUtil.java | 10 ++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 1034fff1dd1..de3bf8ff957 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -757,6 +757,9 @@ Release 2.4.0 - UNRELEASED HDFS-5663 make the retry time and interval value configurable in openInfo() (Liang Xie via stack) + HDFS-5540. Fix intermittent failure in TestBlocksWithNotEnoughRacks. + (Binglin Chang via junping_du) + OPTIMIZATIONS HDFS-5239. Allow FSNamesystem lock fairness to be configurable (daryn) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java index 8c29b1c0208..e9cb6cca9b2 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/DFSTestUtil.java @@ -347,9 +347,15 @@ public class DFSTestUtil { // Swallow exceptions } System.out.println("Waiting for "+corruptRepls+" corrupt replicas"); - repls = ns.getBlockManager().numCorruptReplicas(b.getLocalBlock()); count++; - Thread.sleep(1000); + // check more often so corrupt block reports are not easily missed + for (int i = 0; i < 10; i++) { + repls = ns.getBlockManager().numCorruptReplicas(b.getLocalBlock()); + Thread.sleep(100); + if (repls == corruptRepls) { + break; + } + } } if (count == ATTEMPTS) { throw new TimeoutException("Timed out waiting for corrupt replicas." From 011a7b210d852a0a9c7f95116a0bf166fa624b3f Mon Sep 17 00:00:00 2001 From: Daryn Sharp Date: Thu, 19 Dec 2013 18:21:33 +0000 Subject: [PATCH 30/32] HADOOP-10172. 
Cache SASL server factories (daryn) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552389 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 2 + .../apache/hadoop/security/SaslRpcServer.java | 52 ++++++++++++++++++- .../org/apache/hadoop/ipc/TestSaslRPC.java | 4 +- 3 files changed, 56 insertions(+), 2 deletions(-) diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index c78bd83ec4b..3d45e134c12 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -408,6 +408,8 @@ Release 2.4.0 - UNRELEASED HADOOP-10047. Add a direct-buffer based apis for compression. (Gopal V via acmurthy) + HADOOP-10172. Cache SASL server factories (daryn) + BUG FIXES HADOOP-9964. Fix deadlocks in TestHttpServer by synchronize diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java index 72b56c81749..bbabd887a2f 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/SaslRpcServer.java @@ -25,6 +25,10 @@ import java.io.DataOutput; import java.io.IOException; import java.security.PrivilegedExceptionAction; import java.security.Security; +import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.TreeMap; @@ -38,6 +42,7 @@ import javax.security.sasl.RealmCallback; import javax.security.sasl.Sasl; import javax.security.sasl.SaslException; import javax.security.sasl.SaslServer; +import javax.security.sasl.SaslServerFactory; import org.apache.commons.codec.binary.Base64; import org.apache.commons.logging.Log; @@ -63,6 +68,7 @@ public class SaslRpcServer { public static final String SASL_DEFAULT_REALM = "default"; public static final Map SASL_PROPS = new TreeMap(); + private static SaslServerFactory saslFactory; public static enum QualityOfProtection { AUTHENTICATION("auth"), @@ -151,7 +157,7 @@ public class SaslRpcServer { new PrivilegedExceptionAction() { @Override public SaslServer run() throws SaslException { - return Sasl.createSaslServer(mechanism, protocol, serverId, + return saslFactory.createSaslServer(mechanism, protocol, serverId, SaslRpcServer.SASL_PROPS, callback); } }); @@ -180,6 +186,7 @@ public class SaslRpcServer { SASL_PROPS.put(Sasl.QOP, saslQOP.getSaslQop()); SASL_PROPS.put(Sasl.SERVER_AUTH, "true"); Security.addProvider(new SaslPlainServer.SecurityProvider()); + saslFactory = new FastSaslServerFactory(SASL_PROPS); } static String encodeIdentifier(byte[] identifier) { @@ -363,4 +370,47 @@ public class SaslRpcServer { } } } + + // Sasl.createSaslServer is 100-200X slower than caching the factories! 
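// Illustrative note (not part of the patch): FastSaslServerFactory below walks
// Sasl.getSaslServerFactories() once, in its constructor, and indexes the
// factories by mechanism name. The server-side call site above then does
//
//     saslFactory.createSaslServer(mechanism, protocol, serverId,
//         SaslRpcServer.SASL_PROPS, callback);
//
// which resolves to a cached factory lookup rather than a full provider scan
// on every incoming connection, which is the 100-200X difference the comment
// above cites.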
+ private static class FastSaslServerFactory implements SaslServerFactory { + private final Map> factoryCache = + new HashMap>(); + + FastSaslServerFactory(Map props) { + final Enumeration factories = + Sasl.getSaslServerFactories(); + while (factories.hasMoreElements()) { + SaslServerFactory factory = factories.nextElement(); + for (String mech : factory.getMechanismNames(props)) { + if (!factoryCache.containsKey(mech)) { + factoryCache.put(mech, new ArrayList()); + } + factoryCache.get(mech).add(factory); + } + } + } + + @Override + public SaslServer createSaslServer(String mechanism, String protocol, + String serverName, Map props, CallbackHandler cbh) + throws SaslException { + SaslServer saslServer = null; + List factories = factoryCache.get(mechanism); + if (factories != null) { + for (SaslServerFactory factory : factories) { + saslServer = factory.createSaslServer( + mechanism, protocol, serverName, props, cbh); + if (saslServer != null) { + break; + } + } + } + return saslServer; + } + + @Override + public String[] getMechanismNames(Map props) { + return factoryCache.keySet().toArray(new String[0]); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java index 1e96ed1ddbc..eecefc70c8d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/ipc/TestSaslRPC.java @@ -137,7 +137,9 @@ public class TestSaslRPC { LOG.info("Testing QOP:"+expectedQop); LOG.info("---------------------------------"); conf = new Configuration(); - conf.set(HADOOP_SECURITY_AUTHENTICATION, KERBEROS.toString()); + // the specific tests for kerberos will enable kerberos. forcing it + // for all tests will cause tests to fail if the user has a TGT + conf.set(HADOOP_SECURITY_AUTHENTICATION, SIMPLE.toString()); conf.set("hadoop.rpc.protection", expectedQop.name().toLowerCase()); UserGroupInformation.setConfiguration(conf); enableSecretManager = null; From c7153e182aa28a078f6c0384a610cd7a4f36bc5a Mon Sep 17 00:00:00 2001 From: Andrew Wang Date: Fri, 20 Dec 2013 00:00:45 +0000 Subject: [PATCH 31/32] HDFS-5679. TestCacheDirectives should handle the case where native code is not available. (wang) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552461 13f79535-47bb-0310-9956-ffa450edef68 --- hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt | 3 +++ .../hadoop/hdfs/server/namenode/TestCacheDirectives.java | 1 + 2 files changed, 4 insertions(+) diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index de3bf8ff957..45aaaccbebe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -448,6 +448,9 @@ Trunk (Unreleased) HDFS-5454. DataNode UUID should be assigned prior to FsDataset initialization. (Arpit Agarwal) + HDFS-5679. TestCacheDirectives should handle the case where native code + is not available. (wang) + BREAKDOWN OF HDFS-2832 SUBTASKS AND RELATED JIRAS HDFS-4985. 
Add storage type to the protocol and expose it in block report diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java index 6dbbb8363e0..03bfc30792e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestCacheDirectives.java @@ -98,6 +98,7 @@ public class TestCacheDirectives { static private CacheManipulator prevCacheManipulator; static { + NativeIO.POSIX.setCacheManipulator(new NoMlockCacheManipulator()); EditLogFileOutputStream.setShouldSkipFsyncForTesting(false); } From 77306291643838ed7b57b99d6497553314a525f2 Mon Sep 17 00:00:00 2001 From: Owen O'Malley Date: Fri, 20 Dec 2013 00:25:42 +0000 Subject: [PATCH 32/32] HADOOP-10141. Create KeyProvider API to separate encryption key storage from the applications. (omalley) git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1552462 13f79535-47bb-0310-9956-ffa450edef68 --- .../hadoop-common/CHANGES.txt | 3 + hadoop-common-project/hadoop-common/pom.xml | 4 + .../crypto/key/JavaKeyStoreProvider.java | 313 ++++++++++++++ .../apache/hadoop/crypto/key/KeyProvider.java | 384 ++++++++++++++++++ .../hadoop/crypto/key/KeyProviderFactory.java | 76 ++++ .../hadoop/crypto/key/UserProvider.java | 145 +++++++ .../apache/hadoop/security/Credentials.java | 10 +- ...pache.hadoop.crypto.key.KeyProviderFactory | 17 + .../hadoop/crypto/key/TestKeyProvider.java | 112 +++++ .../crypto/key/TestKeyProviderFactory.java | 191 +++++++++ hadoop-project/pom.xml | 5 + 11 files changed, 1259 insertions(+), 1 deletion(-) create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java create mode 100644 hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java create mode 100644 hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProvider.java create mode 100644 hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index 3d45e134c12..0a6962f6f5a 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -105,6 +105,9 @@ Trunk (Unreleased) HADOOP-9833 move slf4j to version 1.7.5 (Kousuke Saruta via stevel) + HADOOP-10141. Create KeyProvider API to separate encryption key storage + from the applications. (omalley) + BUG FIXES HADOOP-9451. Fault single-layer config if node group topology is enabled. 
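The rest of this patch adds the KeyProvider API itself (JavaKeyStoreProvider, KeyProvider, KeyProviderFactory, UserProvider below). A minimal usage sketch against that API as defined below; the jceks path and key name are made-up examples, imports and exception handling are omitted, and the provider is obtained through the JavaKeyStoreProvider.Factory that the patch defines:

    Configuration conf = new Configuration();
    KeyProvider provider = new JavaKeyStoreProvider.Factory()
        .createProvider(new URI("jceks://file/tmp/keys.jks"), conf);

    byte[] material = new byte[16];                    // 128-bit key material
    provider.createKey("mykey", material,
        KeyProvider.options(conf).setBitLength(128));  // stored as "mykey@0"
    KeyProvider.KeyVersion current = provider.getCurrentKey("mykey");
    provider.rollNewVersion("mykey", material);        // becomes "mykey@1"
    provider.flush();                                  // write the keystore out
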
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 7ebf9b4cf08..366aa38e0a5 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -209,6 +209,10 @@ protobuf-java compile + + com.google.code.gson + gson + org.apache.hadoop hadoop-auth diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java new file mode 100644 index 00000000000..3c82563628e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/JavaKeyStoreProvider.java @@ -0,0 +1,313 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.crypto.key; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; + +import javax.crypto.spec.SecretKeySpec; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.io.Serializable; +import java.net.URI; +import java.security.Key; +import java.security.KeyStore; +import java.security.KeyStoreException; +import java.security.NoSuchAlgorithmException; +import java.security.UnrecoverableKeyException; +import java.security.cert.CertificateException; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + +/** + * KeyProvider based on Java's KeyStore file format. The file may be stored in + * any Hadoop FileSystem using the following name mangling: + * jks://hdfs@nn1.example.com/my/keys.jks -> hdfs://nn1.example.com/my/keys.jks + * jks://file/home/owen/keys.jks -> file:///home/owen/keys.jks + * + * The password for the keystore is taken from the HADOOP_KEYSTORE_PASSWORD + * environment variable with a default of 'none'. + * + * It is expected for encrypted InputFormats and OutputFormats to copy the keys + * from the original provider into the job's Credentials object, which is + * accessed via the UserProvider. Therefore, this provider won't be used by + * MapReduce tasks. 
+ */ +@InterfaceAudience.Private +public class JavaKeyStoreProvider extends KeyProvider { + public static final String SCHEME_NAME = "jceks"; + public static final String KEYSTORE_PASSWORD_NAME = + "HADOOP_KEYSTORE_PASSWORD"; + public static final String KEYSTORE_PASSWORD_DEFAULT = "none"; + + private final URI uri; + private final Path path; + private final FileSystem fs; + private final KeyStore keyStore; + private final char[] password; + private boolean changed = false; + + private final Map cache = new HashMap(); + + private JavaKeyStoreProvider(URI uri, Configuration conf) throws IOException { + this.uri = uri; + path = unnestUri(uri); + fs = FileSystem.get(conf); + // Get the password from the user's environment + String pw = System.getenv(KEYSTORE_PASSWORD_NAME); + if (pw == null) { + pw = KEYSTORE_PASSWORD_DEFAULT; + } + password = pw.toCharArray(); + try { + keyStore = KeyStore.getInstance(SCHEME_NAME); + if (fs.exists(path)) { + keyStore.load(fs.open(path), password); + } else { + // required to create an empty keystore. *sigh* + keyStore.load(null, password); + } + } catch (KeyStoreException e) { + throw new IOException("Can't create keystore", e); + } catch (NoSuchAlgorithmException e) { + throw new IOException("Can't load keystore " + path, e); + } catch (CertificateException e) { + throw new IOException("Can't load keystore " + path, e); + } + } + + @Override + public KeyVersion getKeyVersion(String versionName) throws IOException { + SecretKeySpec key = null; + try { + if (!keyStore.containsAlias(versionName)) { + return null; + } + key = (SecretKeySpec) keyStore.getKey(versionName, password); + } catch (KeyStoreException e) { + throw new IOException("Can't get key " + versionName + " from " + + path, e); + } catch (NoSuchAlgorithmException e) { + throw new IOException("Can't get algorithm for key " + key + " from " + + path, e); + } catch (UnrecoverableKeyException e) { + throw new IOException("Can't recover key " + key + " from " + path, e); + } + return new KeyVersion(versionName, key.getEncoded()); + } + + @Override + public Metadata getMetadata(String name) throws IOException { + if (cache.containsKey(name)) { + return cache.get(name); + } + try { + if (!keyStore.containsAlias(name)) { + return null; + } + Metadata meta = ((KeyMetadata) keyStore.getKey(name, password)).metadata; + cache.put(name, meta); + return meta; + } catch (KeyStoreException e) { + throw new IOException("Can't get metadata for " + name + + " from keystore " + path, e); + } catch (NoSuchAlgorithmException e) { + throw new IOException("Can't get algorithm for " + name + + " from keystore " + path, e); + } catch (UnrecoverableKeyException e) { + throw new IOException("Can't recover key for " + name + + " from keystore " + path, e); + } + } + + @Override + public KeyVersion createKey(String name, byte[] material, + Options options) throws IOException { + try { + if (keyStore.containsAlias(name) || cache.containsKey(name)) { + throw new IOException("Key " + name + " already exists in " + this); + } + } catch (KeyStoreException e) { + throw new IOException("Problem looking up key " + name + " in " + this, + e); + } + Metadata meta = new Metadata(options.getCipher(), options.getBitLength(), + new Date(), 1); + if (options.getBitLength() != 8 * material.length) { + throw new IOException("Wrong key length. 
Required " + + options.getBitLength() + ", but got " + (8 * material.length)); + } + cache.put(name, meta); + String versionName = buildVersionName(name, 0); + return innerSetKeyVersion(versionName, material, meta.getCipher()); + } + + @Override + public void deleteKey(String name) throws IOException { + Metadata meta = getMetadata(name); + if (meta == null) { + throw new IOException("Key " + name + " does not exist in " + this); + } + for(int v=0; v < meta.getVersions(); ++v) { + String versionName = buildVersionName(name, v); + try { + if (keyStore.containsAlias(versionName)) { + keyStore.deleteEntry(versionName); + } + } catch (KeyStoreException e) { + throw new IOException("Problem removing " + versionName + " from " + + this, e); + } + } + try { + if (keyStore.containsAlias(name)) { + keyStore.deleteEntry(name); + } + } catch (KeyStoreException e) { + throw new IOException("Problem removing " + name + " from " + this, e); + } + cache.remove(name); + changed = true; + } + + KeyVersion innerSetKeyVersion(String versionName, byte[] material, + String cipher) throws IOException { + try { + keyStore.setKeyEntry(versionName, new SecretKeySpec(material, cipher), + password, null); + } catch (KeyStoreException e) { + throw new IOException("Can't store key " + versionName + " in " + this, + e); + } + changed = true; + return new KeyVersion(versionName, material); + } + + @Override + public KeyVersion rollNewVersion(String name, + byte[] material) throws IOException { + Metadata meta = getMetadata(name); + if (meta == null) { + throw new IOException("Key " + name + " not found"); + } + if (meta.getBitLength() != 8 * material.length) { + throw new IOException("Wrong key length. Required " + + meta.getBitLength() + ", but got " + (8 * material.length)); + } + int nextVersion = meta.addVersion(); + String versionName = buildVersionName(name, nextVersion); + return innerSetKeyVersion(versionName, material, meta.getCipher()); + } + + @Override + public void flush() throws IOException { + if (!changed) { + return; + } + // put all of the updates into the keystore + for(Map.Entry entry: cache.entrySet()) { + try { + keyStore.setKeyEntry(entry.getKey(), new KeyMetadata(entry.getValue()), + password, null); + } catch (KeyStoreException e) { + throw new IOException("Can't set metadata key " + entry.getKey(),e ); + } + } + // write out the keystore + FSDataOutputStream out = fs.create(path, true); + try { + keyStore.store(out, password); + } catch (KeyStoreException e) { + throw new IOException("Can't store keystore " + this, e); + } catch (NoSuchAlgorithmException e) { + throw new IOException("No such algorithm storing keystore " + this, e); + } catch (CertificateException e) { + throw new IOException("Certificate exception storing keystore " + this, + e); + } + out.close(); + changed = false; + } + + @Override + public String toString() { + return uri.toString(); + } + + /** + * The factory to create JksProviders, which is used by the ServiceLoader. + */ + public static class Factory extends KeyProviderFactory { + @Override + public KeyProvider createProvider(URI providerName, + Configuration conf) throws IOException { + if (SCHEME_NAME.equals(providerName.getScheme())) { + return new JavaKeyStoreProvider(providerName, conf); + } + return null; + } + } + + /** + * An adapter between a KeyStore Key and our Metadata. This is used to store + * the metadata in a KeyStore even though isn't really a key. 
+ */ + public static class KeyMetadata implements Key, Serializable { + private Metadata metadata; + private final static long serialVersionUID = 8405872419967874451L; + + private KeyMetadata(Metadata meta) { + this.metadata = meta; + } + + @Override + public String getAlgorithm() { + return metadata.getCipher(); + } + + @Override + public String getFormat() { + return "KeyMetadata"; + } + + @Override + public byte[] getEncoded() { + return new byte[0]; + } + + private void writeObject(ObjectOutputStream out) throws IOException { + byte[] serialized = metadata.serialize(); + out.writeInt(serialized.length); + out.write(serialized); + } + + private void readObject(ObjectInputStream in + ) throws IOException, ClassNotFoundException { + byte[] buf = new byte[in.readInt()]; + in.readFully(buf); + metadata = new Metadata(buf); + } + + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java new file mode 100644 index 00000000000..a8e95e5eb6e --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java @@ -0,0 +1,384 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.crypto.key; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStreamReader; +import java.io.OutputStreamWriter; +import java.net.URI; +import java.util.Date; +import java.util.List; + +import com.google.gson.stream.JsonReader; +import com.google.gson.stream.JsonWriter; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + +/** + * A provider of secret key material for Hadoop applications. Provides an + * abstraction to separate key storage from users of encryption. It + * is intended to support getting or storing keys in a variety of ways, + * including third party bindings. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class KeyProvider { + public static final String DEFAULT_CIPHER_NAME = + "hadoop.security.key.default.cipher"; + public static final String DEFAULT_CIPHER = "AES/CTR/NoPadding"; + public static final String DEFAULT_BITLENGTH_NAME = + "hadoop.security.key.default.bitlength"; + public static final int DEFAULT_BITLENGTH = 256; + + /** + * The combination of both the key version name and the key material. 
+ */ + public static class KeyVersion { + private final String versionName; + private final byte[] material; + + protected KeyVersion(String versionName, + byte[] material) { + this.versionName = versionName; + this.material = material; + } + + public String getVersionName() { + return versionName; + } + + public byte[] getMaterial() { + return material; + } + + public String toString() { + StringBuilder buf = new StringBuilder(); + buf.append("key("); + buf.append(versionName); + buf.append(")="); + if (material == null) { + buf.append("null"); + } else { + for(byte b: material) { + buf.append(' '); + int right = b & 0xff; + if (right < 0x10) { + buf.append('0'); + } + buf.append(Integer.toHexString(right)); + } + } + return buf.toString(); + } + } + + /** + * Key metadata that is associated with the key. + */ + public static class Metadata { + private final static String CIPHER_FIELD = "cipher"; + private final static String BIT_LENGTH_FIELD = "bitLength"; + private final static String CREATED_FIELD = "created"; + private final static String VERSIONS_FIELD = "versions"; + + private final String cipher; + private final int bitLength; + private final Date created; + private int versions; + + protected Metadata(String cipher, int bitLength, + Date created, int versions) { + this.cipher = cipher; + this.bitLength = bitLength; + this.created = created; + this.versions = versions; + } + + public Date getCreated() { + return created; + } + + public String getCipher() { + return cipher; + } + + /** + * Get the algorithm from the cipher. + * @return the algorithm name + */ + public String getAlgorithm() { + int slash = cipher.indexOf('/'); + if (slash == - 1) { + return cipher; + } else { + return cipher.substring(0, slash); + } + } + + public int getBitLength() { + return bitLength; + } + + public int getVersions() { + return versions; + } + + protected int addVersion() { + return versions++; + } + + /** + * Serialize the metadata to a set of bytes. + * @return the serialized bytes + * @throws IOException + */ + protected byte[] serialize() throws IOException { + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + JsonWriter writer = new JsonWriter(new OutputStreamWriter(buffer)); + writer.beginObject(); + if (cipher != null) { + writer.name(CIPHER_FIELD).value(cipher); + } + if (bitLength != 0) { + writer.name(BIT_LENGTH_FIELD).value(bitLength); + } + if (created != null) { + writer.name(CREATED_FIELD).value(created.getTime()); + } + writer.name(VERSIONS_FIELD).value(versions); + writer.endObject(); + writer.flush(); + return buffer.toByteArray(); + } + + /** + * Deserialize a new metadata object from a set of bytes. 
+ * @param bytes the serialized metadata + * @throws IOException + */ + protected Metadata(byte[] bytes) throws IOException { + String cipher = null; + int bitLength = 0; + Date created = null; + int versions = 0; + JsonReader reader = new JsonReader(new InputStreamReader + (new ByteArrayInputStream(bytes))); + reader.beginObject(); + while (reader.hasNext()) { + String field = reader.nextName(); + if (CIPHER_FIELD.equals(field)) { + cipher = reader.nextString(); + } else if (BIT_LENGTH_FIELD.equals(field)) { + bitLength = reader.nextInt(); + } else if (CREATED_FIELD.equals(field)) { + created = new Date(reader.nextLong()); + } else if (VERSIONS_FIELD.equals(field)) { + versions = reader.nextInt(); + } + } + reader.endObject(); + this.cipher = cipher; + this.bitLength = bitLength; + this.created = created; + this.versions = versions; + } + } + + /** + * Options when creating key objects. + */ + public static class Options { + private String cipher; + private int bitLength; + + public Options(Configuration conf) { + cipher = conf.get(DEFAULT_CIPHER_NAME, DEFAULT_CIPHER); + bitLength = conf.getInt(DEFAULT_BITLENGTH_NAME, DEFAULT_BITLENGTH); + } + + public Options setCipher(String cipher) { + this.cipher = cipher; + return this; + } + + public Options setBitLength(int bitLength) { + this.bitLength = bitLength; + return this; + } + + protected String getCipher() { + return cipher; + } + + protected int getBitLength() { + return bitLength; + } + } + + /** + * A helper function to create an options object. + * @param conf the configuration to use + * @return a new options object + */ + public static Options options(Configuration conf) { + return new Options(conf); + } + + /** + * Get the key material for a specific version of the key. This method is used + * when decrypting data. + * @param versionName the name of a specific version of the key + * @return the key material + * @throws IOException + */ + public abstract KeyVersion getKeyVersion(String versionName + ) throws IOException; + + /** + * Get the current version of the key, which should be used for encrypting new + * data. + * @param name the base name of the key + * @return the version name of the current version of the key or null if the + * key version doesn't exist + * @throws IOException + */ + public KeyVersion getCurrentKey(String name) throws IOException { + Metadata meta = getMetadata(name); + if (meta == null) { + return null; + } + return getKeyVersion(buildVersionName(name, meta.getVersions() - 1)); + } + + /** + * Get metadata about the key. + * @param name the basename of the key + * @return the key's metadata or null if the key doesn't exist + * @throws IOException + */ + public abstract Metadata getMetadata(String name) throws IOException; + + /** + * Create a new key. The given key must not already exist. + * @param name the base name of the key + * @param material the key material for the first version of the key. + * @param options the options for the new key. + * @return the version name of the first version of the key. + * @throws IOException + */ + public abstract KeyVersion createKey(String name, byte[] material, + Options options) throws IOException; + + /** + * Delete the given key. + * @param name the name of the key to delete + * @throws IOException + */ + public abstract void deleteKey(String name) throws IOException; + + /** + * Roll a new version of the given key. 
+ * @param name the basename of the key + * @param material the new key material + * @return the name of the new version of the key + * @throws IOException + */ + public abstract KeyVersion rollNewVersion(String name, + byte[] material + ) throws IOException; + + /** + * Ensures that any changes to the keys are written to persistent store. + * @throws IOException + */ + public abstract void flush() throws IOException; + + /** + * Split the versionName in to a base name. Converts "/aaa/bbb/3" to + * "/aaa/bbb". + * @param versionName the version name to split + * @return the base name of the key + * @throws IOException + */ + public static String getBaseName(String versionName) throws IOException { + int div = versionName.lastIndexOf('@'); + if (div == -1) { + throw new IOException("No version in key path " + versionName); + } + return versionName.substring(0, div); + } + + /** + * Build a version string from a basename and version number. Converts + * "/aaa/bbb" and 3 to "/aaa/bbb@3". + * @param name the basename of the key + * @param version the version of the key + * @return the versionName of the key. + */ + protected static String buildVersionName(String name, int version) { + return name + "@" + version; + } + + /** + * Convert a nested URI to decode the underlying path. The translation takes + * the authority and parses it into the underlying scheme and authority. + * For example, "myscheme://hdfs@nn/my/path" is converted to + * "hdfs://nn/my/path". + * @param nestedUri the URI from the nested URI + * @return the unnested path + */ + public static Path unnestUri(URI nestedUri) { + String[] parts = nestedUri.getAuthority().split("@", 2); + StringBuilder result = new StringBuilder(parts[0]); + result.append("://"); + if (parts.length == 2) { + result.append(parts[1]); + } + result.append(nestedUri.getPath()); + if (nestedUri.getQuery() != null) { + result.append("?"); + result.append(nestedUri.getQuery()); + } + if (nestedUri.getFragment() != null) { + result.append("#"); + result.append(nestedUri.getFragment()); + } + return new Path(result.toString()); + } + + /** + * Find the provider with the given key. + * @param providerList the list of providers + * @param keyName the key name we are looking for + * @return the KeyProvider that has the key + */ + public static KeyProvider findProvider(List providerList, + String keyName) throws IOException { + for(KeyProvider provider: providerList) { + if (provider.getMetadata(keyName) != null) { + return provider; + } + } + throw new IOException("Can't find KeyProvider for key " + keyName); + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java new file mode 100644 index 00000000000..05890dc8f57 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderFactory.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
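The KeyProvider API that closes above is easiest to follow with a short usage sketch. The snippet below is illustrative only and not part of this patch; the class name KeyLifecycle and the key name "mykey" are made up, and the provider instance is assumed to come from the KeyProviderFactory introduced next.

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.crypto.key.KeyProvider;

    public class KeyLifecycle {
      // "provider" is assumed to be obtained via KeyProviderFactory.getProviders(conf).
      static void demo(KeyProvider provider, Configuration conf) throws IOException {
        // Create a 256-bit key under the default cipher (AES/CTR/NoPadding).
        byte[] material = new byte[32];                  // e.g. from a CSPRNG
        KeyProvider.KeyVersion v0 =
            provider.createKey("mykey", material, KeyProvider.options(conf));

        // Encrypt new data with the current version; decrypt with the exact
        // version name stored next to the ciphertext ("mykey@0", "mykey@1", ...).
        KeyProvider.KeyVersion current = provider.getCurrentKey("mykey");
        KeyProvider.KeyVersion old = provider.getKeyVersion(v0.getVersionName());

        // Roll to fresh material, then make sure the change hits the backing store.
        provider.rollNewVersion("mykey", new byte[32]);
        provider.flush();
      }
    }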
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.crypto.key; + +import java.io.IOException; +import java.net.URI; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.List; +import java.util.ServiceLoader; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; + +/** + * A factory to create a list of KeyProvider based on the path given in a + * Configuration. It uses a service loader interface to find the available + * KeyProviders and create them based on the list of URIs. + */ +@InterfaceAudience.Public +@InterfaceStability.Unstable +public abstract class KeyProviderFactory { + public static final String KEY_PROVIDER_PATH = + "hadoop.security.key.provider.path"; + + public abstract KeyProvider createProvider(URI providerName, + Configuration conf + ) throws IOException; + + private static final ServiceLoader serviceLoader = + ServiceLoader.load(KeyProviderFactory.class); + + public static List getProviders(Configuration conf + ) throws IOException { + List result = new ArrayList(); + for(String path: conf.getStringCollection(KEY_PROVIDER_PATH)) { + try { + URI uri = new URI(path); + boolean found = false; + for(KeyProviderFactory factory: serviceLoader) { + KeyProvider kp = factory.createProvider(uri, conf); + if (kp != null) { + result.add(kp); + found = true; + break; + } + } + if (!found) { + throw new IOException("No KeyProviderFactory for " + uri + " in " + + KEY_PROVIDER_PATH); + } + } catch (URISyntaxException error) { + throw new IOException("Bad configuration of " + KEY_PROVIDER_PATH + + " at " + path, error); + } + } + return result; + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java new file mode 100644 index 00000000000..42ce69341d1 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/UserProvider.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
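KeyProviderFactory, completed above, resolves the comma-separated URIs in hadoop.security.key.provider.path against the factories registered through the ServiceLoader mechanism. A minimal, illustrative lookup (not part of the patch; it only uses the user:/// scheme defined by the UserProvider that follows):

    import java.util.List;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.crypto.key.KeyProvider;
    import org.apache.hadoop.crypto.key.KeyProviderFactory;
    import org.apache.hadoop.crypto.key.UserProvider;

    public class ProviderLookup {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Each URI is offered to every registered factory until one accepts it.
        conf.set(KeyProviderFactory.KEY_PROVIDER_PATH,
            UserProvider.SCHEME_NAME + ":///");
        List<KeyProvider> providers = KeyProviderFactory.getProviders(conf);
        System.out.println(providers.get(0));   // prints "user:///"
        // An unrecognized scheme or a malformed URI surfaces as an IOException
        // ("No KeyProviderFactory for ..." / "Bad configuration of ...").
      }
    }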
+ */ + +package org.apache.hadoop.crypto.key; + +import java.io.IOException; +import java.net.URI; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; + +/** + * A KeyProvider factory for UGIs. It uses the credentials object associated + * with the current user to find keys. This provider is created using a + * URI of "user:///". + */ +@InterfaceAudience.Private +public class UserProvider extends KeyProvider { + public static final String SCHEME_NAME = "user"; + private final UserGroupInformation user; + private final Credentials credentials; + private final Map cache = new HashMap(); + + private UserProvider() throws IOException { + user = UserGroupInformation.getCurrentUser(); + credentials = user.getCredentials(); + } + + @Override + public KeyVersion getKeyVersion(String versionName) { + byte[] bytes = credentials.getSecretKey(new Text(versionName)); + if (bytes == null) { + return null; + } + return new KeyVersion(versionName, bytes); + } + + @Override + public Metadata getMetadata(String name) throws IOException { + if (cache.containsKey(name)) { + return cache.get(name); + } + byte[] serialized = credentials.getSecretKey(new Text(name)); + if (serialized == null) { + return null; + } + Metadata result = new Metadata(serialized); + cache.put(name, result); + return result; + } + + @Override + public KeyVersion createKey(String name, byte[] material, + Options options) throws IOException { + Text nameT = new Text(name); + if (credentials.getSecretKey(nameT) != null) { + throw new IOException("Key " + name + " already exists in " + this); + } + if (options.getBitLength() != 8 * material.length) { + throw new IOException("Wrong key length. Required " + + options.getBitLength() + ", but got " + (8 * material.length)); + } + Metadata meta = new Metadata(options.getCipher(), options.getBitLength(), + new Date(), 1); + cache.put(name, meta); + String versionName = buildVersionName(name, 0); + credentials.addSecretKey(nameT, meta.serialize()); + credentials.addSecretKey(new Text(versionName), material); + return new KeyVersion(versionName, material); + } + + @Override + public void deleteKey(String name) throws IOException { + Metadata meta = getMetadata(name); + if (meta == null) { + throw new IOException("Key " + name + " does not exist in " + this); + } + for(int v=0; v < meta.getVersions(); ++v) { + credentials.removeSecretKey(new Text(buildVersionName(name, v))); + } + credentials.removeSecretKey(new Text(name)); + cache.remove(name); + } + + @Override + public KeyVersion rollNewVersion(String name, + byte[] material) throws IOException { + Metadata meta = getMetadata(name); + if (meta == null) { + throw new IOException("Key " + name + " not found"); + } + if (meta.getBitLength() != 8 * material.length) { + throw new IOException("Wrong key length. 
Required " + + meta.getBitLength() + ", but got " + (8 * material.length)); + } + int nextVersion = meta.addVersion(); + credentials.addSecretKey(new Text(name), meta.serialize()); + String versionName = buildVersionName(name, nextVersion); + credentials.addSecretKey(new Text(versionName), material); + return new KeyVersion(versionName, material); + } + + @Override + public String toString() { + return SCHEME_NAME + ":///"; + } + + @Override + public void flush() { + user.addCredentials(credentials); + } + + public static class Factory extends KeyProviderFactory { + + @Override + public KeyProvider createProvider(URI providerName, + Configuration conf) throws IOException { + if (SCHEME_NAME.equals(providerName.getScheme())) { + return new UserProvider(); + } + return null; + } + } +} diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java index 0745bed83a5..88f54de61af 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/Credentials.java @@ -133,7 +133,15 @@ public class Credentials implements Writable { public void addSecretKey(Text alias, byte[] key) { secretKeysMap.put(alias, key); } - + + /** + * Remove the key for a given alias. + * @param alias the alias for the key + */ + public void removeSecretKey(Text alias) { + secretKeysMap.remove(alias); + } + /** * Convenience method for reading a token storage file, and loading the Tokens * therein in the passed UGI diff --git a/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory new file mode 100644 index 00000000000..dbce4d5274a --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/main/resources/META-INF/services/org.apache.hadoop.crypto.key.KeyProviderFactory @@ -0,0 +1,17 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.hadoop.crypto.key.JavaKeyStoreProvider$Factory +org.apache.hadoop.crypto.key.UserProvider$Factory diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProvider.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProvider.java new file mode 100644 index 00000000000..197355ed0ec --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProvider.java @@ -0,0 +1,112 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.crypto.key; + +import org.apache.hadoop.conf.Configuration; + +import org.apache.hadoop.fs.Path; +import org.junit.Test; + +import java.io.IOException; +import java.net.URI; +import java.text.DateFormat; +import java.text.SimpleDateFormat; +import java.util.Date; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.assertArrayEquals; + +public class TestKeyProvider { + + @Test + public void testBuildVersionName() throws Exception { + assertEquals("/a/b@3", KeyProvider.buildVersionName("/a/b", 3)); + assertEquals("/aaa@12", KeyProvider.buildVersionName("/aaa", 12)); + } + + @Test + public void testParseVersionName() throws Exception { + assertEquals("/a/b", KeyProvider.getBaseName("/a/b@3")); + assertEquals("/aaa", KeyProvider.getBaseName("/aaa@112")); + try { + KeyProvider.getBaseName("no-slashes"); + assertTrue("should have thrown", false); + } catch (IOException e) { + assertTrue(true); + } + } + + @Test + public void testKeyMaterial() throws Exception { + byte[] key1 = new byte[]{1,2,3,4}; + KeyProvider.KeyVersion obj = new KeyProvider.KeyVersion("key1@1", key1); + assertEquals("key1@1", obj.getVersionName()); + assertArrayEquals(new byte[]{1,2,3,4}, obj.getMaterial()); + } + + @Test + public void testMetadata() throws Exception { + DateFormat format = new SimpleDateFormat("y/m/d"); + Date date = format.parse("2013/12/25"); + KeyProvider.Metadata meta = new KeyProvider.Metadata("myCipher", 100, + date, 123); + assertEquals("myCipher", meta.getCipher()); + assertEquals(100, meta.getBitLength()); + assertEquals(date, meta.getCreated()); + assertEquals(123, meta.getVersions()); + KeyProvider.Metadata second = new KeyProvider.Metadata(meta.serialize()); + assertEquals(meta.getCipher(), second.getCipher()); + assertEquals(meta.getBitLength(), second.getBitLength()); + assertEquals(meta.getCreated(), second.getCreated()); + assertEquals(meta.getVersions(), second.getVersions()); + int newVersion = second.addVersion(); + assertEquals(123, newVersion); + assertEquals(124, second.getVersions()); + assertEquals(123, meta.getVersions()); + } + + @Test + public void testOptions() throws 
Exception { + Configuration conf = new Configuration(); + conf.set(KeyProvider.DEFAULT_CIPHER_NAME, "myCipher"); + conf.setInt(KeyProvider.DEFAULT_BITLENGTH_NAME, 512); + KeyProvider.Options options = KeyProvider.options(conf); + assertEquals("myCipher", options.getCipher()); + assertEquals(512, options.getBitLength()); + options.setCipher("yourCipher"); + options.setBitLength(128); + assertEquals("yourCipher", options.getCipher()); + assertEquals(128, options.getBitLength()); + options = KeyProvider.options(new Configuration()); + assertEquals(KeyProvider.DEFAULT_CIPHER, options.getCipher()); + assertEquals(KeyProvider.DEFAULT_BITLENGTH, options.getBitLength()); + } + + @Test + public void testUnnestUri() throws Exception { + assertEquals(new Path("hdfs://nn.example.com/my/path"), + KeyProvider.unnestUri(new URI("myscheme://hdfs@nn.example.com/my/path"))); + assertEquals(new Path("hdfs://nn/my/path?foo=bar&baz=bat#yyy"), + KeyProvider.unnestUri(new URI("myscheme://hdfs@nn/my/path?foo=bar&baz=bat#yyy"))); + assertEquals(new Path("inner://hdfs@nn1.example.com/my/path"), + KeyProvider.unnestUri(new URI("outer://inner@hdfs@nn1.example.com/my/path"))); + assertEquals(new Path("user:///"), + KeyProvider.unnestUri(new URI("outer://user/"))); + } +} diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java new file mode 100644 index 00000000000..8d073f7d514 --- /dev/null +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/crypto/key/TestKeyProviderFactory.java @@ -0,0 +1,191 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.crypto.key; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.security.Credentials; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.Test; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +public class TestKeyProviderFactory { + + private static final File tmpDir = + new File(System.getProperty("test.build.data", "/tmp"), "key"); + + @Test + public void testFactory() throws Exception { + Configuration conf = new Configuration(); + conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, + UserProvider.SCHEME_NAME + ":///," + + JavaKeyStoreProvider.SCHEME_NAME + "://file" + tmpDir + "/test.jks"); + List providers = KeyProviderFactory.getProviders(conf); + assertEquals(2, providers.size()); + assertEquals(UserProvider.class, providers.get(0).getClass()); + assertEquals(JavaKeyStoreProvider.class, providers.get(1).getClass()); + assertEquals(UserProvider.SCHEME_NAME + + ":///", providers.get(0).toString()); + assertEquals(JavaKeyStoreProvider.SCHEME_NAME + + "://file" + tmpDir + "/test.jks", + providers.get(1).toString()); + } + + @Test + public void testFactoryErrors() throws Exception { + Configuration conf = new Configuration(); + conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, "unknown:///"); + try { + List providers = KeyProviderFactory.getProviders(conf); + assertTrue("should throw!", false); + } catch (IOException e) { + assertEquals("No KeyProviderFactory for unknown:/// in " + + KeyProviderFactory.KEY_PROVIDER_PATH, + e.getMessage()); + } + } + + @Test + public void testUriErrors() throws Exception { + Configuration conf = new Configuration(); + conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, "unkn@own:/x/y"); + try { + List providers = KeyProviderFactory.getProviders(conf); + assertTrue("should throw!", false); + } catch (IOException e) { + assertEquals("Bad configuration of " + + KeyProviderFactory.KEY_PROVIDER_PATH + + " at unkn@own:/x/y", e.getMessage()); + } + } + + static void checkSpecificProvider(Configuration conf, + String ourUrl) throws Exception { + KeyProvider provider = KeyProviderFactory.getProviders(conf).get(0); + byte[] key1 = new byte[32]; + byte[] key2 = new byte[32]; + byte[] key3 = new byte[32]; + for(int i =0; i < key1.length; ++i) { + key1[i] = (byte) i; + key2[i] = (byte) (i * 2); + key3[i] = (byte) (i * 3); + } + // ensure that we get nulls when the key isn't there + assertEquals(null, provider.getKeyVersion("no-such-key")); + assertEquals(null, provider.getMetadata("key")); + // create a new key + try { + provider.createKey("key3", key3, KeyProvider.options(conf)); + } catch (Exception e) { + e.printStackTrace(); + throw e; + } + // check the metadata for key3 + KeyProvider.Metadata meta = provider.getMetadata("key3"); + assertEquals(KeyProvider.DEFAULT_CIPHER, meta.getCipher()); + assertEquals(KeyProvider.DEFAULT_BITLENGTH, meta.getBitLength()); + assertEquals(1, meta.getVersions()); + // make sure we get back the right key + assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial()); + assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName()); + // try recreating key3 + try { + provider.createKey("key3", key3, KeyProvider.options(conf)); + assertTrue("should throw", false); + } catch (IOException e) { + assertEquals("Key key3 already exists in " + ourUrl, 
e.getMessage()); + } + provider.deleteKey("key3"); + try { + provider.deleteKey("key3"); + assertTrue("should throw", false); + } catch (IOException e) { + assertEquals("Key key3 does not exist in " + ourUrl, e.getMessage()); + } + provider.createKey("key3", key3, KeyProvider.options(conf)); + try { + provider.createKey("key4", key3, + KeyProvider.options(conf).setBitLength(8)); + assertTrue("should throw", false); + } catch (IOException e) { + assertEquals("Wrong key length. Required 8, but got 256", e.getMessage()); + } + provider.createKey("key4", new byte[]{1}, + KeyProvider.options(conf).setBitLength(8)); + provider.rollNewVersion("key4", new byte[]{2}); + meta = provider.getMetadata("key4"); + assertEquals(2, meta.getVersions()); + assertArrayEquals(new byte[]{2}, + provider.getCurrentKey("key4").getMaterial()); + assertArrayEquals(new byte[]{1}, + provider.getKeyVersion("key4@0").getMaterial()); + assertEquals("key4@1", provider.getCurrentKey("key4").getVersionName()); + try { + provider.rollNewVersion("key4", key1); + assertTrue("should throw", false); + } catch (IOException e) { + assertEquals("Wrong key length. Required 8, but got 256", e.getMessage()); + } + try { + provider.rollNewVersion("no-such-key", key1); + assertTrue("should throw", false); + } catch (IOException e) { + assertEquals("Key no-such-key not found", e.getMessage()); + } + provider.flush(); + // get a new instance of the provider to ensure it was saved correctly + provider = KeyProviderFactory.getProviders(conf).get(0); + assertArrayEquals(new byte[]{2}, + provider.getCurrentKey("key4").getMaterial()); + assertArrayEquals(key3, provider.getCurrentKey("key3").getMaterial()); + assertEquals("key3@0", provider.getCurrentKey("key3").getVersionName()); + } + + @Test + public void testUserProvider() throws Exception { + Configuration conf = new Configuration(); + final String ourUrl = UserProvider.SCHEME_NAME + ":///"; + conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, ourUrl); + checkSpecificProvider(conf, ourUrl); + // see if the credentials are actually in the UGI + Credentials credentials = + UserGroupInformation.getCurrentUser().getCredentials(); + assertArrayEquals(new byte[]{1}, + credentials.getSecretKey(new Text("key4@0"))); + assertArrayEquals(new byte[]{2}, + credentials.getSecretKey(new Text("key4@1"))); + } + + @Test + public void testJksProvider() throws Exception { + Configuration conf = new Configuration(); + final String ourUrl = + JavaKeyStoreProvider.SCHEME_NAME + "://file" + tmpDir + "/test.jks"; + File file = new File(tmpDir, "test.jks"); + file.delete(); + conf.set(KeyProviderFactory.KEY_PROVIDER_PATH, ourUrl); + checkSpecificProvider(conf, ourUrl); + assertTrue(file + " should exist", file.isFile()); + } +} diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index ade8afa5f50..b296a294e2d 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -317,6 +317,11 @@ guava 11.0.2 + + com.google.code.gson + gson + 2.2.4 + commons-cli commons-cli
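The gson dependency added to hadoop-project above backs the streaming JsonWriter/JsonReader that KeyProvider.Metadata uses for serialization. As a rough illustration of that round trip (not part of the patch; the class name MetadataJsonDemo is made up, and it sits in the same package as TestKeyProvider so it can reach the protected constructors and serialize()):

    package org.apache.hadoop.crypto.key;

    import java.util.Date;

    public class MetadataJsonDemo {
      public static void main(String[] args) throws Exception {
        KeyProvider.Metadata meta =
            new KeyProvider.Metadata("AES/CTR/NoPadding", 256, new Date(0L), 1);
        byte[] json = meta.serialize();
        // Compact JSON along the lines of:
        //   {"cipher":"AES/CTR/NoPadding","bitLength":256,"created":0,"versions":1}
        KeyProvider.Metadata copy = new KeyProvider.Metadata(json);
        System.out.println(copy.getAlgorithm() + "/" + copy.getBitLength()); // AES/256
      }
    }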