From 2c6c701affb99f250ec25abae7f76c44bc5b701a Mon Sep 17 00:00:00 2001 From: Karthik Kambatla Date: Sat, 26 Dec 2015 20:22:16 -0800 Subject: [PATCH] YARN-2882. Add an OPPORTUNISTIC ExecutionType. (Konstantinos Karanasos and Inigo Goiri via kasha) (cherry picked from commit fb00794368e0aa7aafa9dfc8d453810f641b82b2) --- hadoop-yarn-project/CHANGES.txt | 6514 +++++++++++++++++ .../yarn/api/records/ContainerStatus.java | 24 + .../yarn/api/records/ExecutionType.java | 43 + .../yarn/server/api/ContainerContext.java | 22 + .../src/main/proto/yarn_protos.proto | 7 + .../impl/pb/ContainerStatusPBImpl.java | 31 +- .../yarn/api/records/impl/pb/ProtoUtils.java | 12 + .../security/ContainerTokenIdentifier.java | 35 +- .../src/main/proto/yarn_security_token.proto | 1 + .../security/TestYARNTokenIdentifier.java | 13 +- 10 files changed, 6699 insertions(+), 3 deletions(-) create mode 100644 hadoop-yarn-project/CHANGES.txt create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ExecutionType.java diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt new file mode 100644 index 00000000000..e380893e959 --- /dev/null +++ b/hadoop-yarn-project/CHANGES.txt @@ -0,0 +1,6514 @@ +Hadoop YARN Change Log + +Trunk - Unreleased + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-2882. Add an OPPORTUNISTIC ExecutionType. + (Konstantinos Karanasos and Inigo Goiri via kasha) + + IMPROVEMENTS + + YARN-2438. yarn-env.sh cleanup (aw) + + YARN-2472. yarn-daemons.sh should jsut call yarn directly (Masatake Iwasaki + via aw) + + YARN-2437. start-yarn.sh/stop-yarn should give info (Varun Saxena via aw) + + YARN-2796. deprecate sbin/yarn-daemon.sh (aw) + + YARN-2980. Move health check script related functionality to hadoop-common + (Varun Saxena via aw) + + YARN-3168. Convert site documentation from apt to markdown (Gururaj Shetty + via aw) + + YARN-3199. 
Fair Scheduler documentation improvements (Rohit Agarwal via + aw) + + YARN-2280. Resource manager web service fields are not accessible + (Krisztian Horvath via aw) + + YARN-3261. rewrite resourcemanager restart doc to remove roadmap bits (Gururaj Shetty via aw) + + OPTIMIZATIONS + + BUG FIXES + + YARN-524 TestYarnVersionInfo failing if generated properties doesn't + include an SVN URL. (stevel) + + YARN-1471. The SLS simulator is not running the preemption policy + for CapacityScheduler (Carlo Curino via cdouglas) + + YARN-2436. [post-HADOOP-9902] yarn application help doesn't work (aw) + + YARN-2525. yarn logs command gives error on trunk (Akira AJISAKA via aw) + + YARN-3002. YARN documentation needs updating post-shell rewrite (aw) + + YARN-2428. LCE default banned user list should have yarn (Varun + Saxena via aw) + + YARN-2355. MAX_APP_ATTEMPTS_ENV may no longer be a useful env var + for a container (Darrell Taylor via aw) + + YARN-3915. scmadmin help message correction (Bibin A Chundatt via aw) + +Release 2.9.0 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-1856. Added cgroups based memory monitoring for containers as another + alternative to custom memory-monitoring. (Varun Vasudev via vinodkv) + + IMPROVEMENTS + + YARN-4072. ApplicationHistoryServer, WebAppProxyServer, NodeManager and + ResourceManager to support JvmPauseMonitor as a service. + (Sunil G via Stevel) + + YARN-4341. add doc about timeline performance tool usage (Chang Li via + sjlee) + + OPTIMIZATIONS + + BUG FIXES + + YARN-4109. Exception on RM scheduler page loading with labels. + (Mohammad Shahid Khan via rohithsharmaks) + + YARN-4156. TestAMRestart#testAMBlacklistPreventsRestartOnSameNode + assumes CapacityScheduler. (Anubhav Dhoot via kasha) + + YARN-2934. Improve handling of container's stderr. + (Naganarasimha G R via gera) + +Release 2.8.0 - UNRELEASED + + INCOMPATIBLE CHANGES + + YARN-2336. 
Fair scheduler's REST API returns a missing '[' bracket JSON for + deep queue tree. (Kenji Kikushima and Akira Ajisaka via ozawa) + + NEW FEATURES + + YARN-3360. Add JMX metrics to TimelineDataManager (Jason Lowe via jeagles) + + YARN-3345. Add non-exclusive node label API. (Wangda Tan via jianhe) + + YARN-3365. Enhanced NodeManager to support using the 'tc' tool via + container-executor for outbound network traffic control. (Sidharta Seethana + via vinodkv) + + YARN-1376. NM need to notify the log aggregation status to RM through + heartbeat. (Xuan Gong via junping_du) + + YARN-3348. Add a 'yarn top' tool to help understand cluster usage. (Varun + Vasudev via jianhe) + + YARN-3347. Improve YARN log command to get AMContainer logs as well as + running containers logs. (Xuan Gong via junping_du) + + YARN-3443. Create a 'ResourceHandler' subsystem to ease addition of support + for new resource types on the NM. (Sidharta Seethana via junping_du) + + YARN-3361. CapacityScheduler side changes to support non-exclusive node + labels. (Wangda Tan via jianhe) + + YARN-3318. Create Initial OrderingPolicy Framework and FifoOrderingPolicy. + (Craig Welch via wangda) + + YARN-3326. Support RESTful API for getLabelsToNodes. (Naganarasimha G R + via ozawa) + + YARN-3354. Add node label expression in ContainerTokenIdentifier to support + RM recovery. (Wangda Tan via jianhe) + + YARN-1402. Update related Web UI and CLI with exposing client API to check + log aggregation status. (Xuan Gong via junping_du) + + YARN-3463. Integrate OrderingPolicy Framework with CapacityScheduler. + (Craig Welch via wangda) + + YARN-3410. YARN admin should be able to remove individual application + records from RMStateStore. (Rohith Sharmaks via wangda) + + YARN-3225. New parameter of CLI for decommissioning node gracefully in + RMAdmin CLI. (Devaraj K via junping_du) + + YARN-3366. 
Enhanced NodeManager to support classifying/shaping outgoing + network bandwidth traffic originating from YARN containers (Sidharta Seethana + via vinodkv) + + YARN-3319. Implement a FairOrderingPolicy. (Craig Welch via wangda) + + YARN-2498. Respect labels in preemption policy of capacity scheduler for + inter-queue preemption. (Wangda Tan via jianhe) + + YARN-2619. Added NodeManager support for disk io isolation through cgroups. + (Varun Vasudev and Wei Yan via vinodkv) + + YARN-3448. Added a rolling time-to-live LevelDB timeline store implementation. + (Jonathan Eagles via zjshen) + + YARN-3505. Node's Log Aggregation Report with SUCCEED should not cached in + RMApps. (Xuan Gong via junping_du) + + YARN-3541. Add version info on timeline service / generic history web UI + and REST API. (Zhijie Shen via xgong) + + YARN-160. Enhanced NodeManager to automatically obtain cpu/memory values from + underlying OS when configured to do so. (Varun Vasudev via vinodkv) + + YARN-41. The RM should handle the graceful shutdown of the NM. (Devaraj K via + junping_du) + + YARN-1012. Report NM aggregated container resource utilization in heartbeat. + (Inigo Goiri via kasha) + + YARN-3116. RM notifies NM whether a container is an AM container or normal + task container. (Giovanni Matteo Fumarola via zjshen) + + YARN-2003. Support for Application priority : Changes in RM and Capacity + Scheduler. (Sunil G via wangda) + + YARN-3656. LowCost: A Cost-Based Placement Agent for YARN Reservations. + (Jonathan Yaniv and Ishai Menache via curino) + + YARN-3852. Add docker container support to container-executor + (Abin Shahab via vvasudev) + + YARN-3853. Add docker container runtime support to LinuxContainterExecutor. + (Sidharta Seethana via vvasudev) + + YARN-3736. Add RMStateStore apis to store and load accepted reservations for + failover (adhoot via asuresh) + + YARN-3948. Display Application Priority in RM Web UI.(Sunil G via rohithsharmaks) + + YARN-3873. 
PendingApplications in LeafQueue should also use OrderingPolicy. + (Sunil G via wangda) + + YARN-3887. Support changing Application priority during runtime. (Sunil G + via jianhe) + + YARN-4023. Publish Application Priority to TimelineServer. (Sunil G + via rohithsharmaks) + + YARN-3534. Collect memory/cpu usage on the node. (Inigo Goiri via kasha) + + YARN-4055. Report node resource utilization in heartbeat. + (Inigo Goiri via kasha) + + YARN-2923. Support configuration based NodeLabelsProvider Service in Distributed + Node Label Configuration Setup. (Naganarasimha G R) + + YARN-221. NM should provide a way for AM to tell it not to aggregate logs. + (Ming Ma via xgong) + + YARN-4014. Support user cli interface in for Application Priority. + (Rohith Sharma K S via jianhe) + + YARN-3250. Support admin cli interface in for Application Priority. + (Rohith Sharma K S via jianhe) + + YARN-3970. Add REST api support for Application Priority. + (Naganarasimha G R via vvasudev) + + YARN-2884. Added a proxy service in NM to proxy the communication + between AM and RM. (Kishore Chaliparambil via jianhe) + + YARN-313. Add Admin API for supporting node resource configuration in + command line. (Inigo Goiri, Kenji Kikushima and Junping Du + via junping_du) + + YARN-4034. Render cluster Max Priority in scheduler metrics in RM web + UI. (Rohith Sharma K S via jianhe) + + YARN-3212. RMNode State Transition Update with DECOMMISSIONING state. + (Junping Du via wangda) + + YARN-3866. AM-RM protocol changes to support container resizing. (Meng Ding + via jianhe) + + YARN-1449. AM-NM protocol changes to support container resizing. + (Meng Ding & Wangda Tan via jianhe) + + YARN-1645. ContainerManager implementation to support container resizing. + (Meng Ding & Wangda Tan via jianhe) + + YARN-3867. ContainerImpl changes to support container resizing. (Meng Ding + via jianhe) + + YARN-1643. Make ContainersMonitor support changing monitoring size of an + allocated container. 
(Meng Ding and Wangda Tan) + + YARN-1644. RM-NM protocol changes and NodeStatusUpdater implementation to + support container resizing. (Meng Ding via jianhe) + + YARN-3868. Recovery support for container resizing. (Meng Ding via jianhe) + + YARN-1651. CapacityScheduler side changes to support container resize. + (Wangda Tan via jianhe) + + YARN-1897. CLI and core support for signal container functionality. + (Ming Ma via xgong) + + YARN-261. Ability to fail AM attempts (Andrey Klochkov and + Rohith Sharma K S via jlowe) + + YARN-3964. Support NodeLabelsProvider at Resource Manager side. + (Dian Fu via devaraj) + + YARN-2556. Tool to measure the performance of the timeline server (Chang Li + via sjlee) + + YARN-4262. Allow whitelisted users to run privileged docker containers. + (Sidharta Seethana via vvasudev) + + YARN-3739. Add reservation system recovery to RM recovery process. + (Subru Krishnan via adhoot) + + YARN-2729. Support script based NodeLabelsProvider Interface in Distributed Node Label + Configuration Setup. (Naganarasimha G R via rohithsharmaks) + + YARN-1510. Make NMClient support change container resources. + (Meng Ding via wangda) + + YARN-1509. Make AMRMClient support send increase container request and + get increased/decreased containers. (Meng Ding via wangda) + + YARN-4184. Remove update reservation state api from state store as its not used by + ReservationSystem (Sean Po via asuresh) + + YARN-4349. Support CallerContext in YARN. (wtan via jianhe) + + YARN-3623. Add a new config to indicate the Timeline Service version. + (Xuan Gong via junping_du) + + YARN-3226. UI changes for decommissioning node. (Sunil G via + junping_du) + + YARN-3458. CPU resource monitoring in Windows. (Inigo Goiri via cnauroth) + + YARN-4234. New put APIs in TimelineClient for ats v1.5. (Xuan Gong via + junping_du) + + IMPROVEMENTS + + YARN-644. 
Basic null check is not performed on passed in arguments before + using them in ContainerManagerImpl.startContainer (Varun Saxena via bobby) + + YARN-1880. Cleanup TestApplicationClientProtocolOnHA + (ozawa via harsh) + + YARN-3357. Move TestFifoScheduler to FIFO package. (Rohith Sharmaks + via devaraj) + + YARN-3356. Capacity Scheduler FiCaSchedulerApp should use ResourceUsage to + track used-resources-by-label. (Wangda Tan via jianhe) + + YARN-3350. YARN RackResolver spams logs with messages at info level. + (Wilfred Spiegelenburg via junping_du) + + YARN-2868. FairScheduler: Metric for latency to allocate first container + for an application. (Ray Chiang via kasha) + + YARN-3397. yarn rmadmin should skip -failover. (J.Andreina via kasha) + + YARN-3288. Document and fix indentation in the DockerContainerExecutor code + + YARN-2495. Allow admin specify labels from each NM (Distributed + configuration for node label). (Naganarasimha G R via wangda) + + YARN-3258. FairScheduler: Need to add more logging to investigate + allocations. (Anubhav Dhoot via ozawa) + + YARN-3428. Debug log resources to be localized for a container. (kasha) + + YARN-3424. Change logs for ContainerMonitorImpl's resource monitoring + from info to debug. (Anubhav Dhoot via ozawa) + + YARN-2901. Add errors and warning metrics page to RM, NM web UI. + (Varun Vasudev via wangda) + + YARN-3294. Allow dumping of Capacity Scheduler debug logs via + web UI for a fixed time period. (Varun Vasudev via xgong) + + YARN-3293. Track and display capacity scheduler health metrics + in web UI. (Varun Vasudev via xgong) + + YARN-3394. Enrich WebApplication proxy documentation. (Naganarasimha G R + via jianhe) + + YARN-3404. Display queue name on application page. (Ryu Kobayashi via jianhe) + + YARN-2696. Queue sorting in CapacityScheduler should consider node label. + (Wangda Tan via jianhe) + + YARN-3451. Display attempt start time and elapsed time on the web UI. + (Rohith Sharmaks via jianhe) + + YARN-3494. 
Expose AM resource limit and usage in CS QueueMetrics. (Rohith + Sharmaks via jianhe) + + YARN-3503. Expose disk utilization percentage and bad local and log dir + counts in NM metrics. (Varun Vasudev via jianhe) + + YARN-3511. Add errors and warnings page to ATS. (Varun Vasudev via xgong) + + YARN-3406. Display count of running containers in the RM's Web UI. + (Ryu Kobayashi via ozawa) + + YARN-3363. add localization and container launch time to ContainerMetrics + at NM to show these timing information for each active container. + (zxu via rkanter) + + YARN-3396. Handle URISyntaxException in ResourceLocalizationService. + (Brahma Reddy Battula via junping_du) + + YARN-3491. PublicLocalizer#addResource is too slow. (zxu via rkanter) + + YARN-3523. Cleanup ResourceManagerAdministrationProtocol interface audience. + (Naganarasimha G R via junping_du) + + YARN-3169. Drop YARN's overview document. (Brahma Reddy Battula via ozawa) + + YARN-2784. Make POM project names consistent. (Rohith via devaraj) + + YARN-20. More information for "yarn.resourcemanager.webapp.address" in + yarn-default.xml (Bartosz Ługowski via tgraves) + + YARN-3593. Add label-type and Improve "DEFAULT_PARTITION" in Node Labels Page. + (Naganarasimha G R via wangda) + + YARN-3018. Unified the default value for the configuration property + yarn.scheduler.capacity.node-locality-delay in code and default xml file. + (Nijel SF via vinodkv) + + YARN-2331. Distinguish shutdown during supervision vs. shutdown for + rolling upgrade. (Jason Lowe via xgong) + + YARN-1050. Document the Fair Scheduler REST API. + (Kenji Kikushima and Roman Shaposhnik via kasha) + + YARN-3271. FairScheduler: Move tests related to max-runnable-apps from + TestFairScheduler to TestAppRunnability. (nijel via kasha) + + YARN-1912. ResourceLocalizer started without any jvm memory control. + (Masatake Iwasaki via xgong) + + YARN-1287. Consolidate MockClocks. + (Sebastian Wong and Anubhav Dhoot via kasha) + + YARN-3395. 
FairScheduler: Trim whitespaces when using username for + queuename. (Zhihai Xu via kasha) + + YARN-3587. Fix the javadoc of DelegationTokenSecretManager in yarn, etc. + projects. (Gabor Liptak via junping_du) + + YARN-3513. Remove unused variables in ContainersMonitorImpl and add debug + log for overall resource usage by all containers. (Naganarasimha G R via devaraj) + + YARN-3613. TestContainerManagerSecurity should init and start Yarn cluster in + setup instead of individual methods. (nijel via kasha) + + YARN-3579. CommonNodeLabelsManager should support NodeLabel instead of string + label name when getting node-to-label/label-to-label mappings. (Sunil G via wangda) + + YARN-3521. Support return structured NodeLabel objects in REST API (Sunil G via wangda) + + YARN-3362. Add node label usage in RM CapacityScheduler web UI. + (Naganarasimha G R via wangda) + + YARN-3565. NodeHeartbeatRequest/RegisterNodeManagerRequest should use + NodeLabel object instead of String. (Naganarasimha G R via wangda) + + YARN-3583. Support of NodeLabel object instead of plain String + in YarnClient side. (Sunil G via wangda) + + YARN-3684. Changed ContainerExecutor's primary lifecycle methods to use a more + extensible mechanism of context objects. (Sidharta Seethana via vinodkv) + + YARN-3594. WintuilsProcessStubExecutor.startStreamReader leaks streams. + (Lars Francke via junping_du) + + YARN-3647. RMWebServices api's should use updated api from CommonNodeLabelsManager + to get NodeLabel object. (Sunil G via wangda) + + YARN-3581. Deprecate -directlyAccessNodeLabelStore in RMAdminCLI. + (Naganarasimha G R via wangda) + + YARN-3722. Merge multiple TestWebAppUtils into o.a.h.yarn.webapp.util.TestWebAppUtils. + (Masatake Iwasaki via devaraj) + + YARN-3713. Remove duplicate function call storeContainerDiagnostics in + ContainerDiagnosticsUpdateTransition (zxu via rkanter) + + YARN-3467. 
Expose allocatedMB, allocatedVCores, and runningContainers metrics on + running Applications in RM Web UI. (Anubhav Dhoot via kasha) + + YARN-2392. Add more diags about app retry limits on AM failures. (Steve + Loughran via jianhe) + + YARN-1462. AHS API and other AHS changes to handle tags for completed MR jobs. (xgong) + + YARN-2716. Refactor ZKRMStateStore retry code with Apache Curator. + (Karthik Kambatla via jianhe) + + YARN-3786. Document yarn class path options. + (Brahma Reddy Battula via cnauroth) + + YARN-3787. Allowed generic history service to load a number of applications whose + started time is within the given range. (Xuan Gong via zjshen) + + YARN-3785. Support for Resource as an argument during submitApp call in MockRM + test class. (Sunil G via xgong) + + YARN-3789. Improve logs for LeafQueue#activateApplications(). + (Bibin A Chundatt via devaraj) + + YARN-3148. Allow CORS related headers to passthrough in WebAppProxyServlet. + (Varun Saxena via devaraj) + + YARN-3834. Scrub debug logging of tokens during resource localization. + (Chris Nauroth via xgong) + + YARN-3827. Migrate YARN native build to new CMake framework (Alan Burlison + via Colin P. McCabe) + + YARN-3800. Reduce storage footprint for ReservationAllocation. (Anubhav Dhoot + via curino) + + YARN-3069. Document missing properties in yarn-default.xml. + (Ray Chiang via aajisaka) + + YARN-3844. Make hadoop-yarn-project Native code -Wall-clean (Alan Burlison + via Colin P. McCabe) + + YARN-3026. Move application-specific container allocation logic from + LeafQueue to FiCaSchedulerApp. (Wangda Tan via jianhe) + + YARN-3950. Add unique SHELL_ID environment variable to DistributedShell + (Robert Kanter via jlowe) + + YARN-3965. Add startup timestamp to nodemanager UI (Hong Zhiguo via jlowe) + + YARN-3543. ApplicationReport should be able to tell whether the Application + is AM managed or not. (Rohith Sharma K S via xgong) + + YARN-4004. 
container-executor should print output of docker logs if the docker + container exits with non-0 exit status. (Varun Vasudev via xgong) + + YARN-3961. Expose pending, running and reserved containers of a queue in REST + api and yarn top (adhoot via asuresh) + + YARN-4019. Add JvmPauseMonitor to ResourceManager and NodeManager. (Robert Kanter + via junping_du) + + YARN-3974. Refactor the reservation system test cases to use parameterized + base test. (subru via curino) + + YARN-3966. Fix excessive loggings in CapacityScheduler. (Jian He via wangda) + + YARN-4026. Refactored ContainerAllocator to accept a list of priorities + rather than a single priority. (Wangda Tan via jianhe) + + YARN-4031. Add JvmPauseMonitor to ApplicationHistoryServer and + WebAppProxyServer (djp via rkanter) + + YARN-4057. If ContainersMonitor is not enabled, only print + related log info one time. (Jun Gong via zxu) + + YARN-1556. NPE getting application report with a null appId. (Weiwei Yang via + junping_du) + + YARN-4121. Fix typos in capacity scheduler documentation. + (Kai Sasaki via vvasudev) + + YARN-4086. Allow Aggregated Log readers to handle HAR files (rkanter) + + YARN-4145. Make RMHATestBase abstract so it's not run when running all + tests under that namespace (adhoot via rkanter) + + YARN-2005. Blacklisting support for scheduling AMs. (Anubhav Dhoot via kasha) + + YARN-3717. Expose app/am/queue's node-label-expression to RM web UI / + CLI / REST-API. (Naganarasimha G R via wangda) + + YARN-4149. yarn logs -am should provide an option to fetch all the log files + (Varun Vasudev via xgong) + + YARN-2597. MiniYARNCluster should propagate reason for AHS not starting. + (stevel) + + MAPREDUCE-6478. Add an option to skip cleanupJob stage or ignore cleanup + failure during commitJob. (Junping Du via wangda) + + YARN-3920. FairScheduler container reservation on a node should be + configurable to limit it to large containers (adhoot via asuresh) + + HADOOP-12428. 
Fix inconsistency between log-level guards and statements. + (Jagadesh Kiran N and Jackie Chang via ozawa) + + YARN-4095. Avoid sharing AllocatorPerContext object in LocalDirAllocator + between ShuffleHandler and LocalDirsHandlerService. (Zhihai Xu via jlowe) + + YARN-4176. Resync NM nodelabels with RM periodically for distributed nodelabels. + (Bibin A Chundatt via wangda) + + YARN-4215. RMNodeLabels Manager Need to verify and replace node labels for the + only modified Node Label Mappings in the request. (Naganarasimha G R via wangda) + + YARN-3943. Use separate threshold configurations for disk-full detection + and disk-not-full detection. (Zhihai Xu via jlowe) + + YARN-4252. Log container-executor invocation details when exit code is non-zero. + (Sidharta Seethana via vvasudev) + + YARN-4258. Add support for controlling capabilities for docker containers. + (Sidharta Seethana via vvasudev) + + YARN-4162. CapacityScheduler: Add resource usage by partition and queue capacity + by partition to REST API. (Naganarasimha G R via wangda) + + YARN-4170. AM need to be notified with priority in AllocateResponse. + (Sunil G via jianhe) + + YARN-4267. Add additional logging to container launch implementations in + container-executor. (Sidharta Seethana via vvasudev) + + YARN-3985. Make ReservationSystem persist state using RMStateStore + reservation APIs. (adhoot via asuresh) + + YARN-4243. Add retry on establishing Zookeeper connection in + EmbeddedElectorService#serviceInit. (Xuan Gong via junping_du) + + YARN-2913. Fair scheduler should have ability to set MaxResourceDefault for + each queue. (Siqi Li via mingma) + + YARN-4296. DistributedShell Log.info is not friendly. + (Xiaowei Wang via stevel) + + YARN-3738. Add support for recovery of reserved apps running under dynamic + queues (subru via asuresh) + + YARN-4285. Display resource usage as percentage of queue and cluster in the + RM UI (Varun Vasudev via wangda) + + YARN-3216. 
Max-AM-Resource-Percentage should respect node labels. + (Sunil G via wangda) + + YARN-4310. FairScheduler: Log skipping reservation messages at DEBUG level (asuresh) + + YARN-4279. Mark ApplicationId and ApplicationAttemptId static methods as @Public, + @Unstable. (stevel) + + YARN-3454. Add efficient merge operation to RLESparseResourceAllocation + (Carlo Curino via asuresh) + + YARN-3980. Plumb resource-utilization info in node heartbeat through to the + scheduler. (Inigo Goiri via kasha) + + YARN-4132. Separate configs for nodemanager to resourcemanager connection + timeout and retries (Chang Li via jlowe) + + YARN-4292. ResourceUtilization should be a part of NodeInfo REST API. + (Sunil G via wangda) + + YARN-4405. Support node label store in non-appendable file system. (Wangda + Tan via jianhe) + + YARN-4358. Reservation System: Improve relationship between SharingPolicy + and ReservationAgent. (Carlo Curino via asuresh) + + YARN-3456. Improve handling of incomplete TimelineEntities. (Varun Saxena + via rohithsharmaks) + + YARN-4248. REST API for submit/update/delete Reservations. (curino) + + YARN-3946. Update exact reason as to why a submitted app is in ACCEPTED state to + app's diagnostic message. (Naganarasimha G R via wangda) + + YARN-4309. Add container launch related debug information to container logs + when a container fails. (Varun Vasudev via wangda) + + YARN-4418. AM Resource Limit per partition can be updated to ResourceUsage as well. + (Sunil G via wangda) + + YARN-4207. Add a non-judgemental YARN app completion status. (Rich Haase via sseth) + + YARN-4293. ResourceUtilization should be a part of yarn node CLI. (Sunil G via wangda) + + YARN-4164. Changed updateApplicationPriority API to return the updated + application priority. (Rohith Sharma K S via jianhe) + + YARN-4480. Clean up some inappropriate imports. (Kai Zheng via umamahesh) + + YARN-4290. Add -showDetails option to YARN Nodes CLI to print all nodes reports + information. 
(Sunil G via wangda) + + OPTIMIZATIONS + + YARN-3339. TestDockerContainerExecutor should pull a single image and not + the entire centos repository. (Ravindra Kumar Naik via raviprak) + + YARN-3469. ZKRMStateStore: Avoid setting watches that are not required. + (Jun Gong via kasha) + + YARN-3006. Improve the error message when attempting manual failover with + auto-failover enabled. (Akira AJISAKA via wangda) + + YARN-3547. FairScheduler: Apps that have no resource demand should not participate + scheduling. (Xianyin Xin via kasha) + + YARN-3259. FairScheduler: Trigger fairShare updates on node events. + (Anubhav Dhoot via kasha) + + YARN-2768. Avoid cloning Resource in FSAppAttempt#updateDemand. + (Hong Zhiguo via kasha) + + YARN-3983. Refactored CapacityScheduleri#FiCaSchedulerApp to easier extend + container allocation logic. (Wangda Tan via jianhe) + + YARN-3635. Refactored current queue mapping implementation in CapacityScheduler + to use a generic PlacementManager framework. (Wangda Tan via jianhe) + + YARN-4066. Large number of queues choke fair scheduler. + (Johan Gustavsson via kasha) + + BUG FIXES + + YARN-3197. Confusing log generated by CapacityScheduler. (Varun Saxena + via devaraj) + + YARN-3305. Normalize AM resource request on app submission. (Rohith Sharmaks + via jianhe) + + YARN-3205 FileSystemRMStateStore should disable FileSystem Cache to avoid + get a Filesystem with an old configuration. (Zhihai Xu via ozawa) + + YARN-3269. Yarn.nodemanager.remote-app-log-dir could not be configured to + fully qualified path. (Xuan Gong via junping_du) + + YARN-3241. FairScheduler handles "invalid" queue names inconsistently. + (Zhihai Xu via kasha) + + YARN-3383. AdminService should use "warn" instead of "info" to log exception + when operation fails. (Li Lu via wangda) + + YARN-3400. [JDK 8] Build Failure due to unreported exceptions in + RPCUtil (rkanter) + + YARN-3412. RM tests should use MockRM where possible. (kasha) + + YARN-3425. 
NPE from RMNodeLabelsManager.serviceStop when + NodeLabelsManager.serviceInit failed. (Bibin A Chundatt via wangda) + + YARN-3415. Non-AM containers can be counted towards amResourceUsage of a + Fair Scheduler queue (Zhihai Xu via Sandy Ryza) + + YARN-3435. AM container to be allocated Appattempt AM container shown as null. + (Bibin A Chundatt via xgong) + + YARN-2666. TestFairScheduler.testContinuousScheduling fails Intermittently. + (Zhihai Xu via ozawa) + + YARN-3429. TestAMRMTokens.testTokenExpiry fails Intermittently with + error message:Invalid AMRMToken (zxu via rkanter) + + YARN-3110. Few issues in ApplicationHistory web ui. (Naganarasimha G R via xgong) + + YARN-3459. Fix failure of TestLog4jWarningErrorMetricsAppender. + (Varun Vasudev via wangda) + + YARN-3266. RMContext#inactiveNodes should have NodeId as map key. + (Chengbing Liu via jianhe) + + YARN-3436. Fix URIs in documentation of YARN web service REST APIs. + (Bibin A Chundatt via ozawa) + + YARN-3021. YARN's delegation-token handling disallows certain trust setups + to operate properly over DistCp. (Yongjun Zhang via jianhe) + + YARN-3136. Fixed a synchronization problem of + AbstractYarnScheduler#getTransferredContainers. (Sunil G via jianhe) + + YARN-3495. Confusing log generated by FairScheduler. + (Brahma Reddy Battula via ozawa) + + YARN-3387. Previous AM's container completed status couldn't pass to current + AM if AM and RM restarted during the same time. (sandflee via jianhe) + + YARN-3444. Fix typo capabililty. (Gabor Liptak via aajisaka) + + YARN-3530. ATS throws exception on trying to filter results without otherinfo. + (zhijie shen via xgong) + + YARN-2740. Fix NodeLabelsManager to properly handle node label modifications + when distributed node label configuration enabled. (Naganarasimha G R via wangda) + + YARN-3517. RM web ui for dumping scheduler logs should be for admins only + (Varun Vasudev via tgraves) + + YARN-3533. 
Test: Fix launchAM in MockRM to wait for attempt to be scheduled. + (Anubhav Dhoot via jianhe) + + YARN-3564. Fix TestContainerAllocation.testAMContainerAllocationWhenDNSUnavailable + fails randomly. (Jian He via wangda) + + YARN-2893. AMLaucher: sporadic job failures due to EOFException in + readTokenStorageStream. (Zhihai Xu via gera) + + YARN-2454. Fix compareTo of variable UNBOUNDED in o.a.h.y.util.resource.Resources. + (Xu Yang via junping_du) + + YARN-1993. Cross-site scripting vulnerability in TextView.java. (Kenji Kikushima + via ozawa) + + YARN-3097. Logging of resource recovery on NM restart has redundancies + (Eric Payne via jlowe) + + YARN-3375. NodeHealthScriptRunner.shouldRun() check is performing 3 times for + starting NodeHealthScriptRunner. (Devaraj K via wangda) + + YARN-2725. Added test cases of retrying creating znode in ZKRMStateStore. + (Tsuyoshi Ozawa via jianhe) + + YARN-3552. RM Web UI shows -1 running containers for completed apps + (Rohith via jlowe) + + YARN-2123. Progress bars in Web UI always at 100% due to non-US locale. + (Akira AJISAKA via xgong) + + YARN-3343. Increased TestCapacitySchedulerNodeLabelUpdate#testNodeUpdate + timeout. (Rohith Sharmaks via jianhe) + + YARN-3582. NPE in WebAppProxyServlet. (jian he via xgong) + + YARN-3577. Misspelling of threshold in log4j.properties for tests. + (Brahma Reddy Battula via aajisaka) + + YARN-3584. Fixed attempt diagnostics format shown on the UI. (nijel via + jianhe) + + YARN-1832. Fix wrong MockLocalizerStatus#equals implementation. + (Hong Zhiguo via aajisaka) + + YARN-3572. Correct typos in WritingYarnApplications.md. + (Gabor Liptak via aajisaka) + + YARN-3592. Fix typos in RMNodeLabelsManager. (Sunil G via devaraj) + + YARN-3589. RM and AH web UI display DOCTYPE wrongly. (Rohith via ozawa) + + YARN-3600. AM container link is broken (Naganarasimha G R via tgraves) + + YARN-3604. Fixed ZKRMStateStore#removeApplication to also disable watch. + (zhihai xu via jianhe) + + YARN-3473. 
Fix RM Web UI configuration for some properties (rchiang via rkanter) + + YARN-2206. Updated document for applications REST API response examples. (Kenji + Kikushima and Brahma Reddy Battula via zjshen) + + YARN-3602. TestResourceLocalizationService.testPublicResourceInitializesLocalDir + fails Intermittently due to IOException from cleanup. (zhihai xu via xgong) + + YARN-3629. NodeID is always printed as "null" in node manager initialization log. + (nijel via devaraj) + + YARN-2921. Fix MockRM/MockAM#waitForState sleep too long. + (Tsuyoshi Ozawa via wangda) + + YARN-1519. Check in container-executor if sysconf is implemented before + using it (Radim Kolar and Eric Payne via raviprak) + + YARN-2421. RM still allocates containers to an app in the FINISHING + state (Chang Li via jlowe) + + YARN-3302. TestDockerContainerExecutor should run automatically if it can + detect docker in the usual place (Ravindra Kumar Naik via raviprak) + + YARN-2821. Fixed a problem that DistributedShell AM may hang if restarted. + (Varun Vasudev via jianhe) + + YARN-3654. ContainerLogsPage web UI should not have meta-refresh. (Xuan Gong + via jianhe) + + YARN-3707. RM Web UI queue filter doesn't work. (Wangda Tan via jianhe) + + YARN-3632. Ordering policy should be allowed to reorder an application when + demand changes. (Craig Welch via jianhe) + + YARN-3716. Node-label-expression should be included by + ResourceRequestPBImpl.toString. (Xianyin Xin via wangda) + + YARN-3751. Fixed AppInfo to check if used resources are null. (Sunil G via + zjshen) + + YARN-3762. FairScheduler: CME on FSParentQueue#getQueueUserAclInfo. (kasha) + + YARN-3749. We should make a copy of configuration when init MiniYARNCluster + with multiple RMs. (Chun Chen via xgong) + + YARN-3766. Fixed the apps table column error of generic history web UI. + (Xuan Gong via zjshen) + + YARN-3655. FairScheduler: potential livelock due to maxAMShare limitation + and container reservation. 
(Zhihai Xu via kasha) + + YARN-3747. TestLocalDirsHandlerService should delete the created test directory logDir2. + (David Moore via devaraj) + + YARN-3778. Fix Yarn resourcemanager CLI usage. (Brahma Reddy Battula via xgong) + + YARN-3794. TestRMEmbeddedElector fails because of ambiguous LOG reference. + (Chengbing Liu via devaraj) + + YARN-3714. AM proxy filter can not get RM webapp address from + yarn.resourcemanager.hostname.rm-id. (Masatake Iwasaki via xgong) + + YARN-3617. Fix WindowsResourceCalculatorPlugin.getCpuFrequency() + returning always -1. (J.Andreina via devaraj) + + YARN-3824. Fix two minor nits in member variable properties + of YarnConfiguration. (Ray Chiang via devaraj) + + YARN-3835. hadoop-yarn-server-resourcemanager test package bundles + core-site.xml, yarn-site.xml (vamsee via rkanter) + + YARN-3790. usedResource from rootQueue metrics may get stale data for FS + scheduler after recovering the container (Zhihai Xu via rohithsharmaks) + + YARN-3826. Race condition in ResourceTrackerService leads to + wrong diagnostics messages. (Chengbing Liu via devaraj) + + YARN-3745. SerializedException should also try to instantiate internal + exception with the default constructor. (Lavkesh Lahngir via devaraj) + + YARN-2871. TestRMRestart#testRMRestartGetApplicationList sometimes fails in trunk. + (zhihai xu via xgong) + + YARN-3859. LeafQueue doesn't print user properly for application add. + (Varun Saxena via devaraj) + + YARN-3860. rmadmin -transitionToActive should check the state of non-target node. + (Masatake Iwasaki via junping_du) + + YARN-3695. ServerProxy (NMProxy, etc.) shouldn't retry forever for non + network exception. (Raju Bairishetti via jianhe) + + YARN-3770. SerializedException should also handle java.lang.Error on + de-serialization. (Lavkesh Lahngir via jianhe) + + YARN-3768. ArrayIndexOutOfBoundsException with empty environment variables. + (Zhihai Xu via gera) + + YARN-3823.
Fix mismatch in default values for + yarn.scheduler.maximum-allocation-vcores property. (Ray Chiang via devaraj) + + YARN-3830. AbstractYarnScheduler.createReleaseCache may try to clean a null + attempt. (nijel via devaraj) + + YARN-3875. FSSchedulerNode#reserveResource() doesn't print Application Id + properly in log. (Bibin A Chundatt via devaraj) + + YARN-3882. AggregatedLogFormat should close aclScanner and ownerScanner + after create them. (zhihai xu via xgong) + + YARN-3837. javadocs of TimelineAuthenticationFilterInitializer give wrong + prefix for auth options. (Bibin A Chundatt via devaraj) + + YARN-2194. Fix bug causing CGroups functionality to fail on RHEL7. + (Wei Yan via vvasudev) + + YARN-3892. Fixed NPE on RMStateStore#serviceStop when + CapacityScheduler#serviceInit fails. (Bibin A Chundatt via jianhe) + + YARN-3888. ApplicationMaster link is broken in RM WebUI when appstate is NEW. + (Bibin A Chundatt via xgong) + + YARN-3917. getResourceCalculatorPlugin for the default should intercept all + exceptions. (gera) + + YARN-3894. RM startup should fail for wrong CS xml NodeLabel capacity + configuration. (Bibin A Chundatt via wangda) + + YARN-3381. Fix typo InvalidStateTransitonException. + (Brahma Reddy Battula via aajisaka) + + YARN-3453. Ensure preemption logic in FairScheduler uses DominantResourceCalculator + in DRF queues to prevent unnecessary thrashing. (asuresh) + + YARN-3174. Consolidate the NodeManager and NodeManagerRestart documentation + into one. (Masatake Iwasaki via ozawa) + + YARN-3805. Update the documentation of Disk Checker based on YARN-90. + (Masatake Iwasaki via ozawa) + + YARN-3930. FileSystemNodeLabelsStore should make sure edit log file closed when + exception is thrown. (Dian Fu via wangda) + + YARN-3885. ProportionalCapacityPreemptionPolicy doesn't preempt if queue is + more than 2 level. (Ajith S via wangda) + + YARN-3932. 
SchedulerApplicationAttempt#getResourceUsageReport and UserInfo + should be based on total-used-resources. (Bibin A Chundatt via wangda) + + YARN-3954. Fix TestYarnConfigurationFields#testCompareConfigurationClassAgainstXml. + (varun saxena via rohithsharmaks) + + YARN-3956. Fix TestNodeManagerHardwareUtils fails on Mac (Varun Vasudev via wangda) + + YARN-3941. Proportional Preemption policy should try to avoid sending duplicate + PREEMPT_CONTAINER event to scheduler. (Sunil G via wangda) + + YARN-3900. Protobuf layout of yarn_security_token causes errors in other protos + that include it (adhoot via rkanter) + + YARN-3845. Scheduler page does not render RGBA color combinations in IE11. + (Contributed by Mohammad Shahid Khan) + + YARN-3957. FairScheduler NPE In FairSchedulerQueueInfo causing scheduler page to + return 500. (Anubhav Dhoot via kasha) + + YARN-3973. Recent changes to application priority management break + reservation system from YARN-1051. (Carlo Curino via wangda) + + YARN-3958. TestYarnConfigurationFields should be moved to hadoop-yarn-api + module. (Varun Saxena via aajisaka) + + YARN-3846. RM Web UI queue filter is not working for sub queue. + (Mohammad Shahid Khan via jianhe) + + YARN-3982. container-executor parsing of container-executor.cfg broken in + trunk and branch-2. (Varun Vasudev via xgong) + + YARN-3919. NPEs while stopping service after exception during + CommonNodeLabelsManager#start. (varun saxena via rohithsharmaks) + + YARN-3963. AddNodeLabel on duplicate label addition shows success. + (Bibin A Chundatt via wangda) + + YARN-3971. Skip RMNodeLabelsManager#checkRemoveFromClusterNodeLabelsOfQueue + on nodelabel recovery. (Bibin A Chundatt via wangda) + + YARN-433. When RM is catching up with node updates then it should not expire + acquired containers. (Xuan Gong via zxu) + + YARN-3992. TestApplicationPriority.testApplicationPriorityAllocation fails + intermittently. (Contributed by Sunil G) + + YARN-3987.
Send AM container completed msg to NM once AM finishes. + (sandflee via jianhe) + + YARN-4028. AppBlock page key update and diagnostics value null on recovery + (Bibin A Chundatt via xgong) + + YARN-3986. getTransferredContainers in AbstractYarnScheduler should be present + in YarnScheduler interface instead. (Varun Saxena via rohithsharmaks) + + YARN-4082. Container shouldn't be killed when node's label updated. + (Wangda Tan via vvasudev) + + YARN-4073. Removed unused ApplicationACLsManager in ContainerManagerImpl constructor. + (Naganarasimha G R via rohithsharmaks) + + YARN-4024. YARN RM should avoid unnecessary resolving IP when NMs doing heartbeat. + (Hong Zhiguo via wangda) + + YARN-3591. Resource localization on a bad disk causes subsequent containers failure. + (Lavkesh Lahngir via vvasudev) + + YARN-4106. NodeLabels for NM in distributed mode is not updated even after + clusterNodelabel addition in RM. (Bibin A Chundatt via wangda) + + YARN-4115. Reduce loglevel of ContainerManagementProtocolProxy to Debug + (adhoot via rkanter) + + YARN-4151. Fix findbugs errors in hadoop-yarn-server-common module. + (Meng Ding via wangda) + + YARN-4078. Add getPendingResourceRequestForAttempt in YarnScheduler interface. + (Naganarasimha G R via jianhe) + + YARN-4135. Improve the assertion message in MockRM while failing after waiting for the state. + (Nijel S F via rohithsharmaks) + + YARN-4167. NPE on RMActiveServices#serviceStop when store is null. (Bibin A Chundatt via rohithsharmaks) + + YARN-4113. RM should respect retry-interval when uses RetryPolicies.RETRY_FOREVER. + (Sunil G via wangda) + + YARN-4188. Make MoveApplicationAcrossQueues abstract, newInstance static. + (Giovanni Matteo Fumarola via cdouglas) + + YARN-4171. Fix findbugs warnings in YARN-1197 branch. (Wangda Tan via jianhe) + + YARN-4152. NodeManager crash with NPE when LogAggregationService#stopContainer called for + absent container. (Bibin A Chundatt via rohithsharmaks) + + YARN-4044. 
Running applications information changes such as movequeue is not published to + TimeLine server. (Sunil G via rohithsharmaks) + + YARN-4204. ConcurrentModificationException in FairSchedulerQueueInfo. (adhoot) + + YARN-4141. Runtime Application Priority change should not throw exception + for applications at finishing states (Sunil G via jlowe) + + YARN-4228. FileSystemRMStateStore use IOUtils#close instead of fs#close. (Bibin A Chundatt via rohithsharmaks) + + YARN-4235. FairScheduler PrimaryGroup does not handle empty groups returned + for a user. (Anubhav Dhoot via rohithsharmaks) + + YARN-4140. RM container allocation delayed incase of app submitted to + Nodelabel partition. (Bibin A Chundatt via wangda) + + YARN-4201. AMBlacklist does not work for minicluster. (Jun Gong via zxu) + + YARN-4230. RM crashes with NPE when increasing container resource if there is no headroom left. + (Meng Ding via jianhe) + + YARN-4017. container-executor overuses PATH_MAX. (Sidharta Seethana via vvasudev) + + YARN-4253. Standardize on using PrivilegedOperationExecutor for all + invocations of container-executor in LinuxContainerExecutor. (Sidharta Seethana via vvasudev) + + YARN-4255. container-executor does not clean up docker operation command files. + (Sidharta Seethana via vvasudev) + + YARN-4250. NPE in AppSchedulingInfo#isRequestLabelChanged. (Brahma Reddy Battula via rohithsharmaks) + + YARN-4000. RM crashes with NPE if leaf queue becomes parent queue during restart. + (Varun Saxena via jianhe) + + YARN-4155. TestLogAggregationService.testLogAggregationServiceWithInterval failing + (Bibin A Chundatt via stevel) + + YARN-4270. Limit application resource reservation on nodes for non-node/rack + specific requests (asuresh) + + YARN-4256. YARN fair scheduler vcores with decimal values. (Jun Gong via zxu) + + YARN-4294. [JDK8] Fix javadoc errors caused by wrong reference and illegal + tag. (aajisaka) + + YARN-4289. TestDistributedShell failing with bind exception. 
+ (Brahma Reddy Battula via stevel) + + YARN-3724. Use POSIX nftw(3) instead of fts(3) (Alan Burlison via aw) + + YARN-4246. NPE while listing app attempt. (Nijel S F via rohithsharmaks) + + YARN-3528. Tests with 12345 as hard-coded port break jenkins. + (Brahma Reddy Battula via ozawa) + + YARN-4223. Fixed findbugs warnings in hadoop-yarn-server-nodemanager project + (varun saxena via rohithsharmaks) + + YARN-4284. condition for AM blacklisting is too narrow (Sangjin Lee via + jlowe) + + YARN-4169. Fix racing condition of TestNodeStatusUpdaterForLabels. + (Naganarasimha G R via wangda) + + YARN-4300. [JDK8] Fix javadoc errors caused by wrong tags. (aajisaka) + + YARN-3573. MiniMRYarnCluster constructor that starts the timeline server + using a boolean should be marked deprecated. (Brahma Reddy Battula via ozawa) + + YARN-4302. SLS not able start due to NPE in SchedulerApplicationAttempt + (Bibin A Chundatt via vvasudev) + + YARN-4251. TestAMRMClientOnRMRestart#testAMRMClientOnAMRMTokenRollOverOnRMRestart + is failing. (Brahma Reddy Battula via ozawa) + + YARN-4130. Duplicate declaration of ApplicationId in RMAppManager#submitApplication method. + (Kai Sasaki via rohithsharmaks) + + YARN-4288. Fixed RMProxy to retry on IOException from local host. + (Junping Du via jianhe) + + YARN-4127. RM fail with noAuth error if switched from failover to non-failover. + (Varun Saxena via jianhe) + + YARN-4345. yarn rmadmin -updateNodeResource doesn't work (Junping Du via + jlowe) + + YARN-4367. SLS webapp doesn't load. (kasha). + + YARN-4298. Fix findbugs warnings in hadoop-yarn-common. + (Sunil G via aajisaka) + + YARN-4387. Fix typo in FairScheduler log message. (Xin Wang via ozawa) + + YARN-4384. updateNodeResource CLI should not accept negative values for resource. + (Junping Du via wangda) + + YARN-4408. Fix issue that NodeManager reports negative running containers. + (Robert Kanter via junping_du) + + YARN-4392. 
ApplicationCreatedEvent event time resets after RM restart/failover. + (Naganarasimha G R and Xuan Gong via xgong) + + YARN-4431. Not necessary to do unRegisterNM() if NM get stop due to failed to connect + to RM. (Junping Du via rohithsharmaks) + + YARN-4421. Remove dead code in RmAppImpl.RMAppRecoveredTransition. + (Daniel Templeton via rohithsharmaks) + + YARN-4403. (AM/NM/Container)LivelinessMonitor should use monotonic time + when calculating period. (Junping Du via jianhe) + + YARN-4402. TestNodeManagerShutdown And TestNodeManagerResync fails with + bind exception. (Brahma Reddy Battula via jianhe) + + YARN-4440. FSAppAttempt#getAllowedLocalityLevelByTime should init the + lastScheduler time. (Lin Yiqun via zxu) + + YARN-4452. NPE when submit Unmanaged application. (Naganarasimha G R + via junping_du) + + YARN-4225. Add preemption status to yarn queue -status for capacity scheduler. + (Eric Payne via wangda) + + YARN-4416. Deadlock due to synchronised get Methods in AbstractCSQueue. + (Naganarasimha G R via wangda) + + YARN-4461. Redundant nodeLocalityDelay log in LeafQueue (Eric Payne via + jlowe) + + YARN-4454. NM to nodelabel mapping going wrong after RM restart. + (Bibin A Chundatt via wangda) + + YARN-4477. FairScheduler: Handle condition which can result in an + infinite loop in attemptScheduling. (Tao Jie via asuresh) + + YARN-4400. AsyncDispatcher.waitForDrained should be final. (Daniel Templeton + via junping_du) + +Release 2.7.3 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-4287. Capacity Scheduler: Rack Locality improvement (Nathan Roberts via wangda) + + OPTIMIZATIONS + + BUG FIXES + + YARN-4326. Fix TestDistributedShell timeout as AHS in MiniYarnCluster no longer + binds to default port 8188. (Meng Ding via wangda) + + YARN-4347. Resource manager fails with Null pointer exception. (Jian He via wangda) + + YARN-4374. RM capacity scheduler UI rounds user limit factor (Chang Li via + jlowe) + + YARN-3849. 
Too much of preemption activity causing continuous killing of + containers across queues. (Sunil G via wangda) + + YARN-3769. Consider user limit when calculating total pending resource for + preemption policy in Capacity Scheduler. (Eric Payne via wangda) + + YARN-4344. NMs reconnecting with changed capabilities can lead to wrong + cluster resource calculations (Varun Vasudev via jlowe) + + YARN-4365. FileSystemNodeLabelStore should check for root dir existence on + startup (Kuhu Shukla via jlowe) + + YARN-4380. TestResourceLocalizationService.testDownloadingResourcesOnContainerKill + fails intermittently. (Varun Saxena via ozawa) + + YARN-4398. Remove unnecessary synchronization in RMStateStore. (Ning Ding via jianhe) + + YARN-3840. Resource Manager web ui issue when sorting application by id + (with application having id > 9999) (Mohammad Shahid Khan & Varun Saxena + via jianhe) + + YARN-4422. Generic AHS sometimes doesn't show started, node, or logs on App page + (Eric Payne via jeagles) + + YARN-4439. Clarify NMContainerStatus#toString method. (Jian He via xgong) + + YARN-4452. NPE when submit Unmanaged application. (Naganarasimha G R via + junping_du) + +Release 2.7.2 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-4009. CORS support for ResourceManager REST API. (Varun Vasudev via jeagles) + + YARN-3170. YARN architecture document needs updating. (Brahma Reddy Battula + via ozawa) + + YARN-3967. Fetch the application report from the AHS if the RM does not know about it. + (Mit Desai via xgong) + + YARN-2801. Add documentation for node labels feature. (Wangda Tan and Naganarasimha + G R via ozawa) + + YARN-2513. Host framework UIs in YARN for use with the ATS (jeagles) + + OPTIMIZATIONS + + BUG FIXES + + YARN-3793. Several NPEs when deleting local files on NM recovery (Varun + Saxena via jlowe) + + YARN-3508. Prevent processing preemption events on the main RM dispatcher. + (Varun Saxena via wangda) + + YARN-3690.
[JDK8] 'mvn site' fails. (Brahma Reddy Battula via aajisaka) + + YARN-3905. Application History Server UI NPEs when accessing apps run after + RM restart (Eric Payne via jeagles) + + YARN-3535. Scheduler must re-request container resources when RMContainer transitions + from ALLOCATED to KILLED (rohithsharma and peng.zhang via asuresh) + + YARN-3878. AsyncDispatcher can hang while stopping if it is configured for + draining events on stop. (Varun Saxena via jianhe) + + YARN-3969. Allow jobs to be submitted to reservation that is active + but does not have any allocations. (subru via curino) + + YARN-3925. ContainerLogsUtils#getContainerLogFile fails to read container + log files from full disks. (zhihai xu via jlowe) + + YARN-3857. Memory leak in ResourceManager with SIMPLE mode. + (mujunchao via zxu) + + YARN-3893. Both RM in active state when Admin#transitionToActive failure + from refreshAll() (Bibin A Chundatt via rohithsharmaks) + + YARN-4103. RM WebServices missing scheme for appattempts logLinks. + (Jonathan Eagles via vvasudev) + + YARN-4105. Capacity Scheduler headroom for DRF is wrong (Chang Li via + jlowe) + + YARN-4096. App local logs are leaked if log aggregation fails to initialize + for the app. (Jason Lowe via zxu) + + YARN-4136. LinuxContainerExecutor loses info when forwarding + ResourceHandlerException. (Bibin A Chundatt via vvasudev) + + YARN-3697. FairScheduler: ContinuousSchedulingThread can fail to shutdown. + (Zhihai Xu via kasha) + + YARN-4126. RM should not issue delegation tokens in unsecure mode. + (Bibin A Chundatt via jianhe) + + YARN-4158. Remove duplicate close for LogWriter in + AppLogAggregatorImpl#uploadLogsForContainers (Zhihai Xu via jlowe) + + YARN-3433. Jersey tests failing with Port in Use -again. + (Brahma Reddy Battula) + + YARN-3975. WebAppProxyServlet should not redirect to RM page if AHS is + enabled (Mit Desai via jlowe) + + YARN-3624. ApplicationHistoryServer should not reverse the order of the + filters it gets.
(Mit Desai via xgong) + + YARN-4180. AMLauncher does not retry on failures when talking to NM. + (adhoot) + + YARN-3619. ContainerMetrics unregisters during getMetrics and leads to + ConcurrentModificationException (Zhihai Xu via jlowe) + + YARN-4209. RMStateStore FENCED state doesn’t work due to updateFencedState called + by stateMachine.doTransition. (Zhihai Xu via rohithsharmaks) + + YARN-4041. Slow delegation token renewal can severely prolong RM recovery + (Sunil G via jlowe) + + YARN-2902. Killing a container that is localizing can orphan resources in + the DOWNLOADING state (Varun Saxena via jlowe) + + YARN-4313. Race condition in MiniMRYarnCluster when getting history server + address. (Jian He via xgong) + + YARN-3580. [JDK8] TestClientRMService.testGetLabelsToNodes fails. (Robert Kanter + via junping_du) + + YARN-4312. TestSubmitApplicationWithRMHA fails on branch-2.7 and branch-2.6 + as some of the test cases time out. (Varun Saxena via ozawa) + + YARN-4320. TestJobHistoryEventHandler fails as AHS in MiniYarnCluster no longer + binds to default port 8188. (Varun Saxena via ozawa) + + YARN-4354. Public resource localization fails with NPE. (Jason Lowe via + junping_du) + +Release 2.7.1 - 2015-07-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-3243. CapacityScheduler should pass headroom from parent to children + to make sure ParentQueue obey its capacity limits. (Wangda Tan via jianhe) + + YARN-3489. RMServerUtils.validateResourceRequests should only obtain queue + info once. (Varun Saxena via wangda) + + YARN-3539. Updated timeline server documentation and marked REST APIs evolving. + (Steve Loughran via zjshen) + + YARN-3723. Need to clearly document primaryFilter and otherInfo value type. + (Zhijie Shen via xgong) + + YARN-3711. Documentation of ResourceManager HA should explain configurations + about listen addresses. (Masatake Iwasaki via ozawa) + + OPTIMIZATIONS + + BUG FIXES + + YARN-3497. 
ContainerManagementProtocolProxy modifies IPC timeout conf + without making a copy. (Jason Lowe via jianhe) + + YARN-2605. [RM HA] Rest api endpoints doing redirect incorrectly. + (Xuan Gong via stevel) + + YARN-3522. Fixed DistributedShell to instantiate TimeLineClient as the + correct user. (Zhijie Shen via jianhe) + + YARN-3351. AppMaster tracking URL is broken in HA. (Anubhav Dhoot via kasha) + + YARN-3382. Some of UserMetricsInfo metrics are incorrectly set to root + queue metrics. (Rohit Agarwal via jianhe) + + YARN-3472. Fixed possible leak in DelegationTokenRenewer#allTokens. + (Rohith Sharmaks via jianhe) + + YARN-3465. Use LinkedHashMap to preserve order of resource requests. + (Zhihai Xu via kasha) + + YARN-3516. killing ContainerLocalizer action doesn't take effect when + private localizer receives FETCH_FAILURE status.(zhihai xu via xgong) + + YARN-3485. FairScheduler headroom calculation doesn't consider + maxResources for Fifo and FairShare policies. (kasha) + + YARN-3301. Fixed the format issue of the new RM attempt web page. + (Xuan Gong via jianhe) + + YARN-3385. Fixed a race-condition in ResourceManager's ZooKeeper based + state-store to avoid crashing on duplicate deletes. (Zhihai Xu via vinodkv) + + YARN-3358. Audit log not present while refreshing Service ACLs. + (Varun Saxena via devaraj) + + YARN-3476. Nodemanager can fail to delete local logs if log aggregation + fails (Rohith via jlowe) + + YARN-3434. Interaction between reservations and userlimit can result in + significant ULF violation (tgraves) + + YARN-3626. On Windows localized resources are not moved to the front + of the classpath when they should be. (Craig Welch via xgong) + + YARN-3457. NPE when NodeManager.serviceInit fails and stopRecoveryStore called. + (Bibin A Chundatt via ozawa) + + YARN-3537. NPE when NodeManager.serviceInit fails and stopRecoveryStore + invoked (Brahma Reddy Battula via jlowe) + + YARN-3601. Fix UT TestRMFailover.testRMWebAppRedirect. 
(Weiwei Yang via xgong) + + YARN-3677. Fix findbugs warnings in yarn-server-resourcemanager. + (Vinod Kumar Vavilapalli via ozawa) + + YARN-3681. yarn cmd says "could not find main class 'queue'" in windows. + (Craig Welch and Varun Saxena via xgong) + + YARN-3609. Load node labels from storage inside RM serviceStart. (Wangda + Tan via jianhe) + + YARN-3694. Fix dead link for TimelineServer REST API. + (Jagadesh Kiran N via aajisaka) + + YARN-3646. Applications are getting stuck some times in case of retry + policy forever. (Raju Bairishetti via devaraj) + + YARN-3675. FairScheduler: RM quits when node removal races with + continuous-scheduling on the same node. (Anubhav Dhoot via kasha) + + YARN-3701. Isolating the error of generating a single app report when + getting all apps from generic history service. (Zhijie Shen via xgong) + + YARN-2238. filtering on UI sticks even if I move away from the page. + (Jian He via xgong) + + YARN-3686. CapacityScheduler should trim default_node_label_expression. + (Sunil G via wangda) + + YARN-3764. CapacityScheduler should forbid moving LeafQueue from one parent + to another. (Wangda Tan via jianhe) + + YARN-3804. Both RM are on standBy state when kerberos user not in yarn.admin.acl + (Varun Saxena via xgong) + + YARN-3842. NMProxy should retry on NMNotYetReadyException. + (Robert Kanter via kasha) + + YARN-3809. Failed to launch new attempts because + ApplicationMasterLauncher's threads all hang (Jun Gong via jlowe) + +Release 2.7.0 - 2015-04-20 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-2179. [YARN-1492] Initial cache manager structure and context. + (Chris Trezzo via kasha) + + YARN-2180. [YARN-1492] In-memory backing store for cache manager. + (Chris Trezzo via kasha) + + YARN-2183. [YARN-1492] Cleaner service for cache manager. + (Chris Trezzo and Sangjin Lee via kasha) + + YARN-2186. [YARN-1492] Node Manager uploader service for cache manager. + (Chris Trezzo and Sangjin Lee via kasha) + + YARN-2236. 
[YARN-1492] Shared Cache uploader service on the Node + Manager. (Chris Trezzo and Sangjin Lee via kasha) + + YARN-2188. [YARN-1492] Client service for cache manager. + (Chris Trezzo and Sangjin Lee via kasha) + + YARN-2189. [YARN-1492] Admin service for cache manager. + (Chris Trezzo via kasha) + + YARN-2765. Added leveldb-based implementation for RMStateStore. (Jason Lowe + via jianhe) + + YARN-2203. [YARN-1492] Web UI for cache manager. (Chris Trezzo via kasha) + + YARN-2738. [YARN-2574] Add FairReservationSystem for FairScheduler. + (Anubhav Dhoot via kasha) + + YARN-2881. [YARN-2574] Implement PlanFollower for FairScheduler. + (Anubhav Dhoot via kasha) + + YARN-2427. Added the API of moving apps between queues in RM web services. + (Varun Vasudev via zjshen) + + YARN-2217. [YARN-1492] Shared cache client side changes. + (Chris Trezzo via kasha) + + YARN-2616 [YARN-913] Add CLI client to the registry to list, view + and manipulate entries. (Akshay Radia via stevel) + + YARN-2994. Document work-preserving RM restart. (Jian He via ozawa) + + YARN-2786. Created a yarn cluster CLI and seeded with one command for listing + node-labels collection. (Wangda Tan via vinodkv) + + IMPROVEMENTS + + YARN-3005. [JDK7] Use switch statement for String instead of if-else + statement in RegistrySecurity.java (Kengo Seki via aajisaka) + + YARN-2950. Change message to mandate, not suggest JS requirement on UI. + (Dustin Cote via harsh) + + YARN-2891. Failed Container Executor does not provide a clear error + message. (Dustin Cote via harsh) + + YARN-1979. TestDirectoryCollection fails when the umask is unusual. + (Vinod Kumar Vavilapalli and Tsuyoshi OZAWA via junping_du) + + YARN-2641. Decommission nodes on -refreshNodes instead of next + NM-RM heartbeat. (Zhihai Xu via kasha) + + YARN-2742. FairSchedulerConfiguration should allow extra spaces + between value and unit. (Wei Yan via kasha) + + YARN-2712. 
TestWorkPreservingRMRestart: Augment FS tests with + queue and headroom checks. (Tsuyoshi Ozawa via kasha) + + YARN-2735. diskUtilizationPercentageCutoff and diskUtilizationSpaceCutoff + are initialized twice in DirectoryCollection. (Zhihai Xu via kasha) + + YARN-570. Time strings are formatted in different timezone. + (Akira Ajisaka and Peng Zhang via kasha) + + YARN-2780. Log aggregated resource allocation in rm-appsummary.log (Eric + Payne via jlowe) + + YARN-2690. [YARN-2574] Make ReservationSystem and its dependent classes + independent of Scheduler type. (Anubhav Dhoot via kasha) + + YARN-2157. Added YARN metrics in the documentation. (Akira AJISAKA via + jianhe) + + YARN-2802. ClusterMetrics to include AM launch and register delays. + (Zhihai Xu via kasha) + + YARN-2375. Allow enabling/disabling timeline server per framework. + (Mit Desai via jeagles) + + YARN-2604. Scheduler should consider max-allocation-* in conjunction + with the largest node. (Robert Kanter via kasha) + + YARN-2679. Add metric for container launch duration. (Zhihai Xu via kasha) + + YARN-2669. FairScheduler: queue names shouldn't allow periods + (Wei Yan via Sandy Ryza) + + YARN-2404. Removed ApplicationAttemptState and ApplicationState class in + RMStateStore. (Tsuyoshi OZAWA via jianhe) + + YARN-2165. Added the sanity check for the numeric configuration values of + the timeline service. (Vasanth kumar RJ via zjshen) + + YARN-2907. SchedulerNode#toString should print all resource detail instead + of only memory. (Rohith via junping_du) + + YARN-2136. Changed RMStateStore to ignore store operations when fenced. + (Varun Saxena via jianhe) + + YARN-1156. Enhance NodeManager AllocatedGB and AvailableGB metrics + for aggregation of decimal values. (Tsuyoshi OZAWA via junping_du) + + YARN-2056. Disable preemption at Queue level (Eric Payne via jlowe) + + YARN-2762. Fixed RMAdminCLI to trim and check node-label related arguments + before sending to RM.
(Rohith Sharmaks via jianhe) + + YARN-2972. DelegationTokenRenewer thread pool never expands. (Jason Lowe + via junping_du) + + YARN-2949. Add documentation for CGroups (Varun Vasudev via junping_du) + + YARN-2970. NodeLabel operations in RMAdmin CLI get missing in help command. + (Varun Saxena via junping_du) + + YARN-2837. Support TimeLine server to recover delegation token when + restarting. (Zhijie Shen via jianhe) + + YARN-2993. Several fixes (missing acl check, error log msg ...) and some + refinement in AdminService. (Yi Liu via junping_du) + + YARN-2943. Added node-labels page on RM web UI. (Wangda Tan via jianhe) + + YARN-2998. Abstract out scheduler independent PlanFollower components. + (Anubhav Dhoot via kasha) + + YARN-2360. Fair Scheduler: Display dynamic fair share for queues on the + scheduler page. (Ashwin Shankar and Wei Yan via kasha) + + YARN-2880. Added a test to make sure node labels will be recovered + if RM restart is enabled. (Rohith Sharmaks via jianhe) + + YARN-2996. Improved synchronization and I/O operations of FS- and Mem- + RMStateStore. (Yi Liu via zjshen) + + YARN-2956. Added missing links in YARN documentation. (Masatake Iwasaki via + jianhe) + + YARN-2957. Create unit test to automatically compare YarnConfiguration + and yarn-default.xml. (rchiang via rkanter) + + YARN-2643. Don't create a new DominantResourceCalculator on every + FairScheduler.allocate call. (kasha via rkanter) + + YARN-3019. Make work-preserving-recovery the default mechanism for RM + recovery. (Jian He via junping_du) + + YARN-2807. Option "--forceactive" not works as described in usage of + "yarn rmadmin -transitionToActive". (Masatake Iwasaki via xgong) + + YARN-2984. Metrics for container's actual memory usage. (kasha) + + YARN-2800. Remove MemoryNodeLabelsStore and add a way to enable/disable + node labels feature. (Wangda Tan via ozawa) + + YARN-3086. Make NodeManager memory configurable in MiniYARNCluster. + (Robert Metzger via ozawa) + + YARN-2897. 
CrossOriginFilter needs more log statements (Mit Desai via + jeagles) + + YARN-3028. Better syntax for replaceLabelsOnNode in RMAdmin CLI + (Rohith Sharmaks via wangda) + + YARN-2932. Add entry for "preemptable" status (enabled/disabled) to + scheduler web UI and queue initialize/refresh logging. + (Eric Payne via wangda) + + YARN-3108. ApplicationHistoryServer doesn't process -D arguments (Chang Li + via jeagles) + + YARN-2808. Made YARN CLI list attempt’s finished containers of a running + application. (Naganarasimha G R via zjshen) + + YARN-3085. Application summary should include the application type (Rohith + via jlowe) + + YARN-3022. Expose Container resource information from NodeManager for + monitoring (adhoot via rkanter) + + YARN-3075. NodeLabelsManager implementation to retrieve label to node + mapping (Varun Saxena via wangda) + + YARN-1393. SLS: Add how-to-use instructions. (Wei Yan via kasha) + + YARN-1723. AMRMClientAsync missing blacklist addition and removal + functionality. (Bartosz Ługowski via sseth) + + YARN-3123. Made YARN CLI show a single completed container even if the app + is running. (Naganarasimha G R via zjshen) + + YARN-1582. Capacity Scheduler: add a maximum-allocation-mb setting per + queue (Thomas Graves via jlowe) + + YARN-1904. Ensure exceptions thrown in ClientRMService & + ApplicationHistoryClientService are uniform when application-attempt is + not found. (zjshen via acmurthy) + + YARN-3144. Configuration for making delegation token failures to timeline + server not-fatal (Jonathan Eagles via jlowe) + + YARN-3155. Refactor the exception handling code for TimelineClientImpl's + retryOn method (Li Lu via wangda) + + YARN-3100. Made YARN authorization pluggable. (Jian He via zjshen) + + YARN-2683. [YARN-913] registry config options: document and move to + core-default. (stevel) + + YARN-1237. Description for yarn.nodemanager.aux-services in + yarn-default.xml is misleading. (Brahma Reddy Battula via ozawa) + + YARN-3157.
Refactor the exception handling in ConverterUtils#to*Id. + (Bibin A Chundatt via ozawa) + + YARN-3147. Clean up RM web proxy code. (Steve Loughran via xgong) + + YARN-2079. Recover NonAggregatingLogHandler state upon nodemanager + restart. (Jason Lowe via junping_du) + + YARN-3158. Correct log messages in ResourceTrackerService. + (Varun Saxena via xgong) + + YARN-3179. Update use of Iterator to Iterable in RMAdminCLI and + CommonNodeLabelsManager. (Ray Chiang via xgong) + + YARN-3182. Cleanup switch statement in ApplicationMasterLauncher#handle(). + (Ray Chiang via ozawa) + + YARN-3203. Correct a log message in AuxServices. (Brahma Reddy Battula + via ozawa) + + YARN-1299. Improve a log message in AppSchedulingInfo by adding application + id. (Ashutosh Jindal and Devaraj K via ozawa) + + YARN-1514. Utility to benchmark ZKRMStateStore#loadState for RM HA. + (Tsuyoshi OZAWA via jianhe) + + YARN-3076. Add API/Implementation to YarnClient to retrieve label-to-node + mapping. (Varun Saxena via wangda) + + YARN-2799. Cleanup TestLogAggregationService based on the change in YARN-90. + (Zhihai Xu via junping_du) + + YARN-3237. AppLogAggregatorImpl fails to log error cause. + (Rushabh S Shah via xgong) + + YARN-3236. Cleanup RMAuthenticationFilter#AUTH_HANDLER_PROPERTY. + (zhihai xu via xgong) + + YARN-2797. TestWorkPreservingRMRestart should use ParametrizedSchedulerTestBase + (Karthik Kambatla via xgong) + + YARN-2797. Add -help to yarn logs and nodes CLI command. + (Jagadesh Kiran N via devaraj) + + YARN-3217. Remove httpclient dependency from hadoop-yarn-server-web-proxy. + (Brahma Reddy Battula via ozawa). + + YARN-3255. RM, NM, JobHistoryServer, and WebAppProxyServer's main() + should support generic options. (shv) + + YARN-2820. Retry in FileSystemRMStateStore when FS's operations fail + due to IOException. (Zhihai Xu via ozawa) + + YARN-3262. Surface application outstanding resource requests table + in RM web UI. (Jian He via wangda) + + YARN-3281. 
Added RMStateStore to StateMachine visualization list. + (Chengbing Liu via jianhe) + + YARN-3272. Surface container locality info in RM web UI. + (Jian He via wangda) + + YARN-3122. Metrics for container's actual CPU usage. + (Anubhav Dhoot via kasha) + + YARN-2190. Added CPU and memory limit options to the default container + executor for Windows containers. (Chuan Liu via jianhe) + + YARN-3296. Mark ResourceCalculatorProcessTree class as Public for configurable + resource monitoring. (Hitesh Shah via junping_du) + + YARN-3187. Documentation of Capacity Scheduler Queue mapping based on user + or group. (Gururaj Shetty via jianhe) + + YARN-2854. Updated the documentation of the timeline service and the generic + history service. (Naganarasimha G R via zjshen) + + YARN-2777. Mark the end of individual log in aggregated log. + (Varun Saxena via xgong) + + YARN-3273. Improve scheduler UI to facilitate scheduling analysis and + debugging. (Rohith Sharmaks via jianhe) + + OPTIMIZATIONS + + YARN-2990. FairScheduler's delay-scheduling always waits for node-local and + rack-local delays, even for off-rack-only requests. (kasha) + + BUG FIXES + + YARN-3071. Remove invalid char from sample conf in doc of FairScheduler. + (Masatake Iwasaki via aajisaka) + + YARN-2254. TestRMWebServicesAppsModification should run against both + CS and FS. (Zhihai Xu via kasha) + + YARN-2713. "RM Home" link in NM should point to one of the RMs in an + HA setup. (kasha) + + YARN-2857. ConcurrentModificationException in ContainerLogAppender + (Mohammad Kamrul Islam via jlowe) + + YARN-2432. RMStateStore should process the pending events before close. + (Varun Saxena via jianhe) + + YARN-1703. Fixed ResourceManager web-proxy to close connections correctly. + (Rohith Sharma via vinodkv) + + YARN-2870. Updated the command to run the timeline server in the document. + (Masatake Iwasaki via zjshen) + + YARN-2878. Fix DockerContainerExecutor.apt.vm formatting. (Abin Shahab via + jianhe) + + YARN-2315. 
FairScheduler: Set current capacity in addition to capacity. + (Zhihai Xu via kasha) + + YARN-2697. Remove useless RMAuthenticationHandler. (Haosong Huang via zjshen) + + YARN-2461. Fix PROCFS_USE_SMAPS_BASED_RSS_ENABLED property in + YarnConfiguration. (rchiang via rkanter) + + YARN-2869. CapacityScheduler should trim sub queue names when parse + configuration. (Wangda Tan via jianhe) + + YARN-2927. [YARN-1492] InMemorySCMStore properties are inconsistent. + (Ray Chiang via kasha) + + YARN-2931. PublicLocalizer may fail until directory is initialized by + LocalizeRunner. (Anubhav Dhoot via kasha) + + YARN-2930. Fixed TestRMRestart#testRMRestartRecoveringNodeLabelManager + intermittent failure. (Wangda Tan via jianhe) + + YARN-2924. Fixed RMAdminCLI to not convert node labels to lower case. + (Wangda Tan via jianhe) + + YARN-2243. Order of arguments for Preconditions.checkNotNull() is wrong in + SchedulerApplicationAttempt ctor. (devaraj) + + YARN-2912 Jersey Tests failing with port in use. (varun saxena via stevel) + + YARN-2356. yarn status command for non-existent application/application + attempt/container is too verbose. (Sunil G via devaraj) + + YARN-2914. [YARN-1492] Potential race condition in Singleton implementation of + SharedCacheUploaderMetrics, CleanerMetrics, ClientSCMMetrics. (Varun Saxena via kasha) + + YARN-2945. FSLeafQueue#assignContainer - document the reason for using both write and + read locks. (Tsuyoshi Ozawa via kasha) + + YARN-2944. InMemorySCMStore can not be instantiated with ReflectionUtils#newInstance. + (Chris Trezzo via kasha) + + YARN-2675. containersKilled metrics is not updated when the container is killed + during localization. (Zhihai Xu via kasha) + + YARN-2975. FSLeafQueue app lists are accessed without required locks. (kasha) + + YARN-2977. Fixed intermittent TestNMClient failure. + (Junping Du via ozawa) + + YARN-2939. Fix new findbugs warnings in hadoop-yarn-common. (Li Lu via junping_du) + + YARN-2940. 
Fix new findbugs warnings in rest of the hadoop-yarn components. (Li Lu + via junping_du) + + YARN-2937. Fixed new findbugs warnings in hadoop-yarn-nodemanager. (Varun Saxena + via zjshen) + + YARN-2946. Fixed potential deadlock in RMStateStore. (Rohith Sharmaks via + jianhe) + + YARN-2988. Graph#save() may leak file descriptors. (Ted Yu via ozawa) + + YARN-2938. Fixed new findbugs warnings in hadoop-yarn-resourcemanager and + hadoop-yarn-applicationhistoryservice. (Varun Saxena via zjshen) + + YARN-2987. Fixed ClientRMService#getQueueInfo to check against queue and + app ACLs. (Varun Saxena via jianhe) + + YARN-2991. Fixed DrainDispatcher to reuse the draining code path in + AsyncDispatcher. (Rohith Sharmaks via zjshen) + + YARN-2958. Made RMStateStore not update the last sequence number when updating the + delegation token. (Varun Saxena via zjshen) + + YARN-2230. Fixed few configs description in yarn-default.xml. (Vijay Bhat + via jianhe) + + YARN-3010. Fixed findbugs warning in AbstractYarnScheduler. (Yi Liu via + jianhe) + + YARN-2936. Changed YARNDelegationTokenIdentifier to set proto fields on + getProto method. (Varun Saxena via jianhe) + + YARN-3014. Replaces labels on a host should update all NM's labels on that + host. (Wangda Tan via jianhe) + + YARN-3027. Scheduler should use totalAvailable resource from node instead of + availableResource for maxAllocation. (adhoot via rkanter) + + YARN-2861. Fixed Timeline DT secret manager to not reuse RM's configs. + (Zhijie Shen via jianhe) + + YARN-3064. TestRMRestart/TestContainerResourceUsage/TestNodeManagerResync + failure with allocation timeout. (Jian He via junping_du) + + YARN-2815. Excluded transitive dependency of JLine in hadoop-yarn-server-common. + (Ferdinand Xu via zjshen) + + YARN-3070. TestRMAdminCLI#testHelp fails for transitionToActive command. + (Contributed by Junping Du) + + YARN-3015. yarn classpath command should support same options as hadoop + classpath. 
(Contributed by Varun Saxena) + + YARN-2933. Capacity Scheduler preemption policy should only consider capacity + without labels temporarily. (Mayank Bansal via wangda) + + YARN-2731. Fixed RegisterApplicationMasterResponsePBImpl to properly invoke + maybeInitBuilder. (Carlo Curino via wangda) + + YARN-3078. LogCLIHelpers lacks of a blank space before string 'does not exist'. + (Sam Liu via ozawa) + + YARN-3082. Non thread safe access to systemCredentials in NodeHeartbeatResponse + processing. (Anubhav Dhoot via ozawa) + + YARN-3088. LinuxContainerExecutor.deleteAsUser can throw NPE if native + executor returns an error (Eric Payne via jlowe) + + YARN-3079. Scheduler should also update maximumAllocation when updateNodeResource. + (Zhihai Xu via wangda) + + YARN-3029. FSDownload.unpack() uses local locale for FS case conversion, may not + work everywhere. (Varun Saxena via ozawa) + + YARN-3077. Fixed RM to create zk root path recursively. (Chun Chen via jianhe) + + YARN-3113. Release audit warning for Sorting icons.psd. (stevel via kihwal) + + YARN-3056. Add verification for containerLaunchDuration + in TestNodeManagerMetrics. (zhihai xu via xgong) + + YARN-2543. Made resource usage be published to the timeline server too. + (Naganarasimha G R via zjshen) + + YARN-3058. Fix error message of tokens' activation delay configuration. + (Yi Liu via ozawa) + + YARN-3101. In Fair Scheduler, fix canceling of reservations for exceeding + max share (Anubhav Dhoot via Sandy Ryza) + + YARN-3149. Fix typo in message for invalid application id. + (Bibin A Chundatt via xgong) + + YARN-3145. Fixed ConcurrentModificationException on CapacityScheduler + ParentQueue#getQueueUserAclInfo. (Tsuyoshi OZAWA via jianhe) + + YARN-1537. Fix race condition in + TestLocalResourcesTrackerImpl.testLocalResourceCache. (xgong via acmurthy) + + YARN-3089. LinuxContainerExecutor does not handle file arguments to + deleteAsUser (Eric Payne via jlowe) + + YARN-3143. 
RM Apps REST API can return NPE or entries missing id and other + fields (jlowe) + + YARN-2971. RM uses conf instead of token service address to renew timeline + delegation tokens (jeagles) + + YARN-3090. DeletionService can silently ignore deletion task failures + (Varun Saxena via jlowe) + + YARN-2809. Implement workaround for linux kernel panic when removing + cgroup (Nathan Roberts via jlowe) + + YARN-3160. Fix non-atomic operation on nodeUpdateQueue in RMNodeImpl. + (Chengbing Liu via junping_du) + + YARN-3074. Nodemanager dies when localizer runner tries to write to a full + disk (Varun Saxena via jlowe) + + YARN-3151. On Failover tracking url wrong in application cli for + KILLED application (Rohith via xgong) + + YARN-1580. Documentation error regarding "container-allocation.expiry-interval-ms" + (Brahma Reddy Battula via junping_du) + + YARN-3104. Fixed RM to not generate new AMRM tokens on every heartbeat + between rolling and activation. (Jason Lowe via jianhe) + + YARN-3191. Log object should be initialized with its own class. (Rohith via + aajisaka) + + YARN-3164. RMAdmin command usage prints incorrect command name. + (Bibin A Chundatt via xgong) + + YARN-2847. Linux native container executor segfaults if default banned + user detected (Olaf Flebbe via jlowe) + + YARN-2899. Run TestDockerContainerExecutorWithMocks on Linux only. + (Ming Ma via cnauroth) + + YARN-2749. Fix some testcases from TestLogAggregationService fails in trunk. + (Xuan Gong via junping_du) + + YARN-3132. RMNodeLabelsManager should remove node from node-to-label mapping + when node becomes deactivated. (Wangda Tan via jianhe) + + YARN-1615. Fix typos in description about delay scheduling. (Akira Ajisaka via + ozawa) + + YARN-933. Fixed InvalidStateTransitonException at FINAL_SAVING state in + RMApp. (Rohith Sharmaks via jianhe) + + YARN-3247. TestQueueMappings should use CapacityScheduler explicitly. + (Zhihai Xu via ozawa) + + YARN-3256. 
TestClientToAMTokens#testClientTokenRace is not running against + all Schedulers even when using ParameterizedSchedulerTestBase. + (Anubhav Dhoot via devaraj) + + YARN-3270. Fix node label expression not getting set in + ApplicationSubmissionContext (Rohit Agarwal via wangda) + + YARN-3265. Fixed a deadlock in CapacityScheduler by always passing a queue's + available resource-limit from the parent queue. (Wangda Tan via vinodkv) + + YARN-3131. YarnClientImpl should check FAILED and KILLED state in + submitApplication (Chang Li via jlowe) + + YARN-3275. CapacityScheduler: Preemption happening on non-preemptable + queues (Eric Payne via jlowe) + + YARN-3300. Outstanding_resource_requests table should not be shown in AHS. + (Xuan Gong via jianhe) + + YARN-3295. Fix documentation nits found in markdown conversion. + (Masatake Iwasaki via ozawa) + + YARN-3338. Exclude jline dependency from YARN. (Zhijie Shen via xgong) + + YARN-3154. Added additional APIs in LogAggregationContext to avoid aggregating + running logs of application when rolling is enabled. (Xuan Gong via vinodkv) + + YARN-1453. [JDK8] Fix Javadoc errors caused by incorrect or illegal tags in + doc comments. (Akira AJISAKA, Andrew Purtell, and Allen Wittenauer via ozawa) + + YARN-3349. Treat all exceptions as failure in + TestFSRMStateStore#testFSRMStateStoreClientRetry. (Zhihai Xu via ozawa) + + YARN-3379. Fixed missing data in localityTable and ResourceRequests table + in RM WebUI. (Xuan Gong via jianhe) + + YARN-3384. TestLogAggregationService.verifyContainerLogs fails after + YARN-2777. (Naganarasimha G R via ozawa) + + YARN-3336. FileSystem memory leak in DelegationTokenRenewer. + (Zhihai Xu via cnauroth) + + YARN-2213. Change proxy-user cookie log in AmIpFilter to DEBUG. + (Varun Saxena via xgong) + + YARN-3304. Cleaning up ResourceCalculatorProcessTree APIs for public use and + removing inconsistencies in the default values. (Junping Du and Karthik + Kambatla via vinodkv) + + YARN-3430. 
Made headroom data available on app attempt page of RM WebUI. + (Xuan Gong via zjshen) + + YARN-3466. Fix RM nodes web page to sort by node HTTP-address, #containers + and node-label column (Jason Lowe via wangda) + +Release 2.6.4 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-3857: Memory leak in ResourceManager with SIMPLE mode. + (mujunchao via zxu) + + YARN-3535. Scheduler must re-request container resources when RMContainer transitions + from ALLOCATED to KILLED (rohithsharma and peng.zhang via asuresh) + + YARN-4452. NPE when submit Unmanaged application. (Naganarasimha G R + via junping_du) + +Release 2.6.3 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-4241. Fix typo of property name in yarn-default.xml. + (Anthony Rojas via aajisaka) + + YARN-2859. ApplicationHistoryServer binds to default port 8188 in MiniYARNCluster. + (Vinod Kumar Vavilapalli via xgong) + + YARN-4344. NMs reconnecting with changed capabilities can lead to wrong + cluster resource calculations (Varun Vasudev via jlowe) + + YARN-3925. ContainerLogsUtils#getContainerLogFile fails to read container + log files from full disks. (zhihai xu via jlowe) + + YARN-4365. FileSystemNodeLabelStore should check for root dir existence on + startup (Kuhu Shukla via jlowe) + + YARN-4348. ZKRMStateStore.syncInternal shouldn't wait for sync completion for + avoiding blocking ZK's event thread. (ozawa) + + YARN-4424. Fix deadlock in RMAppImpl. (Jian he via wangda) + + YARN-4434. NodeManager Disk Checker parameter documentation is not correct. + (Weiwei Yang via aajisaka) + +Release 2.6.2 - 2015-10-28 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-3727. For better error recovery, check if the directory exists before + using it for localization. (Zhihai Xu via jlowe) + + YARN-4092. 
Fixed UI redirection to print useful messages when both RMs are + in standby mode. (Xuan Gong via jianhe) + + YARN-4101. RM should print alert messages if Zookeeper and Resourcemanager + gets connection issue. (Xuan Gong via jianhe) + + OPTIMIZATIONS + + BUG FIXES + + YARN-2019. Retrospect on decision of making RM crashed if any exception throw + in ZKRMStateStore. (Jian He via junping_du) + + YARN-4087. Followup fixes after YARN-2019 regarding RM behavior when + state-store error occurs. (Jian He via xgong) + + YARN-3554. Default value for maximum nodemanager connect wait time is too + high (Naganarasimha G R via jlowe) + + YARN-4005. Completed container whose app is finished is possibly not + removed from NMStateStore. (Jun Gong via jianhe) + + YARN-3780. Should use equals when compare Resource in RMNodeImpl#ReconnectNodeTransition. + (zhihai xu via devaraj) + + YARN-3802. Two RMNodes for the same NodeId are used in RM sometimes + after NM is reconnected. (zhihai xu via xgong) + + YARN-3194. RM should handle NMContainerStatuses sent by NM while + registering if NM is Reconnected node (Rohith via jlowe) + + YARN-3896. RMNode transitioned from RUNNING to REBOOTED because its response id + has not been reset synchronously. (Jun Gong via rohithsharmaks) + + YARN-3798. ZKRMStateStore shouldn't create new session without occurrence of + SESSIONEXPIRED. (ozawa and Varun Saxena) + +Release 2.6.1 - 2015-09-23 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-3249. Add a 'kill application' button to Resource Manager's Web UI. + (Ryu Kobayashi via ozawa) + + IMPROVEMENTS + + YARN-3230. Clarify application states on the web UI. (Jian He via wangda) + + YARN-1809. Synchronize RM and TimeLineServer Web-UIs. (Zhijie Shen and + Xuan Gong via jianhe) + + YARN-3092. Created a common ResourceUsage class to track labeled resource + usages in Capacity Scheduler. (Wangda Tan via jianhe) + + YARN-3098.
Created common QueueCapacities class in Capacity Scheduler to + track capacities-by-labels of queues. (Wangda Tan via jianhe) + + YARN-2301. Improved yarn container command. (Naganarasimha G R via jianhe) + + YARN-3978. Configurably turn off the saving of container info in Generic AHS + (Eric Payne via jeagles) + + YARN-3248. Display count of nodes blacklisted by apps in the web UI. + (Varun Vasudev via xgong) + + OPTIMIZATIONS + + BUG FIXES + + YARN-2856. Fixed RMAppImpl to handle ATTEMPT_KILLED event at ACCEPTED state + on app recovery. (Rohith Sharmaks via jianhe) + + YARN-2816. NM fail to start with NPE during container recovery (Zhihai Xu + via jlowe) + + YARN-2414. RM web UI: app page will crash if app is failed before any + attempt has been created (Wangda Tan via jlowe) + + YARN-2865. Fixed RM to always create a new RMContext when transitions from + StandBy to Active. (Rohith Sharmaks via jianhe) + + YARN-2906. CapacitySchedulerPage shows HTML tags for a queue's Active Users. + (Jason Lowe via jianhe) + + YARN-2905. AggregatedLogsBlock page can infinitely loop if the aggregated + log file is corrupted (Varun Saxena via jlowe) + + YARN-2890. MiniYARNCluster should start the timeline server based on the + configuration. (Mit Desai via zjshen) + + YARN-2894. Fixed a bug regarding application view acl when RM fails over. + (Rohith Sharmaks via jianhe) + + YARN-2874. Dead lock in "DelegationTokenRenewer" which blocks RM to execute + any further apps. (Naganarasimha G R via kasha) + + YARN-2910. FSLeafQueue can throw ConcurrentModificationException. + (Wilfred Spiegelenburg via kasha) + + YARN-2917. Fixed potential deadlock when system.exit is called in AsyncDispatcher + (Rohith Sharmaks via jianhe) + + YARN-2964. RM prematurely cancels tokens for jobs that submit jobs (oozie) + (Jian He via jlowe) + + YARN-1984. LeveldbTimelineStore does not handle db exceptions properly + (Varun Saxena via jlowe) + + YARN-2952. Fixed incorrect version check in StateStore.
(Rohith Sharmaks + via jianhe) + + YARN-2340. Fixed NPE when queue is stopped during RM restart. + (Rohith Sharmaks via jianhe) + + YARN-2992. ZKRMStateStore crashes due to session expiry. (Karthik Kambatla + via jianhe) + + YARN-2922. ConcurrentModificationException in CapacityScheduler's LeafQueue. + (Rohith Sharmaks via ozawa) + + YARN-2997. Fixed NodeStatusUpdater to not send already-sent completed + container statuses on heartbeat. (Chengbing Liu via jianhe) + + YARN-3011. Possible IllegalArgumentException in ResourceLocalizationService + might lead NM to crash. (Varun Saxena via jianhe) + + YARN-3103. AMRMClientImpl does not update AMRM token properly. (Jason Lowe + via jianhe) + + YARN-3094. Reset timer for liveness monitors after RM recovery. (Jun Gong + via jianhe) + + YARN-2246. Made the proxy tracking URL always be + http(s)://proxy addr:port/proxy/ to avoid duplicate sections. (Devaraj + K via zjshen) + + YARN-3207. Secondary filter matches entities which do not have the key being + filtered for. (Zhijie Shen via xgong) + + YARN-3238. Connection timeouts to nodemanagers are retried at + multiple levels (Jason Lowe via xgong) + + YARN-3239. WebAppProxy does not support a final tracking url which has + query fragments and params (Jian He via jlowe) + + YARN-3222. Fixed RMNode to send scheduler events in sequential order when a + node reconnects. (Rohith Sharma K S via jianhe) + + YARN-3231. FairScheduler: Changing queueMaxRunningApps interferes with pending + jobs. (Siqi Li via kasha) + + YARN-3242. Asynchrony in ZK-close can lead to ZKRMStateStore watcher receiving + events for old client. (Zhihai Xu via kasha) + + YARN-3227. Timeline renew delegation token fails when RM user's TGT is expired + (Zhijie Shen via xgong) + + YARN-3287. Made TimelineClient put methods do as the correct login context. + (Daryn Sharp and Jonathan Eagles via zjshen) + + YARN-3267.
Timelineserver applies the ACL rules after applying the limit on + the number of records (Chang Li via jeagles) + + YARN-3369. Missing NullPointer check in AppSchedulingInfo causes RM to die. + (Brahma Reddy Battula via wangda) + + YARN-3393. Getting application(s) goes wrong when app finishes before + starting the attempt. (Zhijie Shen via xgong) + + YARN-3055. Fixed ResourceManager's DelegationTokenRenewer to not stop token + renewal of applications part of a bigger workflow. (Daryn Sharp via vinodkv) + + YARN-3493. RM fails to come up with error "Failed to load/recover state" + when mem settings are changed. (Jian He via wangda) + + YARN-3487. CapacityScheduler scheduler lock obtained unnecessarily when + calling getQueue (Jason Lowe via wangda) + + YARN-3024. LocalizerRunner should give DIE action when all resources are + localized. (Chengbing Liu via xgong) + + YARN-3464. Race condition in LocalizerRunner kills localizer before + localizing all resources. (Zhihai Xu via kasha) + + YARN-3641. NodeManager: stopRecoveryStore() shouldn't be skipped when + exceptions happen in stopping NM's sub-services. (Junping Du via jlowe) + + YARN-3526. ApplicationMaster tracking URL is incorrectly redirected + on a QJM cluster. (Weiwei Yang via xgong) + + YARN-2766. Made ApplicationHistoryManager return a sorted list of apps, + attempts and containers. (Robert Kanter via zjshen) + + YARN-3700. Made generic history service load a number of latest applications + according to the parameter or the configuration. (Xuan Gong via zjshen) + + YARN-2900. Application (Attempt and Container) Not Found in AHS results + in InternalServer Error (500). (Zhijie Shen and Mit Desai via xgong) + + YARN-3725. App submission via REST API is broken in secure mode due to + Timeline DT service address is empty. (Zhijie Shen via wangda) + + YARN-3585. NodeManager cannot exit on SHUTDOWN event triggered and NM + recovery is enabled (Rohith Sharmaks via jlowe) + + YARN-3832. 
Resource Localization fails on a cluster due to existing cache + directories (Brahma Reddy Battula via jlowe) + + YARN-3850. NM fails to read files from full disks which can lead to + container logs being lost and other issues (Varun Saxena via jlowe) + + YARN-3990. AsyncDispatcher may be overloaded with RMAppNodeUpdateEvent when + Node is connected/disconnected (Bibin A Chundatt via jlowe) + + YARN-2637. Fixed max-am-resource-percent calculation in CapacityScheduler + when activating applications. (Craig Welch via jianhe) + + YARN-3733. Fix DominantRC#compare() does not work as expected if + cluster resource is empty. (Rohith Sharmaks via wangda) + + YARN-2920. Changed CapacityScheduler to kill containers on nodes where + node labels are changed. (Wangda Tan via jianhe) + + YARN-2978. Fixed potential NPE while getting queue info. (Varun Saxena via + jianhe) + + YARN-3099. Capacity Scheduler LeafQueue/ParentQueue should use ResourceUsage + to track used-resources-by-label. (Wangda Tan via jianhe) + + YARN-2694. Ensure only single node label specified in ResourceRequest. + (Wangda Tan via jianhe) + + YARN-3124. Fixed CS LeafQueue/ParentQueue to use QueueCapacities to track + capacities-by-label. (Wangda Tan via jianhe) + + YARN-2918. RM should not fail on startup if queue's configured labels do + not exist in cluster-node-labels. (Wangda Tan via jianhe) + + YARN-3999. RM hangs on draining events. (Jian He via xgong) + + YARN-4047. ClientRMService getApplications has high scheduler lock contention. + (Jason Lowe via jianhe) + + YARN-1884. Added nodeHttpAddress into ContainerReport and fixed the link to NM + web page. (Xuan Gong via zjshen) + + YARN-3171. Sort by Application id, AppAttempt and ContainerID doesn't work + in ATS / RM web ui. (Naganarasimha G R via xgong) + + YARN-3740. Fixed the typo in the configuration name: + APPLICATION_HISTORY_PREFIX_MAX_APPS. (Xuan Gong via zjshen) + + YARN-3544. Got back AM logs link on the RM web UI for a completed app.
+ (Xuan Gong via zjshen) + +Release 2.6.0 - 2014-11-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-1964. Create Docker analog of the LinuxContainerExecutor in YARN. (Abin + Shahab via raviprak) + + YARN-2131. Add a way to format the RMStateStore. (Robert Kanter via kasha) + + YARN-1367. Changed NM to not kill containers on NM resync if RM work-preserving + restart is enabled. (Anubhav Dhoot via jianhe) + + YARN-1366. Changed AMRMClient to re-register with RM and send outstanding requests + back to RM on work-preserving RM restart. (Rohith via jianhe) + + YARN-2181. Added preemption info to logs and RM web UI. (Wangda Tan via + jianhe) + + YARN-1354. Recover applications upon nodemanager restart. (Jason Lowe via + junping_du) + + YARN-1337. Recover containers upon nodemanager restart. (Jason Lowe via + junping_du) + + YARN-2277. Added cross-origin support for the timeline server web services. + (Jonathan Eagles via zjshen) + + YARN-2378. Added support for moving applications across queues in + CapacityScheduler. (Subramaniam Venkatraman Krishnan via jianhe) + + YARN-2411. Support simple user and group mappings to queues. (Ram Venkatesh + via jianhe) + + YARN-2174. Enable HTTPs for the writer REST API of TimelineServer. + (Zhijie Shen via jianhe) + + YARN-2393. FairScheduler: Add the notion of steady fair share. + (Wei Yan via kasha) + + YARN-2395. FairScheduler: Preemption timeout should be configurable per + queue. (Wei Yan via kasha) + + YARN-2394. FairScheduler: Configure fairSharePreemptionThreshold per queue. + (Wei Yan via kasha) + + YARN-415. Capture aggregate memory allocation at the app-level for chargeback. + (Eric Payne & Andrey Klochkov via jianhe) + + YARN-2440. Enabled Nodemanagers to limit the aggregate cpu usage across all + containers to a preconfigured limit. (Varun Vasudev via vinodkv) + + YARN-2033. Merging generic-history into the Timeline Store + (Zhijie Shen via junping_du) + + YARN-611. 
Added an API to let apps specify an interval beyond which AM + failures should be ignored towards counting max-attempts. (Xuan Gong via + vinodkv) + + YARN-2531. Added a configuration for admins to be able to override app-configs + and enforce/not-enforce strict control of per-container cpu usage. (Varun + Vasudev via vinodkv) + + YARN-1250. Generic history service should support application-acls. (Zhijie Shen + via junping_du) + + YARN-2569. Added the log handling APIs for the long running services. (Xuan + Gong via zjshen) + + YARN-2102. Added the concept of a Timeline Domain to handle read/write ACLs + on Timeline service event data. (Zhijie Shen via vinodkv) + + YARN-2581. Passed LogAggregationContext to NM via ContainerTokenIdentifier. + (Xuan Gong via zjshen) + + YARN-1063. Augmented Hadoop common winutils to have the ability to create + containers as domain users. (Remus Rusanu via vinodkv) + + YARN-1972. Added a secure container-executor for Windows. (Remus Rusanu via + vinodkv) + + YARN-2613. Support retry in NMClient for rolling-upgrades. (Jian He via + junping_du) + + YARN-2446. Augmented Timeline service APIs to start taking in domains as a + parameter while posting entities and events. (Zhijie Shen via vinodkv) + + YARN-2468. Enhanced NodeManager to support log handling APIs (YARN-2569) for + use by long running services. (Xuan Gong via vinodkv) + + YARN-1051. Add a system for creating reservations of cluster capacity. + (see breakdown below) + + YARN-913. Add a way to register long-lived services in a YARN cluster. + (stevel) + + YARN-2493. Added user-APIs for using node-labels. (Wangda Tan via vinodkv) + + YARN-2544. Added admin-API objects for using node-labels. (Wangda Tan via + vinodkv) + + YARN-2494. Added NodeLabels Manager internal API and implementation. (Wangda + Tan via vinodkv) + + YARN-2501. Enhanced AMRMClient library to support requests against node + labels. (Wangda Tan via vinodkv) + + YARN-2656. 
Made RM web services authentication filter support proxy user. + (Varun Vasudev and Zhijie Shen via zjshen) + + YARN-2496. Enhanced Capacity Scheduler to have basic support for allocating + resources based on node-labels. (Wangda Tan via vinodkv) + + YARN-2500. Enhanced ResourceManager to support schedulers allocating resources + based on node-labels. (Wangda Tan via vinodkv) + + YARN-2504. Enhanced RM Admin CLI to support management of node-labels. + (Wangda Tan via vinodkv) + + YARN-2198. Remove the need to run NodeManager as privileged account for + Windows Secure Container Executor. (Remus Rusanu via jianhe) + + YARN-2647. Added a queue CLI for getting queue information. (Sunil Govind via + vinodkv) + + YARN-2632. Document NM Restart feature. (Junping Du and Vinod Kumar + Vavilapalli via jlowe) + + YARN-2505. Supported get/add/remove/change labels in RM REST API. (Craig Welch + via zjshen) + + YARN-2811. In Fair Scheduler, reservation fulfillments shouldn't ignore max + share (Siqi Li via Sandy Ryza) + + YARN-3445. Cache runningApps in RMNode for getting running apps on given + NodeId. (Junping Du via mingma) + + IMPROVEMENTS + + YARN-2197. Add a link to YARN CHANGES.txt in the left side of doc + (Akira AJISAKA via aw) + + YARN-1918. Typo in description and error message for + 'yarn.resourcemanager.cluster-id' (Anandha L Ranganathan via aw) + + YARN-2242. Improve exception information on AM launch crashes. (Li Lu + via junping_du) + + YARN-2274. FairScheduler: Add debug information about cluster capacity, + availability and reservations. (kasha) + + YARN-2228. Augmented TimelineServer to load pseudo authentication filter when + authentication = simple. (Zhijie Shen via vinodkv) + + YARN-1341. Recover NMTokens upon nodemanager restart. (Jason Lowe via + junping_du) + + YARN-2208. AMRMTokenManager need to have a way to roll over AMRMToken. (xgong) + + YARN-2323. FairShareComparator creates too many Resource objects (Hong Zhiguo + via Sandy Ryza) + + YARN-2045.
Data persisted in NM should be versioned (Junping Du via jlowe) + + YARN-2013. The diagnostics is always the ExitCodeException stack when the container + crashes. (Tsuyoshi OZAWA via junping_du) + + YARN-2295. Refactored DistributedShell to use public APIs of protocol records. + (Li Lu via jianhe) + + YARN-1342. Recover container tokens upon nodemanager restart. (Jason Lowe via + devaraj) + + YARN-2214. FairScheduler: preemptContainerPreCheck() in FSParentQueue delays + convergence towards fairness. (Ashwin Shankar via kasha) + + YARN-2211. Persist AMRMToken master key in RMStateStore for RM recovery. + (Xuan Gong via jianhe) + + YARN-2328. FairScheduler: Verify update and continuous scheduling threads are + stopped when the scheduler is stopped. (kasha) + + YARN-2347. Consolidated RMStateVersion and NMDBSchemaVersion into Version in + yarn-server-common. (Junping Du via zjshen) + + YARN-1994. Expose YARN/MR endpoints on multiple interfaces. (Craig Welch, + Milan Potocnik, Arpit Agarwal via xgong) + + YARN-2343. Improve NMToken expire exception message. (Li Lu via jianhe) + + YARN-2370. Fix comment in o.a.h.y.server.resourcemanager.schedulerAppSchedulingInfo + (Wenwu Peng via junping_du) + + YARN-2298. Move TimelineClient to yarn-common project (Zhijie Shen via + junping_du) + + YARN-2288. Made persisted data in LevelDB timeline store be versioned. (Junping Du + via zjshen) + + YARN-2352. FairScheduler: Collect metrics on duration of critical methods that + affect performance. (kasha) + + YARN-2212. ApplicationMaster needs to find a way to update the AMRMToken + periodically. (xgong) + + YARN-2026. Fair scheduler: Consider only active queues for computing fairshare. + (Ashwin Shankar via kasha) + + YARN-1954. Added waitFor to AMRMClient(Async). (Tsuyoshi Ozawa via zjshen) + + YARN-2302. Refactor TimelineWebServices. (Zhijie Shen via junping_du) + + YARN-2337. ResourceManager sets ClientRMService in RMContext multiple times. + (Zhihai Xu via kasha) + + YARN-2138. 
Cleaned up notifyDone* APIs in RMStateStore. (Varun Saxena via + jianhe) + + YARN-2373. Changed WebAppUtils to use Configuration#getPassword for + accessing SSL passwords. (Larry McCay via jianhe) + + YARN-2317. Updated the document about how to write YARN applications. (Li Lu via + zjshen) + + YARN-2399. FairScheduler: Merge AppSchedulable and FSSchedulerApp into + FSAppAttempt. (kasha) + + YARN-1370. Fair scheduler to re-populate container allocation state. + (Anubhav Dhoot via kasha) + + YARN-2389. Added functionality for schedulers to kill all applications in a + queue. (Subramaniam Venkatraman Krishnan via jianhe) + + YARN-1326. RM should log using RMStore at startup time. + (Tsuyoshi Ozawa via kasha) + + YARN-2182. Updated ContainerId#toString() to append RM Epoch number. + (Tsuyoshi OZAWA via jianhe) + + YARN-2406. Move RM recovery related proto to + yarn_server_resourcemanager_recovery.proto. (Tsuyoshi Ozawa via jianhe) + + YARN-1506. Changed RMNode/SchedulerNode to update resource with event + notification. (Junping Du via jianhe) + + YARN-2509. Enable Cross Origin Filter for timeline server only and not all + Yarn servers (Mit Desai via jeagles) + + YARN-2511. Allowed all origins by default when CrossOriginFilter is + enabled. (Jonathan Eagles via zjshen) + + YARN-2508. Cross Origin configuration parameters prefix are not honored + (Mit Desai via jeagles) + + YARN-2512. Allowed pattern matching for origins in CrossOriginFilter. + (Jonathan Eagles via zjshen) + + YARN-2507. Documented CrossOriginFilter configurations for the timeline + server. (Jonathan Eagles via zjshen) + + YARN-2515. Updated ConverterUtils#toContainerId to parse epoch. + (Tsuyoshi OZAWA via jianhe) + + YARN-2448. Changed ApplicationMasterProtocol to expose RM-recognized resource + types to the AMs. (Varun Vasudev via vinodkv) + + YARN-2538. Added logs when RM sends roll-overed AMRMToken to AM. (Xuan Gong + via zjshen) + + YARN-2229. 
Changed the integer field of ContainerId to be long type. + (Tsuyoshi OZAWA via jianhe) + + YARN-2547. Cross Origin Filter throws UnsupportedOperationException upon + destroy (Mit Desai via jeagles) + + YARN-2557. Add a parameter "attempt_Failures_Validity_Interval" into + DistributedShell (xgong) + + YARN-2001. Added a time threshold for RM to wait before starting container + allocations after restart/failover. (Jian He via vinodkv) + + YARN-1372. Ensure all completed containers are reported to the AMs across + RM restart. (Anubhav Dhoot via jianhe) + + YARN-2539. FairScheduler: Set the default value for maxAMShare to 0.5. + (Wei Yan via kasha) + + YARN-1959. Fix headroom calculation in FairScheduler. + (Anubhav Dhoot via kasha) + + YARN-2577. Clarify ACL delimiter and how to configure ACL groups only + (Miklos Christine via aw) + + YARN-2372. There are Chinese Characters in the FairScheduler's document + (Fengdong Yu via aw) + + YARN-668. Changed NMTokenIdentifier/AMRMTokenIdentifier/ContainerTokenIdentifier + to use protobuf object as the payload. (Junping Du via jianhe) + + YARN-1769. CapacityScheduler: Improve reservations (Thomas Graves via + jlowe) + + YARN-2627. Added the info logs of attemptFailuresValidityInterval and number + of previous failed attempts. (Xuan Gong via zjshen) + + YARN-2562. Changed ContainerId#toString() to be more readable. (Tsuyoshi + OZAWA via jianhe) + + YARN-2615. Changed ClientToAMTokenIdentifier/RM(Timeline)DelegationTokenIdentifier + to use protobuf as payload. (Junping Du via jianhe) + + YARN-2629. Made the distributed shell use the domain-based timeline ACLs. + (zjshen) + + YARN-2583. Modified AggregatedLogDeletionService to be able to delete rolling + aggregated logs. (Xuan Gong via zjshen) + + YARN-2312. Deprecated old ContainerId#getId API and updated MapReduce to + use ContainerId#getContainerId instead. (Tsuyoshi OZAWA via jianhe) + + YARN-2621. Simplify the output when the user doesn't have the access for + getDomain(s). 
(Zhijie Shen via jianhe) + + YARN-1879. Marked Idempotent/AtMostOnce annotations to ApplicationMasterProtocol + for RM fail over. (Tsuyoshi OZAWA via jianhe) + + YARN-2676. Enhanced Timeline auth-filter to support proxy users. (Zhijie Shen + via vinodkv) + + YARN-2673. Made timeline client put APIs retry if ConnectException happens. + (Li Lu via zjshen) + + YARN-2582. Fixed Log CLI and Web UI for showing aggregated logs of LRS. (Xuan + Gong via zjshen) + + YARN-90. NodeManager should identify failed disks becoming good again + (Varun Vasudev via jlowe) + + YARN-2709. Made timeline client getDelegationToken API retry if ConnectException + happens. (Li Lu via zjshen) + + YARN-2682. Updated WindowsSecureContainerExecutor to not use + DefaultContainerExecutor#getFirstApplicationDir and use getWorkingDir() + instead. (Zhihai Xu via jianhe) + + YARN-2209. Replaced AM resync/shutdown command with corresponding exceptions and + made related MR changes. (Jian He via zjshen) + + YARN-2703. Added logUploadedTime into LogValue for better display. (Xuan Gong + via zjshen) + + YARN-2704. Changed ResourceManager to optionally obtain tokens itself for the + sake of localization and log-aggregation for long-running services. (Jian He + via vinodkv) + + YARN-2502. Changed DistributedShell to support node labels. (Wangda Tan via + jianhe) + + YARN-2760. Remove 'experimental' from FairScheduler docs. (Harsh J via kasha) + + YARN-2503. Added node labels in web UI. (Wangda Tan via jianhe) + + YARN-2779. Fixed ResourceManager to not require delegation tokens for + communicating with Timeline Service. (Zhijie Shen via vinodkv) + + YARN-2778. Moved node-labels' reports to the yarn nodes CLI from the admin + CLI. (Wangda Tan via vinodkv) + + YARN-2770. Added functionality to renew/cancel TimeLineDelegationToken. + (Zhijie Shen via jianhe) + + YARN-2818. Removed the now unnecessary user entity injection from Timeline + service given we now have domains.
(Zhijie Shen via vinodkv) + + YARN-2635. TestRM, TestRMRestart, TestClientToAMTokens should run + with both CS and FS. (Wei Yan and kasha via kasha) + + OPTIMIZATIONS + + BUG FIXES + + YARN-2251. Avoid negative elapsed time in JHS/MRAM web UI and services. + (Zhijie Shen via junping_du) + + YARN-2088. Fixed a bug in GetApplicationsRequestPBImpl#mergeLocalToBuilder. + (Binglin Chang via jianhe) + + YARN-2260. Fixed ResourceManager's RMNode to correctly remember containers + when nodes resync during work-preserving RM restart. (Jian He via vinodkv) + + YARN-2264. Fixed a race condition in DrainDispatcher which may cause random + test failures. (Li Lu via jianhe) + + YARN-2219. Changed ResourceManager to avoid AMs and NMs getting exceptions + after RM recovery but before scheduler learns about apps and app-attempts. + (Jian He via vinodkv) + + YARN-2244. FairScheduler missing handling of containers for unknown + application attempts. (Anubhav Dhoot via kasha) + + YARN-2321. NodeManager web UI can incorrectly report Pmem enforcement + (Leitao Guo via jlowe) + + YARN-2273. NPE in ContinuousScheduling thread when we lose a node. + (Wei Yan via kasha) + + YARN-2313. Livelock can occur in FairScheduler when there are lots of + running apps (Tsuyoshi Ozawa via Sandy Ryza) + + YARN-2147. client lacks delegation token exception details when + application submit fails (Chen He via jlowe) + + YARN-1796. container-executor shouldn't require o-r permissions (atm) + + YARN-2354. DistributedShell may allocate more containers than client + specified after AM restarts. (Li Lu via jianhe) + + YARN-2051. Fix bug in PBimpls and add more unit tests with reflection. + (Binglin Chang via junping_du) + + YARN-2374. Fixed TestDistributedShell#testDSShell failure due to hostname + mismatch. (Varun Vasudev via jianhe) + + YARN-2359. Application hangs when it fails to launch AM container. + (Zhihai Xu via kasha) + + YARN-2388. Fixed TestTimelineWebServices failure due to HADOOP-10791.
(zjshen) + + YARN-2008. Fixed CapacityScheduler to calculate headroom based on max available + capacity instead of configured max capacity. (Craig Welch via jianhe) + + YARN-2400. Fixed TestAMRestart fails intermittently. (Jian He via xgong) + + YARN-2361. RMAppAttempt state machine entries for KILLED state has duplicate + event entries. (Zhihai Xu via kasha) + + YARN-2070. Made DistributedShell publish the short user name to the timeline + server. (Robert Kanter via zjshen) + + YARN-2397. Avoided loading two authentication filters for RM and TS web + interfaces. (Varun Vasudev via zjshen) + + YARN-2409. RM ActiveToStandBy transition missing stopping previous rmDispatcher. + (Rohith via jianhe) + + YARN-2249. Avoided AM release requests being lost on work preserving RM + restart. (Jian He via zjshen) + + YARN-2034. Description for yarn.nodemanager.localizer.cache.target-size-mb + is incorrect (Chen He via jlowe) + + YARN-1919. Potential NPE in EmbeddedElectorService#stop. + (Tsuyoshi Ozawa via kasha) + + YARN-2424. LCE should support non-cgroups, non-secure mode (Chris Douglas + via aw) + + YARN-2434. RM should not recover containers from previously failed attempt + when AM restart is not enabled (Jian He via jlowe) + + YARN-2035. FileSystemApplicationHistoryStore should not make working dir + when it already exists. (Jonathan Eagles via zjshen) + + YARN-2405. NPE in FairSchedulerAppsBlock. (Tsuyoshi Ozawa via kasha) + + YARN-2449. Fixed the bug that TimelineAuthenticationFilterInitializer + is not automatically added when hadoop.http.filter.initializers is not + configured. (Varun Vasudev via zjshen) + + YARN-2450. Fix typos in log messages. (Ray Chiang via hitesh) + + YARN-2447. RM web service app submission doesn't pass secrets correctly. + (Varun Vasudev via jianhe) + + YARN-2462. TestNodeManagerResync#testBlockNewContainerRequestsOnStartAndResync + should have a test timeout (Eric Payne via jlowe) + + YARN-2431.
NM restart: cgroup is not removed for reacquired containers + (jlowe) + + YARN-2519. Credential Provider related unit tests failed on Windows. + (Xiaoyu Yao via cnauroth) + + YARN-2526. SLS can deadlock when all the threads are taken by AMSimulators. + (Wei Yan via kasha) + + YARN-1458. FairScheduler: Zero weight can lead to livelock. + (Zhihai Xu via kasha) + + YARN-2459. RM crashes if App gets rejected for any reason + and HA is enabled. (Jian He and Mayank Bansal via xgong) + + YARN-2158. Fixed TestRMWebServicesAppsModification#testSingleAppKill test + failure. (Varun Vasudev via jianhe) + + YARN-2534. FairScheduler: Potential integer overflow calculating totalMaxShare. + (Zhihai Xu via kasha) + + YARN-2541. Fixed ResourceManagerRest.apt.vm table syntax error. (jianhe) + + YARN-2484. FileSystemRMStateStore#readFile/writeFile should close + FSData(In|Out)putStream in final block (Tsuyoshi OZAWA via jlowe) + + YARN-2456. Possible livelock in CapacityScheduler when RM is recovering apps. + (Jian He via xgong) + + YARN-2542. Fixed NPE when retrieving ApplicationReport from TimeLineServer. + (Zhijie Shen via jianhe) + + YARN-2528. Relaxed http response split vulnerability protection for the origins + header and made it accept multiple origins in CrossOriginFilter. (Jonathan + Eagles via zjshen) + + YARN-2549. TestContainerLaunch fails due to classpath problem with hamcrest + classes. (cnauroth) + + YARN-2529. Generic history service RPC interface doesn't work when service + authorization is enabled. (Zhijie Shen via jianhe) + + YARN-2558. Updated ContainerTokenIdentifier#read/write to use + ContainerId#getContainerId. (Tsuyoshi OZAWA via jianhe) + + YARN-2559. Fixed NPE in SystemMetricsPublisher when retrieving + FinalApplicationStatus. (Zhijie Shen via jianhe) + + YARN-1779. Fixed AMRMClient to handle AMRMTokens correctly across + ResourceManager work-preserving-restart or failover. (Jian He via vinodkv) + + YARN-2363. 
Submitted applications occasionally lack a tracking URL (jlowe) + + YARN-2561. MR job client cannot reconnect to AM after NM restart. (Junping + Du via jlowe) + + YARN-2563. Fixed YarnClient to call getTimeLineDelegationToken only if the + Token is not present. (Zhijie Shen via jianhe) + + YARN-2568. Fixed the potential test failures due to race conditions when RM + work-preserving recovery is enabled. (Jian He via zjshen) + + YARN-2565. Fixed RM to not use FileSystemApplicationHistoryStore unless + explicitly set. (Zhijie Shen via jianhe) + + YARN-2460. Remove obsolete entries from yarn-default.xml (Ray Chiang via + aw) + + YARN-2452. TestRMApplicationHistoryWriter fails with FairScheduler. + (Zhihai Xu via kasha) + + YARN-2453. TestProportionalCapacityPreemptionPolicy fails with + FairScheduler. (Zhihai Xu via kasha) + + YARN-2540. FairScheduler: Queue filters not working on scheduler page in + RM UI. (Ashwin Shankar via kasha) + + YARN-2584. TestContainerManagerSecurity fails on trunk. (Jian He via + junping_du) + + YARN-2252. Intermittent failure of + TestFairScheduler.testContinuousScheduling. + (Ratandeep Ratti and kasha via kasha) + + YARN-2161. Fix build on macosx: YARN parts (Binglin Chang via aw) + + YARN-2596. TestWorkPreservingRMRestart fails with FairScheduler. (kasha) + + YARN-2546. Made REST API for application creation/submission use numeric and + boolean types instead of the string of them. (Varun Vasudev via zjshen) + + YARN-2523. ResourceManager UI showing negative value for "Decommissioned + Nodes" field (Rohith via jlowe) + + YARN-2608. FairScheduler: Potential deadlocks in loading alloc files and + clock access. (Wei Yan via kasha) + + YARN-2606. Application History Server tries to access hdfs before doing + secure login (Mit Desai via jeagles) + + YARN-2610. Hamlet should close table tags. (Ray Chiang via kasha) + + YARN-2387. Resource Manager crashes with NPE due to lack of + synchronization (Mit Desai via jlowe) + + YARN-2594. 
Potential deadlock in RM when querying + ApplicationResourceUsageReport. (Wangda Tan via kasha) + + YARN-2602. Fixed possible NPE in ApplicationHistoryManagerOnTimelineStore. + (Zhijie Shen via jianhe) + + YARN-2630. Prevented previous AM container status from being acquired by the + current restarted AM. (Jian He via zjshen) + + YARN-2617. Fixed NM to not send duplicate container status whose app is not + running. (Jun Gong via jianhe) + + YARN-2624. Resource Localization fails on a cluster due to existing cache + directories (Anubhav Dhoot via jlowe) + + YARN-2527. Fixed the potential NPE in ApplicationACLsManager and added test + cases for it. (Benoy Antony via zjshen) + + YARN-2628. Capacity scheduler with DominantResourceCalculator carries out + reservation even though slots are free. (Varun Vasudev via jianhe) + + YARN-2685. Fixed a bug in CommonNodeLabelsManager that caused wrong resource + tracking per label when a host runs multiple node-managers. (Wangda Tan via + vinodkv) + + YARN-2699. Fixed a bug in CommonNodeLabelsManager that caused tests to fail + when using ephemeral ports on NodeIDs. (Wangda Tan via vinodkv) + + YARN-2705. Fixed bugs in ResourceManager node-label manager that were causing + test-failures: added a dummy in-memory labels-manager. (Wangda Tan via + vinodkv) + + YARN-2715. Fixed ResourceManager to respect common configurations for proxy + users/groups beyond just the YARN level config. (Zhijie Shen via vinodkv) + + YARN-2743. Fixed a bug in ResourceManager that was causing RMDelegationToken + identifiers to be tampered and thus causing app submission failures in + secure mode. (Jian He via vinodkv) + + BREAKDOWN OF YARN-1051 SUBTASKS AND RELATED JIRAS + + YARN-1707. Introduce APIs to add/remove/resize queues in the + CapacityScheduler. (Carlo Curino and Subru Krishnan via curino) + + YARN-2475. Logic for responding to capacity drops for the + ReservationSystem. (Carlo Curino and Subru Krishnan via curino) + + YARN-1708. 
Public YARN APIs for creating/updating/deleting + reservations. (Subru Krishnan and Carlo Curino via subru) + + YARN-1709. In-memory data structures used to track resources over + time to enable reservations. (Subru Krishnan and Carlo Curino via + subru) + + YARN-1710. Logic to find allocations within a Plan that satisfy + user ReservationRequest(s). (Carlo Curino and Subru Krishnan via + curino) + + YARN-1711. Policy to enforce instantaneous and over-time quotas + on user reservations. (Carlo Curino and Subru Krishnan via curino) + + YARN-1712. Plan follower that synchronizes the current state of reservation + subsystem with the scheduler. (Subru Krishnan and Carlo Curino via subru) + + YARN-2080. Integrating reservation system with ResourceManager and + client-RM protocol. (Subru Krishnan and Carlo Curino via subru) + + MAPREDUCE-6103. Adding reservation APIs to MR resource manager + delegate. (Subru Krishnan and Carlo Curino via subru) + + YARN-2576. Fixing compilation, javadocs and audit issues to pass + test patch in branch. (Subru Krishnan and Carlo Curino via subru) + + YARN-2611. Fixing jenkins findbugs warning and TestRMWebServicesCapacitySched + for branch YARN-1051. (Subru Krishnan and Carlo Curino via subru) + + YARN-2644. Fixed CapacityScheduler to return up-to-date headroom when + AM allocates. (Craig Welch via jianhe) + + YARN-1857. CapacityScheduler headroom doesn't account for other AM's running. + (Chen He and Craig Welch via jianhe) + + YARN-2649. Fixed TestAMRMRPCNodeUpdates test failure. (Ming Ma via jianhe) + + YARN-2662. TestCgroupsLCEResourcesHandler leaks file descriptors. (cnauroth) + + BREAKDOWN OF YARN-913 SUBTASKS AND RELATED JIRAS + + YARN-2652 Add hadoop-yarn-registry package under hadoop-yarn. (stevel) + + YARN-2668 yarn-registry JAR won't link against ZK 3.4.5. 
(stevel) + + YARN-2689 TestSecureRMRegistryOperations failing on windows: + secure ZK won't start (stevel) + + YARN-2692 ktutil test hanging on some machines/ktutil versions (stevel) + + YARN-2700 TestSecureRMRegistryOperations failing on windows: auth problems + (stevel) + + YARN-2677 registry punycoding of usernames doesn't fix all usernames to be + DNS-valid (stevel) + + YARN-2768 Improved Yarn Registry service record structure (stevel) + + --- + + YARN-2598 GHS should show N/A instead of null for the inaccessible information + (Zhijie Shen via mayank) + + YARN-2671. Fixed ApplicationSubmissionContext to still set resource for + backward compatibility. (Wangda Tan via zjshen) + + YARN-2667. Fix the release audit warning caused by hadoop-yarn-registry + (Yi Liu via jlowe) + + YARN-2651. Spun off LogRollingInterval from LogAggregationContext. (Xuan Gong + via zjshen) + + YARN-2377. Localization exception stack traces are not passed as + diagnostic info (Gera Shegalov via jlowe) + + YARN-2308. Changed CapacityScheduler to explicitly throw exception if the + queue to which the apps were submitted is changed across RM restart. + (Craig Welch & Chang Li via jianhe) + + YARN-2566. DefaultContainerExecutor should pick a working directory randomly. + (Zhihai Xu via kasha) + + YARN-2588. Standby RM fails to transitionToActive if previous + transitionToActive failed with ZK exception. (Rohith Sharmaks via jianhe) + + YARN-2701. Potential race condition in startLocalizer when using + LinuxContainerExecutor. (Xuan Gong via jianhe) + + YARN-2717. Avoided duplicate logging when container logs are not found. (Xuan + Gong via zjshen) + + YARN-2720. Windows: Wildcard classpath variables not expanded against + resources contained in archives. (Craig Welch via cnauroth) + + YARN-2721. Suppress NodeExist exception thrown by ZKRMStateStore when it + retries creating znode. (Jian He via zjshen) + + YARN-2732. Fixed syntax error in SecureContainer.apt.vm. 
(Jian He via zjshen) + + YARN-2724. Skipped uploading a local log file to HDFS if exception is raised + when opening it. (Xuan Gong via zjshen) + + YARN-1915. Fixed a race condition that client could use the ClientToAMToken + to contact with AM before AM actually receives the ClientToAMTokenMasterKey. + (Jason Lowe via jianhe) + + YARN-2314. Disable ContainerManagementProtocolProxy cache by default to + prevent creating thousands of threads in a large cluster. (Jason Lowe via + jianhe) + + YARN-2723. Fix rmadmin -replaceLabelsOnNode does not correctly parse port. + (Naganarasimha G R via xgong) + + YARN-2734. Skipped sub-folders in the local log dir when aggregating logs. + (Xuan Gong via zjshen) + + YARN-2726. CapacityScheduler should explicitly log when an accessible + label has no capacity. (Wangda Tan via xgong) + + YARN-2591. Fixed AHSWebServices to return FORBIDDEN(403) if the request user + doesn't have access to the history data. (Zhijie Shen via jianhe) + + YARN-2279. Add UTs to cover timeline server authentication. + (Zhijie Shen via xgong) + + YARN-2758. Update TestApplicationHistoryClientService to use the new generic + history store. (Zhijie Shen via xgong) + + YARN-2741. Made NM web UI serve logs on the drive other than C: on Windows. (Craig + Welch via zjshen) + + YARN-2747. Fixed the test failure of TestAggregatedLogFormat when native I/O is + enabled. (Xuan Gong via zjshen) + + YARN-2769. Fixed the problem that timeline domain is not set in distributed shell + AM when using shell_command on Windows. (Varun Vasudev via zjshen) + + YARN-2755. NM fails to clean up usercache_DEL_ dirs after + YARN-661 (Siqi Li via jlowe) + + YARN-2698. Moved some node label APIs to be correctly placed in client + protocol. (Wangda Tan via vinodkv) + + YARN-2789. Re-instated the NodeReport.newInstance private unstable API + modified in YARN-2698 so that tests in YARN frameworks don't break. (Wangda + Tan via vinodkv) + + YARN-2707. 
Potential null dereference in FSDownload (Gera Shegalov via + jlowe) + + YARN-2711. Fixed TestDefaultContainerExecutor#testContainerLaunchError failure on + Windows. (Varun Vasudev via zjshen) + + YARN-2790. Fixed a NodeManager bug that was causing log-aggregation to fail + beyond HDFS delegation-token expiry even when RM is a proxy-user (YARN-2704). + (Jian He via vinodkv) + + YARN-2785. Fixed intermittent TestContainerResourceUsage failure. (Varun Vasudev + via zjshen) + + YARN-2730. DefaultContainerExecutor runs only one localizer at a time + (Siqi Li via jlowe) + + YARN-2798. Fixed YarnClient to populate the renewer correctly for Timeline + delegation tokens. (Zhijie Shen via vinodkv) + + YARN-2788. Fixed backwards compatibility issues with log-aggregation feature + that were caused when adding log-upload-time via YARN-2703. (Xuan Gong via + vinodkv) + + YARN-2795. Fixed ResourceManager to not crash loading node-label data from + HDFS in secure mode. (Wangda Tan via vinodkv) + + YARN-1922. Fixed NodeManager to kill process-trees correctly in the presence + of races between the launch and the stop-container call and when root + processes crash. (Billie Rinaldi via vinodkv) + + YARN-2010. Handle app-recovery failures gracefully. + (Jian He and Karthik Kambatla via kasha) + + YARN-2804. Fixed Timeline service to not fill the logs with JAXB bindings + exceptions. (Zhijie Shen via vinodkv) + + YARN-2767. Added a test case to verify that http static user cannot kill or submit + apps in the secure mode. (Varun Vasudev via zjshen) + + YARN-2805. Fixed ResourceManager to load HA configs correctly before kerberos + login. (Wangda Tan via vinodkv) + + YARN-2579. Fixed a deadlock issue when EmbeddedElectorService and + FatalEventDispatcher try to transition RM to StandBy at the same time. + (Rohith Sharmaks via jianhe) + + YARN-2813. Fixed NPE from MemoryTimelineStore.getDomains. (Zhijie Shen via xgong) + + YARN-2812.
TestApplicationHistoryServer is likely to fail on less powerful machine. + (Zhijie Shen via xgong) + + YARN-2744. Fixed CapacityScheduler to validate node-labels correctly against + queues. (Wangda Tan via vinodkv) + + YARN-2823. Fixed ResourceManager app-attempt state machine to inform + schedulers about previous finished attempts of a running application to avoid + expectation mismatch w.r.t transferred containers. (Jian He via vinodkv) + + YARN-2810. TestRMProxyUsersConf fails on Windows VMs. (Varun Vasudev via xgong) + + YARN-2824. Fixed Capacity Scheduler to not crash when some node-labels are + not mapped to queues by making default capacities per label to be zero. + (Wangda Tan via vinodkv) + + YARN-2827. Fixed bugs in "yarn queue" CLI. (Wangda Tan via vinodkv) + + YARN-2803. MR distributed cache not working correctly on Windows after + NodeManager privileged account changes. (Craig Welch via cnauroth) + + YARN-2753. Fixed a bunch of bugs in the NodeLabelsManager classes. (Zhihai Xu + via vinodkv) + + YARN-2825. Container leak on NM (Jian He via jlowe) + + YARN-2819. NPE in ATS Timeline Domains when upgrading from 2.4 to 2.6. + (Zhijie Shen via xgong) + + YARN-2826. Fixed user-groups mappings' refresh bug caused by YARN-2826. + (Wangda Tan via vinodkv) + + YARN-2607. Fixed issues in TestDistributedShell. (Wangda Tan via vinodkv) + + YARN-2830. Add backwards compatible ContainerId.newInstance constructor. + (jeagles via acmurthy) + + YARN-2834. Fixed ResourceManager to ignore token-renewal failures on recovery + consistent with the (somewhat incorrect) behaviour in the non-recovery case. + (Jian He via vinodkv) + + YARN-2841. RMProxy should retry EOFException. (Jian He via xgong) + + YARN-2843. Fixed NodeLabelsManager to trim inputs for hosts and labels so + as to make them work correctly. (Wangda Tan via vinodkv) + + YARN-2794. Fixed log messages about distributing system-credentials. (Jian He via + zjshen) + + YARN-2846.
Incorrect persist exit code for running containers in + reacquireContainer() that interrupted by NodeManager restart. (Junping Du + via jlowe) + + YARN-2853. Fixed a bug in ResourceManager causing apps to hang when the user + kill request races with ApplicationMaster finish. (Jian He via vinodkv) + +Release 2.5.2 - 2014-11-19 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + +Release 2.5.1 - 2014-09-05 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.5.0 - 2014-08-11 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-1757. NM Recovery. Auxiliary service support. (Jason Lowe via kasha) + + YARN-1864. Fair Scheduler Dynamic Hierarchical User Queues (Ashwin Shankar + via Sandy Ryza) + + YARN-1362. Distinguish between nodemanager shutdown for decommission vs shutdown + for restart. (Jason Lowe via junping_du) + + YARN-1338. Recover localized resource cache state upon nodemanager restart + (Jason Lowe via junping_du) + + YARN-1368. Added core functionality of recovering container state into + schedulers after ResourceManager Restart so as to preserve running work in + the cluster. (Jian He via vinodkv) + + YARN-1702. Added kill app functionality to RM web services. (Varun Vasudev + via vinodkv) + + YARN-1339. Recover DeletionService state upon nodemanager restart. (Jason Lowe + via junping_du) + + YARN-1365. Changed ApplicationMasterService to allow an app to re-register + after RM restart. (Anubhav Dhoot via jianhe) + + YARN-2052. Embedded an epoch number in container id to ensure the uniqueness + of container id after RM restarts. (Tsuyoshi OZAWA via jianhe) + + YARN-1713. Added get-new-app and submit-app functionality to RM web services. + (Varun Vasudev via vinodkv) + + YARN-2233. Implemented ResourceManager web-services to create, renew and + cancel delegation tokens. (Varun Vasudev via vinodkv) + + YARN-2247. 
Made RM web services authenticate users via kerberos and delegation + token. (Varun Vasudev via zjshen) + + IMPROVEMENTS + + YARN-1479. Invalid NaN values in Hadoop REST API JSON response (Chen He via + jeagles) + + YARN-1736. FS: AppSchedulable.assignContainer's priority argument is + redundant. (Naren Koneru via kasha) + + YARN-1678. Fair scheduler gabs incessantly about reservations (Sandy Ryza) + + YARN-1561. Fix a generic type warning in FairScheduler. (Chen He via junping_du) + + YARN-1429. *nix: Allow a way for users to augment classpath of YARN daemons. + (Jarek Jarcec Cecho via kasha) + + YARN-1520. update capacity scheduler docs to include necessary parameters + (Chen He via jeagles) + + YARN-1845. Elapsed time for failed tasks that never started is wrong + (Rushabh S Shah via jeagles) + + YARN-1136. Replace junit.framework.Assert with org.junit.Assert (Chen He + via jeagles) + + YARN-1889. In Fair Scheduler, avoid creating objects on each call to + AppSchedulable comparator (Hong Zhiguo via Sandy Ryza) + + YARN-1923. Make Fair Scheduler resource ratio calculations terminate faster + (Anubhav Dhoot via Sandy Ryza) + + YARN-1870. FileInputStream is not closed in ProcfsBasedProcessTree#constructProcessSMAPInfo. + (Fengdong Yu via junping_du) + + YARN-1970. Prepare YARN codebase for JUnit 4.11. (cnauroth) + + YARN-483. Improve documentation on log aggregation in yarn-default.xml + (Akira Ajisaka via Sandy Ryza) + + YARN-2036. Document yarn.resourcemanager.hostname in ClusterSetup (Ray + Chiang via Sandy Ryza) + + YARN-766. TestNodeManagerShutdown in branch-2 should use Shell to form the output path and a format + issue in trunk. (Contributed by Siddharth Seth) + + YARN-1982. Renamed the daemon name to be TimelineServer instead of History + Server and deprecated the old usage. (Zhijie Shen via vinodkv) + + YARN-1987. Wrapper for leveldb DBIterator to aid in handling database exceptions. + (Jason Lowe via kasha) + + YARN-1751. 
Improve MiniYarnCluster for log aggregation testing (Ming Ma + via jlowe) + + YARN-1981. Nodemanager version is not updated when a node reconnects (Jason + Lowe via jeagles) + + YARN-1938. Added kerberos login for the Timeline Server. (Zhijie Shen via + vinodkv) + + YARN-2017. Merged some of the common scheduler code. (Jian He via vinodkv) + + YARN-2049. Added delegation-token support for the Timeline Server. (Zhijie + Shen via vinodkv) + + YARN-1936. Added security support for the Timeline Client. (Zhijie Shen via + vinodkv) + + YARN-1937. Added owner-only ACLs support for Timeline Client and server. + (Zhijie Shen via vinodkv) + + YARN-2012. Fair Scheduler: allow default queue placement rule to take an + arbitrary queue (Ashwin Shankar via Sandy Ryza) + + YARN-2059. Added admin ACLs support to Timeline Server. (Zhijie Shen via + vinodkv) + + YARN-2073. Fair Scheduler: Add a utilization threshold to prevent preempting + resources when cluster is free (Karthik Kambatla via Sandy Ryza) + + YARN-2071. Modified levelDB store permissions to be readable only by the + server user. (Zhijie Shen via vinodkv) + + YARN-2107. Refactored timeline classes into o.a.h.y.s.timeline package. (Vinod + Kumar Vavilapalli via zjshen) + + YARN-596. Use scheduling policies throughout the queue hierarchy to decide + which containers to preempt (Wei Yan via Sandy Ryza) + + YARN-2054. Better defaults for YARN ZK configs for retries and retry-interval + when HA is enabled. (kasha) + + YARN-1877. Document yarn.resourcemanager.zk-auth and its scope. + (Robert Kanter via kasha) + + YARN-2115. Replaced RegisterNodeManagerRequest's ContainerStatus with a new + NMContainerStatus which has more information that is needed for + work-preserving RM-restart. (Jian He via vinodkv) + + YARN-1474. Make schedulers services. (Tsuyoshi Ozawa via kasha) + + YARN-1913. With Fair Scheduler, cluster can logjam when all resources are + consumed by AMs (Wei Yan via Sandy Ryza) + + YARN-2061.
Revisit logging levels in ZKRMStateStore. (Ray Chiang via kasha) + + YARN-1977. Add tests on getApplicationRequest with filtering start time range. (junping_du) + + YARN-2122. In AllocationFileLoaderService, the reloadThread should be created + in init() and started in start(). (Robert Kanter via kasha) + + YARN-2132. ZKRMStateStore.ZKAction#runWithRetries doesn't log the exception + it encounters. (Vamsee Yarlagadda via kasha) + + YARN-2030. Augmented RMStateStore with state machine.(Binglin Chang via jianhe) + + YARN-1424. RMAppAttemptImpl should return the + DummyApplicationResourceUsageReport for all invalid accesses. + (Ray Chiang via kasha) + + YARN-2091. Add more values to ContainerExitStatus and pass it from NM to + RM and then to app masters (Tsuyoshi OZAWA via bikas) + + YARN-2125. Changed ProportionalCapacityPreemptionPolicy to log CSV in debug + level. (Wangda Tan via jianhe) + + YARN-2159. Better logging in SchedulerNode#allocateContainer. + (Ray Chiang via kasha) + + YARN-2191. Added a new test to ensure NM will clean up completed applications + in the case of RM restart. (Wangda Tan via jianhe) + + YARN-2195. Clean a piece of code in ResourceRequest. (Wei Yan via devaraj) + + YARN-2074. Changed ResourceManager to not count AM preemptions towards app + failures. (Jian He via vinodkv) + + YARN-2192. TestRMHA fails when run with a mix of Schedulers. + (Anubhav Dhoot via kasha) + + YARN-2109. Fix TestRM to work with both schedulers. (Anubhav Dhoot via kasha) + + YARN-2072. RM/NM UIs and webservices are missing vcore information. + (Nathan Roberts via tgraves) + + YARN-2152. Added missing information into ContainerTokenIdentifier so that + NodeManagers can report the same to RM when RM restarts. (Jian He via vinodkv) + + YARN-2171. Improved CapacityScheduling to not lock on nodemanager-count when + AMs heartbeat in. (Jason Lowe via vinodkv) + + YARN-614. Changed ResourceManager to not count disk failure, node loss and + RM restart towards app failures. 
(Xuan Gong via jianhe) + + YARN-2224. Explicitly enable vmem check in + TestContainersMonitor#testContainerKillOnMemoryOverflow. + (Anubhav Dhoot via kasha) + + YARN-2022. Preempting an Application Master container can be kept as least priority + when multiple applications are marked for preemption by + ProportionalCapacityPreemptionPolicy (Sunil G via mayank) + + YARN-2241. ZKRMStateStore: On startup, show nicer messages if znodes already + exist. (Robert Kanter via kasha) + + YARN-1408 Preemption caused Invalid State Event: ACQUIRED at KILLED and + caused a task timeout for 30mins. (Sunil G via mayank) + + YARN-2300. Improved the documentation of the sample requests for RM REST API - + submitting an app. (Varun Vasudev via zjshen) + + OPTIMIZATIONS + + BUG FIXES + + YARN-1718. Fix a couple isTerminals in Fair Scheduler queue placement rules + (Sandy Ryza) + + YARN-1790. Fair Scheduler UI not showing apps table (bc Wong via Sandy Ryza) + + YARN-1784. TestContainerAllocation assumes CapacityScheduler. + (Robert Kanter via kasha) + + YARN-1940. deleteAsUser() terminates early without deleting more files on + error (Rushabh S Shah via jlowe) + + YARN-1865. ShellScriptBuilder does not check for some error conditions. + (Remus Rusanu via ivanmi) + + YARN-738. TestClientRMTokens is failing irregularly while running all yarn + tests (Ming Ma via jlowe) + + YARN-2018. TestClientRMService.testTokenRenewalWrongUser fails after + HADOOP-10562 (Ming Ma via Arpit Agarwal) + + YARN-2011. Fix typo and warning in TestLeafQueue (Chen He via junping_du) + + + YARN-2042. String shouldn't be compared using == in + QueuePlacementRule#NestedUserQueue#getQueueForApp (Chen He via Sandy Ryza) + + YARN-2050. Fix LogCLIHelpers to create the correct FileContext (Ming Ma + via jlowe) + + YARN-2089. FairScheduler: QueuePlacementPolicy and QueuePlacementRule + are missing audience annotations. (Zhihai Xu via kasha) + + YARN-2096. Race in TestRMRestart#testQueueMetricsOnRMRestart. 
+ (Anubhav Dhoot via kasha) + + YARN-2105. Fix TestFairScheduler after YARN-2012. (Ashwin Shankar via + Sandy Ryza) + + YARN-2112. Fixed yarn-common's pom.xml to include jackson dependencies so + that both Timeline Server and client can access them. (Zhijie Shen via + vinodkv) + + YARN-1868. YARN status web ui does not show correctly in IE 11. + (Chuan Liu via cnauroth) + + YARN-2103. Inconsistency between viaProto flag and initial value of + SerializedExceptionProto.Builder (Binglin Chang via junping_du) + + YARN-1550. NPE in FairSchedulerAppsBlock#render. (Anubhav Dhoot via kasha) + + YARN-2119. DEFAULT_PROXY_ADDRESS should use DEFAULT_PROXY_PORT. + (Anubhav Dhoot via kasha) + + YARN-2118. Fixed the type mismatch in Map#containsKey check of + TimelineWebServices#injectOwnerInfo. (Ted Yu via zjshen) + + YARN-2117. Fixed the issue that secret file reader is potentially not + closed in TimelineAuthenticationFilterInitializer. (Chen He via zjshen) + + YARN-2121. Fixed NPE handling in Timeline Server's TimelineAuthenticator. + (Zhijie Shen via vinodkv) + + YARN-2128. FairScheduler: Incorrect calculation of amResource usage. + (Wei Yan via kasha) + + YARN-2124. Fixed NPE in ProportionalCapacityPreemptionPolicy. (Wangda Tan + via jianhe) + + YARN-2148. TestNMClient failed due to more exit code values added and passed + to AM (Wangda Tan via bikas) + + YARN-2075. Fixed the test failure of TestRMAdminCLI. (Kenji Kikushima via + zjshen) + + YARN-2155. FairScheduler: Incorrect threshold check for preemption. + (Wei Yan via kasha) + + YARN-1885. Fixed a bug that RM may not send application-clean-up signal + to NMs where the completed applications previously ran in case of RM restart. + (Wangda Tan via jianhe) + + YARN-2167. LeveldbIterator should get closed in + NMLeveldbStateStoreService#loadLocalizationState() within finally block + (Junping Du via jlowe) + + YARN-2187. FairScheduler: Disable max-AM-share check by default. + (Robert Kanter via kasha) + + YARN-2111.
In FairScheduler.attemptScheduling, we don't count containers + as assigned if they have 0 memory but non-zero cores (Sandy Ryza) + + YARN-2204. TestAMRestart#testAMRestartWithExistingContainers assumes + CapacityScheduler. (Robert Kanter via kasha) + + YARN-2163. WebUI: Order of AppId in apps table should be consistent with + ApplicationId.compareTo(). (Wangda Tan via raviprak) + + YARN-2104. Scheduler queue filter failed to work because index of queue + column changed. (Wangda Tan via jlowe) + + YARN-2201. Made TestRMWebServicesAppsModification be independent of the + changes on yarn-default.xml. (Varun Vasudev via zjshen) + + YARN-2216 YARN-2065 AM cannot create new containers after restart + (Jian He via stevel) + + YARN-2232. Fixed ResourceManager to allow DelegationToken owners to be able + to cancel their own tokens in secure mode. (Varun Vasudev via vinodkv) + + YARN-2250. FairScheduler.findLowestCommonAncestorQueue returns null when + queues not identical (Krisztian Horvath via Sandy Ryza) + + YARN-2158. Improved assertion messages of TestRMWebServicesAppsModification. + (Varun Vasudev via zjshen) + + YARN-2269. Remove external links from YARN UI. (Craig Welch via xgong) + + YARN-2270. Made TestFSDownload#testDownloadPublicWithStatCache be skipped + when there’s no ancestor permissions. (Akira Ajisaka via zjshen) + + YARN-2319. Made the MiniKdc instance start/close before/after the class of + TestRMWebServicesDelegationTokens. (Wenwu Peng via zjshen) + + YARN-2335. Annotate all hadoop-sls APIs as @Private. (Wei Yan via kasha) + + YARN-1726. ResourceSchedulerWrapper broken due to AbstractYarnScheduler. + (Wei Yan via kasha) + + YARN-2216. TestRMApplicationHistoryWriter sometimes fails in trunk. + (Zhijie Shen via xgong) + +Release 2.4.1 - 2014-06-23 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-1892. Improved some logs in the scheduler. (Jian He via zjshen) + + YARN-1696. Added documentation for ResourceManager fail-over. 
(Karthik + Kambatla, Masatake Iwasaki, Tsuyoshi OZAWA via vinodkv) + + YARN-1701. Improved default paths of the timeline store and the generic + history store. (Tsuyoshi Ozawa via zjshen) + + YARN-1962. Changed Timeline Service client configuration to be off by default + given the non-readiness of the feature yet. (Mohammad Kamrul Islam via + vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-1898. Made Standby RM links conf, stacks, logLevel, metrics, jmx, logs + and static not be redirected to Active RM. (Xuan Gong via zjshen) + + YARN-1837. Fixed TestMoveApplication#testMoveRejectedByScheduler failure. + (Hong Zhiguo via jianhe) + + YARN-1905. TestProcfsBasedProcessTree must only run on Linux. (cnauroth) + + YARN-1883. TestRMAdminService fails due to inconsistent entries in + UserGroups (Mit Desai via jeagles) + + YARN-1908. Fixed DistributedShell to not fail in secure clusters. (Vinod + Kumar Vavilapalli and Jian He via vinodkv) + + YARN-1910. Fixed a race condition in TestAMRMTokens that causes the test to + fail more often on Windows. (Xuan Gong via vinodkv) + + YARN-1920. Fixed TestFileSystemApplicationHistoryStore failure on windows. + (Vinod Kumar Vavilapalli via zjshen) + + YARN-1914. Fixed resource-download on NodeManagers to skip permission + verification of public cache files in Windows+local file-system environment. + (Varun Vasudev via vinodkv) + + YARN-1903. Set exit code and diagnostics when container is killed at + NEW/LOCALIZING state. (Zhijie Shen via jianhe) + + YARN-1924. Made ZKRMStateStore updateApplication(Attempt)StateInternal work + when Application(Attempt) state hasn't been stored before. (Jian He via + zjshen) + + YARN-1926. Changed DistributedShell to use appIDs as unique identifiers for + HDFS paths and thus avoid test failures on Windows. (Varun Vasudev via + vinodkv) + + YARN-1833. TestRMAdminService Fails in trunk and branch-2 (Mit Desai via + jeagles) + + YARN-1907. 
TestRMApplicationHistoryWriter#testRMWritingMassiveHistory + intermittently fails. (Mit Desai via kihwal) + + YARN-1933. Fixed test issues with TestAMRestart and TestNodeHealthService. + (Jian He via vinodkv) + + YARN-1928. Fixed a race condition in TestAMRMRPCNodeUpdates which caused it + to fail occasionally. (Zhijie Shen via vinodkv) + + YARN-1934. Fixed a potential NPE in ZKRMStateStore caused by handling + Disconnected event from ZK. (Karthik Kambatla via jianhe) + + YARN-1931. Private API change in YARN-1824 in 2.4 broke compatibility + with previous releases (Sandy Ryza via tgraves) + + YARN-1750. TestNodeStatusUpdater#testNMRegistration is incorrect in test + case. (Wangda Tan via junping_du) + + YARN-1947. TestRMDelegationTokens#testRMDTMasterKeyStateOnRollingMasterKey + is failing intermittently. (Jian He via junping_du) + + YARN-1281. Fixed TestZKRMStateStoreZKClientConnections to not fail + intermittently due to ZK-client timeouts. (Tsuyoshi Ozawa via vinodkv) + + YARN-1932. Javascript injection on the job status page (Mit Desai via + jlowe) + + YARN-1975. Used resources shows escaped html in CapacityScheduler and + FairScheduler page (Mit Desai via jlowe) + + YARN-1929. Fixed a deadlock in ResourceManager that occurs when failover + happens right at the time of shutdown. (Karthik Kambatla via vinodkv) + + YARN-1201. TestAMAuthorization fails with local hostname cannot be resolved. + (Wangda Tan via junping_du) + + YARN-1861. Fixed a bug in RM to reset leader-election on fencing that was + causing both RMs to be stuck in standby mode when automatic failover is + enabled. (Karthik Kambatla and Xuan Gong via vinodkv) + + YARN-1957. Consider the max capacity of the queue when computing the ideal + capacity for preemption. (Carlo Curino via cdouglas) + + YARN-1986. In Fifo Scheduler, node heartbeat in between creating app and + attempt causes NPE (Hong Zhiguo via Sandy Ryza) + + YARN-1976.
Fix yarn application CLI to print the scheme of the tracking url + of failed/killed applications. (Junping Du via jianhe) + + YARN-2016. Fix a bug in GetApplicationsRequestPBImpl to add the missed fields + to proto. (Junping Du via jianhe) + + YARN-2053. Fixed a bug in AMS to not add null NMToken into NMTokens list from + previous attempts for work-preserving AM restart. (Wangda Tan via jianhe) + + YARN-2066. Wrong field is referenced in GetApplicationsRequestPBImpl#mergeLocalToBuilder() + (Hong Zhiguo via junping_du) + + YARN-2081. Fixed TestDistributedShell failure after YARN-1962. (Zhiguo Hong + via zjshen) + +Release 2.4.0 - 2014-04-07 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-930. Bootstrapping ApplicationHistoryService module. (vinodkv) + + YARN-947. Implementing the data objects to be used by the History reader + and writer interfaces. (Zhijie Shen via vinodkv) + + YARN-934. Defined a Writer Interface for HistoryStorage. (Zhijie Shen via + vinodkv) + + YARN-925. Defined a Reader Interface for HistoryStorage. (Mayank Bansal via + vinodkv) + + YARN-978. Created ApplicationAttemptReport. (Mayank Bansal via vinodkv) + + YARN-956. Added a testable in-memory HistoryStorage. (Mayank Bansal via + vinodkv) + + YARN-975. Added a file-system implementation for HistoryStorage. (Zhijie Shen + via vinodkv) + + YARN-1123. Added a new ContainerReport and its Protobuf implementation. (Mayank + Bansal via vinodkv) + + YARN-979. Added more APIs for getting information about ApplicationAttempts + and Containers from ApplicationHistoryProtocol. (Mayank Bansal and Zhijie Shen + via vinodkv) + + YARN-953. Changed ResourceManager to start writing history data. (Zhijie Shen + via vinodkv) + + YARN-1266. Implemented PB service and client wrappers for + ApplicationHistoryProtocol. (Mayank Bansal via vinodkv) + + YARN-955. Implemented ApplicationHistoryProtocol handler. (Mayank Bansal via + vinodkv) + + YARN-1242. 
Changed yarn scripts to be able to start ApplicationHistoryServer + as an individual process. (Mayank Bansal via vinodkv) + + YARN-954. Implemented web UI for the ApplicationHistoryServer and wired it into + the HistoryStorage. (Zhijie Shen via vinodkv) + + YARN-967. Added the client and CLI interfaces for obtaining ApplicationHistory + data. (Mayank Bansal via vinodkv) + + YARN-1023. Added Webservices REST APIs support for Application History. (Zhijie + Shen via vinodkv) + + YARN-1413. Implemented serving of aggregated-logs in the ApplicationHistory + server. (Mayank Bansal via vinodkv) + + YARN-1633. Defined user-facing entity, entity-info and event objects related + to Application Timeline feature. (Zhijie Shen via vinodkv) + + YARN-1611. Introduced the concept of a configuration provider which can be + used by ResourceManager to read configuration locally or from remote systems + so as to help RM failover. (Xuan Gong via vinodkv) + + YARN-1659. Defined the ApplicationTimelineStore store as an abstraction for + implementing different storage impls for storing timeline information. + (Billie Rinaldi via vinodkv) + + YARN-1634. Added a testable in-memory implementation of + ApplicationTimelineStore. (Zhijie Shen via vinodkv) + + YARN-1461. Added tags for YARN applications and changed RM to handle them. + (Karthik Kambatla via zjshen) + + YARN-1636. Augmented Application-history server's web-services to also expose + new APIs for retrieving and storing timeline information. (Zhijie Shen via + vinodkv) + + YARN-1490. Introduced the ability to make ResourceManager optionally not kill + all containers when an ApplicationMaster exits. (Jian He via vinodkv) + + YARN-1041. Added the ApplicationMasterProtocol API for applications to use the + ability in ResourceManager to optionally not kill containers when the + ApplicationMaster exits. (Jian He via vinodkv) + + YARN-1566. Changed Distributed Shell to retain containers across application + attempts. 
(Jian He via vinodkv) + + YARN-1635. Implemented a Leveldb based ApplicationTimelineStore. (Billie + Rinaldi via zjshen) + + YARN-1637. Implemented a client library for Java users to post timeline + entities and events. (zjshen) + + YARN-1496. Protocol additions to allow moving apps between queues (Sandy + Ryza) + + YARN-1498. Common scheduler changes for moving apps between queues (Sandy + Ryza) + + YARN-1504. RM changes for moving apps between queues (Sandy Ryza) + + YARN-1499. Fair Scheduler changes for moving apps between queues (Sandy + Ryza) + + YARN-1497. Command line additions for moving apps between queues (Sandy + Ryza) + + YARN-1588. Enhanced RM and the scheduling protocol to also send NMTokens of + transferred containers from previous app-attempts to new AMs after YARN-1490. + (Jian He via vinodkv) + + YARN-1717. Enabled periodically discarding old data in LeveldbTimelineStore. + (Billie Rinaldi via zjshen) + + YARN-1690. Made DistributedShell send timeline entities+events. (Mayank Bansal + via zjshen) + + YARN-1775. Enhanced ProcfsBasedProcessTree to optionally add the ability to + use smaps for obtaining used memory information. (Rajesh Balamohan via + vinodkv) + + YARN-1838. Enhanced timeline service getEntities API to get entities from a + given entity ID or insertion timestamp. (Billie Rinaldi via zjshen) + + IMPROVEMENTS + + YARN-1007. Enhance History Reader interface for Containers. (Mayank Bansal via + devaraj) + + YARN-974. Added more information to RMContainer to be collected and recorded in + Application-History. (Zhijie Shen via vinodkv) + + YARN-987. Added ApplicationHistoryManager responsible for exposing reports to + all clients. (Mayank Bansal via vinodkv) + + YARN-1630. Introduce timeout for async polling operations in YarnClientImpl + (Aditya Acharya via Sandy Ryza) + + YARN-1617. Remove ancient comment and surround LOG.debug in + AppSchedulingInfo.allocate (Sandy Ryza) + + YARN-1639. 
Modified RM HA configuration handling to have a way of not + requiring separate configuration files for each RM. (Xuan Gong via vinodkv) + + YARN-1668. Modified RM HA handling of admin-acls to be available across RM + failover by making use of a remote configuration-provider. (Xuan Gong via + vinodkv) + + YARN-1667. Modified RM HA handling of super users (with proxying ability) to + be available across RM failover by making use of a remote + configuration-provider. (Xuan Gong via vinodkv) + + YARN-1285. Changed the default value of yarn.acl.enable in yarn-default.xml + to be consistent with what exists (false) in the code and documentation. + (Kenji Kikushima via vinodkv) + + YARN-1669. Modified RM HA handling of protocol level service-ACLS to + be available across RM failover by making use of a remote + configuration-provider. (Xuan Gong via vinodkv) + + YARN-1665. Simplify the configuration of RM HA by having better default + values. (Xuan Gong via vinodkv) + + YARN-1660. Simplified the RM HA configuration to accept and be able to simply + depend just on configuration properties of the form + yarn.resourcemanager.hostname.RMID and use the default ports for all service + addresses. (Xuan Gong via vinodkv) + + YARN-1493. Changed ResourceManager and Scheduler interfacing to recognize + app-attempts separately from apps. (Jian He via vinodkv) + + YARN-1459. Changed ResourceManager to depend its service initialization + on the configuration-provider mechanism during startup too. (Xuan Gong via + vinodkv) + + YARN-1706. Created an utility method to dump timeline records to JSON + strings. (zjshen) + + YARN-1641. ZK store should attempt a write periodically to ensure it is + still Active. (kasha) + + YARN-1531. True up yarn command documentation (Akira Ajisaka via kasha) + + YARN-1345. Remove FINAL_SAVING state from YarnApplicationAttemptState + (Zhijie Shen via jianhe) + + YARN-1676.
Modified RM HA handling of user-to-group mappings to + be available across RM failover by making use of a remote + configuration-provider. (Xuan Gong via vinodkv) + + YARN-1666. Modified RM HA handling of include/exclude node-lists to be + available across RM failover by making use of a remote + configuration-provider. (Xuan Gong via vinodkv) + + YARN-1171. Add default queue properties to Fair Scheduler documentation + (Naren Koneru via Sandy Ryza) + + YARN-1470. Add audience annotations to MiniYARNCluster. (Anubhav Dhoot + via kasha) + + YARN-1732. Changed types of related-entities and primary-filters in the + timeline-service to be sets instead of maps. (Billie Rinaldi via vinodkv) + + YARN-1687. Renamed user-facing records for the timeline-service to be simply + named after 'timeline' instead of 'apptimeline'. (Zhijie Shen via vinodkv) + + YARN-1749. Updated application-history related configs to reflect the latest + reality and to be consistently named. (Zhijie Shen via vinodkv) + + YARN-1301. Added the INFO level log of the non-empty blacklist additions + and removals inside ApplicationMasterService. (Tsuyoshi Ozawa via zjshen) + + YARN-1528. Allow setting auth for ZK connections. (kasha) + + YARN-1704. Modified LICENSE and NOTICE files to reflect newly used levelDB + related libraries. (Billie Rinaldi via vinodkv) + + YARN-1765. Added test cases to verify that killApplication API works across + ResourceManager failover. (Xuan Gong via vinodkv) + + YARN-1730. Implemented simple write-locking in the LevelDB based timeline- + store. (Billie Rinaldi via vinodkv) + + YARN-986. Changed client side to be able to figure out the right RM Delegation + token for the right ResourceManager when HA is enabled. (Karthik Kambatla via + vinodkv) + + YARN-1761. Modified RMAdmin CLI to check whether HA is enabled or not before + it executes any of the HA admin related commands. (Xuan Gong via vinodkv) + + YARN-1780. Improved logging in the Timeline client and server.
(Zhijie Shen + via vinodkv) + + YARN-1525. Web UI should redirect to active RM when HA is enabled. (Cindy Li + via kasha) + + YARN-1781. Modified NodeManagers to allow admins to specify max disk + utilization for local disks so as to be able to offline full disks. (Varun + Vasudev via vinodkv) + + YARN-1410. Added tests to validate that clients can fail-over to a new RM + after getting an application-ID but before submission and can still submit to + the newly active RM with no issues. (Xuan Gong via vinodkv) + + YARN-1764. Modified YarnClient to correctly handle failover of ResourceManager + after the submitApplication call goes through. (Xuan Gong via vinodkv) + + YARN-1389. Made ApplicationClientProtocol and ApplicationHistoryProtocol + expose analogous getApplication(s)/Attempt(s)/Container(s) APIs. (Mayank + Bansal via zjshen) + + YARN-1658. Modified web-app framework to let standby RMs redirect + web-service calls to the active RM. (Cindy Li via vinodkv) + + YARN-1824. Improved NodeManager and clients to be able to handle cross + platform application submissions. (Jian He via vinodkv) + + YARN-1512. Enhanced CapacityScheduler to be able to decouple scheduling from + node-heartbeats. (Arun C Murthy via vinodkv) + + YARN-1570. Fixed formatting of the lines in YarnCommands.apt.vm docs source. + (Akira Ajisaka via vinodkv) + + YARN-1536. Cleanup: Get rid of ResourceManager#get*SecretManager() methods + and use the RMContext methods instead. (Anubhav Dhoot via kasha) + + YARN-1850. Introduced the ability to optionally disable sending out timeline- + events in the TimelineClient. (Zhijie Shen via vinodkv) + + YARN-1452. Added documentation about the configuration and usage of generic + application history and the timeline data service. (Zhijie Shen via vinodkv) + + YARN-1891. Added documentation for NodeManager health-monitoring. (Varun + Vasudev via vinodkv) + + YARN-1017. Added documentation for ResourceManager Restart.(jianhe) + + OPTIMIZATIONS + + YARN-1771. 
Reduce the number of NameNode operations during localization of + public resources using a cache. (Sangjin Lee via cdouglas) + + BUG FIXES + + YARN-935. Correcting pom.xml to build applicationhistoryserver module + successfully. (Zhijie Shen via vinodkv) + + YARN-962. Fixed bug in application-history proto file and renamed it be just + a client proto file. (Zhijie Shen via vinodkv) + + YARN-984. Renamed the incorrectly named applicationhistoryservice.records.pb.impl + package to be applicationhistoryservice.records.impl.pb. (Devaraj K via vinodkv) + + YARN-1534. Fixed failure of test TestAHSWebApp. (Shinichi Yamashita via vinodkv) + + YARN-1555. Fixed test failures in applicationhistoryservice.* (Vinod Kumar + Vavilapalli via mayank) + + YARN-1594. Updated pom.xml of applicationhistoryservice sub-project according to + YARN-888. (Vinod Kumar Vavilapalli via zjshen) + + YARN-1596. Fixed Javadoc warnings on branch YARN-321. (Vinod Kumar Vavilapalli + via zjshen) + + YARN-1597. Fixed Findbugs warnings on branch YARN-321. (Vinod Kumar Vavilapalli + via zjshen) + + YARN-1595. Made enabling history service configurable and fixed test failures on + branch YARN-321. (Vinod Kumar Vavilapalli via zjshen) + + YARN-1605. Fixed formatting issues in the new module on branch YARN-321. (Vinod + Kumar Vavilapalli via zjshen) + + YARN-1625. Fixed RAT warnings after YARN-321 merge. (Shinichi Yamashita via + vinodkv) + + YARN-1613. Fixed the typo with the configuration name + YARN_HISTORY_SERVICE_ENABLED. (Akira Ajisaka via vinodkv) + + YARN-1618. Fix invalid RMApp transition from NEW to FINAL_SAVING (kasha) + + YARN-1600. RM does not startup when security is enabled without spnego + configured (Haohui Mai via jlowe) + + YARN-1642. RMDTRenewer#getRMClient should use ClientRMProxy (kasha) + + YARN-1632. TestApplicationMasterServices should be under + org.apache.hadoop.yarn.server.resourcemanager package (Chen He via jeagles) + + YARN-1673. 
Fix option parsing in YARN's application CLI after it is broken + by YARN-967. (Mayank Bansal via vinodkv) + + YARN-1684. Fixed history server heap size in yarn script. (Billie Rinaldi + via zjshen) + + YARN-1166. Fixed app-specific and attempt-specific QueueMetrics to be + triggered by accordingly app event and attempt event. + + YARN-1689. Made RMAppAttempt get killed when RMApp is at ACCEPTED. (Vinod + Kumar Vavilapalli via zjshen) + + YARN-1661. Fixed DS ApplicationMaster to write the correct exit log. (Vinod + Kumar Vavilapalli via zjshen) + + YARN-1672. YarnConfiguration is missing a default for + yarn.nodemanager.log.retain-seconds (Naren Koneru via kasha) + + YARN-1698. Fixed default TimelineStore in code to match what is documented + in yarn-default.xml (Zhijie Shen via vinodkv) + + YARN-1697. NodeManager reports negative running containers (Sandy Ryza) + + YARN-1719. Fixed the root path related Jersey warnings produced in + ATSWebServices. (Billie Rinaldi via zjshen) + + YARN-1692. ConcurrentModificationException in fair scheduler AppSchedulable + (Sangjin Lee via Sandy Ryza) + + YARN-1578. Fixed reading incomplete application attempt and container data + in FileSystemApplicationHistoryStore. (Shinichi Yamashita via zjshen) + + YARN-1417. Modified RM to generate container-tokens not at creation time, but + at allocation time so as to prevent RM from shelling out containers with + expired tokens. (Omkar Vinit Joshi and Jian He via vinodkv) + + YARN-1553. Modified YARN and MR to stop using HttpConfig.isSecure() and + instead rely on the http policy framework. And also fix some bugs related + to https handling in YARN web-apps. (Haohui Mai via vinodkv) + + YARN-1721. When moving app between queues in Fair Scheduler, grab lock on + FSSchedulerApp (Sandy Ryza) + + YARN-1724. Race condition in Fair Scheduler when continuous scheduling is + turned on (Sandy Ryza) + + YARN-1590. 
Fixed ResourceManager, web-app proxy and MR JobHistoryServer to + expand _HOST properly in their kerberos principals. (Mohammad Kamrul Islam + via vinodkv) + + YARN-1428. Fixed RM to write the final state of RMApp/RMAppAttempt to the + application history store in the transition to the final state. (Contributed + by Zhijie Shen) + + YARN-713. Fixed ResourceManager to not crash while building tokens when DNS + issues happen intermittently. (Jian He via vinodkv) + + YARN-1398. Fixed a deadlock in ResourceManager between users requesting + queue-acls and completing containers. (vinodkv) + + YARN-1071. Enabled ResourceManager to recover cluster metrics + numDecommissionedNMs after restarting. (Jian He via zjshen) + + YARN-1742. Fixed javadoc of configuration parameter + DEFAULT_NM_MIN_HEALTHY_DISKS_FRACTION. (Akira Ajisaka via vinodkv) + + YARN-1686. Fixed NodeManager to properly handle any errors during + re-registration after a RESYNC and thus avoid hanging. (Rohith Sharma via + vinodkv) + + YARN-1734. Fixed ResourceManager to update the configurations when it + transits from standby to active mode so as to assimilate any changes that + happened while it was in standby mode. (Xuan Gong via vinodkv) + + YARN-1760. TestRMAdminService assumes CapacityScheduler. (kasha) + + YARN-1758. Fixed ResourceManager to not mandate the presence of site specific + configuration files and thus fix failures in downstream tests. (Xuan Gong via + vinodkv) + + YARN-1748. Excluded core-site.xml from hadoop-yarn-server-tests package's jar + and thus avoid breaking downstream tests. (Sravya Tirukkovalur via vinodkv) + + YARN-1729. Made TimelineWebServices deserialize the string primary- and + secondary-filters param into the JSON-compatible object. (Billie Rinaldi via + zjshen) + + YARN-1766. Fixed a bug in ResourceManager to use configuration loaded from the + configuration-provider when booting up. (Xuan Gong via vinodkv) + + YARN-1768.
Fixed error message being too verbose when killing a non-existent + application. (Tsuyoshi OZAWA via raviprak) + + YARN-1774. FS: Submitting to non-leaf queue throws NPE. (Anubhav Dhoot and + Karthik Kambatla via kasha) + + YARN-1783. Fixed a bug in NodeManager's status-updater that was losing + completed container statuses when NodeManager is forced to resync by the + ResourceManager. (Jian He via vinodkv) + + YARN-1787. Fixed help messages for applicationattempt and container + sub-commands in bin/yarn. (Zhijie Shen via vinodkv) + + YARN-1793. Fixed ClientRMService#forceKillApplication not killing unmanaged + application. (Karthik Kambatla via jianhe) + + YARN-1788. Fixed a bug in ResourceManager to set the apps-completed and + apps-killed metrics correctly for killed applications. (Varun Vasudev via + vinodkv) + + YARN-1821. NPE on registerNodeManager if the request has containers for + UnmanagedAMs. (kasha) + + YARN-1800. Fixed NodeManager to gracefully handle RejectedExecutionException + in the public-localizer thread-pool. (Varun Vasudev via vinodkv) + + YARN-1444. Fix CapacityScheduler to deal with cases where applications + specify host/rack requests without off-switch request. (Wangda Tan via + acmurthy) + + YARN-1812. Fixed ResourceManager to synchronously renew tokens after recovery + and thus recover app itself synchronously and avoid races with resyncing + NodeManagers. (Jian He via vinodkv) + + YARN-1816. Fixed ResourceManager to get RMApp correctly handle + ATTEMPT_FINISHED event at ACCEPTED state that can happen after RM restarts. + (Jian He via vinodkv) + + YARN-1789. ApplicationSummary does not escape newlines in the app name + (Tsuyoshi OZAWA via jlowe) + + YARN-1830. Fixed TestRMRestart#testQueueMetricsOnRMRestart failure due to + race condition when app is submitted. (Zhijie Shen via jianhe) + + YARN-1685. Fixed few bugs related to handling of containers' log-URLs on + ResourceManager and history-service. (Zhijie Shen via vinodkv) + + YARN-1206.
Fixed AM container log to show on NM web page after application + finishes if log-aggregation is disabled. (Rohith Sharmaks via jianhe) + + YARN-1591. Fixed AsyncDispatcher to handle interrupts on shutdown in a sane + manner and thus fix failure of TestResourceTrackerService. (Tsuyoshi Ozawa + via vinodkv) + + YARN-1839. Fixed handling of NMTokens in ResourceManager such that containers + launched by AMs running on the same machine as the AM are correctly + propagated. (Jian He via vinodkv) + + YARN-1640. Fixed manual failover of ResourceManagers to work correctly in + secure clusters. (Xuan Gong via vinodkv) + + YARN-1855. Made Application-history server to be optional in MiniYARNCluster + and thus avoid the failure of TestRMFailover#testRMWebAppRedirect. (Zhijie + Shen via vinodkv) + + YARN-1859. Fixed WebAppProxyServlet to correctly handle applications absent + on the ResourceManager. (Zhijie Shen via vinodkv) + + YARN-1811. Fixed AMFilters in YARN to correctly accept requests from either + web-app proxy or the RMs when HA is enabled. (Robert Kanter via vinodkv) + + YARN-1670. Fixed a bug in log-aggregation that can cause the writer to write + more log-data than the log-length that it records. (Mit Desai via vinodkv) + + YARN-1849. Fixed NPE in ResourceTrackerService#registerNodeManager for UAM + (Karthik Kambatla via jianhe) + + YARN-1863. Fixed test failure in TestRMFailover after YARN-1859. (Xuan Gong + via vinodkv) + + YARN-1854. Fixed test failure in TestRMHA#testStartAndTransitions. (Rohith + Sharma KS via vinodkv) + + YARN-1776. Fixed DelegationToken renewal to survive RM failover. (Zhijie + Shen via jianhe) + + YARN-1577. Made UnmanagedAMLauncher do launchAM after the attempt reaches + the LAUNCHED state. (Jian He via zjshen) + + YARN-1785. FairScheduler treats app lookup failures as ERRORs. + (bc Wong via kasha) + + YARN-1752. Fixed ApplicationMasterService to reject unregister request if + AM did not register before.
(Rohith Sharma via jianhe) + + YARN-1846. TestRM#testNMTokenSentForNormalContainer assumes CapacityScheduler. + (Robert Kanter via kasha) + + YARN-1705. Reset cluster-metrics on transition to standby. (Rohith via kasha) + + YARN-1852. Fixed RMAppAttempt to not resend AttemptFailed/AttemptKilled + events to already recovered Failed/Killed RMApps. (Rohith via jianhe) + + YARN-1866. Fixed an issue with renewal of RM-delegation tokens on restart or + fail-over. (Jian He via vinodkv) + + YARN-1867. Fixed a bug in ResourceManager that was causing invalid ACL checks + in the web-services after fail-over. (Vinod Kumar Vavilapalli) + + YARN-1521. Mark Idempotent/AtMostOnce annotations to the APIs in + ApplicationClientProtocol, ResourceManagerAdministrationProtocol and + ResourceTrackerProtocol so that they work in HA scenario. (Xuan Gong + via jianhe) + + YARN-1873. Fixed TestDistributedShell failure when the test cases are out of + order. (Mit Desai via zjshen) + + YARN-1893. Mark AtMostOnce annotation to ApplicationMasterProtocol#allocate. + (Xuan Gong via jianhe) + +Release 2.3.1 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.3.0 - 2014-02-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-649. Added a new NM web-service to serve container logs in plain text + over HTTP. (Sandy Ryza via vinodkv) + + YARN-1021. Yarn Scheduler Load Simulator. (ywskycn via tucu) + + YARN-1010. FairScheduler: decouple container scheduling from nodemanager + heartbeats. (Wei Yan via Sandy Ryza) + + YARN-1253. Changes to LinuxContainerExecutor to run containers as a single + dedicated user in non-secure mode. (rvs via tucu) + + YARN-1027. Implement RMHAProtocolService (Karthik Kambatla via bikas) + + YARN-1068. Add admin support for HA operations (Karthik Kambatla via + bikas) + + YARN-311. RM/scheduler support for dynamic resource configuration. + (Junping Du via llu) + + YARN-1392.
Allow sophisticated app-to-queue placement policies in the Fair + Scheduler (Sandy Ryza) + + YARN-1447. Common PB type definitions for container resizing. (Wangda Tan + via Sandy Ryza) + + YARN-1448. AM-RM protocol changes to support container resizing (Wangda Tan + via Sandy Ryza) + + YARN-312. Introduced ResourceManagerAdministrationProtocol changes to support + changing resources on node. (Junping Du via vinodkv) + + YARN-1028. Added FailoverProxyProvider capability to ResourceManager to help + with RM failover. (Karthik Kambatla via vinodkv) + + YARN-1029. Added embedded leader election in the ResourceManager. (Karthik + Kambatla via vinodkv) + + YARN-1033. Expose RM active/standby state to Web UI and REST API (kasha) + + IMPROVEMENTS + + YARN-305. Fair scheduler logs too many "Node offered to app" messages. + (Lohit Vijayarenu via Sandy Ryza) + + YARN-1258. Allow configuring the Fair Scheduler root queue (Sandy Ryza) + + YARN-1288. Make Fair Scheduler ACLs more user friendly (Sandy Ryza) + + YARN-1315. TestQueueACLs should also test FairScheduler (Sandy Ryza) + + YARN-1335. Move duplicate code from FSSchedulerApp and FiCaSchedulerApp + into SchedulerApplication (Sandy Ryza) + + YARN-1333. Support blacklisting in the Fair Scheduler (Tsuyoshi Ozawa via + Sandy Ryza) + + YARN-1109. Demote NodeManager "Sending out status for container" logs to + debug (haosdent via Sandy Ryza) + + YARN-1321. Changed NMTokenCache to support both singleton and an instance + usage. (Alejandro Abdelnur via vinodkv) + + YARN-1388. Fair Scheduler page always displays blank fair share (Liyin Liang + via Sandy Ryza) + + YARN-7. Support CPU resource for DistributedShell. (Junping Du via llu) + + YARN-905. Add state filters to nodes CLI (Wei Yan via Sandy Ryza) + + YARN-1098. Separate out RM services into Always On and Active (Karthik + Kambatla via bikas) + + YARN-353. Add Zookeeper-based store implementation for RMStateStore. 
+ (Bikas Saha, Jian He and Karthik Kambatla via hitesh) + + YARN-819. ResourceManager and NodeManager should check for a minimum allowed + version (Robert Parker via jeagles) + + YARN-425. coverage fix for yarn api (Aleksey Gorshkov via jeagles) + + YARN-1199. Make NM/RM Versions Available (Mit Desai via jeagles) + + YARN-1232. Configuration to support multiple RMs (Karthik Kambatla via + bikas) + + YARN-465. fix coverage org.apache.hadoop.yarn.server.webproxy (Aleksey + Gorshkov and Andrey Klochkov via jlowe) + + YARN-976. Document the meaning of a virtual core. (Sandy Ryza) + + YARN-1182. MiniYARNCluster creates and inits the RM/NM only on start() + (Karthik Kambatla via Sandy Ryza) + + HADOOP-9598. Improve code coverage of RMAdminCLI (Aleksey Gorshkov and + Andrey Klochkov via jeagles) + + YARN-1306. Clean up hadoop-sls sample-conf according to YARN-1228 (Wei Yan + via Sandy Ryza) + + YARN-891. Modified ResourceManager state-store to remember completed + applications so that clients can get information about them post RM-restart. + (Jian He via vinodkv) + + YARN-1290. Let continuous scheduling achieve more balanced task assignment + (Wei Yan via Sandy Ryza) + + YARN-786. Expose application resource usage in RM REST API (Sandy Ryza) + + YARN-1323. Set HTTPS webapp address along with other RPC addresses in HAUtil + (Karthik Kambatla via Sandy Ryza) + + YARN-1121. Changed ResourceManager's state-store to drain all events on + shut-down. (Jian He via vinodkv) + + YARN-1387. RMWebServices should use ClientRMService for filtering + applications (Karthik Kambatla via Sandy Ryza) + + YARN-1222. Make improvements in ZKRMStateStore for fencing (Karthik + Kambatla via bikas) + + YARN-709. Added tests to verify validity of delegation tokens and logging of + appsummary after RM restart. (Jian He via vinodkv) + + YARN-1210. Changed RM to start new app-attempts on RM restart only after + ensuring that previous AM exited or after expiry time. 
(Omkar Vinit Joshi via + vinodkv) + + YARN-674. Fixed ResourceManager to renew DelegationTokens on submission + asynchronously to work around potential slowness in state-store. (Omkar Vinit + Joshi via vinodkv) + + YARN-584. In scheduler web UIs, queues unexpand on refresh. (Harshit + Daga via Sandy Ryza) + + YARN-1303. Fixed DistributedShell to not fail with multiple commands separated + by a semi-colon as shell-command. (Xuan Gong via vinodkv) + + YARN-1423. Support queue placement by secondary group in the Fair Scheduler + (Ted Malaska via Sandy Ryza) + + YARN-1314. Fixed DistributedShell to not fail with multiple arguments for a + shell command separated by spaces. (Xuan Gong via vinodkv) + + YARN-1239. Modified ResourceManager state-store implementations to start + storing version numbers. (Jian He via vinodkv) + + YARN-1241. In Fair Scheduler, maxRunningApps does not work for non-leaf + queues. (Sandy Ryza) + + YARN-1318. Promoted AdminService to an Always-On service and merged it into + RMHAProtocolService. (Karthik Kambatla via vinodkv) + + YARN-1332. In TestAMRMClient, replace assertTrue with assertEquals where + possible (Sebastian Wong via Sandy Ryza) + + YARN-1403. Separate out configuration loading from QueueManager in the Fair + Scheduler (Sandy Ryza) + + YARN-1181. Augment MiniYARNCluster to support HA mode (Karthik Kambatla) + + YARN-546. Allow disabling the Fair Scheduler event log (Sandy Ryza) + + YARN-807. When querying apps by queue, iterating over all apps is + inefficient and limiting (Sandy Ryza) + + YARN-1378. Implemented a cleaner of old finished applications from the RM + state-store. (Jian He via vinodkv) + + YARN-1481. Move internal services logic from AdminService to ResourceManager. + (vinodkv via kasha) + + YARN-1491. Upgrade JUnit3 TestCase to JUnit 4 (Chen He via jeagles) + + YARN-408. Change CapacityScheduler to not disable delay-scheduling by default. + (Mayank Bansal via vinodkv) + + YARN-1325. 
Modified RM HA configuration validation to also ensure that + multiple RMs are configured. (Xuan Gong via vinodkv) + + YARN-1311. Fixed app specific scheduler-events' names to be app-attempt + based. (vinodkv via jianhe) + + YARN-1485. Modified RM HA configuration validation to also ensure that + service-address configuration are configured for every RM. (Xuan Gong via + vinodkv) + + YARN-1435. Modified Distributed Shell to accept either the command or the + custom script. (Xuan Gong via zjshen) + + YARN-1446. Changed client API to retry killing application till RM + acknowledges so as to account for RM crashes/failover. (Jian He via vinodkv) + + YARN-1307. Redesign znode structure for Zookeeper based RM state-store for + better organization and scalability. (Tsuyoshi OZAWA via vinodkv) + + YARN-1172. Convert SecretManagers in RM to services (Tsuyoshi OZAWA via kasha) + + YARN-1523. Use StandbyException instead of RMNotYetReadyException (kasha) + + YARN-1541. Changed ResourceManager to invalidate ApplicationMaster host/port + information once an AM crashes. (Jian He via vinodkv) + + YARN-1482. Modified WebApplicationProxy to make it work across ResourceManager + fail-over. (Xuan Gong via vinodkv) + + YARN-1568. Rename clusterid to clusterId in ActiveRMInfoProto (kasha) + + YARN-1579. ActiveRMInfoProto fields should be optional (kasha) + + YARN-888. Cleaned up POM files so that non-leaf modules don't include any + dependencies and thus compact the dependency list for leaf modules. + (Alejandro Abdelnur via vinodkv) + + YARN-1567. In Fair Scheduler, allow empty queues to change between leaf and + parent on allocation file reload (Sandy Ryza) + + YARN-1616. RMFatalEventDispatcher should log the cause of the event (kasha) + + YARN-1624. QueuePlacementPolicy format is not easily readable via a JAXB + parser (Aditya Acharya via Sandy Ryza) + + YARN-1623. Include queue name in RegisterApplicationMasterResponse (Sandy + Ryza) + + YARN-1573. 
ZK store should use a private password for root-node-acls. + (kasha). + + OPTIMIZATIONS + + BUG FIXES + + YARN-1284. LCE: Race condition leaves dangling cgroups entries for killed + containers. (Alejandro Abdelnur via Sandy Ryza) + + YARN-1283. Fixed RM to give a fully-qualified proxy URL for an application + so that clients don't need to do scheme-mangling. (Omkar Vinit Joshi via + vinodkv) + + YARN-879. Fixed tests w.r.t o.a.h.y.server.resourcemanager.Application. + (Junping Du via devaraj) + + YARN-1265. Fair Scheduler chokes on unhealthy node reconnect (Sandy Ryza) + + YARN-1044. used/min/max resources do not display info in the scheduler page + (Sangjin Lee via Sandy Ryza) + + YARN-1259. In Fair Scheduler web UI, queue num pending and num active apps + switched. (Robert Kanter via Sandy Ryza) + + YARN-1295. In UnixLocalWrapperScriptBuilder, using bash -c can cause Text + file busy errors (Sandy Ryza) + + YARN-1185. Fixed FileSystemRMStateStore to not leave partial files that + prevent subsequent ResourceManager recovery. (Omkar Vinit Joshi via vinodkv) + + YARN-1331. yarn.cmd exits with NoClassDefFoundError trying to run rmadmin or + logs. (cnauroth) + + YARN-1330. Fair Scheduler: defaultQueueSchedulingPolicy does not take effect + (Sandy Ryza) + + YARN-1022. Unnecessary INFO logs in AMRMClientAsync (haosdent via bikas) + + YARN-1349. yarn.cmd does not support passthrough to any arbitrary class. + (cnauroth) + + YARN-1357. TestContainerLaunch.testContainerEnvVariables fails on Windows. + (Chuan Liu via cnauroth) + + YARN-1358. TestYarnCLI fails on Windows due to line endings. (Chuan Liu via + cnauroth) + + YARN-1343. NodeManagers additions/restarts are not reported as node updates + in AllocateResponse responses to AMs. (tucu) + + YARN-1381. Same relaxLocality appears twice in exception message of + AMRMClientImpl#checkLocalityRelaxationConflict() (Ted Yu via Sandy Ryza) + + YARN-1407. 
RM Web UI and REST APIs should uniformly use + YarnApplicationState (Sandy Ryza) + + YARN-1438. Ensure container diagnostics includes exception from container + launch. (stevel via acmurthy) + + YARN-1138. yarn.application.classpath is set to point to $HADOOP_CONF_DIR + etc., which does not work on Windows. (Chuan Liu via cnauroth) + + YARN-461. Fair scheduler should not accept apps with empty string queue name. + (ywskycn via tucu) + + YARN-1060. Two tests in TestFairScheduler are missing @Test annotation + (Niranjan Singh via Sandy Ryza) + + YARN-1188. The context of QueueMetrics becomes default when using + FairScheduler (Tsuyoshi Ozawa via Sandy Ryza) + + YARN-1268. TestFairScheduler.testContinuousScheduling is flaky (Sandy Ryza) + + YARN-1300. SLS tests fail because conf puts YARN properties in + fair-scheduler.xml (Ted Yu via Sandy Ryza) + + YARN-1183. MiniYARNCluster shutdown takes several minutes intermittently + (Andrey Klochkov via jeagles) + + YARN-1305. RMHAProtocolService#serviceInit should handle HAUtil's + IllegalArgumentException (Tsuyoshi Ozawa via bikas) + + YARN-1374. Changed ResourceManager to start the preemption policy monitors + as active services. (Karthik Kambatla via vinodkv) + + YARN-1395. Distributed shell application master launched with debug flag can + hang waiting for external ls process. (cnauroth) + + YARN-1400. yarn.cmd uses HADOOP_RESOURCEMANAGER_OPTS. Should be + YARN_RESOURCEMANAGER_OPTS. (Raja Aluri via cnauroth) + + YARN-1401. With zero sleep-delay-before-sigkill.ms, no signal is ever sent + (Gera Shegalov via Sandy Ryza) + + YARN-1411. HA config shouldn't affect NodeManager RPC addresses (Karthik + Kambatla via bikas) + + YARN-1419. TestFifoScheduler.testAppAttemptMetrics fails intermittently + under jdk7 (Jonathan Eagles via jlowe) + + YARN-744. Race condition in ApplicationMasterService.allocate .. It might + process same allocate request twice resulting in additional containers + getting allocated. 
(Omkar Vinit Joshi via bikas) + + YARN-1425. TestRMRestart fails because MockRM.waitForState(AttemptId) uses + current attempt instead of the attempt passed as argument (Omkar Vinit + Joshi via bikas) + + YARN-1053. Diagnostic message from ContainerExitEvent is ignored in + ContainerImpl (Omkar Vinit Joshi via bikas) + + YARN-1320. Fixed Distributed Shell application to respect custom log4j + properties file. (Xuan Gong via vinodkv) + + YARN-1416. Fixed a few invalid transitions in RMApp, RMAppAttempt and in some + tests. (Jian He via vinodkv) + + YARN-895. Changed RM state-store to not crash immediately if RM restarts while + the state-store is down. (Jian He via vinodkv) + + YARN-1454. Fixed test failure issue with TestRMRestart. (Karthik Kambatla + via vinodkv) + + YARN-1450. Fixed test failure in TestUnmanagedAMLauncher by removing its + dependency on distributed-shell. (Binglin Chang via vinodkv) + + YARN-1405. Fixed ResourceManager to not hang when init/start fails with an + exception w.r.t state-store. (Jian He via vinodkv) + + YARN-1505. Fixed Webapplication proxy server to not hardcode its bind + address. (Xuan Gong via vinodkv) + + YARN-1145. Fixed a potential file-handle leak in the web interface for + displaying aggregated logs. (Rohith Sharma via vinodkv) + + YARN-1451. TestResourceManager relies on the scheduler assigning multiple + containers in a single node update. (Sandy Ryza via kasha) + + YARN-1527. Fix yarn rmadmin command to print the correct usage info. + (Akira AJISAKA via jianhe) + + YARN-1522. Fixed a race condition in the test TestApplicationCleanup that was + causing it to randomly fail. (Liyin Liang via vinodkv) + + YARN-1549. Fixed a bug in ResourceManager's ApplicationMasterService that + was causing unmanaged AMs to not finish correctly. (haosdent via vinodkv) + + YARN-1559. Race between ServerRMProxy and ClientRMProxy setting + RMProxy#INSTANCE. (kasha and vinodkv via kasha) + + YARN-1560. 
Fixed TestYarnClient#testAMMRTokens failure with null AMRM token. + (Ted Yu via jianhe) + + YARN-1409. NonAggregatingLogHandler can throw RejectedExecutionException + (Tsuyoshi OZAWA via jlowe) + + YARN-1293. Fixed TestContainerLaunch#testInvalidEnvSyntaxDiagnostics failure + caused by non-English system locale. (Tsuyoshi OZAWA via jianhe) + + YARN-1574. RMDispatcher should be reset on transition to standby. (Xuan Gong + via kasha) + + YARN-1598. HA-related rmadmin commands don't work on a secure cluster (kasha) + + YARN-1603. Remove two *.orig files which were unexpectedly committed. + (Zhijie Shen via junping_du) + + YARN-1601. 3rd party JARs are missing from hadoop-dist output. (tucu) + + YARN-1351. Invalid string format in Fair Scheduler log warn message + (Konstantin Weitz via Sandy Ryza) + + YARN-1608. LinuxContainerExecutor has a few DEBUG messages at INFO level + (kasha) + + YARN-1606. Fix the default value of yarn.resourcemanager.zk-timeout-ms + in yarn-default.xml (kasha) + + YARN-1607. TestRM relies on the scheduler assigning multiple containers in + a single node update (Sandy Ryza) + + YARN-1575. Public localizer crashes with "Localized unkown resource" + (jlowe) + + YARN-1629. IndexOutOfBoundsException in MaxRunningAppsEnforcer (Sandy Ryza) + + YARN-1628. Fixed the test failure in TestContainerManagerSecurity. (Vinod + Kumar Vavilapalli via zjshen) + +Release 2.2.0 - 2013-10-13 + + INCOMPATIBLE CHANGES + + YARN-1229. Define constraints on Auxiliary Service names. Change + ShuffleHandler service name from mapreduce.shuffle to + mapreduce_shuffle (Xuan Gong via sseth) + + NEW FEATURES + + IMPROVEMENTS + + YARN-1246. Added application finish-status to ApplicationSummary for the sake + of testing given ApplicationHistoryServer is not yet ready. (Arpit Gupta via + vinodkv) + + YARN-899. Added back queue level administrator-acls so that there is no + regression w.r.t 1.x. (Xuan Gong via vinodkv) + + YARN-1228. Clean up Fair Scheduler configuration loading. 
(Sandy Ryza) + + YARN-1213. Restore config to ban submitting to undeclared pools in the + Fair Scheduler. (Sandy Ryza) + + YARN-1277. Added a policy based configuration for http/https in common + HttpServer and using the same in YARN - related to per project https config + support via HADOOP-10022. (Suresh Srinivas and Omkar Vinit Joshi via vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-1128. FifoPolicy.computeShares throws NPE on empty list of Schedulables + (Karthik Kambatla via Sandy Ryza) + + YARN-1214. Register ClientToken MasterKey in SecretManager after it is + saved (Jian He via bikas) + + YARN-49. Improve distributed shell application to work on a secure cluster. + (Vinod Kumar Vavilapalli via hitesh) + + YARN-1157. Fixed ResourceManager UI to behave correctly when apps like + distributed-shell do not set tracking urls. (Xuan Gong via vinodkv) + + YARN-1221. With Fair Scheduler, reserved MB reported in RM web UI increases + indefinitely (Siqi Li via Sandy Ryza) + + YARN-1247. test-container-executor has gotten out of sync with the changes to + container-executor. (rvs via tucu) + + YARN-1070. Fixed race conditions in NodeManager during container-kill. + (Zhijie Shen via vinodkv) + + YARN-1215. Yarn URL should include userinfo. (Chuan Liu via cnauroth) + + YARN-1262. TestApplicationCleanup relies on all containers assigned in a + single heartbeat (Karthik Kambatla via Sandy Ryza) + + YARN-1260. Added webapp.http.address to yarn-default.xml so that default + install with https enabled doesn't have broken link on NM UI. (Omkar Vinit + Joshi via vinodkv) + + YARN-1141. Updating resource requests should be decoupled with updating + blacklist (Zhijie Shen via bikas) + + YARN-876. Node resource is added twice when node comes back from unhealthy + to healthy. (Peng Zhang via Sandy Ryza) + + YARN-890. Ensure CapacityScheduler doesn't round-up metric for available + resources. (Xuan Gong & Hitesh Shah via acmurthy) + + YARN-621. 
Changed YARN web app to not add paths that can cause duplicate + additions of authenticated filters there by causing kerberos replay errors. + (Omkar Vinit Joshi via vinodkv) + + YARN-1236. FairScheduler setting queue name in RMApp is not working. + (Sandy Ryza) + + YARN-1256. NM silently ignores non-existent service in + StartContainerRequest (Xuan Gong via bikas) + + YARN-1149. NM throws InvalidStateTransitonException: Invalid event: + APPLICATION_LOG_HANDLING_FINISHED at RUNNING (Xuan Gong via hitesh) + + YARN-1271. "Text file busy" errors launching containers again + (Sandy Ryza) + + YARN-1131. $yarn logs command should return an appropriate error message if + YARN application is still running. (Siddharth Seth via hitesh) + + YARN-1219. FSDownload changes file suffix making FileUtil.unTar() throw + exception. (Shanyu Zhao via cnauroth) + + YARN-1251. TestDistributedShell#TestDSShell failed with timeout. (Xuan Gong + via hitesh) + + YARN-1167. Fixed Distributed Shell to not incorrectly show empty hostname + on RM UI. (Xuan Gong via vinodkv) + + YARN-1254. Fixed NodeManager to not pollute container's credentials. (Omkar + Vinit Joshi via vinodkv) + + YARN-1273. Fixed Distributed-shell to account for containers that failed + to start. (Hitesh Shah via vinodkv) + + YARN-1032. Fixed NPE in RackResolver. (Lohit Vijayarenu via acmurthy) + + YARN-1090. Fixed CS UI to better reflect applications as non-schedulable + and not as pending. (Jian He via acmurthy) + + YARN-1274. Fixed NodeManager's LinuxContainerExecutor to create user, app-dir + and log-dirs correctly even when there are no resources to localize for the + container. (Siddharth Seth via vinodkv) + + YARN-1278. Fixed NodeManager to not delete local resources for apps on resync + command from RM - a bug caused by YARN-1149. (Hitesh Shah via vinodkv) + + YARN-1463. 
Tests should avoid starting http-server where possible or creates + spnego keytab/principals (vinodkv via kasha) + +Release 2.1.1-beta - 2013-09-23 + + INCOMPATIBLE CHANGES + + YARN-707. Added user information also in the YARN ClientToken so that AMs + can implement authorization based on incoming users. (Jason Lowe via vinodkv) + + YARN-1170. YARN & MapReduce proto definitions fixed to specify protobuf + package as hadoop.yarn and hadoop.mapreduce respectively. (Binglin Chang + via acmurthy) + + NEW FEATURES + + IMPROVEMENTS + + YARN-589. Expose a REST API for monitoring the fair scheduler (Sandy Ryza). + + YARN-1074. Cleaned up YARN CLI application list to only display running + applications by default. (Xuan Gong via vinodkv) + + YARN-1093. Corrections to Fair Scheduler documentation (Wing Yew Poon via + Sandy Ryza) + + YARN-942. In Fair Scheduler documentation, inconsistency on which + properties have prefix (Akira Ajisaka via Sandy Ryza) + + YARN-1083. Changed ResourceManager to fail when the expiry interval is less + than the configured node-heartbeat interval. (Zhijie Shen via vinodkv) + + YARN-1081. Made a trivial change to YARN node CLI header to avoid potential + confusion. (Akira AJISAKA via vinodkv) + + YARN-1034. Remove "experimental" in the Fair Scheduler documentation. + (Karthik Kambatla via Sandy Ryza) + + YARN-1080. Improved help message for "yarn logs" command. (Xuan Gong via + vinodkv) + + YARN-771. AMRMClient support for resource blacklisting (Junping Du via + bikas) + + YARN-1117. Improved help messages for "yarn application" and "yarn node" + commands. (Xuan Gong via vinodkv) + + YARN-1120. Made ApplicationConstants.Environment.USER definition OS neutral + as the corresponding value is now set correctly end-to-end. (Chuan Liu via + vinodkv) + + YARN-1124. Modified YARN CLI application list to display new and submitted + applications together with running apps by default, following up YARN-1074. + (Xuan Gong via vinodkv) + + YARN-1065. 
NM should provide AuxillaryService data to the container (Xuan + Gong via bikas) + + YARN-758. Augment MockNM to use multiple cores (Karthik Kambatla via + Sandy Ryza) + + YARN-696. Changed RMWebservice apps call to take in multiple application + states. (Trevor Lorimer via vinodkv) + + YARN-910. Augmented auxiliary services to listen for container starts and + completions in addition to application events. (Alejandro Abdelnur via + vinodkv) + + YARN-1137. Add support whitelist for system users to Yarn + container-executor.c. (rvs via tucu) + + YARN-1001. Added a web-service to get statistics about per application-type + per state for consumption by downstream projects. (Zhijie Shen via vinodkv) + + YARN-1203. Changed YARN web-app proxy to handle http and https URLs from + AM registration and finish correctly. (Omkar Vinit Joshi via vinodkv) + + YARN-1204. Added separate configuration properties for https for RM and NM + without which servers enabled with https will also start on http ports. + (Omkar Vinit Joshi via vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-948. Changed ResourceManager to validate the release container list + before actually releasing them. (Omkar Vinit Joshi via vinodkv) + + YARN-966. Fixed ContainerLaunch to not fail quietly when there are no + localized resources due to some other failure. (Zhijie Shen via vinodkv) + + YARN-502. Fixed a state machine issue with RMNode inside ResourceManager + which was crashing scheduler. (Mayank Bansal via vinodkv) + + YARN-573. Shared data structures in Public Localizer and Private Localizer + are not Thread safe. (Omkar Vinit Joshi via jlowe) + + YARN-903. Changed ContainerManager to suppress unnecessary warnings when + stopping already stopped containers. (Omkar Vinit Joshi via vinodkv) + + YARN-906. Fixed a bug in NodeManager where cancelling ContainerLaunch at + KILLING state causes that the container to hang. (Zhijie Shen via vinodkv) + + YARN-994. 
HeartBeat thread in AMRMClientAsync does not handle runtime + exception correctly (Xuan Gong via bikas) + + YARN-337. RM handles killed application tracking URL poorly (jlowe) + + YARN-107. Fixed ResourceManager and clients to better handle + forceKillApplication on non-running and finished applications. (Xuan Gong + via vinodkv) + + YARN-643. Fixed ResourceManager to remove all tokens consistently on app + finish. (Xuan Gong via vinodkv) + + YARN-1006. Fixed broken rendering in the Nodes list web page on the RM web + UI. (Xuan Gong via vinodkv) + + YARN-881. Priority#compareTo method seems to be wrong. (Jian He via bikas) + + YARN-1082. Create base directories on HDFS after RM login to ensure RM + recovery doesn't fail in secure mode. (vinodkv via acmurthy) + + YARN-1085. Modified YARN and MR2 web-apps to do HTTP authentication in + secure setup with kerberos. (Omkar Vinit Joshi via vinodkv) + + YARN-1094. Fixed a blocker with RM restart code because of which RM crashes + when try to recover an existing app. (vinodkv) + + YARN-1008. MiniYARNCluster with multiple nodemanagers, all nodes have same + key for allocations. (tucu) + + YARN-981. Fixed YARN webapp so that /logs servlet works like before. (Jian He + via vinodkv) + + YARN-602. Fixed NodeManager to not let users override some mandatory + environmental variables. (Kenji Kikushima via vinodkv) + + YARN-1101. Active nodes can be decremented below 0 (Robert Parker + via tgraves) + + YARN-1077. Fixed TestContainerLaunch test failure on Windows. (Chuan Liu via + vinodkv) + + YARN-957. Fixed a bug in CapacityScheduler because of which requests that + need more than a node's total capability were incorrectly allocated on that + node causing apps to hang. (Omkar Vinit Joshi via vinodkv) + + YARN-1107. Fixed a bug in ResourceManager because of which RM in secure mode + fails to restart. (Omkar Vinit Joshi via vinodkv) + + YARN-1049. ContainerExistStatus should define a status for preempted + containers. 
(tucu) + + YARN-1144. Unmanaged AMs registering a tracking URI should not be + proxy-fied. (tucu) + + YARN-1152. Fixed a bug in ResourceManager that was causing clients to get + invalid client token key errors when an application is about to finish. + (Jason Lowe via vinodkv) + + YARN-292. Fixed FifoScheduler and FairScheduler to make their applications + data structures thread safe to avoid RM crashing with + ArrayIndexOutOfBoundsException. (Zhijie Shen via vinodkv) + + YARN-1025. ResourceManager and NodeManager do not load native libraries on + Windows. (cnauroth) + + YARN-1176. RM web services ClusterMetricsInfo total nodes doesn't include + unhealthy nodes (Jonathan Eagles via tgraves) + + YARN-1078. TestNodeManagerResync, TestNodeManagerShutdown, and + TestNodeStatusUpdater fail on Windows. (Chuan Liu via cnauroth) + + YARN-1194. TestContainerLogsPage fails with native builds (Roman Shaposhnik + via jlowe) + + YARN-1116. Populate AMRMTokens back to AMRMTokenSecretManager after RM + restarts (Jian He via bikas) + + YARN-1189. NMTokenSecretManagerInNM is not being told when applications + have finished (Omkar Vinit Joshi via jlowe) + + YARN-540. Race condition causing RM to potentially relaunch already + unregistered AMs on RM restart (Jian He via bikas) + + YARN-1184. ClassCastException during preemption enforcement. (cdouglas) + +Release 2.1.0-beta - 2013-08-22 + + INCOMPATIBLE CHANGES + + YARN-396. Rationalize AllocateResponse in RM Scheduler API. (Zhijie Shen + via hitesh) + + YARN-439. Flatten NodeHeartbeatResponse. (Xuan Gong via sseth) + + YARN-440. Flatten RegisterNodeManagerResponse. (Xuan Gong via sseth) + + YARN-536. Removed the unused objects ContainerStatus and ContainerStatus from + Container which also don't belong to the container. (Xuan Gong via vinodkv) + + YARN-486. Changed NM's startContainer API to accept Container record given by + RM as a direct parameter instead of as part of the ContainerLaunchContext + record. 
(Xuan Gong via vinodkv) + + YARN-444. Moved special container exit codes from YarnConfiguration to API + where they belong. (Sandy Ryza via vinodkv) + + YARN-441. Removed unused utility methods for collections from two API + records. (Xuan Gong via vinodkv) + + YARN-561. Modified NodeManager to set key information into the environment + of every container that it launches. (Xuan Gong via vinodkv) + + YARN-579. Stop setting the Application Token in the AppMaster env, in + favour of the copy present in the container token field. + (Vinod Kumar Vavilapalli via sseth) + + YARN-629. Make YarnRemoteException not be rooted at IOException. (Xuan Gong + via vinodkv) + + YARN-633. Changed RMAdminProtocol api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-632. Changed ContainerManager api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-631. Changed ClientRMProtocol api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-630. Changed AMRMProtocol api to throw IOException and + YarnRemoteException. (Xuan Gong via vinodkv) + + YARN-615. Rename ContainerLaunchContext.containerTokens to tokens. + (Vinod Kumar Vavilapalli via sseth) + + YARN-571. Remove user from ContainerLaunchContext. (Omkar Vinit Joshi via + vinodkv) + + YARN-716. Making ApplicationID immutable. (Siddharth Seth via vinodkv) + + YARN-684. ContainerManager.startContainer should use + ContainerTokenIdentifier instead of the entire Container. + (Vinod Kumar Vavilapalli via sseth) + + YARN-735. Make ApplicationAttemptId, ContainerId and NodeId immutable. + (Jian He via sseth) + + YARN-749. Rename ResourceRequest.(get,set)HostName to + ResourceRequest.(get,set)ResourceName. (acmurthy) + + YARN-720. container-log4j.properties should not refer to mapreduce + property names. (Zhijie Shen via sseth) + + YARN-748. Moved BuilderUtils from yarn-common to yarn-server-common for + eventual retirement. 
(Jian He via vinodkv) + + YARN-635. Renamed YarnRemoteException to YarnException. (Siddharth Seth via + vinodkv) + + YARN-755. Renamed AllocateResponse.reboot to AllocateResponse.resync. (Bikas + Saha via vinodkv) + + YARN-753. Added individual factory methods for all api protocol records and + converted the records to be abstract classes. (Jian He via vinodkv) + + YARN-724. Moved ProtoBase from api.records to api.records.impl.pb. (Jian He + via vinodkv) + + YARN-759. Create Command enum in AllocateResponse (bikas) + + YARN-777. Removed unreferenced objects from .proto files. (Jian He via + vinodkv) + + YARN-642. Removed health parameter from ResourceManager /nodes web-service + and cleaned the behaviour of the status parameter. (Sandy Ryza via vinodkv) + + YARN-530. Defined Service model strictly, implemented AbstractService for + robust subclassing and migrated yarn-common services. (Steve Loughran via + vinodkv) + + YARN-746. Renamed Service.register() and Service.unregister() to + registerServiceListener() & unregisterServiceListener() respectively. + (Steve Loughran via vinodkv) + + YARN-792. Moved NodeHealthStatus from yarn.api.record to + yarn.server.api.record. (Jian He via vinodkv) + + YARN-806. Moved ContainerExitStatus from yarn.api to yarn.api.records. (Jian + He via vinodkv) + + YARN-821. Renamed setFinishApplicationStatus to setFinalApplicationStatus in + FinishApplicationMasterRequest for consistency. (Jian He via vinodkv) + + YARN-787. Removed minimum resource from RegisterApplicationMasterResponse. + (tucu via acmurthy) + + YARN-829. Renamed RMTokenSelector to be RMDelegationTokenSelector. (Zhijie + Shen via vinodkv) + + YARN-828. Removed the unused YarnVersionAnnotation. (Zhijie Shen via vinodkv) + + YARN-823. Moved RMAdmin from yarn.client to yarn.client.cli and renamed it to + be RMAdminCLI. (Jian He via vinodkv) + + YARN-387. Renamed YARN protocols for consistency. 
+ ClientRMProtocol -> ApplicationClientProtocol + AMRMProtocol -> ApplicationMasterProtocol + ContainerManager -> ContainerManagementProtocol + (vinodkv via acmurthy) + + YARN-831. Removed minimum resource from GetNewApplicationResponse as a + follow-up to YARN-787. (Jian He via acmurthy) + + YARN-824. Added static factory methods to hadoop-yarn-client interfaces. + (Jian He via acmurthy) + + YARN-826. Moved Clock and SystemClock into yarn.util package. (Zhijie Shen + via vinodkv) + + YARN-837. Moved yarn.ClusterInfo into MapReduce project as it doesn't belong + to YARN. (Zhijie Shen via vinodkv) + + YARN-822. Renamed ApplicationToken to be AMRMToken, and similarly the + corresponding TokenSelector and SecretManager. (Omkar Vinit Joshi via vinodkv) + + YARN-610. ClientToken is no longer set in the environment of the Containers. + (Omkar Vinit Joshi via vinodkv) + + YARN-834. Fixed annotations for yarn-client module, reorganized packages and + clearly differentiated *Async apis. (Arun C Murthy and Zhijie Shen via + vinodkv) + + YARN-840. Moved ProtoUtils to yarn.api.records.pb.impl. (Jian He via + acmurthy) + + YARN-841. Move Auxiliary service to yarn-api, annotate and document it. + (vinodkv) + + YARN-850. Rename getClusterAvailableResources to getAvailableResources in + AMRMClients (Jian He via bikas) + + YARN-694. Starting to use NMTokens to authenticate all communication with + NodeManagers. (Omkar Vinit Joshi via vinodkv) + + YARN-553. Replaced YarnClient.getNewApplication with + YarnClient.createApplication which provides a directly usable + ApplicationSubmissionContext to simplify the api. (Karthik Kambatla via + acmurthy) + + YARN-851. Share NMTokens using NMTokenCache (api-based) between AMRMClient + and NMClient instead of memory based approach which is used currently. (Omkar + Vinit Joshi via vinodkv) + + YARN-869. Move ResourceManagerAdministrationProtocol out of main YARN api. + (vinodkv via acmurthy) + + YARN-791. 
Changed RM APIs and web-services related to nodes to ensure that + both are consistent with each other. (Sandy Ryza via vinodkv) + + YARN-727. ClientRMProtocol.getAllApplications should accept ApplicationType as + a parameter. (Xuan Gong via hitesh) + + YARN-701. Use application tokens irrespective of secure or non-secure + mode. (vinodkv via acmurthy) + + YARN-918. Remove ApplicationAttemptId from + RegisterApplicationMasterRequestProto. (vinodkv via acmurthy) + + YARN-926. Modified ContainerManagementProtocol APIs to take in requests for + multiple containers. (Jian He via vinodkv) + + NEW FEATURES + + YARN-482. FS: Extend SchedulingMode to intermediate queues. + (kkambatl via tucu) + + YARN-45. Add protocol for schedulers to request containers back from + ApplicationMasters. (Carlo Curino, cdouglas) + + YARN-563. Add the concept of an application-type for each application. + (Mayank Bansal via vinodkv) + + HADOOP-8562. Enhancements to support Hadoop on Windows Server and Windows + Azure environments. (See breakdown of tasks below for subtasks and + contributors) + + YARN-422. Add a NM Client library to help application-writers. (Zhijie Shen + via vinodkv) + + YARN-392. Make it possible to specify hard locality constraints in resource + requests. (sandyr via tucu) + + YARN-326. Add multi-resource scheduling to the fair scheduler. + (sandyr via tucu) + + YARN-398. Make it possible to specify hard locality constraints in resource + requests for CapacityScheduler. (acmurthy) + + YARN-781. Exposing LOGDIR in all containers' environment which should be used + by containers for logging purposes. (Jian He via vinodkv) + + IMPROVEMENTS + + YARN-347. Node CLI should show CPU info besides memory in node status. + (Junping Du via llu) + + YARN-365. Change NM heartbeat handling to not generate a scheduler event + on each heartbeat. (Xuan Gong via sseth) + + YARN-380. Fix yarn node -status output to be better readable. (Omkar Vinit + Joshi via vinodkv) + + YARN-410. 
Fixed RM UI so that the new lines diagnostics for a failed app on + the per-application page are translated to html line breaks. (Omkar Vinit + Joshi via vinodkv) + + YARN-198. Added a link to RM pages from the NodeManager web app. (Jian He + via vinodkv) + + YARN-237. Refreshing the RM page forgets how many rows I had in my + Datatables (jian he via bobby) + + YARN-481. Add AM Host and RPC Port to ApplicationCLI Status Output + (Chris Riccomini via bikas) + + YARN-297. Improve hashCode implementations for PB records. (Xuan Gong via + hitesh) + + YARN-417. Create AMRMClient wrapper that provides asynchronous callbacks. + (Sandy Ryza via bikas) + + YARN-497. Yarn unmanaged-am launcher jar does not define a main class in + its manifest (Hitesh Shah via bikas) + + YARN-469. Make scheduling mode in FS pluggable. (kkambatl via tucu) + + YARN-450. Define value for * in the scheduling protocol (Zhijie Shen via + bikas) + + YARN-475. Remove a unused constant in the public API - + ApplicationConstants.AM_APP_ATTEMPT_ID_ENV. (Hitesh Shah via vinodkv) + + YARN-309. Changed NodeManager to obtain heart-beat interval from the + ResourceManager. (Xuan Gong via vinodkv) + + YARN-447. Move ApplicationComparator in CapacityScheduler to use comparator + in ApplicationId. (Nemon Lou via vinodkv) + + YARN-381. Improve fair scheduler docs. (Sandy Ryza via tomwhite) + + YARN-458. YARN daemon addresses must be placed in many different configs. + (sandyr via tucu) + + YARN-193. Scheduler.normalizeRequest does not account for allocation + requests that exceed maximumAllocation limits (Zhijie Shen via bikas) + + YARN-479. NM retry behavior for connection to RM should be similar for + lost heartbeats (Jian He via bikas) + + YARN-495. Changed NM reboot behaviour to be a simple resync - kill all + containers and re-register with RM. (Jian He via vinodkv) + + YARN-514. Delayed store operations should not result in RM unavailability + for app submission (Zhijie Shen via bikas) + + YARN-586. 
Fixed a typo in ApplicationSubmissionContext#setApplicationId. + (Zhijie Shen via vinodkv) + + YARN-542. Changed the default global AM max-attempts value to be not one. + (Zhijie Shen via vinodkv) + + YARN-583. Moved application level local resources to be localized under the + filecache sub-directory under application directory. (Omkar Vinit Joshi via + vinodkv) + + YARN-581. Added a test to verify that app delegation tokens are restored + after RM restart. (Jian He via vinodkv) + + YARN-577. Add application-progress also to ApplicationReport. (Hitesh Shah + via vinodkv) + + YARN-595. Refactor fair scheduler to use common Resources. (Sandy Ryza + via tomwhite) + + YARN-562. Modified NM to reject any containers allocated by a previous + ResourceManager. (Jian He via vinodkv) + + YARN-591. Moved RM recovery related records out of public API as they do not + belong there. (vinodkv) + + YARN-599. Refactoring submitApplication in ClientRMService and RMAppManager + to separate out various validation checks depending on whether they rely on + RM configuration or not. (Zhijie Shen via vinodkv) + + YARN-618. Modified RM_INVALID_IDENTIFIER to be -1 instead of zero. (Jian He + via vinodkv) + + YARN-625. Move the utility method unwrapAndThrowException from + YarnRemoteExceptionPBImpl to RPCUtil. (Siddharth Seth via vinodkv) + + YARN-645. Moved RMDelegationTokenSecretManager from yarn-server-common to + yarn-server-resourcemanager where it really belongs. (Jian He via vinodkv) + + YARN-651. Changed PBClientImpls of ContainerManager and RMAdmin to throw + IOExceptions also. (Xuan Gong via vinodkv) + + YARN-582. Changed ResourceManager to recover Application token and client + tokens for app attempt so that RM can be restarted while preserving current + applications. (Jian He via vinodkv) + + YARN-568. Add support for work preserving preemption to the FairScheduler. + (Carlo Curino and Sandy Ryza via cdouglas) + + YARN-598. Add virtual cores to queue metrics. 
(sandyr via tucu) + + YARN-634. Modified YarnRemoteException to be not backed by PB and introduced + a separate SerializedException record. (Siddharth Seth via vinodkv) + + YARN-663. Changed ResourceTracker API and LocalizationProtocol API to throw + YarnRemoteException and IOException. (Xuan Gong via vinodkv) + + YARN-590. Added an optional message to be returned by ResourceManager when RM + asks an NM to shutdown/resync etc so that NMs can log this message locally + for better debuggability. (Mayank Bansal via vinodkv) + + YARN-617. Made ContainerTokens to be used for validation at NodeManager + also in unsecure mode to prevent AMs from faking resource requirements in + unsecure mode. (Omkar Vinit Joshi via vinodkv) + + YARN-708. Moved RecordFactory classes to hadoop-yarn-api, and put some + miscellaneous fixes to the interfaces. (Siddharth Seth via vinodkv) + + YARN-711. Copied BuilderUtil methods in individual API records as + BuilderUtils is going to be dismantled. (Jian He via vinodkv) + + YARN-714. Added NMTokens to be sent to AMs as part of heart-beat response. + (Omkar Vinit Joshi via vinodkv) + + YARN-638. Modified ResourceManager to restore RMDelegationTokens after + restarting. (Jian He via vinodkv) + + YARN-660. Improve AMRMClient with matching requests (bikas) + + YARN-717. Put object creation factories for Token in the class itself and + remove useless derivations for specific tokens. (Jian He via vinodkv) + + YARN-756. Move Preemption* records to yarn.api where they really belong. + (Jian He via vinodkv) + + YARN-750. Allow for black-listing resources in YARN API and Impl in CS + (acmurthy via bikas) + + YARN-877. Support resource blacklisting for FifoScheduler. + (Junping Du via llu) + + YARN-686. Flatten NodeReport. (sandyr via tucu) + + YARN-737. Throw some specific exceptions directly instead of wrapping them + in YarnException. (Jian He via sseth) + + YARN-731. RPCUtil.unwrapAndThrowException should unwrap remote + RuntimeExceptions.
(Zhijie Shen via sseth) + + YARN-600. Hook up cgroups CPU settings to the number of virtual cores + allocated. (sandyr via tucu) + + YARN-648. FS: Add documentation for pluggable policy. (kkambatl via tucu) + + YARN-773. Moved YarnRuntimeException from package api.yarn to + api.yarn.exceptions. (Jian He via vinodkv) + + YARN-692. Creating NMToken master key on RM and sharing it with NM as a part + of RM-NM heartbeat. (Omkar Vinit Joshi via vinodkv) + + YARN-782. vcores-pcores ratio functions differently from vmem-pmem ratio in + misleading way. (sandyr via tucu) + + YARN-803. factor out scheduler config validation from the ResourceManager + to each scheduler implementation. (tucu) + + YARN-789. Enable zero capabilities resource requests in fair scheduler. + (tucu) + + YARN-639. Modified Distributed Shell application to start using the new + NMClient library. (Zhijie Shen via vinodkv) + + YARN-693. Modified RM to send NMTokens on allocate call so that AMs can then + use them for authentication with NMs. (Omkar Vinit Joshi via vinodkv) + + YARN-752. In AMRMClient, automatically add corresponding rack requests for + requested nodes. (sandyr via tucu) + + YARN-825. Fixed javadoc and annotations for yarn-common module. (vinodkv) + + YARN-833. Moved Graph and VisualizeStateMachine into yarn.state package. + (Zhijie Shen via vinodkv) + + YARN-805. Fix javadoc and annotations on classes in the yarn-api + package. (Jian He via sseth) + + YARN-846. Move pb Impl classes from yarn-api to yarn-common. (Jian He via + vinodkv) + + YARN-827. Need to make Resource arithmetic methods accessible (Jian He via + bikas) + + YARN-866. Add test for class ResourceWeights. (ywskycn via tucu) + + YARN-736. Add a multi-resource fair sharing metric. (sandyr via tucu) + + YARN-883. Expose Fair Scheduler-specific queue metrics. (sandyr via tucu) + + YARN-569. Add support for requesting and enforcing preemption requests via + a capacity monitor. (Carlo Curino, cdouglas) + + YARN-521. 
Augment AM - RM client module to be able to request containers + only at specific locations (Sandy Ryza via bikas) + + YARN-513. Create common proxy client for communicating with RM. (Xuan Gong + & Jian He via bikas) + + YARN-927. Change ContainerRequest to not have more than 1 container count + and remove StoreContainerRequest (bikas) + + YARN-922. Change FileSystemRMStateStore to use directories (Jian He via + bikas) + + YARN-865. RM webservices can't query based on application Types. (Xuan Gong + via hitesh) + + YARN-912. Move client facing exceptions to yarn-api module. (Mayank Bansal + via vinodkv) + + YARN-84. Use Builder to build RPC server. (Brandon Li via suresh) + + YARN-1046. Disable mem monitoring by default in MiniYARNCluster. (Karthik + Kambatla via Sandy Ryza) + + YARN-1045. Improve toString implementation for PBImpls. (Jian He via sseth) + + OPTIMIZATIONS + + YARN-512. Log aggregation root directory check is more expensive than it + needs to be. (Maysam Yabandeh via jlowe) + + YARN-719. Move RMIdentifier from Container to ContainerTokenIdentifier. + (Vinod Kumar Vavilapalli via sseth) + + BUG FIXES + + YARN-383. AMRMClientImpl should handle null rmClient in stop() + (Hitesh Shah via sseth) + + YARN-385. Add missing fields - location and #containers to + ResourceRequestPBImpl's toString(). (Sandy Ryza via sseth) + + YARN-377. Use the new StringUtils methods added by HADOOP-9252 and fix + TestContainersMonitor. (Chris Nauroth via szetszwo) + + YARN-391. Formatting fixes for LCEResourceHandler classes. + (Steve Loughran via sseth) + + YARN-390. ApplicationCLI and NodeCLI hard-coded platform-specific line + separator causes test failures on Windows. (Chris Nauroth via suresh) + + YARN-406. Fix TestRackResolver to function in networks where "host1" + resolves to a valid host. (Hitesh Shah via sseth) + + YARN-376. Fixes a bug which would prevent the NM knowing about completed + containers and applications. (Jason Lowe via sseth) + + YARN-196. 
Nodemanager should be more robust in handling connection failure + to ResourceManager when a cluster is started (Xuan Gong via hitesh) + + YARN-485. TestProcfsProcessTree#testProcessTree() doesn't wait long enough + for the process to die. (kkambatl via tucu) + + YARN-71. Fix the NodeManager to clean up local-dirs on restart. + (Xuan Gong via sseth) + + YARN-378. Fix RM to make the AM max attempts/retries to be configurable + per application by clients. (Zhijie Shen via vinodkv) + + YARN-498. Unmanaged AM launcher does not set various constants in env for + an AM, also does not handle failed AMs properly. (Hitesh Shah via bikas) + + YARN-496. Fair scheduler configs are refreshed inconsistently in + reinitialize. (Sandy Ryza via tomwhite) + + YARN-474. Fix CapacityScheduler to trigger application-activation when + am-resource-percent configuration is refreshed. (Zhijie Shen via vinodkv) + + YARN-209. Fix CapacityScheduler to trigger application-activation when + the cluster capacity changes. (Zhijie Shen via vinodkv) + + YARN-24. Nodemanager fails to start if log aggregation enabled and + namenode unavailable. (sandyr via tucu) + + YARN-515. Node Manager not getting the master key. (Robert Joseph Evans + via jlowe) + + YARN-382. SchedulerUtils improve way normalizeRequest sets the resource + capabilities. (Zhijie Shen via bikas) + + YARN-467. Modify public distributed cache to localize files such that no + local directory hits unix file count limits and thus prevent job failures. + (Omkar Vinit Joshi via vinodkv) + + YARN-101. Fix NodeManager heartbeat processing to not lose track of completed + containers in case of dropped heartbeats. (Xuan Gong via vinodkv) + + YARN-538. RM address DNS lookup can cause unnecessary slowness on every JHS + page load. (sandyr via tucu) + + YARN-532. Change RMAdmin and Localization client protocol PB implementations + to implement closeable so that they can be stopped when needed via + RPC.stopProxy(). 
(Siddharth Seth via vinodkv) + + YARN-99. Modify private distributed cache to localize files such that no + local directory hits unix file count limits and thus prevent job failures. + (Omkar Vinit Joshi via vinodkv) + + YARN-112. Fixed a race condition during localization that fails containers. + (Omkar Vinit Joshi via vinodkv) + + YARN-534. Change RM restart recovery to also account for AM max-attempts + configuration after the restart. (Jian He via vinodkv) + + YARN-539. Addressed memory leak of LocalResource objects NM when a resource + localization fails. (Omkar Vinit Joshi via vinodkv) + + YARN-319. Submitting a job to a fair scheduler queue for which the user + does not have permission causes the client to wait forever. + (shenhong via tomwhite) + + YARN-412. Fixed FifoScheduler to check hostname of a NodeManager rather + than its host:port during scheduling which caused incorrect locality for + containers. (Roger Hoover via acmurthy) + + YARN-500. Fixed YARN webapps to not roll-over ports when explicitly asked + to use non-ephemeral ports. (Kenji Kikushima via vinodkv) + + YARN-518. Fair Scheduler's document link could be added to the hadoop 2.x + main doc page. (sandyr via tucu) + + YARN-476. ProcfsBasedProcessTree info message confuses users. + (sandyr via tucu) + + YARN-585. Fix failure in TestFairScheduler#testNotAllowSubmitApplication + caused by YARN-514. (Zhijie Shen via vinodkv) + + YARN-547. Fixed race conditions in public and private resource localization + which used to cause duplicate downloads. (Omkar Vinit Joshi via vinodkv) + + YARN-594. Update test and add comments in YARN-534 (Jian He via bikas) + + YARN-549. YarnClient.submitApplication should wait for application to be + accepted by the RM (Zhijie Shen via bikas) + + YARN-605. Fix failing unit test in TestNMWebServices when versionInfo has + parentheses like when running on a git checkout. (Hitesh Shah via vinodkv) + + YARN-289. Fair scheduler allows reservations that won't fit on node.
+ (Sandy Ryza via tomwhite) + + YARN-576. Modified ResourceManager to reject NodeManagers that don't satisfy + minimum resource requirements. (Kenji Kikushima via vinodkv) + + YARN-646. Fix two typos in Fair Scheduler user guide. (Dapeng Sun via atm) + + YARN-507. Add interface visibility and stability annotations to FS + interfaces/classes. (kkambatl via tucu) + + YARN-637. FS: maxAssign is not honored. (kkambatl via tucu) + + YARN-655. Fair scheduler metrics should subtract allocated memory from + available memory. (sandyr via tucu) + + YARN-628. Fix the way YarnRemoteException is being unrolled to extract out + the underlying exception. (Siddharth Seth via vinodkv) + + YARN-695. Remove masterContainer and status unused fields from + ApplicationReportProto and fix bugs in ApplicationReportPBImpl. (Zhijie Shen + via vinodkv) + + YARN-706. Fixed race conditions in TestFSDownload. (Zhijie Shen via vinodkv). + + YARN-715. Fixed unit test failures - TestDistributedShell and + TestUnmanagedAMLauncher. (Vinod Kumar Vavilapalli via sseth) + + YARN-578. Fixed NM to use SecureIOUtils for reading and aggregating logs. + (Omkar Vinit Joshi via vinodkv) + + YARN-733. Fixed TestNMClient from failing occasionally. (Zhijie Shen via + vinodkv) + + YARN-730. Fix NMClientAsync to remove completed containers. (Zhijie Shen + via acmurthy) + + YARN-726. Fix queue & finish time fields in web-ui for ResourceManager. + (Mayank Bansal via acmurthy) + + YARN-757. Changed TestRMRestart to use the default scheduler to avoid test + failures. (Bikas Saha via vinodkv) + + YARN-742. Log aggregation causes a lot of redundant setPermission calls. + (jlowe via kihwal) + + YARN-764. blank Used Resources on Capacity Scheduler page (Nemon Lou via + tgraves) + + YARN-761. TestNMClientAsync fails sometimes (Zhijie Shen via bikas) + + YARN-760. NodeManager throws AvroRuntimeException on failed start. + (Niranjan Singh via jlowe) + + YARN-767. Initialize application metrics at RM bootup.
(Jian He via + acmurthy) + + YARN-700. TestInfoBlock fails on Windows because of line ending mismatch. + (Ivan Mitic via cnauroth) + + YARN-117. Migrated rest of YARN to the new service model. (Steve Loughran via + vinodkv) + + YARN-812. Set default logger for application summary logger to + hadoop.root.logger. (sseth via acmurthy) + + YARN-848. Nodemanager does not register with RM using the fully qualified + hostname. (Hitesh Shah via sseth) + + YARN-854. Fixing YARN bugs that are failing applications in secure + environment. (Omkar Vinit Joshi via vinodkv) + + YARN-861. TestContainerManager is failing. (Vinod Kumar Vavilapalli via + hitesh) + + YARN-874. Making common RPC to switch to not switch to simple when other + mechanisms are enabled and thus fix YARN/MR test failures after HADOOP-9421. + (Daryn Sharp and Vinod Kumar Vavilapalli via vinodkv) + + YARN-845. RM crash with NPE on NODE_UPDATE (Mayank Bansal via bikas) + + YARN-369. Handle ( or throw a proper error when receiving) status updates + from application masters that have not registered (Mayank Bansal & + Abhishek Kapoor via bikas) + + YARN-541. getAllocatedContainers() is not returning all the allocated + containers (bikas) + + YARN-763. AMRMClientAsync should stop heartbeating after receiving + shutdown from RM (Xuan Gong via bikas) + + YARN-654. AMRMClient: Perform sanity checks for parameters of public + methods (Xuan Gong via bikas) + + YARN-919. Document setting default heap sizes in yarn-env.sh (Mayank + Bansal via hitesh) + + YARN-795. Fair scheduler queue metrics should subtract allocated vCores from + available vCores. (ywskycn via tucu) + + YARN-799. Fix CgroupsLCEResourcesHandler to use /tasks instead of + /cgroup.procs. (Chris Riccomini via acmurthy) + + YARN-333. Schedulers cannot control the queue-name of an + application. (sandyr via tucu) + + YARN-368. Fixed a typo in error message in Auxiliary services. (Albert Chu + via vinodkv) + + YARN-295.
Fixed a race condition in ResourceManager RMAppAttempt state + machine. (Mayank Bansal via vinodkv) + + YARN-523. Modified a test-case to validate container diagnostics on + localization failures. (Jian He via vinodkv) + + YARN-661. Fixed NM to cleanup users' local directories correctly when + starting up. (Omkar Vinit Joshi via vinodkv) + + YARN-820. Fixed an invalid state transition in NodeManager caused by failing + resource localization. (Mayank Bansal via vinodkv) + + YARN-62. Modified NodeManagers to avoid AMs from abusing container tokens for + repetitive container launches. (Omkar Vinit Joshi via vinodkv) + + YARN-814. Improving diagnostics when containers fail during launch due to + various reasons like invalid env etc. (Jian He via vinodkv) + + YARN-897. Ensure child queues are ordered correctly to account for + completed containers. (Djellel Eddine Difallah via acmurthy) + + YARN-853. Fixed CapacityScheduler's maximum-am-resource-percent to properly + work beyond refreshing queues. (Devaraj K via vinodkv) + + YARN-873. YARNClient.getApplicationReport(unknownAppId) returns a null + report (Xuan Gong via bikas) + + YARN-875. Application can hang if AMRMClientAsync callback thread has + exception (Xuan Gong via bikas) + + YARN-968. RM admin commands don't work. (vinodkv via kihwal) + + YARN-688. Fixed NodeManager to properly cleanup containers when it is shut + down. (Jian He via vinodkv) + + YARN-960. Fixed ResourceManager to propagate client-submitted credentials + irrespective of security. (Daryn Sharp via vinodkv) + + YARN-937. Fix unmanaged AM in non-secure/secure setup post YARN-701. (tucu) + + YARN-932. TestResourceLocalizationService.testLocalizationInit can fail on + JDK7. (Karthik Kambatla via Sandy Ryza) + + YARN-961. Changed ContainerManager to enforce Token auth irrespective of + security. (Omkar Vinit Joshi via vinodkv) + + YARN-945. 
Removed setting of AMRMToken's service from ResourceManager + and changed client libraries do it all the time and correctly. (vinodkv) + + YARN-656. In scheduler UI, including reserved memory in Memory Total can + make it exceed cluster capacity. (Sandy Ryza) + + BREAKDOWN OF HADOOP-8562/YARN-191 SUBTASKS AND RELATED JIRAS + + YARN-158. Yarn creating package-info.java must not depend on sh. + (Chris Nauroth via suresh) + + YARN-176. Some YARN tests fail to find winutils. (Chris Nauroth via suresh) + + YARN-207. YARN distribution build fails on Windows. (Chris Nauroth via + suresh) + + YARN-199. Yarn cmd line scripts for windows. (Ivan Mitic via suresh) + + YARN-213. YARN build script would be more readable using abspath. + (Chris Nauroth via suresh) + + YARN-233. Added support for running containers in MS Windows to YARN. (Chris + Nauroth via acmurthy) + + YARN-234. Added support for process tree and resource calculator in MS Windows + to YARN. (Chris Nauroth via acmurthy) + + YARN-259. Fix LocalDirsHandlerService to use Path rather than URIs. (Xuan + Gong via acmurthy) + + YARN-316. YARN container launch may exceed maximum Windows command line + length due to long classpath. (Chris Nauroth via suresh) + + YARN-359. Fixing commands for container signalling in Windows. (Chris Nauroth + via vinodkv) + + YARN-506. Move to common utils FileUtil#setReadable/Writable/Executable and + FileUtil#canRead/Write/Execute. (Ivan Mitic via suresh) + + YARN-488. TestContainerManagerSecurity fails on Windows. (Chris Nauroth + via hitesh) + + YARN-490. TestDistributedShell fails on Windows. (Chris Nauroth via hitesh) + + YARN-491. TestContainerLogsPage fails on Windows. (Chris Nauroth via hitesh) + + YARN-487. Modify path manipulation in LocalDirsHandlerService to let + TestDiskFailures pass on Windows. (Chris Nauroth via vinodkv) + + YARN-593. 
container launch on Windows does not correctly populate + classpath with new process's environment variables and localized resources + (Chris Nauroth via bikas) + + YARN-493. Fixed some shell related flaws in YARN on Windows. (Chris Nauroth + via vinodkv) + + YARN-839. TestContainerLaunch.testContainerEnvVariables fails on Windows. + (Chuan Liu via cnauroth) + + YARN-597. TestFSDownload fails on Windows due to dependencies on + tar/gzip/jar tools. (Ivan Mitic via acmurthy) + + YARN-852. TestAggregatedLogFormat.testContainerLogsFileAccess fails on + Windows. (Chuan Liu via cnauroth) + + YARN-894. NodeHealthScriptRunner timeout checking is inaccurate on Windows. + (Chuan Liu via cnauroth) + + YARN-909. Disable TestLinuxContainerExecutorWithMocks on Windows. (Chuan Liu + via cnauroth) + + YARN-1043. Push all metrics consistently. (Jian He via acmurthy) + + YARN-1056. Remove dual use of string 'resourcemanager' in + yarn.resourcemanager.connect.{max.wait.secs|retry_interval.secs} + (Karthik Kambatla via acmurthy) + +Release 2.0.6-alpha - 08/22/2013 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-854. Fixing YARN bugs that are failing applications in secure + environment. (Omkar Vinit Joshi and shv) + +Release 2.0.5-alpha - 06/06/2013 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + +Release 2.0.4-alpha - 2013-04-25 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-429. capacity-scheduler config missing from yarn-test artifact. + (sseth via hitesh) + + YARN-470. Support a way to disable resource monitoring on the NodeManager. + (Siddharth Seth via hitesh) + +Release 2.0.3-alpha - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + YARN-145. Add a Web UI to the fair share scheduler. (Sandy Ryza via tomwhite) + + YARN-3. Add support for CPU isolation/monitoring of containers. + (adferguson via tucu) + + YARN-230. 
RM Restart phase 1 - includes support for saving/restarting all + applications on an RM bounce. (Bikas Saha via acmurthy) + + YARN-103. Add a yarn AM-RM client module. (Bikas Saha via sseth) + + YARN-286. Add a YARN ApplicationClassLoader. (tomwhite) + + YARN-2. Enhanced CapacityScheduler to account for CPU along with memory for + multi-dimensional resource scheduling. (acmurthy) + + YARN-328. Use token request messages defined in hadoop common. (suresh) + + YARN-231. RM Restart - Add FS-based persistent store implementation for + RMStateStore (Bikas Saha via hitesh) + + IMPROVEMENTS + + YARN-223. Update process tree instead of getting new process trees. + (Radim Kolar via llu) + + YARN-57. Allow process-tree based resource calculation et al. to be + pluggable to support it on multiple platforms. (Radim Kolar via acmurthy) + + YARN-78. Changed UnManagedAM application to use YarnClient. (Bikas Saha via + vinodkv) + + YARN-93. Fixed RM to propagate diagnostics from applications that have + finished but failed (Jason Lowe via vinodkv). + + YARN-28. Fixed TestCompositeService to not depend on test-order and thus + made it pass on JDK7 (Thomas Graves via vinodkv). + + YARN-82. Change the default local and log dirs to be based on + hadoop.tmp.dir and yarn.log.dir. (Hemanth Yamijala via sseth) + + YARN-53. Added the missing getGroups API to ResourceManager. (Bo Wang via + vinodkv) + + YARN-116. Add the ability to change the RM include/exclude file without + a restart. (xieguiming and Harsh J via sseth) + + YARN-23. FairScheduler: FSQueueSchedulable#updateDemand() - potential + redundant aggregation. (kkambatl via tucu) + + YARN-127. Move RMAdmin tool to its correct location - the client module. + (vinodkv) + + YARN-40. Provided support for missing YARN commands (Devaraj K and Vinod + Kumar Vavilapalli via vinodkv) + + YARN-33. Change LocalDirsHandlerService to validate the configured local and + log dirs. (Mayank Bansal via sseth) + + YARN-94.
Modify DistributedShell to point to main-class by default, clean up + the help message, and hard-code the AM class. (Hitesh Shah via vinodkv) + + YARN-146. Add unit tests for computing fair share in the fair scheduler. + (Sandy Ryza via tomwhite) + + HADOOP-8911. CRLF characters in source and text files. + (Raja Aluri via suresh) + + YARN-136. Make ClientToAMTokenSecretManager part of RMContext (Vinod Kumar + Vavilapalli via sseth) + + YARN-183. Clean up fair scheduler code. (Sandy Ryza via tomwhite) + + YARN-129. Simplify classpath construction for mini YARN tests. (tomwhite) + + YARN-254. Update fair scheduler web UI for hierarchical queues. + (sandyr via tucu) + + YARN-315. Using the common security token protobuf definition from hadoop + common. (Suresh Srinivas via vinodkv) + + YARN-170. Change NodeManager stop to be reentrant. (Sandy Ryza via vinodkv) + + YARN-331. Fill in missing fair scheduler documentation. (sandyr via tucu) + + YARN-277. Use AMRMClient in DistributedShell to exemplify the approach. + (Bikas Saha via hitesh) + + YARN-360. Allow apps to concurrently register tokens for renewal. + (Daryn Sharp via sseth) + + OPTIMIZATIONS + + BUG FIXES + + YARN-131. Fix incorrect ACL properties in capacity scheduler documentation. + (Ahmed Radwan via sseth) + + YARN-102. Move the apache header to the top of the file in MemStore.java. + (Devaraj K via sseth) + + YARN-134. ClientToAMSecretManager creates keys without checking for + validity of the appID. (Vinod Kumar Vavilapalli via sseth) + + YARN-30. Fixed tests verifying web-services to work on JDK7. (Thomas Graves + via vinodkv) + + YARN-150. Fixes AppRejectedTransition does not unregister a rejected + app-attempt from the ApplicationMasterService (Bikas Saha via sseth) + + YARN-140. Add capacity-scheduler-default.xml to provide a default set of + configurations for the capacity scheduler. (ahmed via tucu) + + YARN-179. Fix some unit test failures. (Vinod Kumar Vavilapalli via sseth) + + YARN-181. 
Fixed eclipse settings broken by capacity-scheduler.xml move via + YARN-140. (Siddharth Seth via vinodkv) + + YARN-169. Update log4j.appender.EventCounter to use + org.apache.hadoop.log.metrics.EventCounter (Anthony Rojas via tomwhite) + + YARN-184. Remove unnecessary locking in fair scheduler, and address + findbugs excludes. (sandyr via tucu) + + YARN-224. Fair scheduler logs too many nodeUpdate INFO messages. + (Sandy Ryza via tomwhite) + + YARN-222. Fair scheduler should create queue for each user by default. + (Sandy Ryza via tomwhite) + + MAPREDUCE-4778. Fair scheduler event log is only written if directory + exists on HDFS. (Sandy Ryza via tomwhite) + + YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy) + + YARN-187. Add hierarchical queues to the fair scheduler. + (Sandy Ryza via tomwhite) + + YARN-72. NM should handle cleaning up containers when it shuts down. + (Sandy Ryza via tomwhite) + + YARN-267. Fix fair scheduler web UI. (Sandy Ryza via tomwhite) + + YARN-264. y.s.rm.DelegationTokenRenewer attempts to renew token even + after removing an app. (kkambatl via tucu) + + YARN-271. Fair scheduler hits IllegalStateException trying to reserve + different apps on same node. (Sandy Ryza via tomwhite) + + YARN-272. Fair scheduler log messages try to print objects without + overridden toString methods. (sandyr via tucu) + + YARN-278. Fair scheduler maxRunningApps config causes no apps to make + progress. (sandyr via tucu) + + YARN-282. Fair scheduler web UI double counts Apps Submitted. + (sandyr via tucu) + + YARN-283. Fair scheduler fails to get queue info without root prefix. + (sandyr via tucu) + + YARN-192. Node update causes NPE in the fair scheduler. + (Sandy Ryza via tomwhite) + + YARN-288. Fair scheduler queue doesn't accept any jobs when ACLs are + configured. (Sandy Ryza via tomwhite) + + YARN-300. After YARN-271, fair scheduler can infinite loop and not + schedule any application. (Sandy Ryza via tomwhite) + + YARN-301. 
Fair scheduler throws ConcurrentModificationException when + iterating over app's priorities. (Sandy Ryza via tomwhite) + + YARN-217. Fix RMAdmin protocol description to make it work in secure mode + also. (Devaraj K via vinodkv) + + YARN-253. Fixed container-launch to not fail when there are no local + resources to localize. (Tom White via vinodkv) + + YARN-330. Fix flakey test: TestNodeManagerShutdown#testKillContainersOnShutdown. + (Sandy Ryza via hitesh) + + YARN-335. Fair scheduler doesn't check whether rack needs containers + before assigning to node. (Sandy Ryza via tomwhite) + + YARN-336. Fair scheduler FIFO scheduling within a queue only allows 1 + app at a time. (Sandy Ryza via tomwhite) + + YARN-135. Client tokens should be per app-attempt, and should be + unregistered on App-finish. (vinodkv via sseth) + + YARN-302. Fair scheduler assignmultiple should default to false. (sandyr via tucu) + + YARN-372. Move InlineDispatcher from hadoop-yarn-server-resourcemanager to + hadoop-yarn-common (sseth via hitesh) + + YARN-370. Fix SchedulerUtils to correctly round up the resource for + containers. (Zhijie Shen via acmurthy) + + YARN-355. Fixes a bug where RM app submission could jam under load. + (Daryn Sharp via sseth) + +Release 2.0.2-alpha - 2012-09-07 + + INCOMPATIBLE CHANGES + + YARN-9. Rename YARN_HOME to HADOOP_YARN_HOME. (vinodkv via acmurthy) + + NEW FEATURES + + YARN-1. Promote YARN to be a sub-project of Apache Hadoop. (acmurthy) + + IMPROVEMENTS + + YARN-29. Add a yarn-client module. (Vinod Kumar Vavilapalli via sseth) + + YARN-10. Fix DistributedShell module to not have a dependency on + hadoop-mapreduce-client-core. (Hitesh Shah via vinodkv) + + YARN-80. Add support for delaying rack-local containers in + CapacityScheduler. (acmurthy) + + YARN-137. Change the default YARN scheduler to be the CapacityScheduler. + (sseth via acmurthy) + + OPTIMIZATIONS + + BUG FIXES + + YARN-12. Fix findbugs warnings in FairScheduler.
(Junping Du via acmurthy) + + YARN-22. Fix ContainerLogs to work if the log-dir is specified as a URI. + (Mayank Bansal via sseth) + + YARN-37. Change TestRMAppTransitions to use the DrainDispatcher. + (Mayank Bansal via sseth) + + YARN-79. Implement close on all clients to YARN so that RPC clients don't + throw exceptions on shut-down. (Vinod Kumar Vavilapalli) + + YARN-42. Modify NM's non-aggregating logs' handler to stop properly so that + NMs don't get NPEs on startup errors. (Devaraj K via vinodkv) + + YARN-15. Updated default classpath for YARN applications to reflect split of + YARN into a sub-project. (Arun C Murthy via vinodkv) + + YARN-75. Modified ResourceManager's RMContainer to handle a valid RELEASE + event at RUNNING state. (Siddharth Seth via vinodkv) + + MAPREDUCE-2374. "Text File Busy" errors launching MR tasks. (Andy Isaacson + via atm) + + YARN-138. Ensure default values for minimum/maximum container sizes is + sane. (harsh & sseth via acmurthy) + +Release 0.23.11 - UNRELEASED + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-1180. Update capacity scheduler docs to include types on the configs + (Chen He via jeagles) + +Release 0.23.10 - 2013-12-09 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-985. Nodemanager should log where a resource was localized (Ravi + Prakash via jeagles) + + YARN-1119. Add ClusterMetrics checks to the TestRMNodeTransitions tests + (Mit Desai via jeagles) + + OPTIMIZATIONS + + BUG FIXES + + YARN-337. RM handles killed application tracking URL poorly (jlowe) + + YARN-1101. Active nodes can be decremented below 0 (Robert Parker + via tgraves) + + YARN-1176. RM web services ClusterMetricsInfo total nodes doesn't include + unhealthy nodes (Jonathan Eagles via tgraves) + + YARN-1386.
NodeManager mistakenly loses resources and relocalizes them + (Jason Lowe via jeagles) + +Release 0.23.9 - 2013-07-08 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-427. Coverage fix for org.apache.hadoop.yarn.server.api.* (Aleksey + Gorshkov via jeagles) + + YARN-478. fix coverage org.apache.hadoop.yarn.webapp.log (Aleksey Gorshkov + via jeagles) + + OPTIMIZATIONS + + BUG FIXES + +Release 0.23.8 - 2013-06-05 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + YARN-548. Add tests for YarnUncaughtExceptionHandler (Vadim Bondarev via + jeagles) + + BUG FIXES + + YARN-363. Add webapps/proxy directory without which YARN proxy-server fails + when started in stand-alone mode. (Kenji Kikushima via vinodkv) + + YARN-690. RM exits on token cancel/renew problems (daryn via bobby) + +Release 0.23.7 - 2013-04-18 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-133 Update web services docs for RM clusterMetrics (Ravi Prakash via + kihwal) + + YARN-249. Capacity Scheduler web page should show list of active users per + queue like it used to (in 1.x) (Ravi Prakash via tgraves) + + YARN-236. RM should point tracking URL to RM web page when app fails to + start (Jason Lowe via jeagles) + + YARN-269. Resource Manager not logging the health_check_script result when + taking it out (Jason Lowe via kihwal) + + YARN-227. Application expiration difficult to debug for end-users + (Jason Lowe via jeagles) + + YARN-443. allow OS scheduling priority of NM to be different than the + containers it launches (tgraves) + + YARN-468. coverage fix for org.apache.hadoop.yarn.server.webproxy.amfilter + (Aleksey Gorshkov via bobby) + + YARN-200. yarn log does not output all needed information, and is in a + binary format (Ravi Prakash via jlowe) + + YARN-525. make CS node-locality-delay refreshable (Thomas Graves via jlowe) + + OPTIMIZATIONS + + YARN-357. App submission should not be synchronized (daryn) + + BUG FIXES + + YARN-343. 
Capacity Scheduler maximum-capacity value -1 is invalid (Xuan + Gong via tgraves) + + YARN-364. AggregatedLogDeletionService can take too long to delete logs + (jlowe) + + YARN-362. Unexpected extra results when using webUI table search (Ravi + Prakash via jlowe) + + YARN-400. RM can return null application resource usage report leading to + NPE in client (Jason Lowe via tgraves) + + YARN-426. Failure to download a public resource prevents further downloads + (Jason Lowe via bobby) + + YARN-448. Remove unnecessary hflush from log aggregation (Kihwal Lee via + bobby) + + YARN-345. Many InvalidStateTransitonException errors for ApplicationImpl + in Node Manager (Robert Parker via jlowe) + + YARN-109. .tmp file is not deleted for localized archives (Mayank Bansal + via bobby) + + YARN-460. CS user left in list of active users for the queue even when + application finished (tgraves) + +Release 0.23.6 - 2013-02-06 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-285. Added a temporary plugin interface for RM to be able to redirect + to JobHistory server for apps that it no longer tracks. (Derek Dagit via + vinodkv) + + OPTIMIZATIONS + + BUG FIXES + + YARN-188. Coverage fixing for CapacityScheduler (Aleksey Gorshkov via + bobby) + + YARN-214. RMContainerImpl does not handle event EXPIRE at state RUNNING + (jeagles via bobby) + + YARN-151. Browser thinks RM main page JS is taking too long + (Ravi Prakash via bobby) + + YARN-204. test coverage for org.apache.hadoop.tools (Aleksey Gorshkov via + bobby) + + YARN-251. Proxy URI generation fails for blank tracking URIs (Tom White + via jlowe) + + YARN-258. RM web page UI shows Invalid Date for start and finish times + (Ravi Prakash via jlowe) + + YARN-266. RM and JHS Web UIs are blank because AppsBlock is not escaping + string properly (Ravi Prakash via jlowe) + + YARN-280. RM does not reject app submission with invalid tokens + (Daryn Sharp via tgraves) + + YARN-225. 
Proxy Link in RM UI throws NPE in Secure mode + (Devaraj K via bobby) + + YARN-293. Node Manager leaks LocalizerRunner object for every Container + (Robert Joseph Evans via jlowe) + + YARN-50. Implement renewal / cancellation of Delegation Tokens + (Siddharth Seth via tgraves) + + YARN-320. RM should always be able to renew its own tokens. + (Daryn Sharp via sseth) + + YARN-325. RM CapacityScheduler can deadlock when getQueueInfo() is + called and a container is completing (Arun C Murthy via tgraves) + + YARN-334. Maven RAT plugin is not checking all source files (tgraves) + + YARN-354. WebAppProxyServer exits immediately after startup (Liang Xie via + jlowe) + +Release 0.23.5 - 2012-11-28 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + YARN-161. Fix multiple compiler warnings for unchecked operations in YARN + common. (Chris Nauroth via vinodkv) + + YARN-43. Fix TestResourceTrackerService to not depend on test order and thus + pass on JDK7. (Thomas Graves via vinodkv) + + YARN-32. Fix TestApplicationTokens to not depend on test order and thus pass + on JDK7. (vinodkv) + + YARN-186. Coverage fixing LinuxContainerExecutor (Aleksey Gorshkov via + bobby) + + YARN-216. Remove jquery theming support. (Robert Joseph Evans via jlowe) + + OPTIMIZATIONS + + BUG FIXES + + YARN-163. Retrieving container log via NM webapp can hang with multibyte + characters in log (jlowe via bobby) + + YARN-174. Modify NodeManager to pass the user's configuration even when + rebooting. (vinodkv) + + YARN-177. CapacityScheduler - adding a queue while the RM is running has + wacky results (acmurthy via tgraves) + + YARN-178. Fix custom ProcessTree instance creation (Radim Kolar via bobby) + + YARN-180. Capacity scheduler - containers that get reserved create + container token too early (acmurthy and bobby) + + YARN-139. Interrupted Exception within AsyncDispatcher leads to user + confusion. (Vinod Kumar Vavilapalli via jlowe) + + YARN-165. 
RM should point tracking URL to RM web page for app when AM fails + (jlowe via bobby) + + YARN-159. RM web ui applications page should be sorted to display last app + first (tgraves via bobby) + + YARN-166. capacity scheduler doesn't allow capacity < 1.0 (tgraves via + bobby) + + YARN-189. Fixed a deadlock between RM's ApplicationMasterService and the + dispatcher. (Thomas Graves via vinodkv) + + YARN-202. Log Aggregation generates a storm of fsync() for namenode + (Kihwal Lee via bobby) + + YARN-201. Fix CapacityScheduler to be less conservative for starved + off-switch requests. (jlowe via acmurthy) + + YARN-206. TestApplicationCleanup.testContainerCleanup occasionally fails. + (jlowe via jeagles) + + YARN-212. NM state machine ignores an APPLICATION_CONTAINER_FINISHED event + when it shouldn't (Nathan Roberts via jlowe) + + YARN-219. NM should aggregate logs when application finishes. (bobby) + +Release 0.23.4 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + Change package of YarnClient to org.apache.hadoop. (Bikas Saha via vinodkv) + + YARN-108. FSDownload can create cache directories with the wrong + permissions (Jason Lowe via bobby) + + OPTIMIZATIONS + + BUG FIXES + + YARN-88. DefaultContainerExecutor can fail to set proper permissions. + (Jason Lowe via sseth) + + YARN-106. Nodemanager needs to set permissions of local directories (jlowe + via bobby) + +Release 0.23.3 + + INCOMPATIBLE CHANGES + + NEW FEATURES + + IMPROVEMENTS + + OPTIMIZATIONS + + BUG FIXES + + YARN-14. Symlinks to peer distributed cache files no longer work + (Jason Lowe via bobby) + + YARN-25. remove old aggregated logs (Robert Evans via tgraves) + + YARN-27. Failed refreshQueues due to misconfiguration prevents further + refreshing of queues (Arun Murthy via tgraves) + + YARN-58. NM leaks filesystems (Jason Lowe via jeagles) + + YARN-39. RM-NM secret-keys should be randomly generated and rolled every + so often. (vinodkv and sseth via sseth) + + YARN-31. 
Fix TestDelegationTokenRenewer to not depend on test order so as to + pass tests on jdk7. (Thomas Graves via vinodkv) + + YARN-63. RMNodeImpl is missing valid transitions from the UNHEALTHY state + (Jason Lowe via bobby) + + YARN-60. Fixed a bug in ResourceManager which causes all NMs to get NPEs and + thus causes all containers to be rejected. (vinodkv) + + YARN-66. aggregated logs permissions not set properly (tgraves via bobby) + + YARN-68. NodeManager will refuse to shutdown indefinitely due to container + log aggregation (daryn via bobby) + + YARN-87. NM ResourceLocalizationService does not set permissions of local + cache directories (Jason Lowe via tgraves) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java index 2c2238fa842..4f56535c4e1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ContainerStatus.java @@ -20,6 +20,7 @@ package org.apache.hadoop.yarn.api.records; import org.apache.hadoop.classification.InterfaceAudience.Private; import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Stable; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.util.Records; @@ -31,6 +32,7 @@ import org.apache.hadoop.yarn.util.Records; * It provides details such as: *
    *
  • {@code ContainerId} of the container.
  • + *
  • {@code ExecutionType} of the container.
  • *
  • {@code ContainerState} of the container.
  • *
  • Exit status of a completed container.
  • *
  • Diagnostic message for a failed container.
  • @@ -45,7 +47,17 @@ public abstract class ContainerStatus { @Unstable public static ContainerStatus newInstance(ContainerId containerId, ContainerState containerState, String diagnostics, int exitStatus) { + return newInstance(containerId, ExecutionType.GUARANTEED, containerState, + diagnostics, exitStatus); + } + + @Private + @Unstable + public static ContainerStatus newInstance(ContainerId containerId, + ExecutionType executionType, ContainerState containerState, + String diagnostics, int exitStatus) { ContainerStatus containerStatus = Records.newRecord(ContainerStatus.class); + containerStatus.setExecutionType(executionType); containerStatus.setState(containerState); containerStatus.setContainerId(containerId); containerStatus.setDiagnostics(diagnostics); @@ -65,6 +77,18 @@ public abstract class ContainerStatus { @Unstable public abstract void setContainerId(ContainerId containerId); + /** + * Get the ExecutionType of the container. + * @return ExecutionType of the container + */ + @Public + @Evolving + public abstract ExecutionType getExecutionType(); + + @Private + @Unstable + public abstract void setExecutionType(ExecutionType executionType); + /** * Get the ContainerState of the container. * @return ContainerState of the container diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ExecutionType.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ExecutionType.java new file mode 100644 index 00000000000..27cc74d7d9b --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/api/records/ExecutionType.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.yarn.api.records; + +import org.apache.hadoop.classification.InterfaceAudience.Public; +import org.apache.hadoop.classification.InterfaceStability.Evolving; + +/** + * Container property encoding execution semantics. + * + *

    + * The execution types are the following: + *

      + *
    • {@link #GUARANTEED} - this container is guaranteed to start its + * execution, once the corresponding start container request is received by + * an NM. + *
    • {@link #OPPORTUNISTIC} - the execution of this container may not start + * immediately at the NM that receives the corresponding start container + * request (depending on the NM's available resources). Moreover, it may be + * preempted if it blocks a GUARANTEED container from being executed. + *
    + */ +@Public +@Evolving +public enum ExecutionType { + GUARANTEED, OPPORTUNISTIC +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ContainerContext.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ContainerContext.java index f7a9b02c2ab..fbf33452f80 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ContainerContext.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/java/org/apache/hadoop/yarn/server/api/ContainerContext.java @@ -23,6 +23,7 @@ import org.apache.hadoop.classification.InterfaceAudience.Public; import org.apache.hadoop.classification.InterfaceStability.Evolving; import org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.Resource; /** @@ -36,6 +37,7 @@ public class ContainerContext { private final ContainerId containerId; private final Resource resource; private final ContainerType containerType; + private final ExecutionType executionType; @Private @Unstable @@ -48,10 +50,20 @@ public class ContainerContext { @Unstable public ContainerContext(String user, ContainerId containerId, Resource resource, ContainerType containerType) { + this(user, containerId, resource, containerType, + ExecutionType.GUARANTEED); + } + + @Private + @Unstable + public ContainerContext(String user, ContainerId containerId, + Resource resource, ContainerType containerType, + ExecutionType executionType) { this.user = user; this.containerId = containerId; this.resource = resource; this.containerType = containerType; + this.executionType = executionType; } /** @@ -91,4 +103,14 @@ public class ContainerContext { public ContainerType getContainerType() { return containerType; } + + /** + * Get {@link ExecutionType} the 
execution type of the container + * being initialized or stopped. + * + * @return the execution type of the container + */ + public ExecutionType getExecutionType() { + return executionType; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto index 0d508a64f99..3546b88490c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/src/main/proto/yarn_protos.proto @@ -288,6 +288,12 @@ message AMBlackListingRequestProto { optional bool blacklisting_enabled = 1 [default = false]; optional float blacklisting_failure_threshold = 2; } + +enum ExecutionTypeProto { + GUARANTEED = 1; + OPPORTUNISTIC = 2; +} + //////////////////////////////////////////////////////////////////////// ////// From AM_RM_Protocol ///////////////////////////////////////////// //////////////////////////////////////////////////////////////////////// @@ -513,6 +519,7 @@ message ContainerStatusProto { optional string diagnostics = 3 [default = "N/A"]; optional int32 exit_status = 4 [default = -1000]; optional ResourceProto capability = 5; + optional ExecutionTypeProto executionType = 6 [default = GUARANTEED]; } enum ContainerExitStatusProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java index d33d06dba46..f1fdae9f98e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ContainerStatusPBImpl.java @@ -24,9 +24,11 @@ import 
org.apache.hadoop.classification.InterfaceStability.Unstable; import org.apache.hadoop.yarn.api.records.ContainerId; import org.apache.hadoop.yarn.api.records.ContainerState; import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.Resource; import org.apache.hadoop.yarn.proto.YarnProtos.ResourceProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerIdProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerStatusProtoOrBuilder; @@ -79,6 +81,7 @@ public class ContainerStatusPBImpl extends ContainerStatus { StringBuilder sb = new StringBuilder(); sb.append("ContainerStatus: ["); sb.append("ContainerId: ").append(getContainerId()).append(", "); + sb.append("ExecutionType: ").append(getExecutionType()).append(", "); sb.append("State: ").append(getState()).append(", "); sb.append("Capability: ").append(getCapability()).append(", "); sb.append("Diagnostics: ").append(getDiagnostics()).append(", "); @@ -107,7 +110,25 @@ public class ContainerStatusPBImpl extends ContainerStatus { } viaProto = false; } - + + @Override + public synchronized ExecutionType getExecutionType() { + ContainerStatusProtoOrBuilder p = viaProto ? 
proto : builder; + if (!p.hasExecutionType()) { + return null; + } + return convertFromProtoFormat(p.getExecutionType()); + } + + @Override + public synchronized void setExecutionType(ExecutionType executionType) { + maybeInitBuilder(); + if (executionType == null) { + builder.clearExecutionType(); + return; + } + builder.setExecutionType(convertToProtoFormat(executionType)); + } @Override public synchronized ContainerState getState() { @@ -206,6 +227,14 @@ public class ContainerStatusPBImpl extends ContainerStatus { return ((ContainerIdPBImpl)t).getProto(); } + private ExecutionType convertFromProtoFormat(ExecutionTypeProto e) { + return ProtoUtils.convertFromProtoFormat(e); + } + + private ExecutionTypeProto convertToProtoFormat(ExecutionType e) { + return ProtoUtils.convertToProtoFormat(e); + } + private ResourceProto convertToProtoFormat(Resource e) { return ((ResourcePBImpl)e).getProto(); } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java index 15df7844423..789efe39ea5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/api/records/impl/pb/ProtoUtils.java @@ -28,6 +28,7 @@ import org.apache.hadoop.yarn.api.records.ApplicationAccessType; import org.apache.hadoop.yarn.api.records.ApplicationResourceUsageReport; import org.apache.hadoop.yarn.api.records.ContainerRetryPolicy; import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.FinalApplicationStatus; import org.apache.hadoop.yarn.api.records.LocalResourceType; import 
org.apache.hadoop.yarn.api.records.LocalResourceVisibility; @@ -56,6 +57,7 @@ import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationAttemptStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.YarnApplicationStateProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerRetryPolicyProto; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerTypeProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeProto; import org.apache.hadoop.yarn.proto.YarnServiceProtos; import org.apache.hadoop.yarn.server.api.ContainerType; @@ -297,4 +299,14 @@ public class ProtoUtils { ContainerRetryPolicyProto e) { return ContainerRetryPolicy.valueOf(e.name()); } + + /* + * ExecutionType + */ + public static ExecutionTypeProto convertToProtoFormat(ExecutionType e) { + return ExecutionTypeProto.valueOf(e.name()); + } + public static ExecutionType convertFromProtoFormat(ExecutionTypeProto e) { + return ExecutionType.valueOf(e.name()); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java index 106e6d5d7e8..0b03f506f5a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/security/ContainerTokenIdentifier.java @@ -33,6 +33,7 @@ import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.security.token.TokenIdentifier; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.LogAggregationContext; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ 
-43,6 +44,7 @@ import org.apache.hadoop.yarn.api.records.impl.pb.ProtoUtils; import org.apache.hadoop.yarn.api.records.impl.pb.ResourcePBImpl; import org.apache.hadoop.yarn.nodelabels.CommonNodeLabelsManager; import org.apache.hadoop.yarn.proto.YarnProtos.ContainerTypeProto; +import org.apache.hadoop.yarn.proto.YarnProtos.ExecutionTypeProto; import org.apache.hadoop.yarn.proto.YarnSecurityTokenProtos.ContainerTokenIdentifierProto; import org.apache.hadoop.yarn.server.api.ContainerType; @@ -85,6 +87,16 @@ public class ContainerTokenIdentifier extends TokenIdentifier { long rmIdentifier, Priority priority, long creationTime, LogAggregationContext logAggregationContext, String nodeLabelExpression, ContainerType containerType) { + this(containerID, hostName, appSubmitter, r, expiryTimeStamp, masterKeyId, + rmIdentifier, priority, creationTime, logAggregationContext, + nodeLabelExpression, containerType, ExecutionType.GUARANTEED); + } + + public ContainerTokenIdentifier(ContainerId containerID, String hostName, + String appSubmitter, Resource r, long expiryTimeStamp, int masterKeyId, + long rmIdentifier, Priority priority, long creationTime, + LogAggregationContext logAggregationContext, String nodeLabelExpression, + ContainerType containerType, ExecutionType executionType) { ContainerTokenIdentifierProto.Builder builder = ContainerTokenIdentifierProto.newBuilder(); if (containerID != null) { @@ -112,6 +124,7 @@ public class ContainerTokenIdentifier extends TokenIdentifier { builder.setNodeLabelExpression(nodeLabelExpression); } builder.setContainerType(convertToProtoFormat(containerType)); + builder.setExecutionType(convertToProtoFormat(executionType)); proto = builder.build(); } @@ -163,7 +176,7 @@ public class ContainerTokenIdentifier extends TokenIdentifier { return proto.getCreationTime(); } /** - * Get the RMIdentifier of RM in which containers are allocated + * Get the RMIdentifier of RM in which containers are allocated. 
* @return RMIdentifier */ public long getRMIdentifier() { @@ -181,6 +194,17 @@ public class ContainerTokenIdentifier extends TokenIdentifier { return convertFromProtoFormat(proto.getContainerType()); } + /** + * Get the ExecutionType of container to allocate + * @return ExecutionType + */ + public ExecutionType getExecutionType(){ + if (!proto.hasExecutionType()) { + return null; + } + return convertFromProtoFormat(proto.getExecutionType()); + } + public ContainerTokenIdentifierProto getProto() { return proto; } @@ -265,4 +289,13 @@ public class ContainerTokenIdentifier extends TokenIdentifier { ContainerTypeProto containerType) { return ProtoUtils.convertFromProtoFormat(containerType); } + + private ExecutionTypeProto convertToProtoFormat(ExecutionType executionType) { + return ProtoUtils.convertToProtoFormat(executionType); + } + + private ExecutionType convertFromProtoFormat( + ExecutionTypeProto executionType) { + return ProtoUtils.convertFromProtoFormat(executionType); + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/proto/yarn_security_token.proto b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/proto/yarn_security_token.proto index 339e99ecdcd..71434bedb42 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/proto/yarn_security_token.proto +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/proto/yarn_security_token.proto @@ -51,6 +51,7 @@ message ContainerTokenIdentifierProto { optional LogAggregationContextProto logAggregationContext = 10; optional string nodeLabelExpression = 11; optional ContainerTypeProto containerType = 12; + optional ExecutionTypeProto executionType = 13 [default = GUARANTEED]; } message ClientToAMTokenIdentifierProto { diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/security/TestYARNTokenIdentifier.java 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/security/TestYARNTokenIdentifier.java index 68f0b9df832..3a5f003045f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/security/TestYARNTokenIdentifier.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/security/TestYARNTokenIdentifier.java @@ -29,6 +29,7 @@ import org.apache.hadoop.security.HadoopKerberosName; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; import org.apache.hadoop.yarn.api.records.ApplicationId; import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ExecutionType; import org.apache.hadoop.yarn.api.records.NodeId; import org.apache.hadoop.yarn.api.records.Priority; import org.apache.hadoop.yarn.api.records.Resource; @@ -209,6 +210,9 @@ public class TestYARNTokenIdentifier { Assert.assertEquals(ContainerType.TASK, anotherToken.getContainerType()); + + Assert.assertEquals(ExecutionType.GUARANTEED, + anotherToken.getExecutionType()); } @Test @@ -384,10 +388,14 @@ public class TestYARNTokenIdentifier { Assert.assertEquals(ContainerType.APPLICATION_MASTER, anotherToken.getContainerType()); + Assert.assertEquals(ExecutionType.GUARANTEED, + anotherToken.getExecutionType()); + token = new ContainerTokenIdentifier(containerID, hostName, appSubmitter, r, expiryTimeStamp, masterKeyId, rmIdentifier, priority, creationTime, - null, CommonNodeLabelsManager.NO_LABEL, ContainerType.TASK); + null, CommonNodeLabelsManager.NO_LABEL, ContainerType.TASK, + ExecutionType.OPPORTUNISTIC); anotherToken = new ContainerTokenIdentifier(); @@ -398,6 +406,9 @@ public class TestYARNTokenIdentifier { Assert.assertEquals(ContainerType.TASK, anotherToken.getContainerType()); + + Assert.assertEquals(ExecutionType.OPPORTUNISTIC, + anotherToken.getExecutionType()); } }