diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml index 752682ca7c6..9819b3fe084 100644 --- a/hadoop-common-project/hadoop-auth/pom.xml +++ b/hadoop-common-project/hadoop-auth/pom.xml @@ -110,6 +110,7 @@ **/${test.exclude}.java ${test.exclude.pattern} **/TestKerberosAuth*.java + **/TestAltKerberosAuth*.java **/Test*$*.java diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AltKerberosAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AltKerberosAuthenticationHandler.java new file mode 100644 index 00000000000..e786e37df8e --- /dev/null +++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AltKerberosAuthenticationHandler.java @@ -0,0 +1,150 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ +package org.apache.hadoop.security.authentication.server; + +import java.io.IOException; +import java.util.Properties; +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.hadoop.security.authentication.client.AuthenticationException; + + /** + * The {@link AltKerberosAuthenticationHandler} behaves exactly the same way as + * the {@link KerberosAuthenticationHandler}, except that it allows for an + * alternative form of authentication for browsers while still using Kerberos + * for Java access. This is an abstract class that should be subclassed + * to allow a developer to implement their own custom authentication for browser + * access. The alternateAuthenticate method will be called whenever a request + * comes from a browser. + *
+ */ +public abstract class AltKerberosAuthenticationHandler + extends KerberosAuthenticationHandler { + + /** + * Constant that identifies the authentication mechanism. + */ + public static final String TYPE = "alt-kerberos"; + + /** + * Constant for the configuration property that indicates which user agents + * are not considered browsers (comma separated) + */ + public static final String NON_BROWSER_USER_AGENTS = + TYPE + ".non-browser.user-agents"; + private static final String NON_BROWSER_USER_AGENTS_DEFAULT = + "java,curl,wget,perl"; + + private String[] nonBrowserUserAgents; + + /** + * Returns the authentication type of the authentication handler, + * 'alt-kerberos'. + *
+ * + * @return the authentication type of the authentication handler, + * 'alt-kerberos'. + */ + @Override + public String getType() { + return TYPE; + } + + @Override + public void init(Properties config) throws ServletException { + super.init(config); + + nonBrowserUserAgents = config.getProperty( + NON_BROWSER_USER_AGENTS, NON_BROWSER_USER_AGENTS_DEFAULT) + .split("\\W*,\\W*"); + for (int i = 0; i < nonBrowserUserAgents.length; i++) { + nonBrowserUserAgents[i] = nonBrowserUserAgents[i].toLowerCase(); + } + } + + /** + * It enforces the Kerberos SPNEGO authentication sequence returning an + * {@link AuthenticationToken} only after the Kerberos SPNEGO sequence has + * completed successfully (in the case of Java access) and only after the + * custom authentication implemented by the subclass in alternateAuthenticate + * has completed successfully (in the case of browser access). + *
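 * For example, with the default non-browser list ("java", "curl", "wget",
 * "perl"), a request whose User-Agent contains "curl" is sent through the
 * Kerberos SPNEGO sequence, while a request reporting something like
 * "Mozilla/5.0" is handed to alternateAuthenticate (an illustration of the
 * routing performed by isBrowser, not an exhaustive rule).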
+ * + * @param request the HTTP client request. + * @param response the HTTP client response. + * + * @return an authentication token if the request is authorized or null + * + * @throws IOException thrown if an IO error occurred + * @throws AuthenticationException thrown if an authentication error occurred + */ + @Override + public AuthenticationToken authenticate(HttpServletRequest request, + HttpServletResponse response) + throws IOException, AuthenticationException { + AuthenticationToken token; + if (isBrowser(request.getHeader("User-Agent"))) { + token = alternateAuthenticate(request, response); + } + else { + token = super.authenticate(request, response); + } + return token; + } + + /** + * This method parses the User-Agent String and returns whether or not it + * refers to a browser. If it's not a browser, then Kerberos authentication + * will be used; if it is a browser, alternateAuthenticate from the subclass + * will be used. + *
+ * A User-Agent String is considered to be a browser if it does not contain + * any of the values from alt-kerberos.non-browser.user-agents; the default + * behavior is to consider everything a browser unless it contains one of: + * "java", "curl", "wget", or "perl". Subclasses can optionally override + * this method to use different behavior. + * + * @param userAgent The User-Agent String, or null if there isn't one + * @return true if the User-Agent String refers to a browser, false if not + */ + protected boolean isBrowser(String userAgent) { + if (userAgent == null) { + return false; + } + userAgent = userAgent.toLowerCase(); + boolean isBrowser = true; + for (String nonBrowserUserAgent : nonBrowserUserAgents) { + if (userAgent.contains(nonBrowserUserAgent)) { + isBrowser = false; + break; + } + } + return isBrowser; + } + + /** + * Subclasses should implement this method to provide the custom + * authentication to be used for browsers. + * + * @param request the HTTP client request. + * @param response the HTTP client response. + * @return an authentication token if the request is authorized, or null + * @throws IOException thrown if an IO error occurs + * @throws AuthenticationException thrown if an authentication error occurs + */ + public abstract AuthenticationToken alternateAuthenticate( + HttpServletRequest request, HttpServletResponse response) + throws IOException, AuthenticationException; +} diff --git a/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm b/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm index e42ee8b4c30..f2fe11d8f6d 100644 --- a/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm +++ b/hadoop-common-project/hadoop-auth/src/site/apt/Configuration.apt.vm @@ -176,6 +176,73 @@ Configuration ... ++---+ + +** AltKerberos Configuration + + <>: A KDC must be configured and running. + + The AltKerberos authentication mechanism is a partially implemented derivative + of the Kerberos SPNEGO authentication mechanism which allows a "mixed" form of + authentication where Kerberos SPNEGO is used by non-browsers while an + alternate form of authentication (to be implemented by the user) is used for + browsers. To use AltKerberos as the authentication mechanism (besides + providing an implementation), the authentication filter must be configured + with the following init parameters, in addition to the previously mentioned + Kerberos SPNEGO ones: + + * <<<[PREFIX.]type>>>: the full class name of the implementation of + AltKerberosAuthenticationHandler to use. + + * <<<[PREFIX.]alt-kerberos.non-browser.user-agents>>>: a comma-separated + list of which user-agents should be considered non-browsers. + + <>: + ++---+ + + ... + + + kerberosFilter + org.apache.hadoop.security.auth.server.AuthenticationFilter + + type + org.my.subclass.of.AltKerberosAuthenticationHandler + + + alt-kerberos.non-browser.user-agents + java,curl,wget,perl + + + token.validity + 30 + + + cookie.domain + .foo.com + + + cookie.path + / + + + kerberos.principal + HTTP/localhost@LOCALHOST + + + kerberos.keytab + /tmp/auth.keytab + + + + + kerberosFilter + /kerberos/* + + + ... 
+ +---+ \[ {{{./index.html}Go Back}} \] diff --git a/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm b/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm index a2e7b5e915a..26fc2492ca0 100644 --- a/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm +++ b/hadoop-common-project/hadoop-auth/src/site/apt/index.apt.vm @@ -24,6 +24,11 @@ Hadoop Auth, Java HTTP SPNEGO ${project.version} Hadoop Auth also supports additional authentication mechanisms on the client and the server side via 2 simple interfaces. + Additionally, it provides a partially implemented derivative of the Kerberos + SPNEGO authentication to allow a "mixed" form of authentication where Kerberos + SPNEGO is used by non-browsers while an alternate form of authentication + (to be implemented by the user) is used for browsers. + * License Hadoop Auth is distributed under {{{http://www.apache.org/licenses/}Apache diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAltKerberosAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAltKerberosAuthenticationHandler.java new file mode 100644 index 00000000000..c2d43ebb3ca --- /dev/null +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestAltKerberosAuthenticationHandler.java @@ -0,0 +1,110 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. See accompanying LICENSE file. + */ +package org.apache.hadoop.security.authentication.server; + +import java.io.IOException; +import java.util.Properties; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import org.apache.hadoop.security.authentication.client.AuthenticationException; +import org.mockito.Mockito; + +public class TestAltKerberosAuthenticationHandler + extends TestKerberosAuthenticationHandler { + + @Override + protected KerberosAuthenticationHandler getNewAuthenticationHandler() { + // AltKerberosAuthenticationHandler is abstract; a subclass would normally + // perform some other authentication when alternateAuthenticate() is called. 
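    // A hypothetical production subclass (sketch only; the "X-SSO-User" header
    // and its semantics are assumptions, not something defined by Hadoop)
    // might look roughly like:
    //
    //   public class MySsoAltKerberosAuthenticationHandler
    //       extends AltKerberosAuthenticationHandler {
    //     @Override
    //     public AuthenticationToken alternateAuthenticate(
    //         HttpServletRequest request, HttpServletResponse response)
    //         throws IOException, AuthenticationException {
    //       String user = request.getHeader("X-SSO-User");  // assumed SSO header
    //       if (user == null || user.isEmpty()) {
    //         // No alternate credentials yet: challenge the browser and return
    //         // null to indicate the request is not authorized
    //         response.sendError(HttpServletResponse.SC_UNAUTHORIZED);
    //         return null;
    //       }
    //       return new AuthenticationToken(user, user, getType());
    //     }
    //   }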
+ // For the test, we'll just return an AuthenticationToken as the other + // authentication is left up to the developer of the subclass + return new AltKerberosAuthenticationHandler() { + @Override + public AuthenticationToken alternateAuthenticate( + HttpServletRequest request, + HttpServletResponse response) + throws IOException, AuthenticationException { + return new AuthenticationToken("A", "B", getType()); + } + }; + } + + @Override + protected String getExpectedType() { + return AltKerberosAuthenticationHandler.TYPE; + } + + public void testAlternateAuthenticationAsBrowser() throws Exception { + HttpServletRequest request = Mockito.mock(HttpServletRequest.class); + HttpServletResponse response = Mockito.mock(HttpServletResponse.class); + + // By default, a User-Agent without "java", "curl", "wget", or "perl" in it + // is considered a browser + Mockito.when(request.getHeader("User-Agent")).thenReturn("Some Browser"); + + AuthenticationToken token = handler.authenticate(request, response); + assertEquals("A", token.getUserName()); + assertEquals("B", token.getName()); + assertEquals(getExpectedType(), token.getType()); + } + + public void testNonDefaultNonBrowserUserAgentAsBrowser() throws Exception { + HttpServletRequest request = Mockito.mock(HttpServletRequest.class); + HttpServletResponse response = Mockito.mock(HttpServletResponse.class); + + if (handler != null) { + handler.destroy(); + handler = null; + } + handler = getNewAuthenticationHandler(); + Properties props = getDefaultProperties(); + props.setProperty("alt-kerberos.non-browser.user-agents", "foo, bar"); + try { + handler.init(props); + } catch (Exception ex) { + handler = null; + throw ex; + } + + // Pretend we're something that will not match with "foo" (or "bar") + Mockito.when(request.getHeader("User-Agent")).thenReturn("blah"); + // Should use alt authentication + AuthenticationToken token = handler.authenticate(request, response); + assertEquals("A", token.getUserName()); + assertEquals("B", token.getName()); + assertEquals(getExpectedType(), token.getType()); + } + + public void testNonDefaultNonBrowserUserAgentAsNonBrowser() throws Exception { + if (handler != null) { + handler.destroy(); + handler = null; + } + handler = getNewAuthenticationHandler(); + Properties props = getDefaultProperties(); + props.setProperty("alt-kerberos.non-browser.user-agents", "foo, bar"); + try { + handler.init(props); + } catch (Exception ex) { + handler = null; + throw ex; + } + + // Run the kerberos tests again + testRequestWithoutAuthorization(); + testRequestWithInvalidAuthorization(); + testRequestWithAuthorization(); + testRequestWithInvalidKerberosAuthorization(); + } +} diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java index 692ceab92da..d198e58431d 100644 --- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java +++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/server/TestKerberosAuthenticationHandler.java @@ -28,23 +28,37 @@ import org.ietf.jgss.Oid; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; -import java.lang.reflect.Field; import java.util.Properties; import java.util.concurrent.Callable; public class 
TestKerberosAuthenticationHandler extends TestCase { - private KerberosAuthenticationHandler handler; + protected KerberosAuthenticationHandler handler; + + protected KerberosAuthenticationHandler getNewAuthenticationHandler() { + return new KerberosAuthenticationHandler(); + } + + protected String getExpectedType() { + return KerberosAuthenticationHandler.TYPE; + } + + protected Properties getDefaultProperties() { + Properties props = new Properties(); + props.setProperty(KerberosAuthenticationHandler.PRINCIPAL, + KerberosTestUtils.getServerPrincipal()); + props.setProperty(KerberosAuthenticationHandler.KEYTAB, + KerberosTestUtils.getKeytabFile()); + props.setProperty(KerberosAuthenticationHandler.NAME_RULES, + "RULE:[1:$1@$0](.*@" + KerberosTestUtils.getRealm()+")s/@.*//\n"); + return props; + } @Override protected void setUp() throws Exception { super.setUp(); - handler = new KerberosAuthenticationHandler(); - Properties props = new Properties(); - props.setProperty(KerberosAuthenticationHandler.PRINCIPAL, KerberosTestUtils.getServerPrincipal()); - props.setProperty(KerberosAuthenticationHandler.KEYTAB, KerberosTestUtils.getKeytabFile()); - props.setProperty(KerberosAuthenticationHandler.NAME_RULES, - "RULE:[1:$1@$0](.*@" + KerberosTestUtils.getRealm()+")s/@.*//\n"); + handler = getNewAuthenticationHandler(); + Properties props = getDefaultProperties(); try { handler.init(props); } catch (Exception ex) { @@ -71,10 +85,8 @@ public class TestKerberosAuthenticationHandler extends TestCase { KerberosName.setRules("RULE:[1:$1@$0](.*@FOO)s/@.*//\nDEFAULT"); - handler = new KerberosAuthenticationHandler(); - Properties props = new Properties(); - props.setProperty(KerberosAuthenticationHandler.PRINCIPAL, KerberosTestUtils.getServerPrincipal()); - props.setProperty(KerberosAuthenticationHandler.KEYTAB, KerberosTestUtils.getKeytabFile()); + handler = getNewAuthenticationHandler(); + Properties props = getDefaultProperties(); props.setProperty(KerberosAuthenticationHandler.NAME_RULES, "RULE:[1:$1@$0](.*@BAR)s/@.*//\nDEFAULT"); try { handler.init(props); @@ -97,8 +109,7 @@ public class TestKerberosAuthenticationHandler extends TestCase { } public void testType() throws Exception { - KerberosAuthenticationHandler handler = new KerberosAuthenticationHandler(); - assertEquals(KerberosAuthenticationHandler.TYPE, handler.getType()); + assertEquals(getExpectedType(), handler.getType()); } public void testRequestWithoutAuthorization() throws Exception { @@ -182,7 +193,7 @@ public class TestKerberosAuthenticationHandler extends TestCase { assertEquals(KerberosTestUtils.getClientPrincipal(), authToken.getName()); assertTrue(KerberosTestUtils.getClientPrincipal().startsWith(authToken.getUserName())); - assertEquals(KerberosAuthenticationHandler.TYPE, authToken.getType()); + assertEquals(getExpectedType(), authToken.getType()); } else { Mockito.verify(response).setHeader(Mockito.eq(KerberosAuthenticator.WWW_AUTHENTICATE), Mockito.matches(KerberosAuthenticator.NEGOTIATE + " .*")); diff --git a/hadoop-common-project/hadoop-common/CHANGES.txt b/hadoop-common-project/hadoop-common/CHANGES.txt index b6a16ceea95..bcd337a2e77 100644 --- a/hadoop-common-project/hadoop-common/CHANGES.txt +++ b/hadoop-common-project/hadoop-common/CHANGES.txt @@ -143,6 +143,9 @@ Trunk (Unreleased) BUG FIXES + HADOOP-8418. Update UGI Principal classes name for running with + IBM JDK on 64 bits Windows. (Yu Gao via eyang) + HADOOP-8177. MBeans shouldn't try to register when it fails to create MBeanName. 
(Devaraj K via umamahesh) @@ -289,6 +292,9 @@ Trunk (Unreleased) HADOOP-9037. Bug in test-patch.sh and precommit build process (Kihwal Lee via jlowe) + HADOOP-9121. InodeTree.java has redundant check for vName while + throwing exception. (Arup Malakar via suresh) + OPTIMIZATIONS HADOOP-7761. Improve the performance of raw comparisons. (todd) @@ -306,6 +312,12 @@ Release 2.0.3-alpha - Unreleased HADOOP-9020. Add a SASL PLAIN server (daryn via bobby) + HADOOP-9090. Support on-demand publish of metrics. (Mostafa Elhemali via + suresh) + + HADOOP-9054. Add AuthenticationHandler that uses Kerberos but allows for + an alternate form of authentication for browsers. (rkanter via tucu) + IMPROVEMENTS HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR. @@ -456,6 +468,11 @@ Release 2.0.3-alpha - Unreleased HADOOP-8958. ViewFs:Non absolute mount name failures when running multiple tests on Windows. (Chris Nauroth via suresh) + HADOOP-9103. UTF8 class does not properly decode Unicode characters + outside the basic multilingual plane. (todd) + + HADOOP-9070. Kerberos SASL server cannot find kerberos key. (daryn via atm) + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java index ef64831287e..304785100c2 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/viewfs/InodeTree.java @@ -118,8 +118,7 @@ abstract class InodeTree { return result; } - INode resolveInternal(final String pathComponent) - throws FileNotFoundException { + INode resolveInternal(final String pathComponent) { return children.get(pathComponent); } @@ -336,8 +335,8 @@ abstract class InodeTree { } if (!gotMountTableEntry) { throw new IOException( - "ViewFs: Cannot initialize: Empty Mount table in config for " + - vName == null ? 
"viewfs:///" : ("viewfs://" + vName + "/")); + "ViewFs: Cannot initialize: Empty Mount table in config for " + + "viewfs://" + vName + "/"); } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java index 8a14860773c..2d42a939977 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/SequenceFile.java @@ -1858,10 +1858,10 @@ public class SequenceFile { UTF8 className = new UTF8(); className.readFields(in); - keyClassName = className.toString(); // key class name + keyClassName = className.toStringChecked(); // key class name className.readFields(in); - valClassName = className.toString(); // val class name + valClassName = className.toStringChecked(); // val class name } else { keyClassName = Text.readString(in); valClassName = Text.readString(in); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java index ef7512996c7..89f1e428bb3 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/io/UTF8.java @@ -21,7 +21,9 @@ package org.apache.hadoop.io; import java.io.IOException; import java.io.DataInput; import java.io.DataOutput; +import java.io.UTFDataFormatException; +import org.apache.hadoop.util.StringUtils; import org.apache.commons.logging.*; import org.apache.hadoop.classification.InterfaceAudience; @@ -31,6 +33,9 @@ import org.apache.hadoop.classification.InterfaceStability; * *
Also includes utilities for efficiently reading and writing UTF-8. * + * Note that this decodes UTF-8 but actually encodes CESU-8, a variant of + * UTF-8: see http://en.wikipedia.org/wiki/CESU-8 + * * @deprecated replaced by Text */ @Deprecated @@ -151,6 +156,21 @@ public class UTF8 implements WritableComparable { } return buffer.toString(); } + + /** + * Convert to a string, checking for valid UTF8. + * @return the converted string + * @throws UTFDataFormatException if the underlying bytes contain invalid + * UTF8 data. + */ + public String toStringChecked() throws IOException { + StringBuilder buffer = new StringBuilder(length); + synchronized (IBUF) { + IBUF.reset(bytes, length); + readChars(IBUF, buffer, length); + } + return buffer.toString(); + } /** Returns true iff o is a UTF8 with the same contents. */ @Override @@ -209,6 +229,19 @@ public class UTF8 implements WritableComparable { return result; } + /** + * Convert a UTF-8 encoded byte array back into a string. + * + * @throws IOException if the byte array is invalid UTF8 + */ + public static String fromBytes(byte[] bytes) throws IOException { + DataInputBuffer dbuf = new DataInputBuffer(); + dbuf.reset(bytes, 0, bytes.length); + StringBuilder buf = new StringBuilder(bytes.length); + readChars(dbuf, buf, bytes.length); + return buf.toString(); + } + /** Read a UTF-8 encoded string. * * @see DataInput#readUTF() @@ -221,7 +254,7 @@ public class UTF8 implements WritableComparable { } private static void readChars(DataInput in, StringBuilder buffer, int nBytes) - throws IOException { + throws UTFDataFormatException, IOException { DataOutputBuffer obuf = OBUF_FACTORY.get(); obuf.reset(); obuf.write(in, nBytes); @@ -230,18 +263,60 @@ public class UTF8 implements WritableComparable { while (i < nBytes) { byte b = bytes[i++]; if ((b & 0x80) == 0) { + // 0b0xxxxxxx: 1-byte sequence buffer.append((char)(b & 0x7F)); - } else if ((b & 0xE0) != 0xE0) { + } else if ((b & 0xE0) == 0xC0) { + if (i >= nBytes) { + throw new UTFDataFormatException("Truncated UTF8 at " + + StringUtils.byteToHexString(bytes, i - 1, 1)); + } + // 0b110xxxxx: 2-byte sequence buffer.append((char)(((b & 0x1F) << 6) | (bytes[i++] & 0x3F))); - } else { + } else if ((b & 0xF0) == 0xE0) { + // 0b1110xxxx: 3-byte sequence + if (i + 1 >= nBytes) { + throw new UTFDataFormatException("Truncated UTF8 at " + + StringUtils.byteToHexString(bytes, i - 1, 2)); + } buffer.append((char)(((b & 0x0F) << 12) | ((bytes[i++] & 0x3F) << 6) | (bytes[i++] & 0x3F))); + } else if ((b & 0xF8) == 0xF0) { + if (i + 2 >= nBytes) { + throw new UTFDataFormatException("Truncated UTF8 at " + + StringUtils.byteToHexString(bytes, i - 1, 3)); + } + // 0b11110xxx: 4-byte sequence + int codepoint = + ((b & 0x07) << 18) + | ((bytes[i++] & 0x3F) << 12) + | ((bytes[i++] & 0x3F) << 6) + | ((bytes[i++] & 0x3F)); + buffer.append(highSurrogate(codepoint)) + .append(lowSurrogate(codepoint)); + } else { + // The UTF8 standard describes 5-byte and 6-byte sequences, but + // these are no longer allowed as of 2003 (see RFC 3629) + + // Only show the next 6 bytes max in the error code - in case the + // buffer is large, this will prevent an exceedingly large message. 
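        // For example, decoding the bytes 01 02 ff ff 01 02 03 04 05 fails on
        // the first 0xff and reports "Invalid UTF8 at ffff01020304" -- at most
        // six bytes beginning with the offending one -- as exercised by
        // TestUTF8#testInvalidUTF8.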
+ int endForError = Math.min(i + 5, nBytes); + throw new UTFDataFormatException("Invalid UTF8 at " + + StringUtils.byteToHexString(bytes, i - 1, endForError)); } } } + private static char highSurrogate(int codePoint) { + return (char) ((codePoint >>> 10) + + (Character.MIN_HIGH_SURROGATE - (Character.MIN_SUPPLEMENTARY_CODE_POINT >>> 10))); + } + + private static char lowSurrogate(int codePoint) { + return (char) ((codePoint & 0x3ff) + Character.MIN_LOW_SURROGATE); + } + /** Write a UTF-8 encoded string. * * @see DataOutput#writeUTF(String) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java index eb735ff9a79..093aadaa091 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/ipc/Server.java @@ -199,7 +199,8 @@ public abstract class Server { // in ObjectWritable to efficiently transmit arrays of primitives // 6 : Made RPC payload header explicit // 7 : Changed Ipc Connection Header to use Protocol buffers - public static final byte CURRENT_VERSION = 7; + // 8 : SASL server always sends a final response + public static final byte CURRENT_VERSION = 8; /** * Initial and max size of response buffer @@ -1220,8 +1221,8 @@ public abstract class Server { AUDITLOG.warn(AUTH_FAILED_FOR + clientIP + ":" + attemptingUser); throw e; } - if (replyToken == null && authMethod == AuthMethod.PLAIN) { - // client needs at least response to know if it should use SIMPLE + if (saslServer.isComplete() && replyToken == null) { + // send final response for success replyToken = new byte[0]; } if (replyToken != null) { @@ -1392,7 +1393,7 @@ public abstract class Server { } private AuthMethod initializeAuthContext(AuthMethod authMethod) - throws IOException { + throws IOException, InterruptedException { try { if (enabledAuthMethods.contains(authMethod)) { saslServer = createSaslServer(authMethod); @@ -1425,8 +1426,7 @@ public abstract class Server { } private SaslServer createSaslServer(AuthMethod authMethod) - throws IOException { - SaslServer saslServer = null; + throws IOException, InterruptedException { String hostname = null; String saslProtocol = null; CallbackHandler saslCallback = null; @@ -1462,10 +1462,23 @@ public abstract class Server { "Server does not support SASL " + authMethod); } - String mechanism = authMethod.getMechanismName(); - saslServer = Sasl.createSaslServer( - mechanism, saslProtocol, hostname, - SaslRpcServer.SASL_PROPS, saslCallback); + return createSaslServer(authMethod.getMechanismName(), saslProtocol, + hostname, saslCallback); + } + + private SaslServer createSaslServer(final String mechanism, + final String protocol, + final String hostname, + final CallbackHandler callback + ) throws IOException, InterruptedException { + SaslServer saslServer = UserGroupInformation.getCurrentUser().doAs( + new PrivilegedExceptionAction() { + @Override + public SaslServer run() throws SaslException { + return Sasl.createSaslServer(mechanism, protocol, hostname, + SaslRpcServer.SASL_PROPS, callback); + } + }); if (saslServer == null) { throw new AccessControlException( "Unable to find SASL server implementation for " + mechanism); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java index 
ab403a2b178..e853319c4e8 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/MetricsSystem.java @@ -90,6 +90,17 @@ public abstract class MetricsSystem implements MetricsSystemMXBean { */ public abstract void register(Callback callback); + /** + * Requests an immediate publish of all metrics from sources to sinks. + * + * This is a "soft" request: the expectation is that a best effort will be + * done to synchronously snapshot the metrics from all the sources and put + * them in all the sinks (including flushing the sinks) before returning to + * the caller. If this can't be accomplished in reasonable time it's OK to + * return to the caller before everything is done. + */ + public abstract void publishMetricsNow(); + /** * Shutdown the metrics system completely (usually during server shutdown.) * The MetricsSystemMXBean will be unregistered. diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java index 688eca4e998..56868c10c5e 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSinkAdapter.java @@ -19,6 +19,7 @@ package org.apache.hadoop.metrics2.impl; import java.util.Random; +import java.util.concurrent.*; import static com.google.common.base.Preconditions.*; @@ -48,6 +49,7 @@ class MetricsSinkAdapter implements SinkQueue.Consumer { private volatile boolean stopping = false; private volatile boolean inError = false; private final int period, firstRetryDelay, retryCount; + private final long oobPutTimeout; private final float retryBackoff; private final MetricsRegistry registry = new MetricsRegistry("sinkadapter"); private final MutableStat latency; @@ -69,6 +71,8 @@ class MetricsSinkAdapter implements SinkQueue.Consumer { this.period = checkArg(period, period > 0, "period"); firstRetryDelay = checkArg(retryDelay, retryDelay > 0, "retry delay"); this.retryBackoff = checkArg(retryBackoff, retryBackoff>1, "retry backoff"); + oobPutTimeout = (long) + (firstRetryDelay * Math.pow(retryBackoff, retryCount) * 1000); this.retryCount = retryCount; this.queue = new SinkQueue(checkArg(queueCapacity, queueCapacity > 0, "queue capacity")); @@ -95,6 +99,23 @@ class MetricsSinkAdapter implements SinkQueue.Consumer { } return true; // OK } + + public boolean putMetricsImmediate(MetricsBuffer buffer) { + WaitableMetricsBuffer waitableBuffer = + new WaitableMetricsBuffer(buffer); + if (!queue.enqueue(waitableBuffer)) { + LOG.warn(name + " has a full queue and can't consume the given metrics."); + dropped.incr(); + return false; + } + if (!waitableBuffer.waitTillNotified(oobPutTimeout)) { + LOG.warn(name + + " couldn't fulfill an immediate putMetrics request in time." 
+ + " Abandoning."); + return false; + } + return true; + } void publishMetricsFromQueue() { int retryDelay = firstRetryDelay; @@ -158,6 +179,9 @@ class MetricsSinkAdapter implements SinkQueue.Consumer { sink.flush(); latency.add(Time.now() - ts); } + if (buffer instanceof WaitableMetricsBuffer) { + ((WaitableMetricsBuffer)buffer).notifyAnyWaiters(); + } LOG.debug("Done"); } @@ -191,4 +215,26 @@ class MetricsSinkAdapter implements SinkQueue.Consumer { MetricsSink sink() { return sink; } + + static class WaitableMetricsBuffer extends MetricsBuffer { + private final Semaphore notificationSemaphore = + new Semaphore(0); + + public WaitableMetricsBuffer(MetricsBuffer metricsBuffer) { + super(metricsBuffer); + } + + public boolean waitTillNotified(long millisecondsToWait) { + try { + return notificationSemaphore.tryAcquire(millisecondsToWait, + TimeUnit.MILLISECONDS); + } catch (InterruptedException e) { + return false; + } + } + + public void notifyAnyWaiters() { + notificationSemaphore.release(); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java index 0bf320b82b5..2224d811de4 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/metrics2/impl/MetricsSystemImpl.java @@ -344,9 +344,19 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource { synchronized void onTimerEvent() { logicalTime += period; if (sinks.size() > 0) { - publishMetrics(sampleMetrics()); + publishMetrics(sampleMetrics(), false); } } + + /** + * Requests an immediate publish of all metrics from sources to sinks. + */ + @Override + public void publishMetricsNow() { + if (sinks.size() > 0) { + publishMetrics(sampleMetrics(), true); + } + } /** * Sample all the sources for a snapshot of metrics/tags @@ -380,12 +390,20 @@ public class MetricsSystemImpl extends MetricsSystem implements MetricsSource { /** * Publish a metrics snapshot to all the sinks * @param buffer the metrics snapshot to publish + * @param immediate indicates that we should publish metrics immediately + * instead of using a separate thread. */ - synchronized void publishMetrics(MetricsBuffer buffer) { + synchronized void publishMetrics(MetricsBuffer buffer, boolean immediate) { int dropped = 0; for (MetricsSinkAdapter sa : sinks.values()) { long startTime = Time.now(); - dropped += sa.putMetrics(buffer, logicalTime) ? 0 : 1; + boolean result; + if (immediate) { + result = sa.putMetricsImmediate(buffer); + } else { + result = sa.putMetrics(buffer, logicalTime); + } + dropped += result ? 
0 : 1; publishStat.add(Time.now() - startTime); } droppedPubAll.incr(dropped); diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java index d206f3ace86..9260fbe9f53 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/security/UserGroupInformation.java @@ -299,13 +299,17 @@ public class UserGroupInformation { private static String OS_LOGIN_MODULE_NAME; private static Class OS_PRINCIPAL_CLASS; - private static final boolean windows = - System.getProperty("os.name").startsWith("Windows"); + private static final boolean windows = + System.getProperty("os.name").startsWith("Windows"); + private static final boolean is64Bit = + System.getProperty("os.arch").contains("64"); /* Return the OS login module class name */ private static String getOSLoginModuleName() { if (System.getProperty("java.vendor").contains("IBM")) { - return windows ? "com.ibm.security.auth.module.NTLoginModule" - : "com.ibm.security.auth.module.LinuxLoginModule"; + return windows ? (is64Bit + ? "com.ibm.security.auth.module.Win64LoginModule" + : "com.ibm.security.auth.module.NTLoginModule") + : "com.ibm.security.auth.module.LinuxLoginModule"; } else { return windows ? "com.sun.security.auth.module.NTLoginModule" : "com.sun.security.auth.module.UnixLoginModule"; @@ -319,13 +323,13 @@ public class UserGroupInformation { try { if (System.getProperty("java.vendor").contains("IBM")) { if (windows) { - return (Class) - cl.loadClass("com.ibm.security.auth.UsernamePrincipal"); + return (Class) (is64Bit + ? cl.loadClass("com.ibm.security.auth.UsernamePrincipal") + : cl.loadClass("com.ibm.security.auth.NTUserPrincipal")); } else { - return (Class) - (System.getProperty("os.arch").contains("64") - ? cl.loadClass("com.ibm.security.auth.UsernamePrincipal") - : cl.loadClass("com.ibm.security.auth.LinuxPrincipal")); + return (Class) (is64Bit + ? 
cl.loadClass("com.ibm.security.auth.UsernamePrincipal") + : cl.loadClass("com.ibm.security.auth.LinuxPrincipal")); } } else { return (Class) (windows diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java index ec4f5437865..8d09540b1c0 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileContextTestHelper.java @@ -32,7 +32,7 @@ import org.junit.Assert; */ public final class FileContextTestHelper { // The test root is relative to the /build/test/data by default - public static final String TEST_ROOT_DIR = + public static String TEST_ROOT_DIR = System.getProperty("test.build.data", "build/test/data") + "/test"; private static final int DEFAULT_BLOCK_SIZE = 1024; private static final int DEFAULT_NUM_BLOCKS = 2; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestHelper.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestHelper.java index 2c058ca3098..c066aade28c 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestHelper.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/FileSystemTestHelper.java @@ -34,7 +34,7 @@ import static org.mockito.Mockito.mock; */ public final class FileSystemTestHelper { // The test root is relative to the /build/test/data by default - public static final String TEST_ROOT_DIR = + public static String TEST_ROOT_DIR = System.getProperty("test.build.data", "target/test/data") + "/test"; private static final int DEFAULT_BLOCK_SIZE = 1024; private static final int DEFAULT_NUM_BLOCKS = 2; diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java index 5c068a1c08c..b3872248327 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/io/TestUTF8.java @@ -19,8 +19,13 @@ package org.apache.hadoop.io; import junit.framework.TestCase; +import java.io.IOException; +import java.io.UTFDataFormatException; import java.util.Random; +import org.apache.hadoop.test.GenericTestUtils; +import org.apache.hadoop.util.StringUtils; + /** Unit tests for UTF8. */ @SuppressWarnings("deprecation") public class TestUTF8 extends TestCase { @@ -92,5 +97,73 @@ public class TestUTF8 extends TestCase { assertEquals(s, new String(dob.getData(), 2, dob.getLength()-2, "UTF-8")); } - + + /** + * Test encoding and decoding of UTF8 outside the basic multilingual plane. + * + * This is a regression test for HADOOP-9103. 
+ */ + public void testNonBasicMultilingualPlane() throws Exception { + // Test using the "CAT FACE" character (U+1F431) + // See http://www.fileformat.info/info/unicode/char/1f431/index.htm + String catFace = "\uD83D\uDC31"; + + // This encodes to 4 bytes in UTF-8: + byte[] encoded = catFace.getBytes("UTF-8"); + assertEquals(4, encoded.length); + assertEquals("f09f90b1", StringUtils.byteToHexString(encoded)); + + // Decode back to String using our own decoder + String roundTrip = UTF8.fromBytes(encoded); + assertEquals(catFace, roundTrip); + } + + /** + * Test that decoding invalid UTF8 throws an appropriate error message. + */ + public void testInvalidUTF8() throws Exception { + byte[] invalid = new byte[] { + 0x01, 0x02, (byte)0xff, (byte)0xff, 0x01, 0x02, 0x03, 0x04, 0x05 }; + try { + UTF8.fromBytes(invalid); + fail("did not throw an exception"); + } catch (UTFDataFormatException utfde) { + GenericTestUtils.assertExceptionContains( + "Invalid UTF8 at ffff01020304", utfde); + } + } + + /** + * Test for a 5-byte UTF8 sequence, which is now considered illegal. + */ + public void test5ByteUtf8Sequence() throws Exception { + byte[] invalid = new byte[] { + 0x01, 0x02, (byte)0xf8, (byte)0x88, (byte)0x80, + (byte)0x80, (byte)0x80, 0x04, 0x05 }; + try { + UTF8.fromBytes(invalid); + fail("did not throw an exception"); + } catch (UTFDataFormatException utfde) { + GenericTestUtils.assertExceptionContains( + "Invalid UTF8 at f88880808004", utfde); + } + } + + /** + * Test that decoding invalid UTF8 due to truncation yields the correct + * exception type. + */ + public void testInvalidUTF8Truncated() throws Exception { + // Truncated CAT FACE character -- this is a 4-byte sequence, but we + // only have the first three bytes. + byte[] truncated = new byte[] { + (byte)0xF0, (byte)0x9F, (byte)0x90 }; + try { + UTF8.fromBytes(truncated); + fail("did not throw an exception"); + } catch (UTFDataFormatException utfde) { + GenericTestUtils.assertExceptionContains( + "Truncated UTF8 at f09f90", utfde); + } + } } diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java index 10012348b4e..7294ee9c27d 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestGangliaMetrics.java @@ -29,8 +29,6 @@ import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; -import java.util.concurrent.CountDownLatch; -import java.util.concurrent.TimeUnit; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -115,31 +113,23 @@ public class TestGangliaMetrics { final int expectedCountFromGanglia30 = expectedMetrics.length; final int expectedCountFromGanglia31 = 2 * expectedMetrics.length; - // use latch to make sure we received required records before shutting - // down the MetricSystem - CountDownLatch latch = new CountDownLatch( - expectedCountFromGanglia30 + expectedCountFromGanglia31); - // Setup test for GangliaSink30 AbstractGangliaSink gsink30 = new GangliaSink30(); gsink30.init(cb.subset("test")); - MockDatagramSocket mockds30 = new MockDatagramSocket(latch); + MockDatagramSocket mockds30 = new MockDatagramSocket(); GangliaMetricsTestHelper.setDatagramSocket(gsink30, mockds30); // Setup test for GangliaSink31 AbstractGangliaSink 
gsink31 = new GangliaSink31(); gsink31.init(cb.subset("test")); - MockDatagramSocket mockds31 = new MockDatagramSocket(latch); + MockDatagramSocket mockds31 = new MockDatagramSocket(); GangliaMetricsTestHelper.setDatagramSocket(gsink31, mockds31); // register the sinks ms.register("gsink30", "gsink30 desc", gsink30); ms.register("gsink31", "gsink31 desc", gsink31); - ms.onTimerEvent(); // trigger something interesting + ms.publishMetricsNow(); // publish the metrics - // wait for all records and the stop MetricSystem. Without this - // sometime the ms gets shutdown before all the sinks have consumed - latch.await(200, TimeUnit.MILLISECONDS); ms.stop(); // check GanfliaSink30 data @@ -198,7 +188,6 @@ public class TestGangliaMetrics { */ private class MockDatagramSocket extends DatagramSocket { private ArrayList capture; - private CountDownLatch latch; /** * @throws SocketException @@ -207,15 +196,6 @@ public class TestGangliaMetrics { capture = new ArrayList(); } - /** - * @param latch - * @throws SocketException - */ - public MockDatagramSocket(CountDownLatch latch) throws SocketException { - this(); - this.latch = latch; - } - /* (non-Javadoc) * @see java.net.DatagramSocket#send(java.net.DatagramPacket) */ @@ -225,9 +205,6 @@ public class TestGangliaMetrics { byte[] bytes = new byte[p.getLength()]; System.arraycopy(p.getData(), p.getOffset(), bytes, 0, p.getLength()); capture.add(bytes); - - // decrement the latch - latch.countDown(); } /** diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java index 3d3f0706c59..0aa19032cdc 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/metrics2/impl/TestMetricsSystemImpl.java @@ -18,7 +18,11 @@ package org.apache.hadoop.metrics2.impl; -import java.util.List; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.*; + +import javax.annotation.Nullable; import org.junit.Test; import org.junit.runner.RunWith; @@ -26,9 +30,11 @@ import org.junit.runner.RunWith; import org.mockito.ArgumentCaptor; import org.mockito.Captor; import org.mockito.runners.MockitoJUnitRunner; + import static org.junit.Assert.*; import static org.mockito.Mockito.*; +import com.google.common.base.Predicate; import com.google.common.collect.Iterables; import org.apache.commons.configuration.SubsetConfiguration; @@ -36,6 +42,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.metrics2.MetricsException; import static org.apache.hadoop.test.MoreAsserts.*; + +import org.apache.hadoop.metrics2.AbstractMetric; import org.apache.hadoop.metrics2.MetricsRecord; import org.apache.hadoop.metrics2.MetricsSink; import org.apache.hadoop.metrics2.MetricsSource; @@ -47,6 +55,7 @@ import org.apache.hadoop.metrics2.lib.MetricsRegistry; import org.apache.hadoop.metrics2.lib.MutableCounterLong; import org.apache.hadoop.metrics2.lib.MutableRate; import org.apache.hadoop.metrics2.lib.MutableGaugeLong; +import org.apache.hadoop.util.StringUtils; /** * Test the MetricsSystemImpl class @@ -72,7 +81,7 @@ public class TestMetricsSystemImpl { } @Test public void testInitFirst() throws Exception { - ConfigBuilder cb = new ConfigBuilder().add("*.period", 8) + new ConfigBuilder().add("*.period", 8) 
//.add("test.sink.plugin.urls", getPluginUrlsAsString()) .add("test.sink.test.class", TestSink.class.getName()) .add("test.*.source.filter.exclude", "s0") @@ -93,8 +102,9 @@ public class TestMetricsSystemImpl { MetricsSink sink2 = mock(MetricsSink.class); ms.registerSink("sink1", "sink1 desc", sink1); ms.registerSink("sink2", "sink2 desc", sink2); - ms.onTimerEvent(); // trigger something interesting + ms.publishMetricsNow(); // publish the metrics ms.stop(); + ms.shutdown(); verify(sink1, times(2)).putMetrics(r1.capture()); List mr1 = r1.getAllValues(); @@ -104,6 +114,177 @@ public class TestMetricsSystemImpl { assertEquals("output", mr1, mr2); } + @Test public void testMultiThreadedPublish() throws Exception { + new ConfigBuilder().add("*.period", 80) + .add("test.sink.Collector.queue.capacity", "20") + .save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test")); + final MetricsSystemImpl ms = new MetricsSystemImpl("Test"); + ms.start(); + final int numThreads = 10; + final CollectingSink sink = new CollectingSink(numThreads); + ms.registerSink("Collector", + "Collector of values from all threads.", sink); + final TestSource[] sources = new TestSource[numThreads]; + final Thread[] threads = new Thread[numThreads]; + final String[] results = new String[numThreads]; + final CyclicBarrier barrier1 = new CyclicBarrier(numThreads), + barrier2 = new CyclicBarrier(numThreads); + for (int i = 0; i < numThreads; i++) { + sources[i] = ms.register("threadSource" + i, + "A source of my threaded goodness.", + new TestSource("threadSourceRec" + i)); + threads[i] = new Thread(new Runnable() { + private boolean safeAwait(int mySource, CyclicBarrier barrier) { + try { + barrier1.await(2, TimeUnit.SECONDS); + } catch (InterruptedException e) { + results[mySource] = "Interrupted"; + return false; + } catch (BrokenBarrierException e) { + results[mySource] = "Broken Barrier"; + return false; + } catch (TimeoutException e) { + results[mySource] = "Timed out on barrier"; + return false; + } + return true; + } + + @Override + public void run() { + int mySource = Integer.parseInt(Thread.currentThread().getName()); + if (sink.collected[mySource].get() != 0L) { + results[mySource] = "Someone else collected my metric!"; + return; + } + // Wait for all the threads to come here so we can hammer + // the system at the same time + if (!safeAwait(mySource, barrier1)) return; + sources[mySource].g1.set(230); + ms.publishMetricsNow(); + // Since some other thread may have snatched my metric, + // I need to wait for the threads to finish before checking. 
+ if (!safeAwait(mySource, barrier2)) return; + if (sink.collected[mySource].get() != 230L) { + results[mySource] = "Metric not collected!"; + return; + } + results[mySource] = "Passed"; + } + }, "" + i); + } + for (Thread t : threads) + t.start(); + for (Thread t : threads) + t.join(); + assertEquals(0L, ms.droppedPubAll.value()); + assertTrue(StringUtils.join("\n", Arrays.asList(results)), + Iterables.all(Arrays.asList(results), new Predicate() { + @Override + public boolean apply(@Nullable String input) { + return input.equalsIgnoreCase("Passed"); + } + })); + ms.stop(); + ms.shutdown(); + } + + private static class CollectingSink implements MetricsSink { + private final AtomicLong[] collected; + + public CollectingSink(int capacity) { + collected = new AtomicLong[capacity]; + for (int i = 0; i < capacity; i++) { + collected[i] = new AtomicLong(); + } + } + + @Override + public void init(SubsetConfiguration conf) { + } + + @Override + public void putMetrics(MetricsRecord record) { + final String prefix = "threadSourceRec"; + if (record.name().startsWith(prefix)) { + final int recordNumber = Integer.parseInt( + record.name().substring(prefix.length())); + ArrayList names = new ArrayList(); + for (AbstractMetric m : record.metrics()) { + if (m.name().equalsIgnoreCase("g1")) { + collected[recordNumber].set(m.value().longValue()); + return; + } + names.add(m.name()); + } + } + } + + @Override + public void flush() { + } + } + + @Test public void testHangingSink() { + new ConfigBuilder().add("*.period", 8) + .add("test.sink.test.class", TestSink.class.getName()) + .add("test.sink.hanging.retry.delay", "1") + .add("test.sink.hanging.retry.backoff", "1.01") + .add("test.sink.hanging.retry.count", "0") + .save(TestMetricsConfig.getTestFilename("hadoop-metrics2-test")); + MetricsSystemImpl ms = new MetricsSystemImpl("Test"); + ms.start(); + TestSource s = ms.register("s3", "s3 desc", new TestSource("s3rec")); + s.c1.incr(); + HangingSink hanging = new HangingSink(); + ms.registerSink("hanging", "Hang the sink!", hanging); + ms.publishMetricsNow(); + assertEquals(1L, ms.droppedPubAll.value()); + assertFalse(hanging.getInterrupted()); + ms.stop(); + ms.shutdown(); + assertTrue(hanging.getInterrupted()); + assertTrue("The sink didn't get called after its first hang " + + "for subsequent records.", hanging.getGotCalledSecondTime()); + } + + private static class HangingSink implements MetricsSink { + private volatile boolean interrupted; + private boolean gotCalledSecondTime; + private boolean firstTime = true; + + public boolean getGotCalledSecondTime() { + return gotCalledSecondTime; + } + + public boolean getInterrupted() { + return interrupted; + } + + @Override + public void init(SubsetConfiguration conf) { + } + + @Override + public void putMetrics(MetricsRecord record) { + // No need to hang every time, just the first record. 
+ if (!firstTime) { + gotCalledSecondTime = true; + return; + } + firstTime = false; + try { + Thread.sleep(10 * 1000); + } catch (InterruptedException ex) { + interrupted = true; + } + } + + @Override + public void flush() { + } + } + @Test public void testRegisterDups() { MetricsSystem ms = new MetricsSystemImpl(); TestSource ts1 = new TestSource("ts1"); @@ -116,6 +297,7 @@ public class TestMetricsSystemImpl { MetricsSource s2 = ms.getSource("ts1"); assertNotNull(s2); assertNotSame(s1, s2); + ms.shutdown(); } @Test(expected=MetricsException.class) public void testRegisterDupError() { diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/UserProvider.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/UserProvider.java index 4db42c21ac8..fd59b19dc34 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/UserProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/main/java/org/apache/hadoop/lib/wsrs/UserProvider.java @@ -53,10 +53,9 @@ public class UserProvider extends AbstractHttpContextInjectable imple public String parseParam(String str) { if (str != null) { int len = str.length(); - if (len < 1 || len > 31) { + if (len < 1) { throw new IllegalArgumentException(MessageFormat.format( - "Parameter [{0}], invalid value [{1}], it's length must be between 1 and 31", - getName(), str)); + "Parameter [{0}], it's length must be at least 1", getName())); } } return super.parseParam(str); diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/wsrs/TestUserProvider.java b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/wsrs/TestUserProvider.java index 2bba4f090d0..694e8dc3185 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/wsrs/TestUserProvider.java +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/src/test/java/org/apache/hadoop/lib/wsrs/TestUserProvider.java @@ -108,13 +108,6 @@ public class TestUserProvider { userParam.parseParam(""); } - @Test - @TestException(exception = IllegalArgumentException.class) - public void userNameTooLong() { - UserProvider.UserParam userParam = new UserProvider.UserParam("username"); - userParam.parseParam("a123456789012345678901234567890x"); - } - @Test @TestException(exception = IllegalArgumentException.class) public void userNameInvalidStart() { @@ -135,12 +128,6 @@ public class TestUserProvider { assertNotNull(userParam.parseParam("a")); } - @Test - public void userNameMaxLength() { - UserProvider.UserParam userParam = new UserProvider.UserParam("username"); - assertNotNull(userParam.parseParam("a123456789012345678901234567890")); - } - @Test public void userNameValidDollarSign() { UserProvider.UserParam userParam = new UserProvider.UserParam("username"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt index 645ad9a4e78..c89eac3723a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt +++ b/hadoop-hdfs-project/hadoop-hdfs/CHANGES.txt @@ -92,17 +92,12 @@ Trunk (Unreleased) HDFS-3040. TestMulitipleNNDataBlockScanner is misspelled. (Madhukara Phatak via atm) - HDFS-3049. During the normal NN startup process, fall back on a different - edit log if we see one that is corrupt (Colin Patrick McCabe via todd) - HDFS-3478. Test quotas with Long.Max_Value. (Sujay Rau via eli) HDFS-3498. 
Support replica removal in BlockPlacementPolicy and make BlockPlacementPolicyDefault extensible for reusing code in subclasses. (Junping Du via szetszwo) - HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd) - HDFS-3510. Editlog pre-allocation is performed prior to writing edits to avoid partial edits case disk out of space.(Colin McCabe via suresh) @@ -146,8 +141,6 @@ Trunk (Unreleased) HDFS-4052. BlockManager#invalidateWork should print log outside the lock. (Jing Zhao via suresh) - HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh) - HDFS-4124. Refactor INodeDirectory#getExistingPathINodes() to enable returning more than INode array. (Jing Zhao via suresh) @@ -160,10 +153,6 @@ Trunk (Unreleased) HDFS-4152. Add a new class BlocksMapUpdateInfo for the parameter in INode.collectSubtreeBlocksAndClear(..). (Jing Zhao via szetszwo) - HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm) - - HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd) - HDFS-4206. Change the fields in INode and its subclasses to private. (szetszwo) @@ -176,6 +165,11 @@ Trunk (Unreleased) HDFS-4209. Clean up the addNode/addChild/addChildNoQuotaCheck methods in FSDirectory and INodeDirectory. (szetszwo) + HDFS-3358. Specify explicitly that the NN UI status total is talking + of persistent objects on heap. (harsh) + + HDFS-4234. Use generic code for choosing datanode in Balancer. (szetszwo) + OPTIMIZATIONS BUG FIXES @@ -268,107 +262,12 @@ Trunk (Unreleased) HDFS-4105. The SPNEGO user for secondary namenode should use the web keytab. (Arpit Gupta via jitendra) - BREAKDOWN OF HDFS-3077 SUBTASKS + HDFS-4240. For nodegroup-aware block placement, when a node is excluded, + the nodes in the same nodegroup should also be excluded. (Junping Du + via szetszwo) - HDFS-3077. Quorum-based protocol for reading and writing edit logs. - (todd, Brandon Li, and Hari Mankude via todd) - - HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd) - - HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs - (todd) - - HDFS-3693. JNStorage should read its storage info even before a writer - becomes active (todd) - - HDFS-3725. Fix QJM startup when individual JNs have gaps (todd) - - HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd) - - HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm) - - HDFS-3793. Implement genericized format() in QJM (todd) - - HDFS-3795. QJM: validate journal dir at startup (todd) - - HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid - segment (todd) - - HDFS-3799. QJM: handle empty log segments during recovery (todd) - - HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd) - - HDFS-3800. improvements to QJM fault testing (todd) - - HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed - setting of HTTP port. (todd and atm) - - HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd - and atm) - - HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode" - (eli) - - HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd) - - HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli) - - HDFS-3863. Track last "committed" txid in QJM (todd) - - HDFS-3869. Expose non-file journal manager details in web UI (todd) - - HDFS-3884. Journal format() should reset cached values (todd) - - HDFS-3870. Add metrics to JournalNode (todd) - - HDFS-3891. 
Make selectInputStreams throw IOE instead of RTE (todd) - - HDFS-3726. If a logger misses an RPC, don't retry that logger until next - segment (todd) - - HDFS-3893. QJM: Make QJM work with security enabled. (atm) - - HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm) - - HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd) - - HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd) - - HDFS-3900. QJM: avoid validating log segments on log rolls (todd) - - HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are - out-of-sync (todd) - - HDFS-3899. QJM: Add client-side metrics (todd) - - HDFS-3914. QJM: acceptRecovery should abort current segment (todd) - - HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd) - - HDFS-3906. QJM: quorum timeout on failover with large log segment (todd) - - HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error - before being formatted (todd) - - HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC - client caching (todd) - - HDFS-3926. QJM: Add user documentation for QJM. (atm) - - HDFS-3943. QJM: remove currently-unused md5sum field (todd) - - HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd) - - HDFS-3955. QJM: Make acceptRecovery() atomic. (todd) - - HDFS-3956. QJM: purge temporary files when no longer within retention - period (todd) - - HDFS-4004. TestJournalNode#testJournal fails because of test case execution - order (Chao Shi via todd) - - HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet - (Chao Shi via todd) + HDFS-4260 Fix HDFS tests to set test dir to a valid HDFS path as opposed + to the local build path (Chri Nauroth via Sanjay) Release 2.0.3-alpha - Unreleased @@ -493,6 +392,24 @@ Release 2.0.3-alpha - Unreleased HDFS-4214. OfflineEditsViewer should print out the offset at which it encountered an error. (Colin Patrick McCabe via atm) + HDFS-4199. Provide test for HdfsVolumeId. (Ivan A. Veselovsky via atm) + + HDFS-3049. During the normal NN startup process, fall back on a different + edit log if we see one that is corrupt (Colin Patrick McCabe via todd) + + HDFS-3571. Allow EditLogFileInputStream to read from a remote URL (todd) + + HDFS-4110. Refine a log printed in JNStorage. (Liang Xie via suresh) + + HDFS-4153. Add START_MSG/SHUTDOWN_MSG for JournalNode. (liang xie via atm) + + HDFS-3935. Add JournalNode to the start/stop scripts (Andy Isaacson via todd) + + HDFS-4268. Remove redundant enum NNHAStatusHeartbeat.State. (shv) + + HDFS-3680. Allow customized audit logging in HDFS FSNamesystem. (Marcelo + Vanzin via atm) + OPTIMIZATIONS BUG FIXES @@ -646,6 +563,127 @@ Release 2.0.3-alpha - Unreleased of it is undefined after the iteration or modifications of the map. (szetszwo) + HDFS-4231. BackupNode: Introduce BackupState. (shv) + + HDFS-4243. When replacing an INodeDirectory, the parent pointers of the + children of the child have to be updated to the new child. (Jing Zhao + via szetszwo) + + HDFS-4238. Standby namenode should not do purging of shared + storage edits. (todd) + + HDFS-4282. TestEditLog.testFuzzSequences FAILED in all pre-commit test + (todd) + + HDFS-4236. Remove artificial limit on username length introduced in + HDFS-4171. (tucu via suresh) + + HDFS-4279. NameNode does not initialize generic conf keys when started + with -recover. (Colin Patrick McCabe via atm) + + BREAKDOWN OF HDFS-3077 SUBTASKS + + HDFS-3077. Quorum-based protocol for reading and writing edit logs. 
+ (todd, Brandon Li, and Hari Mankude via todd) + + HDFS-3694. Fix getEditLogManifest to fetch httpPort if necessary (todd) + + HDFS-3692. Support purgeEditLogs() call to remotely purge logs on JNs + (todd) + + HDFS-3693. JNStorage should read its storage info even before a writer + becomes active (todd) + + HDFS-3725. Fix QJM startup when individual JNs have gaps (todd) + + HDFS-3741. Exhaustive failure injection test for skipped RPCs (todd) + + HDFS-3773. TestNNWithQJM fails after HDFS-3741. (atm) + + HDFS-3793. Implement genericized format() in QJM (todd) + + HDFS-3795. QJM: validate journal dir at startup (todd) + + HDFS-3798. Avoid throwing NPE when finalizeSegment() is called on invalid + segment (todd) + + HDFS-3799. QJM: handle empty log segments during recovery (todd) + + HDFS-3797. QJM: add segment txid as a parameter to journal() RPC (todd) + + HDFS-3800. improvements to QJM fault testing (todd) + + HDFS-3823. QJM: TestQJMWithFaults fails occasionally because of missed + setting of HTTP port. (todd and atm) + + HDFS-3826. QJM: Some trivial logging / exception text improvements. (todd + and atm) + + HDFS-3839. QJM: hadoop-daemon.sh should be updated to accept "journalnode" + (eli) + + HDFS-3845. Fixes for edge cases in QJM recovery protocol (todd) + + HDFS-3877. QJM: Provide defaults for dfs.journalnode.*address (eli) + + HDFS-3863. Track last "committed" txid in QJM (todd) + + HDFS-3869. Expose non-file journal manager details in web UI (todd) + + HDFS-3884. Journal format() should reset cached values (todd) + + HDFS-3870. Add metrics to JournalNode (todd) + + HDFS-3891. Make selectInputStreams throw IOE instead of RTE (todd) + + HDFS-3726. If a logger misses an RPC, don't retry that logger until next + segment (todd) + + HDFS-3893. QJM: Make QJM work with security enabled. (atm) + + HDFS-3897. QJM: TestBlockToken fails after HDFS-3893. (atm) + + HDFS-3898. QJM: enable TCP_NODELAY for IPC (todd) + + HDFS-3885. QJM: optimize log sync when JN is lagging behind (todd) + + HDFS-3900. QJM: avoid validating log segments on log rolls (todd) + + HDFS-3901. QJM: send 'heartbeat' messages to JNs even when they are + out-of-sync (todd) + + HDFS-3899. QJM: Add client-side metrics (todd) + + HDFS-3914. QJM: acceptRecovery should abort current segment (todd) + + HDFS-3915. QJM: Failover fails with auth error in secure cluster (todd) + + HDFS-3906. QJM: quorum timeout on failover with large log segment (todd) + + HDFS-3840. JournalNodes log JournalNotFormattedException backtrace error + before being formatted (todd) + + HDFS-3894. QJM: testRecoverAfterDoubleFailures can be flaky due to IPC + client caching (todd) + + HDFS-3926. QJM: Add user documentation for QJM. (atm) + + HDFS-3943. QJM: remove currently-unused md5sum field (todd) + + HDFS-3950. QJM: misc TODO cleanup, improved log messages, etc. (todd) + + HDFS-3955. QJM: Make acceptRecovery() atomic. (todd) + + HDFS-3956. QJM: purge temporary files when no longer within retention + period (todd) + + HDFS-4004. TestJournalNode#testJournal fails because of test case execution + order (Chao Shi via todd) + + HDFS-4017. Unclosed FileInputStream in GetJournalEditServlet + (Chao Shi via todd) + + Release 2.0.2-alpha - 2012-09-07 INCOMPATIBLE CHANGES @@ -2035,6 +2073,11 @@ Release 0.23.6 - UNRELEASED BUG FIXES + HDFS-4247. saveNamespace should be tolerant of dangling lease (daryn) + + HDFS-4248. Renaming directories may incorrectly remove the paths in leases + under the tree. 
(daryn via szetszwo) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java index 8e328051f7e..aa6785037c1 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/HdfsVolumeId.java @@ -27,26 +27,38 @@ import org.apache.hadoop.classification.InterfaceStability; * HDFS-specific volume identifier which implements {@link VolumeId}. Can be * used to differentiate between the data directories on a single datanode. This * identifier is only unique on a per-datanode basis. + * + * Note that invalid IDs are represented by {@link VolumeId#INVALID_VOLUME_ID}. */ @InterfaceStability.Unstable @InterfaceAudience.Public public class HdfsVolumeId implements VolumeId { - + private final byte[] id; - private final boolean isValid; - public HdfsVolumeId(byte[] id, boolean isValid) { + public HdfsVolumeId(byte[] id) { + if (id == null) { + throw new NullPointerException("A valid Id can only be constructed " + + "with a non-null byte array."); + } this.id = id; - this.isValid = isValid; } @Override - public boolean isValid() { - return isValid; + public final boolean isValid() { + return true; } @Override public int compareTo(VolumeId arg0) { + if (arg0 == null) { + return 1; + } + if (!arg0.isValid()) { + // any valid ID is greater + // than any invalid ID: + return 1; + } return hashCode() - arg0.hashCode(); } @@ -63,8 +75,10 @@ public class HdfsVolumeId implements VolumeId { if (obj == this) { return true; } - HdfsVolumeId that = (HdfsVolumeId) obj; + // NB: if (!obj.isValid()) { return false; } check is not necessary + // because we have class identity checking above, and for this class + // isValid() is always true. return new EqualsBuilder().append(this.id, that.id).isEquals(); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java index f24ed66d009..b756241e976 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/fs/VolumeId.java @@ -28,6 +28,48 @@ import org.apache.hadoop.classification.InterfaceStability; @InterfaceAudience.Public public interface VolumeId extends Comparable { + /** + * Represents an invalid Volume ID (ID for unknown content). + */ + public static final VolumeId INVALID_VOLUME_ID = new VolumeId() { + + @Override + public int compareTo(VolumeId arg0) { + // This object is equal only to itself; + // It is greater than null, and + // is always less than any other VolumeId: + if (arg0 == null) { + return 1; + } + if (arg0 == this) { + return 0; + } else { + return -1; + } + } + + @Override + public boolean equals(Object obj) { + // this object is equal only to itself: + return (obj == this); + } + + @Override + public int hashCode() { + return Integer.MIN_VALUE; + } + + @Override + public boolean isValid() { + return false; + } + + @Override + public String toString() { + return "Invalid VolumeId"; + } + }; + /** * Indicates if the disk identifier is valid. 
Invalid identifiers indicate * that the block was not present, or the location could otherwise not be diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java index de74e023400..934f8dfe516 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/BlockStorageLocationUtil.java @@ -202,7 +202,7 @@ class BlockStorageLocationUtil { ArrayList l = new ArrayList(b.getLocations().length); // Start off all IDs as invalid, fill it in later with results from RPCs for (int i = 0; i < b.getLocations().length; i++) { - l.add(new HdfsVolumeId(null, false)); + l.add(VolumeId.INVALID_VOLUME_ID); } blockVolumeIds.put(b, l); } @@ -236,7 +236,7 @@ class BlockStorageLocationUtil { // Get the VolumeId by indexing into the list of VolumeIds // provided by the datanode byte[] volumeId = metaVolumeIds.get(volumeIndex); - HdfsVolumeId id = new HdfsVolumeId(volumeId, true); + HdfsVolumeId id = new HdfsVolumeId(volumeId); // Find out which index we are in the LocatedBlock's replicas LocatedBlock locBlock = extBlockToLocBlock.get(extBlock); DatanodeInfo[] dnInfos = locBlock.getLocations(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java index 9ea1ec5b526..994390cf7ac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/DFSConfigKeys.java @@ -246,6 +246,8 @@ public class DFSConfigKeys extends CommonConfigurationKeys { public static final String DFS_HOSTS = "dfs.hosts"; public static final String DFS_HOSTS_EXCLUDE = "dfs.hosts.exclude"; public static final String DFS_CLIENT_LOCAL_INTERFACES = "dfs.client.local.interfaces"; + public static final String DFS_NAMENODE_AUDIT_LOGGERS_KEY = "dfs.namenode.audit.loggers"; + public static final String DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME = "default"; // Much code in hdfs is not yet updated to use these keys. 
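The VolumeId changes above replace the old (id, isValid) pair with a non-null HdfsVolumeId plus the shared VolumeId.INVALID_VOLUME_ID singleton. Below is a minimal sketch of the resulting ordering and equality contract; it uses only the constructor and constant introduced in this patch, and the println checks are illustrative only.

    import org.apache.hadoop.fs.HdfsVolumeId;
    import org.apache.hadoop.fs.VolumeId;

    public class VolumeIdContractSketch {
      public static void main(String[] args) {
        VolumeId valid = new HdfsVolumeId(new byte[] { 1 });
        VolumeId invalid = VolumeId.INVALID_VOLUME_ID;

        System.out.println(invalid.isValid());            // false
        System.out.println(valid.isValid());              // always true for HdfsVolumeId
        System.out.println(valid.compareTo(invalid) > 0); // true: any valid ID is greater
        System.out.println(invalid.compareTo(valid) < 0); // true: the invalid ID sorts first
        System.out.println(invalid.equals(invalid));      // true: equal only to itself
      }
    }

BlockStorageLocationUtil relies on this contract by pre-filling each block's list with INVALID_VOLUME_ID and overwriting entries only when a datanode reports a real volume index.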
public static final String DFS_CLIENT_BLOCK_WRITE_LOCATEFOLLOWINGBLOCK_RETRIES_KEY = "dfs.client.block.write.locateFollowingBlock.retries"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java index 0603d15dd87..e7833d1c2fe 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/protocolPB/PBHelper.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.permission.FsPermission; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.server.protocol.StorageReport; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ClientProtocol; @@ -1232,9 +1233,9 @@ public class PBHelper { if (s == null) return null; switch (s.getState()) { case ACTIVE: - return new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.ACTIVE, s.getTxid()); + return new NNHAStatusHeartbeat(HAServiceState.ACTIVE, s.getTxid()); case STANDBY: - return new NNHAStatusHeartbeat(NNHAStatusHeartbeat.State.STANDBY, s.getTxid()); + return new NNHAStatusHeartbeat(HAServiceState.STANDBY, s.getTxid()); default: throw new IllegalArgumentException("Unexpected NNHAStatusHeartbeat.State:" + s.getState()); } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java index 734b3ed269d..473f2592342 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/balancer/Balancer.java @@ -75,6 +75,7 @@ import org.apache.hadoop.hdfs.server.protocol.BlocksWithLocations.BlockWithLocat import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.net.NetUtils; import org.apache.hadoop.net.NetworkTopology; +import org.apache.hadoop.net.Node; import org.apache.hadoop.security.token.Token; import org.apache.hadoop.util.StringUtils; import org.apache.hadoop.util.Time; @@ -557,7 +558,7 @@ public class Balancer { } /** Decide if still need to move more bytes */ - protected boolean isMoveQuotaFull() { + protected boolean hasSpaceForScheduling() { return scheduledSize pairs and + /** A matcher interface for matching nodes. */ + private interface Matcher { + /** Given the cluster topology, does the left node match the right node? */ + boolean match(NetworkTopology cluster, Node left, Node right); + } + + /** Match datanodes in the same node group. */ + static final Matcher SAME_NODE_GROUP = new Matcher() { + @Override + public boolean match(NetworkTopology cluster, Node left, Node right) { + return cluster.isOnSameNodeGroup(left, right); + } + }; + + /** Match datanodes in the same rack. */ + static final Matcher SAME_RACK = new Matcher() { + @Override + public boolean match(NetworkTopology cluster, Node left, Node right) { + return cluster.isOnSameRack(left, right); + } + }; + + /** Match any datanode with any other datanode. 
*/ + static final Matcher ANY_OTHER = new Matcher() { + @Override + public boolean match(NetworkTopology cluster, Node left, Node right) { + return left != right; + } + }; + + /** + * Decide all pairs and * the number of bytes to move from a source to a target * Maximum bytes to be moved per node is * Min(1 Band worth of bytes, MAX_SIZE_TO_MOVE). * Return total number of bytes to move in this iteration */ private long chooseNodes() { - // First, match nodes on the same node group if cluster has nodegroup - // awareness + // First, match nodes on the same node group if cluster is node group aware if (cluster.isNodeGroupAware()) { - chooseNodesOnSameNodeGroup(); + chooseNodes(SAME_NODE_GROUP); } // Then, match nodes on the same rack - chooseNodes(true); - // At last, match nodes on different racks - chooseNodes(false); + chooseNodes(SAME_RACK); + // At last, match all remaining nodes + chooseNodes(ANY_OTHER); assert (datanodes.size() >= sources.size()+targets.size()) : "Mismatched number of datanodes (" + @@ -952,57 +983,55 @@ public class Balancer { } return bytesToMove; } - - /** - * Decide all pairs where source and target are - * on the same NodeGroup - */ - private void chooseNodesOnSameNodeGroup() { + /** Decide all pairs according to the matcher. */ + private void chooseNodes(final Matcher matcher) { /* first step: match each overUtilized datanode (source) to - * one or more underUtilized datanodes within same NodeGroup(targets). + * one or more underUtilized datanodes (targets). */ - chooseOnSameNodeGroup(overUtilizedDatanodes, underUtilizedDatanodes); - - /* match each remaining overutilized datanode (source) to below average - * utilized datanodes within the same NodeGroup(targets). + chooseDatanodes(overUtilizedDatanodes, underUtilizedDatanodes, matcher); + + /* match each remaining overutilized datanode (source) to + * below average utilized datanodes (targets). * Note only overutilized datanodes that haven't had that max bytes to move * satisfied in step 1 are selected */ - chooseOnSameNodeGroup(overUtilizedDatanodes, belowAvgUtilizedDatanodes); + chooseDatanodes(overUtilizedDatanodes, belowAvgUtilizedDatanodes, matcher); - /* match each remaining underutilized datanode to above average utilized - * datanodes within the same NodeGroup. + /* match each remaining underutilized datanode (target) to + * above average utilized datanodes (source). * Note only underutilized datanodes that have not had that max bytes to * move satisfied in step 1 are selected. */ - chooseOnSameNodeGroup(underUtilizedDatanodes, aboveAvgUtilizedDatanodes); + chooseDatanodes(underUtilizedDatanodes, aboveAvgUtilizedDatanodes, matcher); } - + /** - * Match two sets of nodes within the same NodeGroup, one should be source - * nodes (utilization > Avg), and the other should be destination nodes - * (utilization < Avg). - * @param datanodes - * @param candidates + * For each datanode, choose matching nodes from the candidates. Either the + * datanodes or the candidates are source nodes with (utilization > Avg), and + * the others are target nodes with (utilization < Avg). 
*/ private void - chooseOnSameNodeGroup(Collection datanodes, Collection candidates) { + chooseDatanodes(Collection datanodes, Collection candidates, + Matcher matcher) { for (Iterator i = datanodes.iterator(); i.hasNext();) { final D datanode = i.next(); - for(; chooseOnSameNodeGroup(datanode, candidates.iterator()); ); - if (!datanode.isMoveQuotaFull()) { + for(; chooseForOneDatanode(datanode, candidates, matcher); ); + if (!datanode.hasSpaceForScheduling()) { i.remove(); } } } - + /** - * Match one datanode with a set of candidates nodes within the same NodeGroup. + * For the given datanode, choose a candidate and then schedule it. + * @return true if a candidate is chosen; false if no candidates is chosen. */ - private boolean chooseOnSameNodeGroup( - BalancerDatanode dn, Iterator candidates) { - final T chosen = chooseCandidateOnSameNodeGroup(dn, candidates); + private boolean chooseForOneDatanode( + BalancerDatanode dn, Collection candidates, Matcher matcher) { + final Iterator i = candidates.iterator(); + final C chosen = chooseCandidate(dn, i, matcher); + if (chosen == null) { return false; } @@ -1011,8 +1040,8 @@ public class Balancer { } else { matchSourceWithTargetToMove((Source)chosen, dn); } - if (!chosen.isMoveQuotaFull()) { - candidates.remove(); + if (!chosen.hasSpaceForScheduling()) { + i.remove(); } return true; } @@ -1029,19 +1058,15 @@ public class Balancer { +source.datanode.getName() + " to " + target.datanode.getName()); } - /** choose a datanode from candidates within the same NodeGroup - * of dn. - */ - private T chooseCandidateOnSameNodeGroup( - BalancerDatanode dn, Iterator candidates) { - if (dn.isMoveQuotaFull()) { + /** Choose a candidate for the given datanode. */ + private + C chooseCandidate(D dn, Iterator candidates, Matcher matcher) { + if (dn.hasSpaceForScheduling()) { for(; candidates.hasNext(); ) { - final T c = candidates.next(); - if (!c.isMoveQuotaFull()) { + final C c = candidates.next(); + if (!c.hasSpaceForScheduling()) { candidates.remove(); - continue; - } - if (cluster.isOnSameNodeGroup(dn.getDatanode(), c.getDatanode())) { + } else if (matcher.match(cluster, dn.getDatanode(), c.getDatanode())) { return c; } } @@ -1049,148 +1074,6 @@ public class Balancer { return null; } - /* if onRack is true, decide all pairs - * where source and target are on the same rack; Otherwise - * decide all pairs where source and target are - * on different racks - */ - private void chooseNodes(boolean onRack) { - /* first step: match each overUtilized datanode (source) to - * one or more underUtilized datanodes (targets). - */ - chooseTargets(underUtilizedDatanodes, onRack); - - /* match each remaining overutilized datanode (source) to - * below average utilized datanodes (targets). - * Note only overutilized datanodes that haven't had that max bytes to move - * satisfied in step 1 are selected - */ - chooseTargets(belowAvgUtilizedDatanodes, onRack); - - /* match each remaining underutilized datanode (target) to - * above average utilized datanodes (source). - * Note only underutilized datanodes that have not had that max bytes to - * move satisfied in step 1 are selected. - */ - chooseSources(aboveAvgUtilizedDatanodes, onRack); - } - - /* choose targets from the target candidate list for each over utilized - * source datanode. 
OnRackTarget determines if the chosen target - * should be on the same rack as the source - */ - private void chooseTargets( - Collection targetCandidates, boolean onRackTarget ) { - for (Iterator srcIterator = overUtilizedDatanodes.iterator(); - srcIterator.hasNext();) { - Source source = srcIterator.next(); - while (chooseTarget(source, targetCandidates.iterator(), onRackTarget)) { - } - if (!source.isMoveQuotaFull()) { - srcIterator.remove(); - } - } - return; - } - - /* choose sources from the source candidate list for each under utilized - * target datanode. onRackSource determines if the chosen source - * should be on the same rack as the target - */ - private void chooseSources( - Collection sourceCandidates, boolean onRackSource) { - for (Iterator targetIterator = - underUtilizedDatanodes.iterator(); targetIterator.hasNext();) { - BalancerDatanode target = targetIterator.next(); - while (chooseSource(target, sourceCandidates.iterator(), onRackSource)) { - } - if (!target.isMoveQuotaFull()) { - targetIterator.remove(); - } - } - return; - } - - /* For the given source, choose targets from the target candidate list. - * OnRackTarget determines if the chosen target - * should be on the same rack as the source - */ - private boolean chooseTarget(Source source, - Iterator targetCandidates, boolean onRackTarget) { - if (!source.isMoveQuotaFull()) { - return false; - } - boolean foundTarget = false; - BalancerDatanode target = null; - while (!foundTarget && targetCandidates.hasNext()) { - target = targetCandidates.next(); - if (!target.isMoveQuotaFull()) { - targetCandidates.remove(); - continue; - } - if (onRackTarget) { - // choose from on-rack nodes - if (cluster.isOnSameRack(source.datanode, target.datanode)) { - foundTarget = true; - } - } else { - // choose from off-rack nodes - if (!cluster.isOnSameRack(source.datanode, target.datanode)) { - foundTarget = true; - } - } - } - if (foundTarget) { - assert(target != null):"Choose a null target"; - matchSourceWithTargetToMove(source, target); - if (!target.isMoveQuotaFull()) { - targetCandidates.remove(); - } - return true; - } - return false; - } - - /* For the given target, choose sources from the source candidate list. 
- * OnRackSource determines if the chosen source - * should be on the same rack as the target - */ - private boolean chooseSource(BalancerDatanode target, - Iterator sourceCandidates, boolean onRackSource) { - if (!target.isMoveQuotaFull()) { - return false; - } - boolean foundSource = false; - Source source = null; - while (!foundSource && sourceCandidates.hasNext()) { - source = sourceCandidates.next(); - if (!source.isMoveQuotaFull()) { - sourceCandidates.remove(); - continue; - } - if (onRackSource) { - // choose from on-rack nodes - if ( cluster.isOnSameRack(source.getDatanode(), target.getDatanode())) { - foundSource = true; - } - } else { - // choose from off-rack nodes - if (!cluster.isOnSameRack(source.datanode, target.datanode)) { - foundSource = true; - } - } - } - if (foundSource) { - assert(source != null):"Choose a null source"; - matchSourceWithTargetToMove(source, target); - if ( !source.isMoveQuotaFull()) { - sourceCandidates.remove(); - } - return true; - } - return false; - } - private static class BytesMoved { private long bytesMoved = 0L;; private synchronized void inc( long bytes ) { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java index f976c996153..8383dc27e8a 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyDefault.java @@ -152,8 +152,9 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { List results = new ArrayList(chosenNodes); - for (Node node:chosenNodes) { - excludedNodes.put(node, node); + for (DatanodeDescriptor node:chosenNodes) { + // add localMachine and related nodes to excludedNodes + addToExcludedNodes(node, excludedNodes); adjustExcludedNodes(excludedNodes, node); } @@ -235,7 +236,7 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { + totalReplicasExpected + "\n" + e.getMessage()); if (avoidStaleNodes) { - // ecxludedNodes now has - initial excludedNodes, any nodes that were + // excludedNodes now has - initial excludedNodes, any nodes that were // chosen and nodes that were tried but were not chosen because they // were stale, decommissioned or for any other reason a node is not // chosen for write. Retry again now not avoiding stale node @@ -273,6 +274,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { if (isGoodTarget(localMachine, blocksize, maxNodesPerRack, false, results, avoidStaleNodes)) { results.add(localMachine); + // add localMachine and related nodes to excludedNode + addToExcludedNodes(localMachine, excludedNodes); return localMachine; } } @@ -281,7 +284,19 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { return chooseLocalRack(localMachine, excludedNodes, blocksize, maxNodesPerRack, results, avoidStaleNodes); } - + + /** + * Add localMachine and related nodes to excludedNodes + * for next replica choosing. 
In sub class, we can add more nodes within + * the same failure domain of localMachine + * @return number of new excluded nodes + */ + protected int addToExcludedNodes(DatanodeDescriptor localMachine, + HashMap excludedNodes) { + Node node = excludedNodes.put(localMachine, localMachine); + return node == null?1:0; + } + /* choose one node from the rack that localMachine is on. * if no such node is available, choose one node from the rack where * a second replica is on. @@ -392,6 +407,8 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { if (isGoodTarget(chosenNode, blocksize, maxNodesPerRack, results, avoidStaleNodes)) { results.add(chosenNode); + // add chosenNode and related nodes to excludedNode + addToExcludedNodes(chosenNode, excludedNodes); adjustExcludedNodes(excludedNodes, chosenNode); return chosenNode; } else { @@ -441,6 +458,9 @@ public class BlockPlacementPolicyDefault extends BlockPlacementPolicy { maxNodesPerRack, results, avoidStaleNodes)) { numOfReplicas--; results.add(chosenNode); + // add chosenNode and related nodes to excludedNode + int newExcludedNodes = addToExcludedNodes(chosenNode, excludedNodes); + numOfAvailableNodes -= newExcludedNodes; adjustExcludedNodes(excludedNodes, chosenNode); } else { badTarget = true; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java index c575fa8e115..643d2b401cd 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/blockmanagement/BlockPlacementPolicyWithNodeGroup.java @@ -240,6 +240,27 @@ public class BlockPlacementPolicyWithNodeGroup extends BlockPlacementPolicyDefau String nodeGroupString = cur.getNetworkLocation(); return NetworkTopology.getFirstHalf(nodeGroupString); } + + /** + * Find other nodes in the same nodegroup of localMachine and add them + * into excludeNodes as replica should not be duplicated for nodes + * within the same nodegroup + * @return number of new excluded nodes + */ + protected int addToExcludedNodes(DatanodeDescriptor localMachine, + HashMap excludedNodes) { + int countOfExcludedNodes = 0; + String nodeGroupScope = localMachine.getNetworkLocation(); + List leafNodes = clusterMap.getLeaves(nodeGroupScope); + for (Node leafNode : leafNodes) { + Node node = excludedNodes.put(leafNode, leafNode); + if (node == null) { + // not a existing node in excludedNodes + countOfExcludedNodes++; + } + } + return countOfExcludedNodes; + } /** * Pick up replica node set for deleting replica as over-replicated. 
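addToExcludedNodes is the extension point HDFS-4240 introduces: the default policy excludes only the chosen node, while the node-group policy above excludes every leaf in the same node group. A minimal sketch of another hypothetical subclass built on the same pattern follows; the package, class name, and direct use of the protected clusterMap field mirror the patch and are assumptions of this sketch, not part of the change itself.

    package org.apache.hadoop.hdfs.server.blockmanagement;

    import java.util.HashMap;
    import java.util.List;

    import org.apache.hadoop.net.Node;

    /** Hypothetical policy: exclude every leaf under the chosen node's network location. */
    public class BlockPlacementPolicyExcludeLocation
        extends BlockPlacementPolicyDefault {

      @Override
      protected int addToExcludedNodes(DatanodeDescriptor localMachine,
          HashMap<Node, Node> excludedNodes) {
        int newlyExcluded = 0;
        List<Node> leaves = clusterMap.getLeaves(localMachine.getNetworkLocation());
        for (Node leaf : leaves) {
          if (excludedNodes.put(leaf, leaf) == null) {
            newlyExcluded++; // count only nodes that were not already excluded
          }
        }
        return newlyExcluded;
      }
    }

Returning the count of newly excluded nodes matters because chooseRandom above subtracts it from numOfAvailableNodes, keeping the retry accounting correct when a whole failure domain is ruled out at once.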
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java index acd8e9ce51d..c170cf9edda 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPOfferService.java @@ -26,6 +26,7 @@ import java.util.concurrent.CopyOnWriteArrayList; import org.apache.commons.logging.Log; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -411,7 +412,7 @@ class BPOfferService { final long txid = nnHaState.getTxId(); final boolean nnClaimsActive = - nnHaState.getState() == NNHAStatusHeartbeat.State.ACTIVE; + nnHaState.getState() == HAServiceState.ACTIVE; final boolean bposThinksActive = bpServiceToActive == actor; final boolean isMoreRecentClaim = txid > lastActiveClaimTxId; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java new file mode 100644 index 00000000000..614eb63d055 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/AuditLogger.java @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hdfs.server.namenode; + +import java.net.InetAddress; +import java.security.Principal; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; + +/** + * Interface defining an audit logger. + */ +@InterfaceAudience.Public +@InterfaceStability.Evolving +public interface AuditLogger { + + /** + * Called during initialization of the logger. + * + * @param conf The configuration object. + */ + void initialize(Configuration conf); + + /** + * Called to log an audit event. + *

+ * This method must return as quickly as possible, since it's called + * in a critical section of the NameNode's operation. + * + * @param succeeded Whether authorization succeeded. + * @param userName Name of the user executing the request. + * @param addr Remote address of the request. + * @param cmd The requested command. + * @param src Path of affected source file. + * @param dst Path of affected destination file (if any). + * @param stat File information for operations that change the file's + * metadata (permissions, owner, times, etc). + */ + void logAuditEvent(boolean succeeded, String userName, + InetAddress addr, String cmd, String src, String dst, + FileStatus stat); + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java index dd273a2187c..077a9536903 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupNode.java @@ -24,6 +24,7 @@ import java.net.SocketTimeoutException; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.CommonConfigurationKeys; +import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.NameNodeProxies; @@ -35,6 +36,7 @@ import org.apache.hadoop.hdfs.protocolPB.JournalProtocolPB; import org.apache.hadoop.hdfs.protocolPB.JournalProtocolServerSideTranslatorPB; import org.apache.hadoop.hdfs.server.common.HdfsServerConstants.NamenodeRole; import org.apache.hadoop.hdfs.server.common.Storage; +import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.protocol.FenceResponse; import org.apache.hadoop.hdfs.server.protocol.JournalInfo; import org.apache.hadoop.hdfs.server.protocol.JournalProtocol; @@ -414,14 +416,23 @@ public class BackupNode extends NameNode { + HdfsConstants.LAYOUT_VERSION + " actual "+ nsInfo.getLayoutVersion(); return nsInfo; } - + @Override + protected String getNameServiceId(Configuration conf) { + return DFSUtil.getBackupNameServiceId(conf); + } + + protected HAState createHAState() { + return new BackupState(); + } + + @Override // NameNode protected NameNodeHAContext createHAContext() { return new BNHAContext(); } - + private class BNHAContext extends NameNodeHAContext { - @Override // NameNode + @Override // NameNodeHAContext public void checkOperation(OperationCategory op) throws StandbyException { if (op == OperationCategory.UNCHECKED || @@ -435,10 +446,42 @@ public class BackupNode extends NameNode { throw new StandbyException(msg); } } - } - - @Override - protected String getNameServiceId(Configuration conf) { - return DFSUtil.getBackupNameServiceId(conf); + + @Override // NameNodeHAContext + public void prepareToStopStandbyServices() throws ServiceFailedException { + } + + /** + * Start services for BackupNode. + *

+ * The following services should be muted + * (not run or not pass any control commands to DataNodes) + * on BackupNode: + * {@link LeaseManager.Monitor} protected by SafeMode. + * {@link BlockManager.ReplicationMonitor} protected by SafeMode. + * {@link HeartbeatManager.Monitor} protected by SafeMode. + * {@link DecommissionManager.Monitor} need to prohibit refreshNodes(). + * {@link PendingReplicationBlocks.PendingReplicationMonitor} harmless, + * because ReplicationMonitor is muted. + */ + @Override + public void startActiveServices() throws IOException { + try { + namesystem.startActiveServices(); + } catch (Throwable t) { + doImmediateShutdown(t); + } + } + + @Override + public void stopActiveServices() throws IOException { + try { + if (namesystem != null) { + namesystem.stopActiveServices(); + } + } catch (Throwable t) { + doImmediateShutdown(t); + } + } } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupState.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupState.java new file mode 100644 index 00000000000..ce11fc9e687 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/BackupState.java @@ -0,0 +1,53 @@ +package org.apache.hadoop.hdfs.server.namenode; + +import java.io.IOException; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; +import org.apache.hadoop.ha.ServiceFailedException; +import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; +import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; +import org.apache.hadoop.hdfs.server.namenode.ha.HAState; +import org.apache.hadoop.ipc.StandbyException; + +@InterfaceAudience.Private +public class BackupState extends HAState { + + public BackupState() { + super(HAServiceState.STANDBY); + } + + @Override // HAState + public void checkOperation(HAContext context, OperationCategory op) + throws StandbyException { + context.checkOperation(op); + } + + @Override // HAState + public boolean shouldPopulateReplQueues() { + return false; + } + + @Override // HAState + public void enterState(HAContext context) throws ServiceFailedException { + try { + context.startActiveServices(); + } catch (IOException e) { + throw new ServiceFailedException("Failed to start backup services", e); + } + } + + @Override // HAState + public void exitState(HAContext context) throws ServiceFailedException { + try { + context.stopActiveServices(); + } catch (IOException e) { + throw new ServiceFailedException("Failed to stop backup services", e); + } + } + + @Override // HAState + public void prepareToExitState(HAContext context) throws ServiceFailedException { + context.prepareToStopStandbyServices(); + } +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java index affa4227c4a..6403955d4e5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSDirectory.java @@ -587,6 +587,8 @@ public class FSDirectory implements Closeable { // update modification time of dst and the parent of src srcInodes[srcInodes.length-2].setModificationTime(timestamp); 
dstInodes[dstInodes.length-2].setModificationTime(timestamp); + // update moved leases with new filename + getFSNamesystem().unprotectedChangeLease(src, dst); return true; } } finally { @@ -752,6 +754,8 @@ public class FSDirectory implements Closeable { } srcInodes[srcInodes.length - 2].setModificationTime(timestamp); dstInodes[dstInodes.length - 2].setModificationTime(timestamp); + // update moved lease with new filename + getFSNamesystem().unprotectedChangeLease(src, dst); // Collect the blocks and remove the lease for previous dst int filesDeleted = 0; @@ -1204,20 +1208,39 @@ public class FSDirectory implements Closeable { ) throws IOException { writeLock(); try { - replaceINodeUnsynced(path, oldnode, newnode); - - //Currently, oldnode and newnode are assumed to contain the same blocks. - //Otherwise, blocks need to be removed from the blocksMap. - int index = 0; - for (BlockInfo b : newnode.getBlocks()) { - BlockInfo info = getBlockManager().addBlockCollection(b, newnode); - newnode.setBlock(index, info); // inode refers to the block in BlocksMap - index++; - } + unprotectedReplaceNode(path, oldnode, newnode); } finally { writeUnlock(); } } + + void unprotectedReplaceNode(String path, INodeFile oldnode, INodeFile newnode) + throws IOException, UnresolvedLinkException { + assert hasWriteLock(); + INodeDirectory parent = oldnode.parent; + // Remove the node from the namespace + if (!oldnode.removeNode()) { + NameNode.stateChangeLog.warn("DIR* FSDirectory.replaceNode: " + + "failed to remove " + path); + throw new IOException("FSDirectory.replaceNode: " + + "failed to remove " + path); + } + + // Parent should be non-null, otherwise oldnode.removeNode() will return + // false + newnode.setLocalName(oldnode.getLocalNameBytes()); + parent.addChild(newnode, true); + + /* Currently oldnode and newnode are assumed to contain the same + * blocks. Otherwise, blocks need to be removed from the blocksMap. + */ + int index = 0; + for (BlockInfo b : newnode.getBlocks()) { + BlockInfo info = getBlockManager().addBlockCollection(b, newnode); + newnode.setBlock(index, info); // inode refers to the block in BlocksMap + index++; + } + } /** * Get a partial listing of the indicated directory diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java index 0bdc6238b88..157887ee044 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLog.java @@ -914,6 +914,11 @@ public class FSEditLog implements LogsPurgeable { return journalSet; } + @VisibleForTesting + synchronized void setJournalSetForTesting(JournalSet js) { + this.journalSet = js; + } + /** * Used only by tests. */ @@ -1067,9 +1072,18 @@ public class FSEditLog implements LogsPurgeable { /** * Archive any log files that are older than the given txid. + * + * If the edit log is not open for write, then this call returns with no + * effect. */ @Override public synchronized void purgeLogsOlderThan(final long minTxIdToKeep) { + // Should not purge logs unless they are open for write. + // This prevents the SBN from purging logs on shared storage, for example. 
+ if (!isOpenForWrite()) { + return; + } + assert curSegmentTxId == HdfsConstants.INVALID_TXID || // on format this is no-op minTxIdToKeep <= curSegmentTxId : "cannot purge logs older than txid " + minTxIdToKeep + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java index 1916348d342..5b5d761a253 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSEditLogLoader.java @@ -31,7 +31,6 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.HdfsConstants; -import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; import org.apache.hadoop.hdfs.protocol.LayoutVersion; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfoUnderConstruction; @@ -322,7 +321,7 @@ public class FSEditLogLoader { INodeFileUnderConstruction ucFile = (INodeFileUnderConstruction) oldFile; fsNamesys.leaseManager.removeLeaseWithPrefixPath(addCloseOp.path); INodeFile newFile = ucFile.convertToInodeFile(); - fsDir.replaceNode(addCloseOp.path, ucFile, newFile); + fsDir.unprotectedReplaceNode(addCloseOp.path, ucFile, newFile); } break; } @@ -360,10 +359,8 @@ public class FSEditLogLoader { } case OP_RENAME_OLD: { RenameOldOp renameOp = (RenameOldOp)op; - HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false); fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, renameOp.timestamp); - fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo); break; } case OP_DELETE: { @@ -433,11 +430,8 @@ public class FSEditLogLoader { } case OP_RENAME: { RenameOp renameOp = (RenameOp)op; - - HdfsFileStatus dinfo = fsDir.getFileInfo(renameOp.dst, false); fsDir.unprotectedRenameTo(renameOp.src, renameOp.dst, renameOp.timestamp, renameOp.options); - fsNamesys.unprotectedChangeLease(renameOp.src, renameOp.dst, dinfo); break; } case OP_GET_DELEGATION_TOKEN: { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java index bb761be2fda..144d01d1e6b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSImageSerialization.java @@ -197,7 +197,7 @@ public class FSImageSerialization { public static String readString(DataInputStream in) throws IOException { DeprecatedUTF8 ustr = TL_DATA.get().U_STR; ustr.readFields(in); - return ustr.toString(); + return ustr.toStringChecked(); } static String readString_EmptyAsNull(DataInputStream in) throws IOException { diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java index 8db01072c03..dd03cd15eff 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java +++ 
b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/FSNamesystem.java @@ -34,6 +34,8 @@ import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_ENCRYPT_DATA_TRANSFER_KEY import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_HA_STANDBY_CHECKPOINTS_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_ACCESSTIME_PRECISION_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_KEY_UPDATE_INTERVAL_KEY; import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_DEFAULT; @@ -111,6 +113,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.ContentSummary; import org.apache.hadoop.fs.CreateFlag; import org.apache.hadoop.fs.FileAlreadyExistsException; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FsServerDefaults; import org.apache.hadoop.fs.InvalidPathException; import org.apache.hadoop.fs.Options; @@ -121,6 +124,7 @@ import org.apache.hadoop.fs.UnresolvedLinkException; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.ha.ServiceFailedException; import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HAUtil; @@ -165,12 +169,10 @@ import org.apache.hadoop.hdfs.server.namenode.INode.BlocksMapUpdateInfo; import org.apache.hadoop.hdfs.server.namenode.INodeDirectory.INodesInPath; import org.apache.hadoop.hdfs.server.namenode.LeaseManager.Lease; import org.apache.hadoop.hdfs.server.namenode.NameNode.OperationCategory; -import org.apache.hadoop.hdfs.server.namenode.ha.ActiveState; import org.apache.hadoop.hdfs.server.namenode.ha.EditLogTailer; import org.apache.hadoop.hdfs.server.namenode.ha.HAContext; import org.apache.hadoop.hdfs.server.namenode.ha.HAState; import org.apache.hadoop.hdfs.server.namenode.ha.StandbyCheckpointer; -import org.apache.hadoop.hdfs.server.namenode.ha.StandbyState; import org.apache.hadoop.hdfs.server.namenode.metrics.FSNamesystemMBean; import org.apache.hadoop.hdfs.server.namenode.metrics.NameNodeMetrics; import org.apache.hadoop.hdfs.server.namenode.snapshot.INodeFileWithLink; @@ -250,32 +252,32 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } }; - private static final void logAuditEvent(UserGroupInformation ugi, + private boolean isAuditEnabled() { + return !isDefaultAuditLogger || auditLog.isInfoEnabled(); + } + + private void logAuditEvent(UserGroupInformation ugi, InetAddress addr, String cmd, String src, String dst, HdfsFileStatus stat) { logAuditEvent(true, ugi, addr, cmd, src, dst, stat); } - private static final void logAuditEvent(boolean succeeded, + private void logAuditEvent(boolean succeeded, UserGroupInformation ugi, InetAddress addr, String cmd, String src, String dst, HdfsFileStatus stat) { - final StringBuilder sb = auditBuffer.get(); - sb.setLength(0); - sb.append("allowed=").append(succeeded).append("\t"); - sb.append("ugi=").append(ugi).append("\t"); - sb.append("ip=").append(addr).append("\t"); - 
sb.append("cmd=").append(cmd).append("\t"); - sb.append("src=").append(src).append("\t"); - sb.append("dst=").append(dst).append("\t"); - if (null == stat) { - sb.append("perm=null"); - } else { - sb.append("perm="); - sb.append(stat.getOwner()).append(":"); - sb.append(stat.getGroup()).append(":"); - sb.append(stat.getPermission()); + FileStatus status = null; + if (stat != null) { + Path symlink = stat.isSymlink() ? new Path(stat.getSymlink()) : null; + Path path = dst != null ? new Path(dst) : new Path(src); + status = new FileStatus(stat.getLen(), stat.isDir(), + stat.getReplication(), stat.getBlockSize(), stat.getModificationTime(), + stat.getAccessTime(), stat.getPermission(), stat.getOwner(), + stat.getGroup(), symlink, path); + } + for (AuditLogger logger : auditLoggers) { + logger.logAuditEvent(succeeded, ugi.toString(), addr, + cmd, src, dst, status); } - auditLog.info(sb); } /** @@ -308,6 +310,11 @@ public class FSNamesystem implements Namesystem, FSClusterStats, final DelegationTokenSecretManager dtSecretManager; private final boolean alwaysUseDelegationTokensForTests; + // Tracks whether the default audit logger is the only configured audit + // logger; this allows isAuditEnabled() to return false in case the + // underlying logger is disabled, and avoid some unnecessary work. + private final boolean isDefaultAuditLogger; + private final List auditLoggers; /** The namespace tree. */ FSDirectory dir; @@ -542,14 +549,50 @@ public class FSNamesystem implements Namesystem, FSClusterStats, this.dir = new FSDirectory(fsImage, this, conf); this.snapshotManager = new SnapshotManager(this, dir); this.safeMode = new SafeModeInfo(conf); - + this.auditLoggers = initAuditLoggers(conf); + this.isDefaultAuditLogger = auditLoggers.size() == 1 && + auditLoggers.get(0) instanceof DefaultAuditLogger; } catch(IOException e) { LOG.error(getClass().getSimpleName() + " initialization failed.", e); close(); throw e; + } catch (RuntimeException re) { + LOG.error(getClass().getSimpleName() + " initialization failed.", re); + close(); + throw re; } } + private List initAuditLoggers(Configuration conf) { + // Initialize the custom access loggers if configured. + Collection alClasses = conf.getStringCollection(DFS_NAMENODE_AUDIT_LOGGERS_KEY); + List auditLoggers = Lists.newArrayList(); + if (alClasses != null && !alClasses.isEmpty()) { + for (String className : alClasses) { + try { + AuditLogger logger; + if (DFS_NAMENODE_DEFAULT_AUDIT_LOGGER_NAME.equals(className)) { + logger = new DefaultAuditLogger(); + } else { + logger = (AuditLogger) Class.forName(className).newInstance(); + } + logger.initialize(conf); + auditLoggers.add(logger); + } catch (RuntimeException re) { + throw re; + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + + // Make sure there is at least one logger installed. + if (auditLoggers.isEmpty()) { + auditLoggers.add(new DefaultAuditLogger()); + } + return auditLoggers; + } + void loadFSImage(StartupOption startOpt, FSImage fsImage, boolean haEnabled) throws IOException { // format before starting up if requested @@ -1009,8 +1052,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // start in active. 
return haEnabled; } - - return haContext.getState() instanceof StandbyState; + + return HAServiceState.STANDBY == haContext.getState().getServiceState(); } /** @@ -1036,7 +1079,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, long totalInodes = this.dir.totalInodes(); long totalBlocks = this.getBlocksTotal(); out.println(totalInodes + " files and directories, " + totalBlocks - + " blocks = " + (totalInodes + totalBlocks) + " total"); + + " blocks = " + (totalInodes + totalBlocks) + + " total filesystem objects"); blockManager.metaSave(out); } @@ -1082,7 +1126,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { setPermissionInt(src, permission); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "setPermission", src, null, null); @@ -1104,14 +1148,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } checkOwner(src); dir.setPermission(src, permission); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(src, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "setPermission", src, null, resultingStat); @@ -1128,7 +1172,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { setOwnerInt(src, username, group); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "setOwner", src, null, null); @@ -1159,14 +1203,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } dir.setOwner(src, username, group); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(src, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "setOwner", src, null, resultingStat); @@ -1209,7 +1253,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return getBlockLocationsInt(src, offset, length, doAccessTime, needBlockToken, checkSafeMode); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "open", src, null, null); @@ -1235,7 +1279,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } final LocatedBlocks ret = getBlockLocationsUpdateTimes(src, offset, length, doAccessTime, needBlockToken); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "open", src, null, null); @@ -1316,7 +1360,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { concatInt(target, srcs); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && 
isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getLoginUser(), getRemoteIp(), "concat", Arrays.toString(srcs), target, null); @@ -1359,14 +1403,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, throw new SafeModeException("Cannot concat " + target, safeMode); } concatInternal(target, srcs); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(target, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getLoginUser(), getRemoteIp(), "concat", Arrays.toString(srcs), target, resultingStat); @@ -1492,7 +1536,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { setTimesInt(src, mtime, atime); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "setTimes", src, null, null); @@ -1518,7 +1562,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, INode inode = dir.getMutableINode(src); if (inode != null) { dir.setTimes(src, inode, mtime, atime, true); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { final HdfsFileStatus stat = dir.getFileInfo(src, false); logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), @@ -1541,7 +1585,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { createSymlinkInt(target, link, dirPerms, createParent); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "createSymlink", link, target, null); @@ -1562,14 +1606,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, verifyParentDir(link); } createSymlinkInternal(target, link, dirPerms, createParent); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(link, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "createSymlink", link, target, resultingStat); @@ -1625,7 +1669,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return setReplicationInt(src, replication); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "setReplication", src, null, null); @@ -1661,7 +1705,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } getEditLog().logSync(); - if (isFile && auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isFile && isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "setReplication", src, null, null); @@ -1717,7 +1761,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, startFileInt(src, permissions, holder, clientMachine, flag, createParent, replication, blockSize); } 
catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "create", src, null, null); @@ -1750,7 +1794,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } } - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { final HdfsFileStatus stat = dir.getFileInfo(src, false); logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), @@ -2052,7 +2096,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return appendFileInt(src, holder, clientMachine); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "append", src, null, null); @@ -2098,7 +2142,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, +" block size " + lb.getBlock().getNumBytes()); } } - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "append", src, null, null); @@ -2544,7 +2588,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return renameToInt(src, dst); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "rename", src, dst, null); @@ -2566,14 +2610,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, checkOperation(OperationCategory.WRITE); status = renameToInternal(src, dst); - if (status && auditLog.isInfoEnabled() && isExternalInvocation()) { + if (status && isAuditEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(dst, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (status && auditLog.isInfoEnabled() && isExternalInvocation()) { + if (status && isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "rename", src, dst, resultingStat); @@ -2595,15 +2639,15 @@ public class FSNamesystem implements Namesystem, FSClusterStats, if (isPermissionEnabled) { //We should not be doing this. This is move() not renameTo(). //but for now, + //NOTE: yes, this is bad! it's assuming much lower level behavior + // of rewriting the dst String actualdst = dir.isDir(dst)? 
dst + Path.SEPARATOR + new Path(src).getName(): dst; checkParentAccess(src, FsAction.WRITE); checkAncestorAccess(actualdst, FsAction.WRITE); } - HdfsFileStatus dinfo = dir.getFileInfo(dst, false); if (dir.renameTo(src, dst)) { - unprotectedChangeLease(src, dst, dinfo); // update lease with new filename return true; } return false; @@ -2623,14 +2667,14 @@ public class FSNamesystem implements Namesystem, FSClusterStats, checkOperation(OperationCategory.WRITE); renameToInternal(src, dst, options); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { resultingStat = dir.getFileInfo(dst, false); } } finally { writeUnlock(); } getEditLog().logSync(); - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { StringBuilder cmd = new StringBuilder("rename options="); for (Rename option : options) { cmd.append(option.value()).append(" "); @@ -2654,9 +2698,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, checkAncestorAccess(dst, FsAction.WRITE); } - HdfsFileStatus dinfo = dir.getFileInfo(dst, false); dir.renameTo(src, dst, options); - unprotectedChangeLease(src, dst, dinfo); // update lease with new filename } /** @@ -2671,7 +2713,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return deleteInt(src, recursive); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "delete", src, null, null); @@ -2687,7 +2729,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, NameNode.stateChangeLog.debug("DIR* NameSystem.delete: " + src); } boolean status = deleteInternal(src, recursive, true); - if (status && auditLog.isInfoEnabled() && isExternalInvocation()) { + if (status && isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "delete", src, null, null); @@ -2885,7 +2927,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } stat = dir.getFileInfo(src, resolveLink); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "getfileinfo", src, null, null); @@ -2894,7 +2936,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, } finally { readUnlock(); } - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "getfileinfo", src, null, null); @@ -2910,7 +2952,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return mkdirsInt(src, permissions, createParent); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "mkdirs", src, null, null); @@ -2934,7 +2976,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, writeUnlock(); } getEditLog().logSync(); - if (status && auditLog.isInfoEnabled() && isExternalInvocation()) { + if (status && isAuditEnabled() && isExternalInvocation()) { final HdfsFileStatus stat = dir.getFileInfo(src, false); logAuditEvent(UserGroupInformation.getCurrentUser(), 
getRemoteIp(), @@ -3368,7 +3410,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, try { return getListingInt(src, startAfter, needLocation); } catch (AccessControlException e) { - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(false, UserGroupInformation.getCurrentUser(), getRemoteIp(), "listStatus", src, null, null); @@ -3392,7 +3434,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, checkTraverse(src); } } - if (auditLog.isInfoEnabled() && isExternalInvocation()) { + if (isAuditEnabled() && isExternalInvocation()) { logAuditEvent(UserGroupInformation.getCurrentUser(), getRemoteIp(), "listStatus", src, null, null); @@ -3482,15 +3524,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private NNHAStatusHeartbeat createHaStatusHeartbeat() { HAState state = haContext.getState(); - NNHAStatusHeartbeat.State hbState; - if (state instanceof ActiveState) { - hbState = NNHAStatusHeartbeat.State.ACTIVE; - } else if (state instanceof StandbyState) { - hbState = NNHAStatusHeartbeat.State.STANDBY; - } else { - throw new AssertionError("Invalid state: " + state.getClass()); - } - return new NNHAStatusHeartbeat(hbState, + return new NNHAStatusHeartbeat(state.getServiceState(), getFSImage().getLastAppliedOrWrittenTxId()); } @@ -3929,7 +3963,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, private synchronized void leave() { // if not done yet, initialize replication queues. // In the standby, do not populate repl queues - if (!isPopulatingReplQueues() && !isInStandbyState()) { + if (!isPopulatingReplQueues() && shouldPopulateReplQueues()) { initializeReplQueues(); } long timeInSafemode = now() - startTime; @@ -3972,7 +4006,8 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * initializing replication queues. */ private synchronized boolean canInitializeReplQueues() { - return !isInStandbyState() && blockSafe >= blockReplQueueThreshold; + return shouldPopulateReplQueues() + && blockSafe >= blockReplQueueThreshold; } /** @@ -4312,7 +4347,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, @Override public boolean isPopulatingReplQueues() { - if (isInStandbyState()) { + if (!shouldPopulateReplQueues()) { return false; } // safeMode is volatile, and may be set to null at any time @@ -4321,7 +4356,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, return true; return safeMode.isPopulatingReplQueues(); } - + + private boolean shouldPopulateReplQueues() { + if(haContext == null || haContext.getState() == null) + return false; + return haContext.getState().shouldPopulateReplQueues(); + } + @Override public void incrementSafeBlockCount(int replication) { // safeMode is volatile, and may be set to null at any time @@ -4939,31 +4980,9 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // rename was successful. If any part of the renamed subtree had // files that were being written to, update with new filename. 
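The simplified lease handling introduced below reduces the rename-time update to a prefix substitution on each lease path under the renamed directory. A minimal sketch of that rewrite; the helper class and the paths are invented for illustration, while the substitution itself mirrors the new LeaseManager.changeLease:

    // Illustration only: the helper and the paths are invented.
    class LeaseRenameSketch {
      /** Same substitution the new LeaseManager.changeLease performs:
       *  keep the suffix of the lease path, swap the stem src for dst. */
      static String rewrite(String src, String dst, String oldLeasePath) {
        return dst + oldLeasePath.substring(src.length());
      }

      public static void main(String[] args) {
        // prints /user/alice/dir2/open.file
        System.out.println(rewrite("/user/alice/dir", "/user/alice/dir2",
            "/user/alice/dir/open.file"));
      }
    }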
- void unprotectedChangeLease(String src, String dst, HdfsFileStatus dinfo) { - String overwrite; - String replaceBy; + void unprotectedChangeLease(String src, String dst) { assert hasWriteLock(); - - boolean destinationExisted = true; - if (dinfo == null) { - destinationExisted = false; - } - - if (destinationExisted && dinfo.isDir()) { - Path spath = new Path(src); - Path parent = spath.getParent(); - if (parent.isRoot()) { - overwrite = parent.toString(); - } else { - overwrite = parent.toString() + Path.SEPARATOR; - } - replaceBy = dst + Path.SEPARATOR; - } else { - overwrite = src; - replaceBy = dst; - } - - leaseManager.changeLease(src, dst, overwrite, replaceBy); + leaseManager.changeLease(src, dst); } /** @@ -4974,19 +4993,13 @@ public class FSNamesystem implements Namesystem, FSClusterStats, // lock on our behalf. If we took the read lock here, we could block // for fairness if a writer is waiting on the lock. synchronized (leaseManager) { - out.writeInt(leaseManager.countPath()); // write the size - - for (Lease lease : leaseManager.getSortedLeases()) { - for(String path : lease.getPaths()) { - // verify that path exists in namespace - final INodeFileUnderConstruction cons; - try { - cons = INodeFileUnderConstruction.valueOf(dir.getINode(path), path); - } catch (UnresolvedLinkException e) { - throw new AssertionError("Lease files should reside on this FS"); - } - FSImageSerialization.writeINodeUnderConstruction(out, cons, path); - } + Map nodes = + leaseManager.getINodesUnderConstruction(); + out.writeInt(nodes.size()); // write the size + for (Map.Entry entry + : nodes.entrySet()) { + FSImageSerialization.writeINodeUnderConstruction( + out, entry.getValue(), entry.getKey()); } } } @@ -5345,7 +5358,7 @@ public class FSNamesystem implements Namesystem, FSClusterStats, * Log fsck event in the audit log */ void logFsckEvent(String src, InetAddress remoteAddress) throws IOException { - if (auditLog.isInfoEnabled()) { + if (isAuditEnabled()) { logAuditEvent(UserGroupInformation.getCurrentUser(), remoteAddress, "fsck", src, null, null); @@ -5729,4 +5742,44 @@ public class FSNamesystem implements Namesystem, FSClusterStats, newSnapshotRoot.toString(), null); } } + + /** + * Default AuditLogger implementation; used when no access logger is + * defined in the config file. It can also be explicitly listed in the + * config file. + */ + private static class DefaultAuditLogger implements AuditLogger { + + @Override + public void initialize(Configuration conf) { + // Nothing to do. 
+ } + + @Override + public void logAuditEvent(boolean succeeded, String userName, + InetAddress addr, String cmd, String src, String dst, + FileStatus status) { + if (auditLog.isInfoEnabled()) { + final StringBuilder sb = auditBuffer.get(); + sb.setLength(0); + sb.append("allowed=").append(succeeded).append("\t"); + sb.append("ugi=").append(userName).append("\t"); + sb.append("ip=").append(addr).append("\t"); + sb.append("cmd=").append(cmd).append("\t"); + sb.append("src=").append(src).append("\t"); + sb.append("dst=").append(dst).append("\t"); + if (null == status) { + sb.append("perm=null"); + } else { + sb.append("perm="); + sb.append(status.getOwner()).append(":"); + sb.append(status.getGroup()).append(":"); + sb.append(status.getPermission()); + } + auditLog.info(sb); + } + } + + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java index 362153efe8d..94d35c3ae33 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/INodeDirectory.java @@ -80,6 +80,11 @@ public class INodeDirectory extends INode { public INodeDirectory(INodeDirectory other) { super(other); this.children = other.children; + if (this.children != null) { + for (INode child : children) { + child.parent = this; + } + } } /** @return true unconditionally. */ @@ -118,6 +123,7 @@ public class INodeDirectory extends INode { final int low = searchChildren(newChild); if (low>=0) { // an old child exists so replace by the newChild + children.get(low).parent = null; children.set(low, newChild); } else { throw new IllegalArgumentException("No child exists to be replaced"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java index b1764b7d3cb..87d19e5fb61 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/LeaseManager.java @@ -331,22 +331,19 @@ public class LeaseManager { } } - synchronized void changeLease(String src, String dst, - String overwrite, String replaceBy) { + synchronized void changeLease(String src, String dst) { if (LOG.isDebugEnabled()) { LOG.debug(getClass().getSimpleName() + ".changelease: " + - " src=" + src + ", dest=" + dst + - ", overwrite=" + overwrite + - ", replaceBy=" + replaceBy); + " src=" + src + ", dest=" + dst); } - final int len = overwrite.length(); + final int len = src.length(); for(Map.Entry entry : findLeaseWithPrefixPath(src, sortedLeasesByPath).entrySet()) { final String oldpath = entry.getKey(); final Lease lease = entry.getValue(); - //overwrite must be a prefix of oldpath - final String newpath = replaceBy + oldpath.substring(len); + // replace stem of src with new destination + final String newpath = dst + oldpath.substring(len); if (LOG.isDebugEnabled()) { LOG.debug("changeLease: replacing " + oldpath + " with " + newpath); } @@ -429,6 +426,26 @@ public class LeaseManager { } } + /** + * Get the list of inodes corresponding to valid leases. 
+ * @return list of inodes + * @throws UnresolvedLinkException + */ + Map getINodesUnderConstruction() { + Map inodes = + new TreeMap(); + for (String p : sortedLeasesByPath.keySet()) { + // verify that path exists in namespace + try { + INode node = fsnamesystem.dir.getINode(p); + inodes.put(p, INodeFileUnderConstruction.valueOf(node, p)); + } catch (IOException ioe) { + LOG.error(ioe); + } + } + return inodes; + } + /** Check the leases beginning from the oldest. * @return true is sync is needed. */ diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java index 309811c242f..99f804d1bc5 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java @@ -598,11 +598,7 @@ public class NameNode { String nsId = getNameServiceId(conf); String namenodeId = HAUtil.getNameNodeId(conf, nsId); this.haEnabled = HAUtil.isHAEnabled(conf, nsId); - if (!haEnabled) { - state = ACTIVE_STATE; - } else { - state = STANDBY_STATE; - } + state = createHAState(); this.allowStaleStandbyReads = HAUtil.shouldAllowStandbyReads(conf); this.haContext = createHAContext(); try { @@ -619,6 +615,10 @@ public class NameNode { } } + protected HAState createHAState() { + return !haEnabled ? ACTIVE_STATE : STANDBY_STATE; + } + protected HAContext createHAContext() { return new NameNodeHAContext(); } @@ -1050,6 +1050,9 @@ public class NameNode { private static void doRecovery(StartupOption startOpt, Configuration conf) throws IOException { + String nsId = DFSUtil.getNamenodeNameServiceId(conf); + String namenodeId = HAUtil.getNameNodeId(conf, nsId); + initializeGenericKeys(conf, nsId, namenodeId); if (startOpt.getForce() < MetaRecoveryContext.FORCE_ALL) { if (!confirmPrompt("You have selected Metadata Recovery mode. " + "This mode is intended to recover lost metadata on a corrupt " + @@ -1298,7 +1301,7 @@ public class NameNode { * before exit. * @throws ExitException thrown only for testing. */ - private synchronized void doImmediateShutdown(Throwable t) + protected synchronized void doImmediateShutdown(Throwable t) throws ExitException { String message = "Error encountered requiring NN shutdown. " + "Shutting down immediately."; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java index 07ec14e47d4..3a067c8e799 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NamenodeJspHelper.java @@ -102,7 +102,7 @@ class NamenodeJspHelper { long usedNonHeap = (totalNonHeap * 100) / commitedNonHeap; String str = "

" + inodes + " files and directories, " + blocks + " blocks = " - + (inodes + blocks) + " total"; + + (inodes + blocks) + " total filesystem objects"; if (maxobjects != 0) { long pct = ((inodes + blocks) * 100) / maxobjects; str += " / " + maxobjects + " (" + pct + "%)"; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java index 337a83c2ed5..66ccb3bd79f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/protocol/NNHAStatusHeartbeat.java @@ -19,31 +19,26 @@ package org.apache.hadoop.hdfs.server.protocol; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.protocol.HdfsConstants; @InterfaceAudience.Private @InterfaceStability.Evolving public class NNHAStatusHeartbeat { - private State state; + private HAServiceState state; private long txid = HdfsConstants.INVALID_TXID; - public NNHAStatusHeartbeat(State state, long txid) { + public NNHAStatusHeartbeat(HAServiceState state, long txid) { this.state = state; this.txid = txid; } - public State getState() { + public HAServiceState getState() { return state; } public long getTxId() { return txid; } - - @InterfaceAudience.Private - public enum State { - ACTIVE, - STANDBY; - } } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java index ead8e54882b..36e128feedb 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/web/resources/UserParam.java @@ -38,10 +38,9 @@ public class UserParam extends StringParam { MessageFormat.format("Parameter [{0}], cannot be NULL", NAME)); } int len = str.length(); - if (len < 1 || len > 31) { + if (len < 1) { throw new IllegalArgumentException(MessageFormat.format( - "Parameter [{0}], invalid value [{1}], it's length must be between 1 and 31", - NAME, str)); + "Parameter [{0}], it's length must be at least 1", NAME)); } return str; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml index 13dad672a2c..34cd8465fd7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/resources/hdfs-default.xml @@ -1184,4 +1184,17 @@ + + dfs.namenode.audit.loggers + default + + List of classes implementing audit loggers that will receive audit events. + These should be implementations of org.apache.hadoop.hdfs.server.namenode.AuditLogger. + The special value "default" can be used to reference the default audit + logger, which uses the configured log system. Installing custom audit loggers + may affect the performance and stability of the NameNode. Refer to the custom + logger's documentation for more details. 
+ + + diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsCreateMkdir.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsCreateMkdir.java index fe3054da775..1fd79a14fc3 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsCreateMkdir.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsCreateMkdir.java @@ -41,6 +41,7 @@ public class TestFcHdfsCreateMkdir extends @BeforeClass public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { + FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsCreateMkdir"; Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); fc = FileContext.getFileContext(cluster.getURI(0), conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsPermission.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsPermission.java index e2b684912b4..6dad4d4ff29 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsPermission.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsPermission.java @@ -41,6 +41,7 @@ public class TestFcHdfsPermission extends FileContextPermissionBase { @BeforeClass public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { + FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsPermission"; Configuration conf = new HdfsConfiguration(); cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); fc = FileContext.getFileContext(cluster.getURI(0), conf); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java index 4da771b4401..c8a4d26843f 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSetUMask.java @@ -82,6 +82,7 @@ public class TestFcHdfsSetUMask { @BeforeClass public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { + FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsSetUMask"; Configuration conf = new HdfsConfiguration(); // set permissions very restrictive conf.set(CommonConfigurationKeys.FS_PERMISSIONS_UMASK_KEY, "077"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSymlink.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSymlink.java index f42f64d7609..03adab60a7b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSymlink.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestFcHdfsSymlink.java @@ -86,6 +86,7 @@ public class TestFcHdfsSymlink extends FileContextSymlinkBaseTest { @BeforeClass public static void testSetUp() throws Exception { + FileContextTestHelper.TEST_ROOT_DIR = "/tmp/TestFcHdfsSymlink"; Configuration conf = new HdfsConfiguration(); conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); conf.set(FsPermission.UMASK_LABEL, "000"); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java index a4f2d5fe3f9..018d3886a6e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestHDFSFileContextMainOperations.java @@ -49,6 +49,8 @@ public class TestHDFSFileContextMainOperations extends @BeforeClass public static void clusterSetupAtBegining() throws IOException, LoginException, URISyntaxException { + FileContextTestHelper.TEST_ROOT_DIR = + "/tmp/TestHDFSFileContextMainOperations"; cluster = new MiniDFSCluster.Builder(CONF).numDataNodes(2).build(); cluster.waitClusterUp(); fc = FileContext.getFileContext(cluster.getURI(0), CONF); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java new file mode 100644 index 00000000000..da6f192a757 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/fs/TestVolumeId.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.fs; + +import org.junit.Test; +import static org.junit.Assert.*; + +public class TestVolumeId { + + @Test + public void testEquality() { + final VolumeId id1 = new HdfsVolumeId(new byte[] { (byte)0, (byte)0 }); + testEq(true, id1, id1); + + final VolumeId id2 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 }); + testEq(true, id2, id2); + testEq(false, id1, id2); + + final VolumeId id3 = new HdfsVolumeId(new byte[] { (byte)1, (byte)0 }); + testEq(true, id3, id3); + testEq(false, id1, id3); + + // same as 2, but "invalid": + final VolumeId id2copy1 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 }); + + testEq(true, id2, id2copy1); + + // same as 2copy1: + final VolumeId id2copy2 = new HdfsVolumeId(new byte[] { (byte)0, (byte)1 }); + + testEq(true, id2, id2copy2); + + testEqMany(true, new VolumeId[] { id2, id2copy1, id2copy2 }); + + testEqMany(false, new VolumeId[] { id1, id2, id3 }); + } + + @SuppressWarnings("unchecked") + private void testEq(final boolean eq, Comparable id1, Comparable id2) { + final int h1 = id1.hashCode(); + final int h2 = id2.hashCode(); + + // eq reflectivity: + assertTrue(id1.equals(id1)); + assertTrue(id2.equals(id2)); + assertEquals(0, id1.compareTo((T)id1)); + assertEquals(0, id2.compareTo((T)id2)); + + // eq symmetry: + assertEquals(eq, id1.equals(id2)); + assertEquals(eq, id2.equals(id1)); + + // null comparison: + assertFalse(id1.equals(null)); + assertFalse(id2.equals(null)); + + // compareTo: + assertEquals(eq, 0 == id1.compareTo((T)id2)); + assertEquals(eq, 0 == id2.compareTo((T)id1)); + // compareTo must be antisymmetric: + assertEquals(sign(id1.compareTo((T)id2)), -sign(id2.compareTo((T)id1))); + + // compare with null should never return 0 to be consistent with #equals(): + assertTrue(id1.compareTo(null) != 0); + assertTrue(id2.compareTo(null) != 0); + + // check that hash codes did not change: + assertEquals(h1, id1.hashCode()); + assertEquals(h2, id2.hashCode()); + if (eq) { + // in this case the hash codes must be the same: + assertEquals(h1, h2); + } + } + + private static int sign(int x) { + if (x == 0) { + return 0; + } else if (x > 0) { + return 1; + } else { + return -1; + } + } + + @SuppressWarnings("unchecked") + private void testEqMany(final boolean eq, Comparable... volumeIds) { + Comparable vidNext; + int sum = 0; + for (int i=0; i targetSet = new HashSet(); + for(DatanodeDescriptor node:targets) { + String nodeGroup = NetworkTopology.getLastHalf(node.getNetworkLocation()); + if(targetSet.contains(nodeGroup)) { + return false; + } else { + targetSet.add(nodeGroup); + } + } + return true; + } + /** * In this testcase, client is dataNodes[0]. So the 1st replica should be * placed on dataNodes[0], the 2nd replica should be placed on @@ -497,5 +552,122 @@ public class TestReplicationPolicyWithNodeGroup { null, null, (short)1, first, second); assertEquals(chosenNode, dataNodes[5]); } + + /** + * Test replica placement policy in case of boundary topology. + * Rack 2 has only 1 node group & can't be placed with two replicas + * The 1st replica will be placed on writer. + * The 2nd replica should be placed on a different rack + * The 3rd replica should be placed on the same rack with writer, but on a + * different node group. 
+ */ + @Test + public void testChooseTargetsOnBoundaryTopology() throws Exception { + for(int i=0; i(), BLOCK_SIZE); + assertEquals(targets.length, 0); + + targets = replicator.chooseTarget(filename, 1, dataNodesInBoundaryCase[0], + new ArrayList(), BLOCK_SIZE); + assertEquals(targets.length, 1); + + targets = replicator.chooseTarget(filename, 2, dataNodesInBoundaryCase[0], + new ArrayList(), BLOCK_SIZE); + assertEquals(targets.length, 2); + assertFalse(cluster.isOnSameRack(targets[0], targets[1])); + + targets = replicator.chooseTarget(filename, 3, dataNodesInBoundaryCase[0], + new ArrayList(), BLOCK_SIZE); + assertEquals(targets.length, 3); + assertTrue(checkTargetsOnDifferentNodeGroup(targets)); + } + + /** + * Test re-replication policy in boundary case. + * Rack 2 has only one node group & the node in this node group is chosen + * Rack 1 has two nodegroups & one of them is chosen. + * Replica policy should choose the node from node group of Rack1 but not the + * same nodegroup with chosen nodes. + */ + @Test + public void testRereplicateOnBoundaryTopology() throws Exception { + for(int i=0; i chosenNodes = new ArrayList(); + chosenNodes.add(dataNodesInBoundaryCase[0]); + chosenNodes.add(dataNodesInBoundaryCase[5]); + DatanodeDescriptor[] targets; + targets = replicator.chooseTarget(filename, 1, dataNodesInBoundaryCase[0], + chosenNodes, BLOCK_SIZE); + assertFalse(cluster.isOnSameNodeGroup(targets[0], + dataNodesInBoundaryCase[0])); + assertFalse(cluster.isOnSameNodeGroup(targets[0], + dataNodesInBoundaryCase[5])); + assertTrue(checkTargetsOnDifferentNodeGroup(targets)); + } + + /** + * Test replica placement policy in case of targets more than number of + * NodeGroups. + * The 12-nodes cluster only has 6 NodeGroups, but in some cases, like: + * placing submitted job file, there is requirement to choose more (10) + * targets for placing replica. We should test it can return 6 targets. + */ + @Test + public void testChooseMoreTargetsThanNodeGroups() throws Exception { + // Cleanup nodes in previous tests + for(int i=0; i(), BLOCK_SIZE); + assertEquals(targets.length, 3); + assertTrue(checkTargetsOnDifferentNodeGroup(targets)); + + // Test special case -- replica number over node groups. + targets = replicator.chooseTarget(filename, 10, dataNodesInMoreTargetsCase[0], + new ArrayList(), BLOCK_SIZE); + assertTrue(checkTargetsOnDifferentNodeGroup(targets)); + // Verify it only can find 6 targets for placing replicas. 
+ assertEquals(targets.length, 6); + } + } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java index 8e01e6d70a9..504e1ca6854 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBPOfferService.java @@ -29,6 +29,7 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.protocol.Block; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -41,7 +42,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; -import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReceivedDeletedBlockInfo; import org.apache.hadoop.hdfs.server.protocol.StorageBlockReport; @@ -123,7 +123,7 @@ public class TestBPOfferService { Mockito.anyInt(), Mockito.anyInt(), Mockito.anyInt()); - mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(State.STANDBY, 0); + mockHaStatuses[nnIdx] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 0); return mock; } @@ -255,12 +255,12 @@ public class TestBPOfferService { assertNull(bpos.getActiveNN()); // Have NN1 claim active at txid 1 - mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 1); + mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1); bpos.triggerHeartbeatForTests(); assertSame(mockNN1, bpos.getActiveNN()); // NN2 claims active at a higher txid - mockHaStatuses[1] = new NNHAStatusHeartbeat(State.ACTIVE, 2); + mockHaStatuses[1] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 2); bpos.triggerHeartbeatForTests(); assertSame(mockNN2, bpos.getActiveNN()); @@ -272,12 +272,12 @@ public class TestBPOfferService { // Even if NN2 goes to standby, DN shouldn't reset to talking to NN1, // because NN1's txid is lower than the last active txid. Instead, // it should consider neither active. 
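The comment above describes the rule being tested: an active claim is accepted only if it carries a transaction id at least as high as any previously accepted claim, and no NameNode is trusted as active otherwise. A rough sketch of that rule; the names and structure are assumptions for illustration, not the actual BPOfferService code:

    import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState;

    /** Illustrative only; not the BPOfferService implementation. */
    class ActiveNNTracker {
      private long highestActiveClaimTxId = 0;
      private String activeNN;   // null means no NameNode is trusted as active

      void onHeartbeat(String nnId, HAServiceState state, long txid) {
        if (state == HAServiceState.ACTIVE && txid >= highestActiveClaimTxId) {
          // accept the claim only if it is at least as recent as any earlier one
          highestActiveClaimTxId = txid;
          activeNN = nnId;
        } else if (nnId.equals(activeNN)) {
          // the NameNode we trusted is no longer making a current active claim
          activeNN = null;
        }
      }

      String getActiveNN() {
        return activeNN;
      }
    }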
- mockHaStatuses[1] = new NNHAStatusHeartbeat(State.STANDBY, 2); + mockHaStatuses[1] = new NNHAStatusHeartbeat(HAServiceState.STANDBY, 2); bpos.triggerHeartbeatForTests(); assertNull(bpos.getActiveNN()); // Now if NN1 goes back to a higher txid, it should be considered active - mockHaStatuses[0] = new NNHAStatusHeartbeat(State.ACTIVE, 3); + mockHaStatuses[0] = new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 3); bpos.triggerHeartbeatForTests(); assertSame(mockNN1, bpos.getActiveNN()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java index d496419c7eb..a400e850594 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestBlockRecovery.java @@ -49,6 +49,7 @@ import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.DistributedFileSystem; @@ -72,7 +73,6 @@ import org.apache.hadoop.hdfs.server.protocol.DatanodeRegistration; import org.apache.hadoop.hdfs.server.protocol.HeartbeatResponse; import org.apache.hadoop.hdfs.server.protocol.InterDatanodeProtocol; import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat; -import org.apache.hadoop.hdfs.server.protocol.NNHAStatusHeartbeat.State; import org.apache.hadoop.hdfs.server.protocol.NamespaceInfo; import org.apache.hadoop.hdfs.server.protocol.ReplicaRecoveryInfo; import org.apache.hadoop.hdfs.server.protocol.StorageReport; @@ -157,7 +157,7 @@ public class TestBlockRecovery { Mockito.anyInt())) .thenReturn(new HeartbeatResponse( new DatanodeCommand[0], - new NNHAStatusHeartbeat(State.ACTIVE, 1))); + new NNHAStatusHeartbeat(HAServiceState.ACTIVE, 1))); dn = new DataNode(conf, dirs, null) { @Override diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java index f310959d9a1..cf64c335bac 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/NameNodeAdapter.java @@ -179,6 +179,13 @@ public class NameNodeAdapter { return spy; } + public static JournalSet spyOnJournalSet(NameNode nn) { + FSEditLog editLog = nn.getFSImage().getEditLog(); + JournalSet js = Mockito.spy(editLog.getJournalSet()); + editLog.setJournalSetForTesting(js); + return js; + } + public static String getMkdirOpPath(FSEditLogOp op) { if (op.opCode == FSEditLogOpCodes.OP_MKDIR) { return ((MkdirOp) op).path; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java new file mode 100644 index 00000000000..3de27cb2746 --- /dev/null +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestAuditLogger.java @@ -0,0 +1,123 @@ +/** + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hdfs.server.namenode; + +import static org.apache.hadoop.hdfs.DFSConfigKeys.DFS_NAMENODE_AUDIT_LOGGERS_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +import java.io.IOException; +import java.net.InetAddress; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.HdfsConfiguration; +import org.apache.hadoop.hdfs.MiniDFSCluster; +import org.apache.hadoop.hdfs.protocol.HdfsFileStatus; +import org.apache.hadoop.ipc.RemoteException; +import org.apache.hadoop.security.UserGroupInformation; +import org.junit.Test; + +/** + * Tests for the {@link AuditLogger} custom audit logging interface. + */ +public class TestAuditLogger { + + /** + * Tests that AuditLogger works as expected. + */ + @Test + public void testAuditLogger() throws IOException { + Configuration conf = new HdfsConfiguration(); + conf.set(DFS_NAMENODE_AUDIT_LOGGERS_KEY, + DummyAuditLogger.class.getName()); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); + + try { + cluster.waitClusterUp(); + assertTrue(DummyAuditLogger.initialized); + + FileSystem fs = cluster.getFileSystem(); + long time = System.currentTimeMillis(); + fs.setTimes(new Path("/"), time, time); + assertEquals(1, DummyAuditLogger.logCount); + } finally { + cluster.shutdown(); + } + } + + /** + * Tests that a broken audit logger causes requests to fail. + */ + @Test + public void testBrokenLogger() throws IOException { + Configuration conf = new HdfsConfiguration(); + conf.set(DFS_NAMENODE_AUDIT_LOGGERS_KEY, + BrokenAuditLogger.class.getName()); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).build(); + + try { + cluster.waitClusterUp(); + + FileSystem fs = cluster.getFileSystem(); + long time = System.currentTimeMillis(); + fs.setTimes(new Path("/"), time, time); + fail("Expected exception due to broken audit logger."); + } catch (RemoteException re) { + // Expected. + } finally { + cluster.shutdown(); + } + } + + public static class DummyAuditLogger implements AuditLogger { + + static boolean initialized; + static int logCount; + + public void initialize(Configuration conf) { + initialized = true; + } + + public void logAuditEvent(boolean succeeded, String userName, + InetAddress addr, String cmd, String src, String dst, + FileStatus stat) { + logCount++; + } + + } + + public static class BrokenAuditLogger implements AuditLogger { + + public void initialize(Configuration conf) { + // No op. 
+ } + + public void logAuditEvent(boolean succeeded, String userName, + InetAddress addr, String cmd, String src, String dst, + FileStatus stat) { + throw new RuntimeException("uh oh"); + } + + } + +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java index 6ade5f2dfc6..223064893d6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestBackupNode.java @@ -35,6 +35,7 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.ha.HAServiceProtocol.HAServiceState; import org.apache.hadoop.hdfs.DFSConfigKeys; import org.apache.hadoop.hdfs.DFSTestUtil; import org.apache.hadoop.hdfs.HAUtil; @@ -103,6 +104,9 @@ public class TestBackupNode { BackupNode bn = (BackupNode)NameNode.createNameNode( new String[]{startupOpt.getName()}, c); assertTrue(bn.getRole() + " must be in SafeMode.", bn.isInSafeMode()); + assertTrue(bn.getRole() + " must be in StandbyState", + bn.getNamesystem().getHAState() + .equalsIgnoreCase(HAServiceState.STANDBY.name())); return bn; } diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java index d417a3d0a95..c2ff6cbeb4e 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestINodeFile.java @@ -25,10 +25,15 @@ import static org.junit.Assert.fail; import java.io.FileNotFoundException; import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Options; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIsNotDirectoryException; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.fs.permission.PermissionStatus; +import org.apache.hadoop.hdfs.DFSTestUtil; +import org.apache.hadoop.hdfs.DistributedFileSystem; +import org.apache.hadoop.hdfs.MiniDFSCluster; import org.apache.hadoop.hdfs.server.blockmanagement.BlockInfo; import org.junit.Test; @@ -157,6 +162,48 @@ public class TestINodeFile { } + /** + * FSDirectory#unprotectedSetQuota creates a new INodeDirectoryWithQuota to + * replace the original INodeDirectory. Before HDFS-4243, the parent field of + * all the children INodes of the target INodeDirectory is not changed to + * point to the new INodeDirectoryWithQuota. This testcase tests this + * scenario. 
+ */ + @Test + public void testGetFullPathNameAfterSetQuota() throws Exception { + long fileLen = 1024; + replication = 3; + Configuration conf = new Configuration(); + MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes( + replication).build(); + cluster.waitActive(); + FSNamesystem fsn = cluster.getNamesystem(); + FSDirectory fsdir = fsn.getFSDirectory(); + DistributedFileSystem dfs = cluster.getFileSystem(); + + // Create a file for test + final Path dir = new Path("/dir"); + final Path file = new Path(dir, "file"); + DFSTestUtil.createFile(dfs, file, fileLen, replication, 0L); + + // Check the full path name of the INode associating with the file + INode fnode = fsdir.getINode(file.toString()); + assertEquals(file.toString(), fnode.getFullPathName()); + + // Call FSDirectory#unprotectedSetQuota which calls + // INodeDirectory#replaceChild + dfs.setQuota(dir, Long.MAX_VALUE - 1, replication * fileLen * 10); + final Path newDir = new Path("/newdir"); + final Path newFile = new Path(newDir, "file"); + // Also rename dir + dfs.rename(dir, newDir, Options.Rename.OVERWRITE); + // /dir/file now should be renamed to /newdir/file + fnode = fsdir.getINode(newFile.toString()); + // getFullPathName can return correct result only if the parent field of + // child node is set correctly + assertEquals(newFile.toString(), fnode.getFullPathName()); + } + @Test public void testAppendBlocks() { INodeFile origFile = createINodeFiles(1, "origfile")[0]; diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java index 9befc49f014..c41a0746a6b 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestMetaSave.java @@ -92,7 +92,8 @@ public class TestMetaSave { DataInputStream in = new DataInputStream(fstream); BufferedReader reader = new BufferedReader(new InputStreamReader(in)); String line = reader.readLine(); - assertTrue(line.equals("3 files and directories, 2 blocks = 5 total")); + assertTrue(line.equals( + "3 files and directories, 2 blocks = 5 total filesystem objects")); line = reader.readLine(); assertTrue(line.equals("Live Datanodes: 1")); line = reader.readLine(); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java index a8dac5701e4..1539467da11 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestNameNodeRecovery.java @@ -30,11 +30,16 @@ import java.io.RandomAccessFile; import java.util.HashSet; import java.util.Set; +import junit.framework.Assert; + +import org.apache.commons.io.FileUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hdfs.DFSConfigKeys; +import org.apache.hadoop.hdfs.DFSUtil; import org.apache.hadoop.hdfs.HdfsConfiguration; import org.apache.hadoop.hdfs.MiniDFSCluster; import 
org.apache.hadoop.hdfs.server.common.HdfsServerConstants.StartupOption; @@ -438,6 +443,39 @@ public class TestNameNodeRecovery { } } + /** + * Create a test configuration that will exercise the initializeGenericKeys + * code path. This is a regression test for HDFS-4279. + */ + static void setupRecoveryTestConf(Configuration conf) throws IOException { + conf.set(DFSConfigKeys.DFS_NAMESERVICES, "ns1"); + conf.set(DFSConfigKeys.DFS_HA_NAMENODE_ID_KEY, "nn1"); + conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys.DFS_HA_NAMENODES_KEY_PREFIX, + "ns1"), "nn1,nn2"); + String baseDir = System.getProperty( + MiniDFSCluster.PROP_TEST_BUILD_DATA, "build/test/data") + "/dfs/"; + File nameDir = new File(baseDir, "nameR"); + File secondaryDir = new File(baseDir, "namesecondaryR"); + conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys. + DFS_NAMENODE_NAME_DIR_KEY, "ns1", "nn1"), + nameDir.getCanonicalPath()); + conf.set(DFSUtil.addKeySuffixes(DFSConfigKeys. + DFS_NAMENODE_CHECKPOINT_DIR_KEY, "ns1", "nn1"), + secondaryDir.getCanonicalPath()); + conf.unset(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY); + conf.unset(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_DIR_KEY); + FileUtils.deleteQuietly(nameDir); + if (!nameDir.mkdirs()) { + throw new RuntimeException("failed to make directory " + + nameDir.getAbsolutePath()); + } + FileUtils.deleteQuietly(secondaryDir); + if (!secondaryDir.mkdirs()) { + throw new RuntimeException("failed to make directory " + + secondaryDir.getAbsolutePath()); + } + } + static void testNameNodeRecoveryImpl(Corruptor corruptor, boolean finalize) throws IOException { final String TEST_PATH = "/test/path/dir"; @@ -446,12 +484,13 @@ public class TestNameNodeRecovery { // start a cluster Configuration conf = new HdfsConfiguration(); + setupRecoveryTestConf(conf); MiniDFSCluster cluster = null; FileSystem fileSys = null; StorageDirectory sd = null; try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(0) - .enableManagedDfsDirsRedundancy(false).build(); + .manageNameDfsDirs(false).build(); cluster.waitActive(); if (!finalize) { // Normally, the in-progress edit log would be finalized by diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java index 8af6960344c..a852a690408 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/TestSaveNamespace.java @@ -603,6 +603,24 @@ public class TestSaveNamespace { } } + @Test + public void testSaveNamespaceWithDanglingLease() throws Exception { + MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()) + .numDataNodes(1).build(); + cluster.waitActive(); + DistributedFileSystem fs = (DistributedFileSystem) cluster.getFileSystem(); + try { + cluster.getNamesystem().leaseManager.addLease("me", "/non-existent"); + fs.setSafeMode(SafeModeAction.SAFEMODE_ENTER); + cluster.getNameNodeRpc().saveNamespace(); + fs.setSafeMode(SafeModeAction.SAFEMODE_LEAVE); + } finally { + if (cluster != null) { + cluster.shutdown(); + } + } + } + private void doAnEdit(FSNamesystem fsn, int id) throws IOException { // Make an edit fsn.mkdirs( diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java 
b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java index 61016c9540e..c449acae564 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/namenode/ha/TestStandbyCheckpoints.java @@ -35,6 +35,7 @@ import org.apache.hadoop.hdfs.MiniDFSNNTopology; import org.apache.hadoop.hdfs.server.namenode.FSImage; import org.apache.hadoop.hdfs.server.namenode.FSImageTestUtil; import org.apache.hadoop.hdfs.server.namenode.FSNamesystem; +import org.apache.hadoop.hdfs.server.namenode.JournalSet; import org.apache.hadoop.hdfs.server.namenode.NNStorage; import org.apache.hadoop.hdfs.server.namenode.NameNode; import org.apache.hadoop.hdfs.server.namenode.NameNodeAdapter; @@ -66,6 +67,12 @@ public class TestStandbyCheckpoints { conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_CHECK_PERIOD_KEY, 1); conf.setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_TXNS_KEY, 5); conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1); + + // Dial down the retention of extra edits and checkpoints. This is to + // help catch regressions of HDFS-4238 (SBN should not purge shared edits) + conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_CHECKPOINTS_RETAINED_KEY, 1); + conf.setInt(DFSConfigKeys.DFS_NAMENODE_NUM_EXTRA_EDITS_RETAINED_KEY, 0); + conf.setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, true); conf.set(DFSConfigKeys.DFS_IMAGE_COMPRESSION_CODEC_KEY, SlowCodec.class.getCanonicalName()); @@ -99,15 +106,20 @@ public class TestStandbyCheckpoints { @Test public void testSBNCheckpoints() throws Exception { - doEdits(0, 10); + JournalSet standbyJournalSet = NameNodeAdapter.spyOnJournalSet(nn1); + doEdits(0, 10); HATestUtil.waitForStandbyToCatchUp(nn0, nn1); // Once the standby catches up, it should notice that it needs to // do a checkpoint and save one to its local directories. - HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12)); // It should also upload it back to the active. - HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12)); + + // The standby should never try to purge edit logs on shared storage. + Mockito.verify(standbyJournalSet, Mockito.never()). + purgeLogsOlderThan(Mockito.anyLong()); } /** @@ -129,8 +141,8 @@ public class TestStandbyCheckpoints { // so the standby will catch up. Then, both will be in standby mode // with enough uncheckpointed txns to cause a checkpoint, and they // will each try to take a checkpoint and upload to each other. 
- HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(0, 12)); - HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 12)); + HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(12)); + HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(12)); assertEquals(12, nn0.getNamesystem().getFSImage() .getMostRecentCheckpointTxId()); diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java index a2b9653e6ff..2607171e740 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/TestFSMainOperationsWebHdfs.java @@ -28,6 +28,7 @@ import org.apache.commons.logging.impl.Log4JLogger; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSMainOperationsBaseTest; import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.hdfs.DFSConfigKeys; @@ -53,6 +54,10 @@ public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest { @BeforeClass public static void setupCluster() { + // Initialize the test root directory to a DFS like path + // since we are testing based on the MiniDFSCluster. + FileSystemTestHelper.TEST_ROOT_DIR = "/tmp/TestFSMainOperationsWebHdfs"; + final Configuration conf = new Configuration(); conf.setBoolean(DFSConfigKeys.DFS_WEBHDFS_ENABLED_KEY, true); try { @@ -132,4 +137,4 @@ public class TestFSMainOperationsWebHdfs extends FSMainOperationsBaseTest { // also okay for HDFS. } } -} \ No newline at end of file +} diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java index 0e06de319ad..c228c1f2989 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/web/resources/TestParam.java @@ -244,11 +244,6 @@ public class TestParam { assertNull(userParam.getValue()); } - @Test(expected = IllegalArgumentException.class) - public void userNameTooLong() { - new UserParam("a123456789012345678901234567890x"); - } - @Test(expected = IllegalArgumentException.class) public void userNameInvalidStart() { new UserParam("1x"); @@ -265,12 +260,6 @@ public class TestParam { assertNotNull(userParam.getValue()); } - @Test - public void userNameMaxLength() { - UserParam userParam = new UserParam("a123456789012345678901234567890"); - assertNotNull(userParam.getValue()); - } - @Test public void userNameValidDollarSign() { UserParam userParam = new UserParam("a$"); diff --git a/hadoop-mapreduce-project/CHANGES.txt b/hadoop-mapreduce-project/CHANGES.txt index 1c72e053ec2..bad1eae0bad 100644 --- a/hadoop-mapreduce-project/CHANGES.txt +++ b/hadoop-mapreduce-project/CHANGES.txt @@ -11,6 +11,9 @@ Trunk (Unreleased) MAPREDUCE-2669. Add new examples for Mean, Median, and Standard Deviation. (Plamen Jeliazkov via shv) + MAPREDUCE-4049. Experimental api to allow for alternate shuffle plugins. + (Avner BenHanoch via acmurthy) + IMPROVEMENTS MAPREDUCE-3787. 
[Gridmix] Optimize job monitoring and STRESS mode for @@ -604,6 +607,9 @@ Release 0.23.6 - UNRELEASED MAPREDUCE-4817. Hardcoded task ping timeout kills tasks localizing large amounts of data (tgraves) + MAPREDUCE-4836. Elapsed time for running tasks on AM web UI tasks page is 0 + (Ravi Prakash via jeagles) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java index aab8b56d911..40983400aef 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/TaskInfo.java @@ -59,11 +59,12 @@ public class TaskInfo { TaskReport report = task.getReport(); this.startTime = report.getStartTime(); this.finishTime = report.getFinishTime(); - this.elapsedTime = Times.elapsed(this.startTime, this.finishTime, false); + this.state = report.getTaskState(); + this.elapsedTime = Times.elapsed(this.startTime, this.finishTime, + this.state == TaskState.RUNNING); if (this.elapsedTime == -1) { this.elapsedTime = 0; } - this.state = report.getTaskState(); this.progress = report.getProgress() * 100; this.id = MRApps.toString(task.getID()); this.taskNum = task.getID().getId(); diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ReduceTask.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ReduceTask.java index 4e48c21a3e0..e8d97fab6ef 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ReduceTask.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ReduceTask.java @@ -340,6 +340,7 @@ public class ReduceTask extends Task { // Initialize the codec codec = initCodec(); RawKeyValueIterator rIter = null; + ShuffleConsumerPlugin shuffleConsumerPlugin = null; boolean isLocal = false; // local if @@ -358,8 +359,14 @@ public class ReduceTask extends Task { (null != combinerClass) ? 
new CombineOutputCollector(reduceCombineOutputCounter, reporter, conf) : null; - Shuffle shuffle = - new Shuffle(getTaskID(), job, FileSystem.getLocal(job), umbilical, + Class clazz = + job.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class); + + shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, job); + LOG.info("Using ShuffleConsumerPlugin: " + shuffleConsumerPlugin); + + ShuffleConsumerPlugin.Context shuffleContext = + new ShuffleConsumerPlugin.Context(getTaskID(), job, FileSystem.getLocal(job), umbilical, super.lDirAlloc, reporter, codec, combinerClass, combineCollector, spilledRecordsCounter, reduceCombineInputCounter, @@ -368,7 +375,8 @@ public class ReduceTask extends Task { mergedMapOutputsCounter, taskStatus, copyPhase, sortPhase, this, mapOutputFile); - rIter = shuffle.run(); + shuffleConsumerPlugin.init(shuffleContext); + rIter = shuffleConsumerPlugin.run(); } else { // local job runner doesn't have a copy phase copyPhase.complete(); @@ -399,6 +407,10 @@ public class ReduceTask extends Task { runOldReducer(job, umbilical, reporter, rIter, comparator, keyClass, valueClass); } + + if (shuffleConsumerPlugin != null) { + shuffleConsumerPlugin.close(); + } done(umbilical, reporter); } diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ShuffleConsumerPlugin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ShuffleConsumerPlugin.java new file mode 100644 index 00000000000..f57275255f1 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapred/ShuffleConsumerPlugin.java @@ -0,0 +1,168 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.mapred; + +import java.io.IOException; +import org.apache.hadoop.mapred.Task.CombineOutputCollector; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.util.Progress; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; + +/** + * ShuffleConsumerPlugin for serving Reducers. It may shuffle MOF files from + * either the built-in ShuffleHandler or from a 3rd party AuxiliaryService. 
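+ *
+ * A rough usage sketch, not part of this patch (MyShuffleConsumerPlugin below is
+ * a hypothetical subclass used only for illustration): a job selects an alternate
+ * implementation through the mapreduce.job.reduce.shuffle.consumer.plugin.class
+ * key, and ReduceTask then instantiates it reflectively and drives the
+ * init/run/close lifecycle:
+ * <pre>
+ *   JobConf job = new JobConf();
+ *   job.setClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN,
+ *       MyShuffleConsumerPlugin.class, ShuffleConsumerPlugin.class);
+ *   // at run time the reduce task effectively does:
+ *   //   ShuffleConsumerPlugin plugin = ReflectionUtils.newInstance(clazz, job);
+ *   //   plugin.init(context);
+ *   //   RawKeyValueIterator rIter = plugin.run();
+ *   //   ...
+ *   //   plugin.close();
+ * </pre>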
+ * + */ +@InterfaceAudience.LimitedPrivate("mapreduce") +@InterfaceStability.Unstable +public interface ShuffleConsumerPlugin { + + public void init(Context context); + + public RawKeyValueIterator run() throws IOException, InterruptedException; + + public void close(); + + @InterfaceAudience.LimitedPrivate("mapreduce") + @InterfaceStability.Unstable + public static class Context { + private final org.apache.hadoop.mapreduce.TaskAttemptID reduceId; + private final JobConf jobConf; + private final FileSystem localFS; + private final TaskUmbilicalProtocol umbilical; + private final LocalDirAllocator localDirAllocator; + private final Reporter reporter; + private final CompressionCodec codec; + private final Class combinerClass; + private final CombineOutputCollector combineCollector; + private final Counters.Counter spilledRecordsCounter; + private final Counters.Counter reduceCombineInputCounter; + private final Counters.Counter shuffledMapsCounter; + private final Counters.Counter reduceShuffleBytes; + private final Counters.Counter failedShuffleCounter; + private final Counters.Counter mergedMapOutputsCounter; + private final TaskStatus status; + private final Progress copyPhase; + private final Progress mergePhase; + private final Task reduceTask; + private final MapOutputFile mapOutputFile; + + public Context(org.apache.hadoop.mapreduce.TaskAttemptID reduceId, + JobConf jobConf, FileSystem localFS, + TaskUmbilicalProtocol umbilical, + LocalDirAllocator localDirAllocator, + Reporter reporter, CompressionCodec codec, + Class combinerClass, + CombineOutputCollector combineCollector, + Counters.Counter spilledRecordsCounter, + Counters.Counter reduceCombineInputCounter, + Counters.Counter shuffledMapsCounter, + Counters.Counter reduceShuffleBytes, + Counters.Counter failedShuffleCounter, + Counters.Counter mergedMapOutputsCounter, + TaskStatus status, Progress copyPhase, Progress mergePhase, + Task reduceTask, MapOutputFile mapOutputFile) { + this.reduceId = reduceId; + this.jobConf = jobConf; + this.localFS = localFS; + this. 
umbilical = umbilical; + this.localDirAllocator = localDirAllocator; + this.reporter = reporter; + this.codec = codec; + this.combinerClass = combinerClass; + this.combineCollector = combineCollector; + this.spilledRecordsCounter = spilledRecordsCounter; + this.reduceCombineInputCounter = reduceCombineInputCounter; + this.shuffledMapsCounter = shuffledMapsCounter; + this.reduceShuffleBytes = reduceShuffleBytes; + this.failedShuffleCounter = failedShuffleCounter; + this.mergedMapOutputsCounter = mergedMapOutputsCounter; + this.status = status; + this.copyPhase = copyPhase; + this.mergePhase = mergePhase; + this.reduceTask = reduceTask; + this.mapOutputFile = mapOutputFile; + } + + public org.apache.hadoop.mapreduce.TaskAttemptID getReduceId() { + return reduceId; + } + public JobConf getJobConf() { + return jobConf; + } + public FileSystem getLocalFS() { + return localFS; + } + public TaskUmbilicalProtocol getUmbilical() { + return umbilical; + } + public LocalDirAllocator getLocalDirAllocator() { + return localDirAllocator; + } + public Reporter getReporter() { + return reporter; + } + public CompressionCodec getCodec() { + return codec; + } + public Class getCombinerClass() { + return combinerClass; + } + public CombineOutputCollector getCombineCollector() { + return combineCollector; + } + public Counters.Counter getSpilledRecordsCounter() { + return spilledRecordsCounter; + } + public Counters.Counter getReduceCombineInputCounter() { + return reduceCombineInputCounter; + } + public Counters.Counter getShuffledMapsCounter() { + return shuffledMapsCounter; + } + public Counters.Counter getReduceShuffleBytes() { + return reduceShuffleBytes; + } + public Counters.Counter getFailedShuffleCounter() { + return failedShuffleCounter; + } + public Counters.Counter getMergedMapOutputsCounter() { + return mergedMapOutputsCounter; + } + public TaskStatus getStatus() { + return status; + } + public Progress getCopyPhase() { + return copyPhase; + } + public Progress getMergePhase() { + return mergePhase; + } + public Task getReduceTask() { + return reduceTask; + } + public MapOutputFile getMapOutputFile() { + return mapOutputFile; + } + } // end of public static class Context + +} diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java index d758e00483e..dc1ff658f67 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRConfig.java @@ -85,6 +85,9 @@ public interface MRConfig { public static final boolean SHUFFLE_SSL_ENABLED_DEFAULT = false; + public static final String SHUFFLE_CONSUMER_PLUGIN = + "mapreduce.job.reduce.shuffle.consumer.plugin.class"; + /** * Configuration key to enable/disable IFile readahead. 
*/ diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java index e582d2856f4..fc22979797a 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/task/reduce/Shuffle.java @@ -34,73 +34,63 @@ import org.apache.hadoop.mapred.Task; import org.apache.hadoop.mapred.Task.CombineOutputCollector; import org.apache.hadoop.mapred.TaskStatus; import org.apache.hadoop.mapred.TaskUmbilicalProtocol; +import org.apache.hadoop.mapred.ShuffleConsumerPlugin; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.util.Progress; -@InterfaceAudience.Private +@InterfaceAudience.LimitedPrivate("mapreduce") @InterfaceStability.Unstable @SuppressWarnings({"unchecked", "rawtypes"}) -public class Shuffle implements ExceptionReporter { +public class Shuffle implements ShuffleConsumerPlugin, ExceptionReporter { private static final int PROGRESS_FREQUENCY = 2000; private static final int MAX_EVENTS_TO_FETCH = 10000; private static final int MIN_EVENTS_TO_FETCH = 100; private static final int MAX_RPC_OUTSTANDING_EVENTS = 3000000; - private final TaskAttemptID reduceId; - private final JobConf jobConf; - private final Reporter reporter; - private final ShuffleClientMetrics metrics; - private final TaskUmbilicalProtocol umbilical; + private ShuffleConsumerPlugin.Context context; + + private TaskAttemptID reduceId; + private JobConf jobConf; + private Reporter reporter; + private ShuffleClientMetrics metrics; + private TaskUmbilicalProtocol umbilical; - private final ShuffleScheduler scheduler; - private final MergeManager merger; + private ShuffleScheduler scheduler; + private MergeManager merger; private Throwable throwable = null; private String throwingThreadName = null; - private final Progress copyPhase; - private final TaskStatus taskStatus; - private final Task reduceTask; //Used for status updates - - public Shuffle(TaskAttemptID reduceId, JobConf jobConf, FileSystem localFS, - TaskUmbilicalProtocol umbilical, - LocalDirAllocator localDirAllocator, - Reporter reporter, - CompressionCodec codec, - Class combinerClass, - CombineOutputCollector combineCollector, - Counters.Counter spilledRecordsCounter, - Counters.Counter reduceCombineInputCounter, - Counters.Counter shuffledMapsCounter, - Counters.Counter reduceShuffleBytes, - Counters.Counter failedShuffleCounter, - Counters.Counter mergedMapOutputsCounter, - TaskStatus status, - Progress copyPhase, - Progress mergePhase, - Task reduceTask, - MapOutputFile mapOutputFile) { - this.reduceId = reduceId; - this.jobConf = jobConf; - this.umbilical = umbilical; - this.reporter = reporter; + private Progress copyPhase; + private TaskStatus taskStatus; + private Task reduceTask; //Used for status updates + + @Override + public void init(ShuffleConsumerPlugin.Context context) { + this.context = context; + + this.reduceId = context.getReduceId(); + this.jobConf = context.getJobConf(); + this.umbilical = context.getUmbilical(); + this.reporter = context.getReporter(); this.metrics = new ShuffleClientMetrics(reduceId, jobConf); - 
this.copyPhase = copyPhase; - this.taskStatus = status; - this.reduceTask = reduceTask; + this.copyPhase = context.getCopyPhase(); + this.taskStatus = context.getStatus(); + this.reduceTask = context.getReduceTask(); scheduler = - new ShuffleScheduler(jobConf, status, this, copyPhase, - shuffledMapsCounter, - reduceShuffleBytes, failedShuffleCounter); - merger = new MergeManager(reduceId, jobConf, localFS, - localDirAllocator, reporter, codec, - combinerClass, combineCollector, - spilledRecordsCounter, - reduceCombineInputCounter, - mergedMapOutputsCounter, - this, mergePhase, mapOutputFile); + new ShuffleScheduler(jobConf, taskStatus, this, copyPhase, + context.getShuffledMapsCounter(), + context.getReduceShuffleBytes(), context.getFailedShuffleCounter()); + merger = new MergeManager(reduceId, jobConf, context.getLocalFS(), + context.getLocalDirAllocator(), reporter, context.getCodec(), + context.getCombinerClass(), context.getCombineCollector(), + context.getSpilledRecordsCounter(), + context.getReduceCombineInputCounter(), + context.getMergedMapOutputsCounter(), + this, context.getMergePhase(), context.getMapOutputFile()); } + @Override public RawKeyValueIterator run() throws IOException, InterruptedException { // Scale the maximum events we fetch per RPC call to mitigate OOM issues // on the ApplicationMaster when a thundering herd of reducers fetch events @@ -171,6 +161,10 @@ public class Shuffle implements ExceptionReporter { return kvIter; } + @Override + public void close(){ + } + public synchronized void reportException(Throwable t) { if (throwable == null) { throwable = t; diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 85330457aea..00ac075bca2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -748,6 +748,16 @@ + + mapreduce.job.reduce.shuffle.consumer.plugin.class + org.apache.hadoop.mapreduce.task.reduce.Shuffle + + Name of the class whose instance will be used + to send shuffle requests by reducetasks of this job. + The class must be an instance of org.apache.hadoop.mapred.ShuffleConsumerPlugin. + + + diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestShufflePlugin.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestShufflePlugin.java new file mode 100644 index 00000000000..e172be54e84 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/test/java/org/apache/hadoop/mapreduce/TestShufflePlugin.java @@ -0,0 +1,197 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. +*/ + +package org.apache.hadoop.mapreduce; + +import org.junit.Test; +import static org.junit.Assert.*; +import static org.mockito.Mockito.*; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.fs.LocalDirAllocator; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.mapred.Task.CombineOutputCollector; +import org.apache.hadoop.io.compress.CompressionCodec; +import org.apache.hadoop.util.Progress; +import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.ReflectionUtils; +import org.apache.hadoop.mapreduce.task.reduce.Shuffle; +import org.apache.hadoop.mapred.Counters; +import org.apache.hadoop.mapred.Counters.Counter; +import org.apache.hadoop.mapred.MapOutputFile; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.Task; +import org.apache.hadoop.mapred.ReduceTask; +import org.apache.hadoop.mapred.TaskStatus; +import org.apache.hadoop.mapred.TaskUmbilicalProtocol; +import org.apache.hadoop.mapred.ShuffleConsumerPlugin; +import org.apache.hadoop.mapred.RawKeyValueIterator; +import org.apache.hadoop.mapred.Reducer; + +/** + * A JUnit test verifying the availability and accessibility of the shuffle-related API. + * It is needed for maintaining compatibility with external sub-classes of + * ShuffleConsumerPlugin and AuxiliaryService(s) like ShuffleHandler. + * + * The importance of this test is in preserving the API for 3rd party plugins. + */ +public class TestShufflePlugin { + + static class TestShuffleConsumerPlugin implements ShuffleConsumerPlugin { + + @Override + public void init(ShuffleConsumerPlugin.Context context) { + // just verify that Context has kept its public interface + context.getReduceId(); + context.getJobConf(); + context.getLocalFS(); + context.getUmbilical(); + context.getLocalDirAllocator(); + context.getReporter(); + context.getCodec(); + context.getCombinerClass(); + context.getCombineCollector(); + context.getSpilledRecordsCounter(); + context.getReduceCombineInputCounter(); + context.getShuffledMapsCounter(); + context.getReduceShuffleBytes(); + context.getFailedShuffleCounter(); + context.getMergedMapOutputsCounter(); + context.getStatus(); + context.getCopyPhase(); + context.getMergePhase(); + context.getReduceTask(); + context.getMapOutputFile(); + } + + @Override + public void close(){ + } + + @Override + public RawKeyValueIterator run() throws java.io.IOException, java.lang.InterruptedException{ + return null; + } + } + + + + @Test + /** + * A testing method instructing core Hadoop to load an external ShuffleConsumerPlugin + * as if it came from a 3rd party.
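+ *
+ * (For context, and as an assumption about typical deployment rather than
+ * something this test asserts: outside of tests, such an external plugin would
+ * be enabled the same way, by pointing
+ * mapreduce.job.reduce.shuffle.consumer.plugin.class at the plugin class in the
+ * job or cluster configuration; see the new mapred-default.xml entry.)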
+ */ + public void testPluginAbility() { + + try{ + // create JobConf with mapreduce.job.shuffle.consumer.plugin=TestShuffleConsumerPlugin + JobConf jobConf = new JobConf(); + jobConf.setClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, + TestShufflePlugin.TestShuffleConsumerPlugin.class, + ShuffleConsumerPlugin.class); + + ShuffleConsumerPlugin shuffleConsumerPlugin = null; + Class clazz = + jobConf.getClass(MRConfig.SHUFFLE_CONSUMER_PLUGIN, Shuffle.class, ShuffleConsumerPlugin.class); + assertNotNull("Unable to get " + MRConfig.SHUFFLE_CONSUMER_PLUGIN, clazz); + + // load 3rd party plugin through core's factory method + shuffleConsumerPlugin = ReflectionUtils.newInstance(clazz, jobConf); + assertNotNull("Unable to load " + MRConfig.SHUFFLE_CONSUMER_PLUGIN, shuffleConsumerPlugin); + } + catch (Exception e) { + assertTrue("Threw exception:" + e, false); + } + } + + @Test + /** + * A testing method verifying availability and accessibility of API that is needed + * for sub-classes of ShuffleConsumerPlugin + */ + public void testConsumerApi() { + + JobConf jobConf = new JobConf(); + ShuffleConsumerPlugin shuffleConsumerPlugin = new TestShuffleConsumerPlugin(); + + //mock creation + ReduceTask mockReduceTask = mock(ReduceTask.class); + TaskUmbilicalProtocol mockUmbilical = mock(TaskUmbilicalProtocol.class); + Reporter mockReporter = mock(Reporter.class); + FileSystem mockFileSystem = mock(FileSystem.class); + Class combinerClass = jobConf.getCombinerClass(); + @SuppressWarnings("unchecked") // needed for mock with generic + CombineOutputCollector mockCombineOutputCollector = + (CombineOutputCollector) mock(CombineOutputCollector.class); + org.apache.hadoop.mapreduce.TaskAttemptID mockTaskAttemptID = + mock(org.apache.hadoop.mapreduce.TaskAttemptID.class); + LocalDirAllocator mockLocalDirAllocator = mock(LocalDirAllocator.class); + CompressionCodec mockCompressionCodec = mock(CompressionCodec.class); + Counter mockCounter = mock(Counter.class); + TaskStatus mockTaskStatus = mock(TaskStatus.class); + Progress mockProgress = mock(Progress.class); + MapOutputFile mockMapOutputFile = mock(MapOutputFile.class); + Task mockTask = mock(Task.class); + + try { + String [] dirs = jobConf.getLocalDirs(); + // verify that these APIs are available through super class handler + ShuffleConsumerPlugin.Context context = + new ShuffleConsumerPlugin.Context(mockTaskAttemptID, jobConf, mockFileSystem, + mockUmbilical, mockLocalDirAllocator, + mockReporter, mockCompressionCodec, + combinerClass, mockCombineOutputCollector, + mockCounter, mockCounter, mockCounter, + mockCounter, mockCounter, mockCounter, + mockTaskStatus, mockProgress, mockProgress, + mockTask, mockMapOutputFile); + shuffleConsumerPlugin.init(context); + shuffleConsumerPlugin.run(); + shuffleConsumerPlugin.close(); + } + catch (Exception e) { + assertTrue("Threw exception:" + e, false); + } + + // verify that these APIs are available for 3rd party plugins + mockReduceTask.getTaskID(); + mockReduceTask.getJobID(); + mockReduceTask.getNumMaps(); + mockReduceTask.getPartition(); + mockReporter.progress(); + } + + @Test + /** + * A testing method verifying availability and accessibility of API needed for + * AuxiliaryService(s) which are "Shuffle-Providers" (ShuffleHandler and 3rd party plugins) + */ + public void testProviderApi() { + + ApplicationId mockApplicationId = mock(ApplicationId.class); + mockApplicationId.setClusterTimestamp(new Long(10)); + mockApplicationId.setId(mock(JobID.class).getId()); + LocalDirAllocator mockLocalDirAllocator = 
mock(LocalDirAllocator.class); + JobConf mockJobConf = mock(JobConf.class); + try { + mockLocalDirAllocator.getLocalPathToRead("", mockJobConf); + } + catch (Exception e) { + assertTrue("Threw exception:" + e, false); + } + } +} diff --git a/hadoop-yarn-project/CHANGES.txt b/hadoop-yarn-project/CHANGES.txt index b9c63020a3d..6ba341a385c 100644 --- a/hadoop-yarn-project/CHANGES.txt +++ b/hadoop-yarn-project/CHANGES.txt @@ -117,7 +117,11 @@ Release 2.0.3-alpha - Unreleased YARN-229. Remove old unused RM recovery code. (Bikas Saha via acmurthy) - YARN-187. Add hierarchical queues to the fair scheduler. (Sandy Ryza via tomwhite) + YARN-187. Add hierarchical queues to the fair scheduler. + (Sandy Ryza via tomwhite) + + YARN-72. NM should handle cleaning up containers when it shuts down. + (Sandy Ryza via tomwhite) Release 2.0.2-alpha - 2012-09-07 @@ -199,6 +203,9 @@ Release 0.23.6 - UNRELEASED YARN-251. Proxy URI generation fails for blank tracking URIs (Tom White via jlowe) + YARN-258. RM web page UI shows Invalid Date for start and finish times + (Ravi Prakash via jlowe) + Release 0.23.5 - UNRELEASED INCOMPATIBLE CHANGES diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java index da334eb9e16..115338eb07c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/JQueryUI.java @@ -28,17 +28,6 @@ import static org.apache.hadoop.yarn.util.StringHelper.*; import org.apache.hadoop.yarn.webapp.hamlet.HamletSpec.HTML; public class JQueryUI extends HtmlBlock { - // Render choices (mostly for dataTables) - public enum Render { - /** small (<~100 rows) table as html, most gracefully degradable */ - HTML, - /** medium (<~2000 rows) table as js array */ - JS_ARRAY, - /** large (<~10000 rows) table loading from server */ - JS_LOAD, - /** huge (>~10000 rows) table processing from server */ - JS_SERVER - }; // UI params public static final String ACCORDION = "ui.accordion"; @@ -197,12 +186,4 @@ public class JQueryUI extends HtmlBlock { append("sPaginationType: 'full_numbers', iDisplayLength:20, "). append("aLengthMenu:[20, 40, 60, 80, 100]"); } - - public static StringBuilder tableInitProgress(StringBuilder init, - long numCells) { - return init.append(", bProcessing:true, "). - append("oLanguage:{sProcessing:'Processing "). - append(numCells).append(" cells..."). - append("

'}"); - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/Jsons.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/Jsons.java deleted file mode 100644 index 8e1794062bd..00000000000 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/Jsons.java +++ /dev/null @@ -1,56 +0,0 @@ -/** -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. -*/ - -package org.apache.hadoop.yarn.webapp.view; - -import java.io.PrintWriter; - -import static org.apache.hadoop.yarn.util.StringHelper.*; -import static org.apache.hadoop.yarn.webapp.view.JQueryUI.*; - -/** - * JSON helpers - */ -public class Jsons { - public static final String _SEP = "\",\""; - - public static PrintWriter appendProgressBar(PrintWriter out, String pct) { - return out.append("
"). - append("

").append("<\\/div><\\/div>"); - } - - public static PrintWriter appendProgressBar(PrintWriter out, - float progress) { - return appendProgressBar(out, String.format("%.1f", progress)); - } - - public static PrintWriter appendSortable(PrintWriter out, Object value) { - return out.append("
"); - } - - public static PrintWriter appendLink(PrintWriter out, Object anchor, - String prefix, String... parts) { - String anchorText = String.valueOf(anchor); - return out.append("").append(anchorText).append("<\\/a>"); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java index 28e96324ab1..675b6051211 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/CMgrCompletedContainersEvent.java @@ -25,13 +25,23 @@ import org.apache.hadoop.yarn.api.records.ContainerId; public class CMgrCompletedContainersEvent extends ContainerManagerEvent { private List containerToCleanup; - - public CMgrCompletedContainersEvent(List containersToCleanup) { + private Reason reason; + + public CMgrCompletedContainersEvent(List containersToCleanup, Reason reason) { super(ContainerManagerEventType.FINISH_CONTAINERS); this.containerToCleanup = containersToCleanup; + this.reason = reason; } public List getContainersToCleanup() { return this.containerToCleanup; } + + public Reason getReason() { + return reason; + } + + public static enum Reason { + ON_SHUTDOWN, BY_RESOURCEMANAGER + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java index cbb3d2914e7..51b81f25f8b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeManager.java @@ -19,6 +19,9 @@ package org.apache.hadoop.yarn.server.nodemanager; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; @@ -61,14 +64,24 @@ public class NodeManager extends CompositeService implements * Priority of the NodeManager shutdown hook. */ public static final int SHUTDOWN_HOOK_PRIORITY = 30; + + /** + * Extra duration to wait for containers to be killed on shutdown. 
+ */ + private static final int SHUTDOWN_CLEANUP_SLOP_MS = 1000; private static final Log LOG = LogFactory.getLog(NodeManager.class); protected final NodeManagerMetrics metrics = NodeManagerMetrics.create(); private ApplicationACLsManager aclsManager; private NodeHealthCheckerService nodeHealthChecker; private LocalDirsHandlerService dirsHandler; + private Context context; + private AsyncDispatcher dispatcher; + private ContainerManagerImpl containerManager; private static CompositeServiceShutdownHook nodeManagerShutdownHook; + private long waitForContainersOnShutdownMillis; + public NodeManager() { super(NodeManager.class.getName()); } @@ -115,7 +128,7 @@ public class NodeManager extends CompositeService implements containerTokenSecretManager = new NMContainerTokenSecretManager(conf); } - Context context = new NMContext(containerTokenSecretManager); + this.context = new NMContext(containerTokenSecretManager); this.aclsManager = new ApplicationACLsManager(conf); @@ -131,7 +144,7 @@ public class NodeManager extends CompositeService implements addService(del); // NodeManager level dispatcher - AsyncDispatcher dispatcher = new AsyncDispatcher(); + this.dispatcher = new AsyncDispatcher(); nodeHealthChecker = new NodeHealthCheckerService(); addService(nodeHealthChecker); @@ -144,7 +157,7 @@ public class NodeManager extends CompositeService implements NodeResourceMonitor nodeResourceMonitor = createNodeResourceMonitor(); addService(nodeResourceMonitor); - ContainerManagerImpl containerManager = + containerManager = createContainerManager(context, exec, del, nodeStatusUpdater, this.aclsManager, dirsHandler); addService(containerManager); @@ -155,13 +168,20 @@ public class NodeManager extends CompositeService implements dispatcher.register(ContainerManagerEventType.class, containerManager); addService(dispatcher); - + DefaultMetricsSystem.initialize("NodeManager"); // StatusUpdater should be added last so that it get started last // so that we make sure everything is up before registering with RM. 
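    // (The shutdown path added in this patch mirrors this ordering: stop()
    // first calls cleanupContainers(), which dispatches FINISH_CONTAINERS with
    // Reason.ON_SHUTDOWN and waits up to waitForContainersOnShutdownMillis for
    // the running-container map to drain, and only then stops the services.)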
addService(nodeStatusUpdater); - + + waitForContainersOnShutdownMillis = + conf.getLong(YarnConfiguration.NM_SLEEP_DELAY_BEFORE_SIGKILL_MS, + YarnConfiguration.DEFAULT_NM_SLEEP_DELAY_BEFORE_SIGKILL_MS) + + conf.getLong(YarnConfiguration.NM_PROCESS_KILL_WAIT_MS, + YarnConfiguration.DEFAULT_NM_PROCESS_KILL_WAIT_MS) + + SHUTDOWN_CLEANUP_SLOP_MS; + super.init(conf); // TODO add local dirs to del } @@ -178,9 +198,44 @@ public class NodeManager extends CompositeService implements @Override public void stop() { + cleanupContainers(); super.stop(); DefaultMetricsSystem.shutdown(); } + + @SuppressWarnings("unchecked") + private void cleanupContainers() { + Map containers = context.getContainers(); + if (containers.isEmpty()) { + return; + } + LOG.info("Containers still running on shutdown: " + containers.keySet()); + + List containerIds = new ArrayList(containers.keySet()); + dispatcher.getEventHandler().handle( + new CMgrCompletedContainersEvent(containerIds, + CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN)); + + LOG.info("Waiting for containers to be killed"); + + long waitStartTime = System.currentTimeMillis(); + while (!containers.isEmpty() && + System.currentTimeMillis() - waitStartTime < waitForContainersOnShutdownMillis) { + try { + Thread.sleep(1000); + } catch (InterruptedException ex) { + LOG.warn("Interrupted while sleeping on container kill", ex); + } + } + + // All containers killed + if (containers.isEmpty()) { + LOG.info("All containers in DONE state"); + } else { + LOG.info("Done waiting for containers to be killed. Still alive: " + + containers.keySet()); + } + } public static class NMContext implements Context { @@ -282,6 +337,11 @@ public class NodeManager extends CompositeService implements NodeManager createNewNodeManager() { return new NodeManager(); } + + // For testing + ContainerManagerImpl getContainerManager() { + return containerManager; + } public static void main(String[] args) { Thread.setDefaultUncaughtExceptionHandler(new YarnUncaughtExceptionHandler()); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java index 819e22d2146..22ec10a5a8e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/NodeStatusUpdaterImpl.java @@ -363,7 +363,8 @@ public class NodeStatusUpdaterImpl extends AbstractService implements .getContainersToCleanupList(); if (containersToCleanup.size() != 0) { dispatcher.getEventHandler().handle( - new CMgrCompletedContainersEvent(containersToCleanup)); + new CMgrCompletedContainersEvent(containersToCleanup, + CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER)); } List appsToCleanup = response.getApplicationsToCleanupList(); diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java 
index 7ca6a2cbd32..b4a0034217c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/java/org/apache/hadoop/yarn/server/nodemanager/containermanager/ContainerManagerImpl.java @@ -23,6 +23,8 @@ import static org.apache.hadoop.yarn.service.Service.STATE.STARTED; import java.io.IOException; import java.net.InetSocketAddress; import java.nio.ByteBuffer; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Set; @@ -593,9 +595,16 @@ public class ContainerManagerImpl extends CompositeService implements (CMgrCompletedContainersEvent) event; for (ContainerId container : containersFinishedEvent .getContainersToCleanup()) { + String diagnostic = ""; + if (containersFinishedEvent.getReason() == + CMgrCompletedContainersEvent.Reason.ON_SHUTDOWN) { + diagnostic = "Container Killed on Shutdown"; + } else if (containersFinishedEvent.getReason() == + CMgrCompletedContainersEvent.Reason.BY_RESOURCEMANAGER) { + diagnostic = "Container Killed by ResourceManager"; + } this.dispatcher.getEventHandler().handle( - new ContainerKillEvent(container, - "Container Killed by ResourceManager")); + new ContainerKillEvent(container, diagnostic)); } break; default: diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java new file mode 100644 index 00000000000..e2ea8212d88 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/MockNodeStatusUpdater.java @@ -0,0 +1,92 @@ +/** +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +package org.apache.hadoop.yarn.server.nodemanager; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.exceptions.YarnRemoteException; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.api.ResourceTracker; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.NodeHeartbeatResponse; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerRequest; +import org.apache.hadoop.yarn.server.api.protocolrecords.RegisterNodeManagerResponse; +import org.apache.hadoop.yarn.server.api.records.HeartbeatResponse; +import org.apache.hadoop.yarn.server.api.records.NodeStatus; +import org.apache.hadoop.yarn.server.api.records.RegistrationResponse; +import org.apache.hadoop.yarn.server.nodemanager.metrics.NodeManagerMetrics; + +/** + * This class allows a node manager to run without communicating with a + * real RM. + */ +public class MockNodeStatusUpdater extends NodeStatusUpdaterImpl { + static final Log LOG = LogFactory.getLog(MockNodeStatusUpdater.class); + + private static final RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + + private ResourceTracker resourceTracker; + + public MockNodeStatusUpdater(Context context, Dispatcher dispatcher, + NodeHealthCheckerService healthChecker, NodeManagerMetrics metrics) { + super(context, dispatcher, healthChecker, metrics); + resourceTracker = new MockResourceTracker(); + } + + @Override + protected ResourceTracker getRMClient() { + return resourceTracker; + } + + private static class MockResourceTracker implements ResourceTracker { + private int heartBeatID; + + @Override + public RegisterNodeManagerResponse registerNodeManager( + RegisterNodeManagerRequest request) throws YarnRemoteException { + RegistrationResponse regResponse = recordFactory + .newRecordInstance(RegistrationResponse.class); + + RegisterNodeManagerResponse response = recordFactory + .newRecordInstance(RegisterNodeManagerResponse.class); + response.setRegistrationResponse(regResponse); + return response; + } + + @Override + public NodeHeartbeatResponse nodeHeartbeat(NodeHeartbeatRequest request) + throws YarnRemoteException { + NodeStatus nodeStatus = request.getNodeStatus(); + LOG.info("Got heartbeat number " + heartBeatID); + nodeStatus.setResponseId(heartBeatID++); + + HeartbeatResponse response = recordFactory + .newRecordInstance(HeartbeatResponse.class); + response.setResponseId(heartBeatID); + + NodeHeartbeatResponse nhResponse = recordFactory + .newRecordInstance(NodeHeartbeatResponse.class); + nhResponse.setHeartbeatResponse(response); + return nhResponse; + } + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java new file mode 100644 index 00000000000..c5b48819641 --- /dev/null +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/test/java/org/apache/hadoop/yarn/server/nodemanager/TestNodeManagerShutdown.java @@ -0,0 +1,222 @@ +/** +* Licensed to the Apache Software
Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*/ + +package org.apache.hadoop.yarn.server.nodemanager; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import junit.framework.Assert; + +import org.apache.hadoop.fs.FileContext; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.UnsupportedFileSystemException; +import org.apache.hadoop.yarn.api.protocolrecords.GetContainerStatusRequest; +import org.apache.hadoop.yarn.api.protocolrecords.StartContainerRequest; +import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.api.records.ContainerId; +import org.apache.hadoop.yarn.api.records.ContainerLaunchContext; +import org.apache.hadoop.yarn.api.records.ContainerState; +import org.apache.hadoop.yarn.api.records.ContainerStatus; +import org.apache.hadoop.yarn.api.records.LocalResource; +import org.apache.hadoop.yarn.api.records.LocalResourceType; +import org.apache.hadoop.yarn.api.records.LocalResourceVisibility; +import org.apache.hadoop.yarn.api.records.Resource; +import org.apache.hadoop.yarn.api.records.URL; +import org.apache.hadoop.yarn.conf.YarnConfiguration; +import org.apache.hadoop.yarn.event.Dispatcher; +import org.apache.hadoop.yarn.factories.RecordFactory; +import org.apache.hadoop.yarn.factory.providers.RecordFactoryProvider; +import org.apache.hadoop.yarn.server.nodemanager.containermanager.ContainerManagerImpl; +import org.apache.hadoop.yarn.util.ConverterUtils; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +public class TestNodeManagerShutdown { + static final File basedir = + new File("target", TestNodeManagerShutdown.class.getName()); + static final File tmpDir = new File(basedir, "tmpDir"); + static final File logsDir = new File(basedir, "logs"); + static final File remoteLogsDir = new File(basedir, "remotelogs"); + static final File nmLocalDir = new File(basedir, "nm0"); + static final File processStartFile = new File(tmpDir, "start_file.txt") + .getAbsoluteFile(); + + static final RecordFactory recordFactory = RecordFactoryProvider + .getRecordFactory(null); + static final String user = "nobody"; + private FileContext localFS; + + @Before + public void setup() throws UnsupportedFileSystemException { + localFS = FileContext.getLocalFSFileContext(); + tmpDir.mkdirs(); + logsDir.mkdirs(); + remoteLogsDir.mkdirs(); + nmLocalDir.mkdirs(); + } + + @After + public void tearDown() throws IOException, InterruptedException { + localFS.delete(new Path(basedir.getPath()), true); + } + + @Test + public void 
testKillContainersOnShutdown() throws IOException { + NodeManager nm = getNodeManager(); + nm.init(createNMConfig()); + nm.start(); + + ContainerManagerImpl containerManager = nm.getContainerManager(); + File scriptFile = createUnhaltingScriptFile(); + + ContainerLaunchContext containerLaunchContext = + recordFactory.newRecordInstance(ContainerLaunchContext.class); + + // Construct the Container-id + ContainerId cId = createContainerId(); + containerLaunchContext.setContainerId(cId); + + containerLaunchContext.setUser(user); + + URL localResourceUri = + ConverterUtils.getYarnUrlFromPath(localFS + .makeQualified(new Path(scriptFile.getAbsolutePath()))); + LocalResource localResource = + recordFactory.newRecordInstance(LocalResource.class); + localResource.setResource(localResourceUri); + localResource.setSize(-1); + localResource.setVisibility(LocalResourceVisibility.APPLICATION); + localResource.setType(LocalResourceType.FILE); + localResource.setTimestamp(scriptFile.lastModified()); + String destinationFile = "dest_file"; + Map localResources = + new HashMap(); + localResources.put(destinationFile, localResource); + containerLaunchContext.setLocalResources(localResources); + containerLaunchContext.setUser(containerLaunchContext.getUser()); + List commands = new ArrayList(); + commands.add("/bin/bash"); + commands.add(scriptFile.getAbsolutePath()); + containerLaunchContext.setCommands(commands); + containerLaunchContext.setResource(recordFactory + .newRecordInstance(Resource.class)); + containerLaunchContext.getResource().setMemory(1024); + StartContainerRequest startRequest = recordFactory.newRecordInstance(StartContainerRequest.class); + startRequest.setContainerLaunchContext(containerLaunchContext); + containerManager.startContainer(startRequest); + + GetContainerStatusRequest request = + recordFactory.newRecordInstance(GetContainerStatusRequest.class); + request.setContainerId(cId); + ContainerStatus containerStatus = + containerManager.getContainerStatus(request).getStatus(); + Assert.assertEquals(ContainerState.RUNNING, containerStatus.getState()); + + try {Thread.sleep(5000);} catch (InterruptedException ex) {ex.printStackTrace();} + + nm.stop(); + + // Now verify the contents of the file + // Script generates a message when it receives a sigterm + // so we look for that + BufferedReader reader = + new BufferedReader(new FileReader(processStartFile)); + + boolean foundSigTermMessage = false; + while (true) { + String line = reader.readLine(); + if (line == null) { + break; + } + if (line.contains("SIGTERM")) { + foundSigTermMessage = true; + break; + } + } + Assert.assertTrue("Did not find sigterm message", foundSigTermMessage); + reader.close(); + } + + private ContainerId createContainerId() { + ApplicationId appId = recordFactory.newRecordInstance(ApplicationId.class); + appId.setClusterTimestamp(0); + appId.setId(0); + ApplicationAttemptId appAttemptId = + recordFactory.newRecordInstance(ApplicationAttemptId.class); + appAttemptId.setApplicationId(appId); + appAttemptId.setAttemptId(1); + ContainerId containerId = + recordFactory.newRecordInstance(ContainerId.class); + containerId.setApplicationAttemptId(appAttemptId); + return containerId; + } + + private YarnConfiguration createNMConfig() { + YarnConfiguration conf = new YarnConfiguration(); + conf.setInt(YarnConfiguration.NM_PMEM_MB, 5*1024); // 5GB + conf.set(YarnConfiguration.NM_ADDRESS, "127.0.0.1:12345"); + conf.set(YarnConfiguration.NM_LOCALIZER_ADDRESS, "127.0.0.1:12346"); + conf.set(YarnConfiguration.NM_LOG_DIRS, 
logsDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_REMOTE_APP_LOG_DIR, remoteLogsDir.getAbsolutePath()); + conf.set(YarnConfiguration.NM_LOCAL_DIRS, nmLocalDir.getAbsolutePath()); + return conf; + } + + /** + * Creates a script to run a container that will run forever unless + * stopped by external means. + */ + private File createUnhaltingScriptFile() throws IOException { + File scriptFile = new File(tmpDir, "scriptFile.sh"); + BufferedWriter fileWriter = new BufferedWriter(new FileWriter(scriptFile)); + fileWriter.write("#!/bin/bash\n\n"); + fileWriter.write("echo \"Running testscript for delayed kill\"\n"); + fileWriter.write("hello=\"Got SIGTERM\"\n"); + fileWriter.write("umask 0\n"); + fileWriter.write("trap \"echo $hello >> " + processStartFile + "\" SIGTERM\n"); + fileWriter.write("echo \"Writing pid to start file\"\n"); + fileWriter.write("echo $$ >> " + processStartFile + "\n"); + fileWriter.write("while true; do\nsleep 1s;\ndone\n"); + + fileWriter.close(); + return scriptFile; + } + + private NodeManager getNodeManager() { + return new NodeManager() { + @Override + protected NodeStatusUpdater createNodeStatusUpdater(Context context, + Dispatcher dispatcher, NodeHealthCheckerService healthChecker) { + MockNodeStatusUpdater myNodeStatusUpdater = new MockNodeStatusUpdater( + context, dispatcher, healthChecker, metrics); + return myNodeStatusUpdater; + } + }; + } +} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java index fad5dd82591..e90edae89aa 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java @@ -25,26 +25,27 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR_VALUE; import java.util.Collection; import java.util.HashSet; +import java.util.concurrent.ConcurrentMap; import org.apache.commons.lang.StringEscapeUtils; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo; -import org.apache.hadoop.yarn.util.Times; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; -import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render; import com.google.inject.Inject; class AppsBlock extends HtmlBlock { - final AppsList list; + final ConcurrentMap apps; - @Inject AppsBlock(AppsList list, ViewContext ctx) { +@Inject AppsBlock(RMContext rmContext, ViewContext ctx) { super(ctx); - this.list = list; + apps = rmContext.getRMApps(); } @Override public void render(Block html) { @@ -63,7 +64,6 @@ class AppsBlock extends HtmlBlock { th(".progress", "Progress"). th(".ui", "Tracking UI")._()._(). 
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java
index fad5dd82591..e90edae89aa 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsBlock.java
@@ -25,26 +25,27 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI.C_PROGRESSBAR_VALUE;
 
 import java.util.Collection;
 import java.util.HashSet;
+import java.util.concurrent.ConcurrentMap;
 
 import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.hadoop.yarn.api.records.ApplicationId;
+import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
 import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
 import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
-import org.apache.hadoop.yarn.util.Times;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE;
 import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY;
 import org.apache.hadoop.yarn.webapp.view.HtmlBlock;
-import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
 
 import com.google.inject.Inject;
 
 class AppsBlock extends HtmlBlock {
-  final AppsList list;
+  final ConcurrentMap<ApplicationId, RMApp> apps;
 
-  @Inject AppsBlock(AppsList list, ViewContext ctx) {
+  @Inject AppsBlock(RMContext rmContext, ViewContext ctx) {
     super(ctx);
-    this.list = list;
+    apps = rmContext.getRMApps();
   }
 
   @Override public void render(Block html) {
@@ -63,7 +64,6 @@ class AppsBlock extends HtmlBlock {
             th(".progress", "Progress").
             th(".ui", "Tracking UI")._()._().
         tbody();
-    int i = 0;
     Collection<RMAppState> reqAppStates = null;
     String reqStateString = $(APP_STATE);
     if (reqStateString != null && !reqStateString.isEmpty()) {
@@ -74,7 +74,7 @@
       }
     }
     StringBuilder appsTableData = new StringBuilder("[\n");
-    for (RMApp app : list.apps.values()) {
+    for (RMApp app : apps.values()) {
       if (reqAppStates != null && !reqAppStates.contains(app.getState())) {
         continue;
       }
@@ -108,7 +108,6 @@ class AppsBlock extends HtmlBlock {
       appsTableData.append(trackingURL).append("'>")
      .append(appInfo.getTrackingUI()).append("</a>\"],\n");
 
-      if (list.rendering != Render.HTML && ++i >= 20) break;
     }
     if(appsTableData.charAt(appsTableData.length() - 2) == ',') {
       appsTableData.delete(appsTableData.length()-2, appsTableData.length()-1);
@@ -118,12 +117,5 @@
         _("var appsTableData=" + appsTableData)._();
 
     tbody._()._();
-
-    if (list.rendering == Render.JS_ARRAY) {
-      echo("<script type='text/javascript'>\n",
-           "var appsData=");
-      list.toDataTableArrays(reqAppStates, writer());
-      echo("\n</script>\n");
-    }
   }
 }
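For readers skimming the AppsBlock hunks above: the block now emits every application as one row of a JavaScript array literal (appsTableData) and strips the trailing comma before closing the array. The self-contained sketch below shows just that StringBuilder pattern with dummy application ids; it is an illustration of the existing logic, not code taken from the patch.

```java
// Standalone illustration of the appsTableData construction pattern:
// rows are appended as "[...],\n" and the final trailing comma is removed
// before the array is closed and handed to DataTables.
import java.util.Arrays;
import java.util.List;

public class AppsTableDataDemo {
  public static void main(String[] args) {
    List<String> rows = Arrays.asList("app_0001", "app_0002", "app_0003");

    StringBuilder appsTableData = new StringBuilder("[\n");
    for (String id : rows) {
      appsTableData.append("[\"").append(id).append("\"],\n");
    }
    // Same trim as in render(): the character at length-2 (just before the
    // final '\n') is the trailing comma left by the last appended row.
    if (appsTableData.charAt(appsTableData.length() - 2) == ',') {
      appsTableData.delete(appsTableData.length() - 2, appsTableData.length() - 1);
    }
    appsTableData.append("]");

    System.out.println(appsTableData);
    // Prints a bracketed array with one ["app_..."] row per line and
    // no comma after the last row.
  }
}
```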
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java
deleted file mode 100644
index 415f915cd5f..00000000000
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/AppsList.java
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
-* Licensed to the Apache Software Foundation (ASF) under one
-* or more contributor license agreements. See the NOTICE file
-* distributed with this work for additional information
-* regarding copyright ownership. The ASF licenses this file
-* to you under the Apache License, Version 2.0 (the
-* "License"); you may not use this file except in compliance
-* with the License. You may obtain a copy of the License at
-*
-* http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
-
-package org.apache.hadoop.yarn.server.resourcemanager.webapp;
-
-import static org.apache.commons.lang.StringEscapeUtils.escapeHtml;
-import static org.apache.commons.lang.StringEscapeUtils.escapeJavaScript;
-import static org.apache.hadoop.yarn.webapp.view.Jsons._SEP;
-import static org.apache.hadoop.yarn.webapp.view.Jsons.appendLink;
-import static org.apache.hadoop.yarn.webapp.view.Jsons.appendProgressBar;
-import static org.apache.hadoop.yarn.webapp.view.Jsons.appendSortable;
-
-import java.io.PrintWriter;
-import java.util.Collection;
-import java.util.concurrent.ConcurrentMap;
-
-import org.apache.hadoop.yarn.api.records.ApplicationId;
-import org.apache.hadoop.yarn.server.resourcemanager.RMContext;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp;
-import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState;
-import org.apache.hadoop.yarn.server.resourcemanager.webapp.dao.AppInfo;
-import org.apache.hadoop.yarn.util.Times;
-import org.apache.hadoop.yarn.webapp.Controller.RequestContext;
-import org.apache.hadoop.yarn.webapp.ToJSON;
-import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render;
-
-import com.google.inject.Inject;
-import com.google.inject.servlet.RequestScoped;
-
-// So we only need to do asm.getApplications once in a request
-@RequestScoped
-class AppsList implements ToJSON {
-  final RequestContext rc;
-  final ConcurrentMap<ApplicationId, RMApp> apps;
-  Render rendering;
-
-  @Inject AppsList(RequestContext ctx, RMContext rmContext) {
-    rc = ctx;
-    apps = rmContext.getRMApps();
-  }
-
-  void toDataTableArrays(Collection<RMAppState> requiredAppStates, PrintWriter out) {
-    out.append('[');
-    boolean first = true;
-    for (RMApp app : apps.values()) {
-      if (requiredAppStates != null &&
-          !requiredAppStates.contains(app.getState())) {
-        continue;
-      }
-      AppInfo appInfo = new AppInfo(app, true);
-      String startTime = Times.format(appInfo.getStartTime());
-      String finishTime = Times.format(appInfo.getFinishTime());
-      if (first) {
-        first = false;
-      } else {
-        out.append(",\n");
-      }
-      out.append("[\"");
-      appendSortable(out, appInfo.getAppIdNum());
-      appendLink(out, appInfo.getAppId(), rc.prefix(), "app",
-          appInfo.getAppId()).append(_SEP).
-          append(escapeHtml(appInfo.getUser())).append(_SEP).
-          append(escapeJavaScript(escapeHtml(appInfo.getName()))).append(_SEP).
-          append(escapeHtml(appInfo.getQueue())).append(_SEP);
-      appendSortable(out, appInfo.getStartTime()).
-          append(startTime).append(_SEP);
-      appendSortable(out, appInfo.getFinishTime()).
-          append(finishTime).append(_SEP).
-          append(appInfo.getState()).append(_SEP).
-          append(appInfo.getFinalStatus()).append(_SEP);
-      appendProgressBar(out, appInfo.getProgress()).append(_SEP);
-      appendLink(out, appInfo.getTrackingUI(), rc.prefix(),
-          !appInfo.isTrackingUrlReady() ?
-          "#" : appInfo.getTrackingUrlPretty()).
-          append("\"]");
-    }
-    out.append(']');
-  }
-
-  @Override
-  public void toJSON(PrintWriter out) {
-    out.print("{\"aaData\":");
-    toDataTableArrays(null, out);
-    out.print("}\n");
-  }
-}
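The deleted AppsList above existed mainly to snapshot the application map once per request and serve it back as {"aaData": ...} JSON. The blocks can instead iterate rmContext.getRMApps() directly because the RM keeps its applications in a ConcurrentMap, whose iterators are weakly consistent. The small sketch below uses only plain JDK classes with invented names to demonstrate that such concurrent iteration never throws ConcurrentModificationException; it is an illustration, not YARN code.

```java
// Illustrative sketch: iterating a ConcurrentMap while another thread inserts
// entries is safe (weakly consistent iteration), which is why the web blocks
// can read the live application map without a request-scoped snapshot.
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class ConcurrentIterationDemo {
  public static void main(String[] args) throws InterruptedException {
    final ConcurrentMap<Integer, String> apps = new ConcurrentHashMap<Integer, String>();
    for (int i = 0; i < 1000; i++) {
      apps.put(i, "application_" + i);
    }

    // Writer thread keeps adding entries while the "render" loop iterates.
    Thread writer = new Thread(new Runnable() {
      @Override public void run() {
        for (int i = 1000; i < 2000; i++) {
          apps.put(i, "application_" + i);
        }
      }
    });
    writer.start();

    int rendered = 0;
    for (String name : apps.values()) {  // weakly consistent view, no snapshot needed
      if (name != null) {
        rendered++;
      }
    }
    writer.join();

    System.out.println("rendered " + rendered + " rows out of " + apps.size() + " apps");
  }
}
```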
- append("\"]"); - } - out.append(']'); - } - - @Override - public void toJSON(PrintWriter out) { - out.print("{\"aaData\":"); - toDataTableArrays(null, out); - out.print("}\n"); - } -} diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java index efbe64a5b78..9860e18dac3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/FairSchedulerAppsBlock.java @@ -25,8 +25,11 @@ import static org.apache.hadoop.yarn.webapp.view.JQueryUI._PROGRESSBAR_VALUE; import java.util.Collection; import java.util.HashSet; +import java.util.concurrent.ConcurrentMap; import org.apache.hadoop.yarn.api.records.ApplicationAttemptId; +import org.apache.hadoop.yarn.api.records.ApplicationId; +import org.apache.hadoop.yarn.server.resourcemanager.RMContext; import org.apache.hadoop.yarn.server.resourcemanager.ResourceManager; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMApp; import org.apache.hadoop.yarn.server.resourcemanager.rmapp.RMAppState; @@ -38,7 +41,6 @@ import org.apache.hadoop.yarn.webapp.hamlet.Hamlet; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TABLE; import org.apache.hadoop.yarn.webapp.hamlet.Hamlet.TBODY; import org.apache.hadoop.yarn.webapp.view.HtmlBlock; -import org.apache.hadoop.yarn.webapp.view.JQueryUI.Render; import com.google.inject.Inject; @@ -47,15 +49,15 @@ import com.google.inject.Inject; * scheduler as part of the fair scheduler page. */ public class FairSchedulerAppsBlock extends HtmlBlock { - final AppsList list; + final ConcurrentMap apps; final FairSchedulerInfo fsinfo; - @Inject public FairSchedulerAppsBlock(AppsList list, + @Inject public FairSchedulerAppsBlock(RMContext rmContext, ResourceManager rm, ViewContext ctx) { super(ctx); - this.list = list; FairScheduler scheduler = (FairScheduler) rm.getResourceScheduler(); fsinfo = new FairSchedulerInfo(scheduler); + apps = rmContext.getRMApps(); } @Override public void render(Block html) { @@ -75,7 +77,6 @@ public class FairSchedulerAppsBlock extends HtmlBlock { th(".progress", "Progress"). th(".ui", "Tracking UI")._()._(). tbody(); - int i = 0; Collection reqAppStates = null; String reqStateString = $(APP_STATE); if (reqStateString != null && !reqStateString.isEmpty()) { @@ -85,7 +86,7 @@ public class FairSchedulerAppsBlock extends HtmlBlock { reqAppStates.add(RMAppState.valueOf(stateString)); } } - for (RMApp app : list.apps.values()) { + for (RMApp app : apps.values()) { if (reqAppStates != null && !reqAppStates.contains(app.getState())) { continue; } @@ -122,15 +123,7 @@ public class FairSchedulerAppsBlock extends HtmlBlock { td(). a(!appInfo.isTrackingUrlReady()? 
"#" : appInfo.getTrackingUrlPretty(), appInfo.getTrackingUI())._()._(); - if (list.rendering != Render.HTML && ++i >= 20) break; } tbody._()._(); - - if (list.rendering == Render.JS_ARRAY) { - echo("\n"); - } } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java index 753e197af01..a4826e806c9 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmController.java @@ -93,8 +93,4 @@ public class RmController extends Controller { public void submit() { setTitle("Application Submission Not Allowed"); } - - public void json() { - renderJSON(AppsList.class); - } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java index 0ad11901007..f9ad7825575 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/webapp/RmView.java @@ -60,9 +60,8 @@ public class RmView extends TwoColumnLayout { } private String appsTableInit() { - AppsList list = getInstance(AppsList.class); // id, user, name, queue, starttime, finishtime, state, status, progress, ui - StringBuilder init = tableInit() + return tableInit() .append(", 'aaData': appsTableData") .append(", bDeferRender: true") .append(", bProcessing: true") @@ -78,18 +77,6 @@ public class RmView extends TwoColumnLayout { .append(", 'mRender': parseHadoopProgress }]") // Sort by id upon page load - .append(", aaSorting: [[0, 'desc']]"); - - String rows = $("rowlimit"); - int rowLimit = rows.isEmpty() ? MAX_DISPLAY_ROWS : Integer.parseInt(rows); - if (list.apps.size() < rowLimit) { - list.rendering = Render.HTML; - return init.append('}').toString(); - } - if (list.apps.size() > MAX_FAST_ROWS) { - tableInitProgress(init, list.apps.size() * 6); - } - list.rendering = Render.JS_ARRAY; - return init.append(", aaData:appsData}").toString(); + .append(", aaSorting: [[0, 'desc']]}").toString(); } }