From 96da4be11c4c0e56cf6f14fe66007c4db4e5e861 Mon Sep 17 00:00:00 2001 From: Todd Lipcon Date: Fri, 11 Mar 2011 18:44:25 +0000 Subject: [PATCH] HADOOP-7156. Workaround for unsafe implementations of getpwuid_r. Contributed by Todd Lipcon. git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1080723 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + src/java/core-default.xml | 15 ++++++ .../apache/hadoop/io/nativeio/NativeIO.java | 11 ++++ .../org/apache/hadoop/io/nativeio/NativeIO.c | 51 ++++++++++++++++++- .../hadoop/io/nativeio/TestNativeIO.java | 49 ++++++++++++++++++ 5 files changed, 126 insertions(+), 2 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d11497b0d2f..519abcbde80 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -535,6 +535,8 @@ Release 0.22.0 - Unreleased HADOOP-7145. Configuration.getLocalPath should trim whitespace from the provided directories. (todd) + HADOOP-7156. Workaround for unsafe implementations of getpwuid_r (todd) + Release 0.21.1 - Unreleased IMPROVEMENTS diff --git a/src/java/core-default.xml b/src/java/core-default.xml index aa6da1c8b16..643640b3911 100644 --- a/src/java/core-default.xml +++ b/src/java/core-default.xml @@ -108,6 +108,21 @@ + + hadoop.work.around.non.threadsafe.getpwuid + false + Some operating systems or authentication modules are known to + have broken implementations of getpwuid_r and getpwgid_r, such that these + calls are not thread-safe. Symptoms of this problem include JVM crashes + with a stack trace inside these functions. If your system exhibits this + issue, enable this configuration parameter to include a lock around the + calls as a workaround. + + An incomplete list of some systems known to have this issue is available + at http://wiki.apache.org/hadoop/KnownBrokenPwuidImplementations + + + diff --git a/src/java/org/apache/hadoop/io/nativeio/NativeIO.java b/src/java/org/apache/hadoop/io/nativeio/NativeIO.java index db4512562f8..e3c5803818c 100644 --- a/src/java/org/apache/hadoop/io/nativeio/NativeIO.java +++ b/src/java/org/apache/hadoop/io/nativeio/NativeIO.java @@ -20,6 +20,7 @@ package org.apache.hadoop.io.nativeio; import java.io.FileDescriptor; import java.io.IOException; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.util.NativeCodeLoader; import org.apache.commons.logging.Log; @@ -48,10 +49,20 @@ public class NativeIO { private static final Log LOG = LogFactory.getLog(NativeIO.class); private static boolean nativeLoaded = false; + private static boolean workaroundNonThreadSafePasswdCalls = false; + + static final String WORKAROUND_NON_THREADSAFE_CALLS_KEY = + "hadoop.workaround.non.threadsafe.getpwuid"; + static final boolean WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT = false; static { if (NativeCodeLoader.isNativeCodeLoaded()) { try { + Configuration conf = new Configuration(); + workaroundNonThreadSafePasswdCalls = conf.getBoolean( + WORKAROUND_NON_THREADSAFE_CALLS_KEY, + WORKAROUND_NON_THREADSAFE_CALLS_DEFAULT); + initNative(); nativeLoaded = true; } catch (Throwable t) { diff --git a/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c b/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c index d55072099db..209bb7a8dff 100644 --- a/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c +++ b/src/native/src/org/apache/hadoop/io/nativeio/NativeIO.c @@ -44,18 +44,50 @@ static jmethodID stat_ctor; static jclass nioe_clazz; static jmethodID nioe_ctor; +// the monitor used for working around non-threadsafe implementations +// of getpwuid_r, observed on platforms including RHEL 6.0. +// Please see HADOOP-7156 for details. +static jobject pw_lock_object; + // Internal functions static void throw_ioe(JNIEnv* env, int errnum); static ssize_t get_pw_buflen(); +/** + * Returns non-zero if the user has specified that the system + * has non-threadsafe implementations of getpwuid_r or getgrgid_r. + **/ +static int workaround_non_threadsafe_calls(JNIEnv *env, jclass clazz) { + jfieldID needs_workaround_field = (*env)->GetStaticFieldID(env, clazz, + "workaroundNonThreadSafePasswdCalls", "Z"); + PASS_EXCEPTIONS_RET(env, 0); + assert(needs_workaround_field); -static void stat_init(JNIEnv *env) { + jboolean result = (*env)->GetStaticBooleanField( + env, clazz, needs_workaround_field); + return result; +} + +static void stat_init(JNIEnv *env, jclass nativeio_class) { // Init Stat jclass clazz = (*env)->FindClass(env, "org/apache/hadoop/io/nativeio/NativeIO$Stat"); PASS_EXCEPTIONS(env); stat_clazz = (*env)->NewGlobalRef(env, clazz); stat_ctor = (*env)->GetMethodID(env, stat_clazz, "", "(Ljava/lang/String;Ljava/lang/String;I)V"); + + jclass obj_class = (*env)->FindClass(env, "java/lang/Object"); + assert(obj_class != NULL); + jmethodID obj_ctor = (*env)->GetMethodID(env, obj_class, + "", "()V"); + assert(obj_ctor != NULL); + + if (workaround_non_threadsafe_calls(env, nativeio_class)) { + pw_lock_object = (*env)->NewObject(env, obj_class, obj_ctor); + PASS_EXCEPTIONS(env); + pw_lock_object = (*env)->NewGlobalRef(env, pw_lock_object); + PASS_EXCEPTIONS(env); + } } static void stat_deinit(JNIEnv *env) { @@ -63,6 +95,10 @@ static void stat_deinit(JNIEnv *env) { (*env)->DeleteGlobalRef(env, stat_clazz); stat_clazz = NULL; } + if (pw_lock_object != NULL) { + (*env)->DeleteGlobalRef(env, pw_lock_object); + pw_lock_object = NULL; + } } static void nioe_init(JNIEnv *env) { @@ -95,7 +131,7 @@ JNIEXPORT void JNICALL Java_org_apache_hadoop_io_nativeio_NativeIO_initNative( JNIEnv *env, jclass clazz) { - stat_init(env); + stat_init(env, clazz); PASS_EXCEPTIONS_GOTO(env, error); nioe_init(env); PASS_EXCEPTIONS_GOTO(env, error); @@ -122,6 +158,7 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_fstat( { jobject ret = NULL; char *pw_buf = NULL; + int pw_lock_locked = 0; int fd = fd_get(env, fd_object); PASS_EXCEPTIONS_GOTO(env, cleanup); @@ -139,6 +176,13 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_fstat( goto cleanup; } + if (pw_lock_object != NULL) { + if ((*env)->MonitorEnter(env, pw_lock_object) != JNI_OK) { + goto cleanup; + } + pw_lock_locked = 1; + } + // Grab username struct passwd pwd, *pwdp; while ((rc = getpwuid_r(s.st_uid, &pwd, pw_buf, pw_buflen, &pwdp)) != 0) { @@ -183,6 +227,9 @@ Java_org_apache_hadoop_io_nativeio_NativeIO_fstat( cleanup: if (pw_buf != NULL) free(pw_buf); + if (pw_lock_locked) { + (*env)->MonitorExit(env, pw_lock_object); + } return ret; } diff --git a/src/test/core/org/apache/hadoop/io/nativeio/TestNativeIO.java b/src/test/core/org/apache/hadoop/io/nativeio/TestNativeIO.java index 06e02294f69..6c3f5b1b396 100644 --- a/src/test/core/org/apache/hadoop/io/nativeio/TestNativeIO.java +++ b/src/test/core/org/apache/hadoop/io/nativeio/TestNativeIO.java @@ -21,6 +21,9 @@ import java.io.File; import java.io.FileDescriptor; import java.io.FileOutputStream; import java.io.IOException; +import java.util.concurrent.atomic.AtomicReference; +import java.util.ArrayList; +import java.util.List; import org.junit.Before; import org.junit.Test; import static org.junit.Assume.*; @@ -67,6 +70,52 @@ public class TestNativeIO { NativeIO.Stat.S_IFREG, stat.getMode() & NativeIO.Stat.S_IFMT); } + /** + * Test for races in fstat usage + * + * NOTE: this test is likely to fail on RHEL 6.0 which has a non-threadsafe + * implementation of getpwuid_r. + */ + @Test + public void testMultiThreadedFstat() throws Exception { + final FileOutputStream fos = new FileOutputStream( + new File(TEST_DIR, "testfstat")); + + final AtomicReference thrown = + new AtomicReference(); + List statters = new ArrayList(); + for (int i = 0; i < 10; i++) { + Thread statter = new Thread() { + public void run() { + long et = System.currentTimeMillis() + 5000; + while (System.currentTimeMillis() < et) { + try { + NativeIO.Stat stat = NativeIO.fstat(fos.getFD()); + assertEquals(System.getProperty("user.name"), stat.getOwner()); + assertNotNull(stat.getGroup()); + assertTrue(!"".equals(stat.getGroup())); + assertEquals("Stat mode field should indicate a regular file", + NativeIO.Stat.S_IFREG, stat.getMode() & NativeIO.Stat.S_IFMT); + } catch (Throwable t) { + thrown.set(t); + } + } + } + }; + statters.add(statter); + statter.start(); + } + for (Thread t : statters) { + t.join(); + } + + fos.close(); + + if (thrown.get() != null) { + throw new RuntimeException(thrown.get()); + } + } + @Test public void testFstatClosedFd() throws Exception { FileOutputStream fos = new FileOutputStream(