Merge pull request #14609 from rmuir/32bit

fixup issues with 32-bit jvm
This commit is contained in:
Robert Muir 2015-11-08 21:23:37 -05:00
commit 336ac33eed
4 changed files with 87 additions and 48 deletions

View File

@ -20,6 +20,7 @@
package org.elasticsearch.bootstrap; package org.elasticsearch.bootstrap;
import com.sun.jna.Native; import com.sun.jna.Native;
import com.sun.jna.NativeLong;
import com.sun.jna.Structure; import com.sun.jna.Structure;
import org.apache.lucene.util.Constants; import org.apache.lucene.util.Constants;
@ -55,8 +56,8 @@ final class JNACLibrary {
/** corresponds to struct rlimit */ /** corresponds to struct rlimit */
public static final class Rlimit extends Structure implements Structure.ByReference { public static final class Rlimit extends Structure implements Structure.ByReference {
public long rlim_cur = 0; public NativeLong rlim_cur = new NativeLong(0);
public long rlim_max = 0; public NativeLong rlim_max = new NativeLong(0);
@Override @Override
protected List<String> getFieldOrder() { protected List<String> getFieldOrder() {

View File

@ -71,8 +71,8 @@ class JNANatives {
JNACLibrary.Rlimit rlimit = new JNACLibrary.Rlimit(); JNACLibrary.Rlimit rlimit = new JNACLibrary.Rlimit();
if (JNACLibrary.getrlimit(JNACLibrary.RLIMIT_MEMLOCK, rlimit) == 0) { if (JNACLibrary.getrlimit(JNACLibrary.RLIMIT_MEMLOCK, rlimit) == 0) {
rlimitSuccess = true; rlimitSuccess = true;
softLimit = rlimit.rlim_cur; softLimit = rlimit.rlim_cur.longValue();
hardLimit = rlimit.rlim_max; hardLimit = rlimit.rlim_max.longValue();
} else { } else {
logger.warn("Unable to retrieve resource limits: " + JNACLibrary.strerror(Native.getLastError())); logger.warn("Unable to retrieve resource limits: " + JNACLibrary.strerror(Native.getLastError()));
} }

View File

@ -22,6 +22,7 @@ package org.elasticsearch.bootstrap;
import com.sun.jna.Library; import com.sun.jna.Library;
import com.sun.jna.Memory; import com.sun.jna.Memory;
import com.sun.jna.Native; import com.sun.jna.Native;
import com.sun.jna.NativeLong;
import com.sun.jna.Pointer; import com.sun.jna.Pointer;
import com.sun.jna.Structure; import com.sun.jna.Structure;
import com.sun.jna.ptr.PointerByReference; import com.sun.jna.ptr.PointerByReference;
@ -38,7 +39,9 @@ import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap;
import java.util.List; import java.util.List;
import java.util.Map;
/** /**
* Installs a limited form of secure computing mode, * Installs a limited form of secure computing mode,
@ -46,7 +49,7 @@ import java.util.List;
* <p> * <p>
* This is only supported on the Linux, Solaris, and Mac OS X operating systems. * This is only supported on the Linux, Solaris, and Mac OS X operating systems.
* <p> * <p>
* On Linux it currently supports on the amd64 architecture, on Linux kernels 3.5 or above, and requires * On Linux it currently supports amd64 and i386 architectures, requires Linux kernel 3.5 or above, and requires
* {@code CONFIG_SECCOMP} and {@code CONFIG_SECCOMP_FILTER} compiled into the kernel. * {@code CONFIG_SECCOMP} and {@code CONFIG_SECCOMP_FILTER} compiled into the kernel.
* <p> * <p>
* On Linux BPF Filters are installed using either {@code seccomp(2)} (3.17+) or {@code prctl(2)} (3.5+). {@code seccomp(2)} * On Linux BPF Filters are installed using either {@code seccomp(2)} (3.17+) or {@code prctl(2)} (3.5+). {@code seccomp(2)}
@ -95,12 +98,12 @@ final class Seccomp {
/** /**
* maps to prctl(2) * maps to prctl(2)
*/ */
int prctl(int option, long arg2, long arg3, long arg4, long arg5); int prctl(int option, NativeLong arg2, NativeLong arg3, NativeLong arg4, NativeLong arg5);
/** /**
* used to call seccomp(2), its too new... * used to call seccomp(2), its too new...
* this is the only way, DONT use it on some other architecture unless you know wtf you are doing * this is the only way, DONT use it on some other architecture unless you know wtf you are doing
*/ */
long syscall(long number, Object... args); NativeLong syscall(NativeLong number, Object... args);
}; };
// null if unavailable or something goes wrong. // null if unavailable or something goes wrong.
@ -119,7 +122,6 @@ final class Seccomp {
} }
/** the preferred method is seccomp(2), since we can apply to all threads of the process */ /** the preferred method is seccomp(2), since we can apply to all threads of the process */
static final int SECCOMP_SYSCALL_NR = 317; // since Linux 3.17
static final int SECCOMP_SET_MODE_FILTER = 1; // since Linux 3.17 static final int SECCOMP_SET_MODE_FILTER = 1; // since Linux 3.17
static final int SECCOMP_FILTER_FLAG_TSYNC = 1; // since Linux 3.17 static final int SECCOMP_FILTER_FLAG_TSYNC = 1; // since Linux 3.17
@ -190,7 +192,6 @@ final class Seccomp {
return new SockFilter((short) code, (byte) jt, (byte) jf, k); return new SockFilter((short) code, (byte) jt, (byte) jf, k);
} }
static final int AUDIT_ARCH_X86_64 = 0xC000003E;
static final int SECCOMP_RET_ERRNO = 0x00050000; static final int SECCOMP_RET_ERRNO = 0x00050000;
static final int SECCOMP_RET_DATA = 0x0000FFFF; static final int SECCOMP_RET_DATA = 0x0000FFFF;
static final int SECCOMP_RET_ALLOW = 0x7FFF0000; static final int SECCOMP_RET_ALLOW = 0x7FFF0000;
@ -201,29 +202,63 @@ final class Seccomp {
static final int EINVAL = 0x16; static final int EINVAL = 0x16;
static final int ENOSYS = 0x26; static final int ENOSYS = 0x26;
// offsets (arch dependent) that our BPF checks // offsets that our BPF checks
// check with offsetof() when adding a new arch, move to Arch if different.
static final int SECCOMP_DATA_NR_OFFSET = 0x00; static final int SECCOMP_DATA_NR_OFFSET = 0x00;
static final int SECCOMP_DATA_ARCH_OFFSET = 0x04; static final int SECCOMP_DATA_ARCH_OFFSET = 0x04;
// currently these ranges are blocked (inclusive): static class Arch {
// execve is really the only one needed but why let someone fork a 30G heap? (not really what happens) /** AUDIT_ARCH_XXX constant from linux/audit.h */
// ... final int audit;
// 57: fork /** syscall limit (necessary for blacklisting on amd64, to ban 32-bit syscalls) */
// 58: vfork final int limit;
// 59: execve /** __NR_fork */
// ... final int fork;
// 322: execveat /** __NR_vfork */
// ... final int vfork;
static final int NR_SYSCALL_FORK = 57; /** __NR_execve */
static final int NR_SYSCALL_EXECVE = 59; final int execve;
static final int NR_SYSCALL_EXECVEAT = 322; // since Linux 3.19 /** __NR_execveat */
static final int NR_SYSCALL_TUXCALL = 184; // should return ENOSYS final int execveat;
/** __NR_seccomp */
final int seccomp;
Arch(int audit, int limit, int fork, int vfork, int execve, int execveat, int seccomp) {
this.audit = audit;
this.limit = limit;
this.fork = fork;
this.vfork = vfork;
this.execve = execve;
this.execveat = execveat;
this.seccomp = seccomp;
}
}
/** supported architectures map keyed by os.arch */
private static final Map<String,Arch> ARCHITECTURES;
static {
Map<String,Arch> m = new HashMap<>();
m.put("amd64", new Arch(0xC000003E, 0x3FFFFFFF, 57, 58, 59, 322, 317));
m.put("i386", new Arch(0x40000003, 0xFFFFFFFF, 2, 190, 11, 358, 354));
ARCHITECTURES = Collections.unmodifiableMap(m);
}
/** invokes prctl() from linux libc library */
private static int linux_prctl(int option, long arg2, long arg3, long arg4, long arg5) {
return linux_libc.prctl(option, new NativeLong(arg2), new NativeLong(arg3), new NativeLong(arg4), new NativeLong(arg5));
}
/** invokes syscall() from linux libc library */
private static long linux_syscall(long number, Object... args) {
return linux_libc.syscall(new NativeLong(number), args).longValue();
}
/** try to install our BPF filters via seccomp() or prctl() to block execution */ /** try to install our BPF filters via seccomp() or prctl() to block execution */
private static int linuxImpl() { private static int linuxImpl() {
// first be defensive: we can give nice errors this way, at the very least. // first be defensive: we can give nice errors this way, at the very least.
// also, some of these security features get backported to old versions, checking kernel version here is a big no-no! // also, some of these security features get backported to old versions, checking kernel version here is a big no-no!
boolean supported = Constants.LINUX && "amd64".equals(Constants.OS_ARCH); final Arch arch = ARCHITECTURES.get(Constants.OS_ARCH);
boolean supported = Constants.LINUX && arch != null;
if (supported == false) { if (supported == false) {
throw new UnsupportedOperationException("seccomp unavailable: '" + Constants.OS_ARCH + "' architecture unsupported"); throw new UnsupportedOperationException("seccomp unavailable: '" + Constants.OS_ARCH + "' architecture unsupported");
} }
@ -237,7 +272,7 @@ final class Seccomp {
// check that unimplemented syscalls actually return ENOSYS // check that unimplemented syscalls actually return ENOSYS
// you never know (e.g. https://code.google.com/p/chromium/issues/detail?id=439795) // you never know (e.g. https://code.google.com/p/chromium/issues/detail?id=439795)
if (linux_libc.syscall(NR_SYSCALL_TUXCALL) >= 0 || Native.getLastError() != ENOSYS) { if (linux_syscall(999) >= 0 || Native.getLastError() != ENOSYS) {
throw new UnsupportedOperationException("seccomp unavailable: your kernel is buggy and you should upgrade"); throw new UnsupportedOperationException("seccomp unavailable: your kernel is buggy and you should upgrade");
} }
@ -246,7 +281,7 @@ final class Seccomp {
final int bogusArg = 0xf7a46a5c; final int bogusArg = 0xf7a46a5c;
// test seccomp(BOGUS) // test seccomp(BOGUS)
long ret = linux_libc.syscall(SECCOMP_SYSCALL_NR, bogusArg); long ret = linux_syscall(arch.seccomp, bogusArg);
if (ret != -1) { if (ret != -1) {
throw new UnsupportedOperationException("seccomp unavailable: seccomp(BOGUS_OPERATION) returned " + ret); throw new UnsupportedOperationException("seccomp unavailable: seccomp(BOGUS_OPERATION) returned " + ret);
} else { } else {
@ -259,7 +294,7 @@ final class Seccomp {
} }
// test seccomp(VALID, BOGUS) // test seccomp(VALID, BOGUS)
ret = linux_libc.syscall(SECCOMP_SYSCALL_NR, SECCOMP_SET_MODE_FILTER, bogusArg); ret = linux_syscall(arch.seccomp, SECCOMP_SET_MODE_FILTER, bogusArg);
if (ret != -1) { if (ret != -1) {
throw new UnsupportedOperationException("seccomp unavailable: seccomp(SECCOMP_SET_MODE_FILTER, BOGUS_FLAG) returned " + ret); throw new UnsupportedOperationException("seccomp unavailable: seccomp(SECCOMP_SET_MODE_FILTER, BOGUS_FLAG) returned " + ret);
} else { } else {
@ -272,7 +307,7 @@ final class Seccomp {
} }
// test prctl(BOGUS) // test prctl(BOGUS)
ret = linux_libc.prctl(bogusArg, 0, 0, 0, 0); ret = linux_prctl(bogusArg, 0, 0, 0, 0);
if (ret != -1) { if (ret != -1) {
throw new UnsupportedOperationException("seccomp unavailable: prctl(BOGUS_OPTION) returned " + ret); throw new UnsupportedOperationException("seccomp unavailable: prctl(BOGUS_OPTION) returned " + ret);
} else { } else {
@ -287,7 +322,7 @@ final class Seccomp {
// now just normal defensive checks // now just normal defensive checks
// check for GET_NO_NEW_PRIVS // check for GET_NO_NEW_PRIVS
switch (linux_libc.prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)) { switch (linux_prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0)) {
case 0: break; // not yet set case 0: break; // not yet set
case 1: break; // already set by caller case 1: break; // already set by caller
default: default:
@ -299,7 +334,7 @@ final class Seccomp {
} }
} }
// check for SECCOMP // check for SECCOMP
switch (linux_libc.prctl(PR_GET_SECCOMP, 0, 0, 0, 0)) { switch (linux_prctl(PR_GET_SECCOMP, 0, 0, 0, 0)) {
case 0: break; // not yet set case 0: break; // not yet set
case 2: break; // already in filter mode by caller case 2: break; // already in filter mode by caller
default: default:
@ -311,7 +346,7 @@ final class Seccomp {
} }
} }
// check for SECCOMP_MODE_FILTER // check for SECCOMP_MODE_FILTER
if (linux_libc.prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 0, 0, 0) != 0) { if (linux_prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 0, 0, 0) != 0) {
int errno = Native.getLastError(); int errno = Native.getLastError();
switch (errno) { switch (errno) {
case EFAULT: break; // available case EFAULT: break; // available
@ -321,27 +356,28 @@ final class Seccomp {
} }
// ok, now set PR_SET_NO_NEW_PRIVS, needed to be able to set a seccomp filter as ordinary user // ok, now set PR_SET_NO_NEW_PRIVS, needed to be able to set a seccomp filter as ordinary user
if (linux_libc.prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) { if (linux_prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) != 0) {
throw new UnsupportedOperationException("prctl(PR_SET_NO_NEW_PRIVS): " + JNACLibrary.strerror(Native.getLastError())); throw new UnsupportedOperationException("prctl(PR_SET_NO_NEW_PRIVS): " + JNACLibrary.strerror(Native.getLastError()));
} }
// check it worked // check it worked
if (linux_libc.prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) != 1) { if (linux_prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) != 1) {
throw new UnsupportedOperationException("seccomp filter did not really succeed: prctl(PR_GET_NO_NEW_PRIVS): " + JNACLibrary.strerror(Native.getLastError())); throw new UnsupportedOperationException("seccomp filter did not really succeed: prctl(PR_GET_NO_NEW_PRIVS): " + JNACLibrary.strerror(Native.getLastError()));
} }
// BPF installed to check arch, then syscall range. See https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt for details. // BPF installed to check arch, limit, then syscall. See https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt for details.
SockFilter insns[] = { SockFilter insns[] = {
/* 1 */ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, SECCOMP_DATA_ARCH_OFFSET), // /* 1 */ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, SECCOMP_DATA_ARCH_OFFSET), //
/* 2 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, AUDIT_ARCH_X86_64, 0, 4), // if (arch != amd64) goto fail; /* 2 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arch.audit, 0, 7), // if (arch != audit) goto fail;
/* 3 */ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, SECCOMP_DATA_NR_OFFSET), // /* 3 */ BPF_STMT(BPF_LD + BPF_W + BPF_ABS, SECCOMP_DATA_NR_OFFSET), //
/* 4 */ BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K, NR_SYSCALL_FORK, 0, 3), // if (syscall < SYSCALL_FORK) goto pass; /* 4 */ BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, arch.limit, 5, 0), // if (syscall > LIMIT) goto fail;
/* 5 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, NR_SYSCALL_EXECVEAT, 1, 0), // if (syscall == SYSCALL_EXECVEAT) goto fail; /* 5 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arch.fork, 4, 0), // if (syscall == FORK) goto fail;
/* 6 */ BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K, NR_SYSCALL_EXECVE, 1, 0), // if (syscall > SYSCALL_EXECVE) goto pass; /* 6 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arch.vfork, 3, 0), // if (syscall == VFORK) goto fail;
/* 7 */ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (EACCES & SECCOMP_RET_DATA)), // fail: return EACCES; /* 7 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arch.execve, 2, 0), // if (syscall == EXECVE) goto fail;
/* 8 */ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW) // pass: return OK; /* 8 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, arch.execveat, 1, 0), // if (syscall == EXECVEAT) goto fail;
/* 9 */ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW), // pass: return OK;
/* 10 */ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (EACCES & SECCOMP_RET_DATA)), // fail: return EACCES;
}; };
// seccomp takes a long, so we pass it one explicitly to keep the JNA simple // seccomp takes a long, so we pass it one explicitly to keep the JNA simple
SockFProg prog = new SockFProg(insns); SockFProg prog = new SockFProg(insns);
prog.write(); prog.write();
@ -350,13 +386,13 @@ final class Seccomp {
int method = 1; int method = 1;
// install filter, if this works, after this there is no going back! // install filter, if this works, after this there is no going back!
// first try it with seccomp(SECCOMP_SET_MODE_FILTER), falling back to prctl() // first try it with seccomp(SECCOMP_SET_MODE_FILTER), falling back to prctl()
if (linux_libc.syscall(SECCOMP_SYSCALL_NR, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, pointer) != 0) { if (linux_syscall(arch.seccomp, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, new NativeLong(pointer)) != 0) {
method = 0; method = 0;
int errno1 = Native.getLastError(); int errno1 = Native.getLastError();
if (logger.isDebugEnabled()) { if (logger.isDebugEnabled()) {
logger.debug("seccomp(SECCOMP_SET_MODE_FILTER): " + JNACLibrary.strerror(errno1) + ", falling back to prctl(PR_SET_SECCOMP)..."); logger.debug("seccomp(SECCOMP_SET_MODE_FILTER): " + JNACLibrary.strerror(errno1) + ", falling back to prctl(PR_SET_SECCOMP)...");
} }
if (linux_libc.prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, pointer, 0, 0) != 0) { if (linux_prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, pointer, 0, 0) != 0) {
int errno2 = Native.getLastError(); int errno2 = Native.getLastError();
throw new UnsupportedOperationException("seccomp(SECCOMP_SET_MODE_FILTER): " + JNACLibrary.strerror(errno1) + throw new UnsupportedOperationException("seccomp(SECCOMP_SET_MODE_FILTER): " + JNACLibrary.strerror(errno1) +
", prctl(PR_SET_SECCOMP): " + JNACLibrary.strerror(errno2)); ", prctl(PR_SET_SECCOMP): " + JNACLibrary.strerror(errno2));
@ -364,7 +400,7 @@ final class Seccomp {
} }
// now check that the filter was really installed, we should be in filter mode. // now check that the filter was really installed, we should be in filter mode.
if (linux_libc.prctl(PR_GET_SECCOMP, 0, 0, 0, 0) != 2) { if (linux_prctl(PR_GET_SECCOMP, 0, 0, 0, 0) != 2) {
throw new UnsupportedOperationException("seccomp filter installation did not really succeed. seccomp(PR_GET_SECCOMP): " + JNACLibrary.strerror(Native.getLastError())); throw new UnsupportedOperationException("seccomp filter installation did not really succeed. seccomp(PR_GET_SECCOMP): " + JNACLibrary.strerror(Native.getLastError()));
} }

View File

@ -84,8 +84,10 @@ public class IndexStoreTests extends ESTestCase {
try (final Directory directory = service.newFSDirectory(tempDir, NoLockFactory.INSTANCE)) { try (final Directory directory = service.newFSDirectory(tempDir, NoLockFactory.INSTANCE)) {
if (Constants.WINDOWS) { if (Constants.WINDOWS) {
assertTrue(directory.toString(), directory instanceof MMapDirectory || directory instanceof SimpleFSDirectory); assertTrue(directory.toString(), directory instanceof MMapDirectory || directory instanceof SimpleFSDirectory);
} else { } else if (Constants.JRE_IS_64BIT) {
assertTrue(directory.toString(), directory instanceof FileSwitchDirectory); assertTrue(directory.toString(), directory instanceof FileSwitchDirectory);
} else {
assertTrue(directory.toString(), directory instanceof NIOFSDirectory);
} }
} }
} }