Block process execution with seccomp on linux/amd64

Block execve(), fork(), and vfork() system calls, returning EACCES instead,
on kernels that support seccomp-bpf: either via seccomp() or falling back
to prctl().

Only linux/amd64 is supported. This feature can be disabled (in case
of problems) with bootstrap.seccomp=false.

Closes #13753

Squashed commit of the following:

commit 92cee05c72b49e532d41be7b16709e1c9f919fa9
Author: Robert Muir <rmuir@apache.org>
Date:   Thu Sep 24 10:12:51 2015 -0400

    Add a note about why we don't parse uname() or anything

commit b427971f45cbda4d0b964ddc4a55fae638880335
Author: Robert Muir <rmuir@apache.org>
Date:   Thu Sep 24 09:44:31 2015 -0400

    style only: we already pull errno into a local, use it for catch-all case

commit ddf93305525ed1546baf91f7902148a8f5b1ad06
Author: Robert Muir <rmuir@apache.org>
Date:   Thu Sep 24 08:36:01 2015 -0400

    add TODO

commit f29d1b7b809a9d4c1fcf15f6064e43f7d1b24696
Author: Robert Muir <rmuir@apache.org>
Date:   Thu Sep 24 08:33:28 2015 -0400

    Add full stacktrace at debug level always

commit a3c991ff8b0b16dc5e128af8fb3dfa6346c6d6f1
Author: Robert Muir <rmuir@apache.org>
Date:   Thu Sep 24 00:08:19 2015 -0400

    Add missing check just in case.

commit 628ed9c77603699aa9c67890fe7632b0e662a911
Author: Robert Muir <rmuir@apache.org>
Date:   Wed Sep 23 22:47:16 2015 -0400

    Add public getter, for stats or whatever if they need to know this

commit 3e2265b5f89d42043d9a07d4525ce42e2cb1c727
Author: Robert Muir <rmuir@apache.org>
Date:   Wed Sep 23 22:43:06 2015 -0400

    Enable use of seccomp(2) on Linux 3.17+ which provides more protection.
    Add nice errors.
    Add all kinds of checks and paranoia.
    Add documentation.
    Add boolean switch.

commit 0e421f7fa2d5236c8fa2cd073bcb616f5bcd2d23
Author: Robert Muir <rmuir@apache.org>
Date:   Wed Sep 23 21:36:32 2015 -0400

    Add defensive checks and nice error messages

commit 6231c3b7c96a81af8460cde30135e077f60a3f39
Author: Robert Muir <rmuir@apache.org>
Date:   Wed Sep 23 20:52:40 2015 -0400

    clean up JNA and BPF. block fork and vfork too.

commit bb31e8a6ef03ceeb1d5137c84d50378c270af85a
Author: Robert Muir <rmuir@apache.org>
Date:   Wed Sep 23 19:00:32 2015 -0400

    order is LE already for the JNA buffer, but be explicit about it

commit 10456d2f08f12ddc3d60989acb86b37be6a4b12b
Author: Robert Muir <rmuir@apache.org>
Date:   Wed Sep 23 17:47:07 2015 -0400

    block process execution with seccomp on linux/amd64
This commit is contained in:
Robert Muir 2015-09-24 12:17:21 -04:00
parent d121550a4f
commit 8b88a69d9c
7 changed files with 392 additions and 3 deletions

View File

@ -79,7 +79,7 @@ final class Bootstrap {
} }
/** initialize native resources */ /** initialize native resources */
public static void initializeNatives(boolean mlockAll, boolean ctrlHandler) { public static void initializeNatives(boolean mlockAll, boolean seccomp, boolean ctrlHandler) {
final ESLogger logger = Loggers.getLogger(Bootstrap.class); final ESLogger logger = Loggers.getLogger(Bootstrap.class);
// check if the user is running as root, and bail // check if the user is running as root, and bail
@ -91,6 +91,11 @@ final class Bootstrap {
} }
} }
// enable secure computing mode
if (seccomp) {
Natives.trySeccomp();
}
// mlockall if requested // mlockall if requested
if (mlockAll) { if (mlockAll) {
if (Constants.WINDOWS) { if (Constants.WINDOWS) {
@ -134,7 +139,8 @@ final class Bootstrap {
private void setup(boolean addShutdownHook, Settings settings, Environment environment) throws Exception { private void setup(boolean addShutdownHook, Settings settings, Environment environment) throws Exception {
initializeNatives(settings.getAsBoolean("bootstrap.mlockall", false), initializeNatives(settings.getAsBoolean("bootstrap.mlockall", false),
settings.getAsBoolean("bootstrap.ctrlhandler", true)); settings.getAsBoolean("bootstrap.seccomp", true),
settings.getAsBoolean("bootstrap.ctrlhandler", true));
// initialize probes before the security manager is installed // initialize probes before the security manager is installed
initializeProbes(); initializeProbes();

View File

@ -43,4 +43,11 @@ public final class BootstrapInfo {
public static boolean isMemoryLocked() { public static boolean isMemoryLocked() {
return Natives.isMemoryLocked(); return Natives.isMemoryLocked();
} }
/**
 * Reports whether the seccomp system call filter is active for this process
 * (linux/amd64 only).
 *
 * @return the value reported by {@code Natives.isSeccompInstalled()}
 */
public static boolean isSeccompInstalled() {
    final boolean seccompActive = Natives.isSeccompInstalled();
    return seccompActive;
}
} }

View File

@ -41,6 +41,8 @@ class JNANatives {
// Set to true, in case native mlockall call was successful // Set to true, in case native mlockall call was successful
static boolean LOCAL_MLOCKALL = false; static boolean LOCAL_MLOCKALL = false;
// Flipped to true by trySeccomp() only after Seccomp.installFilter() returns without throwing;
// remains false when installation was skipped or failed.
static boolean LOCAL_SECCOMP = false;
static void tryMlockall() { static void tryMlockall() {
int errno = Integer.MIN_VALUE; int errno = Integer.MIN_VALUE;
@ -170,4 +172,19 @@ class JNANatives {
} }
} }
/**
 * Best-effort installation of the seccomp filter.
 * <p>
 * Does nothing unless {@code Constants.LINUX} is set and {@code Constants.OS_ARCH} is
 * {@code "amd64"}. On success {@link #LOCAL_SECCOMP} is set to {@code true}; on failure the
 * problem is logged and startup continues.
 */
static void trySeccomp() {
    if (Constants.LINUX && "amd64".equals(Constants.OS_ARCH)) {
        try {
            Seccomp.installFilter();
            LOCAL_SECCOMP = true;
        } catch (Exception e) {
            // this is likely to happen unless the kernel is newish, its a best effort at the moment
            logSeccompFailure(e);
        } catch (LinkageError e) {
            // a missing native symbol (UnsatisfiedLinkError) or a failed class initialization
            // (NoClassDefFoundError, ExceptionInInitializerError) is an Error, not an Exception:
            // without this clause it would escape the "best effort" handling above.
            logSeccompFailure(e);
        }
    }
}

/** Logs a failed filter installation: full stacktrace at debug level, short message at warn level. */
private static void logSeccompFailure(Throwable cause) {
    // so we log stacktrace at debug for now...
    if (logger.isDebugEnabled()) {
        logger.debug("unable to install seccomp filter", cause);
    }
    logger.warn("unable to install seccomp filter: " + cause.getMessage());
}
} }

View File

@ -88,4 +88,19 @@ final class Natives {
} }
return JNANatives.LOCAL_MLOCKALL; return JNANatives.LOCAL_MLOCKALL;
} }
/**
 * Asks the JNA-backed implementation to install the seccomp filter.
 * When JNA is not available, only a warning is logged.
 */
static void trySeccomp() {
    if (JNA_AVAILABLE) {
        JNANatives.trySeccomp();
    } else {
        logger.warn("cannot install seccomp filters because JNA is not available");
    }
}
/**
 * @return {@code true} only when JNA is available and {@code JNANatives.LOCAL_SECCOMP} is set;
 *         {@code JNANatives} is not consulted at all when JNA is unavailable
 */
static boolean isSeccompInstalled() {
    // short-circuit keeps JNANatives untouched when JNA is missing
    return JNA_AVAILABLE && JNANatives.LOCAL_SECCOMP;
}
} }

View File

@ -0,0 +1,271 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.bootstrap;
import com.sun.jna.Library;
import com.sun.jna.Memory;
import com.sun.jna.Native;
import com.sun.jna.Pointer;
import com.sun.jna.Structure;
import org.apache.lucene.util.Constants;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.List;
/**
 * Installs a limited form of Linux secure computing mode (filter mode).
 * This filters system calls to block process execution.
 * <p>
 * This is only supported on the amd64 architecture, on Linux kernels 3.5 or above, and requires
 * {@code CONFIG_SECCOMP} and {@code CONFIG_SECCOMP_FILTER} compiled into the kernel.
 * <p>
 * Filters are installed using either {@code seccomp(2)} (3.17+) or {@code prctl(2)} (3.5+). {@code seccomp(2)}
 * is preferred, as it allows filters to be applied to any existing threads in the process, and one motivation
 * here is to protect against bugs in the JVM. Otherwise, code will fall back to the {@code prctl(2)} method
 * which will at least protect elasticsearch application threads.
 * <p>
 * The filters will return {@code EACCES} (Access Denied) for the following system calls:
 * <ul>
 *   <li>{@code execve}</li>
 *   <li>{@code fork}</li>
 *   <li>{@code vfork}</li>
 * </ul>
 * <p>
 * {@code EACCES} is also returned for every call made from a foreign architecture and for every
 * system call number above {@link #SYSCALL_LIMIT}. The latter rejects the x32 ABI, which shares
 * {@code AUDIT_ARCH_X86_64} but marks its calls with {@code __X32_SYSCALL_BIT}; without that check
 * the x32 variants of the calls above would slip past the number range test.
 * <p>
 * This is not intended as a sandbox. It is another level of security, mostly intended to annoy
 * security researchers and make their lives more difficult in achieving "remote execution" exploits.
 * @see <a href="http://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt">
 *      http://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt</a>
 */
// only supported on linux/amd64
// not an example of how to write code!!!
final class Seccomp {
    private static final ESLogger logger = Loggers.getLogger(Seccomp.class);

    /** we use an explicit interface for native methods, for varargs support */
    static interface LinuxLibrary extends Library {
        /**
         * maps to prctl(2)
         */
        int prctl(int option, long arg2, long arg3, long arg4, long arg5);

        /**
         * used to call seccomp(2), its too new...
         * this is the only way, DONT use it on some other architecture unless you know wtf you are doing
         */
        long syscall(long number, Object... args);
    }

    // null if something goes wrong.
    static final LinuxLibrary libc;

    static {
        LinuxLibrary lib = null;
        try {
            lib = (LinuxLibrary) Native.loadLibrary("c", LinuxLibrary.class);
        } catch (UnsatisfiedLinkError e) {
            logger.warn("unable to link C library. native methods (seccomp) will be disabled.", e);
        }
        libc = lib;
    }

    /** the preferred method is seccomp(2), since we can apply to all threads of the process */
    static final int SECCOMP_SYSCALL_NR        = 317; // since Linux 3.17
    static final int SECCOMP_SET_MODE_FILTER   = 1;   // since Linux 3.17
    static final int SECCOMP_FILTER_FLAG_TSYNC = 1;   // since Linux 3.17

    /** otherwise, we can use prctl(2), which will at least protect ES application threads */
    static final int PR_GET_NO_NEW_PRIVS = 39; // since Linux 3.5
    static final int PR_SET_NO_NEW_PRIVS = 38; // since Linux 3.5
    static final int PR_GET_SECCOMP      = 21; // since Linux 2.6.23
    static final int PR_SET_SECCOMP      = 22; // since Linux 2.6.23
    static final int SECCOMP_MODE_FILTER = 2;  // since Linux 3.5

    /** serialized size in bytes of one struct sock_filter: short + byte + byte + int */
    static final int SOCK_FILTER_SIZE = 8;

    /** corresponds to struct sock_filter */
    static final class SockFilter {
        short code; // insn
        byte jt;    // number of insn to jump (skip) if true
        byte jf;    // number of insn to jump (skip) if false
        int k;      // additional data

        SockFilter(short code, byte jt, byte jf, int k) {
            this.code = code;
            this.jt = jt;
            this.jf = jf;
            this.k = k;
        }
    }

    /** corresponds to struct sock_fprog */
    public static final class SockFProg extends Structure implements Structure.ByReference {
        public short len;      // number of filters
        public Pointer filter; // filters

        /**
         * Copies the given instructions into native memory, one 8-byte record per instruction
         * in declaration order, and points {@link #filter} at that memory.
         */
        public SockFProg(SockFilter filters[]) {
            len = (short) filters.length;
            // serialize struct sock_filter * explicitly, its less confusing than the JNA magic we would need
            Memory mem = new Memory(len * SOCK_FILTER_SIZE);
            ByteBuffer bbuf = mem.getByteBuffer(0, len * SOCK_FILTER_SIZE);
            bbuf.order(ByteOrder.nativeOrder()); // little endian
            for (SockFilter f : filters) {
                bbuf.putShort(f.code);
                bbuf.put(f.jt);
                bbuf.put(f.jf);
                bbuf.putInt(f.k);
            }
            this.filter = mem;
        }

        @Override
        protected List<String> getFieldOrder() {
            return Arrays.asList("len", "filter");
        }
    }

    // BPF "macros" and constants
    static final int BPF_LD  = 0x00;
    static final int BPF_W   = 0x00;
    static final int BPF_ABS = 0x20;
    static final int BPF_JMP = 0x05;
    static final int BPF_JEQ = 0x10;
    static final int BPF_JGE = 0x30;
    static final int BPF_JGT = 0x20;
    static final int BPF_RET = 0x06;
    static final int BPF_K   = 0x00;

    /** builds a non-branching instruction (both jump offsets are zero) */
    static SockFilter BPF_STMT(int code, int k) {
        return new SockFilter((short) code, (byte) 0, (byte) 0, k);
    }

    /** builds a conditional jump; jt/jf are the number of instructions to skip when true/false */
    static SockFilter BPF_JUMP(int code, int k, int jt, int jf) {
        return new SockFilter((short) code, (byte) jt, (byte) jf, k);
    }

    static final int AUDIT_ARCH_X86_64 = 0xC000003E;
    static final int SECCOMP_RET_ERRNO = 0x00050000;
    static final int SECCOMP_RET_DATA  = 0x0000FFFF;
    static final int SECCOMP_RET_ALLOW = 0x7FFF0000;

    // some errno constants for error checking/handling
    static final int EACCES = 0x0D;
    static final int EFAULT = 0x0E;
    static final int EINVAL = 0x16;
    static final int ENOSYS = 0x26;

    // offsets (arch dependent) that our BPF checks
    static final int SECCOMP_DATA_NR_OFFSET   = 0x00;
    static final int SECCOMP_DATA_ARCH_OFFSET = 0x04;

    // largest native amd64 syscall number we accept: anything above has __X32_SYSCALL_BIT (0x40000000)
    // set, i.e. it is an x32 ABI call that reports the same arch value but uses different numbering.
    static final int SYSCALL_LIMIT = 0x3FFFFFFF;

    // currently this range is blocked (inclusive):
    // execve is really the only one needed but why let someone fork a 30G heap? (not really what happens)
    // ...
    // 57: fork
    // 58: vfork
    // 59: execve
    // ...
    static final int BLACKLIST_START = 57;
    static final int BLACKLIST_END   = 59;

    // TODO: execveat()? its less of a risk since the jvm does not use it...

    /**
     * try to install our filters
     *
     * @throws IllegalStateException         if called on anything other than linux/amd64
     * @throws UnsupportedOperationException if the C library could not be linked, a required kernel
     *                                       feature is reported missing, or installation fails
     */
    static void installFilter() {
        // first be defensive: we can give nice errors this way, at the very least.
        // also, some of these security features get backported to old versions, checking kernel version here is a big no-no!
        boolean supported = Constants.LINUX && "amd64".equals(Constants.OS_ARCH);
        if (supported == false) {
            throw new IllegalStateException("bug: should not be trying to initialize seccomp for an unsupported architecture");
        }

        // we couldn't link methods, could be some really ancient kernel (e.g. < 2.1.57) or some bug
        if (libc == null) {
            throw new UnsupportedOperationException("seccomp unavailable: could not link methods. requires kernel 3.5+ with CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER compiled in");
        }

        // check for kernel version
        if (libc.prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0) < 0) {
            int errno = Native.getLastError();
            switch (errno) {
                case ENOSYS: throw new UnsupportedOperationException("seccomp unavailable: requires kernel 3.5+ with CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER compiled in");
                default: throw new UnsupportedOperationException("prctl(PR_GET_NO_NEW_PRIVS): " + JNACLibrary.strerror(errno));
            }
        }
        // check for SECCOMP
        if (libc.prctl(PR_GET_SECCOMP, 0, 0, 0, 0) < 0) {
            int errno = Native.getLastError();
            switch (errno) {
                case EINVAL: throw new UnsupportedOperationException("seccomp unavailable: CONFIG_SECCOMP not compiled into kernel, CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER are needed");
                default: throw new UnsupportedOperationException("prctl(PR_GET_SECCOMP): " + JNACLibrary.strerror(errno));
            }
        }
        // check for SECCOMP_MODE_FILTER: probe with a null program, EFAULT is the "supported" answer
        if (libc.prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, 0, 0, 0) < 0) {
            int errno = Native.getLastError();
            switch (errno) {
                case EFAULT: break; // available
                case EINVAL: throw new UnsupportedOperationException("seccomp unavailable: CONFIG_SECCOMP_FILTER not compiled into kernel, CONFIG_SECCOMP and CONFIG_SECCOMP_FILTER are needed");
                default: throw new UnsupportedOperationException("prctl(PR_SET_SECCOMP): " + JNACLibrary.strerror(errno));
            }
        }

        // ok, now set PR_SET_NO_NEW_PRIVS, needed to be able to set a seccomp filter as ordinary user
        if (libc.prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) {
            throw new UnsupportedOperationException("prctl(PR_SET_NO_NEW_PRIVS): " + JNACLibrary.strerror(Native.getLastError()));
        }

        // BPF installed to check arch, then syscall limit, then syscall range.
        // See https://www.kernel.org/doc/Documentation/prctl/seccomp_filter.txt for details.
        // Jump offsets are relative to the NEXT instruction; BPF comparisons are unsigned.
        SockFilter insns[] = {
          /* 1 */ BPF_STMT(BPF_LD  + BPF_W   + BPF_ABS, SECCOMP_DATA_ARCH_OFFSET),             // if (arch != amd64) goto fail;
          /* 2 */ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K,   AUDIT_ARCH_X86_64, 0, 4),              //
          /* 3 */ BPF_STMT(BPF_LD  + BPF_W   + BPF_ABS, SECCOMP_DATA_NR_OFFSET),               // if (syscall > SYSCALL_LIMIT) goto fail; (x32 ABI)
          /* 4 */ BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K,   SYSCALL_LIMIT,     2, 0),              //
          /* 5 */ BPF_JUMP(BPF_JMP + BPF_JGE + BPF_K,   BLACKLIST_START,   0, 2),              // if (syscall < BLACKLIST_START) goto pass;
          /* 6 */ BPF_JUMP(BPF_JMP + BPF_JGT + BPF_K,   BLACKLIST_END,     1, 0),              // if (syscall > BLACKLIST_END) goto pass;
          /* 7 */ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ERRNO | (EACCES & SECCOMP_RET_DATA)),  // fail: return EACCES;
          /* 8 */ BPF_STMT(BPF_RET + BPF_K, SECCOMP_RET_ALLOW)                                 // pass: return OK;
        };

        // seccomp takes a long, so we pass it one explicitly to keep the JNA simple
        SockFProg prog = new SockFProg(insns);
        prog.write();
        long pointer = Pointer.nativeValue(prog.getPointer());

        // install filter, if this works, after this there is no going back!
        // first try it with seccomp(SECCOMP_SET_MODE_FILTER), falling back to prctl()
        if (libc.syscall(SECCOMP_SYSCALL_NR, SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, pointer) != 0) {
            int errno1 = Native.getLastError();
            if (logger.isDebugEnabled()) {
                logger.debug("seccomp(SECCOMP_SET_MODE_FILTER): " + JNACLibrary.strerror(errno1) + ", falling back to prctl(PR_SET_SECCOMP)...");
            }
            if (libc.prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, pointer, 0, 0) < 0) {
                int errno2 = Native.getLastError();
                throw new UnsupportedOperationException("seccomp(SECCOMP_SET_MODE_FILTER): " + JNACLibrary.strerror(errno1) +
                                                        ", prctl(PR_SET_SECCOMP): " + JNACLibrary.strerror(errno2));
            }
        }

        // now check that the filter was really installed, we should be in filter mode.
        if (libc.prctl(PR_GET_SECCOMP, 0, 0, 0, 0) != SECCOMP_MODE_FILTER) {
            throw new UnsupportedOperationException("seccomp filter installation did not really succeed. seccomp(PR_GET_SECCOMP): " + JNACLibrary.strerror(Native.getLastError()));
        }
    }
}

View File

@ -52,7 +52,7 @@ public class BootstrapForTesting {
static { static {
// just like bootstrap, initialize natives, then SM // just like bootstrap, initialize natives, then SM
Bootstrap.initializeNatives(true, true); Bootstrap.initializeNatives(true, true, true);
// initialize probes // initialize probes
Bootstrap.initializeProbes(); Bootstrap.initializeProbes();

View File

@ -0,0 +1,73 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.bootstrap;
import org.elasticsearch.test.ESTestCase;
/** Simple tests seccomp filter is working. */
public class SeccompTests extends ESTestCase {

    /** Skips every test unless the filter is installed and no security manager is present. */
    @Override
    public void setUp() throws Exception {
        super.setUp();
        assumeTrue("requires seccomp filter installation", Natives.isSeccompInstalled());
        // otherwise security manager will block the execution, no fun
        assumeTrue("cannot test with security manager enabled", System.getSecurityManager() == null);
    }

    /** Launching a process from the test thread must fail. */
    public void testNoExecution() throws Exception {
        try {
            Runtime.getRuntime().exec("ls");
            fail("should not have been able to execute!");
        } catch (Exception expected) {
            // we can't guarantee how its converted, currently its an IOException, like this:
            /*
            java.io.IOException: Cannot run program "ls": error=13, Permission denied
                    at __randomizedtesting.SeedInfo.seed([65E6C4BED11899E:FC6E1CA6AA2DB634]:0)
                    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1048)
                    at java.lang.Runtime.exec(Runtime.java:620)
                    ...
                  Caused by: java.io.IOException: error=13, Permission denied
                    at java.lang.UNIXProcess.forkAndExec(Native Method)
                    at java.lang.UNIXProcess.<init>(UNIXProcess.java:248)
                    at java.lang.ProcessImpl.start(ProcessImpl.java:134)
                    at java.lang.ProcessBuilder.start(ProcessBuilder.java:1029)
                    ...
            */
        }
    }

    // make sure thread inherits this too (its documented that way)
    public void testNoExecutionFromThread() throws Exception {
        // The outcome is recorded here and asserted on the test thread: an AssertionError raised
        // inside the worker would not be caught by catch (Exception) and would only reach the test
        // if the runner happens to intercept uncaught exceptions of spawned threads.
        // Thread.join() makes the worker's write visible to the test thread.
        final boolean[] executed = new boolean[1];
        Thread t = new Thread() {
            @Override
            public void run() {
                try {
                    Runtime.getRuntime().exec("ls");
                    executed[0] = true;
                } catch (Exception expected) {
                    // ok
                }
            }
        };
        t.start();
        t.join();
        assertFalse("should not have been able to execute!", executed[0]);
    }
}