mirror of https://github.com/apache/druid.git
Merge pull request #2429 from metamx/hadoopClasspath
Make hadoop classpath isolation more explicit
This commit is contained in:
commit
2b69e4b00f
|
@ -20,7 +20,9 @@
|
||||||
package io.druid.indexing.common.task;
|
package io.druid.indexing.common.task;
|
||||||
|
|
||||||
import com.google.common.base.Joiner;
|
import com.google.common.base.Joiner;
|
||||||
|
import com.google.common.base.Predicate;
|
||||||
import com.google.common.base.Throwables;
|
import com.google.common.base.Throwables;
|
||||||
|
import com.google.common.collect.Collections2;
|
||||||
import com.google.common.collect.ImmutableList;
|
import com.google.common.collect.ImmutableList;
|
||||||
import com.google.common.collect.Lists;
|
import com.google.common.collect.Lists;
|
||||||
import com.google.inject.Injector;
|
import com.google.inject.Injector;
|
||||||
|
@ -30,11 +32,14 @@ import io.druid.guice.GuiceInjectors;
|
||||||
import io.druid.indexing.common.TaskToolbox;
|
import io.druid.indexing.common.TaskToolbox;
|
||||||
import io.druid.initialization.Initialization;
|
import io.druid.initialization.Initialization;
|
||||||
|
|
||||||
|
import javax.annotation.Nullable;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.lang.reflect.InvocationTargetException;
|
import java.lang.reflect.InvocationTargetException;
|
||||||
import java.lang.reflect.Method;
|
import java.lang.reflect.Method;
|
||||||
|
import java.net.URISyntaxException;
|
||||||
import java.net.URL;
|
import java.net.URL;
|
||||||
import java.net.URLClassLoader;
|
import java.net.URLClassLoader;
|
||||||
|
import java.nio.file.Paths;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
@ -69,6 +74,27 @@ public abstract class HadoopTask extends AbstractTask
|
||||||
return hadoopDependencyCoordinates == null ? null : ImmutableList.copyOf(hadoopDependencyCoordinates);
|
return hadoopDependencyCoordinates == null ? null : ImmutableList.copyOf(hadoopDependencyCoordinates);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This could stand to have a more robust detection methodology.
|
||||||
|
// Right now it just looks for `druid.*\.jar`
|
||||||
|
// This is only used for classpath isolation in the runTask isolation stuff, so it shooouuullldddd be ok.
|
||||||
|
protected static final Predicate<URL> IS_DRUID_URL = new Predicate<URL>()
|
||||||
|
{
|
||||||
|
@Override
|
||||||
|
public boolean apply(@Nullable URL input)
|
||||||
|
{
|
||||||
|
try {
|
||||||
|
if (input == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
final String fName = Paths.get(input.toURI()).getFileName().toString();
|
||||||
|
return fName.startsWith("druid") && fName.endsWith(".jar");
|
||||||
|
}
|
||||||
|
catch (URISyntaxException e) {
|
||||||
|
throw Throwables.propagate(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
protected ClassLoader buildClassLoader(final TaskToolbox toolbox) throws Exception
|
protected ClassLoader buildClassLoader(final TaskToolbox toolbox) throws Exception
|
||||||
{
|
{
|
||||||
final List<String> finalHadoopDependencyCoordinates = hadoopDependencyCoordinates != null
|
final List<String> finalHadoopDependencyCoordinates = hadoopDependencyCoordinates != null
|
||||||
|
@ -84,27 +110,35 @@ public abstract class HadoopTask extends AbstractTask
|
||||||
final List<URL> nonHadoopURLs = Lists.newArrayList();
|
final List<URL> nonHadoopURLs = Lists.newArrayList();
|
||||||
nonHadoopURLs.addAll(Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));
|
nonHadoopURLs.addAll(Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));
|
||||||
|
|
||||||
final List<URL> driverURLs = Lists.newArrayList();
|
final List<URL> druidURLs = ImmutableList.copyOf(
|
||||||
driverURLs.addAll(nonHadoopURLs);
|
Collections2.filter(nonHadoopURLs, IS_DRUID_URL)
|
||||||
|
);
|
||||||
|
final List<URL> nonHadoopNotDruidURLs = Lists.newArrayList(nonHadoopURLs);
|
||||||
|
nonHadoopNotDruidURLs.removeAll(druidURLs);
|
||||||
|
|
||||||
// put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
|
// Start from a fresh slate
|
||||||
|
ClassLoader classLoader = null;
|
||||||
|
classLoader = new URLClassLoader(nonHadoopNotDruidURLs.toArray(new URL[nonHadoopNotDruidURLs.size()]), classLoader);
|
||||||
|
|
||||||
|
// hadoop dependencies come before druid classes because some extensions depend on them
|
||||||
for (final File hadoopDependency :
|
for (final File hadoopDependency :
|
||||||
Initialization.getHadoopDependencyFilesToLoad(
|
Initialization.getHadoopDependencyFilesToLoad(
|
||||||
finalHadoopDependencyCoordinates,
|
finalHadoopDependencyCoordinates,
|
||||||
extensionsConfig
|
extensionsConfig
|
||||||
)) {
|
)) {
|
||||||
final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency);
|
final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency);
|
||||||
driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
|
classLoader = new URLClassLoader(((URLClassLoader) hadoopLoader).getURLs(), classLoader);
|
||||||
}
|
}
|
||||||
|
|
||||||
final URLClassLoader loader = new URLClassLoader(driverURLs.toArray(new URL[driverURLs.size()]), null);
|
classLoader = new URLClassLoader(druidURLs.toArray(new URL[druidURLs.size()]), classLoader);
|
||||||
|
classLoader = new URLClassLoader(extensionURLs.toArray(new URL[extensionURLs.size()]), classLoader);
|
||||||
|
|
||||||
final List<URL> jobUrls = Lists.newArrayList();
|
final List<URL> jobUrls = Lists.newArrayList();
|
||||||
jobUrls.addAll(nonHadoopURLs);
|
jobUrls.addAll(nonHadoopURLs);
|
||||||
jobUrls.addAll(extensionURLs);
|
jobUrls.addAll(extensionURLs);
|
||||||
|
|
||||||
System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
|
System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
|
||||||
return loader;
|
return classLoader;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -65,7 +65,64 @@ public class PullDependencies implements Runnable
|
||||||
|
|
||||||
private static final Set<String> exclusions = Sets.newHashSet(
|
private static final Set<String> exclusions = Sets.newHashSet(
|
||||||
"io.druid",
|
"io.druid",
|
||||||
"com.metamx.druid"
|
"com.metamx.druid",
|
||||||
|
|
||||||
|
// It is possible that extensions will pull down a lot of jars that are either
|
||||||
|
// duplicates OR conflict with druid jars. In that case, there are two problems that arise
|
||||||
|
//
|
||||||
|
// 1. Large quantity of jars are passed around to things like hadoop when they are not needed (and should not be included)
|
||||||
|
// 2. Classpath priority becomes "mostly correct" and attempted to enforced correctly, but not fully tested
|
||||||
|
//
|
||||||
|
// These jar groups should be included by druid and *not* pulled down in extensions
|
||||||
|
// Note to future developers: This list is hand-crafted and will probably be out of date in the future
|
||||||
|
// A good way to know where to look for errant dependencies is to compare the lib/ directory in the distribution
|
||||||
|
// tarball with the jars included in the extension directories.
|
||||||
|
//
|
||||||
|
// This list is best-effort, and might still pull down more than desired.
|
||||||
|
//
|
||||||
|
// A simple example is that if an extension's dependency uses some-library-123.jar,
|
||||||
|
// druid uses some-library-456.jar, and hadoop uses some-library-666.jar, then we probably want to use some-library-456.jar,
|
||||||
|
// so don't pull down some-library-123.jar, and ask hadoop to load some-library-456.jar.
|
||||||
|
//
|
||||||
|
// In the case where some-library is NOT on this list, both some-library-456.jar and some-library-123.jar will be
|
||||||
|
// on the class path and propagated around the system. Most places TRY to make sure some-library-456.jar has
|
||||||
|
// precedence, but it is easy for this assumption to be violated and for the precedence of some-library-456.jar,
|
||||||
|
// some-library-123.jar and some-library-456.jar to not be properly defined.
|
||||||
|
//
|
||||||
|
// As of this writing there are no special unit tests for classloader issues and library version conflicts.
|
||||||
|
//
|
||||||
|
// Different tasks which are classloader sensitive attempt to maintain a sane order for loading libraries in the
|
||||||
|
// classloader, but it is always possible that something didn't load in the right order. Also we don't want to be
|
||||||
|
// throwing around a ton of jars we don't need to.
|
||||||
|
"asm",
|
||||||
|
"org.ow2.asm",
|
||||||
|
"org.jboss.netty",
|
||||||
|
"com.google.guava",
|
||||||
|
"com.google.code.findbugs",
|
||||||
|
"com.google.protobuf",
|
||||||
|
"com.esotericsoftware.minlog",
|
||||||
|
"log4j",
|
||||||
|
"org.slf4j",
|
||||||
|
"commons-logging",
|
||||||
|
"org.eclipse.jetty",
|
||||||
|
"org.mortbay.jetty",
|
||||||
|
"com.sun.jersey",
|
||||||
|
"com.sun.jersey.contribs",
|
||||||
|
"common-beanutils",
|
||||||
|
"commons-codec",
|
||||||
|
"commons-lang",
|
||||||
|
"commons-cli",
|
||||||
|
"commons-io",
|
||||||
|
"javax.activation",
|
||||||
|
"org.apache.httpcomponents",
|
||||||
|
"org.apache.zookeeper",
|
||||||
|
"org.codehaus.jackson",
|
||||||
|
"com.fasterxml.jackson",
|
||||||
|
"com.fasterxml.jackson.core",
|
||||||
|
"com.fasterxml.jackson.dataformat",
|
||||||
|
"com.fasterxml.jackson.datatype",
|
||||||
|
"io.netty",
|
||||||
|
"org.roaringbitmap"
|
||||||
);
|
);
|
||||||
|
|
||||||
private TeslaAether aether;
|
private TeslaAether aether;
|
||||||
|
|
Loading…
Reference in New Issue