mirror of https://github.com/apache/druid.git
parent
69a6bdcf03
commit
2bde8b1d68
|
@ -20,7 +20,9 @@
|
|||
package io.druid.indexing.common.task;
|
||||
|
||||
import com.google.common.base.Joiner;
|
||||
import com.google.common.base.Predicate;
|
||||
import com.google.common.base.Throwables;
|
||||
import com.google.common.collect.Collections2;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Lists;
|
||||
import com.google.inject.Injector;
|
||||
|
@ -30,11 +32,14 @@ import io.druid.guice.GuiceInjectors;
|
|||
import io.druid.indexing.common.TaskToolbox;
|
||||
import io.druid.initialization.Initialization;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.File;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Method;
|
||||
import java.net.URISyntaxException;
|
||||
import java.net.URL;
|
||||
import java.net.URLClassLoader;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
@ -69,6 +74,27 @@ public abstract class HadoopTask extends AbstractTask
|
|||
return hadoopDependencyCoordinates == null ? null : ImmutableList.copyOf(hadoopDependencyCoordinates);
|
||||
}
|
||||
|
||||
// This could stand to have a more robust detection methodology.
|
||||
// Right now it just looks for `druid.*\.jar`
|
||||
// This is only used for classpath isolation in the runTask isolation stuff, so it shooouuullldddd be ok.
|
||||
protected static final Predicate<URL> IS_DRUID_URL = new Predicate<URL>()
|
||||
{
|
||||
@Override
|
||||
public boolean apply(@Nullable URL input)
|
||||
{
|
||||
try {
|
||||
if (input == null) {
|
||||
return false;
|
||||
}
|
||||
final String fName = Paths.get(input.toURI()).getFileName().toString();
|
||||
return fName.startsWith("druid") && fName.endsWith(".jar");
|
||||
}
|
||||
catch (URISyntaxException e) {
|
||||
throw Throwables.propagate(e);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
protected ClassLoader buildClassLoader(final TaskToolbox toolbox) throws Exception
|
||||
{
|
||||
final List<String> finalHadoopDependencyCoordinates = hadoopDependencyCoordinates != null
|
||||
|
@ -84,27 +110,35 @@ public abstract class HadoopTask extends AbstractTask
|
|||
final List<URL> nonHadoopURLs = Lists.newArrayList();
|
||||
nonHadoopURLs.addAll(Arrays.asList(((URLClassLoader) HadoopIndexTask.class.getClassLoader()).getURLs()));
|
||||
|
||||
final List<URL> driverURLs = Lists.newArrayList();
|
||||
driverURLs.addAll(nonHadoopURLs);
|
||||
final List<URL> druidURLs = ImmutableList.copyOf(
|
||||
Collections2.filter(nonHadoopURLs, IS_DRUID_URL)
|
||||
);
|
||||
final List<URL> nonHadoopNotDruidURLs = Lists.newArrayList(nonHadoopURLs);
|
||||
nonHadoopNotDruidURLs.removeAll(druidURLs);
|
||||
|
||||
// put hadoop dependencies last to avoid jets3t & apache.httpcore version conflicts
|
||||
// Start from a fresh slate
|
||||
ClassLoader classLoader = null;
|
||||
classLoader = new URLClassLoader(nonHadoopNotDruidURLs.toArray(new URL[nonHadoopNotDruidURLs.size()]), classLoader);
|
||||
|
||||
// hadoop dependencies come before druid classes because some extensions depend on them
|
||||
for (final File hadoopDependency :
|
||||
Initialization.getHadoopDependencyFilesToLoad(
|
||||
finalHadoopDependencyCoordinates,
|
||||
extensionsConfig
|
||||
)) {
|
||||
final ClassLoader hadoopLoader = Initialization.getClassLoaderForExtension(hadoopDependency);
|
||||
driverURLs.addAll(Arrays.asList(((URLClassLoader) hadoopLoader).getURLs()));
|
||||
classLoader = new URLClassLoader(((URLClassLoader) hadoopLoader).getURLs(), classLoader);
|
||||
}
|
||||
|
||||
final URLClassLoader loader = new URLClassLoader(driverURLs.toArray(new URL[driverURLs.size()]), null);
|
||||
classLoader = new URLClassLoader(druidURLs.toArray(new URL[druidURLs.size()]), classLoader);
|
||||
classLoader = new URLClassLoader(extensionURLs.toArray(new URL[extensionURLs.size()]), classLoader);
|
||||
|
||||
final List<URL> jobUrls = Lists.newArrayList();
|
||||
jobUrls.addAll(nonHadoopURLs);
|
||||
jobUrls.addAll(extensionURLs);
|
||||
|
||||
System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobUrls));
|
||||
return loader;
|
||||
return classLoader;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -65,7 +65,64 @@ public class PullDependencies implements Runnable
|
|||
|
||||
private static final Set<String> exclusions = Sets.newHashSet(
|
||||
"io.druid",
|
||||
"com.metamx.druid"
|
||||
"com.metamx.druid",
|
||||
|
||||
// It is possible that extensions will pull down a lot of jars that are either
|
||||
// duplicates OR conflict with druid jars. In that case, there are two problems that arise
|
||||
//
|
||||
// 1. Large quantity of jars are passed around to things like hadoop when they are not needed (and should not be included)
|
||||
// 2. Classpath priority becomes "mostly correct" and attempted to enforced correctly, but not fully tested
|
||||
//
|
||||
// These jar groups should be included by druid and *not* pulled down in extensions
|
||||
// Note to future developers: This list is hand-crafted and will probably be out of date in the future
|
||||
// A good way to know where to look for errant dependencies is to compare the lib/ directory in the distribution
|
||||
// tarball with the jars included in the extension directories.
|
||||
//
|
||||
// This list is best-effort, and might still pull down more than desired.
|
||||
//
|
||||
// A simple example is that if an extension's dependency uses some-library-123.jar,
|
||||
// druid uses some-library-456.jar, and hadoop uses some-library-666.jar, then we probably want to use some-library-456.jar,
|
||||
// so don't pull down some-library-123.jar, and ask hadoop to load some-library-456.jar.
|
||||
//
|
||||
// In the case where some-library is NOT on this list, both some-library-456.jar and some-library-123.jar will be
|
||||
// on the class path and propagated around the system. Most places TRY to make sure some-library-456.jar has
|
||||
// precedence, but it is easy for this assumption to be violated and for the precedence of some-library-456.jar,
|
||||
// some-library-123.jar and some-library-456.jar to not be properly defined.
|
||||
//
|
||||
// As of this writing there are no special unit tests for classloader issues and library version conflicts.
|
||||
//
|
||||
// Different tasks which are classloader sensitive attempt to maintain a sane order for loading libraries in the
|
||||
// classloader, but it is always possible that something didn't load in the right order. Also we don't want to be
|
||||
// throwing around a ton of jars we don't need to.
|
||||
"asm",
|
||||
"org.ow2.asm",
|
||||
"org.jboss.netty",
|
||||
"com.google.guava",
|
||||
"com.google.code.findbugs",
|
||||
"com.google.protobuf",
|
||||
"com.esotericsoftware.minlog",
|
||||
"log4j",
|
||||
"org.slf4j",
|
||||
"commons-logging",
|
||||
"org.eclipse.jetty",
|
||||
"org.mortbay.jetty",
|
||||
"com.sun.jersey",
|
||||
"com.sun.jersey.contribs",
|
||||
"common-beanutils",
|
||||
"commons-codec",
|
||||
"commons-lang",
|
||||
"commons-cli",
|
||||
"commons-io",
|
||||
"javax.activation",
|
||||
"org.apache.httpcomponents",
|
||||
"org.apache.zookeeper",
|
||||
"org.codehaus.jackson",
|
||||
"com.fasterxml.jackson",
|
||||
"com.fasterxml.jackson.core",
|
||||
"com.fasterxml.jackson.dataformat",
|
||||
"com.fasterxml.jackson.datatype",
|
||||
"io.netty",
|
||||
"org.roaringbitmap"
|
||||
);
|
||||
|
||||
private TeslaAether aether;
|
||||
|
|
Loading…
Reference in New Issue