From 004b00bb96a8195d8169bee788807beab0377602 Mon Sep 17 00:00:00 2001 From: Himanshu Gupta Date: Fri, 18 Mar 2016 11:43:23 -0500 Subject: [PATCH 1/2] config to explicitly specify classpath for hadoop container during hadoop ingestion --- docs/content/configuration/index.md | 1 + docs/content/ingestion/batch-ingestion.md | 2 + .../indexing/common/task/HadoopTask.java | 16 +++++++- .../java/io/druid/guice/ExtensionsConfig.java | 9 +++++ .../druid/initialization/Initialization.java | 38 ++++++++++++++++++ .../initialization/InitializationTest.java | 40 +++++++++++++++++++ 6 files changed, 105 insertions(+), 1 deletion(-) diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 31cf108955b..801f620595e 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -23,6 +23,7 @@ Many of Druid's external dependencies can be plugged in as modules. Extensions c |--------|-----------|-------| |`druid.extensions.directory`|The root extension directory where user can put extensions related files. Druid will load extensions stored under this directory.|`extensions` (This is a relative path to Druid's working directory)| |`druid.extensions.hadoopDependenciesDir`|The root hadoop dependencies directory where user can put hadoop related dependencies files. Druid will load the dependencies based on the hadoop coordinate specified in the hadoop index task.|`hadoop-dependencies` (This is a relative path to Druid's working directory| +|`druid.extensions.hadoopContainerDruidClasspath`|Hadoop Indexing launches hadoop jobs and this configuration provides way to explicitly set the user classpath for the hadoop job. By default this is computed automatically by druid based on the druid process classpath and set of extensions. 
However, sometimes you might want to be explicit to resolve dependency conflicts between druid and hadoop.|druid classpath and extensions| |`druid.extensions.loadList`|A JSON array of extensions to load from extension directories by Druid. If it is not specified, its value will be `null` and Druid will load all the extensions under `druid.extensions.directory`. If its value is empty list `[]`, then no extensions will be loaded at all.|null| |`druid.extensions.searchCurrentClassloader`|This is a boolean flag that determines if Druid will search the main classloader for extensions. It defaults to true but can be turned off if you have reason to not automatically add all modules on the classpath.|true| diff --git a/docs/content/ingestion/batch-ingestion.md b/docs/content/ingestion/batch-ingestion.md index 921cbb383fc..ab3768e24e2 100644 --- a/docs/content/ingestion/batch-ingestion.md +++ b/docs/content/ingestion/batch-ingestion.md @@ -82,6 +82,8 @@ instance of a Druid [overlord](../design/indexing-service.html). A sample task i |hadoopDependencyCoordinates|A JSON array of Hadoop dependency coordinates that Druid will use, this property will override the default Hadoop coordinates. Once specified, Druid will look for those Hadoop dependencies from the location specified by `druid.extensions.hadoopDependenciesDir`|no| |classpathPrefix|Classpath that will be pre-appended for the peon process.|no| +Also note that Druid automatically computes the classpath for the Hadoop job containers that run in the Hadoop cluster. However, in case of conflicts between Hadoop's and Druid's dependencies, you can manually specify the classpath by setting the `druid.extensions.hadoopContainerDruidClasspath` property. See the extensions config in [base druid configuration](../configuration/index.html). + ### DataSchema This field is required. See [Ingestion](../ingestion/index.html). 
diff --git a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java index cb1fc7e3204..00d8a63194e 100644 --- a/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java +++ b/indexing-service/src/main/java/io/druid/indexing/common/task/HadoopTask.java @@ -157,7 +157,21 @@ public abstract class HadoopTask extends AbstractTask null ); - System.setProperty("druid.hadoop.internal.classpath", Joiner.on(File.pathSeparator).join(jobURLs)); + final String hadoopContainerDruidClasspathJars; + if (extensionsConfig.getHadoopContainerDruidClasspath() == null) { + hadoopContainerDruidClasspathJars = Joiner.on(File.pathSeparator).join(jobURLs); + + } else { + hadoopContainerDruidClasspathJars = + Joiner.on(File.pathSeparator) + .join( + Initialization.getURLsForClasspath(extensionsConfig.getHadoopContainerDruidClasspath()) + ); + } + + log.info("Hadoop Container Druid Classpath is set to [%s]", hadoopContainerDruidClasspathJars); + System.setProperty("druid.hadoop.internal.classpath", hadoopContainerDruidClasspathJars); + return classLoader; } diff --git a/processing/src/main/java/io/druid/guice/ExtensionsConfig.java b/processing/src/main/java/io/druid/guice/ExtensionsConfig.java index fa1923c34a5..3b6b595919c 100644 --- a/processing/src/main/java/io/druid/guice/ExtensionsConfig.java +++ b/processing/src/main/java/io/druid/guice/ExtensionsConfig.java @@ -38,6 +38,9 @@ public class ExtensionsConfig @JsonProperty private String hadoopDependenciesDir = "hadoop-dependencies"; + @JsonProperty + private String hadoopContainerDruidClasspath = null; + @JsonProperty private List loadList; @@ -56,6 +59,11 @@ public class ExtensionsConfig return hadoopDependenciesDir; } + public String getHadoopContainerDruidClasspath() + { + return hadoopContainerDruidClasspath; + } + public List getLoadList() { return loadList; @@ -68,6 +76,7 @@ public class ExtensionsConfig 
"searchCurrentClassloader=" + searchCurrentClassloader + ", directory='" + directory + '\'' + ", hadoopDependenciesDir='" + hadoopDependenciesDir + '\'' + + ", hadoopContainerDruidClasspath='" + hadoopContainerDruidClasspath + '\'' + ", loadList=" + loadList + '}'; } diff --git a/server/src/main/java/io/druid/initialization/Initialization.java b/server/src/main/java/io/druid/initialization/Initialization.java index 2448070ec3f..acf14014aea 100644 --- a/server/src/main/java/io/druid/initialization/Initialization.java +++ b/server/src/main/java/io/druid/initialization/Initialization.java @@ -64,9 +64,12 @@ import org.apache.commons.io.FileUtils; import org.eclipse.aether.artifact.DefaultArtifact; import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; import java.net.URLClassLoader; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.List; @@ -275,6 +278,41 @@ public class Initialization return loader; } + public static List getURLsForClasspath(String cp) + { + try { + String[] paths = cp.split(File.pathSeparator); + + List urls = new ArrayList<>(); + for (int i = 0; i < paths.length; i++) { + File f = new File(paths[i]); + if ("*".equals(f.getName())) { + File parentDir = f.getParentFile(); + if (parentDir.exists() && parentDir.isDirectory()) { + File[] jars = parentDir.listFiles( + new FilenameFilter() + { + @Override + public boolean accept(File dir, String name) + { + return name != null && (name.endsWith(".jar") || name.endsWith(".JAR")); + } + } + ); + for (File jar : jars) { + urls.add(jar.toURI().toURL()); + } + } + } else { + urls.add(new File(paths[i]).toURI().toURL()); + } + } + return urls; + } catch (IOException ex) { + throw Throwables.propagate(ex); + } + } + public static Injector makeInjectorWithModules(final Injector baseInjector, Iterable modules) { final ModuleList defaultModules = new ModuleList(baseInjector); 
diff --git a/server/src/test/java/io/druid/initialization/InitializationTest.java b/server/src/test/java/io/druid/initialization/InitializationTest.java index 335d51e1daa..656c1173847 100644 --- a/server/src/test/java/io/druid/initialization/InitializationTest.java +++ b/server/src/test/java/io/druid/initialization/InitializationTest.java @@ -387,6 +387,46 @@ public class InitializationTest Assert.assertArrayEquals(expectedFileList, actualFileList); } + @Test + public void testGetURLsForClasspath() throws Exception + { + File tmpDir1 = temporaryFolder.newFolder(); + File tmpDir2 = temporaryFolder.newFolder(); + File tmpDir3 = temporaryFolder.newFolder(); + + File tmpDir1a = new File(tmpDir1, "a.jar"); + tmpDir1a.createNewFile(); + File tmpDir1b = new File(tmpDir1, "b.jar"); + tmpDir1b.createNewFile(); + new File(tmpDir1, "note1.txt").createNewFile(); + + File tmpDir2c = new File(tmpDir2, "c.jar"); + tmpDir2c.createNewFile(); + File tmpDir2d = new File(tmpDir2, "d.jar"); + tmpDir2d.createNewFile(); + File tmpDir2e = new File(tmpDir2, "e.JAR"); + tmpDir2e.createNewFile(); + new File(tmpDir2, "note2.txt").createNewFile(); + + String cp = tmpDir1.getAbsolutePath() + File.separator + "*" + + File.pathSeparator + + tmpDir3.getAbsolutePath() + + File.pathSeparator + + tmpDir2.getAbsolutePath() + File.separator + "*"; + + List expected = ImmutableList.builder() + .add(tmpDir1a.toURI().toURL()) + .add(tmpDir1b.toURI().toURL()) + .add(tmpDir3.toURI().toURL()) + .add(tmpDir2c.toURI().toURL()) + .add(tmpDir2d.toURI().toURL()) + .add(tmpDir2e.toURI().toURL()) + .build(); + + + Assert.assertEquals(expected, Initialization.getURLsForClasspath(cp)); + } + public static class TestDruidModule implements DruidModule { @Override From e78a469fb7698eefa736222fbf5f9fffe59a81de Mon Sep 17 00:00:00 2001 From: Himanshu Gupta Date: Thu, 24 Mar 2016 13:28:49 -0500 Subject: [PATCH 2/2] UTs for ExtensionsConfig --- docs/content/configuration/index.md | 2 +- 
.../io/druid/guice/ExtensionsConfigTest.java | 82 +++++++++++++++++++ .../druid/initialization/Initialization.java | 2 +- 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 processing/src/test/java/io/druid/guice/ExtensionsConfigTest.java diff --git a/docs/content/configuration/index.md b/docs/content/configuration/index.md index 801f620595e..7ea4de291dd 100644 --- a/docs/content/configuration/index.md +++ b/docs/content/configuration/index.md @@ -23,7 +23,7 @@ Many of Druid's external dependencies can be plugged in as modules. Extensions c |--------|-----------|-------| |`druid.extensions.directory`|The root extension directory where user can put extensions related files. Druid will load extensions stored under this directory.|`extensions` (This is a relative path to Druid's working directory)| |`druid.extensions.hadoopDependenciesDir`|The root hadoop dependencies directory where user can put hadoop related dependencies files. Druid will load the dependencies based on the hadoop coordinate specified in the hadoop index task.|`hadoop-dependencies` (This is a relative path to Druid's working directory| -|`druid.extensions.hadoopContainerDruidClasspath`|Hadoop Indexing launches hadoop jobs and this configuration provides way to explicitly set the user classpath for the hadoop job. By default this is computed automatically by druid based on the druid process classpath and set of extensions. However, sometimes you might want to be explicit to resolve dependency conflicts between druid and hadoop.|druid classpath and extensions| +|`druid.extensions.hadoopContainerDruidClasspath`|Hadoop Indexing launches hadoop jobs and this configuration provides a way to explicitly set the user classpath for the hadoop job. By default, this is computed automatically by druid based on the druid process classpath and the set of extensions. 
However, sometimes you might want to be explicit to resolve dependency conflicts between druid and hadoop.|null| |`druid.extensions.loadList`|A JSON array of extensions to load from extension directories by Druid. If it is not specified, its value will be `null` and Druid will load all the extensions under `druid.extensions.directory`. If its value is empty list `[]`, then no extensions will be loaded at all.|null| |`druid.extensions.searchCurrentClassloader`|This is a boolean flag that determines if Druid will search the main classloader for extensions. It defaults to true but can be turned off if you have reason to not automatically add all modules on the classpath.|true| diff --git a/processing/src/test/java/io/druid/guice/ExtensionsConfigTest.java b/processing/src/test/java/io/druid/guice/ExtensionsConfigTest.java new file mode 100644 index 00000000000..7f91911838b --- /dev/null +++ b/processing/src/test/java/io/druid/guice/ExtensionsConfigTest.java @@ -0,0 +1,82 @@ +/* + * Licensed to Metamarkets Group Inc. (Metamarkets) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Metamarkets licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package io.druid.guice; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.collect.ImmutableList; +import io.druid.segment.TestHelper; +import org.junit.Assert; +import org.junit.Test; + +/** + */ +public class ExtensionsConfigTest +{ + @Test + public void testSerdeWithDefaults() throws Exception + { + String json = "{}"; + ObjectMapper mapper = TestHelper.getObjectMapper(); + + ExtensionsConfig config = mapper.readValue( + mapper.writeValueAsString( + mapper.readValue(json, ExtensionsConfig.class) + ), + ExtensionsConfig.class + ); + + Assert.assertTrue(config.searchCurrentClassloader()); + Assert.assertEquals("extensions", config.getDirectory()); + Assert.assertEquals("hadoop-dependencies", config.getHadoopDependenciesDir()); + Assert.assertNull(config.getHadoopContainerDruidClasspath()); + Assert.assertNull(config.getLoadList()); + } + + @Test + public void testSerdeWithNonDefaults() throws Exception + { + String json = "{\n" + + " \"searchCurrentClassloader\": false,\n" + + " \"directory\": \"testExtensions\",\n" + + " \"hadoopDependenciesDir\": \"testHadoopDependenciesDir\",\n" + + " \"hadoopContainerDruidClasspath\": \"testHadoopContainerClasspath\",\n" + + " \"loadList\": [\"a\",\"b\"]\n" + + "}"; + ObjectMapper mapper = TestHelper.getObjectMapper(); + + ExtensionsConfig config = mapper.readValue( + mapper.writeValueAsString( + mapper.readValue(json, ExtensionsConfig.class) + ), + ExtensionsConfig.class + ); + + Assert.assertFalse(config.searchCurrentClassloader()); + Assert.assertEquals("testExtensions", config.getDirectory()); + Assert.assertEquals("testHadoopDependenciesDir", config.getHadoopDependenciesDir()); + Assert.assertEquals("testHadoopContainerClasspath", config.getHadoopContainerDruidClasspath()); + Assert.assertEquals( + ImmutableList.of( + "a", "b" + ), + config.getLoadList() + ); + } +} diff --git a/server/src/main/java/io/druid/initialization/Initialization.java 
b/server/src/main/java/io/druid/initialization/Initialization.java index acf14014aea..d1286bd3c29 100644 --- a/server/src/main/java/io/druid/initialization/Initialization.java +++ b/server/src/main/java/io/druid/initialization/Initialization.java @@ -288,7 +288,7 @@ public class Initialization File f = new File(paths[i]); if ("*".equals(f.getName())) { File parentDir = f.getParentFile(); - if (parentDir.exists() && parentDir.isDirectory()) { + if (parentDir.isDirectory()) { File[] jars = parentDir.listFiles( new FilenameFilter() {