Lazy Initialisation of Orc extensions module (#12663)

* Lazy initialization of Orc extension

* nit

* moving initialize method to OrcInputFormat
This commit is contained in:
Tejaswini Bandlamudi 2022-06-21 11:13:10 +05:30 committed by GitHub
parent 818974f6e4
commit a85b1d8985
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 23 additions and 20 deletions

View File

@ -27,9 +27,7 @@ import com.google.inject.Inject;
import org.apache.druid.data.input.orc.guice.Orc; import org.apache.druid.data.input.orc.guice.Orc;
import org.apache.druid.initialization.DruidModule; import org.apache.druid.initialization.DruidModule;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import java.io.IOException;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
import java.util.Properties; import java.util.Properties;
@ -64,24 +62,6 @@ public class OrcExtensionsModule implements DruidModule
// to properly initialize everything // to properly initialize everything
final Configuration conf = new Configuration(); final Configuration conf = new Configuration();
// Set explicit CL. Otherwise it'll try to use thread context CL, which may not have all of our dependencies.
conf.setClassLoader(getClass().getClassLoader());
// Ensure that FileSystem class level initialization happens with correct CL
// See https://github.com/apache/druid/issues/1714
ClassLoader currCtxCl = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
FileSystem.get(conf);
}
catch (IOException ex) {
throw new RuntimeException(ex);
}
finally {
Thread.currentThread().setContextClassLoader(currCtxCl);
}
if (props != null) { if (props != null) {
for (String propName : props.stringPropertyNames()) { for (String propName : props.stringPropertyNames()) {
if (propName.startsWith("hadoop.")) { if (propName.startsWith("hadoop.")) {

View File

@ -29,9 +29,11 @@ import org.apache.druid.data.input.impl.NestedInputFormat;
import org.apache.druid.data.input.orc.guice.Orc; import org.apache.druid.data.input.orc.guice.Orc;
import org.apache.druid.java.util.common.parsers.JSONPathSpec; import org.apache.druid.java.util.common.parsers.JSONPathSpec;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import javax.annotation.Nullable; import javax.annotation.Nullable;
import java.io.File; import java.io.File;
import java.io.IOException;
import java.util.Objects; import java.util.Objects;
public class OrcInputFormat extends NestedInputFormat public class OrcInputFormat extends NestedInputFormat
@ -51,6 +53,26 @@ public class OrcInputFormat extends NestedInputFormat
this.conf = conf; this.conf = conf;
} }
/**
 * Obtains the Hadoop {@link FileSystem} for {@code conf} while this class's class loader is
 * temporarily installed as the thread context class loader, then restores the previous one.
 *
 * This is done separately rather than during eager initialization, because resolving the
 * namenode hostname at that point throws an error if nodes are ephemeral.
 *
 * @param conf Hadoop configuration passed to {@code FileSystem.get}
 * @throws RuntimeException wrapping any {@link IOException} raised by {@code FileSystem.get}
 */
private void initialize(Configuration conf)
{
  final Thread current = Thread.currentThread();
  // Remember the caller's context class loader so it can be put back afterwards.
  final ClassLoader savedContextLoader = current.getContextClassLoader();
  try {
    // FileSystem class-level initialization must happen with the correct class loader.
    // See https://github.com/apache/druid/issues/1714
    current.setContextClassLoader(getClass().getClassLoader());
    FileSystem.get(conf);
  }
  catch (IOException e) {
    throw new RuntimeException(e);
  }
  finally {
    // Always restore, whether or not FileSystem.get succeeded.
    current.setContextClassLoader(savedContextLoader);
  }
}
@Override @Override
public boolean isSplittable() public boolean isSplittable()
{ {
@ -60,6 +82,7 @@ public class OrcInputFormat extends NestedInputFormat
@Override @Override
public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory) public InputEntityReader createReader(InputRowSchema inputRowSchema, InputEntity source, File temporaryDirectory)
{ {
initialize(conf);
return new OrcReader(conf, inputRowSchema, source, temporaryDirectory, getFlattenSpec(), binaryAsString); return new OrcReader(conf, inputRowSchema, source, temporaryDirectory, getFlattenSpec(), binaryAsString);
} }