mirror of https://github.com/apache/nifi.git
NIFI-3530 - Ensuring configuration and ugi information is passed down to Hive in PutHiveStreaming
NIFI-3530 - closing renewer in PutHiveStreaming.cleanup(), making npe less likely in HiveWriter.toString() This closes #1544
This commit is contained in:
parent 1978c986b5
commit cfe899d189
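
In outline, the patch threads the processor-level HiveConf and UserGroupInformation down to the Hive Streaming client instead of letting HiveWriter open its connection with a null configuration. A condensed caller-side sketch of the resulting call path, simplified and not verbatim from the patch:

    import java.util.concurrent.ExecutorService;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.security.UserGroupInformation;
    import org.apache.hive.hcatalog.streaming.HiveEndPoint;
    import org.apache.nifi.util.hive.HiveOptions;
    import org.apache.nifi.util.hive.HiveUtils;
    import org.apache.nifi.util.hive.HiveWriter;

    // Simplified sketch: the HiveConf built by HiveConfigurator is passed through
    // HiveUtils.makeHiveWriter into HiveWriter, which uses it both for
    // HiveEndPoint.newConnection(...) and for creating the StrictJsonWriter.
    class CallPathSketch {
        HiveWriter open(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi,
                        HiveOptions options, HiveConf hiveConf) throws Exception {
            return HiveUtils.makeHiveWriter(endPoint, callTimeoutPool, ugi, options, hiveConf);
        }
    }
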
@@ -26,7 +26,7 @@ import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
-import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hive.hcatalog.streaming.ConnectionError;
 import org.apache.hive.hcatalog.streaming.HiveEndPoint;
@@ -258,6 +258,7 @@ public class PutHiveStreaming extends AbstractProcessor {

     protected volatile HiveConfigurator hiveConfigurator = new HiveConfigurator();
     protected volatile UserGroupInformation ugi;
+    protected volatile HiveConf hiveConfig;

     protected final AtomicBoolean isInitialized = new AtomicBoolean(false);

@@ -318,7 +319,7 @@ public class PutHiveStreaming extends AbstractProcessor {
         final Integer heartbeatInterval = context.getProperty(HEARTBEAT_INTERVAL).asInteger();
         final Integer txnsPerBatch = context.getProperty(TXNS_PER_BATCH).asInteger();
         final String configFiles = context.getProperty(HIVE_CONFIGURATION_RESOURCES).getValue();
-        final Configuration hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);
+        hiveConfig = hiveConfigurator.getConfigurationFromFiles(configFiles);

         // add any dynamic properties to the Hive configuration
         for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
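
The dynamic-property loop above is cut off at the hunk boundary. A plausible sketch of what copying dynamic properties onto the new hiveConfig field looks like; the loop body and helper class shown here are hypothetical, not part of this diff:

    import java.util.Map;

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.nifi.components.PropertyDescriptor;
    import org.apache.nifi.processor.ProcessContext;

    // Hypothetical sketch only: dynamic processor properties are copied onto the HiveConf
    // so they reach the Hive Streaming client together with the values loaded from the
    // configured hive-site.xml resources.
    class DynamicPropertySketch {
        void applyDynamicProperties(ProcessContext context, HiveConf hiveConfig) {
            for (final Map.Entry<PropertyDescriptor, String> entry : context.getProperties().entrySet()) {
                if (entry.getKey().isDynamic()) {
                    hiveConfig.set(entry.getKey().getName(), entry.getValue());
                }
            }
        }
    }
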
@@ -669,6 +670,7 @@ public class PutHiveStreaming extends AbstractProcessor {
         }

         callTimeoutPool = null;
+        hiveConfigurator.stopRenewer();
     }

     private void setupHeartBeatTimer() {
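
The new hiveConfigurator.stopRenewer() call shuts down the Kerberos ticket renewer when the processor is stopped. HiveConfigurator's implementation is not part of this diff; a generic illustration of the start/stop pattern such a renewer typically follows, with hypothetical names:

    import java.util.concurrent.Executors;
    import java.util.concurrent.ScheduledExecutorService;
    import java.util.concurrent.TimeUnit;

    // Hypothetical sketch of a renewer: a scheduled task periodically re-logs in from the
    // keytab; stopRenewer() cancels it so its thread does not outlive the processor.
    class RenewerSketch {
        private final ScheduledExecutorService renewer = Executors.newSingleThreadScheduledExecutor();

        void startRenewer(Runnable relogin, long periodSeconds) {
            renewer.scheduleAtFixedRate(relogin, periodSeconds, periodSeconds, TimeUnit.SECONDS);
        }

        void stopRenewer() {
            renewer.shutdown(); // without this, the pool's thread keeps running after the processor stops
        }
    }
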
@@ -837,7 +839,7 @@ public class PutHiveStreaming extends AbstractProcessor {

     protected HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options)
             throws HiveWriter.ConnectFailure, InterruptedException {
-        return HiveUtils.makeHiveWriter(endPoint, callTimeoutPool, ugi, options);
+        return HiveUtils.makeHiveWriter(endPoint, callTimeoutPool, ugi, options, hiveConfig);
     }

     protected KerberosProperties getKerberosProperties() {
@@ -62,8 +62,8 @@ public class HiveConfigurator {
         return problems;
     }

-    public Configuration getConfigurationFromFiles(final String configFiles) {
-        final Configuration hiveConfig = new HiveConf();
+    public HiveConf getConfigurationFromFiles(final String configFiles) {
+        final HiveConf hiveConfig = new HiveConf();
         if (StringUtils.isNotBlank(configFiles)) {
             for (final String configFile : configFiles.split(",")) {
                 hiveConfig.addResource(new Path(configFile.trim()));
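
With the return type narrowed from Configuration to HiveConf, callers can hand the result straight to Hive Streaming APIs that require a HiveConf, while plain Configuration-style access keeps working because HiveConf extends Configuration. A small caller sketch; the class name, file path, and property key are illustrative only:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.nifi.util.hive.HiveConfigurator;

    class ConfiguratorUsageSketch {
        HiveConf load() {
            final HiveConfigurator configurator = new HiveConfigurator();
            // comma-separated resource list, as parsed by getConfigurationFromFiles (path is illustrative)
            final HiveConf conf = configurator.getConfigurationFromFiles("/path/to/hive-site.xml");
            final String metastoreUri = conf.get("hive.metastore.uris"); // Configuration-style getter still works
            return conf;
        }
    }
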
@@ -18,6 +18,7 @@

 package org.apache.nifi.util.hive;

+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.security.UserGroupInformation;

 import org.apache.hive.hcatalog.streaming.ConnectionError;
@@ -39,10 +40,10 @@ public class HiveUtils {
         return new HiveEndPoint(options.getMetaStoreURI(), options.getDatabaseName(), options.getTableName(), partitionVals);
     }

-    public static HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options)
+    public static HiveWriter makeHiveWriter(HiveEndPoint endPoint, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveOptions options, HiveConf hiveConf)
             throws HiveWriter.ConnectFailure, InterruptedException {
         return new HiveWriter(endPoint, options.getTxnsPerBatch(), options.getAutoCreatePartitions(),
-                options.getCallTimeOut(), callTimeoutPool, ugi);
+                options.getCallTimeOut(), callTimeoutPool, ugi, hiveConf);
     }

     public static void logAllHiveEndPoints(Map<HiveEndPoint, HiveWriter> allWriters) {
@@ -19,6 +19,7 @@
 package org.apache.nifi.util.hive;

 import java.io.IOException;
+import java.security.PrivilegedExceptionAction;
 import java.util.List;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.ExecutorService;
@@ -26,6 +27,7 @@ import java.util.concurrent.Future;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.TimeoutException;

+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.security.UserGroupInformation;

 import org.apache.hive.hcatalog.streaming.HiveEndPoint;
@@ -54,23 +56,17 @@ public class HiveWriter {
     private TransactionBatch txnBatch;
     private long lastUsed; // time of last flush on this writer
     protected boolean closed; // flag indicating HiveWriter was closed
-    private boolean autoCreatePartitions;
-    private UserGroupInformation ugi;
     private int totalRecords = 0;

-    public HiveWriter(HiveEndPoint endPoint, int txnsPerBatch,
-                      boolean autoCreatePartitions, long callTimeout,
-                      ExecutorService callTimeoutPool, UserGroupInformation ugi)
+    public HiveWriter(HiveEndPoint endPoint, int txnsPerBatch, boolean autoCreatePartitions, long callTimeout, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveConf hiveConf)
             throws InterruptedException, ConnectFailure {
         try {
-            this.autoCreatePartitions = autoCreatePartitions;
             this.callTimeout = callTimeout;
             this.callTimeoutPool = callTimeoutPool;
             this.endPoint = endPoint;
-            this.ugi = ugi;
-            this.connection = newConnection(ugi);
+            this.connection = newConnection(endPoint, autoCreatePartitions, hiveConf, ugi);
             this.txnsPerBatch = txnsPerBatch;
-            this.recordWriter = getRecordWriter(endPoint);
+            this.recordWriter = getRecordWriter(endPoint, ugi, hiveConf);
             this.txnBatch = nextTxnBatch(recordWriter);
             this.closed = false;
             this.lastUsed = System.currentTimeMillis();
@@ -81,15 +77,17 @@ public class HiveWriter {
         }
     }

-    protected RecordWriter getRecordWriter(HiveEndPoint endPoint) throws StreamingException {
-        return new StrictJsonWriter(endPoint);
+    protected RecordWriter getRecordWriter(HiveEndPoint endPoint, UserGroupInformation ugi, HiveConf hiveConf) throws StreamingException, IOException, InterruptedException {
+        if (ugi == null) {
+            return new StrictJsonWriter(endPoint, hiveConf);
+        } else {
+            return ugi.doAs((PrivilegedExceptionAction<StrictJsonWriter>) () -> new StrictJsonWriter(endPoint, hiveConf));
+        }
     }

     @Override
     public String toString() {
-        return "{ "
-                + "endPoint = " + endPoint.toString()
-                + ", TransactionBatch = " + txnBatch.toString() + " }";
+        return "{ endPoint = " + endPoint + ", TransactionBatch = " + txnBatch + " }";
     }

     /**
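
The toString() rewrite is what the commit message means by "making npe less likely": string concatenation renders a null txnBatch as the literal "null", whereas the old explicit txnBatch.toString() call threw a NullPointerException when no transaction batch had been opened yet. A minimal illustration, not taken from the patch:

    class ToStringNpeSketch {
        public static void main(String[] args) {
            Object txnBatch = null;                                           // e.g. before the first transaction batch exists
            System.out.println("TransactionBatch = " + txnBatch);            // prints "TransactionBatch = null"
            System.out.println("TransactionBatch = " + txnBatch.toString()); // throws NullPointerException
        }
    }
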
@@ -230,11 +228,10 @@ public class HiveWriter {
         }
     }

-    protected StreamingConnection newConnection(final UserGroupInformation ugi)
-            throws InterruptedException, ConnectFailure {
+    protected StreamingConnection newConnection(HiveEndPoint endPoint, boolean autoCreatePartitions, HiveConf conf, UserGroupInformation ugi) throws InterruptedException, ConnectFailure {
         try {
             return callWithTimeout(() -> {
-                return endPoint.newConnection(autoCreatePartitions, null, ugi); // could block
+                return endPoint.newConnection(autoCreatePartitions, conf, ugi); // could block
             });
         } catch (StreamingException | TimeoutException e) {
             throw new ConnectFailure(endPoint, e);
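
newConnection(...) still routes the blocking endPoint.newConnection call through the existing callWithTimeout helper, whose implementation is outside this diff. A generic sketch of that call-with-timeout pattern, assuming an ExecutorService-backed implementation with hypothetical names:

    import java.util.concurrent.Callable;
    import java.util.concurrent.ExecutorService;
    import java.util.concurrent.Future;
    import java.util.concurrent.TimeUnit;
    import java.util.concurrent.TimeoutException;

    // Generic pattern: run a potentially blocking call on a worker thread and give up
    // after callTimeoutMs instead of blocking the calling thread indefinitely.
    class CallWithTimeoutSketch {
        static <T> T callWithTimeout(ExecutorService pool, long callTimeoutMs, Callable<T> callable) throws Exception {
            final Future<T> future = pool.submit(callable);
            try {
                return future.get(callTimeoutMs, TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                future.cancel(true); // interrupt the blocked call
                throw e;
            }
        }
    }
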
@@ -24,6 +24,7 @@ import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumWriter;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hive.hcatalog.streaming.HiveEndPoint;
 import org.apache.hive.hcatalog.streaming.RecordWriter;
@@ -36,6 +37,7 @@ import org.apache.nifi.stream.io.ByteArrayOutputStream;
 import org.apache.nifi.util.MockFlowFile;
 import org.apache.nifi.util.TestRunner;
 import org.apache.nifi.util.TestRunners;
+import org.apache.nifi.util.hive.HiveConfigurator;
 import org.apache.nifi.util.hive.HiveOptions;
 import org.apache.nifi.util.hive.HiveWriter;
 import org.junit.Before;
@@ -58,7 +60,9 @@ import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertNull;
 import static org.junit.Assert.assertTrue;
+import static org.mockito.Matchers.anyString;
 import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;

 /**
  * Unit tests for PutHiveStreaming processor.
@@ -69,6 +73,8 @@ public class TestPutHiveStreaming {
     private MockPutHiveStreaming processor;

     private KerberosProperties kerberosPropsWithFile;
+    private HiveConfigurator hiveConfigurator;
+    private HiveConf hiveConf;

     @Before
     public void setUp() throws Exception {
@@ -81,6 +87,10 @@ public class TestPutHiveStreaming {
         kerberosPropsWithFile = new KerberosProperties(new File("src/test/resources/krb5.conf"));

         processor = new MockPutHiveStreaming();
+        hiveConfigurator = mock(HiveConfigurator.class);
+        hiveConf = mock(HiveConf.class);
+        when(hiveConfigurator.getConfigurationFromFiles(anyString())).thenReturn(hiveConf);
+        processor.hiveConfigurator = hiveConfigurator;
         processor.setKerberosProperties(kerberosPropsWithFile);
         runner = TestRunners.newTestRunner(processor);
     }
@@ -545,7 +555,7 @@ public class TestPutHiveStreaming {
             if (generateInterruptedExceptionOnCreateWriter) {
                 throw new InterruptedException();
             }
-            MockHiveWriter hiveWriter = new MockHiveWriter(endPoint, options.getTxnsPerBatch(), options.getAutoCreatePartitions(), options.getCallTimeOut(), callTimeoutPool, ugi);
+            MockHiveWriter hiveWriter = new MockHiveWriter(endPoint, options.getTxnsPerBatch(), options.getAutoCreatePartitions(), options.getCallTimeOut(), callTimeoutPool, ugi, hiveConfig);
             hiveWriter.setGenerateWriteFailure(generateWriteFailure);
             hiveWriter.setGenerateSerializationError(generateSerializationError);
             hiveWriter.setGenerateCommitFailure(generateCommitFailure);
@@ -595,9 +605,9 @@ public class TestPutHiveStreaming {
         private HiveEndPoint endPoint;

         public MockHiveWriter(HiveEndPoint endPoint, int txnsPerBatch, boolean autoCreatePartitions,
-                              long callTimeout, ExecutorService callTimeoutPool, UserGroupInformation ugi)
+                              long callTimeout, ExecutorService callTimeoutPool, UserGroupInformation ugi, HiveConf hiveConf)
                 throws InterruptedException, ConnectFailure {
-            super(endPoint, txnsPerBatch, autoCreatePartitions, callTimeout, callTimeoutPool, ugi);
+            super(endPoint, txnsPerBatch, autoCreatePartitions, callTimeout, callTimeoutPool, ugi, hiveConf);
             this.endPoint = endPoint;
         }

@@ -632,13 +642,15 @@ public class TestPutHiveStreaming {
         }

         @Override
-        protected RecordWriter getRecordWriter(HiveEndPoint endPoint) throws StreamingException {
+        protected RecordWriter getRecordWriter(HiveEndPoint endPoint, UserGroupInformation ugi, HiveConf conf) throws StreamingException {
+            assertEquals(hiveConf, conf);
             return mock(RecordWriter.class);
         }

         @Override
-        protected StreamingConnection newConnection(UserGroupInformation ugi) throws InterruptedException, ConnectFailure {
+        protected StreamingConnection newConnection(HiveEndPoint endPoint, boolean autoCreatePartitions, HiveConf conf, UserGroupInformation ugi) throws InterruptedException, ConnectFailure {
             StreamingConnection connection = mock(StreamingConnection.class);
+            assertEquals(hiveConf, conf);
             return connection;
         }
