mirror of https://github.com/apache/nifi.git
NIFI-1868: Incorporate PutHiveStreaming review comments
This closes #706. Signed-off-by: Bryan Bende <bbende@apache.org>
This commit is contained in:
parent 59659232c7
commit 3943d72e95
PutHiveStreaming.java

@@ -18,8 +18,12 @@ package org.apache.nifi.processors.hive;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import org.apache.avro.Schema;
+import org.apache.avro.file.CodecFactory;
+import org.apache.avro.file.DataFileConstants;
 import org.apache.avro.file.DataFileStream;
+import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -27,6 +31,7 @@ import org.apache.hive.hcatalog.streaming.ConnectionError;
 import org.apache.hive.hcatalog.streaming.HiveEndPoint;
 import org.apache.hive.hcatalog.streaming.SerializationError;
 import org.apache.hive.hcatalog.streaming.StreamingException;
+import org.apache.nifi.annotation.behavior.TriggerSerially;
 import org.apache.nifi.annotation.behavior.WritesAttribute;
 import org.apache.nifi.annotation.behavior.WritesAttributes;
 import org.apache.nifi.annotation.documentation.CapabilityDescription;
@@ -60,6 +65,7 @@ import org.json.JSONObject;
 import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.LinkedList;
@@ -73,17 +79,21 @@ import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.regex.Pattern;

 /**
  * This processor utilizes the Hive Streaming capability to insert data from the flow into a Hive database table.
  */
+@TriggerSerially
 @Tags({"hive", "streaming", "put", "database", "store"})
 @CapabilityDescription("This processor uses Hive Streaming to send flow file data to an Apache Hive table. The incoming flow file is expected to be in "
         + "Avro format and the table must exist in Hive. Please see the Hive documentation for requirements on the Hive table (format, partitions, etc.). "
-        + "The partition values are extracted from the Avro record based on the names of the partition columns as specified in the processor. ")
+        + "The partition values are extracted from the Avro record based on the names of the partition columns as specified in the processor.")
 @WritesAttributes({
-        @WritesAttribute(attribute = "hivestreaming.record.count", description = "The number of records from this flow file written using Hive Streaming.")
+        @WritesAttribute(attribute = "hivestreaming.record.count", description = "This attribute is written on the flow files routed to the 'success' "
+                + "and 'failure' relationships, and contains the number of records from the incoming flow file written successfully and unsuccessfully, respectively.")
 })
 public class PutHiveStreaming extends AbstractProcessor {

@@ -110,6 +120,17 @@ public class PutHiveStreaming extends AbstractProcessor {
         return new ValidationResult.Builder().subject(subject).input(value).explanation(reason).valid(reason == null).build();
     };

+    // Metadata keys that are not transferred to split files when output strategy is datafile
+    // Avro will write this key/values pairs on its own
+    private static final Set<String> RESERVED_METADATA;
+
+    static {
+        Set<String> reservedMetadata = new HashSet<>();
+        reservedMetadata.add("avro.schema");
+        reservedMetadata.add("avro.codec");
+        RESERVED_METADATA = Collections.unmodifiableSet(reservedMetadata);
+    }
+
     // Properties
     public static final PropertyDescriptor METASTORE_URI = new PropertyDescriptor.Builder()
             .name("hive-stream-metastore-uri")
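An aside on the two keys reserved above: Avro's own container-file writer owns the avro.* metadata namespace, so those entries are not copied into the split files, and as far as I can tell attempting to set them is rejected outright. A minimal sketch, not part of this commit, with an invented application metadata key:

    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.generic.GenericRecord;

    public class ReservedMetadataSketch {
        public static void main(String[] args) {
            // Hypothetical writer, independent of the processor code above; it is never opened here.
            DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());

            // Application metadata copies through cleanly and survives a split.
            writer.setMeta("puthivestreaming.source", "example");

            // Keys in the reserved "avro." namespace are managed by the writer itself
            // (avro.codec via setCodec, avro.schema via create); setting them directly is
            // rejected with a runtime exception, which is why RESERVED_METADATA filters them out.
            try {
                writer.setMeta("avro.codec", "snappy");
            } catch (RuntimeException e) {
                System.out.println("Rejected reserved key: " + e.getMessage());
            }
        }
    }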
@@ -202,15 +223,20 @@ public class PutHiveStreaming extends AbstractProcessor {
     // Relationships
     public static final Relationship REL_SUCCESS = new Relationship.Builder()
             .name("success")
-            .description("A FlowFile is routed to this relationship after the database is successfully updated")
-            .build();
-    public static final Relationship REL_RETRY = new Relationship.Builder()
-            .name("retry")
-            .description("A FlowFile is routed to this relationship if the database cannot be updated but attempting the operation again may succeed")
+            .description("A FlowFile containing the JSON contents of a record is routed to this relationship after the record has been successfully transmitted to Hive.")
             .build();

     public static final Relationship REL_FAILURE = new Relationship.Builder()
             .name("failure")
-            .description("A FlowFile is routed to this relationship if the database cannot be updated and retrying the operation will also fail.")
+            .description("A FlowFile containing the JSON contents of a record is routed to this relationship if the record could not be transmitted to Hive.")
+            .build();
+
+    public static final Relationship REL_RETRY = new Relationship.Builder()
+            .name("retry")
+            .description("The incoming FlowFile is routed to this relationship if its records cannot be transmitted to Hive. Note that "
+                    + "some records may have been processed successfully, they will be routed (as JSON flow files) to the success relationship. "
+                    + "The combination of the retry, success, and failure relationships indicate how many records succeeded and/or failed. This "
+                    + "can be used to provide a retry capability since full rollback is not possible.")
             .build();

     private final static List<PropertyDescriptor> propertyDescriptors;
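A worked example of the routing contract these descriptions spell out, with invented numbers: an incoming file of ten records where seven reach Hive yields one Avro flow file on success and one on failure, each tagged with hivestreaming.record.count. The sketch below only illustrates the bookkeeping and is not part of the commit:

    import java.util.HashMap;
    import java.util.Map;

    public class RecordCountContractSketch {
        public static void main(String[] args) {
            // Attributes as PutHiveStreaming would write them on its two output flow files
            // (the values are illustrative, not produced by running the processor here).
            Map<String, String> successAttributes = new HashMap<>();
            successAttributes.put("hivestreaming.record.count", "7");

            Map<String, String> failureAttributes = new HashMap<>();
            failureAttributes.put("hivestreaming.record.count", "3");

            int succeeded = Integer.parseInt(successAttributes.get("hivestreaming.record.count"));
            int failed = Integer.parseInt(failureAttributes.get("hivestreaming.record.count"));

            // Together the two counts account for every record in the original flow file,
            // which is what makes a record-level retry of only the failed portion possible.
            System.out.println("original records = " + (succeeded + failed));   // 10
        }
    }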
@@ -333,105 +359,280 @@ public class PutHiveStreaming extends AbstractProcessor {
         }

         final ComponentLog log = getLogger();
-        try {
-            final List<String> partitionColumnList;
-            String partitionColumns = context.getProperty(PARTITION_COLUMNS).getValue();
-            if (StringUtils.isEmpty(partitionColumns)) {
-                partitionColumnList = Collections.emptyList();
-            } else {
-                String[] partitionCols = partitionColumns.split(",");
-                partitionColumnList = new ArrayList<>(partitionCols.length);
-                for (String col : partitionCols) {
-                    partitionColumnList.add(col.trim());
-                }
-            }
-
-            // Store the original class loader, then explicitly set it to this class's classloader (for use by the Hive Metastore)
-            ClassLoader originalClassloader = Thread.currentThread().getContextClassLoader();
-            Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
-
-            int recordCount = 0;
-            final List<HiveStreamingRecord> records = new LinkedList<>();
-
-            session.read(flowFile, in -> {
-
-                try (final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
-
-                    GenericRecord currRecord;
-                    while (reader.hasNext()) {
-                        currRecord = reader.next();
-                        List<String> partitionValues = new ArrayList<>();
-
-                        for (String partition : partitionColumnList) {
-                            Object partitionValue = currRecord.get(partition);
-                            if (partitionValue == null) {
-                                throw new IOException("Partition column '" + partition + "' not found in Avro record");
-                            }
-                            partitionValues.add(partitionValue.toString());
-                        }
-
-                        List<Schema.Field> fields = currRecord.getSchema().getFields();
-                        if (fields != null) {
-                            JSONObject obj = new JSONObject();
-                            for (Schema.Field field : fields) {
-                                String fieldName = field.name();
-                                // Skip fields that are partition columns, we extracted those values above to create an EndPoint
-                                if (!partitionColumnList.contains(fieldName)) {
-                                    Object value = currRecord.get(fieldName);
-                                    try {
-                                        obj.put(fieldName, value);
-                                    } catch (JSONException je) {
-                                        throw new IOException(je);
-                                    }
-                                }
-                            }
-                            records.add(new HiveStreamingRecord(partitionValues, obj));
-                        }
-                    }
-                }
-            });
-
-            // Write all records to Hive Streaming
-            for (HiveStreamingRecord record : records) {
-                HiveEndPoint endPoint = makeHiveEndPoint(record.getPartitionValues(), options);
-                HiveWriter writer = getOrCreateWriter(endPoint);
-                writer.write(record.getRecord().toString().getBytes(StandardCharsets.UTF_8));
-                recordCount++;
-            }
-
-            flowFile = session.putAttribute(flowFile, HIVE_STREAMING_RECORD_COUNT_ATTR, Integer.toString(recordCount));
-            flushAllWriters(true);
-
-            session.getProvenanceReporter().send(flowFile, options.getMetaStoreURI());
-            session.transfer(flowFile, REL_SUCCESS);
-
-            // Restore original class loader, might not be necessary but is good practice since the processor task changed it
-            Thread.currentThread().setContextClassLoader(originalClassloader);
-
-        } catch (HiveWriter.CommitFailure commitFailure) {
-            log.error("Error committing to Hive", commitFailure);
-            session.transfer(flowFile, REL_FAILURE);
-        } catch (HiveWriter.TxnBatchFailure | HiveWriter.TxnFailure txnFailure) {
-            log.error("Hive Streaming Transaction Failure", txnFailure);
-            session.transfer(flowFile, REL_FAILURE);
-        } catch (InterruptedException e) {
-            log.error("Hive Streaming Interrupted, flow file will be penalized and routed to retry", e);
-            flowFile = session.penalize(flowFile);
-            session.transfer(flowFile, REL_RETRY);
-        } catch (ConnectionError | HiveWriter.ConnectFailure ce) {
-            log.error("Error while connecting via Hive Streaming, flow file will be penalized and routed to retry", ce);
-            flowFile = session.penalize(flowFile);
-            session.transfer(flowFile, REL_RETRY);
-        } catch (SerializationError se) {
-            log.error("Serialization exception occurred, record not written to Hive.", se);
-            session.transfer(flowFile, REL_FAILURE);
-        } catch (HiveWriter.WriteFailure wf) {
-            log.error("Error while writing record to Hive Streaming", wf);
-            abortAndCloseWriters();
-            session.transfer(flowFile, REL_FAILURE);
-        }
-    }
+        final Integer txnsPerBatch = context.getProperty(TXNS_PER_BATCH).asInteger();
+
+        // Store the original class loader, then explicitly set it to this class's classloader (for use by the Hive Metastore)
+        ClassLoader originalClassloader = Thread.currentThread().getContextClassLoader();
+        Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
+
+        final List<String> partitionColumnList;
+        final String partitionColumns = context.getProperty(PARTITION_COLUMNS).getValue();
+        if (StringUtils.isEmpty(partitionColumns)) {
+            partitionColumnList = Collections.emptyList();
+        } else {
+            String[] partitionCols = partitionColumns.split(",");
+            partitionColumnList = new ArrayList<>(partitionCols.length);
+            for (String col : partitionCols) {
+                partitionColumnList.add(col.trim());
+            }
+        }
+
+        final AtomicInteger recordCount = new AtomicInteger(0);
+        final AtomicInteger successfulRecordCount = new AtomicInteger(0);
+        List<HiveStreamingRecord> successfulRecords = new LinkedList<>();
+        final FlowFile inputFlowFile = flowFile;
+        final AtomicBoolean incomingFlowFileTransferred = new AtomicBoolean(false);
+
+        // Create output flow files and their Avro writers
+        AtomicReference<FlowFile> successFlowFile = new AtomicReference<>(session.create(inputFlowFile));
+        final DataFileWriter<GenericRecord> successAvroWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
+        AtomicReference<FlowFile> failureFlowFile = new AtomicReference<>(session.create(inputFlowFile));
+        final DataFileWriter<GenericRecord> failureAvroWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
+
+        try {
+            session.read(inputFlowFile, in -> {
+
+                try (final DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>())) {
+                    GenericRecord currRecord = null;
+
+                    // Copy codec and schema information to all writers
+                    final String codec = reader.getMetaString(DataFileConstants.CODEC) == null
+                            ? DataFileConstants.NULL_CODEC
+                            : reader.getMetaString(DataFileConstants.CODEC);
+
+                    Arrays.asList(successAvroWriter, failureAvroWriter)
+                            .forEach((writer) -> {
+                                writer.setCodec(CodecFactory.fromString(codec));
+                                // Transfer metadata (this is a subset of the incoming file)
+                                for (String metaKey : reader.getMetaKeys()) {
+                                    if (!RESERVED_METADATA.contains(metaKey)) {
+                                        writer.setMeta(metaKey, reader.getMeta(metaKey));
+                                    }
+                                }
+                            });
+
+                    while (reader.hasNext()) {
+                        currRecord = reader.next(currRecord);
+                        recordCount.incrementAndGet();
+
+                        // Extract the partition values (they must be put separately into the Hive Streaming API)
+                        List<String> partitionValues = new ArrayList<>();
+                        try {
+                            for (String partition : partitionColumnList) {
+                                Object partitionValue = currRecord.get(partition);
+                                if (partitionValue == null) {
+                                    throw new IOException("Partition column '" + partition + "' not found in Avro record");
+                                }
+                                partitionValues.add(partitionValue.toString());
+                            }
+                        } catch (IOException ioe) {
+                            // Add the failed record to the failure flow file
+                            log.error("Error writing record to Hive Streaming transaction", ioe);
+                            appendRecordsToFlowFile(session, Collections.singletonList(new HiveStreamingRecord(null, currRecord)),
+                                    failureFlowFile, failureAvroWriter, reader);
+                            continue;
+                        }
+
+                        List<Schema.Field> fields = currRecord.getSchema().getFields();
+                        if (fields != null) {
+                            JSONObject obj = new JSONObject();
+                            try {
+                                for (Schema.Field field : fields) {
+                                    String fieldName = field.name();
+                                    // Skip fields that are partition columns, we extracted those values above to create an EndPoint
+                                    if (!partitionColumnList.contains(fieldName)) {
+                                        Object value = currRecord.get(fieldName);
+                                        try {
+                                            obj.put(fieldName, value);
+                                        } catch (JSONException je) {
+                                            throw new IOException(je);
+                                        }
+                                    }
+                                }
+                            } catch (IOException ioe) {
+                                // This really shouldn't happen since we are iterating over the schema fields, but just in case,
+                                // add the failed record to the failure flow file.
+                                log.error("Error writing record to Hive Streaming transaction", ioe);
+                                appendRecordsToFlowFile(session, Collections.singletonList(new HiveStreamingRecord(null, currRecord)),
+                                        failureFlowFile, failureAvroWriter, reader);
+                                continue;
+                            }
+                            final HiveStreamingRecord record = new HiveStreamingRecord(partitionValues, currRecord);
+                            HiveEndPoint endPoint = null;
+                            HiveWriter hiveWriter = null;
+                            try {
+                                endPoint = makeHiveEndPoint(record.getPartitionValues(), options);
+                                hiveWriter = getOrCreateWriter(endPoint);
+                            } catch (ConnectionError
+                                    | HiveWriter.ConnectFailure
+                                    | InterruptedException connectionError) {
+                                // Can't connect to Hive endpoint.
+                                log.error("Error connecting to Hive endpoint: table {} at {}",
+                                        new Object[]{options.getTableName(), options.getMetaStoreURI()});
+                                // If we can't connect to the endpoint, exit the loop and let the outer exception handler route the original flow file to retry
+                                abortAndCloseWriters();
+                                throw new ProcessException(connectionError);
+                            }
+                            try {
+                                try {
+                                    hiveWriter.write(record.getRecord().toString().getBytes(StandardCharsets.UTF_8));
+                                    successfulRecords.add(record);
+                                } catch (InterruptedException | HiveWriter.WriteFailure wf) {
+                                    // Add the failed record to the failure flow file
+                                    log.error("Error writing record to Hive Streaming transaction", wf);
+                                    appendRecordsToFlowFile(session, Collections.singletonList(record), failureFlowFile, failureAvroWriter, reader);
+                                }
+
+                                // If we've reached the transactions-per-batch limit, flush the Hive Writer and update the Avro Writer for successful records
+                                if (hiveWriter.getTotalRecords() >= txnsPerBatch) {
+                                    hiveWriter.flush(true);
+                                    // Now send the records to the success relationship and update the success count
+                                    try {
+                                        appendRecordsToFlowFile(session, successfulRecords, successFlowFile, successAvroWriter, reader);
+                                        successfulRecordCount.accumulateAndGet(successfulRecords.size(), (current, incr) -> current + incr);
+
+                                        // Clear the list of successful records, we'll use it at the end when we flush whatever records are left
+                                        successfulRecords.clear();
+
+                                    } catch (IOException ioe) {
+                                        // The records were put to Hive Streaming successfully, but there was an error while writing the
+                                        // Avro records to the flow file. Log as an error and move on.
+                                        getLogger().error("Error writing Avro records (which were sent successfully to Hive Streaming) to the flow file", ioe);
+                                    }
+                                }
+
+                            } catch (InterruptedException
+                                    | HiveWriter.CommitFailure
+                                    | HiveWriter.TxnBatchFailure
+                                    | HiveWriter.TxnFailure
+                                    | SerializationError writeException) {
+
+                                log.error("Error writing record to Hive Streaming transaction", writeException);
+                                // Add the failed record to the failure flow file
+                                appendRecordsToFlowFile(session, Collections.singletonList(record), failureFlowFile, failureAvroWriter, reader);
+
+                                if (!(writeException instanceof SerializationError)) {
+                                    try {
+                                        hiveWriter.abort();
+                                    } catch (Exception e) {
+                                        // Can't even abort properly, throw a process exception
+                                        throw new ProcessException(e);
+                                    }
+                                }
+                            }
+                        }
+                    }
+                    try {
+                        // Finish any transactions
+                        flushAllWriters(true);
+                        closeAllWriters();
+
+                        // Now send any remaining records to the success relationship and update the count
+                        appendRecordsToFlowFile(session, successfulRecords, successFlowFile, successAvroWriter, reader);
+                        successfulRecordCount.accumulateAndGet(successfulRecords.size(), (current, incr) -> current + incr);
+                        successfulRecords.clear();
+
+                    } catch (HiveWriter.CommitFailure
+                            | HiveWriter.TxnBatchFailure
+                            | HiveWriter.TxnFailure
+                            | InterruptedException e) {
+
+                        // If any records are in the successfulRecords list but ended up here, then they actually weren't transferred successfully, so
+                        // route them to failure instead
+                        appendRecordsToFlowFile(session, successfulRecords, failureFlowFile, failureAvroWriter, reader);
+                    }
+                } catch (IOException ioe) {
+                    // The Avro file is invalid (or may not be an Avro file at all), send it to failure
+                    log.error("The incoming flow file can not be read as an Avro file, routing to failure", ioe);
+                    session.transfer(inputFlowFile, REL_FAILURE);
+                    incomingFlowFileTransferred.set(true);
+                }
+            });
+
+            if (recordCount.get() > 0) {
+                if (successfulRecordCount.get() > 0) {
+                    // Transfer the flow file with successful records
+                    successFlowFile.set(
+                            session.putAttribute(successFlowFile.get(), HIVE_STREAMING_RECORD_COUNT_ATTR, Integer.toString(recordCount.get())));
+                    session.getProvenanceReporter().send(successFlowFile.get(), options.getMetaStoreURI());
+                    session.transfer(successFlowFile.get(), REL_SUCCESS);
+                } else {
+                    session.remove(successFlowFile.get());
+                }
+
+                if (recordCount.get() != successfulRecordCount.get()) {
+                    // There were some failed records, so transfer that flow file to failure
+                    failureFlowFile.set(
+                            session.putAttribute(failureFlowFile.get(), HIVE_STREAMING_RECORD_COUNT_ATTR,
+                                    Integer.toString(recordCount.get() - successfulRecordCount.get())));
+                    session.transfer(failureFlowFile.get(), REL_FAILURE);
+                } else {
+                    session.remove(failureFlowFile.get());
+                }
+            } else {
+                // No records were processed, so remove the output flow files
+                session.remove(successFlowFile.get());
+                session.remove(failureFlowFile.get());
+            }
+            successFlowFile.set(null);
+            failureFlowFile.set(null);
+
+            // If we got here, we've processed the outgoing flow files correctly, so remove the incoming one if necessary
+            if (!incomingFlowFileTransferred.get()) {
+                session.remove(flowFile);
+            }
+
+        } catch (ProcessException pe) {
+            abortAndCloseWriters();
+            Throwable t = pe.getCause();
+            if (t != null) {
+                if (t instanceof ConnectionError
+                        || t instanceof HiveWriter.ConnectFailure
+                        || t instanceof HiveWriter.CommitFailure
+                        || t instanceof HiveWriter.TxnBatchFailure
+                        || t instanceof HiveWriter.TxnFailure
+                        || t instanceof InterruptedException) {
+                    log.error("Hive Streaming connect/write error, flow file will be penalized and routed to retry", t);
+                    flowFile = session.penalize(flowFile);
+                    session.transfer(flowFile, REL_RETRY);
+                    // Remove the ones we created
+                    if (successFlowFile.get() != null) {
+                        session.remove(successFlowFile.get());
+                    }
+                    if (failureFlowFile.get() != null) {
+                        session.remove(failureFlowFile.get());
+                    }
+                } else {
+                    throw pe;
+                }
+            } else {
+                throw pe;
+            }
+        } finally {
+            // Restore original class loader, might not be necessary but is good practice since the processor task changed it
+            Thread.currentThread().setContextClassLoader(originalClassloader);
+        }
+    }
+
+    private void appendRecordsToFlowFile(ProcessSession session,
+                                         List<HiveStreamingRecord> records,
+                                         AtomicReference<FlowFile> appendFlowFile,
+                                         DataFileWriter<GenericRecord> avroWriter,
+                                         DataFileStream<GenericRecord> reader) throws IOException {
+
+        appendFlowFile.set(session.append(appendFlowFile.get(), (out) -> {
+
+            try (DataFileWriter<GenericRecord> writer = avroWriter.create(reader.getSchema(), out)) {
+                for (HiveStreamingRecord sRecord : records) {
+                    writer.append(sRecord.getRecord());
+                }
+                writer.flush();
+            }
+        }));
+    }

     @OnStopped
     public void cleanup() {
         ComponentLog log = getLogger();
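The mechanism that makes the per-record routing above possible is the Avro reader/writer hand-off in appendRecordsToFlowFile: records are read once from the incoming container and re-serialized into new containers that reuse the reader's schema. Below is a standalone sketch of that split pattern using plain java.io streams instead of a ProcessSession; the method name and the "keep this record" test are invented for illustration:

    import org.apache.avro.file.DataFileStream;
    import org.apache.avro.file.DataFileWriter;
    import org.apache.avro.generic.GenericDatumReader;
    import org.apache.avro.generic.GenericDatumWriter;
    import org.apache.avro.generic.GenericRecord;

    import java.io.ByteArrayOutputStream;
    import java.io.IOException;
    import java.io.InputStream;

    public class AvroSplitSketch {

        /**
         * Copies every record whose partition column is populated into a new Avro container.
         * The output schema comes from the reader, so the result stays compatible with the
         * input -- the same hand-off the processor performs, minus the session and Hive calls.
         */
        static byte[] copyMatchingRecords(InputStream in, String partitionColumn) throws IOException {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            try (DataFileStream<GenericRecord> reader = new DataFileStream<>(in, new GenericDatumReader<GenericRecord>());
                 DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>())
                         .create(reader.getSchema(), out)) {
                GenericRecord record = null;
                while (reader.hasNext()) {
                    record = reader.next(record);
                    if (record.get(partitionColumn) != null) {   // crude stand-in for "this record succeeded"
                        writer.append(record);
                    }
                }
            }
            return out.toByteArray();
        }
    }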
@@ -637,9 +838,9 @@ public class PutHiveStreaming extends AbstractProcessor {
     protected class HiveStreamingRecord {

         private List<String> partitionValues;
-        private JSONObject record;
+        private GenericRecord record;

-        public HiveStreamingRecord(List<String> partitionValues, JSONObject record) {
+        public HiveStreamingRecord(List<String> partitionValues, GenericRecord record) {
             this.partitionValues = partitionValues;
             this.record = record;
         }
@@ -648,7 +849,7 @@ public class PutHiveStreaming extends AbstractProcessor {
             return partitionValues;
         }

-        public JSONObject getRecord() {
+        public GenericRecord getRecord() {
             return record;
         }

HiveUtils.java

@@ -36,9 +36,6 @@ public class HiveUtils {
     private static final Logger LOG = LoggerFactory.getLogger(HiveUtils.class);

     public static HiveEndPoint makeEndPoint(List<String> partitionVals, HiveOptions options) throws ConnectionError {
-        if(partitionVals==null) {
-            return new HiveEndPoint(options.getMetaStoreURI(), options.getDatabaseName(), options.getTableName(), null);
-        }
         return new HiveEndPoint(options.getMetaStoreURI(), options.getDatabaseName(), options.getTableName(), partitionVals);
     }

HiveWriter.java

@@ -74,7 +74,7 @@ public class HiveWriter {
             this.txnBatch = nextTxnBatch(recordWriter);
             this.closed = false;
             this.lastUsed = System.currentTimeMillis();
-        } catch (InterruptedException | RuntimeException e) {
+        } catch (InterruptedException | RuntimeException | ConnectFailure e) {
             throw e;
         } catch (Exception e) {
             throw new ConnectFailure(endPoint, e);
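The single added alternative matters because the generic catch right below it wraps whatever it sees in a new ConnectFailure; presumably nextTxnBatch (or another call in the try block) can already throw ConnectFailure, and listing it in the multi-catch rethrows it once instead of nesting a ConnectFailure inside another one. A generic sketch of that rethrow-versus-wrap pattern, with stand-in exception types rather than the HiveWriter classes:

    public class RethrowSketch {

        static class DomainFailure extends Exception {
            DomainFailure(String message, Throwable cause) {
                super(message, cause);
            }
        }

        static void connect(boolean failDirectly) throws DomainFailure, InterruptedException {
            try {
                Thread.sleep(1);                 // stands in for blocking connection work (may throw InterruptedException)
                if (failDirectly) {
                    // A lower-level step that already reports the failure in domain terms.
                    throw new DomainFailure("endpoint refused", null);
                }
                legacyDriverCall();              // third-party code declaring a broad checked exception
            } catch (InterruptedException | RuntimeException | DomainFailure e) {
                throw e;                         // already meaningful: rethrow as-is, do not double-wrap
            } catch (Exception e) {
                throw new DomainFailure("endpoint refused", e);   // wrap everything else exactly once
            }
        }

        static void legacyDriverCall() throws Exception {
            // placeholder for a call whose signature forces the broad catch above
        }

        public static void main(String[] args) throws Exception {
            try {
                connect(true);
            } catch (DomainFailure expected) {
                System.out.println("caught once, not double-wrapped: " + expected.getMessage());
            }
        }
    }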
TestPutHiveStreaming.java

@@ -17,8 +17,10 @@
 package org.apache.nifi.processors.hive;

 import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
 import org.apache.avro.file.DataFileWriter;
 import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
 import org.apache.avro.io.DatumWriter;
@@ -31,17 +33,19 @@ import org.apache.hive.hcatalog.streaming.StreamingException;
 import org.apache.hive.hcatalog.streaming.TransactionBatch;
 import org.apache.nifi.hadoop.KerberosProperties;
 import org.apache.nifi.stream.io.ByteArrayOutputStream;
+import org.apache.nifi.util.MockFlowFile;
 import org.apache.nifi.util.NiFiProperties;
 import org.apache.nifi.util.TestRunner;
 import org.apache.nifi.util.TestRunners;
 import org.apache.nifi.util.hive.HiveOptions;
 import org.apache.nifi.util.hive.HiveWriter;
-import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;

+import java.io.ByteArrayInputStream;
 import java.io.File;
 import java.io.IOException;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.LinkedList;
@@ -49,6 +53,12 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutorService;

+import static org.apache.nifi.processors.hive.PutHiveStreaming.HIVE_STREAMING_RECORD_COUNT_ATTR;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
@@ -57,8 +67,8 @@ import static org.mockito.Mockito.when;
  */
 public class TestPutHiveStreaming {

-    TestRunner runner;
-    MockPutHiveStreaming processor;
+    private TestRunner runner;
+    private MockPutHiveStreaming processor;

     private KerberosProperties kerberosPropsWithFile;
     private KerberosProperties kerberosPropsWithoutFile;
@@ -84,12 +94,6 @@ public class TestPutHiveStreaming {
         runner = TestRunners.newTestRunner(processor);
     }

-    @After
-    public void tearDown() throws Exception {
-
-    }
-
-
     @Test
     public void testSetup() throws Exception {
         runner.setValidateExpressionUsage(false);
@@ -126,6 +130,17 @@ public class TestPutHiveStreaming {
         runner.run();
     }

+    @Test
+    public void testSingleBatchInvalid() throws Exception {
+        runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083");
+        runner.setProperty(PutHiveStreaming.DB_NAME, "default");
+        runner.setProperty(PutHiveStreaming.TABLE_NAME, "users");
+        runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "2");
+        runner.assertValid();
+        runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "1");
+        runner.assertNotValid();
+    }
+
     @Test
     public void onTrigger() throws Exception {
         runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083");
@@ -142,7 +157,76 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_SUCCESS);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1);
+        assertEquals("1", runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0).getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR));
+    }
+
+    @Test
+    public void onTriggerBadInput() throws Exception {
+        runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083");
+        runner.setProperty(PutHiveStreaming.DB_NAME, "default");
+        runner.setProperty(PutHiveStreaming.TABLE_NAME, "users");
+        runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100");
+        runner.setValidateExpressionUsage(false);
+        runner.enqueue("I am not an Avro record".getBytes());
+        runner.run();
+
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1);
+    }
+
+    @Test
+    public void onTriggerMultipleRecords() throws Exception {
+        runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083");
+        runner.setProperty(PutHiveStreaming.DB_NAME, "default");
+        runner.setProperty(PutHiveStreaming.TABLE_NAME, "users");
+        runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "2");
+        runner.setValidateExpressionUsage(false);
+        Map<String, Object> user1 = new HashMap<String, Object>() {
+            {
+                put("name", "Joe");
+                put("favorite_number", 146);
+            }
+        };
+        Map<String, Object> user2 = new HashMap<String, Object>() {
+            {
+                put("name", "Mary");
+                put("favorite_number", 42);
+            }
+        };
+        Map<String, Object> user3 = new HashMap<String, Object>() {
+            {
+                put("name", "Matt");
+                put("favorite_number", 3);
+            }
+        };
+        runner.enqueue(createAvroRecord(Arrays.asList(user1, user2, user3)));
+        runner.run();
+
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1);
+        MockFlowFile resultFlowFile = runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0);
+        assertNotNull(resultFlowFile);
+        assertEquals("3", resultFlowFile.getAttribute(PutHiveStreaming.HIVE_STREAMING_RECORD_COUNT_ATTR));
+        final DataFileStream<GenericRecord> reader = new DataFileStream<>(
+                new ByteArrayInputStream(resultFlowFile.toByteArray()),
+                new GenericDatumReader<GenericRecord>());
+
+        Schema schema = reader.getSchema();
+
+        // Verify that the schema is preserved
+        assertTrue(schema.equals(new Schema.Parser().parse(new File("src/test/resources/user.avsc"))));
+
+        // Verify the records are intact. We can't guarantee order so check the total number and non-null fields
+        assertTrue(reader.hasNext());
+        GenericRecord record = reader.next(null);
+        assertNotNull(record.get("name"));
+        assertNotNull(record.get("favorite_number"));
+        assertNull(record.get("favorite_color"));
+        assertNull(record.get("scale"));
+        assertTrue(reader.hasNext());
+        record = reader.next(record);
+        assertTrue(reader.hasNext());
+        reader.next(record);
+        assertFalse(reader.hasNext());
     }

     @Test
@@ -165,7 +249,35 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_SUCCESS);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 1);
+        assertEquals("1", runner.getFlowFilesForRelationship(PutHiveStreaming.REL_SUCCESS).get(0).getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR));
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0);
+    }
+
+    @Test
+    public void onTriggerWithPartitionColumnsNotInRecord() throws Exception {
+        runner.setProperty(PutHiveStreaming.METASTORE_URI, "thrift://localhost:9083");
+        runner.setProperty(PutHiveStreaming.DB_NAME, "default");
+        runner.setProperty(PutHiveStreaming.TABLE_NAME, "users");
+        runner.setProperty(PutHiveStreaming.TXNS_PER_BATCH, "100");
+        runner.setProperty(PutHiveStreaming.PARTITION_COLUMNS, "favorite_food");
+        runner.setProperty(PutHiveStreaming.AUTOCREATE_PARTITIONS, "false");
+        runner.setValidateExpressionUsage(false);
+        Map<String, Object> user1 = new HashMap<String, Object>() {
+            {
+                put("name", "Joe");
+                put("favorite_number", 146);
+                put("favorite_color", "blue");
+            }
+        };
+
+        runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
+        runner.run();
+
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0);
     }

     @Test
@@ -186,7 +298,9 @@ public class TestPutHiveStreaming {
         }
         runner.run(10);

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_SUCCESS);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 10);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0);
     }

     @Test
@@ -210,7 +324,9 @@ public class TestPutHiveStreaming {

         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run(1, true);
-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_SUCCESS);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 2);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0);
     }

     @Test
@@ -230,7 +346,9 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_RETRY);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 1);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0);
     }

     @Test
@@ -250,7 +368,7 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_RETRY);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 1);
     }

     @Test
@@ -267,10 +385,17 @@ public class TestPutHiveStreaming {
                 put("favorite_number", 146);
             }
         };
-        runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
+        Map<String, Object> user2 = new HashMap<String, Object>() {
+            {
+                put("name", "Mary");
+                put("favorite_number", 42);
+            }
+        };
+        runner.enqueue(createAvroRecord(Arrays.asList(user1, user2)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_FAILURE);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1);
+        assertEquals("2", runner.getFlowFilesForRelationship(PutHiveStreaming.REL_FAILURE).get(0).getAttribute(HIVE_STREAMING_RECORD_COUNT_ATTR));
     }

     @Test
@@ -290,7 +415,8 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_FAILURE);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1);
     }

     @Test
@@ -310,7 +436,9 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_FAILURE);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0);
     }

     @Test
@@ -330,7 +458,9 @@ public class TestPutHiveStreaming {
         runner.enqueue(createAvroRecord(Collections.singletonList(user1)));
         runner.run();

-        runner.assertAllFlowFilesTransferred(PutHiveStreaming.REL_FAILURE);
+        runner.assertTransferCount(PutHiveStreaming.REL_FAILURE, 1);
+        runner.assertTransferCount(PutHiveStreaming.REL_SUCCESS, 0);
+        runner.assertTransferCount(PutHiveStreaming.REL_RETRY, 0);
     }

     @Test
@@ -377,7 +507,6 @@ public class TestPutHiveStreaming {
             user.put("favorite_color", record.get("favorite_color"));
             users.add(user);
         }
-
         final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
         ByteArrayOutputStream out = new ByteArrayOutputStream();
         try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
@@ -387,6 +516,7 @@ public class TestPutHiveStreaming {
             }
         }
         return out.toByteArray();
+
     }

     private class MockPutHiveStreaming extends PutHiveStreaming {
src/test/resources/user.avsc

@@ -20,6 +20,7 @@
   "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number", "type": ["int", "null"]},
-    {"name": "favorite_color", "type": ["string", "null"]}
+    {"name": "favorite_color", "type": ["string", "null"]},
+    {"name": "scale", "type": ["double", "null"]}
   ]
 }
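The extra nullable scale column keeps the test schema aligned with the new onTriggerMultipleRecords assertions. A hedged sketch of building a record against this schema with the Avro generic API; the record name and namespace below are placeholders, since only the fields block is visible in this hunk:

    import org.apache.avro.Schema;
    import org.apache.avro.generic.GenericData;
    import org.apache.avro.generic.GenericRecord;

    public class UserRecordSketch {
        // Field list copied from user.avsc above; "example.user" is a placeholder record name.
        private static final String USER_SCHEMA_JSON =
                "{\"type\": \"record\", \"name\": \"user\", \"namespace\": \"example\", \"fields\": ["
                        + "{\"name\": \"name\", \"type\": \"string\"},"
                        + "{\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},"
                        + "{\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]},"
                        + "{\"name\": \"scale\", \"type\": [\"double\", \"null\"]}"
                        + "]}";

        public static void main(String[] args) {
            Schema schema = new Schema.Parser().parse(USER_SCHEMA_JSON);

            GenericRecord user = new GenericData.Record(schema);
            user.put("name", "Joe");
            user.put("favorite_number", 146);
            user.put("scale", 3.5d);
            // "favorite_color" is a ["string", "null"] union and is simply left null here.

            System.out.println(user);   // prints something like {"name": "Joe", "favorite_number": 146, "favorite_color": null, "scale": 3.5}
        }
    }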