mirror of https://github.com/apache/nifi.git
NIFI-5356: Added record.count and failure.count attributes to PutElasticsearchHttpRecord
Signed-off-by: Pierre Villard <pierre.villard.fr@gmail.com> This closes #2835.
This commit is contained in:
parent
22ec069acb
commit
ee86af0c15
|
@ -29,6 +29,8 @@ import okhttp3.ResponseBody;
|
|||
import org.apache.nifi.annotation.behavior.DynamicProperty;
|
||||
import org.apache.nifi.annotation.behavior.EventDriven;
|
||||
import org.apache.nifi.annotation.behavior.InputRequirement;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttribute;
|
||||
import org.apache.nifi.annotation.behavior.WritesAttributes;
|
||||
import org.apache.nifi.annotation.documentation.CapabilityDescription;
|
||||
import org.apache.nifi.annotation.documentation.Tags;
|
||||
import org.apache.nifi.annotation.lifecycle.OnScheduled;
|
||||
|
@ -88,6 +90,10 @@ import java.util.Set;
|
|||
+ "the index to insert into and the type of the document, as well as the operation type (index, upsert, delete, etc.). Note: The Bulk API is used to "
|
||||
+ "send the records. This means that the entire contents of the incoming flow file are read into memory, and each record is transformed into a JSON document "
|
||||
+ "which is added to a single HTTP request body. For very large flow files (files with a large number of records, e.g.), this could cause memory usage issues.")
|
||||
@WritesAttributes({
|
||||
@WritesAttribute(attribute="record.count", description="The number of records in an outgoing FlowFile. This is only populated on the 'success' relationship."),
|
||||
@WritesAttribute(attribute="failure.count", description="The number of records found by Elasticsearch to have errors. This is only populated on the 'failure' relationship.")
|
||||
})
|
||||
@DynamicProperty(
|
||||
name = "A URL query parameter",
|
||||
value = "The value to set it to",
|
||||
|
@ -308,6 +314,7 @@ public class PutElasticsearchHttpRecord extends AbstractElasticsearchHttpProcess
|
|||
final RecordPath recordPath = StringUtils.isEmpty(id_path) ? null : recordPathCache.getCompiled(id_path);
|
||||
final StringBuilder sb = new StringBuilder();
|
||||
|
||||
int recordCount = 0;
|
||||
try (final InputStream in = session.read(flowFile);
|
||||
final RecordReader reader = readerFactory.createRecordReader(flowFile, in, getLogger())) {
|
||||
|
||||
|
@ -341,6 +348,7 @@ public class PutElasticsearchHttpRecord extends AbstractElasticsearchHttpProcess
|
|||
json.append(out.toString());
|
||||
|
||||
buildBulkCommand(sb, index, docType, indexOp, id, json.toString());
|
||||
recordCount++;
|
||||
}
|
||||
} catch (IdentifierNotFoundException infe) {
|
||||
logger.error(infe.getMessage(), new Object[]{flowFile});
|
||||
|
@ -374,34 +382,40 @@ public class PutElasticsearchHttpRecord extends AbstractElasticsearchHttpProcess
|
|||
|
||||
JsonNode responseJson = parseJsonResponse(new ByteArrayInputStream(bodyBytes));
|
||||
boolean errors = responseJson.get("errors").asBoolean(false);
|
||||
int failureCount = 0;
|
||||
// ES has no rollback, so if errors occur, log them and route the whole flow file to failure
|
||||
if (errors) {
|
||||
ArrayNode itemNodeArray = (ArrayNode) responseJson.get("items");
|
||||
if (itemNodeArray.size() > 0) {
|
||||
// All items are returned whether they succeeded or failed, so iterate through the item array
|
||||
// at the same time as the flow file list, moving each to success or failure accordingly,
|
||||
// but only keep the first error for logging
|
||||
String errorReason = null;
|
||||
for (int i = itemNodeArray.size() - 1; i >= 0; i--) {
|
||||
JsonNode itemNode = itemNodeArray.get(i);
|
||||
int status = itemNode.findPath("status").asInt();
|
||||
if (!isSuccess(status)) {
|
||||
if (errorReason == null) {
|
||||
// Use "result" if it is present; this happens for status codes like 404 Not Found, which may not have an error/reason
|
||||
String reason = itemNode.findPath("result").asText();
|
||||
if (StringUtils.isEmpty(reason)) {
|
||||
// If there was no result, we expect an error with a string description in the "reason" field
|
||||
reason = itemNode.findPath("reason").asText();
|
||||
if(itemNodeArray != null) {
|
||||
if (itemNodeArray.size() > 0) {
|
||||
// All items are returned whether they succeeded or failed, so iterate through the item array
|
||||
// at the same time as the flow file list, moving each to success or failure accordingly,
|
||||
// but only keep the first error for logging
|
||||
String errorReason = null;
|
||||
for (int i = itemNodeArray.size() - 1; i >= 0; i--) {
|
||||
JsonNode itemNode = itemNodeArray.get(i);
|
||||
int status = itemNode.findPath("status").asInt();
|
||||
if (!isSuccess(status)) {
|
||||
if (errorReason == null) {
|
||||
// Use "result" if it is present; this happens for status codes like 404 Not Found, which may not have an error/reason
|
||||
String reason = itemNode.findPath("result").asText();
|
||||
if (StringUtils.isEmpty(reason)) {
|
||||
// If there was no result, we expect an error with a string description in the "reason" field
|
||||
reason = itemNode.findPath("reason").asText();
|
||||
}
|
||||
errorReason = reason;
|
||||
logger.error("Failed to process {} due to {}, transferring to failure",
|
||||
new Object[]{flowFile, errorReason});
|
||||
}
|
||||
errorReason = reason;
|
||||
logger.error("Failed to process {} due to {}, transferring to failure",
|
||||
new Object[]{flowFile, errorReason});
|
||||
failureCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
flowFile = session.putAttribute(flowFile, "failure.count", Integer.toString(failureCount));
|
||||
session.transfer(flowFile, REL_FAILURE);
|
||||
} else {
|
||||
flowFile = session.putAttribute(flowFile, "record.count", Integer.toString(recordCount));
|
||||
session.transfer(flowFile, REL_SUCCESS);
|
||||
session.getProvenanceReporter().send(flowFile, url.toString());
|
||||
}
|
||||
|
|
|
@ -78,6 +78,7 @@ public class TestPutElasticsearchHttpRecord {
|
|||
final MockFlowFile out = runner.getFlowFilesForRelationship(PutElasticsearchHttpRecord.REL_SUCCESS).get(0);
|
||||
assertNotNull(out);
|
||||
out.assertAttributeEquals("doc_id", "28039652140");
|
||||
out.assertAttributeEquals("record.count", "4");
|
||||
List<ProvenanceEventRecord> provEvents = runner.getProvenanceEvents();
|
||||
assertNotNull(provEvents);
|
||||
assertEquals(1, provEvents.size());
|
||||
|
@ -263,8 +264,10 @@ public class TestPutElasticsearchHttpRecord {
|
|||
runner.enqueue(new byte[0]);
|
||||
runner.run(1, true, true);
|
||||
|
||||
runner.assertTransferCount(PutElasticsearchHttpRecord.REL_FAILURE, 1);
|
||||
runner.assertTransferCount(PutElasticsearchHttpRecord.REL_SUCCESS, 0);
|
||||
runner.assertTransferCount(PutElasticsearchHttpRecord.REL_FAILURE, 1);
|
||||
MockFlowFile flowFile = runner.getFlowFilesForRelationship(PutElasticsearchHttpRecord.REL_FAILURE).get(0);
|
||||
flowFile.assertAttributeEquals("failure.count", "1");
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue