From c6e6a418aa1ff78bf508b75dc923f6c8965bb76b Mon Sep 17 00:00:00 2001
From: Peter Turcsanyi
Date: Tue, 28 May 2019 00:22:25 +0200
Subject: [PATCH] NIFI-6318: Support EL in CSV formatting properties

CSVReader and CSVRecordSetWriter controller services and also ConvertExcelToCSVProcessor
support EL for Value Separator, Quote Character and Escape Character properties.

NIFI-6318: Fixed null checks and compound OR expression.

NIFI-6318: RecordSetWriterFactory.createWriter() changes.

NIFI-6318: Initialize CSVFormat in onEnabled() if there are no dynamic formatting properties.

NIFI-6318: Comment Marker supports EL.

NIFI-6318: Various review changes.

This closes #3504.

Signed-off-by: Koji Kawamura
---
 .../apache/nifi/util/MockPropertyValue.java | 2 +-
 .../azure/eventhub/ConsumeAzureEventHub.java | 2 +-
 .../eventhub/TestConsumeAzureEventHub.java | 3 +-
 .../nifi/processors/druid/PutDruidRecord.java | 6 +-
 .../PutElasticsearchHttpRecord.java | 4 +-
 .../hadoop/AbstractFetchHDFSRecord.java | 2 +-
 .../record/ArrayListRecordWriter.java | 6 +-
 .../record/MockRecordWriter.java | 2 +-
 .../java/org/apache/nifi/csv/CSVUtils.java | 110 +-
 .../org/apache/nifi/csv/CSVValidators.java | 49 +-
 .../org/apache/nifi/csv/CSVUtilsTest.java | 152 ++
 .../jolt/record/JoltTransformRecord.java | 4 +-
 .../kafka/pubsub/ConsumerLease.java | 2 +-
 .../kafka/pubsub/PublisherLease.java | 2 +-
 .../kafka/pubsub/TestPublisherLease.java | 4 +-
 .../kafka/pubsub/util/MockRecordWriter.java | 2 +-
 .../kafka/pubsub/ConsumerLease.java | 2 +-
 .../kafka/pubsub/PublisherLease.java | 2 +-
 .../kafka/pubsub/TestPublisherLease.java | 4 +-
 .../kafka/pubsub/util/MockRecordWriter.java | 2 +-
 .../kafka/pubsub/ConsumerLease.java | 2 +-
 .../kafka/pubsub/PublisherLease.java | 2 +-
 .../kafka/pubsub/TestPublisherLease.java | 4 +-
 .../kafka/pubsub/util/MockRecordWriter.java | 2 +-
 .../kafka/pubsub/ConsumerLease.java | 2 +-
 .../kafka/pubsub/PublisherLease.java | 2 +-
 .../kafka/pubsub/TestPublisherLease.java | 4 +-
 .../kafka/pubsub/util/MockRecordWriter.java | 2 +-
 .../processors/mongodb/GetMongoRecord.java | 2 +-
 .../processors/parquet/FetchParquetTest.java | 3 +-
 .../poi/ConvertExcelToCSVProcessor.java | 2 +-
 .../poi/ConvertExcelToCSVProcessorTest.java | 98 +-
 .../script/ScriptedRecordSetWriter.java | 4 +-
 .../script/ScriptedRecordSetWriterTest.groovy | 2 +-
 .../groovy/test_record_writer_inline.groovy | 2 +-
 .../AbstractSiteToSiteReportingTask.java | 2 +-
 .../apache/nifi/processors/solr/GetSolr.java | 3 +-
 .../nifi/processors/solr/QuerySolr.java | 1231 +++++++++--------
 .../standard/AbstractRecordProcessor.java | 4 +-
 .../standard/AbstractRouteRecord.java | 4 +-
 .../nifi/processors/standard/ForkRecord.java | 2 +-
 .../processors/standard/ListenTCPRecord.java | 2 +-
 .../processors/standard/ListenUDPRecord.java | 2 +-
 .../processors/standard/PartitionRecord.java | 2 +-
 .../nifi/processors/standard/QueryRecord.java | 2 +-
 .../nifi/processors/standard/SplitRecord.java | 2 +-
 .../processors/standard/ValidateRecord.java | 2 +-
 .../processors/standard/merge/RecordBin.java | 2 +-
 .../standard/sql/RecordSqlWriter.java | 5 +-
 .../standard/TestConvertRecord.java | 55 +-
 .../processors/standard/TestQueryRecord.java | 2 +-
 .../serialization/RecordSetWriterFactory.java | 45 +-
 .../apache/nifi/avro/AvroRecordSetWriter.java | 3 +-
 .../nifi/csv/CSVHeaderSchemaStrategy.java | 2 +-
 .../java/org/apache/nifi/csv/CSVReader.java | 36 +-
 .../apache/nifi/csv/CSVRecordSetWriter.java | 28 +-
 .../org/apache/nifi/csv/CSVRecordSource.java | 5 +-
 .../apache/nifi/json/JsonRecordSetWriter.java | 3 +-
 .../text/FreeFormTextRecordSetWriter.java | 3 +-
 .../apache/nifi/xml/XMLRecordSetWriter.java | 419 +++---
 .../nifi/csv/TestCSVHeaderSchemaStrategy.java | 35 +-
 .../nifi/csv/TestCSVSchemaInference.java | 49 +-
 .../apache/nifi/csv/TestCSVValidators.java | 21 +-
 .../nifi/xml/TestXMLRecordSetWriter.java | 495 +++---
 .../xml/TestXMLRecordSetWriterProcessor.java | 245 ++--
 65 files changed, 1876 insertions(+), 1330 deletions(-)
 create mode 100644 nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/test/java/org/apache/nifi/csv/CSVUtilsTest.java

diff --git a/nifi-mock/src/main/java/org/apache/nifi/util/MockPropertyValue.java b/nifi-mock/src/main/java/org/apache/nifi/util/MockPropertyValue.java
index 6e38a0847a..ec2d08be56 100644
--- a/nifi-mock/src/main/java/org/apache/nifi/util/MockPropertyValue.java
+++ b/nifi-mock/src/main/java/org/apache/nifi/util/MockPropertyValue.java
@@ -294,7 +294,7 @@ public class MockPropertyValue implements PropertyValue {

     @Override
     public boolean isExpressionLanguagePresent() {
-        if (!Boolean.TRUE.equals(expectExpressions)) {
+        if (rawValue == null) {
             return false;
         }

diff --git a/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/eventhub/ConsumeAzureEventHub.java b/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/eventhub/ConsumeAzureEventHub.java
index 4eb0da56f7..fb7699f8c1 100644
--- a/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/eventhub/ConsumeAzureEventHub.java
+++ b/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/eventhub/ConsumeAzureEventHub.java
@@ -465,7 +465,7 @@ public class ConsumeAzureEventHub extends AbstractSessionFactoryProcessor {
                     // Initialize the writer when the first record is read.
final RecordSchema readerSchema = record.getSchema(); final RecordSchema writeSchema = writerFactory.getSchema(schemaRetrievalVariables, readerSchema); - writer = writerFactory.createWriter(logger, writeSchema, out); + writer = writerFactory.createWriter(logger, writeSchema, out, flowFile); writer.beginRecordSet(); } diff --git a/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/test/java/org/apache/nifi/processors/azure/eventhub/TestConsumeAzureEventHub.java b/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/test/java/org/apache/nifi/processors/azure/eventhub/TestConsumeAzureEventHub.java index 251664bf93..3946cba0bf 100644 --- a/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/test/java/org/apache/nifi/processors/azure/eventhub/TestConsumeAzureEventHub.java +++ b/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/test/java/org/apache/nifi/processors/azure/eventhub/TestConsumeAzureEventHub.java @@ -18,6 +18,7 @@ package org.apache.nifi.processors.azure.eventhub; import com.microsoft.azure.eventhubs.EventData; import com.microsoft.azure.eventprocessorhost.PartitionContext; +import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.processor.ProcessSessionFactory; import org.apache.nifi.processor.ProcessorInitializationContext; import org.apache.nifi.provenance.ProvenanceEventRecord; @@ -180,7 +181,7 @@ public class TestConsumeAzureEventHub { processor.setWriterFactory(writerFactory); final RecordSetWriter writer = mock(RecordSetWriter.class); final AtomicReference outRef = new AtomicReference<>(); - when(writerFactory.createWriter(any(), any(), any())).thenAnswer(invocation -> { + when(writerFactory.createWriter(any(), any(), any(), any(FlowFile.class))).thenAnswer(invocation -> { outRef.set(invocation.getArgument(2)); return writer; }); diff --git a/nifi-nar-bundles/nifi-druid-bundle/nifi-druid-processors/src/main/java/org/apache/nifi/processors/druid/PutDruidRecord.java b/nifi-nar-bundles/nifi-druid-bundle/nifi-druid-processors/src/main/java/org/apache/nifi/processors/druid/PutDruidRecord.java index fa6cfac4cc..fd0ee3773d 100644 --- a/nifi-nar-bundles/nifi-druid-bundle/nifi-druid-processors/src/main/java/org/apache/nifi/processors/druid/PutDruidRecord.java +++ b/nifi-nar-bundles/nifi-druid-bundle/nifi-druid-processors/src/main/java/org/apache/nifi/processors/druid/PutDruidRecord.java @@ -187,11 +187,11 @@ public class PutDruidRecord extends AbstractSessionFactoryProcessor { final RecordReader reader = recordParserFactory.createRecordReader(flowFile, in, getLogger()); final RecordSchema outSchema = writerFactory.getSchema(attributes, reader.getSchema()); - droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream); + droppedRecordWriter = writerFactory.createWriter(log, outSchema, droppedOutputStream, flowFile); droppedRecordWriter.beginRecordSet(); - failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream); + failedRecordWriter = writerFactory.createWriter(log, outSchema, failedOutputStream, flowFile); failedRecordWriter.beginRecordSet(); - successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream); + successfulRecordWriter = writerFactory.createWriter(log, outSchema, successfulOutputStream, flowFile); successfulRecordWriter.beginRecordSet(); Record r; diff --git a/nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-processors/src/main/java/org/apache/nifi/processors/elasticsearch/PutElasticsearchHttpRecord.java 
b/nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-processors/src/main/java/org/apache/nifi/processors/elasticsearch/PutElasticsearchHttpRecord.java index 7f6140f9a6..5879865994 100644 --- a/nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-processors/src/main/java/org/apache/nifi/processors/elasticsearch/PutElasticsearchHttpRecord.java +++ b/nifi-nar-bundles/nifi-elasticsearch-bundle/nifi-elasticsearch-processors/src/main/java/org/apache/nifi/processors/elasticsearch/PutElasticsearchHttpRecord.java @@ -558,8 +558,8 @@ public class PutElasticsearchHttpRecord extends AbstractElasticsearchHttpProcess final RecordSchema schema = writerFactory.getSchema(inputFlowFile.getAttributes(), reader.getSchema()); - try (final RecordSetWriter successWriter = writerFactory.createWriter(getLogger(), schema, successOut); - final RecordSetWriter failedWriter = writerFactory.createWriter(getLogger(), schema, failedOut)) { + try (final RecordSetWriter successWriter = writerFactory.createWriter(getLogger(), schema, successOut, successFlowFile); + final RecordSetWriter failedWriter = writerFactory.createWriter(getLogger(), schema, failedOut, failedFlowFile)) { successWriter.beginRecordSet(); failedWriter.beginRecordSet(); diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-hadoop-record-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractFetchHDFSRecord.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-hadoop-record-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractFetchHDFSRecord.java index 41b0365cae..7248e8f531 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-hadoop-record-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractFetchHDFSRecord.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-hadoop-record-utils/src/main/java/org/apache/nifi/processors/hadoop/AbstractFetchHDFSRecord.java @@ -199,7 +199,7 @@ public abstract class AbstractFetchHDFSRecord extends AbstractHadoopProcessor { final RecordSchema schema = recordSetWriterFactory.getSchema(originalFlowFile.getAttributes(), record == null ? 
null : record.getSchema()); - try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out)) { + try (final RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(getLogger(), schema, out, originalFlowFile)) { recordSetWriter.beginRecordSet(); if (record != null) { recordSetWriter.write(record); diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/ArrayListRecordWriter.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/ArrayListRecordWriter.java index 16ad044c66..44303aef06 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/ArrayListRecordWriter.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/ArrayListRecordWriter.java @@ -30,8 +30,8 @@ import java.util.List; import java.util.Map; /** - * An implementation that is suitable for testing that does not serialize the data to an Output Stream but insted just buffers the data into an - * ArrayList and then provides that List of written records to the user + * An implementation that is suitable for testing that does not serialize the data to an Output Stream but instead just buffers the data into an + * ArrayList and then provides that List of written records to the user. */ public class ArrayListRecordWriter extends AbstractControllerService implements RecordSetWriterFactory { private final List records = new ArrayList<>(); @@ -48,7 +48,7 @@ public class ArrayListRecordWriter extends AbstractControllerService implements } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new ArrayListRecordSetWriter(records); } diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/MockRecordWriter.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/MockRecordWriter.java index 9d6b0878f1..523510900c 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/MockRecordWriter.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-mock-record-utils/src/main/java/org/apache/nifi/serialization/record/MockRecordWriter.java @@ -70,7 +70,7 @@ public class MockRecordWriter extends AbstractControllerService implements Recor } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream rawOut) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream rawOut, Map variables) { final OutputStream out = bufferOutput ? 
new BufferedOutputStream(rawOut) : rawOut; return new RecordSetWriter() { diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java index 3f3814ee99..da0eaefc90 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVUtils.java @@ -26,9 +26,15 @@ import org.apache.nifi.components.PropertyValue; import org.apache.nifi.context.PropertyContext; import org.apache.nifi.expression.ExpressionLanguageScope; import org.apache.nifi.processor.util.StandardValidators; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; public class CSVUtils { + private static Logger LOG = LoggerFactory.getLogger(CSVUtils.class); + public static final AllowableValue CUSTOM = new AllowableValue("custom", "Custom Format", "The format of the CSV is configured by using the properties of this Controller Service, such as Value Separator"); public static final AllowableValue RFC_4180 = new AllowableValue("rfc-4180", "RFC 4180", "CSV data follows the RFC 4180 Specification defined at https://tools.ietf.org/html/rfc4180"); @@ -49,17 +55,19 @@ public class CSVUtils { .build(); public static final PropertyDescriptor VALUE_SEPARATOR = new PropertyDescriptor.Builder() .name("Value Separator") - .description("The character that is used to separate values/fields in a CSV Record") + .description("The character that is used to separate values/fields in a CSV Record. If the property has been specified via Expression Language " + + "but the expression gets evaluated to an invalid Value Separator at runtime, then it will be skipped and the default Value Separator will be used.") .addValidator(CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) .defaultValue(",") .required(true) .build(); public static final PropertyDescriptor QUOTE_CHAR = new PropertyDescriptor.Builder() .name("Quote Character") - .description("The character that is used to quote values so that escape characters do not have to be used") + .description("The character that is used to quote values so that escape characters do not have to be used. If the property has been specified via Expression Language " + + "but the expression gets evaluated to an invalid Quote Character at runtime, then it will be skipped and the default Quote Character will be used.") .addValidator(new CSVValidators.SingleCharacterValidator()) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) .defaultValue("\"") .required(true) .build(); @@ -92,14 +100,15 @@ public class CSVUtils { .name("Comment Marker") .description("The character that is used to denote the start of a comment. 
Any line that begins with this comment will be ignored.") .addValidator(new CSVValidators.SingleCharacterValidator()) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) .required(false) .build(); public static final PropertyDescriptor ESCAPE_CHAR = new PropertyDescriptor.Builder() .name("Escape Character") - .description("The character that is used to escape characters that would otherwise have a specific meaning to the CSV Parser.") + .description("The character that is used to escape characters that would otherwise have a specific meaning to the CSV Parser. If the property has been specified via Expression Language " + + "but the expression gets evaluated to an invalid Escape Character at runtime, then it will be skipped and the default Escape Character will be used.") .addValidator(new CSVValidators.SingleCharacterValidator()) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) .defaultValue("\\") .required(true) .build(); @@ -168,10 +177,19 @@ public class CSVUtils { .required(true) .build(); - public static CSVFormat createCSVFormat(final PropertyContext context) { + public static boolean isDynamicCSVFormat(final PropertyContext context) { + final String formatName = context.getProperty(CSV_FORMAT).getValue(); + return formatName.equalsIgnoreCase(CUSTOM.getValue()) + && (context.getProperty(VALUE_SEPARATOR).isExpressionLanguagePresent() + || context.getProperty(QUOTE_CHAR).isExpressionLanguagePresent() + || context.getProperty(ESCAPE_CHAR).isExpressionLanguagePresent() + || context.getProperty(COMMENT_MARKER).isExpressionLanguagePresent()); + } + + public static CSVFormat createCSVFormat(final PropertyContext context, final Map variables) { final String formatName = context.getProperty(CSV_FORMAT).getValue(); if (formatName.equalsIgnoreCase(CUSTOM.getValue())) { - return buildCustomFormat(context); + return buildCustomFormat(context, variables); } if (formatName.equalsIgnoreCase(RFC_4180.getValue())) { return CSVFormat.RFC4180; @@ -190,50 +208,87 @@ public class CSVUtils { } } - private static char getUnescapedChar(final PropertyContext context, final PropertyDescriptor property) { - return StringEscapeUtils.unescapeJava(context.getProperty(property).getValue()).charAt(0); + private static Character getCharUnescapedJava(final PropertyContext context, final PropertyDescriptor property, final Map variables) { + String value = context.getProperty(property).evaluateAttributeExpressions(variables).getValue(); + + if (value != null) { + String unescaped = unescapeJava(value); + if (unescaped.length() == 1) { + return unescaped.charAt(0); + } + } + + LOG.warn("'{}' property evaluated to an invalid value: \"{}\". It must be a single character. 
The property value will be ignored.", property.getName(), value); + + if (property.getDefaultValue() != null) { + return property.getDefaultValue().charAt(0); + } else { + return null; + } } - private static char getChar(final PropertyContext context, final PropertyDescriptor property) { - return CSVUtils.unescape(context.getProperty(property).getValue()).charAt(0); + private static Character getCharUnescaped(final PropertyContext context, final PropertyDescriptor property, final Map variables) { + String value = context.getProperty(property).evaluateAttributeExpressions(variables).getValue(); + + if (value != null) { + String unescaped = unescape(value); + if (unescaped.length() == 1) { + return unescaped.charAt(0); + } + } + + LOG.warn("'{}' property evaluated to an invalid value: \"{}\". It must be a single character. The property value will be ignored.", property.getName(), value); + + if (property.getDefaultValue() != null) { + return property.getDefaultValue().charAt(0); + } else { + return null; + } } - private static CSVFormat buildCustomFormat(final PropertyContext context) { - final char valueSeparator = getUnescapedChar(context, VALUE_SEPARATOR); + private static CSVFormat buildCustomFormat(final PropertyContext context, final Map variables) { + final Character valueSeparator = getCharUnescapedJava(context, VALUE_SEPARATOR, variables); CSVFormat format = CSVFormat.newFormat(valueSeparator) .withAllowMissingColumnNames() .withIgnoreEmptyLines(); - final PropertyValue skipHeaderPropertyValue = context.getProperty(FIRST_LINE_IS_HEADER); - if (skipHeaderPropertyValue.getValue() != null && skipHeaderPropertyValue.asBoolean()) { + final PropertyValue firstLineIsHeaderPropertyValue = context.getProperty(FIRST_LINE_IS_HEADER); + if (firstLineIsHeaderPropertyValue.getValue() != null && firstLineIsHeaderPropertyValue.asBoolean()) { format = format.withFirstRecordAsHeader(); } - format = format.withQuote(getChar(context, QUOTE_CHAR)); - format = format.withEscape(getChar(context, ESCAPE_CHAR)); + final Character quoteChar = getCharUnescaped(context, QUOTE_CHAR, variables); + format = format.withQuote(quoteChar); + + final Character escapeChar = getCharUnescaped(context, ESCAPE_CHAR, variables); + format = format.withEscape(escapeChar); + format = format.withTrim(context.getProperty(TRIM_FIELDS).asBoolean()); if (context.getProperty(COMMENT_MARKER).isSet()) { - format = format.withCommentMarker(getChar(context, COMMENT_MARKER)); + final Character commentMarker = getCharUnescaped(context, COMMENT_MARKER, variables); + if (commentMarker != null) { + format = format.withCommentMarker(commentMarker); + } } if (context.getProperty(NULL_STRING).isSet()) { - format = format.withNullString(CSVUtils.unescape(context.getProperty(NULL_STRING).getValue())); + format = format.withNullString(unescape(context.getProperty(NULL_STRING).getValue())); } final PropertyValue quoteValue = context.getProperty(QUOTE_MODE); - if (quoteValue != null) { + if (quoteValue != null && quoteValue.isSet()) { final QuoteMode quoteMode = QuoteMode.valueOf(quoteValue.getValue()); format = format.withQuoteMode(quoteMode); } final PropertyValue trailingDelimiterValue = context.getProperty(TRAILING_DELIMITER); - if (trailingDelimiterValue != null) { + if (trailingDelimiterValue != null && trailingDelimiterValue.isSet()) { final boolean trailingDelimiter = trailingDelimiterValue.asBoolean(); format = format.withTrailingDelimiter(trailingDelimiter); } final PropertyValue recordSeparator = context.getProperty(RECORD_SEPARATOR); - 
if (recordSeparator != null) { + if (recordSeparator != null && recordSeparator.isSet()) { final String separator = unescape(recordSeparator.getValue()); format = format.withRecordSeparator(separator); } @@ -241,6 +296,13 @@ public class CSVUtils { return format; } + public static String unescapeJava(String input) { + if (input != null && input.length() > 1) { + input = StringEscapeUtils.unescapeJava(input); + } + return input; + } + public static String unescape(final String input) { if (input == null) { diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java index 0f6a22f7b4..f9c01d8146 100644 --- a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/main/java/org/apache/nifi/csv/CSVValidators.java @@ -17,7 +17,6 @@ package org.apache.nifi.csv; -import org.apache.commons.text.StringEscapeUtils; import org.apache.nifi.components.ValidationContext; import org.apache.nifi.components.ValidationResult; import org.apache.nifi.components.Validator; @@ -47,23 +46,25 @@ public class CSVValidators { .build(); } - final String unescaped = CSVUtils.unescape(input); - if (unescaped.length() != 1) { - return new ValidationResult.Builder() - .input(input) - .subject(subject) - .valid(false) - .explanation("Value must be exactly 1 character but was " + input.length() + " in length") - .build(); - } + if (!context.isExpressionLanguageSupported(subject) || !context.isExpressionLanguagePresent(input)) { + final String unescaped = CSVUtils.unescape(input); + if (unescaped.length() != 1) { + return new ValidationResult.Builder() + .input(input) + .subject(subject) + .valid(false) + .explanation("Value must be exactly 1 character but was " + input.length() + " in length") + .build(); + } - if (illegalChars.contains(unescaped)) { - return new ValidationResult.Builder() - .input(input) - .subject(subject) - .valid(false) - .explanation(input + " is not a valid character for this property") - .build(); + if (illegalChars.contains(unescaped)) { + return new ValidationResult.Builder() + .input(input) + .subject(subject) + .valid(false) + .explanation(input + " is not a valid character for this property") + .build(); + } } return new ValidationResult.Builder() @@ -88,22 +89,16 @@ public class CSVValidators { .build(); } - String unescapeString = unescapeString(input); + String unescaped = CSVUtils.unescapeJava(input); return new ValidationResult.Builder() .subject(subject) - .input(unescapeString) + .input(unescaped) .explanation("Only non-null single characters are supported") - .valid((unescapeString.length() == 1 && unescapeString.charAt(0) != 0) || context.isExpressionLanguagePresent(input)) + .valid((unescaped.length() == 1 && unescaped.charAt(0) != 0) + || (context.isExpressionLanguageSupported(subject) && context.isExpressionLanguagePresent(input))) .build(); } - - private String unescapeString(String input) { - if (input != null && input.length() > 1) { - input = StringEscapeUtils.unescapeJava(input); - } - return input; - } }; } \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/test/java/org/apache/nifi/csv/CSVUtilsTest.java 
b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/test/java/org/apache/nifi/csv/CSVUtilsTest.java new file mode 100644 index 0000000000..b3f4e08871 --- /dev/null +++ b/nifi-nar-bundles/nifi-extension-utils/nifi-record-utils/nifi-standard-record-utils/src/test/java/org/apache/nifi/csv/CSVUtilsTest.java @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.csv; + +import org.apache.commons.csv.CSVFormat; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.context.PropertyContext; +import org.apache.nifi.util.MockConfigurationContext; +import org.junit.Test; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +public class CSVUtilsTest { + + @Test + public void testIsDynamicCSVFormatWithStaticProperties() { + PropertyContext context = createContext("|", "'", "^", "~"); + + boolean isDynamicCSVFormat = CSVUtils.isDynamicCSVFormat(context); + + assertFalse(isDynamicCSVFormat); + } + + @Test + public void testIsDynamicCSVFormatWithDynamicValueSeparator() { + PropertyContext context = createContext("${csv.delimiter}", "'", "^", "~"); + + boolean isDynamicCSVFormat = CSVUtils.isDynamicCSVFormat(context); + + assertTrue(isDynamicCSVFormat); + } + + @Test + public void testIsDynamicCSVFormatWithDynamicQuoteCharacter() { + PropertyContext context = createContext("|", "${csv.quote}", "^", "~"); + + boolean isDynamicCSVFormat = CSVUtils.isDynamicCSVFormat(context); + + assertTrue(isDynamicCSVFormat); + } + + @Test + public void testIsDynamicCSVFormatWithDynamicEscapeCharacter() { + PropertyContext context = createContext("|", "'", "${csv.escape}", "~"); + + boolean isDynamicCSVFormat = CSVUtils.isDynamicCSVFormat(context); + + assertTrue(isDynamicCSVFormat); + } + + @Test + public void testIsDynamicCSVFormatWithDynamicCommentMarker() { + PropertyContext context = createContext("|", "'", "^", "${csv.comment}"); + + boolean isDynamicCSVFormat = CSVUtils.isDynamicCSVFormat(context); + + assertTrue(isDynamicCSVFormat); + } + + @Test + public void testCustomFormat() { + PropertyContext context = createContext("|", "'", "^", "~"); + + CSVFormat csvFormat = CSVUtils.createCSVFormat(context, Collections.emptyMap()); + + assertEquals('|', csvFormat.getDelimiter()); + assertEquals('\'', (char) csvFormat.getQuoteCharacter()); + assertEquals('^', (char) csvFormat.getEscapeCharacter()); + assertEquals('~', (char) csvFormat.getCommentMarker()); + } + + @Test + public void testCustomFormatWithEL() { + PropertyContext context = 
createContext("${csv.delimiter}", "${csv.quote}", "${csv.escape}", "${csv.comment}"); + + Map attributes = new HashMap<>(); + attributes.put("csv.delimiter", "|"); + attributes.put("csv.quote", "'"); + attributes.put("csv.escape", "^"); + attributes.put("csv.comment", "~"); + + CSVFormat csvFormat = CSVUtils.createCSVFormat(context, attributes); + + assertEquals('|', csvFormat.getDelimiter()); + assertEquals('\'', (char) csvFormat.getQuoteCharacter()); + assertEquals('^', (char) csvFormat.getEscapeCharacter()); + assertEquals('~', (char) csvFormat.getCommentMarker()); + } + + @Test + public void testCustomFormatWithELEmptyValues() { + PropertyContext context = createContext("${csv.delimiter}", "${csv.quote}", "${csv.escape}", "${csv.comment}"); + + CSVFormat csvFormat = CSVUtils.createCSVFormat(context, Collections.emptyMap()); + + assertEquals(',', csvFormat.getDelimiter()); + assertEquals('"', (char) csvFormat.getQuoteCharacter()); + assertEquals('\\', (char) csvFormat.getEscapeCharacter()); + assertNull(csvFormat.getCommentMarker()); + } + + @Test + public void testCustomFormatWithELInvalidValues() { + PropertyContext context = createContext("${csv.delimiter}", "${csv.quote}", "${csv.escape}", "${csv.comment}"); + + Map attributes = new HashMap<>(); + attributes.put("csv.delimiter", "invalid"); + attributes.put("csv.quote", "invalid"); + attributes.put("csv.escape", "invalid"); + attributes.put("csv.comment", "invalid"); + + CSVFormat csvFormat = CSVUtils.createCSVFormat(context, attributes); + + assertEquals(',', csvFormat.getDelimiter()); + assertEquals('"', (char) csvFormat.getQuoteCharacter()); + assertEquals('\\', (char) csvFormat.getEscapeCharacter()); + assertNull(csvFormat.getCommentMarker()); + } + + private PropertyContext createContext(String valueSeparator, String quoteChar, String escapeChar, String commentMarker) { + Map properties = new HashMap<>(); + + properties.put(CSVUtils.VALUE_SEPARATOR, valueSeparator); + properties.put(CSVUtils.QUOTE_CHAR, quoteChar); + properties.put(CSVUtils.ESCAPE_CHAR, escapeChar); + properties.put(CSVUtils.COMMENT_MARKER, commentMarker); + + return new MockConfigurationContext(properties, null); + } +} diff --git a/nifi-nar-bundles/nifi-jolt-record-bundle/nifi-jolt-record-processors/src/main/java/org/apache/nifi/processors/jolt/record/JoltTransformRecord.java b/nifi-nar-bundles/nifi-jolt-record-bundle/nifi-jolt-record-processors/src/main/java/org/apache/nifi/processors/jolt/record/JoltTransformRecord.java index 2e1ef719ec..59629b6eaa 100644 --- a/nifi-nar-bundles/nifi-jolt-record-bundle/nifi-jolt-record-processors/src/main/java/org/apache/nifi/processors/jolt/record/JoltTransformRecord.java +++ b/nifi-nar-bundles/nifi-jolt-record-bundle/nifi-jolt-record-processors/src/main/java/org/apache/nifi/processors/jolt/record/JoltTransformRecord.java @@ -312,7 +312,7 @@ public class JoltTransformRecord extends AbstractProcessor { final Record firstRecord = reader.nextRecord(); if (firstRecord == null) { try (final OutputStream out = session.write(transformed); - final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) { + final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, transformed)) { writer.beginRecordSet(); writeResult = writer.finishRecordSet(); @@ -339,7 +339,7 @@ public class JoltTransformRecord extends AbstractProcessor { // and instead use a Map. 
This way, even if many different output schemas are possible, // the output FlowFiles will each only contain records that have the same schema. try (final OutputStream out = session.write(transformed); - final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) { + final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out, transformed)) { writer.beginRecordSet(); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java index bde07a6492..2eaa58fe17 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java @@ -508,7 +508,7 @@ public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListe throw new ProcessException(e); } - writer = writerFactory.createWriter(logger, writeSchema, rawOut); + writer = writerFactory.createWriter(logger, writeSchema, rawOut, flowFile); writer.beginRecordSet(); tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java index ea1c087a38..aa7113c396 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java @@ -122,7 +122,7 @@ public class PublisherLease implements Closeable { recordCount++; baos.reset(); - try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos)) { + try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos, flowFile)) { writer.write(record); writer.flush(); } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java index accd59b1d0..1f1d2b9799 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java @@ -282,11 +282,11 @@ public class TestPublisherLease { final RecordSetWriterFactory writerFactory = Mockito.mock(RecordSetWriterFactory.class); final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class); - Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer); + Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any(), eq(flowFile))).thenReturn(writer); lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic); - verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any()); + 
verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any(), eq(flowFile)); verify(writer, times(2)).write(any(Record.class)); verify(producer, times(2)).send(any(), any()); } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java index 90a909d9b6..0a6e5ec149 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-10-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java @@ -57,7 +57,7 @@ public class MockRecordWriter extends AbstractControllerService implements Recor } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new RecordSetWriter() { @Override diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java index a871097e51..36e03ea019 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java @@ -558,7 +558,7 @@ public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListe throw new ProcessException(e); } - writer = writerFactory.createWriter(logger, writeSchema, rawOut); + writer = writerFactory.createWriter(logger, writeSchema, rawOut, flowFile); writer.beginRecordSet(); tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java index 2e25129727..0165210977 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java @@ -167,7 +167,7 @@ public class PublisherLease implements Closeable { baos.reset(); Map additionalAttributes = Collections.emptyMap(); - try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos)) { + try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos, flowFile)) { final WriteResult writeResult = writer.write(record); additionalAttributes = writeResult.getAttributes(); writer.flush(); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java 
b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java index 3954bd82e9..cc14a263b1 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java @@ -277,11 +277,11 @@ public class TestPublisherLease { final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class); Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap())); - Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer); + Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any(), eq(flowFile))).thenReturn(writer); lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic); - verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any()); + verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any(), eq(flowFile)); verify(writer, times(2)).write(any(Record.class)); verify(producer, times(2)).send(any(), any()); } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java index 0eb860688b..fc1b66652e 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-0-11-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java @@ -57,7 +57,7 @@ public class MockRecordWriter extends AbstractControllerService implements Recor } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new RecordSetWriter() { @Override diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java index 07831bbb3a..f2a382dcab 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java @@ -558,7 +558,7 @@ public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListe throw new ProcessException(e); } - writer = writerFactory.createWriter(logger, writeSchema, rawOut); + writer = writerFactory.createWriter(logger, writeSchema, rawOut, flowFile); writer.beginRecordSet(); tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java 
b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java index 1c241a41c4..a2ddd81dc8 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java @@ -166,7 +166,7 @@ public class PublisherLease implements Closeable { baos.reset(); Map additionalAttributes = Collections.emptyMap(); - try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos)) { + try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos, flowFile)) { final WriteResult writeResult = writer.write(record); additionalAttributes = writeResult.getAttributes(); writer.flush(); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java index a7a2bdd8dc..d148a45efd 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java @@ -277,11 +277,11 @@ public class TestPublisherLease { final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class); Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap())); - Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer); + Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any(), eq(flowFile))).thenReturn(writer); lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic); - verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any()); + verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any(), eq(flowFile)); verify(writer, times(2)).write(any(Record.class)); verify(producer, times(2)).send(any(), any()); } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java index 0eb860688b..fc1b66652e 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-1-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java @@ -57,7 +57,7 @@ public class MockRecordWriter extends AbstractControllerService implements Recor } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new RecordSetWriter() { @Override diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java 
b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java index 6b4dc41d3e..83aa14b95e 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/ConsumerLease.java @@ -558,7 +558,7 @@ public abstract class ConsumerLease implements Closeable, ConsumerRebalanceListe throw new ProcessException(e); } - writer = writerFactory.createWriter(logger, writeSchema, rawOut); + writer = writerFactory.createWriter(logger, writeSchema, rawOut, flowFile); writer.beginRecordSet(); tracker = new BundleTracker(consumerRecord, topicPartition, keyEncoding, writer); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java index 1c241a41c4..a2ddd81dc8 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/main/java/org/apache/nifi/processors/kafka/pubsub/PublisherLease.java @@ -166,7 +166,7 @@ public class PublisherLease implements Closeable { baos.reset(); Map additionalAttributes = Collections.emptyMap(); - try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos)) { + try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, baos, flowFile)) { final WriteResult writeResult = writer.write(record); additionalAttributes = writeResult.getAttributes(); writer.flush(); diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java index 1bedcd7663..6280087d50 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/TestPublisherLease.java @@ -278,11 +278,11 @@ public class TestPublisherLease { final RecordSetWriter writer = Mockito.mock(RecordSetWriter.class); Mockito.when(writer.write(Mockito.any(Record.class))).thenReturn(WriteResult.of(1, Collections.emptyMap())); - Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any())).thenReturn(writer); + Mockito.when(writerFactory.createWriter(eq(logger), eq(schema), any(), eq(flowFile))).thenReturn(writer); lease.publish(flowFile, recordSet, writerFactory, schema, keyField, topic); - verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any()); + verify(writerFactory, times(2)).createWriter(eq(logger), eq(schema), any(), eq(flowFile)); verify(writer, times(2)).write(any(Record.class)); verify(producer, times(2)).send(any(), any()); } diff --git a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java 
b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java index 66757a82e2..05adb5e333 100644 --- a/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java +++ b/nifi-nar-bundles/nifi-kafka-bundle/nifi-kafka-2-0-processors/src/test/java/org/apache/nifi/processors/kafka/pubsub/util/MockRecordWriter.java @@ -57,7 +57,7 @@ public class MockRecordWriter extends AbstractControllerService implements Recor } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new RecordSetWriter() { @Override diff --git a/nifi-nar-bundles/nifi-mongodb-bundle/nifi-mongodb-processors/src/main/java/org/apache/nifi/processors/mongodb/GetMongoRecord.java b/nifi-nar-bundles/nifi-mongodb-bundle/nifi-mongodb-processors/src/main/java/org/apache/nifi/processors/mongodb/GetMongoRecord.java index 49878ba297..1c67ad1ea3 100644 --- a/nifi-nar-bundles/nifi-mongodb-bundle/nifi-mongodb-processors/src/main/java/org/apache/nifi/processors/mongodb/GetMongoRecord.java +++ b/nifi-nar-bundles/nifi-mongodb-bundle/nifi-mongodb-processors/src/main/java/org/apache/nifi/processors/mongodb/GetMongoRecord.java @@ -165,7 +165,7 @@ public class GetMongoRecord extends AbstractMongoQueryProcessor { put("schema.name", schemaName); }}; RecordSchema schema = writerFactory.getSchema(attrs, null); - RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out); + RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, attrs); long count = 0L; writer.beginRecordSet(); while (cursor.hasNext()) { diff --git a/nifi-nar-bundles/nifi-parquet-bundle/nifi-parquet-processors/src/test/java/org/apache/nifi/processors/parquet/FetchParquetTest.java b/nifi-nar-bundles/nifi-parquet-bundle/nifi-parquet-processors/src/test/java/org/apache/nifi/processors/parquet/FetchParquetTest.java index e14fdccb8f..1fc908eb33 100644 --- a/nifi-nar-bundles/nifi-parquet-bundle/nifi-parquet-processors/src/test/java/org/apache/nifi/processors/parquet/FetchParquetTest.java +++ b/nifi-nar-bundles/nifi-parquet-bundle/nifi-parquet-processors/src/test/java/org/apache/nifi/processors/parquet/FetchParquetTest.java @@ -230,7 +230,8 @@ public class FetchParquetTest { final RecordSetWriterFactory recordSetWriterFactory = Mockito.mock(RecordSetWriterFactory.class); when(recordSetWriterFactory.getIdentifier()).thenReturn("mock-writer-factory"); - when(recordSetWriterFactory.createWriter(any(ComponentLog.class), AdditionalMatchers.or(any(RecordSchema.class), isNull()), any(OutputStream.class))).thenReturn(recordSetWriter); + when(recordSetWriterFactory.createWriter(any(ComponentLog.class), AdditionalMatchers.or(any(RecordSchema.class), isNull()), any(OutputStream.class), any(FlowFile.class))) + .thenReturn(recordSetWriter); testRunner.addControllerService("mock-writer-factory", recordSetWriterFactory); testRunner.enableControllerService(recordSetWriterFactory); diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java index 51abc27f95..2070c781de 100644 --- 
a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/main/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessor.java @@ -202,7 +202,7 @@ public class ConvertExcelToCSVProcessor final String desiredSheetsDelimited = context.getProperty(DESIRED_SHEETS).evaluateAttributeExpressions(flowFile).getValue(); final boolean formatValues = context.getProperty(FORMAT_VALUES).asBoolean(); - final CSVFormat csvFormat = CSVUtils.createCSVFormat(context); + final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, flowFile.getAttributes()); //Switch to 0 based index final int firstRow = context.getProperty(ROWS_TO_SKIP).evaluateAttributeExpressions(flowFile).asInteger() - 1; diff --git a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java index afcb28a44f..5314781c4c 100644 --- a/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java +++ b/nifi-nar-bundles/nifi-poi-bundle/nifi-poi-processors/src/test/java/org/apache/nifi/processors/poi/ConvertExcelToCSVProcessorTest.java @@ -167,7 +167,7 @@ public class ConvertExcelToCSVProcessorTest { public void testSkipRowsWithEL() throws Exception { Map attributes = new HashMap(); attributes.put("rowsToSkip", "2"); - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(),attributes); + testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); testRunner.setProperty(ConvertExcelToCSVProcessor.ROWS_TO_SKIP, "${rowsToSkip}"); testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); @@ -224,7 +224,7 @@ public class ConvertExcelToCSVProcessorTest { public void testSkipColumnsWithEL() throws Exception { Map attributes = new HashMap(); attributes.put("columnsToSkip", "2"); - testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(),attributes); + testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); testRunner.setProperty(ConvertExcelToCSVProcessor.COLUMNS_TO_SKIP, "${columnsToSkip}"); testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); @@ -280,6 +280,100 @@ public class ConvertExcelToCSVProcessorTest { "9.8765E+08||\r\n"); } + @Test + public void testCustomValueSeparatorWithEL() throws Exception { + Map attributes = new HashMap(); + attributes.put("csv.delimiter", "|"); + testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); + + testRunner.setProperty(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}"); + testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); + + testRunner.run(); + + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); + + MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); + Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); + assertTrue(rowsSheet == 9); + + LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); + 
ff.assertContentEquals("Numbers|Timestamps|Money\n" + + "1234.456|" + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "|$ 123.45\n" + + "1234.46|" + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "|£ 123.45\n" + + "1234.5|" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "|¥ 123.45\n" + + "1,234.46|" + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "|$ 1,023.45\n" + + "1,234.4560|" + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "|£ 1,023.45\n" + + "9.88E+08|" + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "|¥ 1,023.45\n" + + "9.877E+08||\n" + + "9.8765E+08||\n"); + } + + @Test + public void testCustomQuoteCharWithEL() throws Exception { + Map attributes = new HashMap(); + attributes.put("csv.quote", "'"); + testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); + + testRunner.setProperty(CSVUtils.QUOTE_CHAR, "${csv.quote}"); + testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); + testRunner.setProperty(CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_ALL); + + testRunner.run(); + + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); + + MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); + Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); + assertTrue(rowsSheet == 9); + + LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); + ff.assertContentEquals("'Numbers','Timestamps','Money'\n" + + "'1234.456','" + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + "','$ 123.45'\n" + + "'1234.46','" + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + "','£ 123.45'\n" + + "'1234.5','" + DateTimeFormatter.ofPattern("EEEE, MMMM dd, yyyy").format(localDt) + "','¥ 123.45'\n" + + "'1,234.46','" + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + "','$ 1,023.45'\n" + + "'1,234.4560','" + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + "','£ 1,023.45'\n" + + "'9.88E+08','" + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + "','¥ 1,023.45'\n" + + "'9.877E+08',,\n" + + "'9.8765E+08',,\n"); + } + + @Test + public void testCustomEscapeCharWithEL() throws Exception { + Map attributes = new HashMap(); + attributes.put("csv.escape", "^"); + testRunner.enqueue(new File("src/test/resources/dataformatting.xlsx").toPath(), attributes); + + testRunner.setProperty(CSVUtils.ESCAPE_CHAR, "${csv.escape}"); + testRunner.setProperty(ConvertExcelToCSVProcessor.FORMAT_VALUES, "true"); + + testRunner.run(); + + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.SUCCESS, 1); + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.ORIGINAL, 1); + testRunner.assertTransferCount(ConvertExcelToCSVProcessor.FAILURE, 0); + + MockFlowFile ff = testRunner.getFlowFilesForRelationship(ConvertExcelToCSVProcessor.SUCCESS).get(0); + Long rowsSheet = new Long(ff.getAttribute(ConvertExcelToCSVProcessor.ROW_NUM)); + assertTrue(rowsSheet == 9); + + LocalDateTime localDt = LocalDateTime.of(2017, 1, 1, 12, 0, 0); + ff.assertContentEquals("Numbers,Timestamps,Money\n" + + "1234.456," + DateTimeFormatter.ofPattern("d/M/yy").format(localDt) + ",$ 123.45\n" + + "1234.46," + DateTimeFormatter.ofPattern("hh:mm:ss a").format(localDt) + ",£ 123.45\n" + + "1234.5," + DateTimeFormatter.ofPattern("EEEE^, MMMM 
dd^, yyyy").format(localDt) + ",¥ 123.45\n" + + "1^,234.46," + DateTimeFormatter.ofPattern("d/M/yy HH:mm").format(localDt) + ",$ 1^,023.45\n" + + "1^,234.4560," + DateTimeFormatter.ofPattern("hh:mm a").format(localDt) + ",£ 1^,023.45\n" + + "9.88E+08," + DateTimeFormatter.ofPattern("yyyy/MM/dd/ HH:mm").format(localDt) + ",¥ 1^,023.45\n" + + "9.877E+08,,\n" + + "9.8765E+08,,\n"); + } + /** * Validates that all sheets in the Excel document are exported. * diff --git a/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/main/java/org/apache/nifi/record/script/ScriptedRecordSetWriter.java b/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/main/java/org/apache/nifi/record/script/ScriptedRecordSetWriter.java index 97544016ee..f32141ad3c 100644 --- a/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/main/java/org/apache/nifi/record/script/ScriptedRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/main/java/org/apache/nifi/record/script/ScriptedRecordSetWriter.java @@ -62,10 +62,10 @@ public class ScriptedRecordSetWriter extends AbstractScriptedRecordFactory variables) throws SchemaNotFoundException, IOException { if (recordFactory.get() != null) { try { - return recordFactory.get().createWriter(logger, schema, out); + return recordFactory.get().createWriter(logger, schema, out, variables); } catch (UndeclaredThrowableException ute) { throw new IOException(ute.getCause()); } diff --git a/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/groovy/org/apache/nifi/record/script/ScriptedRecordSetWriterTest.groovy b/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/groovy/org/apache/nifi/record/script/ScriptedRecordSetWriterTest.groovy index c3a7990e24..ce1bf2a232 100644 --- a/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/groovy/org/apache/nifi/record/script/ScriptedRecordSetWriterTest.groovy +++ b/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/groovy/org/apache/nifi/record/script/ScriptedRecordSetWriterTest.groovy @@ -102,7 +102,7 @@ class ScriptedRecordSetWriterTest { def schema = recordSetWriterFactory.getSchema(Collections.emptyMap(), null) ByteArrayOutputStream outputStream = new ByteArrayOutputStream() - RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(logger, schema, outputStream) + RecordSetWriter recordSetWriter = recordSetWriterFactory.createWriter(logger, schema, outputStream, Collections.emptyMap()) assertNotNull(recordSetWriter) def recordSchema = new SimpleRecordSchema( diff --git a/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/resources/groovy/test_record_writer_inline.groovy b/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/resources/groovy/test_record_writer_inline.groovy index ccdb9ae7f7..569fa6839a 100644 --- a/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/resources/groovy/test_record_writer_inline.groovy +++ b/nifi-nar-bundles/nifi-scripting-bundle/nifi-scripting-processors/src/test/resources/groovy/test_record_writer_inline.groovy @@ -104,7 +104,7 @@ class GroovyRecordSetWriterFactory extends AbstractControllerService implements } @Override - RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out) throws SchemaNotFoundException, IOException { + RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out, Map variables) 
throws SchemaNotFoundException, IOException { return new GroovyRecordSetWriter(out) } diff --git a/nifi-nar-bundles/nifi-site-to-site-reporting-bundle/nifi-site-to-site-reporting-task/src/main/java/org/apache/nifi/reporting/AbstractSiteToSiteReportingTask.java b/nifi-nar-bundles/nifi-site-to-site-reporting-bundle/nifi-site-to-site-reporting-task/src/main/java/org/apache/nifi/reporting/AbstractSiteToSiteReportingTask.java index 8fdf4833b6..a4086cce7e 100644 --- a/nifi-nar-bundles/nifi-site-to-site-reporting-bundle/nifi-site-to-site-reporting-task/src/main/java/org/apache/nifi/reporting/AbstractSiteToSiteReportingTask.java +++ b/nifi-nar-bundles/nifi-site-to-site-reporting-bundle/nifi-site-to-site-reporting-task/src/main/java/org/apache/nifi/reporting/AbstractSiteToSiteReportingTask.java @@ -285,7 +285,7 @@ public abstract class AbstractSiteToSiteReportingTask extends AbstractReportingT final RecordSchema writeSchema = writerFactory.getSchema(null, recordSchema); final ByteArrayOutputStream out = new ByteArrayOutputStream(); - try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) { + try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out, attributes)) { writer.beginRecordSet(); Record record; diff --git a/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/GetSolr.java b/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/GetSolr.java index 0955d49132..f0759ce34e 100644 --- a/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/GetSolr.java +++ b/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/GetSolr.java @@ -376,11 +376,12 @@ public class GetSolr extends SolrProcessor { final RecordSchema schema = writerFactory.getSchema(null, null); final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema); final StringBuffer mimeType = new StringBuffer(); + final FlowFile flowFileRef = flowFile; flowFile = session.write(flowFile, new OutputStreamCallback() { @Override public void process(final OutputStream out) throws IOException { try { - final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out); + final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, flowFileRef); writer.write(recordSet); writer.flush(); mimeType.append(writer.getMimeType()); diff --git a/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/QuerySolr.java b/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/QuerySolr.java index 5a85dd9a28..4f8202c868 100644 --- a/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/QuerySolr.java +++ b/nifi-nar-bundles/nifi-solr-bundle/nifi-solr-processors/src/main/java/org/apache/nifi/processors/solr/QuerySolr.java @@ -1,615 +1,616 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.nifi.processors.solr; - -import com.google.gson.stream.JsonWriter; -import org.apache.nifi.annotation.behavior.InputRequirement; -import org.apache.nifi.annotation.behavior.WritesAttribute; -import org.apache.nifi.annotation.behavior.WritesAttributes; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.expression.AttributeExpression; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.ProcessorInitializationContext; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.schema.access.SchemaNotFoundException; -import org.apache.nifi.serialization.RecordSetWriter; -import org.apache.nifi.serialization.RecordSetWriterFactory; -import org.apache.nifi.serialization.record.RecordSchema; -import org.apache.nifi.serialization.record.RecordSet; -import org.apache.nifi.util.StopWatch; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.request.QueryRequest; -import org.apache.solr.client.solrj.response.FacetField; -import org.apache.solr.client.solrj.response.FieldStatsInfo; -import org.apache.solr.client.solrj.response.IntervalFacet; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.client.solrj.response.RangeFacet; -import org.apache.solr.client.solrj.response.RangeFacet.Count; -import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.params.FacetParams; -import org.apache.solr.common.params.MultiMapSolrParams; -import org.apache.solr.common.params.StatsParams; - -import java.io.IOException; -import java.io.OutputStreamWriter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.TimeUnit; - -import static org.apache.nifi.processors.solr.SolrUtils.KERBEROS_CREDENTIALS_SERVICE; -import static org.apache.nifi.processors.solr.SolrUtils.SOLR_TYPE; -import static org.apache.nifi.processors.solr.SolrUtils.COLLECTION; -import static org.apache.nifi.processors.solr.SolrUtils.SOLR_TYPE_CLOUD; -import static org.apache.nifi.processors.solr.SolrUtils.SSL_CONTEXT_SERVICE; -import static org.apache.nifi.processors.solr.SolrUtils.SOLR_SOCKET_TIMEOUT; -import static org.apache.nifi.processors.solr.SolrUtils.SOLR_CONNECTION_TIMEOUT; -import 
static org.apache.nifi.processors.solr.SolrUtils.SOLR_MAX_CONNECTIONS; -import static org.apache.nifi.processors.solr.SolrUtils.SOLR_MAX_CONNECTIONS_PER_HOST; -import static org.apache.nifi.processors.solr.SolrUtils.ZK_CLIENT_TIMEOUT; -import static org.apache.nifi.processors.solr.SolrUtils.ZK_CONNECTION_TIMEOUT; -import static org.apache.nifi.processors.solr.SolrUtils.SOLR_LOCATION; -import static org.apache.nifi.processors.solr.SolrUtils.BASIC_USERNAME; -import static org.apache.nifi.processors.solr.SolrUtils.BASIC_PASSWORD; -import static org.apache.nifi.processors.solr.SolrUtils.RECORD_WRITER; - -@Tags({"Apache", "Solr", "Get", "Query", "Records"}) -@InputRequirement(InputRequirement.Requirement.INPUT_ALLOWED) -@CapabilityDescription("Queries Solr and outputs the results as a FlowFile in the format of XML or using a Record Writer") -@WritesAttributes({ - @WritesAttribute(attribute = "solr.connect", description = "Solr connect string"), - @WritesAttribute(attribute = "solr.collection", description = "Solr collection"), - @WritesAttribute(attribute = "solr.query", description = "Query string sent to Solr"), - @WritesAttribute(attribute = "solr.cursor.mark", description = "Cursor mark can be used for scrolling Solr"), - @WritesAttribute(attribute = "solr.status.code", description = "Status code of Solr request. A status code of 0 indicates that the request was successfully processed"), - @WritesAttribute(attribute = "solr.query.time", description = "The elapsed time to process the query (in ms)"), - @WritesAttribute(attribute = "solr.start", description = "Solr start parameter (result offset) for the query"), - @WritesAttribute(attribute = "solr.rows", description = "Number of Solr documents to be returned for the query"), - @WritesAttribute(attribute = "solr.number.results", description = "Number of Solr documents that match the query"), - @WritesAttribute(attribute = "mime.type", description = "The mime type of the data format"), - @WritesAttribute(attribute = "querysolr.exeption.class", description = "The Java exception class raised when the processor fails"), - @WritesAttribute(attribute = "querysolr.exeption.message", description = "The Java exception message raised when the processor fails") -}) -public class QuerySolr extends SolrProcessor { - - public static final AllowableValue MODE_XML = new AllowableValue("XML"); - public static final AllowableValue MODE_REC = new AllowableValue("Records"); - - public static final AllowableValue RETURN_TOP_RESULTS = new AllowableValue("return_only_top_results", "Only top results"); - public static final AllowableValue RETURN_ALL_RESULTS = new AllowableValue("return_all_results", "Entire results"); - - public static final String MIME_TYPE_JSON = "application/json"; - public static final String MIME_TYPE_XML = "application/xml"; - public static final String ATTRIBUTE_SOLR_CONNECT = "solr.connect"; - public static final String ATTRIBUTE_SOLR_COLLECTION = "solr.collection"; - public static final String ATTRIBUTE_SOLR_QUERY = "solr.query"; - public static final String ATTRIBUTE_CURSOR_MARK = "solr.cursor.mark"; - public static final String ATTRIBUTE_SOLR_STATUS = "solr.status.code"; - public static final String ATTRIBUTE_SOLR_START = "solr.start"; - public static final String ATTRIBUTE_SOLR_ROWS = "solr.rows"; - public static final String ATTRIBUTE_SOLR_NUMBER_RESULTS = "solr.number.results"; - public static final String ATTRIBUTE_QUERY_TIME = "solr.query.time"; - public static final String EXCEPTION = "querysolr.exeption"; - public static final 
String EXCEPTION_MESSAGE = "querysolr.exeption.message"; - - public static final Integer UPPER_LIMIT_START_PARAM = 10000; - - public static final PropertyDescriptor RETURN_TYPE = new PropertyDescriptor - .Builder().name("return_type") - .displayName("Return Type") - .description("Output format of Solr results. Write Solr documents to FlowFiles as XML or using a Record Writer") - .required(true) - .allowableValues(MODE_XML, MODE_REC) - .defaultValue(MODE_XML.getValue()) - .build(); - - public static final PropertyDescriptor SOLR_PARAM_QUERY = new PropertyDescriptor - .Builder().name("solr_param_query") - .displayName("Solr Query") - .description("Solr Query, e. g. field:value") - .required(true) - .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING)) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .defaultValue("*:*") - .build(); - - public static final PropertyDescriptor SOLR_PARAM_REQUEST_HANDLER = new PropertyDescriptor - .Builder().name("solr_param_request_handler") - .displayName("Request Handler") - .description("Define a request handler here, e. g. /query") - .required(true) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .defaultValue("/select") - .build(); - - public static final PropertyDescriptor SOLR_PARAM_FIELD_LIST = new PropertyDescriptor - .Builder().name("solr_param_field_list") - .displayName("Field List") - .description("Comma separated list of fields to be included into results, e. g. field1,field2") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor SOLR_PARAM_SORT = new PropertyDescriptor - .Builder().name("solr_param_sort") - .displayName("Sorting of result list") - .description("Comma separated sort clauses to define the sorting of results, e. g. field1 asc, field2 desc") - .required(false) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor SOLR_PARAM_START = new PropertyDescriptor - .Builder().name("solr_param_start") - .displayName("Start of results") - .description("Offset of result set") - .required(false) - .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor SOLR_PARAM_ROWS = new PropertyDescriptor - .Builder().name("solr_param_rows") - .displayName("Rows") - .description("Number of results to be returned for a single request") - .required(false) - .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - - public static final PropertyDescriptor AMOUNT_DOCUMENTS_TO_RETURN = new PropertyDescriptor - .Builder().name("amount_documents_to_return") - .displayName("Total amount of returned results") - .description("Total amount of Solr documents to be returned. If this property is set to \"Only top results\", " + - "only single requests will be sent to Solr and the results will be written into single FlowFiles. If it is set to " + - "\"Entire results\", all results matching to the query are retrieved via multiple Solr requests and " + - "returned in multiple FlowFiles. 
For both options, the number of Solr documents to be returned in a FlowFile depends on " + - "the configuration of the \"Rows\" property") - .required(true) - .allowableValues(RETURN_ALL_RESULTS, RETURN_TOP_RESULTS) - .defaultValue(RETURN_TOP_RESULTS.getValue()) - .build(); - - @Override - protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { - return new PropertyDescriptor.Builder() - .description("Specifies the value to send for the '" + propertyDescriptorName + "' Solr parameter") - .name(propertyDescriptorName) - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .dynamic(true) - .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) - .build(); - } - - public static final Relationship RESULTS = new Relationship.Builder().name("results") - .description("Results of Solr queries").build(); - public static final Relationship FACETS = new Relationship.Builder().name("facets") - .description("Results of faceted search").build(); - public static final Relationship STATS = new Relationship.Builder().name("stats") - .description("Stats about Solr index").build(); - public static final Relationship ORIGINAL = new Relationship.Builder().name("original") - .description("Original flowfile").build(); - public static final Relationship FAILURE = new Relationship.Builder().name("failure") - .description("Failure relationship").build(); - - private Set relationships; - private List descriptors; - - @Override - public Set getRelationships() { - return this.relationships; - } - - @Override - public List getSupportedPropertyDescriptors() { - return this.descriptors; - } - - @Override - protected void init(final ProcessorInitializationContext context) { - super.init(context); - - final List descriptors = new ArrayList<>(); - descriptors.add(SOLR_TYPE); - descriptors.add(SOLR_LOCATION); - descriptors.add(COLLECTION); - descriptors.add(RETURN_TYPE); - descriptors.add(RECORD_WRITER); - descriptors.add(SOLR_PARAM_QUERY); - descriptors.add(SOLR_PARAM_REQUEST_HANDLER); - descriptors.add(SOLR_PARAM_FIELD_LIST); - descriptors.add(SOLR_PARAM_SORT); - descriptors.add(SOLR_PARAM_START); - descriptors.add(SOLR_PARAM_ROWS); - descriptors.add(AMOUNT_DOCUMENTS_TO_RETURN); - descriptors.add(KERBEROS_CREDENTIALS_SERVICE); - descriptors.add(BASIC_USERNAME); - descriptors.add(BASIC_PASSWORD); - descriptors.add(SSL_CONTEXT_SERVICE); - descriptors.add(SOLR_SOCKET_TIMEOUT); - descriptors.add(SOLR_CONNECTION_TIMEOUT); - descriptors.add(SOLR_MAX_CONNECTIONS); - descriptors.add(SOLR_MAX_CONNECTIONS_PER_HOST); - descriptors.add(ZK_CLIENT_TIMEOUT); - descriptors.add(ZK_CONNECTION_TIMEOUT); - this.descriptors = Collections.unmodifiableList(descriptors); - - final Set relationships = new HashSet<>(); - relationships.add(FAILURE); - relationships.add(RESULTS); - relationships.add(FACETS); - relationships.add(STATS); - relationships.add(ORIGINAL); - this.relationships = Collections.unmodifiableSet(relationships); - } - - public static final Set SUPPORTED_SEARCH_COMPONENTS = new HashSet<>(); - static { - SUPPORTED_SEARCH_COMPONENTS.addAll(Arrays.asList(StatsParams.STATS, FacetParams.FACET)); - } - - public static final Set SEARCH_COMPONENTS_ON = new HashSet<>(); - static { - SEARCH_COMPONENTS_ON.addAll(Arrays.asList("true", "on", "yes")); - } - - @Override - protected final Collection additionalCustomValidation(ValidationContext context) { - final Collection problems = new ArrayList<>(); - - if 
(context.getProperty(RETURN_TYPE).evaluateAttributeExpressions().getValue().equals(MODE_REC.getValue()) - && !context.getProperty(RECORD_WRITER).isSet()) { - problems.add(new ValidationResult.Builder() - .explanation("for writing records a record writer has to be configured") - .valid(false) - .subject("Record writer check") - .build()); - } - return problems; - } - - @Override - public void doOnTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { - final ComponentLog logger = getLogger(); - - FlowFile flowFileOriginal = session.get(); - FlowFile flowFileResponse; - - if (flowFileOriginal == null) { - if (context.hasNonLoopConnection()) { - return; - } - flowFileResponse = session.create(); - } else { - flowFileResponse = session.create(flowFileOriginal); - } - - final SolrQuery solrQuery = new SolrQuery(); - final boolean isSolrCloud = SOLR_TYPE_CLOUD.equals(context.getProperty(SOLR_TYPE).getValue()); - final String collection = context.getProperty(COLLECTION).evaluateAttributeExpressions(flowFileResponse).getValue(); - - final StringBuilder transitUri = new StringBuilder("solr://"); - transitUri.append(getSolrLocation()); - if (isSolrCloud) { - transitUri.append(":").append(collection); - } - final StopWatch timer = new StopWatch(false); - - try { - solrQuery.setQuery(context.getProperty(SOLR_PARAM_QUERY).evaluateAttributeExpressions(flowFileResponse).getValue()); - solrQuery.setRequestHandler(context.getProperty(SOLR_PARAM_REQUEST_HANDLER).evaluateAttributeExpressions(flowFileResponse).getValue()); - - if (context.getProperty(SOLR_PARAM_FIELD_LIST).isSet()) { - for (final String field : context.getProperty(SOLR_PARAM_FIELD_LIST).evaluateAttributeExpressions(flowFileResponse).getValue() - .split(",")) { - solrQuery.addField(field.trim()); - } - } - - // Avoid ArrayIndexOutOfBoundsException due to incorrectly configured sorting - try { - if (context.getProperty(SOLR_PARAM_SORT).isSet()) { - final List sortings = new ArrayList<>(); - for (final String sorting : context.getProperty(SOLR_PARAM_SORT).evaluateAttributeExpressions(flowFileResponse).getValue() - .split(",")) { - final String[] sortEntry = sorting.trim().split(" "); - sortings.add(new SolrQuery.SortClause(sortEntry[0], sortEntry[1])); - } - solrQuery.setSorts(sortings); - } - } catch (Exception e) { - throw new ProcessException("Error while parsing the sort clauses for the Solr query"); - } - - final Integer startParam = context.getProperty(SOLR_PARAM_START).isSet() ? Integer.parseInt( - context.getProperty(SOLR_PARAM_START).evaluateAttributeExpressions(flowFileResponse).getValue()) : CommonParams.START_DEFAULT; - - solrQuery.setStart(startParam); - - final Integer rowParam = context.getProperty(SOLR_PARAM_ROWS).isSet() ? 
Integer.parseInt( - context.getProperty(SOLR_PARAM_ROWS).evaluateAttributeExpressions(flowFileResponse).getValue()) : CommonParams.ROWS_DEFAULT; - - solrQuery.setRows(rowParam); - - final Map additionalSolrParams = SolrUtils.getRequestParams(context, flowFileResponse); - - final Set searchComponents = extractSearchComponents(additionalSolrParams); - solrQuery.add(new MultiMapSolrParams(additionalSolrParams)); - - final Map attributes = new HashMap<>(); - attributes.put(ATTRIBUTE_SOLR_CONNECT, getSolrLocation()); - if (isSolrCloud) { - attributes.put(ATTRIBUTE_SOLR_COLLECTION, collection); - } - attributes.put(ATTRIBUTE_SOLR_QUERY, solrQuery.toString()); - if (flowFileOriginal != null) { - flowFileOriginal = session.putAllAttributes(flowFileOriginal, attributes); - } - - flowFileResponse = session.putAllAttributes(flowFileResponse, attributes); - - final boolean getEntireResults = RETURN_ALL_RESULTS.equals(context.getProperty(AMOUNT_DOCUMENTS_TO_RETURN).getValue()); - boolean processFacetsAndStats = true; - boolean continuePaging = true; - - while (continuePaging){ - - timer.start(); - - Map responseAttributes = new HashMap<>(); - responseAttributes.put(ATTRIBUTE_SOLR_START, solrQuery.getStart().toString()); - responseAttributes.put(ATTRIBUTE_SOLR_ROWS, solrQuery.getRows().toString()); - - if (solrQuery.getStart() > UPPER_LIMIT_START_PARAM) { - logger.warn("The start parameter of Solr query {} exceeded the upper limit of {}. The query will not be processed " + - "to avoid performance or memory issues on the part of Solr.", new Object[]{solrQuery.toString(), UPPER_LIMIT_START_PARAM}); - flowFileResponse = session.putAllAttributes(flowFileResponse, responseAttributes); - timer.stop(); - break; - } - - final QueryRequest req = new QueryRequest(solrQuery); - if (isBasicAuthEnabled()) { - req.setBasicAuthCredentials(getUsername(), getPassword()); - } - - final QueryResponse response = req.process(getSolrClient()); - timer.stop(); - - final Long totalNumberOfResults = response.getResults().getNumFound(); - - responseAttributes.put(ATTRIBUTE_SOLR_NUMBER_RESULTS, totalNumberOfResults.toString()); - responseAttributes.put(ATTRIBUTE_CURSOR_MARK, response.getNextCursorMark()); - responseAttributes.put(ATTRIBUTE_SOLR_STATUS, String.valueOf(response.getStatus())); - responseAttributes.put(ATTRIBUTE_QUERY_TIME, String.valueOf(response.getQTime())); - flowFileResponse = session.putAllAttributes(flowFileResponse, responseAttributes); - - if (response.getResults().size() > 0) { - - if (context.getProperty(RETURN_TYPE).getValue().equals(MODE_XML.getValue())){ - flowFileResponse = session.write(flowFileResponse, SolrUtils.getOutputStreamCallbackToTransformSolrResponseToXml(response)); - flowFileResponse = session.putAttribute(flowFileResponse, CoreAttributes.MIME_TYPE.key(), MIME_TYPE_XML); - } else { - final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).evaluateAttributeExpressions(flowFileResponse) - .asControllerService(RecordSetWriterFactory.class); - final RecordSchema schema = writerFactory.getSchema(flowFileResponse.getAttributes(), null); - final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema); - final StringBuffer mimeType = new StringBuffer(); - flowFileResponse = session.write(flowFileResponse, out -> { - try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) { - writer.write(recordSet); - writer.flush(); - mimeType.append(writer.getMimeType()); - } catch (SchemaNotFoundException e) { - throw new 
ProcessException("Could not parse Solr response", e); - } - }); - flowFileResponse = session.putAttribute(flowFileResponse, CoreAttributes.MIME_TYPE.key(), mimeType.toString()); - } - - if (processFacetsAndStats) { - if (searchComponents.contains(FacetParams.FACET)) { - FlowFile flowFileFacets = session.create(flowFileResponse); - flowFileFacets = session.write(flowFileFacets, out -> { - try ( - final OutputStreamWriter osw = new OutputStreamWriter(out); - final JsonWriter writer = new JsonWriter(osw) - ) { - addFacetsFromSolrResponseToJsonWriter(response, writer); - } - }); - flowFileFacets = session.putAttribute(flowFileFacets, CoreAttributes.MIME_TYPE.key(), MIME_TYPE_JSON); - session.getProvenanceReporter().receive(flowFileFacets, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); - session.transfer(flowFileFacets, FACETS); - } - - if (searchComponents.contains(StatsParams.STATS)) { - FlowFile flowFileStats = session.create(flowFileResponse); - flowFileStats = session.write(flowFileStats, out -> { - try ( - final OutputStreamWriter osw = new OutputStreamWriter(out); - final JsonWriter writer = new JsonWriter(osw) - ) { - addStatsFromSolrResponseToJsonWriter(response, writer); - } - }); - flowFileStats = session.putAttribute(flowFileStats, CoreAttributes.MIME_TYPE.key(), MIME_TYPE_JSON); - session.getProvenanceReporter().receive(flowFileStats, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); - session.transfer(flowFileStats, STATS); - } - processFacetsAndStats = false; - } - } - - if (getEntireResults) { - final Integer totalDocumentsReturned = solrQuery.getStart() + solrQuery.getRows(); - if (totalDocumentsReturned < totalNumberOfResults) { - solrQuery.setStart(totalDocumentsReturned); - session.getProvenanceReporter().receive(flowFileResponse, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); - session.transfer(flowFileResponse, RESULTS); - flowFileResponse = session.create(flowFileResponse); - } else { - continuePaging = false; - } - } else { - continuePaging = false; - } - } - - } catch (Exception e) { - flowFileResponse = session.penalize(flowFileResponse); - flowFileResponse = session.putAttribute(flowFileResponse, EXCEPTION, e.getClass().getName()); - flowFileResponse = session.putAttribute(flowFileResponse, EXCEPTION_MESSAGE, e.getMessage()); - session.transfer(flowFileResponse, FAILURE); - logger.error("Failed to execute query {} due to {}. 
FlowFile will be routed to relationship failure", new Object[]{solrQuery.toString(), e}, e); - if (flowFileOriginal != null) { - flowFileOriginal = session.penalize(flowFileOriginal); - } - } - - if (!flowFileResponse.isPenalized()) { - session.getProvenanceReporter().receive(flowFileResponse, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); - session.transfer(flowFileResponse, RESULTS); - } - - if (flowFileOriginal != null) { - if (!flowFileOriginal.isPenalized()) { - session.transfer(flowFileOriginal, ORIGINAL); - } else { - session.remove(flowFileOriginal); - } - } - } - - private Set extractSearchComponents(Map solrParams) { - final Set searchComponentsTemp = new HashSet<>(); - for (final String searchComponent : SUPPORTED_SEARCH_COMPONENTS) - if (solrParams.keySet().contains(searchComponent)) { - if (SEARCH_COMPONENTS_ON.contains(solrParams.get(searchComponent)[0])) { - searchComponentsTemp.add(searchComponent); - } - } - return Collections.unmodifiableSet(searchComponentsTemp); - } - - private static void addStatsFromSolrResponseToJsonWriter(final QueryResponse response, final JsonWriter writer) throws IOException { - writer.beginObject(); - writer.name("stats_fields"); - writer.beginObject(); - for (Map.Entry entry: response.getFieldStatsInfo().entrySet()) { - FieldStatsInfo fsi = entry.getValue(); - writer.name(entry.getKey()); - writer.beginObject(); - writer.name("min").value(fsi.getMin().toString()); - writer.name("max").value(fsi.getMax().toString()); - writer.name("count").value(fsi.getCount()); - writer.name("missing").value(fsi.getMissing()); - writer.name("sum").value(fsi.getSum().toString()); - writer.name("mean").value(fsi.getMean().toString()); - writer.name("sumOfSquares").value(fsi.getSumOfSquares()); - writer.name("stddev").value(fsi.getStddev()); - writer.endObject(); - } - writer.endObject(); - writer.endObject(); - } - - private static void addFacetsFromSolrResponseToJsonWriter(final QueryResponse response, final JsonWriter writer) throws IOException { - writer.beginObject(); - writer.name("facet_queries"); - writer.beginArray(); - for (final Map.Entry facetQuery : response.getFacetQuery().entrySet()){ - writer.beginObject(); - writer.name("facet").value(facetQuery.getKey()); - writer.name("count").value(facetQuery.getValue()); - writer.endObject(); - } - writer.endArray(); - - writer.name("facet_fields"); - writer.beginObject(); - for (final FacetField facetField : response.getFacetFields()){ - writer.name(facetField.getName()); - writer.beginArray(); - for (final FacetField.Count count : facetField.getValues()) { - writer.beginObject(); - writer.name("facet").value(count.getName()); - writer.name("count").value(count.getCount()); - writer.endObject(); - } - writer.endArray(); - } - writer.endObject(); - - writer.name("facet_ranges"); - writer.beginObject(); - for (final RangeFacet rangeFacet : response.getFacetRanges()) { - writer.name(rangeFacet.getName()); - writer.beginArray(); - final List list = rangeFacet.getCounts(); - for (final Count count : list) { - writer.beginObject(); - writer.name("facet").value(count.getValue()); - writer.name("count").value(count.getCount()); - writer.endObject(); - } - writer.endArray(); - } - writer.endObject(); - - writer.name("facet_intervals"); - writer.beginObject(); - for (final IntervalFacet intervalFacet : response.getIntervalFacets()) { - writer.name(intervalFacet.getField()); - writer.beginArray(); - for (final IntervalFacet.Count count : intervalFacet.getIntervals()) { - writer.beginObject(); - 
writer.name("facet").value(count.getKey()); - writer.name("count").value(count.getCount()); - writer.endObject(); - } - writer.endArray(); - } - writer.endObject(); - writer.endObject(); - } -} - - +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.nifi.processors.solr; + +import com.google.gson.stream.JsonWriter; +import org.apache.nifi.annotation.behavior.InputRequirement; +import org.apache.nifi.annotation.behavior.WritesAttribute; +import org.apache.nifi.annotation.behavior.WritesAttributes; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.expression.AttributeExpression; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.flowfile.attributes.CoreAttributes; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.ProcessorInitializationContext; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.schema.access.SchemaNotFoundException; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.RecordSetWriterFactory; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.RecordSet; +import org.apache.nifi.util.StopWatch; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.request.QueryRequest; +import org.apache.solr.client.solrj.response.FacetField; +import org.apache.solr.client.solrj.response.FieldStatsInfo; +import org.apache.solr.client.solrj.response.IntervalFacet; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.client.solrj.response.RangeFacet; +import org.apache.solr.client.solrj.response.RangeFacet.Count; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.FacetParams; +import org.apache.solr.common.params.MultiMapSolrParams; +import org.apache.solr.common.params.StatsParams; + +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import 
java.util.Set; +import java.util.concurrent.TimeUnit; + +import static org.apache.nifi.processors.solr.SolrUtils.KERBEROS_CREDENTIALS_SERVICE; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_TYPE; +import static org.apache.nifi.processors.solr.SolrUtils.COLLECTION; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_TYPE_CLOUD; +import static org.apache.nifi.processors.solr.SolrUtils.SSL_CONTEXT_SERVICE; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_SOCKET_TIMEOUT; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_CONNECTION_TIMEOUT; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_MAX_CONNECTIONS; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_MAX_CONNECTIONS_PER_HOST; +import static org.apache.nifi.processors.solr.SolrUtils.ZK_CLIENT_TIMEOUT; +import static org.apache.nifi.processors.solr.SolrUtils.ZK_CONNECTION_TIMEOUT; +import static org.apache.nifi.processors.solr.SolrUtils.SOLR_LOCATION; +import static org.apache.nifi.processors.solr.SolrUtils.BASIC_USERNAME; +import static org.apache.nifi.processors.solr.SolrUtils.BASIC_PASSWORD; +import static org.apache.nifi.processors.solr.SolrUtils.RECORD_WRITER; + +@Tags({"Apache", "Solr", "Get", "Query", "Records"}) +@InputRequirement(InputRequirement.Requirement.INPUT_ALLOWED) +@CapabilityDescription("Queries Solr and outputs the results as a FlowFile in the format of XML or using a Record Writer") +@WritesAttributes({ + @WritesAttribute(attribute = "solr.connect", description = "Solr connect string"), + @WritesAttribute(attribute = "solr.collection", description = "Solr collection"), + @WritesAttribute(attribute = "solr.query", description = "Query string sent to Solr"), + @WritesAttribute(attribute = "solr.cursor.mark", description = "Cursor mark can be used for scrolling Solr"), + @WritesAttribute(attribute = "solr.status.code", description = "Status code of Solr request. 
A status code of 0 indicates that the request was successfully processed"), + @WritesAttribute(attribute = "solr.query.time", description = "The elapsed time to process the query (in ms)"), + @WritesAttribute(attribute = "solr.start", description = "Solr start parameter (result offset) for the query"), + @WritesAttribute(attribute = "solr.rows", description = "Number of Solr documents to be returned for the query"), + @WritesAttribute(attribute = "solr.number.results", description = "Number of Solr documents that match the query"), + @WritesAttribute(attribute = "mime.type", description = "The mime type of the data format"), + @WritesAttribute(attribute = "querysolr.exeption.class", description = "The Java exception class raised when the processor fails"), + @WritesAttribute(attribute = "querysolr.exeption.message", description = "The Java exception message raised when the processor fails") +}) +public class QuerySolr extends SolrProcessor { + + public static final AllowableValue MODE_XML = new AllowableValue("XML"); + public static final AllowableValue MODE_REC = new AllowableValue("Records"); + + public static final AllowableValue RETURN_TOP_RESULTS = new AllowableValue("return_only_top_results", "Only top results"); + public static final AllowableValue RETURN_ALL_RESULTS = new AllowableValue("return_all_results", "Entire results"); + + public static final String MIME_TYPE_JSON = "application/json"; + public static final String MIME_TYPE_XML = "application/xml"; + public static final String ATTRIBUTE_SOLR_CONNECT = "solr.connect"; + public static final String ATTRIBUTE_SOLR_COLLECTION = "solr.collection"; + public static final String ATTRIBUTE_SOLR_QUERY = "solr.query"; + public static final String ATTRIBUTE_CURSOR_MARK = "solr.cursor.mark"; + public static final String ATTRIBUTE_SOLR_STATUS = "solr.status.code"; + public static final String ATTRIBUTE_SOLR_START = "solr.start"; + public static final String ATTRIBUTE_SOLR_ROWS = "solr.rows"; + public static final String ATTRIBUTE_SOLR_NUMBER_RESULTS = "solr.number.results"; + public static final String ATTRIBUTE_QUERY_TIME = "solr.query.time"; + public static final String EXCEPTION = "querysolr.exeption"; + public static final String EXCEPTION_MESSAGE = "querysolr.exeption.message"; + + public static final Integer UPPER_LIMIT_START_PARAM = 10000; + + public static final PropertyDescriptor RETURN_TYPE = new PropertyDescriptor + .Builder().name("return_type") + .displayName("Return Type") + .description("Output format of Solr results. Write Solr documents to FlowFiles as XML or using a Record Writer") + .required(true) + .allowableValues(MODE_XML, MODE_REC) + .defaultValue(MODE_XML.getValue()) + .build(); + + public static final PropertyDescriptor SOLR_PARAM_QUERY = new PropertyDescriptor + .Builder().name("solr_param_query") + .displayName("Solr Query") + .description("Solr Query, e. g. field:value") + .required(true) + .addValidator(StandardValidators.createAttributeExpressionLanguageValidator(AttributeExpression.ResultType.STRING)) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .defaultValue("*:*") + .build(); + + public static final PropertyDescriptor SOLR_PARAM_REQUEST_HANDLER = new PropertyDescriptor + .Builder().name("solr_param_request_handler") + .displayName("Request Handler") + .description("Define a request handler here, e. g. 
/query") + .required(true) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .defaultValue("/select") + .build(); + + public static final PropertyDescriptor SOLR_PARAM_FIELD_LIST = new PropertyDescriptor + .Builder().name("solr_param_field_list") + .displayName("Field List") + .description("Comma separated list of fields to be included into results, e. g. field1,field2") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor SOLR_PARAM_SORT = new PropertyDescriptor + .Builder().name("solr_param_sort") + .displayName("Sorting of result list") + .description("Comma separated sort clauses to define the sorting of results, e. g. field1 asc, field2 desc") + .required(false) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor SOLR_PARAM_START = new PropertyDescriptor + .Builder().name("solr_param_start") + .displayName("Start of results") + .description("Offset of result set") + .required(false) + .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor SOLR_PARAM_ROWS = new PropertyDescriptor + .Builder().name("solr_param_rows") + .displayName("Rows") + .description("Number of results to be returned for a single request") + .required(false) + .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + + public static final PropertyDescriptor AMOUNT_DOCUMENTS_TO_RETURN = new PropertyDescriptor + .Builder().name("amount_documents_to_return") + .displayName("Total amount of returned results") + .description("Total amount of Solr documents to be returned. If this property is set to \"Only top results\", " + + "only single requests will be sent to Solr and the results will be written into single FlowFiles. If it is set to " + + "\"Entire results\", all results matching to the query are retrieved via multiple Solr requests and " + + "returned in multiple FlowFiles. 
For both options, the number of Solr documents to be returned in a FlowFile depends on " + + "the configuration of the \"Rows\" property") + .required(true) + .allowableValues(RETURN_ALL_RESULTS, RETURN_TOP_RESULTS) + .defaultValue(RETURN_TOP_RESULTS.getValue()) + .build(); + + @Override + protected PropertyDescriptor getSupportedDynamicPropertyDescriptor(final String propertyDescriptorName) { + return new PropertyDescriptor.Builder() + .description("Specifies the value to send for the '" + propertyDescriptorName + "' Solr parameter") + .name(propertyDescriptorName) + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .dynamic(true) + .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) + .build(); + } + + public static final Relationship RESULTS = new Relationship.Builder().name("results") + .description("Results of Solr queries").build(); + public static final Relationship FACETS = new Relationship.Builder().name("facets") + .description("Results of faceted search").build(); + public static final Relationship STATS = new Relationship.Builder().name("stats") + .description("Stats about Solr index").build(); + public static final Relationship ORIGINAL = new Relationship.Builder().name("original") + .description("Original flowfile").build(); + public static final Relationship FAILURE = new Relationship.Builder().name("failure") + .description("Failure relationship").build(); + + private Set relationships; + private List descriptors; + + @Override + public Set getRelationships() { + return this.relationships; + } + + @Override + public List getSupportedPropertyDescriptors() { + return this.descriptors; + } + + @Override + protected void init(final ProcessorInitializationContext context) { + super.init(context); + + final List descriptors = new ArrayList<>(); + descriptors.add(SOLR_TYPE); + descriptors.add(SOLR_LOCATION); + descriptors.add(COLLECTION); + descriptors.add(RETURN_TYPE); + descriptors.add(RECORD_WRITER); + descriptors.add(SOLR_PARAM_QUERY); + descriptors.add(SOLR_PARAM_REQUEST_HANDLER); + descriptors.add(SOLR_PARAM_FIELD_LIST); + descriptors.add(SOLR_PARAM_SORT); + descriptors.add(SOLR_PARAM_START); + descriptors.add(SOLR_PARAM_ROWS); + descriptors.add(AMOUNT_DOCUMENTS_TO_RETURN); + descriptors.add(KERBEROS_CREDENTIALS_SERVICE); + descriptors.add(BASIC_USERNAME); + descriptors.add(BASIC_PASSWORD); + descriptors.add(SSL_CONTEXT_SERVICE); + descriptors.add(SOLR_SOCKET_TIMEOUT); + descriptors.add(SOLR_CONNECTION_TIMEOUT); + descriptors.add(SOLR_MAX_CONNECTIONS); + descriptors.add(SOLR_MAX_CONNECTIONS_PER_HOST); + descriptors.add(ZK_CLIENT_TIMEOUT); + descriptors.add(ZK_CONNECTION_TIMEOUT); + this.descriptors = Collections.unmodifiableList(descriptors); + + final Set relationships = new HashSet<>(); + relationships.add(FAILURE); + relationships.add(RESULTS); + relationships.add(FACETS); + relationships.add(STATS); + relationships.add(ORIGINAL); + this.relationships = Collections.unmodifiableSet(relationships); + } + + public static final Set SUPPORTED_SEARCH_COMPONENTS = new HashSet<>(); + static { + SUPPORTED_SEARCH_COMPONENTS.addAll(Arrays.asList(StatsParams.STATS, FacetParams.FACET)); + } + + public static final Set SEARCH_COMPONENTS_ON = new HashSet<>(); + static { + SEARCH_COMPONENTS_ON.addAll(Arrays.asList("true", "on", "yes")); + } + + @Override + protected final Collection additionalCustomValidation(ValidationContext context) { + final Collection problems = new ArrayList<>(); + + if 
(context.getProperty(RETURN_TYPE).evaluateAttributeExpressions().getValue().equals(MODE_REC.getValue()) + && !context.getProperty(RECORD_WRITER).isSet()) { + problems.add(new ValidationResult.Builder() + .explanation("for writing records a record writer has to be configured") + .valid(false) + .subject("Record writer check") + .build()); + } + return problems; + } + + @Override + public void doOnTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException { + final ComponentLog logger = getLogger(); + + FlowFile flowFileOriginal = session.get(); + FlowFile flowFileResponse; + + if (flowFileOriginal == null) { + if (context.hasNonLoopConnection()) { + return; + } + flowFileResponse = session.create(); + } else { + flowFileResponse = session.create(flowFileOriginal); + } + + final SolrQuery solrQuery = new SolrQuery(); + final boolean isSolrCloud = SOLR_TYPE_CLOUD.equals(context.getProperty(SOLR_TYPE).getValue()); + final String collection = context.getProperty(COLLECTION).evaluateAttributeExpressions(flowFileResponse).getValue(); + + final StringBuilder transitUri = new StringBuilder("solr://"); + transitUri.append(getSolrLocation()); + if (isSolrCloud) { + transitUri.append(":").append(collection); + } + final StopWatch timer = new StopWatch(false); + + try { + solrQuery.setQuery(context.getProperty(SOLR_PARAM_QUERY).evaluateAttributeExpressions(flowFileResponse).getValue()); + solrQuery.setRequestHandler(context.getProperty(SOLR_PARAM_REQUEST_HANDLER).evaluateAttributeExpressions(flowFileResponse).getValue()); + + if (context.getProperty(SOLR_PARAM_FIELD_LIST).isSet()) { + for (final String field : context.getProperty(SOLR_PARAM_FIELD_LIST).evaluateAttributeExpressions(flowFileResponse).getValue() + .split(",")) { + solrQuery.addField(field.trim()); + } + } + + // Avoid ArrayIndexOutOfBoundsException due to incorrectly configured sorting + try { + if (context.getProperty(SOLR_PARAM_SORT).isSet()) { + final List sortings = new ArrayList<>(); + for (final String sorting : context.getProperty(SOLR_PARAM_SORT).evaluateAttributeExpressions(flowFileResponse).getValue() + .split(",")) { + final String[] sortEntry = sorting.trim().split(" "); + sortings.add(new SolrQuery.SortClause(sortEntry[0], sortEntry[1])); + } + solrQuery.setSorts(sortings); + } + } catch (Exception e) { + throw new ProcessException("Error while parsing the sort clauses for the Solr query"); + } + + final Integer startParam = context.getProperty(SOLR_PARAM_START).isSet() ? Integer.parseInt( + context.getProperty(SOLR_PARAM_START).evaluateAttributeExpressions(flowFileResponse).getValue()) : CommonParams.START_DEFAULT; + + solrQuery.setStart(startParam); + + final Integer rowParam = context.getProperty(SOLR_PARAM_ROWS).isSet() ? 
Integer.parseInt( + context.getProperty(SOLR_PARAM_ROWS).evaluateAttributeExpressions(flowFileResponse).getValue()) : CommonParams.ROWS_DEFAULT; + + solrQuery.setRows(rowParam); + + final Map additionalSolrParams = SolrUtils.getRequestParams(context, flowFileResponse); + + final Set searchComponents = extractSearchComponents(additionalSolrParams); + solrQuery.add(new MultiMapSolrParams(additionalSolrParams)); + + final Map attributes = new HashMap<>(); + attributes.put(ATTRIBUTE_SOLR_CONNECT, getSolrLocation()); + if (isSolrCloud) { + attributes.put(ATTRIBUTE_SOLR_COLLECTION, collection); + } + attributes.put(ATTRIBUTE_SOLR_QUERY, solrQuery.toString()); + if (flowFileOriginal != null) { + flowFileOriginal = session.putAllAttributes(flowFileOriginal, attributes); + } + + flowFileResponse = session.putAllAttributes(flowFileResponse, attributes); + + final boolean getEntireResults = RETURN_ALL_RESULTS.equals(context.getProperty(AMOUNT_DOCUMENTS_TO_RETURN).getValue()); + boolean processFacetsAndStats = true; + boolean continuePaging = true; + + while (continuePaging){ + + timer.start(); + + Map responseAttributes = new HashMap<>(); + responseAttributes.put(ATTRIBUTE_SOLR_START, solrQuery.getStart().toString()); + responseAttributes.put(ATTRIBUTE_SOLR_ROWS, solrQuery.getRows().toString()); + + if (solrQuery.getStart() > UPPER_LIMIT_START_PARAM) { + logger.warn("The start parameter of Solr query {} exceeded the upper limit of {}. The query will not be processed " + + "to avoid performance or memory issues on the part of Solr.", new Object[]{solrQuery.toString(), UPPER_LIMIT_START_PARAM}); + flowFileResponse = session.putAllAttributes(flowFileResponse, responseAttributes); + timer.stop(); + break; + } + + final QueryRequest req = new QueryRequest(solrQuery); + if (isBasicAuthEnabled()) { + req.setBasicAuthCredentials(getUsername(), getPassword()); + } + + final QueryResponse response = req.process(getSolrClient()); + timer.stop(); + + final Long totalNumberOfResults = response.getResults().getNumFound(); + + responseAttributes.put(ATTRIBUTE_SOLR_NUMBER_RESULTS, totalNumberOfResults.toString()); + responseAttributes.put(ATTRIBUTE_CURSOR_MARK, response.getNextCursorMark()); + responseAttributes.put(ATTRIBUTE_SOLR_STATUS, String.valueOf(response.getStatus())); + responseAttributes.put(ATTRIBUTE_QUERY_TIME, String.valueOf(response.getQTime())); + flowFileResponse = session.putAllAttributes(flowFileResponse, responseAttributes); + + if (response.getResults().size() > 0) { + + if (context.getProperty(RETURN_TYPE).getValue().equals(MODE_XML.getValue())){ + flowFileResponse = session.write(flowFileResponse, SolrUtils.getOutputStreamCallbackToTransformSolrResponseToXml(response)); + flowFileResponse = session.putAttribute(flowFileResponse, CoreAttributes.MIME_TYPE.key(), MIME_TYPE_XML); + } else { + final RecordSetWriterFactory writerFactory = context.getProperty(RECORD_WRITER).evaluateAttributeExpressions(flowFileResponse) + .asControllerService(RecordSetWriterFactory.class); + final RecordSchema schema = writerFactory.getSchema(flowFileResponse.getAttributes(), null); + final RecordSet recordSet = SolrUtils.solrDocumentsToRecordSet(response.getResults(), schema); + final StringBuffer mimeType = new StringBuffer(); + final FlowFile flowFileResponseRef = flowFileResponse; + flowFileResponse = session.write(flowFileResponse, out -> { + try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, flowFileResponseRef)) { + writer.write(recordSet); + writer.flush(); + 
mimeType.append(writer.getMimeType()); + } catch (SchemaNotFoundException e) { + throw new ProcessException("Could not parse Solr response", e); + } + }); + flowFileResponse = session.putAttribute(flowFileResponse, CoreAttributes.MIME_TYPE.key(), mimeType.toString()); + } + + if (processFacetsAndStats) { + if (searchComponents.contains(FacetParams.FACET)) { + FlowFile flowFileFacets = session.create(flowFileResponse); + flowFileFacets = session.write(flowFileFacets, out -> { + try ( + final OutputStreamWriter osw = new OutputStreamWriter(out); + final JsonWriter writer = new JsonWriter(osw) + ) { + addFacetsFromSolrResponseToJsonWriter(response, writer); + } + }); + flowFileFacets = session.putAttribute(flowFileFacets, CoreAttributes.MIME_TYPE.key(), MIME_TYPE_JSON); + session.getProvenanceReporter().receive(flowFileFacets, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); + session.transfer(flowFileFacets, FACETS); + } + + if (searchComponents.contains(StatsParams.STATS)) { + FlowFile flowFileStats = session.create(flowFileResponse); + flowFileStats = session.write(flowFileStats, out -> { + try ( + final OutputStreamWriter osw = new OutputStreamWriter(out); + final JsonWriter writer = new JsonWriter(osw) + ) { + addStatsFromSolrResponseToJsonWriter(response, writer); + } + }); + flowFileStats = session.putAttribute(flowFileStats, CoreAttributes.MIME_TYPE.key(), MIME_TYPE_JSON); + session.getProvenanceReporter().receive(flowFileStats, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); + session.transfer(flowFileStats, STATS); + } + processFacetsAndStats = false; + } + } + + if (getEntireResults) { + final Integer totalDocumentsReturned = solrQuery.getStart() + solrQuery.getRows(); + if (totalDocumentsReturned < totalNumberOfResults) { + solrQuery.setStart(totalDocumentsReturned); + session.getProvenanceReporter().receive(flowFileResponse, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); + session.transfer(flowFileResponse, RESULTS); + flowFileResponse = session.create(flowFileResponse); + } else { + continuePaging = false; + } + } else { + continuePaging = false; + } + } + + } catch (Exception e) { + flowFileResponse = session.penalize(flowFileResponse); + flowFileResponse = session.putAttribute(flowFileResponse, EXCEPTION, e.getClass().getName()); + flowFileResponse = session.putAttribute(flowFileResponse, EXCEPTION_MESSAGE, e.getMessage()); + session.transfer(flowFileResponse, FAILURE); + logger.error("Failed to execute query {} due to {}. 
FlowFile will be routed to relationship failure", new Object[]{solrQuery.toString(), e}, e); + if (flowFileOriginal != null) { + flowFileOriginal = session.penalize(flowFileOriginal); + } + } + + if (!flowFileResponse.isPenalized()) { + session.getProvenanceReporter().receive(flowFileResponse, transitUri.toString(), timer.getDuration(TimeUnit.MILLISECONDS)); + session.transfer(flowFileResponse, RESULTS); + } + + if (flowFileOriginal != null) { + if (!flowFileOriginal.isPenalized()) { + session.transfer(flowFileOriginal, ORIGINAL); + } else { + session.remove(flowFileOriginal); + } + } + } + + private Set extractSearchComponents(Map solrParams) { + final Set searchComponentsTemp = new HashSet<>(); + for (final String searchComponent : SUPPORTED_SEARCH_COMPONENTS) + if (solrParams.keySet().contains(searchComponent)) { + if (SEARCH_COMPONENTS_ON.contains(solrParams.get(searchComponent)[0])) { + searchComponentsTemp.add(searchComponent); + } + } + return Collections.unmodifiableSet(searchComponentsTemp); + } + + private static void addStatsFromSolrResponseToJsonWriter(final QueryResponse response, final JsonWriter writer) throws IOException { + writer.beginObject(); + writer.name("stats_fields"); + writer.beginObject(); + for (Map.Entry entry: response.getFieldStatsInfo().entrySet()) { + FieldStatsInfo fsi = entry.getValue(); + writer.name(entry.getKey()); + writer.beginObject(); + writer.name("min").value(fsi.getMin().toString()); + writer.name("max").value(fsi.getMax().toString()); + writer.name("count").value(fsi.getCount()); + writer.name("missing").value(fsi.getMissing()); + writer.name("sum").value(fsi.getSum().toString()); + writer.name("mean").value(fsi.getMean().toString()); + writer.name("sumOfSquares").value(fsi.getSumOfSquares()); + writer.name("stddev").value(fsi.getStddev()); + writer.endObject(); + } + writer.endObject(); + writer.endObject(); + } + + private static void addFacetsFromSolrResponseToJsonWriter(final QueryResponse response, final JsonWriter writer) throws IOException { + writer.beginObject(); + writer.name("facet_queries"); + writer.beginArray(); + for (final Map.Entry facetQuery : response.getFacetQuery().entrySet()){ + writer.beginObject(); + writer.name("facet").value(facetQuery.getKey()); + writer.name("count").value(facetQuery.getValue()); + writer.endObject(); + } + writer.endArray(); + + writer.name("facet_fields"); + writer.beginObject(); + for (final FacetField facetField : response.getFacetFields()){ + writer.name(facetField.getName()); + writer.beginArray(); + for (final FacetField.Count count : facetField.getValues()) { + writer.beginObject(); + writer.name("facet").value(count.getName()); + writer.name("count").value(count.getCount()); + writer.endObject(); + } + writer.endArray(); + } + writer.endObject(); + + writer.name("facet_ranges"); + writer.beginObject(); + for (final RangeFacet rangeFacet : response.getFacetRanges()) { + writer.name(rangeFacet.getName()); + writer.beginArray(); + final List list = rangeFacet.getCounts(); + for (final Count count : list) { + writer.beginObject(); + writer.name("facet").value(count.getValue()); + writer.name("count").value(count.getCount()); + writer.endObject(); + } + writer.endArray(); + } + writer.endObject(); + + writer.name("facet_intervals"); + writer.beginObject(); + for (final IntervalFacet intervalFacet : response.getIntervalFacets()) { + writer.name(intervalFacet.getField()); + writer.beginArray(); + for (final IntervalFacet.Count count : intervalFacet.getIntervals()) { + writer.beginObject(); + 
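// Editor's note: each interval facet entry below is serialized as a small JSON object of the form
// {"facet": <interval key>, "count": <number of hits>}, mirroring the structure written above for
// facet queries, facet fields and facet ranges.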
writer.name("facet").value(count.getKey()); + writer.name("count").value(count.getCount()); + writer.endObject(); + } + writer.endArray(); + } + writer.endObject(); + writer.endObject(); + } +} + + diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRecordProcessor.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRecordProcessor.java index e4371f69c6..f34f58f836 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRecordProcessor.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRecordProcessor.java @@ -131,7 +131,7 @@ public abstract class AbstractRecordProcessor extends AbstractProcessor { Record firstRecord = reader.nextRecord(); if (firstRecord == null) { final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema()); - try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) { + try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out, originalAttributes)) { writer.beginRecordSet(); final WriteResult writeResult = writer.finishRecordSet(); @@ -147,7 +147,7 @@ public abstract class AbstractRecordProcessor extends AbstractProcessor { firstRecord = AbstractRecordProcessor.this.process(firstRecord, original, context); final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, firstRecord.getSchema()); - try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out)) { + try (final RecordSetWriter writer = writerFactory.createWriter(getLogger(), writeSchema, out, originalAttributes)) { writer.beginRecordSet(); writer.write(firstRecord); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRouteRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRouteRecord.java index 9d96d34cc6..46acf98373 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRouteRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/AbstractRouteRecord.java @@ -219,11 +219,11 @@ public abstract class AbstractRouteRecord extends AbstractProcessor { Tuple tuple = writers.get(relationship); if (tuple == null) { - FlowFile outFlowFile = session.create(original); + final FlowFile outFlowFile = session.create(original); final OutputStream out = session.write(outFlowFile); final RecordSchema recordWriteSchema = writerFactory.getSchema(originalAttributes, record.getSchema()); - recordSetWriter = writerFactory.createWriter(getLogger(), recordWriteSchema, out); + recordSetWriter = writerFactory.createWriter(getLogger(), recordWriteSchema, out, outFlowFile); recordSetWriter.beginRecordSet(); tuple = new Tuple<>(outFlowFile, recordSetWriter); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ForkRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ForkRecord.java index 941e7dc55b..1b719abea0 100644 
--- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ForkRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ForkRecord.java @@ -243,7 +243,7 @@ public class ForkRecord extends AbstractProcessor { final RecordSchema writeSchema = writerFactory.getSchema(originalAttributes, reader.getSchema()); final OutputStream out = session.write(outFlowFile); - try (final RecordSetWriter recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out)) { + try (final RecordSetWriter recordSetWriter = writerFactory.createWriter(getLogger(), writeSchema, out, outFlowFile)) { recordSetWriter.beginRecordSet(); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenTCPRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenTCPRecord.java index 738c3e2ede..5ea3e6f324 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenTCPRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenTCPRecord.java @@ -379,7 +379,7 @@ public class ListenTCPRecord extends AbstractProcessor { final RecordSchema recordSchema = recordSetWriterFactory.getSchema(Collections.EMPTY_MAP, record.getSchema()); try (final OutputStream out = session.write(flowFile); - final RecordSetWriter recordWriter = recordSetWriterFactory.createWriter(getLogger(), recordSchema, out)) { + final RecordSetWriter recordWriter = recordSetWriterFactory.createWriter(getLogger(), recordSchema, out, flowFile)) { // start the record set and write the first record from above recordWriter.beginRecordSet(); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenUDPRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenUDPRecord.java index 2e3a04a7fb..d331a332d4 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenUDPRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ListenUDPRecord.java @@ -274,7 +274,7 @@ public class ListenUDPRecord extends AbstractListenEventProcessor final RecordSchema recordSchema = firstRecord.getSchema(); final RecordSchema writeSchema = writerFactory.getSchema(Collections.emptyMap(), recordSchema); - writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut); + writer = writerFactory.createWriter(getLogger(), writeSchema, rawOut, flowFile); writer.beginRecordSet(); flowFileRecordWriter = new FlowFileRecordWriter(flowFile, writer); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java index 16209c8ef7..6c0ef0844e 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java +++ 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java @@ -230,7 +230,7 @@ public class PartitionRecord extends AbstractProcessor { final OutputStream out = session.write(childFlowFile); - writer = writerFactory.createWriter(getLogger(), writeSchema, out); + writer = writerFactory.createWriter(getLogger(), writeSchema, out, childFlowFile); writer.beginRecordSet(); writerMap.put(recordValueMap, writer); } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/QueryRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/QueryRecord.java index e81282b365..514d0438b2 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/QueryRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/QueryRecord.java @@ -336,7 +336,7 @@ public class QueryRecord extends AbstractProcessor { throw new ProcessException(e); } - try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out)) { + try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(getLogger(), writeSchema, out, original)) { writeResultRef.set(resultSetWriter.write(recordSet)); mimeTypeRef.set(resultSetWriter.getMimeType()); } catch (final Exception e) { diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitRecord.java index 654435710d..57f1fbd4b2 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/SplitRecord.java @@ -169,7 +169,7 @@ public class SplitRecord extends AbstractProcessor { final WriteResult writeResult; try (final OutputStream out = session.write(split); - final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out)) { + final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, split)) { if (maxRecords == 1) { final Record record = pushbackSet.next(); writeResult = writer.write(record); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java index bc39ecf081..b3255e6bd8 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ValidateRecord.java @@ -445,7 +445,7 @@ public class ValidateRecord extends AbstractProcessor { } final OutputStream out = session.write(flowFile); - final RecordSetWriter created = factory.createWriter(getLogger(), outputSchema, out); + final RecordSetWriter created = factory.createWriter(getLogger(), outputSchema, out, flowFile); created.beginRecordSet(); return created; } diff --git 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/merge/RecordBin.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/merge/RecordBin.java index 139b2a4063..e6ec2e6f01 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/merge/RecordBin.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/merge/RecordBin.java @@ -132,7 +132,7 @@ public class RecordBin { this.out = new ByteCountingOutputStream(rawOut); - recordWriter = writerFactory.createWriter(logger, record.getSchema(), out); + recordWriter = writerFactory.createWriter(logger, record.getSchema(), out, flowFile); recordWriter.beginRecordSet(); } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/sql/RecordSqlWriter.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/sql/RecordSqlWriter.java index d5d798b15a..56a160bbb0 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/sql/RecordSqlWriter.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/sql/RecordSqlWriter.java @@ -36,6 +36,7 @@ import java.io.IOException; import java.io.OutputStream; import java.sql.ResultSet; import java.sql.SQLException; +import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; @@ -77,7 +78,7 @@ public class RecordSqlWriter implements SqlWriter { } catch (final SQLException | SchemaNotFoundException | IOException e) { throw new ProcessException(e); } - try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(logger, writeSchema, outputStream)) { + try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(logger, writeSchema, outputStream, Collections.emptyMap())) { writeResultRef.set(resultSetWriter.write(recordSet)); if (mimeType == null) { mimeType = resultSetWriter.getMimeType(); @@ -115,7 +116,7 @@ public class RecordSqlWriter implements SqlWriter { @Override public void writeEmptyResultSet(OutputStream outputStream, ComponentLog logger) throws IOException { - try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(logger, writeSchema, outputStream)) { + try (final RecordSetWriter resultSetWriter = recordSetWriterFactory.createWriter(logger, writeSchema, outputStream, Collections.emptyMap())) { mimeType = resultSetWriter.getMimeType(); resultSetWriter.beginRecordSet(); resultSetWriter.finishRecordSet(); diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestConvertRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestConvertRecord.java index eba08354c8..822f664aed 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestConvertRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestConvertRecord.java @@ -27,7 +27,12 @@ import java.io.OutputStream; import 
java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Paths; +import java.util.HashMap; +import java.util.Map; +import org.apache.nifi.csv.CSVReader; +import org.apache.nifi.csv.CSVRecordSetWriter; +import org.apache.nifi.csv.CSVUtils; import org.apache.nifi.json.JsonRecordSetWriter; import org.apache.nifi.json.JsonTreeReader; import org.apache.nifi.reporting.InitializationException; @@ -203,9 +208,9 @@ public class TestConvertRecord { runner.setProperty(ConvertRecord.RECORD_WRITER, "writer"); runner.run(); - runner.assertAllFlowFilesTransferred(UpdateRecord.REL_SUCCESS, 1); + runner.assertAllFlowFilesTransferred(ConvertRecord.REL_SUCCESS, 1); - MockFlowFile flowFile = runner.getFlowFilesForRelationship(ExecuteSQL.REL_SUCCESS).get(0); + MockFlowFile flowFile = runner.getFlowFilesForRelationship(ConvertRecord.REL_SUCCESS).get(0); final ByteArrayOutputStream baos = new ByteArrayOutputStream(); try (final SnappyInputStream sis = new SnappyInputStream(new ByteArrayInputStream(flowFile.toByteArray())); final OutputStream out = baos) { @@ -218,4 +223,50 @@ public class TestConvertRecord { assertEquals(new String(Files.readAllBytes(Paths.get("src/test/resources/TestConvertRecord/input/person.json"))), baos.toString(StandardCharsets.UTF_8.name())); } + + @Test + public void testCSVFormattingWithEL() throws InitializationException { + TestRunner runner = TestRunners.newTestRunner(ConvertRecord.class); + + CSVReader csvReader = new CSVReader(); + runner.addControllerService("csv-reader", csvReader); + runner.setProperty(csvReader, CSVUtils.VALUE_SEPARATOR, "${csv.in.delimiter}"); + runner.setProperty(csvReader, CSVUtils.QUOTE_CHAR, "${csv.in.quote}"); + runner.setProperty(csvReader, CSVUtils.ESCAPE_CHAR, "${csv.in.escape}"); + runner.setProperty(csvReader, CSVUtils.COMMENT_MARKER, "${csv.in.comment}"); + runner.enableControllerService(csvReader); + + CSVRecordSetWriter csvWriter = new CSVRecordSetWriter(); + runner.addControllerService("csv-writer", csvWriter); + runner.setProperty(csvWriter, CSVUtils.VALUE_SEPARATOR, "${csv.out.delimiter}"); + runner.setProperty(csvWriter, CSVUtils.QUOTE_CHAR, "${csv.out.quote}"); + runner.setProperty(csvWriter, CSVUtils.QUOTE_MODE, CSVUtils.QUOTE_ALL); + runner.enableControllerService(csvWriter); + + runner.setProperty(ConvertRecord.RECORD_READER, "csv-reader"); + runner.setProperty(ConvertRecord.RECORD_WRITER, "csv-writer"); + + String ffContent = "~ comment\n" + + "id|username|password\n" + + "123|'John'|^|^'^^\n"; + + Map ffAttributes = new HashMap<>(); + ffAttributes.put("csv.in.delimiter", "|"); + ffAttributes.put("csv.in.quote", "'"); + ffAttributes.put("csv.in.escape", "^"); + ffAttributes.put("csv.in.comment", "~"); + ffAttributes.put("csv.out.delimiter", "\t"); + ffAttributes.put("csv.out.quote", "`"); + + runner.enqueue(ffContent, ffAttributes); + runner.run(); + + runner.assertAllFlowFilesTransferred(ConvertRecord.REL_SUCCESS, 1); + + MockFlowFile flowFile = runner.getFlowFilesForRelationship(ConvertRecord.REL_SUCCESS).get(0); + + String expected = "`id`\t`username`\t`password`\n" + + "`123`\t`John`\t`|'^`\n"; + assertEquals(expected, new String(flowFile.toByteArray())); + } } diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestQueryRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestQueryRecord.java index d2981b5773..5b1732768e 100644 --- 
a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestQueryRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/test/java/org/apache/nifi/processors/standard/TestQueryRecord.java @@ -824,7 +824,7 @@ public class TestQueryRecord { } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new RecordSetWriter() { @Override diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-service-api/src/main/java/org/apache/nifi/serialization/RecordSetWriterFactory.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-service-api/src/main/java/org/apache/nifi/serialization/RecordSetWriterFactory.java index a9032e4ae6..3d16f264d2 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-service-api/src/main/java/org/apache/nifi/serialization/RecordSetWriterFactory.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-service-api/src/main/java/org/apache/nifi/serialization/RecordSetWriterFactory.java @@ -19,9 +19,11 @@ package org.apache.nifi.serialization; import java.io.IOException; import java.io.OutputStream; +import java.util.Collections; import java.util.Map; import org.apache.nifi.controller.ControllerService; +import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.logging.ComponentLog; import org.apache.nifi.schema.access.SchemaNotFoundException; import org.apache.nifi.serialization.record.RecordSchema; @@ -75,6 +77,47 @@ public interface RecordSetWriterFactory extends ControllerService { * * @return a RecordSetWriter that can write record sets to an OutputStream * @throws IOException if unable to read from the given InputStream + * + * @deprecated Use {@link #createWriter(ComponentLog, RecordSchema, OutputStream, FlowFile)} or {@link #createWriter(ComponentLog, RecordSchema, OutputStream, Map)} instead. */ - RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out) throws SchemaNotFoundException, IOException; + @Deprecated + default RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out) throws SchemaNotFoundException, IOException { + return createWriter(logger, schema, out, Collections.emptyMap()); + } + + /** + *

+ * Creates a new RecordSetWriter that is capable of writing record contents to an OutputStream. + * The method accepts a FlowFile whose attributes can be used to resolve properties specified via Expression Language. + *

+ + * @param logger the logger to use when logging information. This is passed in, rather than using the logger of the Controller Service + because it allows messages to be logged for the component that is calling this Controller Service. + * @param schema the schema that will be used for writing records + * @param out the OutputStream to write to + * @param flowFile the FlowFile whose attributes are used to resolve properties specified via Expression Language + + * @return a RecordSetWriter that can write record sets to an OutputStream + * @throws IOException if unable to write to the given OutputStream + */ + default RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out, FlowFile flowFile) throws SchemaNotFoundException, IOException { + return createWriter(logger, schema, out, flowFile.getAttributes()); + } + + /** + *

+ * Creates a new RecordSetWriter that is capable of writing record contents to an OutputStream. + * The method accepts a variables map that can be used to resolve properties specified via Expression Language. + *
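     * Illustrative usage from a calling component (a non-normative sketch; the writerFactory, recordSet
     * and flowFile names are placeholders, not part of this API):
     * <pre>{@code
     * try (final OutputStream out = session.write(flowFile);
     *      final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, flowFile.getAttributes())) {
     *     writer.write(recordSet);
     * }
     * }</pre>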

+ * + * @param logger the logger to use when logging information. This is passed in, rather than using the logger of the Controller Service + * because it allows messages to be logged for the component that is calling this Controller Service. + * @param schema the schema that will be used for writing records + * @param out the OutputStream to write to + * @param variables the variables which are used to resolve properties specified via Expression Language + * + * @return a RecordSetWriter that can write record sets to an OutputStream + * @throws IOException if unable to read from the given InputStream + */ + RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out, Map variables) throws SchemaNotFoundException, IOException; } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java index 487b51f33b..5730ee37d2 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/avro/AvroRecordSetWriter.java @@ -49,6 +49,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.EnumSet; import java.util.List; +import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.concurrent.BlockingQueue; @@ -123,7 +124,7 @@ public class AvroRecordSetWriter extends SchemaRegistryRecordSetWriter implement } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema recordSchema, final OutputStream out) throws IOException { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema recordSchema, final OutputStream out, final Map variables) throws IOException { final String strategyValue = getConfigurationContext().getProperty(getSchemaWriteStrategyDescriptor()).getValue(); final String compressionFormat = getConfigurationContext().getProperty(COMPRESSION_FORMAT).getValue(); diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVHeaderSchemaStrategy.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVHeaderSchemaStrategy.java index 624981b41f..ac269169bc 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVHeaderSchemaStrategy.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVHeaderSchemaStrategy.java @@ -54,7 +54,7 @@ public class CSVHeaderSchemaStrategy implements SchemaAccessStrategy { } try { - final CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader(); + final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, variables).withFirstRecordAsHeader(); try (final Reader reader = new InputStreamReader(new 
BOMInputStream(contentStream)); final CSVParser csvParser = new CSVParser(reader, csvFormat)) { diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVReader.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVReader.java index 1ae57b304c..b1a840a6cb 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVReader.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVReader.java @@ -46,6 +46,7 @@ import org.apache.nifi.stream.io.NonCloseableInputStream; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; +import java.util.Collections; import java.util.List; import java.util.Map; @@ -55,7 +56,7 @@ import java.util.Map; + "the values. See Controller Service's Usage for further documentation.") public class CSVReader extends SchemaRegistryService implements RecordReaderFactory { - private final AllowableValue headerDerivedAllowableValue = new AllowableValue("csv-header-derived", "Use String Fields From Header", + private static final AllowableValue HEADER_DERIVED = new AllowableValue("csv-header-derived", "Use String Fields From Header", "The first non-comment line of the CSV file is a header line that contains the names of the columns. The schema will be derived by using the " + "column names in the header and assuming that all columns are of type String."); @@ -78,8 +79,9 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact .required(true) .build(); + private volatile ConfigurationContext context; + private volatile String csvParser; - private volatile CSVFormat csvFormat; private volatile String dateFormat; private volatile String timeFormat; private volatile String timestampFormat; @@ -87,6 +89,9 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact private volatile boolean ignoreHeader; private volatile String charSet; + // it will be initialized only if there are no dynamic csv formatting properties + private volatile CSVFormat csvFormat; + @Override protected List getSupportedPropertyDescriptors() { final List properties = new ArrayList<>(super.getSupportedPropertyDescriptors()); @@ -108,9 +113,10 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact } @OnEnabled - public void storeCsvFormat(final ConfigurationContext context) { + public void storeStaticProperties(final ConfigurationContext context) { + this.context = context; + this.csvParser = context.getProperty(CSV_PARSER).getValue(); - this.csvFormat = CSVUtils.createCSVFormat(context); this.dateFormat = context.getProperty(DateTimeUtils.DATE_FORMAT).getValue(); this.timeFormat = context.getProperty(DateTimeUtils.TIME_FORMAT).getValue(); this.timestampFormat = context.getProperty(DateTimeUtils.TIMESTAMP_FORMAT).getValue(); @@ -121,10 +127,15 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact // Ensure that if we are deriving schema from header that we always treat the first line as a header, // regardless of the 'First Line is Header' property final String accessStrategy = context.getProperty(SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY).getValue(); 
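// Editor's note (illustrative, not part of the patch): the hunk just below caches the CSVFormat in
// onEnabled() only when CSVUtils.isDynamicCSVFormat(context) reports that none of the formatting
// properties use Expression Language. Otherwise the format is rebuilt per FlowFile, e.g. an attribute
// csv.delimiter = "|" supplied as the variables map lets "${csv.delimiter}" in Value Separator resolve
// to '|' at read time:
//
//     final CSVFormat format = CSVUtils.createCSVFormat(context, flowFile.getAttributes());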
- if (headerDerivedAllowableValue.getValue().equals(accessStrategy) || SchemaInferenceUtil.INFER_SCHEMA.getValue().equals(accessStrategy)) { - this.csvFormat = this.csvFormat.withFirstRecordAsHeader(); + if (HEADER_DERIVED.getValue().equals(accessStrategy) || SchemaInferenceUtil.INFER_SCHEMA.getValue().equals(accessStrategy)) { this.firstLineIsHeader = true; } + + if (!CSVUtils.isDynamicCSVFormat(context)) { + this.csvFormat = CSVUtils.createCSVFormat(context, Collections.emptyMap()); + } else { + this.csvFormat = null; + } } @Override @@ -134,6 +145,13 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact final RecordSchema schema = getSchema(variables, new NonCloseableInputStream(in), null); in.reset(); + CSVFormat csvFormat; + if (this.csvFormat != null) { + csvFormat = this.csvFormat; + } else { + csvFormat = CSVUtils.createCSVFormat(context, variables); + } + if(APACHE_COMMONS_CSV.getValue().equals(csvParser)) { return new CSVRecordReader(in, logger, schema, csvFormat, firstLineIsHeader, ignoreHeader, dateFormat, timeFormat, timestampFormat, charSet); } else if(JACKSON_CSV.getValue().equals(csvParser)) { @@ -145,10 +163,10 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact @Override protected SchemaAccessStrategy getSchemaAccessStrategy(final String allowableValue, final SchemaRegistry schemaRegistry, final PropertyContext context) { - if (allowableValue.equalsIgnoreCase(headerDerivedAllowableValue.getValue())) { + if (allowableValue.equalsIgnoreCase(HEADER_DERIVED.getValue())) { return new CSVHeaderSchemaStrategy(context); } else if (allowableValue.equalsIgnoreCase(SchemaInferenceUtil.INFER_SCHEMA.getValue())) { - final RecordSourceFactory sourceFactory = (var, in) -> new CSVRecordSource(in, context); + final RecordSourceFactory sourceFactory = (variables, in) -> new CSVRecordSource(in, context, variables); final SchemaInferenceEngine inference = new CSVSchemaInference(new TimeValueInference(dateFormat, timeFormat, timestampFormat)); return new InferSchemaAccessStrategy<>(sourceFactory, inference, getLogger()); } @@ -159,7 +177,7 @@ public class CSVReader extends SchemaRegistryService implements RecordReaderFact @Override protected List getSchemaAccessStrategyValues() { final List allowableValues = new ArrayList<>(super.getSchemaAccessStrategyValues()); - allowableValues.add(headerDerivedAllowableValue); + allowableValues.add(HEADER_DERIVED); allowableValues.add(SchemaInferenceUtil.INFER_SCHEMA); return allowableValues; } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSetWriter.java index 7aab5a36e6..9326d8a228 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSetWriter.java @@ -20,7 +20,9 @@ package org.apache.nifi.csv; import java.io.IOException; import java.io.OutputStream; import java.util.ArrayList; +import java.util.Collections; import java.util.List; +import java.util.Map; import org.apache.commons.csv.CSVFormat; import 
org.apache.nifi.annotation.documentation.CapabilityDescription; @@ -41,10 +43,14 @@ import org.apache.nifi.serialization.record.RecordSchema; + "corresponding to the record fields.") public class CSVRecordSetWriter extends DateTimeTextRecordSetWriter implements RecordSetWriterFactory { - private volatile CSVFormat csvFormat; + private volatile ConfigurationContext context; + private volatile boolean includeHeader; private volatile String charSet; + // it will be initialized only if there are no dynamic csv formatting properties + private volatile CSVFormat csvFormat; + @Override protected List getSupportedPropertyDescriptors() { final List properties = new ArrayList<>(super.getSupportedPropertyDescriptors()); @@ -64,14 +70,28 @@ public class CSVRecordSetWriter extends DateTimeTextRecordSetWriter implements R } @OnEnabled - public void storeCsvFormat(final ConfigurationContext context) { - this.csvFormat = CSVUtils.createCSVFormat(context); + public void storeStaticProperties(final ConfigurationContext context) { + this.context = context; + this.includeHeader = context.getProperty(CSVUtils.INCLUDE_HEADER_LINE).asBoolean(); this.charSet = context.getProperty(CSVUtils.CHARSET).getValue(); + + if (!CSVUtils.isDynamicCSVFormat(context)) { + this.csvFormat = CSVUtils.createCSVFormat(context, Collections.emptyMap()); + } else { + this.csvFormat = null; + } } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) throws SchemaNotFoundException, IOException { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) throws SchemaNotFoundException, IOException { + CSVFormat csvFormat; + if (this.csvFormat != null) { + csvFormat = this.csvFormat; + } else { + csvFormat = CSVUtils.createCSVFormat(context, variables); + } + return new WriteCSVResult(csvFormat, schema, getSchemaAccessWriter(schema), out, getDateFormat().orElse(null), getTimeFormat().orElse(null), getTimestampFormat().orElse(null), includeHeader, charSet); } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSource.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSource.java index 20a8407885..c27b5798fa 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSource.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/csv/CSVRecordSource.java @@ -33,12 +33,13 @@ import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.List; +import java.util.Map; public class CSVRecordSource implements RecordSource { private final Iterator csvRecordIterator; private final List fieldNames; - public CSVRecordSource(final InputStream in, final PropertyContext context) throws IOException { + public CSVRecordSource(final InputStream in, final PropertyContext context, final Map variables) throws IOException { final String charset = context.getProperty(CSVUtils.CHARSET).getValue(); final Reader reader; @@ -48,7 +49,7 @@ public class CSVRecordSource implements RecordSource { throw new ProcessException(e); } - final 
CSVFormat csvFormat = CSVUtils.createCSVFormat(context).withFirstRecordAsHeader().withTrim(); + final CSVFormat csvFormat = CSVUtils.createCSVFormat(context, variables).withFirstRecordAsHeader().withTrim(); final CSVParser csvParser = new CSVParser(reader, csvFormat); fieldNames = Collections.unmodifiableList(new ArrayList<>(csvParser.getHeaderMap().keySet())); diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java index b61586ed34..864574f9f7 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/json/JsonRecordSetWriter.java @@ -23,6 +23,7 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; import org.apache.commons.compress.compressors.CompressorException; import org.apache.commons.compress.compressors.CompressorStreamFactory; @@ -173,7 +174,7 @@ public class JsonRecordSetWriter extends DateTimeTextRecordSetWriter implements } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) throws SchemaNotFoundException, IOException { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) throws SchemaNotFoundException, IOException { final OutputStream bufferedOut = new BufferedOutputStream(out, 65536); final OutputStream compressionOut; diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java index 3971baa52f..528e74e386 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/text/FreeFormTextRecordSetWriter.java @@ -39,6 +39,7 @@ import java.io.OutputStream; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; +import java.util.Map; @Tags({"text", "freeform", "expression", "language", "el", "record", "recordset", "resultset", "writer", "serialize"}) @CapabilityDescription("Writes the contents of a RecordSet as free-form text. 
The configured " @@ -79,7 +80,7 @@ public class FreeFormTextRecordSetWriter extends SchemaRegistryRecordSetWriter i } @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) { + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) { return new FreeFormTextWriter(textValue, characterSet, out); } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLRecordSetWriter.java index 1f718d6652..87874980c4 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/main/java/org/apache/nifi/xml/XMLRecordSetWriter.java @@ -1,209 +1,210 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.xml; - -import org.apache.nifi.record.NullSuppression; -import org.apache.nifi.annotation.documentation.CapabilityDescription; -import org.apache.nifi.annotation.documentation.Tags; -import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.components.ValidationContext; -import org.apache.nifi.components.ValidationResult; -import org.apache.nifi.expression.ExpressionLanguageScope; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.schema.access.SchemaNotFoundException; -import org.apache.nifi.serialization.DateTimeTextRecordSetWriter; -import org.apache.nifi.serialization.RecordSetWriter; -import org.apache.nifi.serialization.RecordSetWriterFactory; -import org.apache.nifi.serialization.record.RecordSchema; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.List; - -@Tags({"xml", "resultset", "writer", "serialize", "record", "recordset", "row"}) -@CapabilityDescription("Writes a RecordSet to XML. 
The records are wrapped by a root tag.") -public class XMLRecordSetWriter extends DateTimeTextRecordSetWriter implements RecordSetWriterFactory { - - public static final AllowableValue ALWAYS_SUPPRESS = new AllowableValue("always-suppress", "Always Suppress", - "Fields that are missing (present in the schema but not in the record), or that have a value of null, will not be written out"); - public static final AllowableValue NEVER_SUPPRESS = new AllowableValue("never-suppress", "Never Suppress", - "Fields that are missing (present in the schema but not in the record), or that have a value of null, will be written out as a null value"); - public static final AllowableValue SUPPRESS_MISSING = new AllowableValue("suppress-missing", "Suppress Missing Values", - "When a field has a value of null, it will be written out. However, if a field is defined in the schema and not present in the record, the field will not be written out."); - - public static final AllowableValue USE_PROPERTY_AS_WRAPPER = new AllowableValue("use-property-as-wrapper", "Use Property as Wrapper", - "The value of the property \"Array Tag Name\" will be used as the tag name to wrap elements of an array. The field name of the array field will be used for the tag name " + - "of the elements."); - public static final AllowableValue USE_PROPERTY_FOR_ELEMENTS = new AllowableValue("use-property-for-elements", "Use Property for Elements", - "The value of the property \"Array Tag Name\" will be used for the tag name of the elements of an array. The field name of the array field will be used as the tag name " + - "to wrap elements."); - public static final AllowableValue NO_WRAPPING = new AllowableValue("no-wrapping", "No Wrapping", - "The elements of an array will not be wrapped"); - - public static final PropertyDescriptor SUPPRESS_NULLS = new PropertyDescriptor.Builder() - .name("suppress_nulls") - .displayName("Suppress Null Values") - .description("Specifies how the writer should handle a null field") - .allowableValues(NEVER_SUPPRESS, ALWAYS_SUPPRESS, SUPPRESS_MISSING) - .defaultValue(NEVER_SUPPRESS.getValue()) - .required(true) - .build(); - - public static final PropertyDescriptor PRETTY_PRINT_XML = new PropertyDescriptor.Builder() - .name("pretty_print_xml") - .displayName("Pretty Print XML") - .description("Specifies whether or not the XML should be pretty printed") - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .allowableValues("true", "false") - .defaultValue("false") - .required(true) - .build(); - - public static final PropertyDescriptor ROOT_TAG_NAME = new PropertyDescriptor.Builder() - .name("root_tag_name") - .displayName("Name of Root Tag") - .description("Specifies the name of the XML root tag wrapping the record set. This property has to be defined if " + - "the writer is supposed to write multiple records in a single FlowFile.") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .required(false) - .build(); - - public static final PropertyDescriptor RECORD_TAG_NAME = new PropertyDescriptor.Builder() - .name("record_tag_name") - .displayName("Name of Record Tag") - .description("Specifies the name of the XML record tag wrapping the record fields. 
If this is not set, the writer " + - "will use the record name in the schema.") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .required(false) - .build(); - - public static final PropertyDescriptor ARRAY_WRAPPING = new PropertyDescriptor.Builder() - .name("array_wrapping") - .displayName("Wrap Elements of Arrays") - .description("Specifies how the writer wraps elements of fields of type array") - .allowableValues(USE_PROPERTY_AS_WRAPPER, USE_PROPERTY_FOR_ELEMENTS, NO_WRAPPING) - .defaultValue(NO_WRAPPING.getValue()) - .required(true) - .build(); - - public static final PropertyDescriptor ARRAY_TAG_NAME = new PropertyDescriptor.Builder() - .name("array_tag_name") - .displayName("Array Tag Name") - .description("Name of the tag used by property \"Wrap Elements of Arrays\" to write arrays") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .required(false) - .build(); - - public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder() - .name("Character Set") - .description("The Character set to use when writing the data to the FlowFile") - .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) - .defaultValue("UTF-8") - .expressionLanguageSupported(ExpressionLanguageScope.NONE) - .required(true) - .build(); - - @Override - protected List getSupportedPropertyDescriptors() { - final List properties = new ArrayList<>(super.getSupportedPropertyDescriptors()); - properties.add(SUPPRESS_NULLS); - properties.add(PRETTY_PRINT_XML); - properties.add(ROOT_TAG_NAME); - properties.add(RECORD_TAG_NAME); - properties.add(ARRAY_WRAPPING); - properties.add(ARRAY_TAG_NAME); - properties.add(CHARACTER_SET); - return properties; - } - - @Override - protected Collection customValidate(final ValidationContext validationContext) { - if (!validationContext.getProperty(ARRAY_WRAPPING).getValue().equals(NO_WRAPPING.getValue())) { - if (!validationContext.getProperty(ARRAY_TAG_NAME).isSet()) { - StringBuilder explanation = new StringBuilder() - .append("if property \'") - .append(ARRAY_WRAPPING.getName()) - .append("\' is defined as \'") - .append(USE_PROPERTY_AS_WRAPPER.getDisplayName()) - .append("\' or \'") - .append(USE_PROPERTY_FOR_ELEMENTS.getDisplayName()) - .append("\' the property \'") - .append(ARRAY_TAG_NAME.getDisplayName()) - .append("\' has to be set."); - - return Collections.singleton(new ValidationResult.Builder() - .subject(ARRAY_TAG_NAME.getName()) - .valid(false) - .explanation(explanation.toString()) - .build()); - } - } - return Collections.emptyList(); - } - - @Override - public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out) throws SchemaNotFoundException, IOException { - final String nullSuppression = getConfigurationContext().getProperty(SUPPRESS_NULLS).getValue(); - final NullSuppression nullSuppressionEnum; - if (nullSuppression.equals(ALWAYS_SUPPRESS.getValue())) { - nullSuppressionEnum = NullSuppression.ALWAYS_SUPPRESS; - } else if (nullSuppression.equals(NEVER_SUPPRESS.getValue())) { - nullSuppressionEnum = NullSuppression.NEVER_SUPPRESS; - } else { - nullSuppressionEnum = NullSuppression.SUPPRESS_MISSING; - } - - final boolean prettyPrint = getConfigurationContext().getProperty(PRETTY_PRINT_XML).getValue().equals("true"); - - final String rootTagName = getConfigurationContext().getProperty(ROOT_TAG_NAME).isSet() - ? 
getConfigurationContext().getProperty(ROOT_TAG_NAME).getValue() : null; - final String recordTagName = getConfigurationContext().getProperty(RECORD_TAG_NAME).isSet() - ? getConfigurationContext().getProperty(RECORD_TAG_NAME).getValue() : null; - - final String arrayWrapping = getConfigurationContext().getProperty(ARRAY_WRAPPING).getValue(); - final ArrayWrapping arrayWrappingEnum; - if (arrayWrapping.equals(NO_WRAPPING.getValue())) { - arrayWrappingEnum = ArrayWrapping.NO_WRAPPING; - } else if (arrayWrapping.equals(USE_PROPERTY_AS_WRAPPER.getValue())) { - arrayWrappingEnum = ArrayWrapping.USE_PROPERTY_AS_WRAPPER; - } else { - arrayWrappingEnum = ArrayWrapping.USE_PROPERTY_FOR_ELEMENTS; - } - - final String arrayTagName; - if (getConfigurationContext().getProperty(ARRAY_TAG_NAME).isSet()) { - arrayTagName = getConfigurationContext().getProperty(ARRAY_TAG_NAME).getValue(); - } else { - arrayTagName = null; - } - - final String charSet = getConfigurationContext().getProperty(CHARACTER_SET).getValue(); - - return new WriteXMLResult(schema, getSchemaAccessWriter(schema), - out, prettyPrint, nullSuppressionEnum, arrayWrappingEnum, arrayTagName, rootTagName, recordTagName, charSet, - getDateFormat().orElse(null), getTimeFormat().orElse(null), getTimestampFormat().orElse(null)); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.xml; + +import org.apache.nifi.record.NullSuppression; +import org.apache.nifi.annotation.documentation.CapabilityDescription; +import org.apache.nifi.annotation.documentation.Tags; +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.components.ValidationContext; +import org.apache.nifi.components.ValidationResult; +import org.apache.nifi.expression.ExpressionLanguageScope; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.schema.access.SchemaNotFoundException; +import org.apache.nifi.serialization.DateTimeTextRecordSetWriter; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.RecordSetWriterFactory; +import org.apache.nifi.serialization.record.RecordSchema; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +@Tags({"xml", "resultset", "writer", "serialize", "record", "recordset", "row"}) +@CapabilityDescription("Writes a RecordSet to XML. 
The records are wrapped by a root tag.") +public class XMLRecordSetWriter extends DateTimeTextRecordSetWriter implements RecordSetWriterFactory { + + public static final AllowableValue ALWAYS_SUPPRESS = new AllowableValue("always-suppress", "Always Suppress", + "Fields that are missing (present in the schema but not in the record), or that have a value of null, will not be written out"); + public static final AllowableValue NEVER_SUPPRESS = new AllowableValue("never-suppress", "Never Suppress", + "Fields that are missing (present in the schema but not in the record), or that have a value of null, will be written out as a null value"); + public static final AllowableValue SUPPRESS_MISSING = new AllowableValue("suppress-missing", "Suppress Missing Values", + "When a field has a value of null, it will be written out. However, if a field is defined in the schema and not present in the record, the field will not be written out."); + + public static final AllowableValue USE_PROPERTY_AS_WRAPPER = new AllowableValue("use-property-as-wrapper", "Use Property as Wrapper", + "The value of the property \"Array Tag Name\" will be used as the tag name to wrap elements of an array. The field name of the array field will be used for the tag name " + + "of the elements."); + public static final AllowableValue USE_PROPERTY_FOR_ELEMENTS = new AllowableValue("use-property-for-elements", "Use Property for Elements", + "The value of the property \"Array Tag Name\" will be used for the tag name of the elements of an array. The field name of the array field will be used as the tag name " + + "to wrap elements."); + public static final AllowableValue NO_WRAPPING = new AllowableValue("no-wrapping", "No Wrapping", + "The elements of an array will not be wrapped"); + + public static final PropertyDescriptor SUPPRESS_NULLS = new PropertyDescriptor.Builder() + .name("suppress_nulls") + .displayName("Suppress Null Values") + .description("Specifies how the writer should handle a null field") + .allowableValues(NEVER_SUPPRESS, ALWAYS_SUPPRESS, SUPPRESS_MISSING) + .defaultValue(NEVER_SUPPRESS.getValue()) + .required(true) + .build(); + + public static final PropertyDescriptor PRETTY_PRINT_XML = new PropertyDescriptor.Builder() + .name("pretty_print_xml") + .displayName("Pretty Print XML") + .description("Specifies whether or not the XML should be pretty printed") + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .allowableValues("true", "false") + .defaultValue("false") + .required(true) + .build(); + + public static final PropertyDescriptor ROOT_TAG_NAME = new PropertyDescriptor.Builder() + .name("root_tag_name") + .displayName("Name of Root Tag") + .description("Specifies the name of the XML root tag wrapping the record set. This property has to be defined if " + + "the writer is supposed to write multiple records in a single FlowFile.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .required(false) + .build(); + + public static final PropertyDescriptor RECORD_TAG_NAME = new PropertyDescriptor.Builder() + .name("record_tag_name") + .displayName("Name of Record Tag") + .description("Specifies the name of the XML record tag wrapping the record fields. 
If this is not set, the writer " + + "will use the record name in the schema.") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .required(false) + .build(); + + public static final PropertyDescriptor ARRAY_WRAPPING = new PropertyDescriptor.Builder() + .name("array_wrapping") + .displayName("Wrap Elements of Arrays") + .description("Specifies how the writer wraps elements of fields of type array") + .allowableValues(USE_PROPERTY_AS_WRAPPER, USE_PROPERTY_FOR_ELEMENTS, NO_WRAPPING) + .defaultValue(NO_WRAPPING.getValue()) + .required(true) + .build(); + + public static final PropertyDescriptor ARRAY_TAG_NAME = new PropertyDescriptor.Builder() + .name("array_tag_name") + .displayName("Array Tag Name") + .description("Name of the tag used by property \"Wrap Elements of Arrays\" to write arrays") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .required(false) + .build(); + + public static final PropertyDescriptor CHARACTER_SET = new PropertyDescriptor.Builder() + .name("Character Set") + .description("The Character set to use when writing the data to the FlowFile") + .addValidator(StandardValidators.CHARACTER_SET_VALIDATOR) + .defaultValue("UTF-8") + .expressionLanguageSupported(ExpressionLanguageScope.NONE) + .required(true) + .build(); + + @Override + protected List getSupportedPropertyDescriptors() { + final List properties = new ArrayList<>(super.getSupportedPropertyDescriptors()); + properties.add(SUPPRESS_NULLS); + properties.add(PRETTY_PRINT_XML); + properties.add(ROOT_TAG_NAME); + properties.add(RECORD_TAG_NAME); + properties.add(ARRAY_WRAPPING); + properties.add(ARRAY_TAG_NAME); + properties.add(CHARACTER_SET); + return properties; + } + + @Override + protected Collection customValidate(final ValidationContext validationContext) { + if (!validationContext.getProperty(ARRAY_WRAPPING).getValue().equals(NO_WRAPPING.getValue())) { + if (!validationContext.getProperty(ARRAY_TAG_NAME).isSet()) { + StringBuilder explanation = new StringBuilder() + .append("if property \'") + .append(ARRAY_WRAPPING.getName()) + .append("\' is defined as \'") + .append(USE_PROPERTY_AS_WRAPPER.getDisplayName()) + .append("\' or \'") + .append(USE_PROPERTY_FOR_ELEMENTS.getDisplayName()) + .append("\' the property \'") + .append(ARRAY_TAG_NAME.getDisplayName()) + .append("\' has to be set."); + + return Collections.singleton(new ValidationResult.Builder() + .subject(ARRAY_TAG_NAME.getName()) + .valid(false) + .explanation(explanation.toString()) + .build()); + } + } + return Collections.emptyList(); + } + + @Override + public RecordSetWriter createWriter(final ComponentLog logger, final RecordSchema schema, final OutputStream out, final Map variables) throws SchemaNotFoundException, IOException { + final String nullSuppression = getConfigurationContext().getProperty(SUPPRESS_NULLS).getValue(); + final NullSuppression nullSuppressionEnum; + if (nullSuppression.equals(ALWAYS_SUPPRESS.getValue())) { + nullSuppressionEnum = NullSuppression.ALWAYS_SUPPRESS; + } else if (nullSuppression.equals(NEVER_SUPPRESS.getValue())) { + nullSuppressionEnum = NullSuppression.NEVER_SUPPRESS; + } else { + nullSuppressionEnum = NullSuppression.SUPPRESS_MISSING; + } + + final boolean prettyPrint = getConfigurationContext().getProperty(PRETTY_PRINT_XML).getValue().equals("true"); + + final String rootTagName = getConfigurationContext().getProperty(ROOT_TAG_NAME).isSet() + ? 
getConfigurationContext().getProperty(ROOT_TAG_NAME).getValue() : null; + final String recordTagName = getConfigurationContext().getProperty(RECORD_TAG_NAME).isSet() + ? getConfigurationContext().getProperty(RECORD_TAG_NAME).getValue() : null; + + final String arrayWrapping = getConfigurationContext().getProperty(ARRAY_WRAPPING).getValue(); + final ArrayWrapping arrayWrappingEnum; + if (arrayWrapping.equals(NO_WRAPPING.getValue())) { + arrayWrappingEnum = ArrayWrapping.NO_WRAPPING; + } else if (arrayWrapping.equals(USE_PROPERTY_AS_WRAPPER.getValue())) { + arrayWrappingEnum = ArrayWrapping.USE_PROPERTY_AS_WRAPPER; + } else { + arrayWrappingEnum = ArrayWrapping.USE_PROPERTY_FOR_ELEMENTS; + } + + final String arrayTagName; + if (getConfigurationContext().getProperty(ARRAY_TAG_NAME).isSet()) { + arrayTagName = getConfigurationContext().getProperty(ARRAY_TAG_NAME).getValue(); + } else { + arrayTagName = null; + } + + final String charSet = getConfigurationContext().getProperty(CHARACTER_SET).getValue(); + + return new WriteXMLResult(schema, getSchemaAccessWriter(schema), + out, prettyPrint, nullSuppressionEnum, arrayWrappingEnum, arrayTagName, rootTagName, recordTagName, charSet, + getDateFormat().orElse(null), getTimeFormat().orElse(null), getTimestampFormat().orElse(null)); + } +} diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVHeaderSchemaStrategy.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVHeaderSchemaStrategy.java index e9de978598..e61d2a66ca 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVHeaderSchemaStrategy.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVHeaderSchemaStrategy.java @@ -40,7 +40,7 @@ public class TestCSVHeaderSchemaStrategy { @Test public void testSimple() throws SchemaNotFoundException, IOException { - final String headerLine = "a, b, c, d, e\\,z, f"; + final String headerLine = "\"a\", b, c, d, e\\,z, f"; final byte[] headerBytes = headerLine.getBytes(); final Map properties = new HashMap<>(); @@ -66,4 +66,37 @@ public class TestCSVHeaderSchemaStrategy { .allMatch(field -> field.getDataType().equals(RecordFieldType.STRING.getDataType()))); } + @Test + public void testWithEL() throws SchemaNotFoundException, IOException { + final String headerLine = "\'a\'; b; c; d; e^;z; f"; + final byte[] headerBytes = headerLine.getBytes(); + + final Map properties = new HashMap<>(); + properties.put(CSVUtils.CSV_FORMAT, CSVUtils.CUSTOM.getValue()); + properties.put(CSVUtils.COMMENT_MARKER, "#"); + properties.put(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}"); + properties.put(CSVUtils.TRIM_FIELDS, "true"); + properties.put(CSVUtils.QUOTE_CHAR, "${csv.quote}"); + properties.put(CSVUtils.ESCAPE_CHAR, "${csv.escape}"); + + final Map variables = new HashMap<>(); + variables.put("csv.delimiter", ";"); + variables.put("csv.quote", "'"); + variables.put("csv.escape", "^"); + + final ConfigurationContext context = new MockConfigurationContext(properties, null); + final CSVHeaderSchemaStrategy strategy = new CSVHeaderSchemaStrategy(context); + + final RecordSchema schema; + try (final InputStream bais = new 
ByteArrayInputStream(headerBytes)) { + schema = strategy.getSchema(variables, bais, null); + } + + final List expectedFieldNames = Arrays.asList("a", "b", "c", "d", "e;z", "f"); + assertEquals(expectedFieldNames, schema.getFieldNames()); + + assertTrue(schema.getFields().stream() + .allMatch(field -> field.getDataType().equals(RecordFieldType.STRING.getDataType()))); + } + } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVSchemaInference.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVSchemaInference.java index 51d3eb2998..9dc8f29ce4 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVSchemaInference.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVSchemaInference.java @@ -58,7 +58,7 @@ public class TestCSVSchemaInference { final InputStream bufferedIn = new BufferedInputStream(in)) { final InferSchemaAccessStrategy accessStrategy = new InferSchemaAccessStrategy<>( - (var, content) -> new CSVRecordSource(content, context), + (variables, content) -> new CSVRecordSource(content, context, variables), new CSVSchemaInference(timestampInference), Mockito.mock(ComponentLog.class)); schema = accessStrategy.getSchema(null, bufferedIn, null); } @@ -82,4 +82,51 @@ public class TestCSVSchemaInference { "componentId", "componentType", "componentName", "processGroupId", "processGroupName", "entityId", "entityType", "entitySize", "previousEntitySize", "updatedAttributes", "actorHostname", "contentURI", "previousContentURI", "parentIds", "childIds", "platform", "application", "extra field", "numeric string"), fieldNames); } + + @Test + public void testInferenceIncludesAllRecordsWithEL() throws IOException { + final File file = new File("src/test/resources/csv/prov-events.csv"); + + final Map properties = new HashMap<>(); + new CSVReader().getSupportedPropertyDescriptors().forEach(prop -> properties.put(prop, prop.getDefaultValue())); + properties.put(CSVUtils.TRIM_FIELDS, "true"); + properties.put(CSVUtils.VALUE_SEPARATOR, "${csv.delimiter}"); + properties.put(CSVUtils.QUOTE_CHAR, "${csv.quote}"); + properties.put(CSVUtils.ESCAPE_CHAR, "${csv.escape}"); + final PropertyContext context = new MockConfigurationContext(properties, null); + + final Map attributes = new HashMap<>(); + attributes.put("csv.delimiter", ","); + attributes.put("csv.quote", "\""); + attributes.put("csv.escape", "\\"); + + final RecordSchema schema; + try (final InputStream in = new FileInputStream(file); + final InputStream bufferedIn = new BufferedInputStream(in)) { + + final InferSchemaAccessStrategy accessStrategy = new InferSchemaAccessStrategy<>( + (variables, content) -> new CSVRecordSource(content, context, variables), + new CSVSchemaInference(timestampInference), Mockito.mock(ComponentLog.class)); + schema = accessStrategy.getSchema(attributes, bufferedIn, null); + } + + assertSame(RecordFieldType.STRING, schema.getDataType("eventId").get().getFieldType()); + assertSame(RecordFieldType.INT, schema.getDataType("eventOrdinal").get().getFieldType()); + assertSame(RecordFieldType.STRING, schema.getDataType("eventType").get().getFieldType()); + 
assertSame(RecordFieldType.LONG, schema.getDataType("timestampMillis").get().getFieldType()); + + assertEquals(RecordFieldType.TIMESTAMP.getDataType("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"), schema.getDataType("timestamp").get()); + assertEquals(RecordFieldType.TIME.getDataType("HH:mm:ss"), schema.getDataType("eventTime").get()); + assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), schema.getDataType("eventDate").get()); + assertEquals(RecordFieldType.STRING.getDataType(), schema.getDataType("maybeTime").get()); + assertEquals(RecordFieldType.DATE.getDataType("yyyy-MM-dd"), schema.getDataType("maybeDate").get()); + + assertSame(RecordFieldType.INT, schema.getDataType("parentIds").get().getFieldType()); + assertSame(RecordFieldType.STRING, schema.getDataType("numeric string").get().getFieldType()); + + final List fieldNames = schema.getFieldNames(); + assertEquals(Arrays.asList("eventId", "eventOrdinal", "eventType", "timestampMillis", "timestamp", "eventDate", "eventTime", "maybeTime", "maybeDate", "durationMillis", "lineageStart", + "componentId", "componentType", "componentName", "processGroupId", "processGroupName", "entityId", "entityType", "entitySize", "previousEntitySize", "updatedAttributes", "actorHostname", + "contentURI", "previousContentURI", "parentIds", "childIds", "platform", "application", "extra field", "numeric string"), fieldNames); + } } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java index 5c7c9e278a..e481fa6fcf 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/csv/TestCSVValidators.java @@ -33,7 +33,6 @@ public class TestCSVValidators { /*** SingleCharValidator **/ @Test public void testSingleCharNullValue() { - CSVValidators.SingleCharacterValidator validator = new CSVValidators.SingleCharacterValidator(); ValidationContext mockContext = Mockito.mock(ValidationContext.class); ValidationResult result = validator.validate("EscapeChar", null, mockContext); @@ -66,6 +65,16 @@ public class TestCSVValidators { assertTrue(result.isValid()); } + @Test + public void testSingleCharExpressionLanguage() { + CSVValidators.SingleCharacterValidator validator = new CSVValidators.SingleCharacterValidator(); + ValidationContext mockContext = Mockito.mock(ValidationContext.class); + Mockito.when(mockContext.isExpressionLanguageSupported(Mockito.any())).thenReturn(true); + Mockito.when(mockContext.isExpressionLanguagePresent(Mockito.any())).thenReturn(true); + ValidationResult result = validator.validate("EscapeChar", "${csv.escape}", mockContext); + assertTrue(result.isValid()); + } + /*** Unescaped SingleCharValidator **/ @@ -95,4 +104,14 @@ public class TestCSVValidators { assertTrue(result.isValid()); } + @Test + public void testUnescapedSingleCharExpressionLanguage() { + Validator validator = CSVValidators.UNESCAPED_SINGLE_CHAR_VALIDATOR; + ValidationContext mockContext = Mockito.mock(ValidationContext.class); + 
Mockito.when(mockContext.isExpressionLanguageSupported(Mockito.any())).thenReturn(true); + Mockito.when(mockContext.isExpressionLanguagePresent(Mockito.any())).thenReturn(true); + ValidationResult result = validator.validate("Delimiter", "${csv.delimiter}", mockContext); + assertTrue(result.isValid()); + } + } diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriter.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriter.java index becb3c5a91..293f16458b 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriter.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriter.java @@ -1,247 +1,248 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.nifi.xml; - -import org.apache.avro.Schema; -import org.apache.nifi.avro.AvroTypeUtil; -import org.apache.nifi.components.AllowableValue; -import org.apache.nifi.logging.ComponentLog; -import org.apache.nifi.reporting.InitializationException; -import org.apache.nifi.schema.access.SchemaAccessUtils; -import org.apache.nifi.schema.access.SchemaNotFoundException; -import org.apache.nifi.serialization.RecordSetWriter; -import org.apache.nifi.serialization.record.RecordSchema; -import org.apache.nifi.serialization.record.SchemaIdentifier; -import org.apache.nifi.util.TestRunner; -import org.apache.nifi.util.TestRunners; -import org.junit.Assert; -import org.junit.Test; -import org.xmlunit.diff.DefaultNodeMatcher; -import org.xmlunit.diff.ElementSelectors; -import org.xmlunit.matchers.CompareMatcher; - -import java.io.IOException; -import java.io.OutputStream; -import java.nio.file.Files; -import java.nio.file.Paths; - -import static org.junit.Assert.assertThat; - -public class TestXMLRecordSetWriter { - - private TestRunner setup(XMLRecordSetWriter writer) throws InitializationException, IOException { - TestRunner runner = TestRunners.newTestRunner(TestXMLRecordSetWriterProcessor.class); - - final String outputSchemaText = new String(Files.readAllBytes(Paths.get("src/test/resources/xml/testschema3"))); - - runner.addControllerService("xml_writer", writer); - runner.setProperty(TestXMLRecordSetWriterProcessor.XML_WRITER, "xml_writer"); - - runner.setProperty(writer, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY); - runner.setProperty(writer, SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText); - runner.setProperty(writer, XMLRecordSetWriter.PRETTY_PRINT_XML, new AllowableValue("true")); - - runner.setProperty(writer, "Schema Write Strategy", "no-schema"); - - return runner; - } - - @Test - public void testDefault() throws IOException, InitializationException { - XMLRecordSetWriter writer = new XMLRecordSetWriter(); - TestRunner runner = setup(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); - - runner.enableControllerService(writer); - runner.enqueue(""); - runner.run(); - runner.assertQueueEmpty(); - runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); - - String expected = "13" + - "val1" + - "13" + - "val1"; - String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); - assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); - } - - @Test - public void testDefaultSingleRecord() throws IOException, InitializationException { - XMLRecordSetWriter writer = new XMLRecordSetWriter(); - TestRunner runner = setup(writer); - - runner.setProperty(TestXMLRecordSetWriterProcessor.MULTIPLE_RECORDS, "false"); - - runner.enableControllerService(writer); - runner.enqueue(""); - runner.run(); - runner.assertQueueEmpty(); - runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); - - String expected = "13" + - "val1"; - - String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); - assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); - } - - @Test - public void testRootAndRecordNaming() throws IOException, 
InitializationException { - XMLRecordSetWriter writer = new XMLRecordSetWriter(); - TestRunner runner = setup(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "ROOT_NODE"); - runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "RECORD_NODE"); - - runner.enableControllerService(writer); - runner.enqueue(""); - runner.run(); - runner.assertQueueEmpty(); - runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); - - String expected = "13" + - "val1" + - "13" + - "val1"; - String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); - assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); - } - - @Test - public void testSchemaRootRecordNaming() throws IOException, InitializationException { - String avroSchemaText = new String(Files.readAllBytes(Paths.get("src/test/resources/xml/testschema3")));; - Schema avroSchema = new Schema.Parser().parse(avroSchemaText); - - SchemaIdentifier schemaId = SchemaIdentifier.builder().name("schemaName").build(); - RecordSchema recordSchema = AvroTypeUtil.createSchema(avroSchema, avroSchemaText, schemaId); - - XMLRecordSetWriter writer = new _XMLRecordSetWriter(recordSchema); - TestRunner runner = setup(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "ROOT_NODE"); - - runner.enableControllerService(writer); - runner.enqueue(""); - runner.run(); - runner.assertQueueEmpty(); - runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); - - String expected = "13" + - "val1" + - "13" + - "val1"; - String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); - assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); - } - - @Test - public void testNullSuppression() throws IOException, InitializationException { - XMLRecordSetWriter writer = new XMLRecordSetWriter(); - TestRunner runner = setup(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); - runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "record"); - - runner.setProperty(writer, XMLRecordSetWriter.SUPPRESS_NULLS, XMLRecordSetWriter.ALWAYS_SUPPRESS); - - runner.enableControllerService(writer); - runner.enqueue(""); - runner.run(); - runner.assertQueueEmpty(); - runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); - - String expected = "13" + - "val1" + - "13" + - "val1"; - String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); - assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); - } - - @Test - public void testArrayWrapping() throws IOException, InitializationException { - XMLRecordSetWriter writer = new XMLRecordSetWriter(); - TestRunner runner = setup(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); - runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "record"); - - runner.setProperty(writer, XMLRecordSetWriter.ARRAY_WRAPPING, XMLRecordSetWriter.USE_PROPERTY_AS_WRAPPER); - runner.setProperty(writer, XMLRecordSetWriter.ARRAY_TAG_NAME, "wrap"); - - 
runner.enableControllerService(writer); - runner.enqueue(""); - runner.run(); - runner.assertQueueEmpty(); - runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); - - String expected = "13" + - "val1" + - "13" + - "val1"; - String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); - assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); - } - - @Test - public void testValidation() throws IOException, InitializationException { - XMLRecordSetWriter writer = new XMLRecordSetWriter(); - TestRunner runner = setup(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); - runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "record"); - - runner.setProperty(writer, XMLRecordSetWriter.ARRAY_WRAPPING, XMLRecordSetWriter.USE_PROPERTY_AS_WRAPPER); - runner.assertNotValid(writer); - - runner.setProperty(writer, XMLRecordSetWriter.ARRAY_TAG_NAME, "array-tag-name"); - runner.assertValid(writer); - - runner.enableControllerService(writer); - runner.enqueue(""); - - String message = "Processor has 1 validation failures:\n" + - "'xml_writer' validated against 'xml_writer' is invalid because Controller Service is not valid: " + - "'array_tag_name' is invalid because if property 'array_wrapping' is defined as 'Use Property as Wrapper' " + - "or 'Use Property for Elements' the property 'Array Tag Name' has to be set.\n"; - - try { - runner.run(); - } catch (AssertionError e) { - Assert.assertEquals(message, e.getMessage()); - } - } - - static class _XMLRecordSetWriter extends XMLRecordSetWriter{ - - RecordSchema recordSchema; - - _XMLRecordSetWriter(RecordSchema recordSchema){ - this.recordSchema = recordSchema; - } - - @Override - public RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out) - throws SchemaNotFoundException, IOException { - return super.createWriter(logger, this.recordSchema, out); - } - } - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.xml; + +import org.apache.avro.Schema; +import org.apache.nifi.avro.AvroTypeUtil; +import org.apache.nifi.components.AllowableValue; +import org.apache.nifi.logging.ComponentLog; +import org.apache.nifi.reporting.InitializationException; +import org.apache.nifi.schema.access.SchemaAccessUtils; +import org.apache.nifi.schema.access.SchemaNotFoundException; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.SchemaIdentifier; +import org.apache.nifi.util.TestRunner; +import org.apache.nifi.util.TestRunners; +import org.junit.Assert; +import org.junit.Test; +import org.xmlunit.diff.DefaultNodeMatcher; +import org.xmlunit.diff.ElementSelectors; +import org.xmlunit.matchers.CompareMatcher; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Collections; + +import static org.junit.Assert.assertThat; + +public class TestXMLRecordSetWriter { + + private TestRunner setup(XMLRecordSetWriter writer) throws InitializationException, IOException { + TestRunner runner = TestRunners.newTestRunner(TestXMLRecordSetWriterProcessor.class); + + final String outputSchemaText = new String(Files.readAllBytes(Paths.get("src/test/resources/xml/testschema3"))); + + runner.addControllerService("xml_writer", writer); + runner.setProperty(TestXMLRecordSetWriterProcessor.XML_WRITER, "xml_writer"); + + runner.setProperty(writer, SchemaAccessUtils.SCHEMA_ACCESS_STRATEGY, SchemaAccessUtils.SCHEMA_TEXT_PROPERTY); + runner.setProperty(writer, SchemaAccessUtils.SCHEMA_TEXT, outputSchemaText); + runner.setProperty(writer, XMLRecordSetWriter.PRETTY_PRINT_XML, new AllowableValue("true")); + + runner.setProperty(writer, "Schema Write Strategy", "no-schema"); + + return runner; + } + + @Test + public void testDefault() throws IOException, InitializationException { + XMLRecordSetWriter writer = new XMLRecordSetWriter(); + TestRunner runner = setup(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); + + runner.enableControllerService(writer); + runner.enqueue(""); + runner.run(); + runner.assertQueueEmpty(); + runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); + + String expected = "13" + + "val1" + + "13" + + "val1"; + String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); + assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); + } + + @Test + public void testDefaultSingleRecord() throws IOException, InitializationException { + XMLRecordSetWriter writer = new XMLRecordSetWriter(); + TestRunner runner = setup(writer); + + runner.setProperty(TestXMLRecordSetWriterProcessor.MULTIPLE_RECORDS, "false"); + + runner.enableControllerService(writer); + runner.enqueue(""); + runner.run(); + runner.assertQueueEmpty(); + runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); + + String expected = "13" + + "val1"; + + String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); + assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); + } + + @Test + public void testRootAndRecordNaming() 
throws IOException, InitializationException { + XMLRecordSetWriter writer = new XMLRecordSetWriter(); + TestRunner runner = setup(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "ROOT_NODE"); + runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "RECORD_NODE"); + + runner.enableControllerService(writer); + runner.enqueue(""); + runner.run(); + runner.assertQueueEmpty(); + runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); + + String expected = "13" + + "val1" + + "13" + + "val1"; + String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); + assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); + } + + @Test + public void testSchemaRootRecordNaming() throws IOException, InitializationException { + String avroSchemaText = new String(Files.readAllBytes(Paths.get("src/test/resources/xml/testschema3")));; + Schema avroSchema = new Schema.Parser().parse(avroSchemaText); + + SchemaIdentifier schemaId = SchemaIdentifier.builder().name("schemaName").build(); + RecordSchema recordSchema = AvroTypeUtil.createSchema(avroSchema, avroSchemaText, schemaId); + + XMLRecordSetWriter writer = new _XMLRecordSetWriter(recordSchema); + TestRunner runner = setup(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "ROOT_NODE"); + + runner.enableControllerService(writer); + runner.enqueue(""); + runner.run(); + runner.assertQueueEmpty(); + runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); + + String expected = "13" + + "val1" + + "13" + + "val1"; + String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); + assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); + } + + @Test + public void testNullSuppression() throws IOException, InitializationException { + XMLRecordSetWriter writer = new XMLRecordSetWriter(); + TestRunner runner = setup(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); + runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "record"); + + runner.setProperty(writer, XMLRecordSetWriter.SUPPRESS_NULLS, XMLRecordSetWriter.ALWAYS_SUPPRESS); + + runner.enableControllerService(writer); + runner.enqueue(""); + runner.run(); + runner.assertQueueEmpty(); + runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); + + String expected = "13" + + "val1" + + "13" + + "val1"; + String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); + assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); + } + + @Test + public void testArrayWrapping() throws IOException, InitializationException { + XMLRecordSetWriter writer = new XMLRecordSetWriter(); + TestRunner runner = setup(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); + runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "record"); + + runner.setProperty(writer, XMLRecordSetWriter.ARRAY_WRAPPING, XMLRecordSetWriter.USE_PROPERTY_AS_WRAPPER); + runner.setProperty(writer, XMLRecordSetWriter.ARRAY_TAG_NAME, "wrap"); 
+ + runner.enableControllerService(writer); + runner.enqueue(""); + runner.run(); + runner.assertQueueEmpty(); + runner.assertAllFlowFilesTransferred(TestXMLRecordSetWriterProcessor.SUCCESS, 1); + + String expected = "13" + + "val1" + + "13" + + "val1"; + String actual = new String(runner.getContentAsByteArray(runner.getFlowFilesForRelationship(TestXMLRecordSetWriterProcessor.SUCCESS).get(0))); + assertThat(expected, CompareMatcher.isSimilarTo(actual).ignoreWhitespace().withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText))); + } + + @Test + public void testValidation() throws IOException, InitializationException { + XMLRecordSetWriter writer = new XMLRecordSetWriter(); + TestRunner runner = setup(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ROOT_TAG_NAME, "root"); + runner.setProperty(writer, XMLRecordSetWriter.RECORD_TAG_NAME, "record"); + + runner.setProperty(writer, XMLRecordSetWriter.ARRAY_WRAPPING, XMLRecordSetWriter.USE_PROPERTY_AS_WRAPPER); + runner.assertNotValid(writer); + + runner.setProperty(writer, XMLRecordSetWriter.ARRAY_TAG_NAME, "array-tag-name"); + runner.assertValid(writer); + + runner.enableControllerService(writer); + runner.enqueue(""); + + String message = "Processor has 1 validation failures:\n" + + "'xml_writer' validated against 'xml_writer' is invalid because Controller Service is not valid: " + + "'array_tag_name' is invalid because if property 'array_wrapping' is defined as 'Use Property as Wrapper' " + + "or 'Use Property for Elements' the property 'Array Tag Name' has to be set.\n"; + + try { + runner.run(); + } catch (AssertionError e) { + Assert.assertEquals(message, e.getMessage()); + } + } + + static class _XMLRecordSetWriter extends XMLRecordSetWriter{ + + RecordSchema recordSchema; + + _XMLRecordSetWriter(RecordSchema recordSchema){ + this.recordSchema = recordSchema; + } + + @Override + public RecordSetWriter createWriter(ComponentLog logger, RecordSchema schema, OutputStream out) + throws SchemaNotFoundException, IOException { + return super.createWriter(logger, this.recordSchema, out, Collections.emptyMap()); + } + } + +} diff --git a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriterProcessor.java b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriterProcessor.java index 57ee6248a5..e0c4a190b7 100644 --- a/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriterProcessor.java +++ b/nifi-nar-bundles/nifi-standard-services/nifi-record-serialization-services-bundle/nifi-record-serialization-services/src/test/java/org/apache/nifi/xml/TestXMLRecordSetWriterProcessor.java @@ -1,122 +1,123 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nifi.xml; - -import org.apache.nifi.components.PropertyDescriptor; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.processor.AbstractProcessor; -import org.apache.nifi.processor.ProcessContext; -import org.apache.nifi.processor.ProcessSession; -import org.apache.nifi.processor.Relationship; -import org.apache.nifi.processor.exception.ProcessException; -import org.apache.nifi.processor.util.StandardValidators; -import org.apache.nifi.serialization.RecordSetWriter; -import org.apache.nifi.serialization.RecordSetWriterFactory; -import org.apache.nifi.serialization.SimpleRecordSchema; -import org.apache.nifi.serialization.record.ListRecordSet; -import org.apache.nifi.serialization.record.MapRecord; -import org.apache.nifi.serialization.record.Record; -import org.apache.nifi.serialization.record.RecordSchema; -import org.apache.nifi.serialization.record.RecordSet; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -public class TestXMLRecordSetWriterProcessor extends AbstractProcessor { - - static final PropertyDescriptor XML_WRITER = new PropertyDescriptor.Builder() - .name("xml_writer") - .identifiesControllerService(XMLRecordSetWriter.class) - .required(true) - .build(); - - static final PropertyDescriptor MULTIPLE_RECORDS = new PropertyDescriptor.Builder() - .name("multiple_records") - .allowableValues("true", "false") - .defaultValue("true") - .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) - .build(); - - public static final Relationship SUCCESS = new Relationship.Builder().name("success").description("success").build(); - - @Override - public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { - FlowFile flowFile = session.get(); - - final RecordSetWriterFactory writerFactory = context.getProperty(XML_WRITER).asControllerService(RecordSetWriterFactory.class); - flowFile = session.write(flowFile, out -> { - try { - - final RecordSchema schema = writerFactory.getSchema(null, null); - - boolean multipleRecords = Boolean.parseBoolean(context.getProperty(MULTIPLE_RECORDS).getValue()); - RecordSet recordSet = getRecordSet(multipleRecords); - - final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out); - - - writer.write(recordSet); - writer.flush(); - - - } catch (Exception e) { - throw new ProcessException(e.getMessage()); - } - - }); - session.transfer(flowFile, SUCCESS); - } - - @Override - protected List getSupportedPropertyDescriptors() { - return new ArrayList() {{ add(XML_WRITER); add(MULTIPLE_RECORDS); }}; - } - - @Override - public Set getRelationships() { - return new HashSet() {{ add(SUCCESS); }}; - } - - protected static RecordSet getRecordSet(boolean multipleRecords) { - Object[] arrayVals = {1, null, 3}; - - Map recordFields = new HashMap<>(); - recordFields.put("name1", "val1"); - recordFields.put("name2", null); - recordFields.put("array_field", arrayVals); - - RecordSchema emptySchema = new 
SimpleRecordSchema(Collections.emptyList()); - - List records = new ArrayList<>(); - records.add(new MapRecord(emptySchema, recordFields)); - - if (multipleRecords) { - records.add(new MapRecord(emptySchema, recordFields)); - } - - return new ListRecordSet(emptySchema, records); - } - - - -} +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.xml; + +import org.apache.nifi.components.PropertyDescriptor; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.processor.AbstractProcessor; +import org.apache.nifi.processor.ProcessContext; +import org.apache.nifi.processor.ProcessSession; +import org.apache.nifi.processor.Relationship; +import org.apache.nifi.processor.exception.ProcessException; +import org.apache.nifi.processor.util.StandardValidators; +import org.apache.nifi.serialization.RecordSetWriter; +import org.apache.nifi.serialization.RecordSetWriterFactory; +import org.apache.nifi.serialization.SimpleRecordSchema; +import org.apache.nifi.serialization.record.ListRecordSet; +import org.apache.nifi.serialization.record.MapRecord; +import org.apache.nifi.serialization.record.Record; +import org.apache.nifi.serialization.record.RecordSchema; +import org.apache.nifi.serialization.record.RecordSet; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TestXMLRecordSetWriterProcessor extends AbstractProcessor { + + static final PropertyDescriptor XML_WRITER = new PropertyDescriptor.Builder() + .name("xml_writer") + .identifiesControllerService(XMLRecordSetWriter.class) + .required(true) + .build(); + + static final PropertyDescriptor MULTIPLE_RECORDS = new PropertyDescriptor.Builder() + .name("multiple_records") + .allowableValues("true", "false") + .defaultValue("true") + .addValidator(StandardValidators.NON_EMPTY_VALIDATOR) + .build(); + + public static final Relationship SUCCESS = new Relationship.Builder().name("success").description("success").build(); + + @Override + public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException { + FlowFile flowFile = session.get(); + + final RecordSetWriterFactory writerFactory = context.getProperty(XML_WRITER).asControllerService(RecordSetWriterFactory.class); + final FlowFile flowFileRef = flowFile; + flowFile = session.write(flowFile, out -> { + try { + + final RecordSchema schema = writerFactory.getSchema(null, null); + + boolean multipleRecords = Boolean.parseBoolean(context.getProperty(MULTIPLE_RECORDS).getValue()); + RecordSet recordSet = getRecordSet(multipleRecords); + + final RecordSetWriter writer = writerFactory.createWriter(getLogger(), schema, out, flowFileRef); + + + 
writer.write(recordSet);
+                writer.flush();
+
+
+            } catch (Exception e) {
+                throw new ProcessException(e.getMessage());
+            }
+
+        });
+        session.transfer(flowFile, SUCCESS);
+    }
+
+    @Override
+    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+        return new ArrayList<PropertyDescriptor>() {{ add(XML_WRITER); add(MULTIPLE_RECORDS); }};
+    }
+
+    @Override
+    public Set<Relationship> getRelationships() {
+        return new HashSet<Relationship>() {{ add(SUCCESS); }};
+    }
+
+    protected static RecordSet getRecordSet(boolean multipleRecords) {
+        Object[] arrayVals = {1, null, 3};
+
+        Map<String, Object> recordFields = new HashMap<>();
+        recordFields.put("name1", "val1");
+        recordFields.put("name2", null);
+        recordFields.put("array_field", arrayVals);
+
+        RecordSchema emptySchema = new SimpleRecordSchema(Collections.emptyList());
+
+        List<Record> records = new ArrayList<>();
+        records.add(new MapRecord(emptySchema, recordFields));
+
+        if (multipleRecords) {
+            records.add(new MapRecord(emptySchema, recordFields));
+        }
+
+        return new ListRecordSet(emptySchema, records);
+    }
+
+
+
+}
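
For reference, below is a minimal sketch of how a record-oriented processor would typically call the variables-aware createWriter() overload exercised by this patch (compare createWriter(getLogger(), schema, out, flowFileRef) in TestXMLRecordSetWriterProcessor and createWriter(logger, this.recordSchema, out, Collections.emptyMap()) in TestXMLRecordSetWriter above). The helper class and method names are assumptions for illustration only; only the factory calls (getSchema, createWriter) reflect the changed API. Passing the FlowFile attributes as the variables map is what allows Expression Language in the writer's formatting properties (for example a Value Separator of ${csv.delimiter}, as in the CSV tests above) to be resolved per FlowFile.

import java.util.Map;

import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.schema.access.SchemaNotFoundException;
import org.apache.nifi.serialization.RecordSetWriter;
import org.apache.nifi.serialization.RecordSetWriterFactory;
import org.apache.nifi.serialization.record.RecordSchema;
import org.apache.nifi.serialization.record.RecordSet;

// Hypothetical helper, not part of this patch: shows the intended call pattern
// for the new createWriter() overload that accepts a variables map.
class RecordWriteSketch {

    static FlowFile writeRecords(final ProcessSession session, final FlowFile input, final ComponentLog logger,
                                 final RecordSetWriterFactory writerFactory, final RecordSet recordSet) {
        // FlowFile attributes become the EL variables for the writer's formatting properties.
        final Map<String, String> variables = input.getAttributes();

        return session.write(input, out -> {
            try {
                // null read schema: assumes a schema access strategy (e.g. Schema Text)
                // that does not require the incoming record schema.
                final RecordSchema schema = writerFactory.getSchema(variables, null);
                try (final RecordSetWriter writer = writerFactory.createWriter(logger, schema, out, variables)) {
                    writer.write(recordSet);
                }
            } catch (final SchemaNotFoundException e) {
                throw new ProcessException(e);
            }
        });
    }
}

The FlowFile-based overload used by the test processor above presumably delegates to this map-based variant with the FlowFile's attributes, so callers that already hold a FlowFile can pass it directly instead of extracting the attribute map themselves.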