From 964f56043c7840c11c944fd320dc253d4acd37da Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Thu, 3 Aug 2023 17:25:54 -0400 Subject: [PATCH] NIFI-11910: Clarified some points in docs This closes #7574 Signed-off-by: Chris Sampson --- .../processors/aws/AbstractAWSProcessor.java | 2 + .../aws/s3/AbstractS3Processor.java | 22 +++--- .../processors/aws/s3/DeleteS3Object.java | 14 ++-- .../apache/nifi/processors/aws/s3/ListS3.java | 6 +- ...AbstractAzureDataLakeStorageProcessor.java | 13 +-- .../storage/PutAzureDataLakeStorage.java | 2 +- .../compress/ModifyCompression.java | 5 +- .../processors/gcp/bigquery/PutBigQuery.java | 19 +++-- .../gcp/drive/FetchGoogleDrive.java | 26 +++--- .../processors/gcp/drive/ListGoogleDrive.java | 55 ++++++------- .../processors/gcp/drive/PutGoogleDrive.java | 79 ++++++++++--------- .../processors/gcp/storage/ListGCSBucket.java | 2 +- .../processors/gcp/storage/PutGCSObject.java | 8 +- .../processors/standard/CompressContent.java | 6 +- .../processors/standard/ConvertJSONToSQL.java | 44 +++++------ .../processors/standard/EncodeContent.java | 24 +++--- .../nifi/processors/standard/ExtractText.java | 43 +++++----- .../processors/standard/PartitionRecord.java | 11 ++- .../processors/standard/UnpackContent.java | 42 +++++----- 19 files changed, 214 insertions(+), 209 deletions(-) diff --git a/nifi-nar-bundles/nifi-aws-bundle/nifi-aws-abstract-processors/src/main/java/org/apache/nifi/processors/aws/AbstractAWSProcessor.java b/nifi-nar-bundles/nifi-aws-bundle/nifi-aws-abstract-processors/src/main/java/org/apache/nifi/processors/aws/AbstractAWSProcessor.java index 50310b93c6..fb88c07251 100644 --- a/nifi-nar-bundles/nifi-aws-bundle/nifi-aws-abstract-processors/src/main/java/org/apache/nifi/processors/aws/AbstractAWSProcessor.java +++ b/nifi-nar-bundles/nifi-aws-bundle/nifi-aws-abstract-processors/src/main/java/org/apache/nifi/processors/aws/AbstractAWSProcessor.java @@ -124,6 +124,7 @@ public abstract class AbstractAWSProcessor RELATIONSHIPS = Collections.unmodifiableSet(new HashSet<>(Arrays.asList( diff --git a/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/storage/PutAzureDataLakeStorage.java b/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/storage/PutAzureDataLakeStorage.java index ab6014abfd..cfd660c289 100644 --- a/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/storage/PutAzureDataLakeStorage.java +++ b/nifi-nar-bundles/nifi-azure-bundle/nifi-azure-processors/src/main/java/org/apache/nifi/processors/azure/storage/PutAzureDataLakeStorage.java @@ -69,7 +69,7 @@ import static org.apache.nifi.processors.transfer.ResourceTransferUtils.getFileR @Tags({"azure", "microsoft", "cloud", "storage", "adlsgen2", "datalake"}) @SeeAlso({DeleteAzureDataLakeStorage.class, FetchAzureDataLakeStorage.class, ListAzureDataLakeStorage.class}) -@CapabilityDescription("Puts content into an Azure Data Lake Storage Gen 2") +@CapabilityDescription("Writes the contents of a FlowFile as a file on Azure Data Lake Storage Gen 2") @WritesAttributes({@WritesAttribute(attribute = ATTR_NAME_FILESYSTEM, description = ATTR_DESCRIPTION_FILESYSTEM), @WritesAttribute(attribute = ATTR_NAME_DIRECTORY, description = ATTR_DESCRIPTION_DIRECTORY), @WritesAttribute(attribute = ATTR_NAME_FILENAME, description = ATTR_DESCRIPTION_FILENAME), diff --git 
a/nifi-nar-bundles/nifi-compress-bundle/nifi-compress-processors/src/main/java/org/apache/nifi/processors/compress/ModifyCompression.java b/nifi-nar-bundles/nifi-compress-bundle/nifi-compress-processors/src/main/java/org/apache/nifi/processors/compress/ModifyCompression.java index 9467bfe6d7..f4a3653e03 100644 --- a/nifi-nar-bundles/nifi-compress-bundle/nifi-compress-processors/src/main/java/org/apache/nifi/processors/compress/ModifyCompression.java +++ b/nifi-nar-bundles/nifi-compress-bundle/nifi-compress-processors/src/main/java/org/apache/nifi/processors/compress/ModifyCompression.java @@ -85,8 +85,9 @@ import java.util.zip.InflaterInputStream; @SupportsBatching @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"content", "compress", "recompress", "gzip", "bzip2", "lzma", "xz-lzma2", "snappy", "snappy-hadoop", "snappy framed", "lz4-framed", "deflate", "zstd", "brotli"}) -@CapabilityDescription("Decompresses the contents of FlowFiles using a user-specified compression algorithm and recompresses the contents using the specified compression format properties. " - + "This processor operates in a very memory efficient way so very large objects well beyond the heap size are generally fine to process") +@CapabilityDescription("Changes the compression algorithm used to compress the contents of a FlowFile by decompressing the contents of FlowFiles using a user-specified compression algorithm and " + + "recompressing the contents using the specified compression format properties. This processor operates in a very memory efficient way so very large objects well beyond " + + "the heap size are generally fine to process") @ReadsAttribute(attribute = "mime.type", description = "If the Decompression Format is set to 'use mime.type attribute', this attribute is used to " + "determine the decompression type. Otherwise, this attribute is ignored.") @WritesAttribute(attribute = "mime.type", description = "The appropriate MIME Type is set based on the value of the Compression Format property. 
If the Compression Format is 'no compression' this " diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/bigquery/PutBigQuery.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/bigquery/PutBigQuery.java index 0b17f0d939..1b416c2c79 100644 --- a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/bigquery/PutBigQuery.java +++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/bigquery/PutBigQuery.java @@ -17,18 +17,15 @@ package org.apache.nifi.processors.gcp.bigquery; -import static java.util.stream.Collectors.collectingAndThen; -import static java.util.stream.Collectors.toList; - import com.google.api.core.ApiFuture; import com.google.api.core.ApiFutureCallback; import com.google.api.core.ApiFutures; import com.google.api.gax.core.FixedCredentialsProvider; import com.google.auth.oauth2.GoogleCredentials; import com.google.cloud.bigquery.storage.v1.AppendRowsResponse; +import com.google.cloud.bigquery.storage.v1.BQTableSchemaToProtoDescriptor; import com.google.cloud.bigquery.storage.v1.BatchCommitWriteStreamsRequest; import com.google.cloud.bigquery.storage.v1.BatchCommitWriteStreamsResponse; -import com.google.cloud.bigquery.storage.v1.BQTableSchemaToProtoDescriptor; import com.google.cloud.bigquery.storage.v1.BigQueryWriteClient; import com.google.cloud.bigquery.storage.v1.BigQueryWriteSettings; import com.google.cloud.bigquery.storage.v1.CivilTimeEncoder; @@ -46,8 +43,6 @@ import com.google.cloud.bigquery.storage.v1.stub.BigQueryWriteStubSettings; import com.google.protobuf.Descriptors; import com.google.protobuf.DynamicMessage; import io.grpc.Status; -import java.time.LocalTime; -import java.util.stream.Stream; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.TriggerSerially; import org.apache.nifi.annotation.behavior.WritesAttribute; @@ -75,6 +70,7 @@ import java.io.InputStream; import java.sql.Date; import java.sql.Time; import java.sql.Timestamp; +import java.time.LocalTime; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -85,13 +81,16 @@ import java.util.Optional; import java.util.concurrent.Phaser; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; +import java.util.stream.Stream; + +import static java.util.stream.Collectors.collectingAndThen; +import static java.util.stream.Collectors.toList; @TriggerSerially @Tags({"google", "google cloud", "bq", "bigquery"}) -@CapabilityDescription("Unified processor for batch and stream flow files content to a Google BigQuery table via the Storage Write API." + - "The processor is record based so the used schema is driven by the RecordReader. Attributes that are not matched to the target schema" + - "are skipped. Exactly once delivery semantics are achieved via stream offsets. The Storage Write API is more efficient than the older " + - "insertAll method because it uses gRPC streaming rather than REST over HTTP") +@CapabilityDescription("Writes the contents of a FlowFile to a Google BigQuery table. " + + "The processor is record based so the schema that is used is driven by the RecordReader. Attributes that are not matched to the target schema " + + "are skipped. 
Exactly once delivery semantics are achieved via stream offsets.") @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @WritesAttributes({ @WritesAttribute(attribute = BigQueryAttributes.JOB_NB_RECORDS_ATTR, description = BigQueryAttributes.JOB_NB_RECORDS_DESC) diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java index 2aca33f02f..bbdb555042 100644 --- a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java +++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/FetchGoogleDrive.java @@ -140,11 +140,10 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr } - public static final PropertyDescriptor FILE_ID = new PropertyDescriptor - .Builder().name("drive-file-id") + public static final PropertyDescriptor FILE_ID = new PropertyDescriptor.Builder() + .name("drive-file-id") .displayName("File ID") - .description("The Drive ID of the File to fetch. " - + "Please see Additional Details to obtain Drive ID.") + .description("The Drive ID of the File to fetch. Please see Additional Details for information on how to obtain the Drive ID.") .required(true) .defaultValue("${drive.id}") .expressionLanguageSupported(ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) @@ -198,16 +197,15 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr - public static final Relationship REL_SUCCESS = - new Relationship.Builder() - .name("success") - .description("A FlowFile will be routed here for each successfully fetched File.") - .build(); + public static final Relationship REL_SUCCESS = new Relationship.Builder() + .name("success") + .description("A FlowFile will be routed here for each successfully fetched File.") + .build(); - public static final Relationship REL_FAILURE = - new Relationship.Builder().name("failure") - .description("A FlowFile will be routed here for each File for which fetch was attempted but failed.") - .build(); + public static final Relationship REL_FAILURE = new Relationship.Builder() + .name("failure") + .description("A FlowFile will be routed here for each File for which fetch was attempted but failed.") + .build(); private static final List PROPERTIES = Collections.unmodifiableList(Arrays.asList( GoogleUtils.GCP_CREDENTIALS_PROVIDER_SERVICE, @@ -349,7 +347,7 @@ public class FetchGoogleDrive extends AbstractProcessor implements GoogleDriveTr private void handleErrorResponse(final ProcessSession session, final String fileId, FlowFile flowFile, final GoogleJsonResponseException e) { getLogger().error("Fetching File [{}] failed", fileId, e); - flowFile = session.putAttribute(flowFile, ERROR_CODE, "" + e.getStatusCode()); + flowFile = session.putAttribute(flowFile, ERROR_CODE, String.valueOf(e.getStatusCode())); flowFile = session.putAttribute(flowFile, ERROR_MESSAGE, e.getMessage()); flowFile = session.penalize(flowFile); diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java index b7855d1102..2cf710bf39 100644 --- 
a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java +++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/ListGoogleDrive.java @@ -16,36 +16,12 @@ */ package org.apache.nifi.processors.gcp.drive; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC; - import com.google.api.client.http.HttpTransport; import com.google.api.client.util.DateTime; import com.google.api.services.drive.Drive; import com.google.api.services.drive.DriveScopes; import com.google.api.services.drive.model.File; import com.google.api.services.drive.model.FileList; -import java.io.IOException; -import java.time.Instant; -import java.time.OffsetDateTime; -import java.time.ZoneOffset; -import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.LinkedList; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.concurrent.TimeUnit; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.PrimaryNodeOnly; @@ -74,12 +50,37 @@ import org.apache.nifi.proxy.ProxyConfiguration; import org.apache.nifi.scheduling.SchedulingStrategy; import org.apache.nifi.serialization.record.RecordSchema; +import java.io.IOException; +import java.time.Instant; +import java.time.OffsetDateTime; +import java.time.ZoneOffset; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.concurrent.TimeUnit; + +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC; + @PrimaryNodeOnly @TriggerSerially @Tags({"google", "drive", "storage"}) -@CapabilityDescription("Lists concrete files (shortcuts are ignored) in a Google Drive folder. " + - "Each listed file may result in one FlowFile, the metadata being written as FlowFile attributes. 
" + - "Or - in case the 'Record Writer' property is set - the entire result is written as records to a single FlowFile. " + +@CapabilityDescription("Performs a listing of concrete files (shortcuts are ignored) in a Google Drive folder. " + + "If the 'Record Writer' property is set, a single Output FlowFile is created, and each file in the listing is written as a single record to the output file. " + + "Otherwise, for each file in the listing, an individual FlowFile is created, the metadata being written as FlowFile attributes. " + "This Processor is designed to run on Primary Node only in a cluster. If the primary node changes, the new Primary Node will pick up where the " + "previous node left off without duplicating all of the data. " + "Please see Additional Details to set up access to Google Drive.") diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java index f0b8683c6e..632ff541e5 100644 --- a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java +++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/drive/PutGoogleDrive.java @@ -16,30 +16,6 @@ */ package org.apache.nifi.processors.gcp.drive; -import static java.lang.String.format; -import static java.lang.String.valueOf; -import static java.nio.charset.StandardCharsets.UTF_8; -import static java.util.Arrays.asList; -import static java.util.Collections.singletonList; -import static java.util.stream.Collectors.joining; -import static org.apache.nifi.processor.util.StandardValidators.DATA_SIZE_VALIDATOR; -import static org.apache.nifi.processors.conflict.resolution.ConflictResolutionStrategy.FAIL; -import static org.apache.nifi.processors.conflict.resolution.ConflictResolutionStrategy.IGNORE; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP; -import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC; -import static org.apache.nifi.processors.gcp.util.GoogleUtils.GCP_CREDENTIALS_PROVIDER_SERVICE; - import com.google.api.client.googleapis.json.GoogleJsonResponseException; import com.google.api.client.googleapis.media.MediaHttpUploader; import com.google.api.client.http.GenericUrl; @@ -53,20 +29,6 @@ import com.google.api.services.drive.DriveRequest; import com.google.api.services.drive.DriveScopes; import 
com.google.api.services.drive.model.File; import com.google.api.services.drive.model.FileList; -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; -import java.util.concurrent.TimeUnit; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; import org.apache.nifi.annotation.behavior.ReadsAttribute; @@ -95,10 +57,49 @@ import org.apache.nifi.processors.gcp.ProxyAwareTransportFactory; import org.apache.nifi.proxy.ProxyConfiguration; import org.json.JSONObject; +import java.io.BufferedInputStream; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.concurrent.TimeUnit; + +import static java.lang.String.format; +import static java.lang.String.valueOf; +import static java.nio.charset.StandardCharsets.UTF_8; +import static java.util.Arrays.asList; +import static java.util.Collections.singletonList; +import static java.util.stream.Collectors.joining; +import static org.apache.nifi.processor.util.StandardValidators.DATA_SIZE_VALIDATOR; +import static org.apache.nifi.processors.conflict.resolution.ConflictResolutionStrategy.FAIL; +import static org.apache.nifi.processors.conflict.resolution.ConflictResolutionStrategy.IGNORE; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_CODE_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ERROR_MESSAGE_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.FILENAME_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.ID_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.MIME_TYPE_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.SIZE_DESC; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP; +import static org.apache.nifi.processors.gcp.drive.GoogleDriveAttributes.TIMESTAMP_DESC; +import static org.apache.nifi.processors.gcp.util.GoogleUtils.GCP_CREDENTIALS_PROVIDER_SERVICE; + @SeeAlso({ListGoogleDrive.class, FetchGoogleDrive.class}) @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"google", "drive", "storage", "put"}) -@CapabilityDescription("Puts content to a Google Drive Folder.") +@CapabilityDescription("Writes the contents of a FlowFile as a file in Google Drive.") @ReadsAttribute(attribute = "filename", description = "Uses the FlowFile's filename as the filename for the Google Drive object.") @WritesAttributes({ @WritesAttribute(attribute = ID, 
description = ID_DESC), diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/ListGCSBucket.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/ListGCSBucket.java index 1b156aac71..417af91388 100644 --- a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/ListGCSBucket.java +++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/ListGCSBucket.java @@ -132,7 +132,7 @@ import static org.apache.nifi.processors.gcp.storage.StorageAttributes.URI_DESC; @TriggerWhenEmpty @InputRequirement(Requirement.INPUT_FORBIDDEN) @Tags({"google cloud", "google", "storage", "gcs", "list"}) -@CapabilityDescription("Retrieves a listing of objects from an GCS bucket. For each object that is listed, creates a FlowFile that represents " +@CapabilityDescription("Retrieves a listing of objects from a GCS bucket. For each object that is listed, creates a FlowFile that represents " + "the object so that it can be fetched in conjunction with FetchGCSObject. This Processor is designed to run on Primary Node only " + "in a cluster. If the primary node changes, the new Primary Node will pick up where the previous node left off without duplicating " + "all of the data.") diff --git a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/PutGCSObject.java b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/PutGCSObject.java index ef33008b0c..70cc3332fc 100644 --- a/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/PutGCSObject.java +++ b/nifi-nar-bundles/nifi-gcp-bundle/nifi-gcp-processors/src/main/java/org/apache/nifi/processors/gcp/storage/PutGCSObject.java @@ -107,17 +107,15 @@ import static org.apache.nifi.processors.gcp.storage.StorageAttributes.URI_DESC; @InputRequirement(InputRequirement.Requirement.INPUT_REQUIRED) @Tags({"google", "google cloud", "gcs", "archive", "put"}) -@CapabilityDescription("Puts flow files to a Google Cloud Bucket.") +@CapabilityDescription("Writes the contents of a FlowFile as an object in a Google Cloud Storage bucket.") @SeeAlso({FetchGCSObject.class, DeleteGCSObject.class, ListGCSBucket.class}) @DynamicProperty(name = "The name of a User-Defined Metadata field to add to the GCS Object", value = "The value of a User-Defined Metadata field to add to the GCS Object", description = "Allows user-defined metadata to be added to the GCS object as key/value pairs", expressionLanguageScope = ExpressionLanguageScope.FLOWFILE_ATTRIBUTES) @ReadsAttributes({ - @ReadsAttribute(attribute = "filename", description = "Uses the FlowFile's filename as the filename for the " + - "GCS object"), - @ReadsAttribute(attribute = "mime.type", description = "Uses the FlowFile's MIME type as the content-type for " + - "the GCS object") + @ReadsAttribute(attribute = "filename", description = "Uses the FlowFile's filename as the filename for the GCS object"), + @ReadsAttribute(attribute = "mime.type", description = "Uses the FlowFile's MIME type as the content-type for the GCS object") }) @WritesAttributes({ @WritesAttribute(attribute = BUCKET_ATTR, description = BUCKET_DESC), diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/CompressContent.java 
b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/CompressContent.java index 5e1e202cc4..4868d4ca74 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/CompressContent.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/CompressContent.java @@ -88,8 +88,10 @@ import java.util.zip.InflaterInputStream; @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"content", "compress", "decompress", "gzip", "bzip2", "lzma", "xz-lzma2", "snappy", "snappy-hadoop", "snappy framed", "lz4-framed", "deflate", "zstd", "brotli"}) @CapabilityDescription("Compresses or decompresses the contents of FlowFiles using a user-specified compression algorithm and updates the mime.type " - + "attribute as appropriate. This processor operates in a very memory efficient way so very large objects well beyond the heap size " - + "are generally fine to process") + + "attribute as appropriate. A common idiom is to precede CompressContent with IdentifyMimeType and configure Mode='decompress' AND Compression Format='use mime.type attribute'. " + + "When used in this manner, the MIME type is automatically detected and the data is decompressed, if necessary. " + + "If decompression is unnecessary, the data is passed through to the 'success' relationship." + + " This processor operates in a very memory efficient way so very large objects well beyond the heap size are generally fine to process.") @ReadsAttribute(attribute = "mime.type", description = "If the Compression Format is set to use mime.type attribute, this attribute is used to " + "determine the compression type. Otherwise, this attribute is ignored.") @WritesAttribute(attribute = "mime.type", description = "If the Mode property is set to compress, the appropriate MIME Type is set. 
If the Mode " diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ConvertJSONToSQL.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ConvertJSONToSQL.java index f07fcbfe9f..7af19faf00 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ConvertJSONToSQL.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ConvertJSONToSQL.java @@ -16,27 +16,6 @@ */ package org.apache.nifi.processors.standard; -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.charset.StandardCharsets; -import java.sql.Connection; -import java.sql.DatabaseMetaData; -import java.sql.ResultSet; -import java.sql.ResultSetMetaData; -import java.sql.SQLException; -import java.sql.Types; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicReference; - import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ArrayNode; @@ -68,6 +47,27 @@ import org.apache.nifi.processor.io.InputStreamCallback; import org.apache.nifi.processor.io.OutputStreamCallback; import org.apache.nifi.processor.util.StandardValidators; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Types; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicReference; + import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_COUNT; import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_ID; import static org.apache.nifi.flowfile.attributes.FragmentAttributes.FRAGMENT_INDEX; @@ -75,7 +75,7 @@ import static org.apache.nifi.flowfile.attributes.FragmentAttributes.copyAttribu @SideEffectFree @SupportsBatching -@SeeAlso(PutSQL.class) +@SeeAlso({PutSQL.class, PutDatabaseRecord.class}) @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"json", "sql", "database", "rdbms", "insert", "update", "delete", "relational", "flat"}) @CapabilityDescription("Converts a JSON-formatted FlowFile into an UPDATE, INSERT, or DELETE SQL statement. 
The incoming FlowFile is expected to be " diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EncodeContent.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EncodeContent.java index 869605b7d2..54874a2de2 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EncodeContent.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/EncodeContent.java @@ -16,16 +16,6 @@ */ package org.apache.nifi.processors.standard; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.Arrays; -import java.util.Collections; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; -import java.util.concurrent.TimeUnit; - import org.apache.commons.codec.DecoderException; import org.apache.commons.codec.binary.Base32InputStream; import org.apache.commons.codec.binary.Base32OutputStream; @@ -50,11 +40,21 @@ import org.apache.nifi.processors.standard.util.ValidatingBase64InputStream; import org.apache.nifi.stream.io.StreamUtils; import org.apache.nifi.util.StopWatch; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.TimeUnit; + @SideEffectFree @SupportsBatching @InputRequirement(Requirement.INPUT_REQUIRED) -@Tags({"encode", "decode", "base64", "hex"}) -@CapabilityDescription("Encode or decode contents using configurable encoding schemes") +@Tags({"encode", "decode", "base64", "base32", "hex"}) +@CapabilityDescription("Encode or decode the contents of a FlowFile using Base64, Base32, or hex encoding schemes") public class EncodeContent extends AbstractProcessor { public static final String ENCODE_MODE = "Encode"; diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java index 8e2dafd258..c006e934ff 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/ExtractText.java @@ -16,25 +16,6 @@ */ package org.apache.nifi.processors.standard; -import static io.krakens.grok.api.GrokUtils.getNameGroups; - -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.Charset; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.LinkedBlockingQueue; -import java.util.concurrent.atomic.AtomicReference; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - import org.apache.nifi.annotation.behavior.DynamicProperty; import org.apache.nifi.annotation.behavior.InputRequirement; import org.apache.nifi.annotation.behavior.InputRequirement.Requirement; @@ -61,6 +42,25 @@ import 
org.apache.nifi.processor.io.InputStreamCallback; import org.apache.nifi.processor.util.StandardValidators; import org.apache.nifi.stream.io.StreamUtils; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.atomic.AtomicReference; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static io.krakens.grok.api.GrokUtils.getNameGroups; + @SideEffectFree @SupportsBatching @InputRequirement(Requirement.INPUT_REQUIRED) @@ -111,7 +111,8 @@ public class ExtractText extends AbstractProcessor { public static final PropertyDescriptor MAX_BUFFER_SIZE = new PropertyDescriptor.Builder() .name("Maximum Buffer Size") - .description("Specifies the maximum amount of data to buffer (per file) in order to apply the regular expressions. Files larger than the specified maximum will not be fully evaluated.") + .description("Specifies the maximum amount of data to buffer (per FlowFile) in order to apply the regular expressions. " + + "FlowFiles larger than the specified maximum will not be fully evaluated.") .required(true) .addValidator(StandardValidators.DATA_SIZE_VALIDATOR) .addValidator(StandardValidators.createDataSizeBoundsValidator(0, Integer.MAX_VALUE)) @@ -120,7 +121,7 @@ public class ExtractText extends AbstractProcessor { public static final PropertyDescriptor MAX_CAPTURE_GROUP_LENGTH = new PropertyDescriptor.Builder() .name("Maximum Capture Group Length") - .description("Specifies the maximum number of characters a given capture group value can have. Any characters beyond the max will be truncated.") + .description("Specifies the maximum number of characters a given capture group value can have. Any characters beyond the max will be truncated.") .required(false) .defaultValue("1024") .addValidator(StandardValidators.POSITIVE_INTEGER_VALIDATOR) diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java index 6fe54e7d48..ba6b9f8af4 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/PartitionRecord.java @@ -69,12 +69,11 @@ import java.util.stream.Stream; @SupportsBatching @InputRequirement(Requirement.INPUT_REQUIRED) -@CapabilityDescription("Receives Record-oriented data (i.e., data that can be read by the configured Record Reader) and evaluates one or more RecordPaths against the " - + "each record in the incoming FlowFile. Each record is then grouped with other \"like records\" and a FlowFile is created for each group of \"like records.\" What it means for " - + "two records to be \"like records\" is determined by user-defined properties. The user is required to enter at least one user-defined property whose value is a RecordPath. Two " - + "records are considered alike if they have the same value for all configured RecordPaths. 
Because we know that all records in a given output FlowFile have the same value for the " - + "fields that are specified by the RecordPath, an attribute is added for each field. See Additional Details on the Usage page for more information and examples.") -@DynamicProperty(name="The name given to the dynamic property is the name of the attribute that will be used to denote the value of the associted RecordPath.", +@CapabilityDescription("Splits, or partitions, record-oriented data based on the configured fields in the data. One or more properties must be added. The name of the property is the name " + + "of an attribute to add. The value of the property is a RecordPath to evaluate against each Record. Two records will go to the same outbound FlowFile only if they have the same value for each " + + "of the given RecordPaths. Because we know that all records in a given output FlowFile have the same value for the fields that are specified by the RecordPath, an attribute is added for each " + + "field. See Additional Details on the Usage page for more information and examples.") +@DynamicProperty(name="The name given to the dynamic property is the name of the attribute that will be used to denote the value of the associated RecordPath.", value="A RecordPath that points to a field in the Record.", description="Each dynamic property represents a RecordPath that will be evaluated against each record in an incoming FlowFile. When the value of the RecordPath is determined " + "for a Record, an attribute is added to the outgoing FlowFile. The name of the attribute is the same as the name of this property. The value of the attribute is the same as " diff --git a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/UnpackContent.java b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/UnpackContent.java index 6127308bf8..ede27cc904 100644 --- a/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/UnpackContent.java +++ b/nifi-nar-bundles/nifi-standard-bundle/nifi-standard-processors/src/main/java/org/apache/nifi/processors/standard/UnpackContent.java @@ -16,26 +16,6 @@ */ package org.apache.nifi.processors.standard; -import java.io.BufferedInputStream; -import java.io.BufferedOutputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.nio.file.Path; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.atomic.AtomicReference; -import java.util.regex.Pattern; - import net.lingala.zip4j.io.inputstream.ZipInputStream; import net.lingala.zip4j.model.LocalFileHeader; import net.lingala.zip4j.model.enums.EncryptionMethod; @@ -78,12 +58,32 @@ import org.apache.nifi.util.FlowFileUnpackagerV1; import org.apache.nifi.util.FlowFileUnpackagerV2; import org.apache.nifi.util.FlowFileUnpackagerV3; +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Path; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import 
java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.concurrent.atomic.AtomicReference; +import java.util.regex.Pattern; + @SideEffectFree @SupportsBatching @InputRequirement(Requirement.INPUT_REQUIRED) @Tags({"Unpack", "un-merge", "tar", "zip", "archive", "flowfile-stream", "flowfile-stream-v3"}) @CapabilityDescription("Unpacks the content of FlowFiles that have been packaged with one of several different Packaging Formats, emitting one to many " - + "FlowFiles for each input FlowFile") + + "FlowFiles for each input FlowFile. Supported formats are TAR, ZIP, and FlowFile Stream packages.") @ReadsAttribute(attribute = "mime.type", description = "If the property is set to use mime.type attribute, this attribute is used " + "to determine the FlowFile's MIME Type. In this case, if the attribute is set to application/tar, the TAR Packaging Format will be used. If " + "the attribute is set to application/zip, the ZIP Packaging Format will be used. If the attribute is set to application/flowfile-v3 or "