mirror of https://github.com/apache/nifi.git

commit a3fe9d121d
parent bdd2a2b24b

NIFI-11817: Improve ListHDFS extensibility

Fix review items

This closes #7484

Signed-off-by: Bence Simon <bsimon@apache.org>
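The refactor moves writer construction out of ListHDFS into a HadoopFileStatusWriter.builder() and introduces two protected hooks, getSuccessRelationship() and getAttributePrefix(), that subclasses can override to reroute listed FlowFiles or change the per-file attribute key prefix. A minimal sketch of such a subclass follows; the class name, relationship, and prefix are illustrative, and only the two overridden methods come from this commit:

    import org.apache.nifi.processor.Relationship;
    import org.apache.nifi.processors.hadoop.ListHDFS;

    // Hypothetical subclass showing the new extension points; everything here
    // except getSuccessRelationship()/getAttributePrefix() is illustrative.
    public class CustomListHDFS extends ListHDFS {

        static final Relationship REL_LISTED = new Relationship.Builder()
                .name("listed")
                .description("Custom relationship that receives the listed files")
                .build();

        @Override
        protected Relationship getSuccessRelationship() {
            // Writers built in onTrigger() transfer FlowFiles here instead of REL_SUCCESS.
            return REL_LISTED;
        }

        @Override
        protected String getAttributePrefix() {
            // Per-file attributes become custom.owner, custom.group, custom.length, ...
            return "custom";
        }
    }

A real subclass would also need to expose REL_LISTED from getRelationships(); that part is not touched by this commit.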
ListHDFS.java

@@ -46,9 +46,7 @@ import org.apache.nifi.processor.util.StandardValidators;
 import org.apache.nifi.processors.hadoop.util.FileStatusIterable;
 import org.apache.nifi.processors.hadoop.util.FileStatusManager;
 import org.apache.nifi.processors.hadoop.util.FilterMode;
-import org.apache.nifi.processors.hadoop.util.writer.FlowFileObjectWriter;
 import org.apache.nifi.processors.hadoop.util.writer.HadoopFileStatusWriter;
-import org.apache.nifi.processors.hadoop.util.writer.RecordObjectWriter;
 import org.apache.nifi.scheduling.SchedulingStrategy;
 import org.apache.nifi.serialization.RecordSetWriterFactory;
 
@@ -100,6 +98,7 @@ import static org.apache.nifi.processors.hadoop.util.FilterMode.FILTER_DIRECTORI
 public class ListHDFS extends AbstractHadoopProcessor {
 
     private static final String NON_HIDDEN_FILES_REGEX = "[^\\.].*";
+    private static final String HDFS_ATTRIBUTE_PREFIX = "hdfs";
 
     public static final PropertyDescriptor RECURSE_SUBDIRS = new PropertyDescriptor.Builder()
             .name("Recurse Subdirectories")
@@ -163,6 +162,8 @@ public class ListHDFS extends AbstractHadoopProcessor {
     public static final String LATEST_TIMESTAMP_KEY = "latest.timestamp";
     public static final String LATEST_FILES_KEY = "latest.file.%d";
 
+    private static final List<PropertyDescriptor> LIST_HDFS_PROPERTIES = Arrays.asList(
+            DIRECTORY, RECURSE_SUBDIRS, RECORD_WRITER, FILE_FILTER, FILE_FILTER_MODE, MINIMUM_FILE_AGE, MAXIMUM_FILE_AGE);
     private static final Set<Relationship> RELATIONSHIPS = Collections.singleton(REL_SUCCESS);
     private Pattern fileFilterRegexPattern;
     private volatile boolean resetState = false;
@@ -177,7 +178,7 @@ public class ListHDFS extends AbstractHadoopProcessor {
     @Override
     protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
         final List<PropertyDescriptor> props = new ArrayList<>(properties);
-        props.addAll(Arrays.asList(DIRECTORY, RECURSE_SUBDIRS, RECORD_WRITER, FILE_FILTER, FILE_FILTER_MODE, MINIMUM_FILE_AGE, MAXIMUM_FILE_AGE));
+        props.addAll(LIST_HDFS_PROPERTIES);
         return props;
     }
 
@@ -263,24 +264,31 @@ public class ListHDFS extends AbstractHadoopProcessor {
 
         final FileStatusManager fileStatusManager = new FileStatusManager(latestTimestamp, latestFiles);
         final Path rootPath = getNormalizedPath(context, DIRECTORY);
-        final FileStatusIterable fileStatuses = new FileStatusIterable(rootPath, recursive, hdfs, getUserGroupInformation());
+        final FileStatusIterable fileStatusIterable = new FileStatusIterable(rootPath, recursive, hdfs, getUserGroupInformation());
 
         final Long minAgeProp = context.getProperty(MINIMUM_FILE_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
         final long minimumAge = (minAgeProp == null) ? Long.MIN_VALUE : minAgeProp;
         final Long maxAgeProp = context.getProperty(MAXIMUM_FILE_AGE).asTimePeriod(TimeUnit.MILLISECONDS);
         final long maximumAge = (maxAgeProp == null) ? Long.MAX_VALUE : maxAgeProp;
 
-        final HadoopFileStatusWriter writer;
-        if (writerFactory == null) {
-            writer = new FlowFileObjectWriter(session, fileStatuses, minimumAge, maximumAge, pathFilter, fileStatusManager, latestTimestamp, latestFiles);
-        } else {
-            writer = new RecordObjectWriter(session, fileStatuses, minimumAge, maximumAge, pathFilter, fileStatusManager, latestTimestamp,
-                    latestFiles, writerFactory, getLogger());
-        }
+        final HadoopFileStatusWriter writer = HadoopFileStatusWriter.builder()
+                .session(session)
+                .successRelationship(getSuccessRelationship())
+                .fileStatusIterable(fileStatusIterable)
+                .fileStatusManager(fileStatusManager)
+                .pathFilter(pathFilter)
+                .minimumAge(minimumAge)
+                .maximumAge(maximumAge)
+                .previousLatestTimestamp(latestTimestamp)
+                .previousLatestFiles(latestFiles)
+                .writerFactory(writerFactory)
+                .hdfsPrefix(getAttributePrefix())
+                .logger(getLogger())
+                .build();
 
         writer.write();
 
-        getLogger().debug("Found a total of {} files in HDFS, {} are listed", fileStatuses.getTotalFileCount(), writer.getListedFileCount());
+        getLogger().debug("Found a total of {} files in HDFS, {} are listed", fileStatusIterable.getTotalFileCount(), writer.getListedFileCount());
 
         if (writer.getListedFileCount() > 0) {
             final Map<String, String> updatedState = new HashMap<>();
@@ -335,4 +343,12 @@ public class ListHDFS extends AbstractHadoopProcessor {
             getLogger().warn("Failed to save cluster-wide state. If NiFi is restarted, data duplication may occur", e);
         }
     }
+
+    protected Relationship getSuccessRelationship() {
+        return REL_SUCCESS;
+    }
+
+    protected String getAttributePrefix() {
+        return HDFS_ATTRIBUTE_PREFIX;
+    }
 }
FlowFileObjectWriter.java → FlowFileHadoopFileStatusWriter.java

@@ -20,28 +20,31 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.nifi.flowfile.FlowFile;
 import org.apache.nifi.flowfile.attributes.CoreAttributes;
+import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.ProcessSession;
-import org.apache.nifi.processors.hadoop.ListHDFS;
+import org.apache.nifi.processor.Relationship;
 import org.apache.nifi.processors.hadoop.util.FileStatusIterable;
 import org.apache.nifi.processors.hadoop.util.FileStatusManager;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
 
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-public class FlowFileObjectWriter extends HadoopFileStatusWriter {
-
-    private static final String HDFS_ATTRIBUTE_PREFIX = "hdfs";
-
-    public FlowFileObjectWriter(final ProcessSession session,
-                                final FileStatusIterable fileStatuses,
-                                final long minimumAge,
-                                final long maximumAge,
-                                final PathFilter pathFilter,
-                                final FileStatusManager fileStatusManager,
-                                final long previousLatestModificationTime,
-                                final List<String> previousLatestFiles) {
-        super(session, fileStatuses, minimumAge, maximumAge, pathFilter, fileStatusManager, previousLatestModificationTime, previousLatestFiles);
+public class FlowFileHadoopFileStatusWriter extends HadoopFileStatusWriter {
+    public FlowFileHadoopFileStatusWriter(final ProcessSession session,
+                                          final Relationship successRelationship,
+                                          final FileStatusIterable fileStatusIterable,
+                                          final FileStatusManager fileStatusManager,
+                                          final PathFilter pathFilter,
+                                          final long minimumAge,
+                                          final long maximumAge,
+                                          final long previousLatestTimestamp,
+                                          final List<String> previousLatestFiles,
+                                          final RecordSetWriterFactory writerFactory,
+                                          final String hdfsPrefix,
+                                          final ComponentLog logger) {
+        super(session, successRelationship, fileStatusIterable, fileStatusManager, pathFilter, minimumAge, maximumAge, previousLatestTimestamp, previousLatestFiles, writerFactory, hdfsPrefix, logger);
     }
 
     @Override
@@ -52,7 +55,7 @@ public class FlowFileObjectWriter extends HadoopFileStatusWriter {
             final Map<String, String> attributes = createAttributes(status);
             FlowFile flowFile = session.create();
             flowFile = session.putAllAttributes(flowFile, attributes);
-            session.transfer(flowFile, ListHDFS.REL_SUCCESS);
+            session.transfer(flowFile, successRelationship);
 
             fileStatusManager.update(status);
             fileCount++;
@@ -64,12 +67,12 @@ public class FlowFileObjectWriter extends HadoopFileStatusWriter {
         final Map<String, String> attributes = new HashMap<>();
         attributes.put(CoreAttributes.FILENAME.key(), status.getPath().getName());
         attributes.put(CoreAttributes.PATH.key(), getAbsolutePath(status.getPath().getParent()));
-        attributes.put(HDFS_ATTRIBUTE_PREFIX + ".owner", status.getOwner());
-        attributes.put(HDFS_ATTRIBUTE_PREFIX + ".group", status.getGroup());
-        attributes.put(HDFS_ATTRIBUTE_PREFIX + ".lastModified", String.valueOf(status.getModificationTime()));
-        attributes.put(HDFS_ATTRIBUTE_PREFIX + ".length", String.valueOf(status.getLen()));
-        attributes.put(HDFS_ATTRIBUTE_PREFIX + ".replication", String.valueOf(status.getReplication()));
-        attributes.put(HDFS_ATTRIBUTE_PREFIX + ".permissions", getPermissionsString(status.getPermission()));
+        attributes.put(hdfsPrefix + ".owner", status.getOwner());
+        attributes.put(hdfsPrefix + ".group", status.getGroup());
+        attributes.put(hdfsPrefix + ".lastModified", String.valueOf(status.getModificationTime()));
+        attributes.put(hdfsPrefix + ".length", String.valueOf(status.getLen()));
+        attributes.put(hdfsPrefix + ".replication", String.valueOf(status.getReplication()));
+        attributes.put(hdfsPrefix + ".permissions", getPermissionsString(status.getPermission()));
         return attributes;
     }
 }
HadoopFileStatusWriter.java

@@ -21,9 +21,12 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
 import org.apache.nifi.processors.hadoop.util.FileStatusIterable;
 import org.apache.nifi.processors.hadoop.util.FileStatusManager;
+import org.apache.nifi.serialization.RecordSetWriterFactory;
 
 import java.util.List;
 
@@ -33,33 +36,45 @@ import java.util.List;
 public abstract class HadoopFileStatusWriter {
 
     protected final ProcessSession session;
+    protected final Relationship successRelationship;
     protected final FileStatusIterable fileStatusIterable;
+    protected final FileStatusManager fileStatusManager;
+    protected final PathFilter pathFilter;
     protected final long minimumAge;
     protected final long maximumAge;
-    protected final PathFilter pathFilter;
-    protected final FileStatusManager fileStatusManager;
     protected final long previousLatestTimestamp;
     protected final List<String> previousLatestFiles;
+    protected final RecordSetWriterFactory writerFactory;
+    protected final String hdfsPrefix;
+    protected final ComponentLog logger;
+    protected final long currentTimeMillis;
     protected long fileCount;
-    private final long currentTimeMillis;
 
-    HadoopFileStatusWriter(final ProcessSession session,
-                           final FileStatusIterable fileStatusIterable,
-                           final long minimumAge,
-                           final long maximumAge,
-                           final PathFilter pathFilter,
-                           final FileStatusManager fileStatusManager,
-                           final long previousLatestTimestamp,
-                           final List<String> previousLatestFiles) {
+    public HadoopFileStatusWriter(final ProcessSession session,
+                                  final Relationship successRelationship,
+                                  final FileStatusIterable fileStatusIterable,
+                                  final FileStatusManager fileStatusManager,
+                                  final PathFilter pathFilter,
+                                  final long minimumAge,
+                                  final long maximumAge,
+                                  final long previousLatestTimestamp,
+                                  final List<String> previousLatestFiles,
+                                  final RecordSetWriterFactory writerFactory,
+                                  final String hdfsPrefix,
+                                  final ComponentLog logger) {
         this.session = session;
+        this.successRelationship = successRelationship;
         this.fileStatusIterable = fileStatusIterable;
+        this.fileStatusManager = fileStatusManager;
+        this.pathFilter = pathFilter;
         this.minimumAge = minimumAge;
        this.maximumAge = maximumAge;
-        this.pathFilter = pathFilter;
-        this.fileStatusManager = fileStatusManager;
         this.previousLatestTimestamp = previousLatestTimestamp;
         this.previousLatestFiles = previousLatestFiles;
+        this.writerFactory = writerFactory;
+        this.hdfsPrefix = hdfsPrefix;
+        this.logger = logger;
         currentTimeMillis = System.currentTimeMillis();
         fileCount = 0L;
     }
@@ -110,4 +125,152 @@ public abstract class HadoopFileStatusWriter {
         return String.format("%s%s%s", getPerms(permission.getUserAction()),
                 getPerms(permission.getGroupAction()), getPerms(permission.getOtherAction()));
     }
+
+    public static HadoopFileStatusWriter.Builder builder() {
+        return new HadoopFileStatusWriter.Builder();
+    }
+
+    public static class Builder {
+        private ProcessSession session;
+        private FileStatusIterable fileStatusIterable;
+        private long minimumAge;
+        private long maximumAge;
+        private PathFilter pathFilter;
+        private FileStatusManager fileStatusManager;
+        private long previousLatestTimestamp;
+        private List<String> previousLatestFiles;
+        private Relationship successRelationship;
+        private RecordSetWriterFactory writerFactory;
+        private ComponentLog logger;
+        private String hdfsPrefix;
+
+        public HadoopFileStatusWriter.Builder session(final ProcessSession session) {
+            this.session = session;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder fileStatusIterable(final FileStatusIterable fileStatusIterable) {
+            this.fileStatusIterable = fileStatusIterable;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder minimumAge(final long minimumAge) {
+            this.minimumAge = minimumAge;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder maximumAge(final long maximumAge) {
+            this.maximumAge = maximumAge;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder pathFilter(final PathFilter pathFilter) {
+            this.pathFilter = pathFilter;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder fileStatusManager(final FileStatusManager fileStatusManager) {
+            this.fileStatusManager = fileStatusManager;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder previousLatestTimestamp(final long previousLatestTimestamp) {
+            this.previousLatestTimestamp = previousLatestTimestamp;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder previousLatestFiles(final List<String> previousLatestFiles) {
+            this.previousLatestFiles = previousLatestFiles;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder successRelationship(final Relationship successRelationship) {
+            this.successRelationship = successRelationship;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder writerFactory(final RecordSetWriterFactory writerFactory) {
+            this.writerFactory = writerFactory;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder logger(final ComponentLog logger) {
+            this.logger = logger;
+            return this;
+        }
+
+        public HadoopFileStatusWriter.Builder hdfsPrefix(final String hdfsPrefix) {
+            this.hdfsPrefix = hdfsPrefix;
+            return this;
+        }
+
+        public HadoopFileStatusWriter build() {
+            validateMandatoryField("session", session);
+            validateMandatoryField("successRelationship", successRelationship);
+            validateMandatoryField("fileStatusIterable", fileStatusIterable);
+            validateMandatoryField("fileStatusManager", fileStatusManager);
+
+            if (writerFactory == null) {
+                return new FlowFileHadoopFileStatusWriter(session, successRelationship, fileStatusIterable, fileStatusManager, pathFilter, minimumAge, maximumAge,
+                        previousLatestTimestamp, previousLatestFiles, writerFactory, hdfsPrefix, logger);
+            } else {
+                return new RecordHadoopFileStatusWriter(session, successRelationship, fileStatusIterable, fileStatusManager, pathFilter, minimumAge, maximumAge,
+                        previousLatestTimestamp, previousLatestFiles, writerFactory, hdfsPrefix, logger);
+            }
+        }
+
+        private void validateMandatoryField(String variableName, Object variable) {
+            if (variable == null) {
+                throw new IllegalArgumentException(variableName + " is null but must be set");
+            }
+        }
+    }
+
+    public ProcessSession getSession() {
+        return session;
+    }
+
+    public Relationship getSuccessRelationship() {
+        return successRelationship;
+    }
+
+    public FileStatusIterable getFileStatusIterable() {
+        return fileStatusIterable;
+    }
+
+    public FileStatusManager getFileStatusManager() {
+        return fileStatusManager;
+    }
+
+    public PathFilter getPathFilter() {
+        return pathFilter;
+    }
+
+    public long getMinimumAge() {
+        return minimumAge;
+    }
+
+    public long getMaximumAge() {
+        return maximumAge;
+    }
+
+    public long getPreviousLatestTimestamp() {
+        return previousLatestTimestamp;
+    }
+
+    public List<String> getPreviousLatestFiles() {
+        return previousLatestFiles;
+    }
+
+    public RecordSetWriterFactory getWriterFactory() {
+        return writerFactory;
+    }
+
+    public String getHdfsPrefix() {
+        return hdfsPrefix;
+    }
+
+    public ComponentLog getLogger() {
+        return logger;
+    }
 }
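Taken together, build() validates the four mandatory fields and then dispatches on writerFactory: null yields the FlowFile-per-file writer, non-null the record writer. A hedged sketch of a call site follows, assuming it runs inside a processor's onTrigger() with the usual locals in scope; the age bounds and prefix shown are illustrative:

    // Sketch only: session, fileStatusIterable, fileStatusManager, pathFilter,
    // latestTimestamp and latestFiles are assumed from the enclosing scope.
    final HadoopFileStatusWriter writer = HadoopFileStatusWriter.builder()
            .session(session)
            .successRelationship(REL_SUCCESS)
            .fileStatusIterable(fileStatusIterable)
            .fileStatusManager(fileStatusManager)
            .pathFilter(pathFilter)
            .minimumAge(0L)                 // no lower age bound
            .maximumAge(Long.MAX_VALUE)     // no upper age bound
            .previousLatestTimestamp(latestTimestamp)
            .previousLatestFiles(latestFiles)
            .writerFactory(null)            // null selects FlowFileHadoopFileStatusWriter
            .hdfsPrefix("hdfs")
            .logger(getLogger())
            .build();                       // throws IllegalArgumentException if a mandatory field is null
    writer.write();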
RecordObjectWriter.java → RecordHadoopFileStatusWriter.java

@@ -20,8 +20,10 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.flowfile.attributes.CoreAttributes;
 import org.apache.nifi.logging.ComponentLog;
 import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.Relationship;
 import org.apache.nifi.processor.exception.ProcessException;
 import org.apache.nifi.processors.hadoop.util.FileStatusIterable;
 import org.apache.nifi.processors.hadoop.util.FileStatusManager;
@@ -42,9 +44,7 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
-import static org.apache.nifi.processors.hadoop.ListHDFS.REL_SUCCESS;
-
-public class RecordObjectWriter extends HadoopFileStatusWriter {
+public class RecordHadoopFileStatusWriter extends HadoopFileStatusWriter {
 
     private static final RecordSchema RECORD_SCHEMA;
 
@@ -78,23 +78,19 @@ public class RecordObjectWriter extends HadoopFileStatusWriter {
         RECORD_SCHEMA = new SimpleRecordSchema(recordFields);
     }
 
-    private final RecordSetWriterFactory writerFactory;
-    private final ComponentLog logger;
-
-    public RecordObjectWriter(final ProcessSession session,
-                              final FileStatusIterable fileStatuses,
-                              final long minimumAge,
-                              final long maximumAge,
-                              final PathFilter pathFilter,
-                              final FileStatusManager fileStatusManager,
-                              final long previousLatestModificationTime,
-                              final List<String> previousLatestFiles,
-                              final RecordSetWriterFactory writerFactory,
-                              final ComponentLog logger) {
-        super(session, fileStatuses, minimumAge, maximumAge, pathFilter, fileStatusManager, previousLatestModificationTime, previousLatestFiles);
-        this.writerFactory = writerFactory;
-        this.logger = logger;
+    public RecordHadoopFileStatusWriter(final ProcessSession session,
+                                        final Relationship successRelationship,
+                                        final FileStatusIterable fileStatusIterable,
+                                        final FileStatusManager fileStatusManager,
+                                        final PathFilter pathFilter,
+                                        final long minimumAge,
+                                        final long maximumAge,
+                                        final long previousLatestTimestamp,
+                                        final List<String> previousLatestFiles,
+                                        final RecordSetWriterFactory writerFactory,
+                                        final String hdfsPrefix,
+                                        final ComponentLog logger) {
+        super(session, successRelationship, fileStatusIterable, fileStatusManager, pathFilter, minimumAge, maximumAge, previousLatestTimestamp, previousLatestFiles, writerFactory, hdfsPrefix, logger);
     }
 
     @Override
@@ -102,10 +98,12 @@ public class RecordObjectWriter extends HadoopFileStatusWriter {
         FlowFile flowFile = session.create();
 
         final WriteResult writeResult;
+        final String mimeType;
         try (
                 final OutputStream out = session.write(flowFile);
                 final RecordSetWriter recordWriter = writerFactory.createWriter(logger, RECORD_SCHEMA, out, flowFile)
         ) {
+            mimeType = recordWriter.getMimeType();
            recordWriter.beginRecordSet();
             for (FileStatus status : fileStatusIterable) {
                 if (determineListable(status)) {
@@ -124,8 +122,9 @@ public class RecordObjectWriter extends HadoopFileStatusWriter {
             } else {
                 final Map<String, String> attributes = new HashMap<>(writeResult.getAttributes());
                 attributes.put("record.count", String.valueOf(writeResult.getRecordCount()));
+                attributes.put(CoreAttributes.MIME_TYPE.key(), mimeType);
                 flowFile = session.putAllAttributes(flowFile, attributes);
-                session.transfer(flowFile, REL_SUCCESS);
+                session.transfer(flowFile, successRelationship);
             }
         }
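With the added mimeType handling, the single record-oriented listing FlowFile now carries the configured writer's MIME type (from RecordSetWriter#getMimeType()) next to record.count. A hedged test sketch, assuming a TestRunner already configured for ListHDFS with a record writer service and an input directory; the setup itself is not part of this commit:

    // Sketch only: runner construction and record-writer configuration assumed.
    runner.run();
    final MockFlowFile listing = runner.getFlowFilesForRelationship(ListHDFS.REL_SUCCESS).get(0);
    listing.assertAttributeExists("record.count");
    listing.assertAttributeExists(CoreAttributes.MIME_TYPE.key()); // value supplied by the writer, e.g. a JSON writer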