mirror of https://github.com/apache/nifi.git

NIFI-3366 This closes #2332. Added parent/child flowfile relationship between the incoming flowfile and the files that are moved from the input directory to the output directory.

Updated to allow tests to check for evaluation of properties that support expression language. Fixed a bug with changeOwner attempting to operate on the original file rather than the moved/copied file. Added a license header to MoveHDFSTest.java. Added a test for moving a directory of files that contains a subdirectory, ensuring non-recursive behavior. Added to the description of the processor that it is non-recursive when a directory is used as input. Added a RAT exclude for the test resource .dotfile to pom.xml.

Signed-off-by: joewitt <joewitt@apache.org>

This commit is contained in:
parent 3731fbee88
commit 600586d6be
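The heart of the change is the parent/child FlowFile relationship: every file that MoveHDFS moves or copies out of the input directory becomes a child of the FlowFile that triggered the run, so provenance ties each output file back to its trigger. Below is a minimal sketch of that ProcessSession pattern, assuming a processor onTrigger body with the usual NiFi imports; the movedFiles list and REL_SUCCESS relationship here are placeholders for illustration, not the exact MoveHDFS code shown in the diff that follows.

    // Sketch only: derive child FlowFiles from the triggering parent FlowFile.
    FlowFile parent = session.get();
    if (parent == null) {
        return;
    }
    for (final Path movedFile : movedFiles) {        // hypothetical list of files already moved on HDFS
        FlowFile child = session.create(parent);     // establishes the parent/child provenance link
        child = session.putAttribute(child, CoreAttributes.FILENAME.key(), movedFile.getName());
        session.getProvenanceReporter().send(child, movedFile.toUri().toString());
        session.transfer(child, REL_SUCCESS);
    }
    session.remove(parent);                          // the parent carries no content of its own

The committed processor applies the same pattern inside a PrivilegedAction for each file in the batch and removes the parent once every child has been transferred.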
pom.xml:

@@ -66,4 +66,19 @@
            <artifactId>nifi-properties</artifactId>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.rat</groupId>
                <artifactId>apache-rat-plugin</artifactId>
                <configuration>
                    <excludes combine.children="append">
                        <exclude>src/test/resources/testdata/.dotfile</exclude>
                    </excludes>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
MoveHDFS.java:

@@ -32,6 +32,7 @@ import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.regex.Pattern;

import com.google.common.base.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -60,145 +61,141 @@ import org.apache.nifi.util.StopWatch;
/**
 * This processor renames files on HDFS.
 */
@Tags({"hadoop", "HDFS", "put", "move", "filesystem", "restricted", "moveHDFS"})
@CapabilityDescription("Rename existing files or a directory of files (non-recursive) on Hadoop Distributed File System (HDFS).")
@ReadsAttribute(attribute = "filename", description = "The name of the file written to HDFS comes from the value of this attribute.")
@WritesAttributes({
        @WritesAttribute(attribute = "filename", description = "The name of the file written to HDFS is stored in this attribute."),
        @WritesAttribute(attribute = "absolute.hdfs.path", description = "The absolute path to the file on HDFS is stored in this attribute.")})
@SeeAlso({PutHDFS.class, GetHDFS.class})
public class MoveHDFS extends AbstractHadoopProcessor {

    // static global
    public static final String REPLACE_RESOLUTION = "replace";
    public static final String IGNORE_RESOLUTION = "ignore";
    public static final String FAIL_RESOLUTION = "fail";

    private static final Set<Relationship> relationships;

    public static final AllowableValue REPLACE_RESOLUTION_AV = new AllowableValue(REPLACE_RESOLUTION,
            REPLACE_RESOLUTION, "Replaces the existing file if any.");
    public static final AllowableValue IGNORE_RESOLUTION_AV = new AllowableValue(IGNORE_RESOLUTION, IGNORE_RESOLUTION,
            "Failed rename operation stops processing and routes to success.");
    public static final AllowableValue FAIL_RESOLUTION_AV = new AllowableValue(FAIL_RESOLUTION, FAIL_RESOLUTION,
            "Failing to rename a file routes to failure.");

    public static final String ABSOLUTE_HDFS_PATH_ATTRIBUTE = "absolute.hdfs.path";

    // relationships
    public static final Relationship REL_SUCCESS = new Relationship.Builder().name("success")
            .description("Files that have been successfully renamed on HDFS are transferred to this relationship")
            .build();

    public static final Relationship REL_FAILURE = new Relationship.Builder().name("failure")
            .description("Files that could not be renamed on HDFS are transferred to this relationship").build();

    // properties
    public static final PropertyDescriptor CONFLICT_RESOLUTION = new PropertyDescriptor.Builder()
            .name("Conflict Resolution Strategy")
            .description(
                    "Indicates what should happen when a file with the same name already exists in the output directory")
            .required(true).defaultValue(FAIL_RESOLUTION_AV.getValue())
            .allowableValues(REPLACE_RESOLUTION_AV, IGNORE_RESOLUTION_AV, FAIL_RESOLUTION_AV).build();

    public static final PropertyDescriptor FILE_FILTER_REGEX = new PropertyDescriptor.Builder()
            .name("File Filter Regex")
            .description(
                    "A Java Regular Expression for filtering Filenames; if a filter is supplied then only files whose names match that Regular "
                            + "Expression will be fetched, otherwise all files will be fetched")
            .required(false).addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR).build();

    public static final PropertyDescriptor IGNORE_DOTTED_FILES = new PropertyDescriptor.Builder()
            .name("Ignore Dotted Files")
            .description("If true, files whose names begin with a dot (\".\") will be ignored").required(true)
            .allowableValues("true", "false").defaultValue("true").build();

    public static final PropertyDescriptor INPUT_DIRECTORY_OR_FILE = new PropertyDescriptor.Builder()
            .name("Input Directory or File")
            .description("The HDFS directory from which files should be read, or a single file to read.")
            .defaultValue("${path}").addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR)
            .expressionLanguageSupported(true).build();

    public static final PropertyDescriptor OUTPUT_DIRECTORY = new PropertyDescriptor.Builder().name("Output Directory")
            .description("The HDFS directory where the files will be moved to").required(true)
            .addValidator(StandardValidators.ATTRIBUTE_EXPRESSION_LANGUAGE_VALIDATOR).expressionLanguageSupported(true)
            .build();

    public static final PropertyDescriptor OPERATION = new PropertyDescriptor.Builder().name("HDFS Operation")
            .description("The operation that will be performed on the source file").required(true)
            .allowableValues("move", "copy").defaultValue("move").build();

    public static final PropertyDescriptor REMOTE_OWNER = new PropertyDescriptor.Builder().name("Remote Owner")
            .description(
                    "Changes the owner of the HDFS file to this value after it is written. This only works if NiFi is running as a user that has HDFS super user privilege to change owner")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();

    public static final PropertyDescriptor REMOTE_GROUP = new PropertyDescriptor.Builder().name("Remote Group")
            .description(
                    "Changes the group of the HDFS file to this value after it is written. This only works if NiFi is running as a user that has HDFS super user privilege to change group")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR).build();

    static {
        final Set<Relationship> rels = new HashSet<>();
        rels.add(REL_SUCCESS);
        rels.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(rels);
    }

    // non-static global
    protected ProcessorConfiguration processorConfig;
    private final AtomicLong logEmptyListing = new AtomicLong(2L);

    private final Lock listingLock = new ReentrantLock();
    private final Lock queueLock = new ReentrantLock();

    private final BlockingQueue<Path> filePathQueue = new LinkedBlockingQueue<>();
    private final BlockingQueue<Path> processing = new LinkedBlockingQueue<>();

    // methods
    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        List<PropertyDescriptor> props = new ArrayList<>(properties);
        props.add(CONFLICT_RESOLUTION);
        props.add(INPUT_DIRECTORY_OR_FILE);
        props.add(OUTPUT_DIRECTORY);
        props.add(OPERATION);
        props.add(FILE_FILTER_REGEX);
        props.add(IGNORE_DOTTED_FILES);
        props.add(REMOTE_OWNER);
        props.add(REMOTE_GROUP);
        return props;
    }

    @OnScheduled
    public void onScheduled(ProcessContext context) throws Exception {
        super.abstractOnScheduled(context);
        // copy configuration values to pass them around cleanly
        processorConfig = new ProcessorConfiguration(context);
        // forget the state of the queue in case HDFS contents changed while
        // this processor was turned off
        queueLock.lock();
        try {
            filePathQueue.clear();
            processing.clear();
        } finally {
            queueLock.unlock();
        }
    }

    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        // MoveHDFS
        FlowFile parentFlowFile = session.get();
        if (parentFlowFile == null) {
            return;
        }
@@ -209,319 +206,318 @@ public class MoveHDFS extends AbstractHadoopProcessor {
        Path inputPath = null;
        try {
            inputPath = new Path(filenameValue);
            if (!hdfs.exists(inputPath)) {
                throw new IOException("Input Directory or File does not exist in HDFS");
            }
        } catch (Exception e) {
            getLogger().error("Failed to retrieve content from {} for {} due to {}; routing to failure", new Object[]{filenameValue, parentFlowFile, e});
            parentFlowFile = session.putAttribute(parentFlowFile, "hdfs.failure.reason", e.getMessage());
            parentFlowFile = session.penalize(parentFlowFile);
            session.transfer(parentFlowFile, REL_FAILURE);
            return;
        }

        List<Path> files = new ArrayList<Path>();

        try {
            final StopWatch stopWatch = new StopWatch(true);
            Set<Path> listedFiles = performListing(context, inputPath);
            stopWatch.stop();
            final long millis = stopWatch.getDuration(TimeUnit.MILLISECONDS);

            if (listedFiles != null) {
                // place files into the work queue
                int newItems = 0;
                queueLock.lock();
                try {
                    for (Path file : listedFiles) {
                        if (!filePathQueue.contains(file) && !processing.contains(file)) {
                            if (!filePathQueue.offer(file)) {
                                break;
                            }
                            newItems++;
                        }
                    }
                } catch (Exception e) {
                    getLogger().warn("Could not add to processing queue due to {}", new Object[]{e.getMessage()}, e);
                } finally {
                    queueLock.unlock();
                }
                if (listedFiles.size() > 0) {
                    logEmptyListing.set(3L);
                }
                if (logEmptyListing.getAndDecrement() > 0) {
                    getLogger().info(
                            "Obtained file listing in {} milliseconds; listing had {} items, {} of which were new",
                            new Object[]{millis, listedFiles.size(), newItems});
                }
            }
        } catch (IOException e) {
            context.yield();
            getLogger().warn("Error while retrieving list of files due to {}", new Object[]{e});
            return;
        }

        // prepare to process a batch of files in the queue
        queueLock.lock();
        try {
            filePathQueue.drainTo(files);
            if (files.isEmpty()) {
                // nothing to do!
                session.remove(parentFlowFile);
                context.yield();
                return;
            }
        } finally {
            queueLock.unlock();
        }

        processBatchOfFiles(files, context, session, parentFlowFile);

        queueLock.lock();
        try {
            processing.removeAll(files);
        } finally {
            queueLock.unlock();
        }

        session.remove(parentFlowFile);
    }

    protected void processBatchOfFiles(final List<Path> files, final ProcessContext context,
                                       final ProcessSession session, FlowFile parentFlowFile) {
        Preconditions.checkState(parentFlowFile != null, "No parent flowfile for this batch was provided");

        // process the batch of files
        final Configuration conf = getConfiguration();
        final FileSystem hdfs = getFileSystem();
        final UserGroupInformation ugi = getUserGroupInformation();

        if (conf == null || ugi == null) {
            getLogger().error("Configuration or UserGroupInformation not configured properly");
            session.transfer(parentFlowFile, REL_FAILURE);
            context.yield();
            return;
        }

        for (final Path file : files) {

            ugi.doAs(new PrivilegedAction<Object>() {
                @Override
                public Object run() {
                    FlowFile flowFile = session.create(parentFlowFile);
                    try {
                        final String originalFilename = file.getName();
                        final Path configuredRootOutputDirPath = processorConfig.getOutputDirectory();
                        final Path newFile = new Path(configuredRootOutputDirPath, originalFilename);
                        final boolean destinationExists = hdfs.exists(newFile);
                        // If destination file already exists, resolve that
                        // based on processor configuration
                        if (destinationExists) {
                            switch (processorConfig.getConflictResolution()) {
                                case REPLACE_RESOLUTION:
                                    if (hdfs.delete(file, false)) {
                                        getLogger().info("deleted {} in order to replace with the contents of {}",
                                                new Object[]{file, flowFile});
                                    }
                                    break;
                                case IGNORE_RESOLUTION:
                                    session.transfer(flowFile, REL_SUCCESS);
                                    getLogger().info(
                                            "transferring {} to success because file with same name already exists",
                                            new Object[]{flowFile});
                                    return null;
                                case FAIL_RESOLUTION:
                                    session.transfer(session.penalize(flowFile), REL_FAILURE);
                                    getLogger().warn(
                                            "penalizing {} and routing to failure because file with same name already exists",
                                            new Object[]{flowFile});
                                    return null;
                                default:
                                    break;
                            }
                        }

                        // Create destination directory if it does not exist
                        try {
                            if (!hdfs.getFileStatus(configuredRootOutputDirPath).isDirectory()) {
                                throw new IOException(configuredRootOutputDirPath.toString()
                                        + " already exists and is not a directory");
                            }
                        } catch (FileNotFoundException fe) {
                            if (!hdfs.mkdirs(configuredRootOutputDirPath)) {
                                throw new IOException(configuredRootOutputDirPath.toString() + " could not be created");
                            }
                            changeOwner(context, hdfs, configuredRootOutputDirPath);
                        }

                        boolean moved = false;
                        for (int i = 0; i < 10; i++) { // try to rename multiple times
                            if (processorConfig.getOperation().equals("move")) {
                                if (hdfs.rename(file, newFile)) {
                                    moved = true;
                                    break; // rename was successful
                                }
                            } else {
                                if (FileUtil.copy(hdfs, file, hdfs, newFile, false, conf)) {
                                    moved = true;
                                    break; // copy was successful
                                }
                            }
                            Thread.sleep(200L); // try waiting to let whatever might cause rename failure to resolve
                        }
                        if (!moved) {
                            throw new ProcessException("Could not move file " + file + " to its final filename");
                        }

                        changeOwner(context, hdfs, newFile);
                        final String outputPath = newFile.toString();
                        final String newFilename = newFile.getName();
                        final String hdfsPath = newFile.getParent().toString();
                        flowFile = session.putAttribute(flowFile, CoreAttributes.FILENAME.key(), newFilename);
                        flowFile = session.putAttribute(flowFile, ABSOLUTE_HDFS_PATH_ATTRIBUTE, hdfsPath);
                        final String transitUri = (outputPath.startsWith("/")) ? "hdfs:/" + outputPath
                                : "hdfs://" + outputPath;
                        session.getProvenanceReporter().send(flowFile, transitUri);
                        session.transfer(flowFile, REL_SUCCESS);

                    } catch (final Throwable t) {
                        getLogger().error("Failed to rename on HDFS due to {}", new Object[]{t});
                        session.transfer(session.penalize(flowFile), REL_FAILURE);
                        context.yield();
                    }
                    return null;
                }
            });
        }
    }

    protected Set<Path> performListing(final ProcessContext context, Path path) throws IOException {
        Set<Path> listing = null;

        if (listingLock.tryLock()) {
            try {
                final FileSystem hdfs = getFileSystem();
                // get listing
                listing = selectFiles(hdfs, path, null);
            } finally {
                listingLock.unlock();
            }
        }

        return listing;
    }

    protected void changeOwner(final ProcessContext context, final FileSystem hdfs, final Path name) {
        try {
            // Change owner and group of file if configured to do so
            String owner = context.getProperty(REMOTE_OWNER).getValue();
            String group = context.getProperty(REMOTE_GROUP).getValue();
            if (owner != null || group != null) {
                hdfs.setOwner(name, owner, group);
            }
        } catch (Exception e) {
            getLogger().warn("Could not change owner or group of {} on HDFS due to {}", new Object[]{name, e.getMessage()}, e);
        }
    }

    protected Set<Path> selectFiles(final FileSystem hdfs, final Path inputPath, Set<Path> filesVisited)
            throws IOException {
        if (null == filesVisited) {
            filesVisited = new HashSet<>();
        }

        if (!hdfs.exists(inputPath)) {
            throw new IOException("Selection directory " + inputPath.toString() + " doesn't appear to exist!");
        }

        final Set<Path> files = new HashSet<>();

        FileStatus inputStatus = hdfs.getFileStatus(inputPath);

        if (inputStatus.isDirectory()) {
            for (final FileStatus file : hdfs.listStatus(inputPath)) {
                final Path canonicalFile = file.getPath();

                if (!filesVisited.add(canonicalFile)) { // skip files we've already seen (may be looping directory links)
                    continue;
                }

                if (!file.isDirectory() && processorConfig.getPathFilter(inputPath).accept(canonicalFile)) {
                    files.add(canonicalFile);

                    if (getLogger().isDebugEnabled()) {
                        getLogger().debug(this + " selected file at path: " + canonicalFile.toString());
                    }
                }
            }
        } else if (inputStatus.isFile()) {
            files.add(inputPath);
        }
        return files;
    }

    protected static class ProcessorConfiguration {

        final private String conflictResolution;
        final private String operation;
        final private Path inputRootDirPath;
        final private Path outputRootDirPath;
        final private Pattern fileFilterPattern;
        final private boolean ignoreDottedFiles;

        ProcessorConfiguration(final ProcessContext context) {
            conflictResolution = context.getProperty(CONFLICT_RESOLUTION).getValue();
            operation = context.getProperty(OPERATION).getValue();
            final String inputDirValue = context.getProperty(INPUT_DIRECTORY_OR_FILE).evaluateAttributeExpressions().getValue();
            inputRootDirPath = new Path(inputDirValue);
            final String outputDirValue = context.getProperty(OUTPUT_DIRECTORY).evaluateAttributeExpressions().getValue();
            outputRootDirPath = new Path(outputDirValue);
            final String fileFilterRegex = context.getProperty(FILE_FILTER_REGEX).getValue();
            fileFilterPattern = (fileFilterRegex == null) ? null : Pattern.compile(fileFilterRegex);
            ignoreDottedFiles = context.getProperty(IGNORE_DOTTED_FILES).asBoolean();
        }

        public String getOperation() {
            return operation;
        }

        public String getConflictResolution() {
            return conflictResolution;
        }

        public Path getInput() {
            return inputRootDirPath;
        }

        public Path getOutputDirectory() {
            return outputRootDirPath;
        }

        protected PathFilter getPathFilter(final Path dir) {
            return new PathFilter() {

                @Override
                public boolean accept(Path path) {
                    if (ignoreDottedFiles && path.getName().startsWith(".")) {
                        return false;
                    }
                    final String pathToCompare;
                    String relativePath = getPathDifference(dir, path);
                    if (relativePath.length() == 0) {
                        pathToCompare = path.getName();
                    } else {
                        pathToCompare = relativePath + Path.SEPARATOR + path.getName();
                    }

                    if (fileFilterPattern != null && !fileFilterPattern.matcher(pathToCompare).matches()) {
                        return false;
                    }
                    return true;
                }

            };
        }
    }
}

MoveHDFSTest.java:

@@ -1,15 +1,22 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hadoop;

import org.apache.commons.io.FileUtils;

import org.apache.nifi.components.ValidationResult;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.processor.ProcessContext;

@@ -23,173 +30,222 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;

import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class MoveHDFSTest {

    private static final String OUTPUT_DIRECTORY = "target/test-data-output";
    private static final String TEST_DATA_DIRECTORY = "src/test/resources/testdata";
    private static final String INPUT_DIRECTORY = "target/test-data-input";
    private NiFiProperties mockNiFiProperties;
    private KerberosProperties kerberosProperties;

    @Before
    public void setup() {
        mockNiFiProperties = mock(NiFiProperties.class);
        when(mockNiFiProperties.getKerberosConfigurationFile()).thenReturn(null);
        kerberosProperties = new KerberosProperties(null);
    }

    @After
    public void teardown() {
        File inputDirectory = new File(INPUT_DIRECTORY);
        File outputDirectory = new File(OUTPUT_DIRECTORY);
        if (inputDirectory.exists()) {
            Assert.assertTrue("Could not delete input directory: " + inputDirectory, FileUtils.deleteQuietly(inputDirectory));
        }
        if (outputDirectory.exists()) {
            Assert.assertTrue("Could not delete output directory: " + outputDirectory, FileUtils.deleteQuietly(outputDirectory));
        }
    }

    @Test
    public void testOutputDirectoryValidator() {
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        Collection<ValidationResult> results;
        ProcessContext pc;

        results = new HashSet<>();
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, "/source");
        runner.enqueue(new byte[0]);
        pc = runner.getProcessContext();
        if (pc instanceof MockProcessContext) {
            results = ((MockProcessContext) pc).validate();
        }
        Assert.assertEquals(1, results.size());
        for (ValidationResult vr : results) {
            assertTrue(vr.toString().contains("Output Directory is required"));
        }
    }

    @Test
    public void testBothInputAndOutputDirectoriesAreValid() {
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        Collection<ValidationResult> results;
        ProcessContext pc;

        results = new HashSet<>();
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.enqueue(new byte[0]);
        pc = runner.getProcessContext();
        if (pc instanceof MockProcessContext) {
            results = ((MockProcessContext) pc).validate();
        }
        Assert.assertEquals(0, results.size());
    }

    @Test
    public void testOnScheduledShouldRunCleanly() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(7, flowFiles.size());
    }

    @Test
    public void testDotFileFilterIgnore() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.IGNORE_DOTTED_FILES, "true");
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(7, flowFiles.size());
        Assert.assertTrue(new File(INPUT_DIRECTORY, ".dotfile").exists());
    }

    @Test
    public void testDotFileFilterInclude() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.IGNORE_DOTTED_FILES, "false");
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(8, flowFiles.size());
    }

    @Test
    public void testFileFilterRegex() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.FILE_FILTER_REGEX, ".*\\.gz");
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(1, flowFiles.size());
    }

    @Test
    public void testSingleFileAsInputCopy() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY + "/randombytes-1");
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OPERATION, "copy");
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(1, flowFiles.size());
        Assert.assertTrue(new File(INPUT_DIRECTORY, "randombytes-1").exists());
        Assert.assertTrue(new File(OUTPUT_DIRECTORY, "randombytes-1").exists());
    }

    @Test
    public void testSingleFileAsInputMove() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY + "/randombytes-1");
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(1, flowFiles.size());
        Assert.assertFalse(new File(INPUT_DIRECTORY, "randombytes-1").exists());
        Assert.assertTrue(new File(OUTPUT_DIRECTORY, "randombytes-1").exists());
    }

    @Test
    public void testDirectoryWithSubDirectoryAsInputMove() throws IOException {
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
        File subdir = new File(INPUT_DIRECTORY, "subdir");
        FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), subdir);
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.enqueue(new byte[0]);
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(7, flowFiles.size());
        Assert.assertTrue(new File(INPUT_DIRECTORY).exists());
        Assert.assertTrue(subdir.exists());
    }

    @Test
    public void testEmptyInputDirectory() throws IOException {
        MoveHDFS proc = new TestableMoveHDFS(kerberosProperties);
        TestRunner runner = TestRunners.newTestRunner(proc);
        Files.createDirectories(Paths.get(INPUT_DIRECTORY));
        runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, INPUT_DIRECTORY);
        runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
        runner.enqueue(new byte[0]);
        Assert.assertEquals(0, Files.list(Paths.get(INPUT_DIRECTORY)).count());
        runner.run();
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS);
        runner.assertAllFlowFilesTransferred(MoveHDFS.REL_SUCCESS);
        Assert.assertEquals(0, flowFiles.size());
    }

    private static class TestableMoveHDFS extends MoveHDFS {

        private KerberosProperties testKerberosProperties;

        public TestableMoveHDFS(KerberosProperties testKerberosProperties) {
            this.testKerberosProperties = testKerberosProperties;
        }

        @Override
        protected KerberosProperties getKerberosProperties(File kerberosConfigFile) {
            return testKerberosProperties;
        }

    }

}
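Because Input Directory or File and Output Directory are now run through evaluateAttributeExpressions(), a test can drive the input location from a FlowFile attribute instead of a literal path. The following is only a hedged sketch, not one of the committed tests: it assumes the test-class setup above (the directory constants, kerberosProperties, and the TestableMoveHDFS helper), java.util.Map/HashMap imports, and that the input property is evaluated against the incoming FlowFile, which its ${path} default suggests.

    // Sketch: feed the input directory through the incoming FlowFile's "path" attribute.
    FileUtils.copyDirectory(new File(TEST_DATA_DIRECTORY), new File(INPUT_DIRECTORY));
    TestRunner runner = TestRunners.newTestRunner(new TestableMoveHDFS(kerberosProperties));
    runner.setProperty(MoveHDFS.INPUT_DIRECTORY_OR_FILE, "${path}");  // expression language, evaluated per FlowFile
    runner.setProperty(MoveHDFS.OUTPUT_DIRECTORY, OUTPUT_DIRECTORY);
    Map<String, String> attributes = new HashMap<>();
    attributes.put("path", INPUT_DIRECTORY);
    runner.enqueue(new byte[0], attributes);
    runner.run();
    Assert.assertEquals(7, runner.getFlowFilesForRelationship(MoveHDFS.REL_SUCCESS).size());

The expected count of 7 mirrors testOnScheduledShouldRunCleanly, since dotted files are ignored by default.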