mirror of https://github.com/apache/nifi.git
NIFI-2547: Add DeleteHDFS Processor
This processor adds the capability to delete files or directories in HDFS. Paths support both static and Expression Language values, as well as globs (e.g. /data/for/2016/07/*). This processor may be used standalone or as part of a downstream connection.

Signed-off-by: Matt Burgess <mattyb149@apache.org>

Add Glob Matcher with Tests

Also set displayName on properties.

Signed-off-by: Matt Burgess <mattyb149@apache.org>

This closes #850
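A hedged sketch of the three kinds of values the property accepts, written as it might appear inside the TestDeleteHDFS class added below (it reuses that class's TestableDeleteHDFS helper and mocked fields, so it is not standalone; the hdfs.file attribute name is illustrative):

    @Test
    public void sketchPropertyForms() {
        TestRunner runner = TestRunners.newTestRunner(new TestableDeleteHDFS(kerberosProperties, mockFileSystem));
        // A static path:
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, "/data/file.txt");
        // ...or an Expression Language reference resolved against each incoming FlowFile:
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, "${hdfs.file}");
        // ...or a glob that matches multiple files at once (each call overwrites the last):
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, "/data/for/2016/07/*");
        runner.assertValid();
    }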
This commit is contained in:
parent a919844461
commit 26d362b144

@@ -0,0 +1,168 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hadoop;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.nifi.annotation.behavior.InputRequirement;
import org.apache.nifi.annotation.behavior.TriggerWhenEmpty;
import org.apache.nifi.annotation.documentation.CapabilityDescription;
import org.apache.nifi.annotation.documentation.Tags;
import org.apache.nifi.components.PropertyDescriptor;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessSession;
import org.apache.nifi.processor.Relationship;
import org.apache.nifi.processor.exception.ProcessException;
import org.apache.nifi.processor.util.StandardValidators;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
@TriggerWhenEmpty
@InputRequirement(InputRequirement.Requirement.INPUT_ALLOWED)
@Tags({"hadoop", "HDFS", "delete", "remove", "filesystem"})
@CapabilityDescription("Deletes a file from HDFS. The file can be provided as an attribute from an incoming FlowFile, "
        + "or a statically set file that is periodically removed. If this processor has an incoming connection, it "
        + "will ignore running on a periodic basis and instead rely on incoming FlowFiles to trigger a delete. "
        + "Optionally, you may use a wildcard character to match multiple files or directories.")
public class DeleteHDFS extends AbstractHadoopProcessor {
    public static final Relationship REL_SUCCESS = new Relationship.Builder()
            .name("success")
            .description("FlowFiles will be routed here if the delete command was successful")
            .build();

    public static final Relationship REL_FAILURE = new Relationship.Builder()
            .name("failure")
            .description("FlowFiles will be routed here if the delete command was unsuccessful")
            .build();

    public static final PropertyDescriptor FILE_OR_DIRECTORY = new PropertyDescriptor.Builder()
            .name("file_or_directory")
            .displayName("File or Directory")
            .description("The HDFS file or directory to delete. A wildcard expression may be used to only delete certain files")
            .required(true)
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .expressionLanguageSupported(true)
            .build();

    public static final PropertyDescriptor RECURSIVE = new PropertyDescriptor.Builder()
            .name("recursive")
            .displayName("Recursive")
            .description("Remove contents of a non-empty directory recursively")
            .allowableValues("true", "false")
            .required(true)
            .defaultValue("true")
            .addValidator(StandardValidators.NON_EMPTY_VALIDATOR)
            .build();
    // Matches any character that marks the supplied path as a Hadoop glob expression rather than a literal path
    protected final Pattern GLOB_PATTERN = Pattern.compile("\\[|\\]|\\*|\\?|\\^|\\{|\\}|\\\\c");
    protected final Matcher GLOB_MATCHER = GLOB_PATTERN.matcher("");
    private static final Set<Relationship> relationships;

    static {
        final Set<Relationship> relationshipSet = new HashSet<>();
        relationshipSet.add(REL_SUCCESS);
        relationshipSet.add(REL_FAILURE);
        relationships = Collections.unmodifiableSet(relationshipSet);
    }

    @Override
    protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
        List<PropertyDescriptor> props = new ArrayList<>(properties);
        props.add(FILE_OR_DIRECTORY);
        props.add(RECURSIVE);
        return props;
    }

    @Override
    public Set<Relationship> getRelationships() {
        return relationships;
    }
    @Override
    public void onTrigger(ProcessContext context, ProcessSession session) throws ProcessException {
        String fileOrDirectoryName = null;
        FlowFile flowFile = session.get();

        // If this processor has an incoming connection, then do not run unless a
        // FlowFile is actually sent through
        if (flowFile == null && context.hasIncomingConnection()) {
            context.yield();
            return;
        }

        if (flowFile != null) {
            fileOrDirectoryName = context.getProperty(FILE_OR_DIRECTORY).evaluateAttributeExpressions(flowFile).getValue();
        } else {
            fileOrDirectoryName = context.getProperty(FILE_OR_DIRECTORY).evaluateAttributeExpressions().getValue();
        }

        final FileSystem fileSystem = getFileSystem();
        try {
            // Check if the user has supplied a file or directory pattern
            List<Path> pathList = Lists.newArrayList();
            if (GLOB_MATCHER.reset(fileOrDirectoryName).find()) {
                FileStatus[] fileStatuses = fileSystem.globStatus(new Path(fileOrDirectoryName));
                if (fileStatuses != null) {
                    for (FileStatus fileStatus : fileStatuses) {
                        pathList.add(fileStatus.getPath());
                    }
                }
            } else {
                pathList.add(new Path(fileOrDirectoryName));
            }

            Map<String, String> attributes = Maps.newHashMapWithExpectedSize(2);
            for (Path path : pathList) {
                attributes.put("filename", path.getName());
                attributes.put("path", path.getParent().toString());
                if (fileSystem.exists(path)) {
                    fileSystem.delete(path, context.getProperty(RECURSIVE).asBoolean());
                    if (!context.hasIncomingConnection()) {
                        flowFile = session.create();
                    }
                    session.transfer(session.putAllAttributes(flowFile, attributes), REL_SUCCESS);
                } else {
                    getLogger().warn("File (" + path + ") does not exist");
                    if (!context.hasIncomingConnection()) {
                        flowFile = session.create();
                    }
                    session.transfer(session.putAllAttributes(flowFile, attributes), REL_FAILURE);
                }
            }
        } catch (IOException e) {
            getLogger().warn("Error processing delete for file or directory", e);
            if (flowFile != null) {
                session.rollback(true);
            }
        }
    }

}
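For context on the glob branch above: Hadoop's FileSystem.globStatus expands a wildcard path into the concrete paths it matches (returning null when nothing matches), and onTrigger then deletes each match individually. A minimal standalone sketch of that sequence, assuming an HDFS configuration is available on the classpath; the class name and path below are illustrative, not part of this commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobDeleteSketch {
    public static void main(String[] args) throws Exception {
        // Assumes core-site.xml/hdfs-site.xml are on the classpath; otherwise this resolves to the local FS
        FileSystem fs = FileSystem.get(new Configuration());
        // Expand the glob into concrete matches; globStatus returns null when nothing matched
        FileStatus[] matches = fs.globStatus(new Path("/data/for/2016/07/*"));
        if (matches != null) {
            for (FileStatus status : matches) {
                // true = recursive, mirroring the processor's Recursive property
                fs.delete(status.getPath(), true);
            }
        }
    }
}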
@@ -19,3 +19,4 @@ org.apache.nifi.processors.hadoop.GetHDFSSequenceFile
org.apache.nifi.processors.hadoop.inotify.GetHDFSEvents
org.apache.nifi.processors.hadoop.ListHDFS
org.apache.nifi.processors.hadoop.PutHDFS
org.apache.nifi.processors.hadoop.DeleteHDFS

@@ -0,0 +1,198 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nifi.processors.hadoop;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.any;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.nifi.flowfile.FlowFile;
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.util.MockFlowFile;
import org.apache.nifi.util.NiFiProperties;
import org.apache.nifi.util.TestRunner;
import org.apache.nifi.util.TestRunners;
import org.junit.Before;
import org.junit.Test;

import com.google.common.collect.Maps;
public class TestDeleteHDFS {
    private NiFiProperties mockNiFiProperties;
    private FileSystem mockFileSystem;
    private KerberosProperties kerberosProperties;

    @Before
    public void setup() throws Exception {
        mockNiFiProperties = mock(NiFiProperties.class);
        when(mockNiFiProperties.getKerberosConfigurationFile()).thenReturn(null);
        kerberosProperties = KerberosProperties.create(mockNiFiProperties);
        mockFileSystem = mock(FileSystem.class);
    }
    @Test
    public void testSuccessfulDelete() throws Exception {
        Path filePath = new Path("/some/path/to/file.txt");
        when(mockFileSystem.exists(any(Path.class))).thenReturn(true);
        DeleteHDFS deleteHDFS = new TestableDeleteHDFS(kerberosProperties, mockFileSystem);
        TestRunner runner = TestRunners.newTestRunner(deleteHDFS);
        runner.setIncomingConnection(false);
        runner.assertNotValid();
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, filePath.toString());
        runner.assertValid();
        runner.run();
        runner.assertAllFlowFilesTransferred(DeleteHDFS.REL_SUCCESS);
        runner.assertTransferCount(DeleteHDFS.REL_SUCCESS, 1);
        FlowFile flowFile = runner.getFlowFilesForRelationship(DeleteHDFS.REL_SUCCESS).get(0);
        assertEquals(filePath.getName(), flowFile.getAttribute("filename"));
        assertEquals(filePath.getParent().toString(), flowFile.getAttribute("path"));
    }

    @Test
    public void testDeleteFromIncomingFlowFile() throws Exception {
        Path filePath = new Path("/some/path/to/file.txt");
        when(mockFileSystem.exists(any(Path.class))).thenReturn(true);
        DeleteHDFS deleteHDFS = new TestableDeleteHDFS(kerberosProperties, mockFileSystem);
        TestRunner runner = TestRunners.newTestRunner(deleteHDFS);
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, "${hdfs.file}");
        Map<String, String> attributes = Maps.newHashMap();
        attributes.put("hdfs.file", filePath.toString());
        runner.enqueue("foo", attributes);
        runner.run();
        runner.assertAllFlowFilesTransferred(DeleteHDFS.REL_SUCCESS);
        runner.assertTransferCount(DeleteHDFS.REL_SUCCESS, 1);
        FlowFile flowFile = runner.getFlowFilesForRelationship(DeleteHDFS.REL_SUCCESS).get(0);
        assertEquals(filePath.getName(), flowFile.getAttribute("filename"));
        assertEquals(filePath.getParent().toString(), flowFile.getAttribute("path"));
    }

    @Test
    public void testIOException() throws Exception {
        Path filePath = new Path("/some/path/to/file.txt");
        when(mockFileSystem.exists(any(Path.class))).thenThrow(new IOException());
        DeleteHDFS deleteHDFS = new TestableDeleteHDFS(kerberosProperties, mockFileSystem);
        TestRunner runner = TestRunners.newTestRunner(deleteHDFS);
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, "${hdfs.file}");
        Map<String, String> attributes = Maps.newHashMap();
        attributes.put("hdfs.file", filePath.toString());
        runner.enqueue("foo", attributes);
        runner.run();
        runner.assertQueueNotEmpty();
        runner.assertPenalizeCount(1);
        assertEquals(1, runner.getQueueSize().getObjectCount());
    }

    @Test
    public void testNoFlowFilesWithIncomingConnection() throws Exception {
        Path filePath = new Path("${hdfs.file}");
        DeleteHDFS deleteHDFS = new TestableDeleteHDFS(kerberosProperties, mockFileSystem);
        TestRunner runner = TestRunners.newTestRunner(deleteHDFS);
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, filePath.toString());
        runner.setIncomingConnection(true);
        runner.run();
        runner.assertQueueEmpty();
        runner.assertTransferCount(DeleteHDFS.REL_SUCCESS, 0);
        runner.assertTransferCount(DeleteHDFS.REL_FAILURE, 0);
    }

    @Test
    public void testUnsuccessfulDelete() throws Exception {
        Path filePath = new Path("/some/path/to/file.txt");
        when(mockFileSystem.exists(any(Path.class))).thenReturn(false);
        DeleteHDFS deleteHDFS = new TestableDeleteHDFS(kerberosProperties, mockFileSystem);
        TestRunner runner = TestRunners.newTestRunner(deleteHDFS);
        runner.setIncomingConnection(false);
        runner.assertNotValid();
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, filePath.toString());
        runner.assertValid();
        runner.run();
        runner.assertAllFlowFilesTransferred(DeleteHDFS.REL_FAILURE);
        runner.assertTransferCount(DeleteHDFS.REL_FAILURE, 1);
        FlowFile flowFile = runner.getFlowFilesForRelationship(DeleteHDFS.REL_FAILURE).get(0);
        assertEquals(filePath.getName(), flowFile.getAttribute("filename"));
        assertEquals(filePath.getParent().toString(), flowFile.getAttribute("path"));
    }

    @Test
    public void testGlobDelete() throws Exception {
        Path glob = new Path("/data/for/2017/08/05/*");
        int fileCount = 300;
        FileStatus[] fileStatuses = new FileStatus[fileCount];
        for (int i = 0; i < fileCount; i++) {
            Path file = new Path("/data/for/2017/08/05/file" + i);
            FileStatus fileStatus = mock(FileStatus.class);
            when(fileStatus.getPath()).thenReturn(file);
            fileStatuses[i] = fileStatus;
        }
        when(mockFileSystem.exists(any(Path.class))).thenReturn(true);
        when(mockFileSystem.globStatus(any(Path.class))).thenReturn(fileStatuses);
        DeleteHDFS deleteHDFS = new TestableDeleteHDFS(kerberosProperties, mockFileSystem);
        TestRunner runner = TestRunners.newTestRunner(deleteHDFS);
        runner.setIncomingConnection(false);
        runner.assertNotValid();
        runner.setProperty(DeleteHDFS.FILE_OR_DIRECTORY, glob.toString());
        runner.assertValid();
        runner.run();
        runner.assertAllFlowFilesTransferred(DeleteHDFS.REL_SUCCESS);
        runner.assertTransferCount(DeleteHDFS.REL_SUCCESS, fileCount);
        List<MockFlowFile> flowFiles = runner.getFlowFilesForRelationship(DeleteHDFS.REL_SUCCESS);
        for (int i = 0; i < fileCount; i++) {
            FlowFile flowFile = flowFiles.get(i);
            assertEquals("file" + i, flowFile.getAttribute("filename"));
            assertEquals("/data/for/2017/08/05", flowFile.getAttribute("path"));
        }
    }

    @Test
    public void testGlobMatcher() throws Exception {
        DeleteHDFS deleteHDFS = new DeleteHDFS();
        assertTrue(deleteHDFS.GLOB_MATCHER.reset("/data/for/08/09/*").find());
        assertTrue(deleteHDFS.GLOB_MATCHER.reset("/data/for/08/09/[01-04]").find());
        assertTrue(deleteHDFS.GLOB_MATCHER.reset("/data/for/0?/09/").find());
        assertFalse(deleteHDFS.GLOB_MATCHER.reset("/data/for/08/09").find());
    }

    private static class TestableDeleteHDFS extends DeleteHDFS {
        private KerberosProperties testKerberosProperties;
        private FileSystem mockFileSystem;

        public TestableDeleteHDFS(KerberosProperties kerberosProperties, FileSystem mockFileSystem) {
            this.testKerberosProperties = kerberosProperties;
            this.mockFileSystem = mockFileSystem;
        }

        @Override
        protected KerberosProperties getKerberosProperties() {
            return testKerberosProperties;
        }

        @Override
        protected FileSystem getFileSystem() {
            return mockFileSystem;
        }
    }
}