mirror of https://github.com/apache/nifi.git
NIFI-11924 Closing FileSystem after using in HDFSExternalResourceProvider
This closes #7588. Signed-off-by: Peter Turcsanyi <turcsanyi@apache.org>
This commit is contained in:
parent
baea7ffee9
commit
7340bb8153
|
@ -376,20 +376,7 @@ public abstract class AbstractHadoopProcessor extends AbstractProcessor implemen
|
||||||
if (resources != null) {
|
if (resources != null) {
|
||||||
// Attempt to close the FileSystem
|
// Attempt to close the FileSystem
|
||||||
final FileSystem fileSystem = resources.getFileSystem();
|
final FileSystem fileSystem = resources.getFileSystem();
|
||||||
try {
|
HDFSResourceHelper.closeFileSystem(fileSystem);
|
||||||
interruptStatisticsThread(fileSystem);
|
|
||||||
} catch (Exception e) {
|
|
||||||
getLogger().warn("Error stopping FileSystem statistics thread: " + e.getMessage());
|
|
||||||
getLogger().debug("", e);
|
|
||||||
} finally {
|
|
||||||
if (fileSystem != null) {
|
|
||||||
try {
|
|
||||||
fileSystem.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
getLogger().warn("Error close FileSystem: " + e.getMessage(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear out the reference to the resources
|
// Clear out the reference to the resources
|
||||||
|
|
|
@ -0,0 +1,72 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.processors.hadoop;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.slf4j.Logger;
|
||||||
|
import org.slf4j.LoggerFactory;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
|
||||||
|
public final class HDFSResourceHelper {
|
||||||
|
private static final Logger LOGGER = LoggerFactory.getLogger(HDFSResourceHelper.class);
|
||||||
|
|
||||||
|
private HDFSResourceHelper() {
|
||||||
|
// Not to be instantiated
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void closeFileSystem(final FileSystem fileSystem) {
|
||||||
|
try {
|
||||||
|
interruptStatisticsThread(fileSystem);
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOGGER.warn("Error stopping FileSystem statistics thread: " + e.getMessage());
|
||||||
|
LOGGER.debug("", e);
|
||||||
|
} finally {
|
||||||
|
if (fileSystem != null) {
|
||||||
|
try {
|
||||||
|
fileSystem.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOGGER.warn("Error close FileSystem: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void interruptStatisticsThread(final FileSystem fileSystem) throws NoSuchFieldException, IllegalAccessException {
|
||||||
|
final Field statsField = FileSystem.class.getDeclaredField("statistics");
|
||||||
|
statsField.setAccessible(true);
|
||||||
|
|
||||||
|
final Object statsObj = statsField.get(fileSystem);
|
||||||
|
if (statsObj instanceof FileSystem.Statistics) {
|
||||||
|
final FileSystem.Statistics statistics = (FileSystem.Statistics) statsObj;
|
||||||
|
|
||||||
|
final Field statsThreadField = statistics.getClass().getDeclaredField("STATS_DATA_CLEANER");
|
||||||
|
statsThreadField.setAccessible(true);
|
||||||
|
|
||||||
|
final Object statsThreadObj = statsThreadField.get(statistics);
|
||||||
|
if (statsThreadObj instanceof Thread) {
|
||||||
|
final Thread statsThread = (Thread) statsThreadObj;
|
||||||
|
try {
|
||||||
|
statsThread.interrupt();
|
||||||
|
} catch (Exception e) {
|
||||||
|
LOGGER.warn("Error interrupting thread: " + e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -31,6 +31,7 @@ import org.apache.nifi.flow.resource.ExternalResourceProviderInitializationConte
|
||||||
import org.apache.nifi.flow.resource.ImmutableExternalResourceDescriptor;
|
import org.apache.nifi.flow.resource.ImmutableExternalResourceDescriptor;
|
||||||
import org.apache.nifi.hadoop.SecurityUtil;
|
import org.apache.nifi.hadoop.SecurityUtil;
|
||||||
import org.apache.nifi.processors.hadoop.ExtendedConfiguration;
|
import org.apache.nifi.processors.hadoop.ExtendedConfiguration;
|
||||||
|
import org.apache.nifi.processors.hadoop.HDFSResourceHelper;
|
||||||
import org.apache.nifi.processors.hadoop.HdfsResources;
|
import org.apache.nifi.processors.hadoop.HdfsResources;
|
||||||
import org.apache.nifi.security.krb.KerberosKeytabUser;
|
import org.apache.nifi.security.krb.KerberosKeytabUser;
|
||||||
import org.apache.nifi.security.krb.KerberosPasswordUser;
|
import org.apache.nifi.security.krb.KerberosPasswordUser;
|
||||||
|
@ -51,6 +52,11 @@ import java.util.List;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
// Implementation considerations: The public methods are considered as steps orchestrated by clients. As of this, there is no direct dependency
|
||||||
|
// or connection between {@code listResources} and {@code fetchExternalResource}: both are self-sufficing actions. As of this they do not share
|
||||||
|
// a {@code FileSystem} instance but every method is responsible for collecting and maintaining one. This comes with a minimal overhead but due to
|
||||||
|
// the nature of the service the method calls are relatively rare. Alternatively a provider could have a FileService instance maintained during its
|
||||||
|
// lifecycle but that is considered a more error-prone approach as it comes with logic regularly checking for the state of the maintained instance.
|
||||||
@RequiresInstanceClassLoading(cloneAncestorResources = true)
|
@RequiresInstanceClassLoading(cloneAncestorResources = true)
|
||||||
public class HDFSExternalResourceProvider implements ExternalResourceProvider {
|
public class HDFSExternalResourceProvider implements ExternalResourceProvider {
|
||||||
private static final Logger LOGGER = LoggerFactory.getLogger(HDFSExternalResourceProvider.class);
|
private static final Logger LOGGER = LoggerFactory.getLogger(HDFSExternalResourceProvider.class);
|
||||||
|
@ -104,7 +110,6 @@ public class HDFSExternalResourceProvider implements ExternalResourceProvider {
|
||||||
final HdfsResources hdfsResources = getHdfsResources();
|
final HdfsResources hdfsResources = getHdfsResources();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
final FileStatus[] fileStatuses = hdfsResources.getUserGroupInformation()
|
final FileStatus[] fileStatuses = hdfsResources.getUserGroupInformation()
|
||||||
.doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfsResources.getFileSystem().listStatus(sourceDirectory));
|
.doAs((PrivilegedExceptionAction<FileStatus[]>) () -> hdfsResources.getFileSystem().listStatus(sourceDirectory));
|
||||||
|
|
||||||
|
@ -122,6 +127,8 @@ public class HDFSExternalResourceProvider implements ExternalResourceProvider {
|
||||||
} catch (final InterruptedException e) {
|
} catch (final InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
throw new IOException("Provider cannot list resources", e);
|
throw new IOException("Provider cannot list resources", e);
|
||||||
|
} finally {
|
||||||
|
HDFSResourceHelper.closeFileSystem(hdfsResources.getFileSystem());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,13 +147,18 @@ public class HDFSExternalResourceProvider implements ExternalResourceProvider {
|
||||||
final HdfsResources hdfsResources = getHdfsResources();
|
final HdfsResources hdfsResources = getHdfsResources();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
return hdfsResources.getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> {
|
final FSDataInputStream fsDataInputStream =
|
||||||
|
hdfsResources.getUserGroupInformation().doAs((PrivilegedExceptionAction<FSDataInputStream>) () -> {
|
||||||
if (!hdfsResources.getFileSystem().exists(path)) {
|
if (!hdfsResources.getFileSystem().exists(path)) {
|
||||||
throw new IOException("Cannot find file in HDFS at location " + location);
|
throw new IOException("Cannot find file in HDFS at location " + location);
|
||||||
}
|
}
|
||||||
|
|
||||||
return hdfsResources.getFileSystem().open(path, BUFFER_SIZE_DEFAULT);
|
return hdfsResources.getFileSystem().open(path, BUFFER_SIZE_DEFAULT);
|
||||||
});
|
});
|
||||||
|
// The acquired InputStream is used by the client and for this reason the FileSystem cannot be closed here.
|
||||||
|
// The closing of the file system is delegated to the decorator (HDFSResourceInputStream) which will close
|
||||||
|
// it when the decorated input stream is closed.
|
||||||
|
return new HDFSResourceInputStream(hdfsResources.getFileSystem(), fsDataInputStream);
|
||||||
} catch (final InterruptedException e) {
|
} catch (final InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
throw new IOException("Error during acquiring file", e);
|
throw new IOException("Error during acquiring file", e);
|
||||||
|
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
package org.apache.nifi.flow.resource.hadoop;
|
||||||
|
|
||||||
|
import org.apache.hadoop.fs.FSDataInputStream;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.nifi.processors.hadoop.HDFSResourceHelper;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
|
||||||
|
final class HDFSResourceInputStream extends InputStream {
|
||||||
|
private final FileSystem fileSystem;
|
||||||
|
private final FSDataInputStream inputStream;
|
||||||
|
|
||||||
|
HDFSResourceInputStream(final FileSystem fileSystem, final FSDataInputStream inputStream) {
|
||||||
|
this.fileSystem = fileSystem;
|
||||||
|
this.inputStream = inputStream;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read() throws IOException {
|
||||||
|
return inputStream.read();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(final byte[] b) throws IOException {
|
||||||
|
return inputStream.read(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int read(final byte[] b, final int off, final int len) throws IOException {
|
||||||
|
return inputStream.read(b, off, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] readAllBytes() throws IOException {
|
||||||
|
return inputStream.readAllBytes();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public byte[] readNBytes(final int len) throws IOException {
|
||||||
|
return inputStream.readNBytes(len);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readNBytes(final byte[] b, final int off, final int len) throws IOException {
|
||||||
|
return inputStream.readNBytes(b, off, len);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long skip(final long n) throws IOException {
|
||||||
|
return inputStream.skip(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void skipNBytes(final long n) throws IOException {
|
||||||
|
inputStream.skipNBytes(n);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int available() throws IOException {
|
||||||
|
return inputStream.available();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void close() throws IOException {
|
||||||
|
inputStream.close();
|
||||||
|
HDFSResourceHelper.closeFileSystem(fileSystem);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void mark(final int readlimit) {
|
||||||
|
inputStream.mark(readlimit);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public synchronized void reset() throws IOException {
|
||||||
|
inputStream.reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean markSupported() {
|
||||||
|
return inputStream.markSupported();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public long transferTo(final OutputStream out) throws IOException {
|
||||||
|
return inputStream.transferTo(out);
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue