NIFI-450: Catch Throwable from all implementations of Runnable in the FileSystemRepository; these are expected to always be running, so if anything odd like an OutOfMemoryError occurs, this needs to be caught rather than allowing the thread to die

This commit is contained in:
Mark Payne 2015-03-23 15:19:18 -04:00
parent d6408046bc
commit 54f3476a4c

View File

@ -791,34 +791,38 @@ public class FileSystemRepository implements ContentRepository {
@Override
public void run() {
// Get all of the Destructable Claims and bin them based on their Container. We do this
// because the Container generally maps to a physical partition on the disk, so we want a few
// different threads hitting the different partitions but don't want multiple threads hitting
// the same partition.
final List<ContentClaim> toDestroy = new ArrayList<>();
while (true) {
toDestroy.clear();
contentClaimManager.drainDestructableClaims(toDestroy, 10000);
if (toDestroy.isEmpty()) {
return;
}
try {
// Get all of the Destructable Claims and bin them based on their Container. We do this
// because the Container generally maps to a physical partition on the disk, so we want a few
// different threads hitting the different partitions but don't want multiple threads hitting
// the same partition.
final List<ContentClaim> toDestroy = new ArrayList<>();
while (true) {
toDestroy.clear();
contentClaimManager.drainDestructableClaims(toDestroy, 10000);
if (toDestroy.isEmpty()) {
return;
}
for (final ContentClaim claim : toDestroy) {
final String container = claim.getContainer();
final BlockingQueue<ContentClaim> claimQueue = reclaimable.get(container);
for (final ContentClaim claim : toDestroy) {
final String container = claim.getContainer();
final BlockingQueue<ContentClaim> claimQueue = reclaimable.get(container);
try {
while (true) {
if (claimQueue.offer(claim, 10, TimeUnit.MINUTES)) {
break;
} else {
LOG.warn("Failed to clean up {} because old claims aren't being cleaned up fast enough. This Content Claim will remain in the Content Repository until NiFi is restarted, at which point it will be cleaned up", claim);
try {
while (true) {
if (claimQueue.offer(claim, 10, TimeUnit.MINUTES)) {
break;
} else {
LOG.warn("Failed to clean up {} because old claims aren't being cleaned up fast enough. This Content Claim will remain in the Content Repository until NiFi is restarted, at which point it will be cleaned up", claim);
}
}
} catch (final InterruptedException ie) {
LOG.warn("Failed to clean up {} because thread was interrupted", claim);
}
} catch (final InterruptedException ie) {
LOG.warn("Failed to clean up {} because thread was interrupted", claim);
}
}
} catch (final Throwable t) {
LOG.error("Failed to cleanup content claims due to {}", t);
}
}
}
@ -1198,23 +1202,23 @@ public class FileSystemRepository implements ContentRepository {
@Override
public void run() {
if (oldestArchiveDate.get() > (System.currentTimeMillis() - maxArchiveMillis)) {
final Long minRequiredSpace = minUsableContainerBytesForArchive.get(containerName);
if (minRequiredSpace == null) {
return;
}
try {
final long usableSpace = getContainerUsableSpace(containerName);
if (usableSpace > minRequiredSpace) {
try {
if (oldestArchiveDate.get() > (System.currentTimeMillis() - maxArchiveMillis)) {
final Long minRequiredSpace = minUsableContainerBytesForArchive.get(containerName);
if (minRequiredSpace == null) {
return;
}
} catch (final Exception e) {
LOG.error("Failed to determine space available in container {}; will attempt to cleanup archive", containerName);
}
}
try {
try {
final long usableSpace = getContainerUsableSpace(containerName);
if (usableSpace > minRequiredSpace) {
return;
}
} catch (final Exception e) {
LOG.error("Failed to determine space available in container {}; will attempt to cleanup archive", containerName);
}
}
Thread.currentThread().setName("Cleanup Archive for " + containerName);
final long oldestContainerArchive;