From 73c167197563d4d07a036739f9341a0f5e36b188 Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Mon, 9 Nov 2015 12:09:56 -0500 Subject: [PATCH] NIFI-1132: Limited number of Lineage Identifiers held to 100 and marked the getLineageIdentifiers() method as deprecated --- .../org/apache/nifi/flowfile/FlowFile.java | 7 ++ .../provenance/ProvenanceEventRecord.java | 7 ++ .../nifi/provenance/StandardQueryResult.java | 23 +++++- .../repository/StandardFlowFileRecord.java | 24 ++++-- .../TestPersistentProvenanceRepository.java | 78 ++----------------- 5 files changed, 60 insertions(+), 79 deletions(-) diff --git a/nifi-api/src/main/java/org/apache/nifi/flowfile/FlowFile.java b/nifi-api/src/main/java/org/apache/nifi/flowfile/FlowFile.java index 0e2c19d6ea..5edb7ddfa0 100644 --- a/nifi-api/src/main/java/org/apache/nifi/flowfile/FlowFile.java +++ b/nifi-api/src/main/java/org/apache/nifi/flowfile/FlowFile.java @@ -64,7 +64,14 @@ public interface FlowFile extends Comparable { * @return a set of identifiers that are unique to this FlowFile's lineage. * If FlowFile X is derived from FlowFile Y, both FlowFiles will have the * same value for the Lineage Claim ID. + * + * @deprecated this collection was erroneously unbounded and caused a lot of OutOfMemoryError problems + * when dealing with FlowFiles with many ancestors. This Collection is + * now capped at 100 lineage identifiers. This method was introduced with the idea of providing + * future performance improvements but due to the high cost of heap consumption will not be used + * in such a manner. As a result, this method will be removed in a future release. 
*/ + @Deprecated Set getLineageIdentifiers(); /** diff --git a/nifi-api/src/main/java/org/apache/nifi/provenance/ProvenanceEventRecord.java b/nifi-api/src/main/java/org/apache/nifi/provenance/ProvenanceEventRecord.java index dc251b3f17..fc26d93467 100644 --- a/nifi-api/src/main/java/org/apache/nifi/provenance/ProvenanceEventRecord.java +++ b/nifi-api/src/main/java/org/apache/nifi/provenance/ProvenanceEventRecord.java @@ -51,7 +51,14 @@ public interface ProvenanceEventRecord { /** * @return the set of all lineage identifiers that are associated with the * FlowFile for which this Event was created + * + * @deprecated this collection was erroneously unbounded and caused a lot of OutOfMemoryError problems + * when querying Provenance Events about FlowFiles with many ancestors. This Collection is + * now capped at 100 lineage identifiers. This method was introduced with the idea of providing + * future performance improvements but due to the high cost of heap consumption will not be used + * in such a manner. As a result, this method will be removed in a future release. 
*/ + @Deprecated Set getLineageIdentifiers(); /** diff --git a/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardQueryResult.java b/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardQueryResult.java index 9a9a27d79a..03ab3eabab 100644 --- a/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardQueryResult.java +++ b/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardQueryResult.java @@ -18,8 +18,12 @@ package org.apache.nifi.provenance; import java.util.ArrayList; import java.util.Collection; +import java.util.Comparator; import java.util.Date; +import java.util.Iterator; import java.util.List; +import java.util.Set; +import java.util.TreeSet; import java.util.concurrent.TimeUnit; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReadWriteLock; @@ -40,7 +44,7 @@ public class StandardQueryResult implements QueryResult { private final Lock writeLock = rwLock.writeLock(); // guarded by writeLock - private final List matchingRecords = new ArrayList<>(); + private final Set matchingRecords = new TreeSet<>(new EventIdComparator()); private long totalHitCount; private int numCompletedSteps = 0; private Date expirationDate; @@ -66,8 +70,14 @@ public class StandardQueryResult implements QueryResult { } final List copy = new ArrayList<>(query.getMaxResults()); - for (int i = 0; i < query.getMaxResults(); i++) { - copy.add(matchingRecords.get(i)); + + int i = 0; + final Iterator itr = matchingRecords.iterator(); + while (itr.hasNext()) { + copy.add(itr.next()); + if (++i >= query.getMaxResults()) { + break; + } } return copy; @@ -165,4 +175,11 @@ public class StandardQueryResult implements QueryResult { private void updateExpiration() { expirationDate = new Date(System.currentTimeMillis() + TTL); } + + private static class EventIdComparator implements Comparator { + @Override + public int compare(final 
ProvenanceEventRecord o1, final ProvenanceEventRecord o2) { + return Long.compare(o2.getEventId(), o1.getEventId()); + } + } } diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardFlowFileRecord.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardFlowFileRecord.java index cc8c734c99..5474c7a165 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardFlowFileRecord.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardFlowFileRecord.java @@ -25,24 +25,25 @@ import java.util.Map; import java.util.Set; import java.util.regex.Pattern; -import org.apache.nifi.controller.repository.claim.ContentClaim; -import org.apache.nifi.flowfile.FlowFile; -import org.apache.nifi.flowfile.attributes.CoreAttributes; - import org.apache.commons.lang3.builder.CompareToBuilder; import org.apache.commons.lang3.builder.EqualsBuilder; import org.apache.commons.lang3.builder.HashCodeBuilder; import org.apache.commons.lang3.builder.ToStringBuilder; import org.apache.commons.lang3.builder.ToStringStyle; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.flowfile.attributes.CoreAttributes; /** *

- * A flow file is a logical notion of an item in a flow with its associated attributes and identity which can be used as a reference for its actual content.

+ * A flow file is a logical notion of an item in a flow with its associated attributes and identity which can be used as a reference for its actual content. + *

* * Immutable - Thread Safe * */ public final class StandardFlowFileRecord implements FlowFile, FlowFileRecord { + private static final int MAX_LINEAGE_IDENTIFIERS = 100; private final long id; private final long entryDate; @@ -182,7 +183,18 @@ public final class StandardFlowFileRecord implements FlowFile, FlowFileRecord { public Builder lineageIdentifiers(final Collection lineageIdentifiers) { if (null != lineageIdentifiers) { bLineageIdentifiers.clear(); - bLineageIdentifiers.addAll(lineageIdentifiers); + + if (lineageIdentifiers.size() > MAX_LINEAGE_IDENTIFIERS) { + int i = 0; + for (final String id : lineageIdentifiers) { + bLineageIdentifiers.add(id); + if (++i >= MAX_LINEAGE_IDENTIFIERS) { + break; + } + } + } else { + bLineageIdentifiers.addAll(lineageIdentifiers); + } } return this; } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java index 687574351e..5e4aed05eb 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java @@ -117,16 +117,16 @@ public class TestPersistentProvenanceRepository { // Delete all of the storage files. We do this in order to clean up the tons of files that // we create but also to ensure that we have closed all of the file handles. If we leave any // streams open, for instance, this will throw an IOException, causing our unit test to fail. 
- for ( final File storageDir : config.getStorageDirectories() ) { + for (final File storageDir : config.getStorageDirectories()) { int i; - for (i=0; i < 3; i++) { + for (i = 0; i < 3; i++) { try { FileUtils.deleteFile(storageDir, true); break; } catch (final IOException ioe) { // if there is a virus scanner, etc. running in the background we may not be able to // delete the file. Wait a sec and try again. - if ( i == 2 ) { + if (i == 2) { throw ioe; } else { try { @@ -441,7 +441,7 @@ public class TestPersistentProvenanceRepository { repo.waitForRollover(); final Query query = new Query(UUID.randomUUID().toString()); - // query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.FlowFileUUID, "00000000-0000-0000-0000*")); + // query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.FlowFileUUID, "00000000-0000-0000-0000*")); query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.Filename, "file-*")); query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.ComponentID, "12?4")); query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.TransitURI, "nifi://*")); @@ -464,68 +464,6 @@ public class TestPersistentProvenanceRepository { assertTrue(newRecordSet.getMatchingEvents().isEmpty()); } - @Test - public void testIndexAndCompressOnRolloverAndSubsequentSearchAsync() throws IOException, InterruptedException, ParseException { - final RepositoryConfiguration config = createConfiguration(); - config.setMaxRecordLife(3, TimeUnit.SECONDS); - config.setMaxStorageCapacity(1024L * 1024L); - config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS); - config.setMaxEventFileCapacity(1024L * 1024L); - config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields())); - - repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS); - repo.initialize(getEventReporter()); - - final String uuid = "00000000-0000-0000-0000-000000000000"; - final Map attributes = new HashMap<>(); - attributes.put("abc", "xyz"); - 
attributes.put("xyz", "abc"); - attributes.put("filename", "file-" + uuid); - - final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder(); - builder.setEventTime(System.currentTimeMillis()); - builder.setEventType(ProvenanceEventType.RECEIVE); - builder.setTransitUri("nifi://unit-test"); - builder.fromFlowFile(createFlowFile(3L, 3000L, attributes)); - builder.setComponentId("1234"); - builder.setComponentType("dummy processor"); - - for (int i = 0; i < 10; i++) { - attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + i); - builder.fromFlowFile(createFlowFile(i, 3000L, attributes)); - repo.registerEvent(builder.build()); - } - - repo.waitForRollover(); - - final Query query = new Query(UUID.randomUUID().toString()); - query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.FlowFileUUID, "00000*")); - query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.Filename, "file-*")); - query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.ComponentID, "12?4")); - query.addSearchTerm(SearchTerms.newSearchTerm(SearchableFields.TransitURI, "nifi://*")); - query.setMaxResults(100); - - final QuerySubmission submission = repo.submitQuery(query); - while (!submission.getResult().isFinished()) { - Thread.sleep(100L); - } - - assertEquals(10, submission.getResult().getMatchingEvents().size()); - for (final ProvenanceEventRecord match : submission.getResult().getMatchingEvents()) { - System.out.println(match); - } - - Thread.sleep(2000L); - - config.setMaxStorageCapacity(100L); - config.setMaxRecordLife(500, TimeUnit.MILLISECONDS); - repo.purgeOldEvents(); - Thread.sleep(2000L); - - final QueryResult newRecordSet = repo.queryEvents(query); - assertTrue(newRecordSet.getMatchingEvents().isEmpty()); - } - @Test public void testIndexAndCompressOnRolloverAndSubsequentSearchMultipleStorageDirs() throws IOException, InterruptedException, ParseException { final RepositoryConfiguration config = createConfiguration(); @@ -603,7 +541,7 
@@ public class TestPersistentProvenanceRepository { repo.purgeOldEvents(); - Thread.sleep(2000L); // purge is async. Give it time to do its job. + Thread.sleep(2000L); // purge is async. Give it time to do its job. query.setMaxResults(100); final QuerySubmission noResultSubmission = repo.submitQuery(query); @@ -939,7 +877,7 @@ public class TestPersistentProvenanceRepository { config.setMaxEventFileLife(500, TimeUnit.MILLISECONDS); config.setMaxEventFileCapacity(1024L * 1024L); config.setSearchableFields(new ArrayList<>(SearchableFields.getStandardFields())); - config.setDesiredIndexSize(10); // force new index to be created for each rollover + config.setDesiredIndexSize(10); // force new index to be created for each rollover repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS); repo.initialize(getEventReporter()); @@ -961,7 +899,7 @@ public class TestPersistentProvenanceRepository { for (int i = 0; i < 10; i++) { attributes.put("uuid", "00000000-0000-0000-0000-00000000000" + i); builder.fromFlowFile(createFlowFile(i, 3000L, attributes)); - builder.setEventTime(10L); // make sure the events are destroyed when we call purge + builder.setEventTime(10L); // make sure the events are destroyed when we call purge repo.registerEvent(builder.build()); } @@ -1019,7 +957,7 @@ public class TestPersistentProvenanceRepository { @Test public void testBackPressure() throws IOException, InterruptedException { final RepositoryConfiguration config = createConfiguration(); - config.setMaxEventFileCapacity(1L); // force rollover on each record. + config.setMaxEventFileCapacity(1L); // force rollover on each record. config.setJournalCount(1); final AtomicInteger journalCountRef = new AtomicInteger(0);