From 15880f9fcc78e7a040e0d64d2a517390122e3fe3 Mon Sep 17 00:00:00 2001 From: Oleg Zhurakousky Date: Mon, 16 Nov 2015 08:39:23 -0500 Subject: [PATCH] NIFI-748 addressed PR comments - made DocReader package private - polished logic in read(..) method to avoid escaping the loop - added call to sorting logic in LuceneUtil.groupDocsByStorageFileName(..) to ensure that previous behavior and assumptions in read(..) methodd are preserved - other minor polishing --- .../nifi/provenance/lucene/DocsReader.java | 21 ++++++++++--------- .../nifi/provenance/lucene/LuceneUtil.java | 15 +++++++++++-- 2 files changed, 24 insertions(+), 12 deletions(-) diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/DocsReader.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/DocsReader.java index 7bd800b99c..703a5b8818 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/DocsReader.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/DocsReader.java @@ -22,6 +22,7 @@ import java.nio.file.Path; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; +import java.util.Iterator; import java.util.LinkedHashSet; import java.util.List; import java.util.Map; @@ -43,7 +44,7 @@ import org.apache.lucene.search.TopDocs; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class DocsReader { +class DocsReader { private final Logger logger = LoggerFactory.getLogger(DocsReader.class); public Set read(final TopDocs topDocs, final IndexReader indexReader, final Collection allProvenanceLogFiles, @@ -106,12 +107,13 @@ public class DocsReader { public Set read(final List docs, final Collection allProvenanceLogFiles, final AtomicInteger retrievalCount, final int maxResults, final int maxAttributeChars) throws IOException { + if (retrievalCount.get() >= maxResults) { + return Collections.emptySet(); + } + final long start = System.nanoTime(); Set matchingRecords = new LinkedHashSet<>(); - if (retrievalCount.get() >= maxResults) { - return matchingRecords; - } Map> byStorageNameDocGroups = LuceneUtil.groupDocsByStorageFileName(docs); @@ -123,17 +125,16 @@ public class DocsReader { if (provenanceEventFile != null) { try (RecordReader reader = RecordReaders.newRecordReader(provenanceEventFile, allProvenanceLogFiles, maxAttributeChars)) { - for (Document document : byStorageNameDocGroups.get(storageFileName)) { - ProvenanceEventRecord eRec = this.getRecord(document, reader); + + Iterator docIter = byStorageNameDocGroups.get(storageFileName).iterator(); + while (docIter.hasNext() && retrievalCount.incrementAndGet() < maxResults){ + ProvenanceEventRecord eRec = this.getRecord(docIter.next(), reader); if (eRec != null) { matchingRecords.add(eRec); eventsReadThisFile++; - - if (retrievalCount.incrementAndGet() >= maxResults) { - break; - } } } + } catch (Exception e) { logger.warn("Failed while trying to read Provenance Events. The event file '" + provenanceEventFile.getAbsolutePath() + diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java index aa50b94700..08a99d61ea 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/LuceneUtil.java @@ -130,8 +130,14 @@ public class LuceneUtil { return luceneQuery; } + /** + * Will sort documents by filename and then file offset so that we can + * retrieve the records efficiently + * + * @param documents + * list of {@link Document}s + */ public static void sortDocsForRetrieval(final List documents) { - // sort by filename and then file offset so that we can retrieve the records efficiently Collections.sort(documents, new Comparator() { @Override public int compare(final Document o1, final Document o2) { @@ -167,7 +173,9 @@ public class LuceneUtil { * Will group documents based on the {@link FieldNames#STORAGE_FILENAME}. * * @param documents - * list of {@link Document}s + * list of {@link Document}s which will be sorted via + * {@link #sortDocsForRetrieval(List)} for more efficient record + * retrieval. * @return a {@link Map} of document groups with * {@link FieldNames#STORAGE_FILENAME} as key and {@link List} of * {@link Document}s as value. @@ -181,6 +189,9 @@ public class LuceneUtil { } documentGroups.get(fileName).add(document); } + for (List groupedDocuments : documentGroups.values()) { + sortDocsForRetrieval(groupedDocuments); + } return documentGroups; } }