NIFI-554: Catch FileNotFoundException and handle it when creating IndexSearcher

This commit is contained in:
Mark Payne 2015-04-29 08:31:13 -04:00
parent 57e78bf723
commit 20831c87fc
2 changed files with 85 additions and 78 deletions

View File

@ -699,7 +699,7 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
if (bytesWrittenSinceRollover.get() >= configuration.getMaxEventFileCapacity()) { if (bytesWrittenSinceRollover.get() >= configuration.getMaxEventFileCapacity()) {
try { try {
rollover(false); rollover(false);
} catch (IOException e) { } catch (final IOException e) {
logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString()); logger.error("Failed to Rollover Provenance Event Repository file due to {}", e.toString());
logger.error("", e); logger.error("", e);
eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Repository file due to " + e.toString()); eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to Rollover Provenance Event Repository file due to " + e.toString());
@ -1001,7 +1001,7 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
if (fileRolledOver == null) { if (fileRolledOver == null) {
return; return;
} }
File file = fileRolledOver; final File file = fileRolledOver;
// update our map of id to Path // update our map of id to Path
// need lock to update the map, even though it's an AtomicReference, AtomicReference allows those doing a // need lock to update the map, even though it's an AtomicReference, AtomicReference allows those doing a
@ -1010,7 +1010,7 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
writeLock.lock(); writeLock.lock();
try { try {
final Long fileFirstEventId = Long.valueOf(LuceneUtil.substringBefore(fileRolledOver.getName(), ".")); final Long fileFirstEventId = Long.valueOf(LuceneUtil.substringBefore(fileRolledOver.getName(), "."));
SortedMap<Long, Path> newIdToPathMap = new TreeMap<>(new PathMapComparator()); final SortedMap<Long, Path> newIdToPathMap = new TreeMap<>(new PathMapComparator());
newIdToPathMap.putAll(idToPathMap.get()); newIdToPathMap.putAll(idToPathMap.get());
newIdToPathMap.put(fileFirstEventId, file.toPath()); newIdToPathMap.put(fileFirstEventId, file.toPath());
idToPathMap.set(newIdToPathMap); idToPathMap.set(newIdToPathMap);
@ -1452,11 +1452,11 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
try (final DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(indexDirectory))) { try (final DirectoryReader directoryReader = DirectoryReader.open(FSDirectory.open(indexDirectory))) {
final IndexSearcher searcher = new IndexSearcher(directoryReader); final IndexSearcher searcher = new IndexSearcher(directoryReader);
TopDocs topDocs = searcher.search(luceneQuery, 10000000); final TopDocs topDocs = searcher.search(luceneQuery, 10000000);
logger.info("For {}, Top Docs has {} hits; reading Lucene results", indexDirectory, topDocs.scoreDocs.length); logger.info("For {}, Top Docs has {} hits; reading Lucene results", indexDirectory, topDocs.scoreDocs.length);
if (topDocs.totalHits > 0) { if (topDocs.totalHits > 0) {
for (ScoreDoc scoreDoc : topDocs.scoreDocs) { for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
final int docId = scoreDoc.doc; final int docId = scoreDoc.doc;
final Document d = directoryReader.document(docId); final Document d = directoryReader.document(docId);
localScoreDocs.add(d); localScoreDocs.add(d);
@ -1649,16 +1649,16 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
} }
switch (event.getEventType()) { switch (event.getEventType()) {
case CLONE: case CLONE:
case FORK: case FORK:
case JOIN: case JOIN:
case REPLAY: case REPLAY:
return submitLineageComputation(event.getChildUuids(), LineageComputationType.EXPAND_CHILDREN, eventId, event.getEventTime(), Long.MAX_VALUE); return submitLineageComputation(event.getChildUuids(), LineageComputationType.EXPAND_CHILDREN, eventId, event.getEventTime(), Long.MAX_VALUE);
default: default:
final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_CHILDREN, eventId, Collections.<String>emptyList(), 1); final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_CHILDREN, eventId, Collections.<String>emptyList(), 1);
lineageSubmissionMap.put(submission.getLineageIdentifier(), submission); lineageSubmissionMap.put(submission.getLineageIdentifier(), submission);
submission.getResult().setError("Event ID " + eventId + " indicates an event of type " + event.getEventType() + " so its children cannot be expanded"); submission.getResult().setError("Event ID " + eventId + " indicates an event of type " + event.getEventType() + " so its children cannot be expanded");
return submission; return submission;
} }
} catch (final IOException ioe) { } catch (final IOException ioe) {
final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_CHILDREN, eventId, Collections.<String>emptyList(), 1); final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_CHILDREN, eventId, Collections.<String>emptyList(), 1);
@ -1686,17 +1686,17 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
} }
switch (event.getEventType()) { switch (event.getEventType()) {
case JOIN: case JOIN:
case FORK: case FORK:
case CLONE: case CLONE:
case REPLAY: case REPLAY:
return submitLineageComputation(event.getParentUuids(), LineageComputationType.EXPAND_PARENTS, eventId, 0L, event.getEventTime()); return submitLineageComputation(event.getParentUuids(), LineageComputationType.EXPAND_PARENTS, eventId, 0L, event.getEventTime());
default: { default: {
final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_PARENTS, eventId, Collections.<String>emptyList(), 1); final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_PARENTS, eventId, Collections.<String>emptyList(), 1);
lineageSubmissionMap.put(submission.getLineageIdentifier(), submission); lineageSubmissionMap.put(submission.getLineageIdentifier(), submission);
submission.getResult().setError("Event ID " + eventId + " indicates an event of type " + event.getEventType() + " so its parents cannot be expanded"); submission.getResult().setError("Event ID " + eventId + " indicates an event of type " + event.getEventType() + " so its parents cannot be expanded");
return submission; return submission;
} }
} }
} catch (final IOException ioe) { } catch (final IOException ioe) {
final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_PARENTS, eventId, Collections.<String>emptyList(), 1); final AsyncLineageSubmission submission = new AsyncLineageSubmission(LineageComputationType.EXPAND_PARENTS, eventId, Collections.<String>emptyList(), 1);
@ -1880,7 +1880,7 @@ public class PersistentProvenanceRepository implements ProvenanceEventRepository
} }
try { try {
final Set<ProvenanceEventRecord> matchingRecords = LineageQuery.computeLineageForFlowFiles(PersistentProvenanceRepository.this, indexDir, null, flowFileUuids); final Set<ProvenanceEventRecord> matchingRecords = LineageQuery.computeLineageForFlowFiles(PersistentProvenanceRepository.this, indexManager, indexDir, null, flowFileUuids);
final StandardLineageResult result = submission.getResult(); final StandardLineageResult result = submission.getResult();
result.update(matchingRecords); result.update(matchingRecords);

View File

@ -19,26 +19,23 @@ package org.apache.nifi.provenance.lucene;
import static java.util.Objects.requireNonNull; import static java.util.Objects.requireNonNull;
import java.io.File; import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
import java.util.Collections;
import java.util.Set; import java.util.Set;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import org.apache.nifi.provenance.PersistentProvenanceRepository;
import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.SearchableFields;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term; import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory; import org.apache.nifi.provenance.PersistentProvenanceRepository;
import org.apache.lucene.store.FSDirectory; import org.apache.nifi.provenance.ProvenanceEventRecord;
import org.apache.nifi.provenance.SearchableFields;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -48,7 +45,7 @@ public class LineageQuery {
public static final int MAX_LINEAGE_UUIDS = 100; public static final int MAX_LINEAGE_UUIDS = 100;
private static final Logger logger = LoggerFactory.getLogger(LineageQuery.class); private static final Logger logger = LoggerFactory.getLogger(LineageQuery.class);
public static Set<ProvenanceEventRecord> computeLineageForFlowFiles(final PersistentProvenanceRepository repo, final File indexDirectory, public static Set<ProvenanceEventRecord> computeLineageForFlowFiles(final PersistentProvenanceRepository repo, final IndexManager indexManager, final File indexDirectory,
final String lineageIdentifier, final Collection<String> flowFileUuids) throws IOException { final String lineageIdentifier, final Collection<String> flowFileUuids) throws IOException {
if (requireNonNull(flowFileUuids).size() > MAX_LINEAGE_UUIDS) { if (requireNonNull(flowFileUuids).size() > MAX_LINEAGE_UUIDS) {
throw new IllegalArgumentException(String.format("Cannot compute lineage for more than %s FlowFiles. This lineage contains %s.", MAX_LINEAGE_UUIDS, flowFileUuids.size())); throw new IllegalArgumentException(String.format("Cannot compute lineage for more than %s FlowFiles. This lineage contains %s.", MAX_LINEAGE_UUIDS, flowFileUuids.size()));
@ -58,52 +55,62 @@ public class LineageQuery {
throw new IllegalArgumentException("Must specify either Lineage Identifier or FlowFile UUIDs to compute lineage"); throw new IllegalArgumentException("Must specify either Lineage Identifier or FlowFile UUIDs to compute lineage");
} }
try (final Directory fsDir = FSDirectory.open(indexDirectory); final IndexSearcher searcher;
final IndexReader indexReader = DirectoryReader.open(fsDir)) { try {
searcher = indexManager.borrowIndexSearcher(indexDirectory);
final IndexSearcher searcher = new IndexSearcher(indexReader); try {
// Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as
// Create a query for all Events related to the FlowFiles of interest. We do this by adding all ID's as // "SHOULD" clauses and then setting the minimum required to 1.
// "SHOULD" clauses and then setting the minimum required to 1. final BooleanQuery flowFileIdQuery;
final BooleanQuery flowFileIdQuery; if (flowFileUuids == null || flowFileUuids.isEmpty()) {
if (flowFileUuids == null || flowFileUuids.isEmpty()) { flowFileIdQuery = null;
flowFileIdQuery = null;
} else {
flowFileIdQuery = new BooleanQuery();
for (final String flowFileUuid : flowFileUuids) {
flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
}
flowFileIdQuery.setMinimumNumberShouldMatch(1);
}
BooleanQuery query;
if (lineageIdentifier == null) {
query = flowFileIdQuery;
} else {
final BooleanQuery lineageIdQuery = new BooleanQuery();
lineageIdQuery.add(new TermQuery(new Term(SearchableFields.LineageIdentifier.getSearchableFieldName(), lineageIdentifier)), Occur.MUST);
if (flowFileIdQuery == null) {
query = lineageIdQuery;
} else { } else {
query = new BooleanQuery(); flowFileIdQuery = new BooleanQuery();
query.add(flowFileIdQuery, Occur.SHOULD); for (final String flowFileUuid : flowFileUuids) {
query.add(lineageIdQuery, Occur.SHOULD); flowFileIdQuery.add(new TermQuery(new Term(SearchableFields.FlowFileUUID.getSearchableFieldName(), flowFileUuid)), Occur.SHOULD);
query.setMinimumNumberShouldMatch(1); }
flowFileIdQuery.setMinimumNumberShouldMatch(1);
} }
BooleanQuery query;
if (lineageIdentifier == null) {
query = flowFileIdQuery;
} else {
final BooleanQuery lineageIdQuery = new BooleanQuery();
lineageIdQuery.add(new TermQuery(new Term(SearchableFields.LineageIdentifier.getSearchableFieldName(), lineageIdentifier)), Occur.MUST);
if (flowFileIdQuery == null) {
query = lineageIdQuery;
} else {
query = new BooleanQuery();
query.add(flowFileIdQuery, Occur.SHOULD);
query.add(lineageIdQuery, Occur.SHOULD);
query.setMinimumNumberShouldMatch(1);
}
}
final long searchStart = System.nanoTime();
final TopDocs uuidQueryTopDocs = searcher.search(query, MAX_QUERY_RESULTS);
final long searchEnd = System.nanoTime();
final DocsReader docsReader = new DocsReader(repo.getConfiguration().getStorageDirectories());
final Set<ProvenanceEventRecord> recs = docsReader.read(uuidQueryTopDocs, searcher.getIndexReader(), repo.getAllLogFiles(), new AtomicInteger(0), Integer.MAX_VALUE);
final long readDocsEnd = System.nanoTime();
logger.debug("Finished Lineage Query; Lucene search took {} millis, reading records took {} millis",
TimeUnit.NANOSECONDS.toMillis(searchEnd - searchStart), TimeUnit.NANOSECONDS.toMillis(readDocsEnd - searchEnd));
return recs;
} finally {
indexManager.returnIndexSearcher(indexDirectory, searcher);
}
} catch (final FileNotFoundException fnfe) {
// nothing has been indexed yet, or the data has already aged off
logger.warn("Attempted to search Provenance Index {} but could not find the file due to {}", indexDirectory, fnfe);
if ( logger.isDebugEnabled() ) {
logger.warn("", fnfe);
} }
final long searchStart = System.nanoTime(); return Collections.emptySet();
final TopDocs uuidQueryTopDocs = searcher.search(query, MAX_QUERY_RESULTS);
final long searchEnd = System.nanoTime();
final DocsReader docsReader = new DocsReader(repo.getConfiguration().getStorageDirectories());
final Set<ProvenanceEventRecord> recs = docsReader.read(uuidQueryTopDocs, indexReader, repo.getAllLogFiles(), new AtomicInteger(0), Integer.MAX_VALUE);
final long readDocsEnd = System.nanoTime();
logger.debug("Finished Lineage Query; Lucene search took {} millis, reading records took {} millis",
TimeUnit.NANOSECONDS.toMillis(searchEnd - searchStart), TimeUnit.NANOSECONDS.toMillis(readDocsEnd - searchEnd));
return recs;
} }
} }