From 1be08714731f01347ac1f98e18047fe7d9ab8afd Mon Sep 17 00:00:00 2001 From: Mark Payne Date: Tue, 4 Oct 2016 09:38:14 -0400 Subject: [PATCH] NIFI-2854: Refactor repositories and swap files to use schema-based serialization so that nifi can be rolled back to a previous version after an upgrade. NIFI-2854: Incorporated PR review feedback NIFI-2854: Implemented feedback from PR Review NIFI-2854: Ensure that all resources are closed on CompressableRecordReader.close() even if an IOException is thrown when closing one of them This closes #1202 --- .../nifi/provenance/NamedSearchableField.java | 9 +- .../StandardProvenanceEventRecord.java | 38 ++ nifi-commons/nifi-schema-utils/pom.xml | 26 + .../repository/schema/ComplexRecordField.java | 95 +++ .../repository/schema/FieldMapRecord.java | 81 +++ .../nifi/repository/schema/FieldType.java | 74 +++ .../repository/schema/MapRecordField.java | 75 +++ .../nifi/repository/schema/NamedValue.java | 36 ++ .../apache/nifi/repository/schema/Record.java | 30 + .../nifi/repository/schema/RecordField.java | 30 + .../nifi/repository/schema/RecordSchema.java | 188 ++++++ .../nifi/repository/schema/Repetition.java | 22 + .../repository/schema/SchemaRecordReader.java | 191 ++++++ .../repository/schema/SchemaRecordWriter.java | 139 +++++ .../repository/schema/SimpleRecordField.java | 84 +++ .../repository/schema/UnionRecordField.java | 64 ++ .../schema/TestSchemaRecordReader.java | 281 +++++++++ .../schema/TestSchemaRecordReaderWriter.java | 178 ++++++ .../nifi/stream/io/BufferedInputStream.java | 436 +++++++++++++- .../org/apache/nifi/util/FormatUtils.java | 38 ++ .../timebuffer/CountSizeEntityAccess.java | 43 ++ .../nifi/util/timebuffer/TimedCountSize.java | 41 ++ .../org/wali/MinimalLockingWriteAheadLog.java | 118 ++-- .../src/main/java/org/wali/SerDe.java | 30 +- .../src/main/java/org/wali/SerDeFactory.java | 60 ++ .../java/org/wali/SingletonSerDeFactory.java | 46 ++ nifi-commons/pom.xml | 1 + .../repository/claim/ResourceClaim.java | 24 + .../nifi-framework-core/pom.xml | 4 + .../controller/FileSystemSwapManager.java | 416 ++----------- .../controller/StandardFlowFileQueue.java | 6 + .../repository/RepositoryRecordSerde.java | 68 +++ .../RepositoryRecordSerdeFactory.java | 95 +++ .../SchemaRepositoryRecordSerde.java | 213 +++++++ .../repository/StandardProcessSession.java | 70 +-- .../WriteAheadFlowFileRepository.java | 547 +----------------- .../WriteAheadRepositoryRecordSerde.java | 517 +++++++++++++++++ .../claim/StandardResourceClaim.java | 23 - .../claim/StandardResourceClaimManager.java | 7 +- .../io/ByteCountingInputStream.java | 101 ---- .../schema/ContentClaimFieldMap.java | 92 +++ .../repository/schema/ContentClaimSchema.java | 63 ++ .../schema/FlowFileRecordFieldMap.java | 99 ++++ .../repository/schema/FlowFileSchema.java | 67 +++ .../schema/RepositoryRecordFieldMap.java | 83 +++ .../schema/RepositoryRecordSchema.java | 93 +++ .../schema/RepositoryRecordUpdate.java | 69 +++ .../schema/ResourceClaimFieldMap.java | 85 +++ .../swap/SchemaSwapDeserializer.java | 77 +++ .../controller/swap/SchemaSwapSerializer.java | 101 ++++ .../swap/SimpleSwapDeserializer.java | 303 ++++++++++ .../controller/swap/SimpleSwapSerializer.java | 133 +++++ .../controller/swap/SwapDeserializer.java | 33 ++ .../nifi/controller/swap/SwapSchema.java | 79 +++ .../nifi/controller/swap/SwapSerializer.java | 33 ++ .../controller/swap/SwapSummaryFieldMap.java | 106 ++++ .../controller/TestFileSystemSwapManager.java | 160 +---- .../TestWriteAheadFlowFileRepository.java | 2 + 
.../nifi/controller/swap/MockFlowFile.java | 136 +++++ .../TestSchemaSwapSerializerDeserializer.java | 195 +++++++ .../TestSimpleSwapSerializerDeserializer.java | 139 +++++ .../pom.xml | 4 + .../nifi/provenance/AbstractRecordWriter.java | 173 ++++++ .../ByteArraySchemaRecordReader.java | 80 +++ .../ByteArraySchemaRecordWriter.java | 85 +++ .../PersistentProvenanceRepository.java | 48 +- .../nifi/provenance/StandardRecordReader.java | 263 +-------- .../nifi/provenance/StandardRecordWriter.java | 388 +++---------- .../provenance/lucene/IndexingAction.java | 21 +- .../nifi/provenance/schema/EventRecord.java | 198 +++++++ .../provenance/schema/EventRecordFields.java | 110 ++++ .../schema/ProvenanceEventSchema.java | 80 +++ .../CompressableRecordReader.java | 282 +++++++++ .../CompressableRecordWriter.java | 206 +++++++ .../serialization/EmptyRecordReader.java} | 85 +-- .../serialization/RecordReaders.java | 45 +- .../serialization/RecordWriter.java | 7 + .../serialization/RecordWriters.java | 10 +- .../AbstractTestRecordReaderWriter.java | 204 +++++++ .../nifi/provenance/LoopingInputStream.java | 94 +++ .../TestPersistentProvenanceRepository.java | 132 +---- .../TestSchemaRecordReaderWriter.java | 315 ++++++++++ .../TestStandardRecordReaderWriter.java | 216 +++---- .../nifi/provenance/toc/NopTocWriter.java | 49 ++ pom.xml | 5 + 85 files changed, 7690 insertions(+), 2103 deletions(-) create mode 100644 nifi-commons/nifi-schema-utils/pom.xml create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/ComplexRecordField.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldMapRecord.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldType.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/MapRecordField.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/NamedValue.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Record.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordField.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordSchema.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Repetition.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordReader.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordWriter.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SimpleRecordField.java create mode 100644 nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/UnionRecordField.java create mode 100644 nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReader.java create mode 100644 nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReaderWriter.java create mode 100644 nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/CountSizeEntityAccess.java create mode 100644 nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/TimedCountSize.java create mode 100644 
nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDeFactory.java create mode 100644 nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SingletonSerDeFactory.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerde.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerdeFactory.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/SchemaRepositoryRecordSerde.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadRepositoryRecordSerde.java delete mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingInputStream.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimFieldMap.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimSchema.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileRecordFieldMap.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileSchema.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordFieldMap.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordSchema.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordUpdate.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ResourceClaimFieldMap.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapDeserializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapSerializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapDeserializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapSerializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapDeserializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSchema.java create mode 100644 
nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSerializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSummaryFieldMap.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/MockFlowFile.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSchemaSwapSerializerDeserializer.java create mode 100644 nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSimpleSwapSerializerDeserializer.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/AbstractRecordWriter.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordReader.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordWriter.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecord.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecordFields.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/ProvenanceEventSchema.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordReader.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordWriter.java rename nifi-nar-bundles/{nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingOutputStream.java => nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/EmptyRecordReader.java} (51%) create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/AbstractTestRecordReaderWriter.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/LoopingInputStream.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestSchemaRecordReaderWriter.java create mode 100644 nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/toc/NopTocWriter.java diff --git a/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/NamedSearchableField.java b/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/NamedSearchableField.java 
index dc2903f377..e879c63465 100644 --- a/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/NamedSearchableField.java +++ b/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/NamedSearchableField.java @@ -31,6 +31,7 @@ public class NamedSearchableField implements SearchableField { private final SearchableFieldType fieldType; private final String friendlyName; private final boolean attribute; + private final int hash; // cached for more efficient/faster use in sets and maps NamedSearchableField(final String identifier, final String searchableName, final String friendlyName, final boolean attribute) { this(identifier, searchableName, friendlyName, attribute, SearchableFieldType.STRING); @@ -42,6 +43,7 @@ public class NamedSearchableField implements SearchableField { this.friendlyName = requireNonNull(friendlyName); this.attribute = requireNonNull(attribute); this.fieldType = requireNonNull(fieldType); + this.hash = 298347 + searchableName.hashCode() + (attribute ? 1 : 0); } @Override @@ -76,11 +78,14 @@ public class NamedSearchableField implements SearchableField { @Override public int hashCode() { - return 298347 + searchableName.hashCode() + (attribute ? 1 : 0); + return hash; } @Override public boolean equals(final Object obj) { + if (obj == this) { + return true; + } if (obj == null) { return false; } @@ -90,6 +95,6 @@ public class NamedSearchableField implements SearchableField { } final SearchableField other = (SearchableField) obj; - return (this.searchableName.equals(other.getSearchableFieldName()) && attribute == other.isAttribute()); + return attribute == other.isAttribute() && this.searchableName.equals(other.getSearchableFieldName()); } } diff --git a/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardProvenanceEventRecord.java b/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardProvenanceEventRecord.java index cafdc97566..5c6614c6fa 100644 --- a/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardProvenanceEventRecord.java +++ b/nifi-commons/nifi-data-provenance-utils/src/main/java/org/apache/nifi/provenance/StandardProvenanceEventRecord.java @@ -104,6 +104,10 @@ public final class StandardProvenanceEventRecord implements ProvenanceEventRecor updatedAttributes = builder.updatedAttributes == null ? 
Collections.emptyMap() : Collections.unmodifiableMap(builder.updatedAttributes); sourceQueueIdentifier = builder.sourceQueueIdentifier; + + if (builder.eventId != null) { + eventId = builder.eventId; + } } public String getStorageFilename() { @@ -160,6 +164,14 @@ public final class StandardProvenanceEventRecord implements ProvenanceEventRecor return allAttrs; } + public String getAttribute(final String attributeName) { + if (updatedAttributes.containsKey(attributeName)) { + return updatedAttributes.get(attributeName); + } + + return previousAttributes.get(attributeName); + } + @Override public String getComponentId() { return componentId; @@ -417,6 +429,7 @@ public final class StandardProvenanceEventRecord implements ProvenanceEventRecor private long storageByteOffset = -1L; private long eventDuration = -1L; private String storageFilename; + private Long eventId; private String contentClaimSection; private String contentClaimContainer; @@ -478,6 +491,11 @@ public final class StandardProvenanceEventRecord implements ProvenanceEventRecor return this; } + public Builder setEventId(final long eventId) { + this.eventId = eventId; + return this; + } + @Override public ProvenanceEventBuilder copy() { final Builder copy = new Builder(); @@ -540,6 +558,16 @@ public final class StandardProvenanceEventRecord implements ProvenanceEventRecor return this; } + public Builder setPreviousAttributes(final Map previousAttributes) { + this.previousAttributes = previousAttributes; + return this; + } + + public Builder setUpdatedAttributes(final Map updatedAttributes) { + this.updatedAttributes = updatedAttributes; + return this; + } + @Override public Builder setFlowFileUUID(final String uuid) { this.uuid = uuid; @@ -649,6 +677,16 @@ public final class StandardProvenanceEventRecord implements ProvenanceEventRecor return this; } + public Builder setChildUuids(final List uuids) { + this.childrenUuids = uuids; + return this; + } + + public Builder setParentUuids(final List uuids) { + this.parentUuids = uuids; + return this; + } + @Override public Builder removeChildFlowFile(final FlowFile childFlowFile) { if (this.childrenUuids == null) { diff --git a/nifi-commons/nifi-schema-utils/pom.xml b/nifi-commons/nifi-schema-utils/pom.xml new file mode 100644 index 0000000000..ba7c306388 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/pom.xml @@ -0,0 +1,26 @@ + + + 4.0.0 + + org.apache.nifi + nifi-commons + 1.1.0-SNAPSHOT + + nifi-schema-utils + + + + diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/ComplexRecordField.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/ComplexRecordField.java new file mode 100644 index 0000000000..d780147015 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/ComplexRecordField.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +public class ComplexRecordField implements RecordField { + private static final FieldType fieldType = FieldType.COMPLEX; + + private final String fieldName; + private final Repetition repetition; + private final List subFields; + + public ComplexRecordField(final String fieldName, final Repetition repetition, final RecordField... subFields) { + this(fieldName, repetition, Stream.of(subFields).collect(Collectors.toList())); + } + + public ComplexRecordField(final String fieldName, final Repetition repetition, final List subFields) { + Objects.requireNonNull(fieldName); + Objects.requireNonNull(repetition); + Objects.requireNonNull(subFields); + + if (subFields.isEmpty()) { + throw new IllegalArgumentException("Cannot have a RecordField of type " + fieldType.name() + " without any sub-fields"); + } + + this.fieldName = fieldName; + this.repetition = repetition; + this.subFields = subFields; + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public FieldType getFieldType() { + return fieldType; + } + + @Override + public Repetition getRepetition() { + return repetition; + } + + @Override + public List getSubFields() { + return subFields; + } + + @Override + public String toString() { + return "ComplexRecordField[" + fieldName + "]"; + } + + @Override + public int hashCode() { + return 81 + fieldName.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof RecordField)) { + return false; + } + + final RecordField other = (RecordField) obj; + return fieldName.equals(other.getFieldName()); + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldMapRecord.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldMapRecord.java new file mode 100644 index 0000000000..c9368a7a74 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldMapRecord.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.repository.schema; + +import java.util.HashMap; +import java.util.Map; + +public class FieldMapRecord implements Record { + private final Map values; + private final RecordSchema schema; + + public FieldMapRecord(final Map values, final RecordSchema schema) { + this.schema = schema; + this.values = convertFieldToName(values); + } + + private static Map convertFieldToName(final Map map) { + final Map nameMap = new HashMap<>(map.size()); + for (final Map.Entry entry : map.entrySet()) { + nameMap.put(entry.getKey().getFieldName(), entry.getValue()); + } + return nameMap; + } + + @Override + public Object getFieldValue(final RecordField field) { + return values.get(field.getFieldName()); + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public Object getFieldValue(final String fieldName) { + return values.get(fieldName); + } + + @Override + public String toString() { + return "FieldMapRecord[" + values + "]"; + } + + @Override + public int hashCode() { + return 33 + 41 * values.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (obj == null) { + return false; + } + if (obj == this) { + return true; + } + + if (!(obj instanceof FieldMapRecord)) { + return false; + } + + final FieldMapRecord other = (FieldMapRecord) obj; + return values.equals(other.values); + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldType.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldType.java new file mode 100644 index 0000000000..b3b42589ce --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/FieldType.java @@ -0,0 +1,74 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import java.util.Map; + +public enum FieldType { + STRING(2, true, String.class), // 2 bytes for length of string + LONG_STRING(4, true, String.class), // 4 bytes for length of string + BOOLEAN(1, false, Boolean.class), + LONG(8, false, Long.class), + INT(4, false, Integer.class), + BYTE_ARRAY(4, true, byte[].class), // 4 bytes for number of bytes + COMPLEX(2, true, Record.class), // 2 bytes for number of sub-elements + MAP(2, true, Map.class), + UNION(4, false, NamedValue.class); + + + private final int numBytes; + private final boolean lengthEncoded; + private final Class javaClass; + + private FieldType(final int numBytes, final boolean lengthEncoded, final Class javaClass) { + this.numBytes = numBytes; + this.lengthEncoded = lengthEncoded; + this.javaClass = javaClass; + } + + + /** + * Indicates the number of bytes that must be read for this field. 
If this field is length-encoded + * (see {@link #isLengthEncoded()}), then this method tells us how many bytes to read in order to determine + * the full length of the field. Otherwise, these bytes tell us the full length of the field themselves. + * + * @return the number of bytes to read for this field + */ + public int getNumberOfBytes() { + return numBytes; + } + + /** + * If this method returns true, then the value of {@link #getNumberOfBytes()} tells us how many bytes to read in + * order to determine the full length of the field (if the field is not complex) or the number of sub-fields to + * read in order to determine the full field (if this field is complex). If false, the value of + * {@link #getNumberOfBytes()} simply tells us how many bytes must be read in order to read the entire field. + * + * @return whether or not the field is length-encoded. + */ + public boolean isLengthEncoded() { + return lengthEncoded; + } + + /** + * @return the Java type that corresponds to this FieldType + */ + public Class getJavaClass() { + return javaClass; + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/MapRecordField.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/MapRecordField.java new file mode 100644 index 0000000000..c15d2db3f1 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/MapRecordField.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.nifi.repository.schema; + +import static java.util.Objects.requireNonNull; + +import java.util.ArrayList; +import java.util.List; + +public class MapRecordField implements RecordField { + private final String fieldName; + private final RecordField keyField; + private final RecordField valueField; + private final Repetition repetition; + private final List subFields; + + public MapRecordField(final String fieldName, final RecordField keyField, final RecordField valueField, final Repetition repetition) { + this.fieldName = requireNonNull(fieldName); + this.keyField = requireNonNull(keyField); + this.valueField = requireNonNull(valueField); + this.repetition = requireNonNull(repetition); + + subFields = new ArrayList<>(2); + subFields.add(keyField); + subFields.add(valueField); + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public FieldType getFieldType() { + return FieldType.MAP; + } + + @Override + public Repetition getRepetition() { + return repetition; + } + + @Override + public List getSubFields() { + return subFields; + } + + public RecordField getKeyField() { + return keyField; + } + + public RecordField getValueField() { + return valueField; + } + + @Override + public String toString() { + return "MapRecordField[" + fieldName + "]"; + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/NamedValue.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/NamedValue.java new file mode 100644 index 0000000000..1a87121156 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/NamedValue.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +public class NamedValue { + private final String name; + private final Object value; + + public NamedValue(final String name, final Object value) { + this.name = name; + this.value = value; + } + + public String getName() { + return name; + } + + public Object getValue() { + return value; + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Record.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Record.java new file mode 100644 index 0000000000..98b8a80001 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Record.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +public interface Record { + + RecordSchema getSchema(); + + Object getFieldValue(String fieldName); + + default Object getFieldValue(RecordField field) { + return getFieldValue(field.getFieldName()); + } + +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordField.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordField.java new file mode 100644 index 0000000000..4956b44af4 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordField.java @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import java.util.List; + +public interface RecordField { + String getFieldName(); + + FieldType getFieldType(); + + Repetition getRepetition(); + + List getSubFields(); +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordSchema.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordSchema.java new file mode 100644 index 0000000000..965254bc54 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/RecordSchema.java @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.repository.schema; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class RecordSchema { + private static final String FIELD_NAME = "Field Name"; + private static final String FIELD_TYPE = "Field Type"; + private static final String REPETITION = "Repetition"; + private static final String SUBFIELDS = "SubFields"; + + private static final String STRING_TYPE = "String"; + private static final String INT_TYPE = "Integer"; + private static final String LONG_TYPE = "Long"; + private static final String SUBFIELD_TYPE = "SubFieldList"; + + private final List fields; + + public RecordSchema(final List fields) { + this.fields = fields; + } + + public RecordSchema(final RecordField... fields) { + this(Arrays.asList(fields)); + } + + public List getFields() { + return fields; + } + + public RecordField getField(final String fieldName) { + return fields.stream() + .filter(field -> field.getFieldName().equals(fieldName)) + .findFirst() + .orElse(null); + } + + public void writeTo(final OutputStream out) throws IOException { + try { + final DataOutputStream dos = (out instanceof DataOutputStream) ? (DataOutputStream) out : new DataOutputStream(out); + + dos.writeInt(fields.size()); + for (final RecordField field : fields) { + writeField(field, dos); + } + } catch (final IOException ioe) { + throw new IOException("Unable to write Record Schema to stream", ioe); + } + } + + private void writeField(final RecordField field, final DataOutputStream dos) throws IOException { + dos.writeInt(4); // A field is made up of 4 "elements": Field Name, Field Type, Field Repetition, Sub-Fields. + + // For each of the elements, we write a String indicating the Element Name, a String indicating the Element Type, and + // finally the Element data itself. + dos.writeUTF(FIELD_NAME); + dos.writeUTF(STRING_TYPE); + dos.writeUTF(field.getFieldName()); + + dos.writeUTF(FIELD_TYPE); + dos.writeUTF(STRING_TYPE); + dos.writeUTF(field.getFieldType().name()); + + dos.writeUTF(REPETITION); + dos.writeUTF(STRING_TYPE); + dos.writeUTF(field.getRepetition().name()); + + dos.writeUTF(SUBFIELDS); + dos.writeUTF(SUBFIELD_TYPE); + final List subFields = field.getSubFields(); + dos.writeInt(subFields.size()); // SubField is encoded as number of Sub-Fields followed by the fields themselves. + for (final RecordField subField : subFields) { + writeField(subField, dos); + } + } + + public static RecordSchema readFrom(final InputStream in) throws IOException { + try { + final DataInputStream dis = (in instanceof DataInputStream) ? 
(DataInputStream) in : new DataInputStream(in); + + final int numFields = dis.readInt(); + final List fields = new ArrayList<>(numFields); + + for (int i = 0; i < numFields; i++) { + final RecordField field = readField(dis); + fields.add(field); + } + + return new RecordSchema(fields); + } catch (final IOException ioe) { + throw new IOException("Unable to read Record Schema from stream", ioe); + } + } + + @SuppressWarnings("unchecked") + private static RecordField readField(final DataInputStream dis) throws IOException { + final Map schemaFieldMap = new HashMap<>(); + final int numElementsToRead = dis.readInt(); + for (int i = 0; i < numElementsToRead; i++) { + final String fieldName = dis.readUTF(); + final String typeName = dis.readUTF(); + Object fieldValue = null; + + switch (typeName) { + case STRING_TYPE: + fieldValue = dis.readUTF(); + break; + case INT_TYPE: + fieldValue = dis.readInt(); + break; + case LONG_TYPE: + fieldValue = dis.readLong(); + break; + case SUBFIELD_TYPE: { + final int numFields = dis.readInt(); + final List subFields = new ArrayList<>(numFields); + for (int j = 0; j < numFields; j++) { + subFields.add(readField(dis)); + } + fieldValue = subFields; + break; + } + default: { + throw new IOException("Cannot read schema because the schema definition contains a field named '" + + fieldName + "' with a Field Type of '" + typeName + "', which is not a known Field Type"); + } + } + + schemaFieldMap.put(fieldName, fieldValue); + } + + final String fieldName = (String) schemaFieldMap.get(FIELD_NAME); + final String fieldTypeName = (String) schemaFieldMap.get(FIELD_TYPE); + final String repetitionName = (String) schemaFieldMap.get(REPETITION); + List subFields = (List) schemaFieldMap.get(SUBFIELDS); + if (subFields == null) { + subFields = Collections.emptyList(); + } + + final Repetition repetition = Repetition.valueOf(repetitionName); + if (FieldType.COMPLEX.name().equals(fieldTypeName)) { + return new ComplexRecordField(fieldName, repetition, subFields); + } else if (FieldType.UNION.name().equals(fieldTypeName)) { + return new UnionRecordField(fieldName, repetition, subFields); + } else if (FieldType.MAP.name().equals(fieldTypeName)) { + if (subFields.size() != 2) { + throw new IOException("Found a Map that did not have a 'Key' field and a 'Value' field but instead had " + subFields.size() + " fields: " + subFields); + } + + final RecordField keyField = subFields.get(0); + final RecordField valueField = subFields.get(1); + return new MapRecordField(fieldName, keyField, valueField, repetition); + } + + return new SimpleRecordField(fieldName, FieldType.valueOf(fieldTypeName), repetition); + } + + @Override + public String toString() { + return "RecordSchema[" + fields + "]"; + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Repetition.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Repetition.java new file mode 100644 index 0000000000..6220bf7be8 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/Repetition.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +public enum Repetition { + ZERO_OR_ONE, EXACTLY_ONE, ZERO_OR_MORE; +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordReader.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordReader.java new file mode 100644 index 0000000000..b296b13bc2 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordReader.java @@ -0,0 +1,191 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + + +public class SchemaRecordReader { + private final RecordSchema schema; + + public SchemaRecordReader(final RecordSchema schema) { + this.schema = schema; + } + + public static SchemaRecordReader fromSchema(final RecordSchema schema) { + return new SchemaRecordReader(schema); + } + + private static void fillBuffer(final InputStream in, final byte[] destination) throws IOException { + int bytesRead = 0; + int len; + while (bytesRead < destination.length) { + len = in.read(destination, bytesRead, destination.length - bytesRead); + if (len < 0) { + throw new EOFException(); + } + + bytesRead += len; + } + } + + public Record readRecord(final InputStream in) throws IOException { + final int sentinelByte = in.read(); + if (sentinelByte < 0) { + return null; + } + + if (sentinelByte != 1) { + throw new IOException("Expected to read a Sentinel Byte of '1' but got a value of '" + sentinelByte + "' instead"); + } + + final List schemaFields = schema.getFields(); + final Map fields = new HashMap<>(schemaFields.size()); + + for (final RecordField field : schema.getFields()) { + final Object value = readField(in, field); + fields.put(field, value); + } + + return new FieldMapRecord(fields, schema); + } + + + private Object readField(final InputStream in, final RecordField field) throws IOException { + switch (field.getRepetition()) { + case ZERO_OR_MORE: { + // If 
repetition is 0+ then that means we have a list and need to read how many items are in the list. + final int iterations = readInt(in); + if (iterations == 0) { + return Collections.emptyList(); + } + + final List value = new ArrayList<>(iterations); + for (int i = 0; i < iterations; i++) { + value.add(readFieldValue(in, field.getFieldType(), field.getFieldName(), field.getSubFields())); + } + + return value; + } + case ZERO_OR_ONE: { + // If repetition is 0 or 1 (optional), then check if next byte is a 0, which means field is absent or 1, which means + // field is present. Otherwise, throw an Exception. + final int nextByte = in.read(); + if (nextByte == -1) { + throw new EOFException("Unexpected End-of-File when attempting to read Repetition value for field '" + field.getFieldName() + "'"); + } + if (nextByte == 0) { + return null; + } + if (nextByte != 1) { + throw new IOException("Invalid Boolean value found when reading 'Repetition' of field '" + field.getFieldName() + "'. Expected 0 or 1 but got " + (nextByte & 0xFF)); + } + } + } + + return readFieldValue(in, field.getFieldType(), field.getFieldName(), field.getSubFields()); + } + + + private Object readFieldValue(final InputStream in, final FieldType fieldType, final String fieldName, final List subFields) throws IOException { + switch (fieldType) { + case BOOLEAN: { + final DataInputStream dis = new DataInputStream(in); + return dis.readBoolean(); + } + case INT: { + return readInt(in); + } + case LONG: { + final DataInputStream dis = new DataInputStream(in); + return dis.readLong(); + } + case STRING: { + final DataInputStream dis = new DataInputStream(in); + return dis.readUTF(); + } + case LONG_STRING: { + final int length = readInt(in); + final byte[] buffer = new byte[length]; + fillBuffer(in, buffer); + return new String(buffer, StandardCharsets.UTF_8); + } + case BYTE_ARRAY: { + final int length = readInt(in); + final byte[] buffer = new byte[length]; + fillBuffer(in, buffer); + return buffer; + } + case MAP: { + final int numEntries = readInt(in); + final RecordField keyField = subFields.get(0); + final RecordField valueField = subFields.get(1); + + final Map entries = new HashMap<>(numEntries); + for (int i = 0; i < numEntries; i++) { + final Object key = readField(in, keyField); + final Object value = readField(in, valueField); + entries.put(key, value); + } + + return entries; + } + case COMPLEX: { + final int numSubFields = subFields.size(); + final Map subFieldValues = new HashMap<>(numSubFields); + for (int i = 0; i < numSubFields; i++) { + final Object subFieldValue = readField(in, subFields.get(i)); + subFieldValues.put(subFields.get(i), subFieldValue); + } + + return new FieldMapRecord(subFieldValues, new RecordSchema(subFields)); + } + case UNION: { + final DataInputStream dis = new DataInputStream(in); + final String childFieldType = dis.readUTF(); + final Optional fieldOption = subFields.stream().filter(field -> field.getFieldName().equals(childFieldType)).findFirst(); + if (!fieldOption.isPresent()) { + throw new IOException("Found a field of type '" + childFieldType + "' but that was not in the expected list of types"); + } + + final RecordField matchingField = fieldOption.get(); + return readField(in, matchingField); + } + default: { + throw new IOException("Unrecognized Field Type " + fieldType + " for field '" + fieldName + "'"); + } + } + } + + private int readInt(final InputStream in) throws IOException { + final byte[] buffer = new byte[4]; + fillBuffer(in, buffer); + return 
ByteBuffer.wrap(buffer).getInt(); + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordWriter.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordWriter.java new file mode 100644 index 0000000000..469388962f --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SchemaRecordWriter.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +public class SchemaRecordWriter { + + public void writeRecord(final Record record, final OutputStream out) throws IOException { + // write sentinel value to indicate that there is a record. This allows the reader to then read one + // byte and check if -1. If so, the reader knows there are no more records. If not, then the reader + // knows that it should be able to continue reading. + out.write(1); + writeRecordFields(record, out); + } + + private void writeRecordFields(final Record record, final OutputStream out) throws IOException { + final DataOutputStream dos = out instanceof DataOutputStream ? 
(DataOutputStream) out : new DataOutputStream(out); + for (final RecordField field : record.getSchema().getFields()) { + final Object value = record.getFieldValue(field); + + try { + writeFieldRepetitionAndValue(field, value, dos); + } catch (final Exception e) { + throw new IOException("Failed to write field '" + field.getFieldName() + "'", e); + } + } + } + + private void writeFieldRepetitionAndValue(final RecordField field, final Object value, final DataOutputStream dos) throws IOException { + switch (field.getRepetition()) { + case EXACTLY_ONE: { + if (value == null) { + throw new IllegalArgumentException("Record does not have a value for the '" + field.getFieldName() + "' but the field is required"); + } + writeFieldValue(field, value, dos); + break; + } + case ZERO_OR_MORE: { + if (value == null) { + dos.writeInt(0); + break; + } + + if (!(value instanceof Collection)) { + throw new IllegalArgumentException("Record contains a value of type '" + value.getClass() + + "' for the '" + field.getFieldName() + "' but expected a Collection because the Repetition for the field is " + field.getRepetition()); + } + + final Collection collection = (Collection) value; + dos.writeInt(collection.size()); + for (final Object fieldValue : collection) { + writeFieldValue(field, fieldValue, dos); + } + break; + } + case ZERO_OR_ONE: { + if (value == null) { + dos.write(0); + break; + } + dos.write(1); + writeFieldValue(field, value, dos); + break; + } + } + } + + @SuppressWarnings("unchecked") + private void writeFieldValue(final RecordField field, final Object value, final DataOutputStream out) throws IOException { + switch (field.getFieldType()) { + case BOOLEAN: + out.writeBoolean((boolean) value); + break; + case BYTE_ARRAY: + final byte[] array = (byte[]) value; + out.writeInt(array.length); + out.write(array); + break; + case INT: + out.writeInt((Integer) value); + break; + case LONG: + out.writeLong((Long) value); + break; + case STRING: + out.writeUTF((String) value); + break; + case LONG_STRING: + final byte[] charArray = ((String) value).getBytes(StandardCharsets.UTF_8); + out.writeInt(charArray.length); + out.write(charArray); + break; + case MAP: + final Map map = (Map) value; + out.writeInt(map.size()); + final List subFields = field.getSubFields(); + final RecordField keyField = subFields.get(0); + final RecordField valueField = subFields.get(1); + + for (final Map.Entry entry : map.entrySet()) { + writeFieldRepetitionAndValue(keyField, entry.getKey(), out); + writeFieldRepetitionAndValue(valueField, entry.getValue(), out); + } + break; + case UNION: + final NamedValue namedValue = (NamedValue) value; + out.writeUTF(namedValue.getName()); + final Record childRecord = (Record) namedValue.getValue(); + writeRecordFields(childRecord, out); + break; + case COMPLEX: + final Record record = (Record) value; + writeRecordFields(record, out); + break; + } + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SimpleRecordField.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SimpleRecordField.java new file mode 100644 index 0000000000..d756cdd717 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/SimpleRecordField.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +public class SimpleRecordField implements RecordField { + private final String fieldName; + private final FieldType fieldType; + private final Repetition repetition; + + public SimpleRecordField(final String fieldName, final FieldType fieldType, final Repetition repetition) { + Objects.requireNonNull(fieldName); + Objects.requireNonNull(fieldType); + Objects.requireNonNull(repetition); + + this.fieldName = fieldName; + this.fieldType = fieldType; + this.repetition = repetition; + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public FieldType getFieldType() { + return fieldType; + } + + @Override + public Repetition getRepetition() { + return repetition; + } + + @Override + public List getSubFields() { + return Collections.emptyList(); + } + + @Override + public int hashCode() { + return 31 + fieldName.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (this == obj) { + return true; + } + if (obj == null) { + return false; + } + if (!(obj instanceof RecordField)) { + return false; + } + + final RecordField other = (RecordField) obj; + return fieldName.equals(other.getFieldName()); + } + + @Override + public String toString() { + return "SimpleRecordField[fieldName=" + fieldName + ", type=" + fieldType.name() + "]"; + } +} diff --git a/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/UnionRecordField.java b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/UnionRecordField.java new file mode 100644 index 0000000000..1b5d03fc84 --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/main/java/org/apache/nifi/repository/schema/UnionRecordField.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.repository.schema; + +import static java.util.Objects.requireNonNull; + +import java.util.Arrays; +import java.util.List; + +public class UnionRecordField implements RecordField { + private final String fieldName; + private final Repetition repetition; + private final List possibilities; + + public UnionRecordField(final String fieldName, final Repetition repetition, final RecordField... possibilities) { + this(fieldName, repetition, Arrays.asList(possibilities)); + } + + public UnionRecordField(final String fieldName, final Repetition repetition, final List possibilities) { + this.fieldName = requireNonNull(fieldName); + this.repetition = requireNonNull(repetition); + this.possibilities = requireNonNull(possibilities); + } + + @Override + public String getFieldName() { + return fieldName; + } + + @Override + public FieldType getFieldType() { + return FieldType.UNION; + } + + @Override + public Repetition getRepetition() { + return repetition; + } + + @Override + public List getSubFields() { + return possibilities; + } + + @Override + public String toString() { + return "UnionRecordField[name=" + fieldName + ", possible types=" + possibilities + "]"; + } +} diff --git a/nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReader.java b/nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReader.java new file mode 100644 index 0000000000..6099814c1a --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReader.java @@ -0,0 +1,281 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.repository.schema; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Test; + +public class TestSchemaRecordReader { + + @Test + public void testReadExactlyOnceFields() throws IOException { + final List fields = new ArrayList<>(); + fields.add(new SimpleRecordField("int", FieldType.INT, Repetition.EXACTLY_ONE)); + fields.add(new SimpleRecordField("boolean", FieldType.BOOLEAN, Repetition.EXACTLY_ONE)); + fields.add(new SimpleRecordField("byte array", FieldType.BYTE_ARRAY, Repetition.EXACTLY_ONE)); + fields.add(new SimpleRecordField("long", FieldType.LONG, Repetition.EXACTLY_ONE)); + fields.add(new SimpleRecordField("string", FieldType.STRING, Repetition.EXACTLY_ONE)); + fields.add(new SimpleRecordField("long string", FieldType.LONG_STRING, Repetition.EXACTLY_ONE)); + fields.add(new ComplexRecordField("complex", Repetition.EXACTLY_ONE, + new SimpleRecordField("key", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("value", FieldType.STRING, Repetition.EXACTLY_ONE))); + fields.add(new MapRecordField("map", + new SimpleRecordField("key", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("value", FieldType.STRING, Repetition.ZERO_OR_ONE), Repetition.EXACTLY_ONE)); + fields.add(new UnionRecordField("union1", Repetition.EXACTLY_ONE, Arrays.asList(new RecordField[] { + new SimpleRecordField("one", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("two", FieldType.INT, Repetition.EXACTLY_ONE) + }))); + fields.add(new UnionRecordField("union2", Repetition.EXACTLY_ONE, Arrays.asList(new RecordField[] { + new SimpleRecordField("one", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("two", FieldType.INT, Repetition.EXACTLY_ONE) + }))); + final RecordSchema schema = new RecordSchema(fields); + + final SchemaRecordReader reader = SchemaRecordReader.fromSchema(schema); + + final byte[] buffer; + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream dos = new DataOutputStream(baos)) { + + dos.write(1); // sentinel byte + dos.writeInt(42); + dos.writeBoolean(true); + final byte[] array = "hello".getBytes(); + dos.writeInt(array.length); + dos.write(array); + + dos.writeLong(42L); + dos.writeUTF("hello"); + + final String longString = "hello"; + final byte[] longStringArray = longString.getBytes(StandardCharsets.UTF_8); + dos.writeInt(longStringArray.length); + dos.write(longStringArray); + + dos.writeUTF("key"); + dos.writeUTF("value"); + + dos.writeInt(2); + dos.writeUTF("key1"); + dos.writeBoolean(true); + dos.writeUTF("value1"); + dos.writeUTF("key2"); + dos.writeBoolean(false); + + dos.writeUTF("one"); + dos.writeUTF("hello"); + + dos.writeUTF("two"); + dos.writeInt(42); + + buffer = baos.toByteArray(); + } + + try (final ByteArrayInputStream in = new ByteArrayInputStream(buffer)) { + final Record record = reader.readRecord(in); + assertNotNull(record); + + assertEquals(42, record.getFieldValue("int")); + assertTrue((boolean) record.getFieldValue("boolean")); + 
assertTrue(Arrays.equals("hello".getBytes(), (byte[]) record.getFieldValue("byte array"))); + assertEquals(42L, record.getFieldValue("long")); + assertEquals("hello", record.getFieldValue("string")); + assertEquals("hello", record.getFieldValue("long string")); + + final Record complexRecord = (Record) record.getFieldValue("complex"); + assertEquals("key", complexRecord.getFieldValue(new SimpleRecordField("key", FieldType.STRING, Repetition.EXACTLY_ONE))); + assertEquals("value", complexRecord.getFieldValue(new SimpleRecordField("value", FieldType.STRING, Repetition.EXACTLY_ONE))); + + final Map map = new HashMap<>(); + map.put("key1", "value1"); + map.put("key2", null); + assertEquals(map, record.getFieldValue("map")); + + assertEquals("hello", record.getFieldValue("union1")); + assertEquals(42, record.getFieldValue("union2")); + } + } + + @Test + @SuppressWarnings("unchecked") + public void testReadZeroOrOneFields() throws IOException { + final List fields = new ArrayList<>(); + fields.add(new SimpleRecordField("int", FieldType.INT, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("int present", FieldType.INT, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("boolean", FieldType.BOOLEAN, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("boolean present", FieldType.BOOLEAN, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("byte array", FieldType.BYTE_ARRAY, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("byte array present", FieldType.BYTE_ARRAY, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long", FieldType.LONG, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long present", FieldType.LONG, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("string", FieldType.STRING, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("string present", FieldType.STRING, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long string", FieldType.LONG_STRING, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long string present", FieldType.LONG_STRING, Repetition.ZERO_OR_ONE)); + fields.add(new ComplexRecordField("complex", Repetition.ZERO_OR_ONE, + new SimpleRecordField("key", FieldType.STRING, Repetition.ZERO_OR_ONE), + new SimpleRecordField("value", FieldType.STRING, Repetition.ZERO_OR_ONE))); + fields.add(new ComplexRecordField("complex present", Repetition.ZERO_OR_ONE, + new SimpleRecordField("key", FieldType.STRING, Repetition.ZERO_OR_ONE), + new SimpleRecordField("value", FieldType.STRING, Repetition.ZERO_OR_ONE))); + fields.add(new MapRecordField("map", + new SimpleRecordField("key", FieldType.STRING, Repetition.ZERO_OR_ONE), + new SimpleRecordField("value", FieldType.STRING, Repetition.ZERO_OR_MORE), Repetition.ZERO_OR_ONE)); + fields.add(new MapRecordField("map present", + new SimpleRecordField("key", FieldType.STRING, Repetition.ZERO_OR_ONE), + new SimpleRecordField("value", FieldType.STRING, Repetition.ZERO_OR_MORE), Repetition.ZERO_OR_ONE)); + fields.add(new UnionRecordField("union", Repetition.ZERO_OR_ONE, Arrays.asList(new RecordField[] { + new SimpleRecordField("one", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("two", FieldType.INT, Repetition.EXACTLY_ONE) + }))); + fields.add(new UnionRecordField("union present", Repetition.ZERO_OR_ONE, Arrays.asList(new RecordField[] { + new SimpleRecordField("one", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("two", FieldType.INT, Repetition.ZERO_OR_MORE) + }))); + + final 
RecordSchema schema = new RecordSchema(fields); + + final SchemaRecordReader reader = SchemaRecordReader.fromSchema(schema); + + // for each field, make the first one missing and the second one present. + final byte[] buffer; + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + final DataOutputStream dos = new DataOutputStream(baos)) { + + dos.write(1); // sentinel byte + dos.write(0); + dos.writeByte(1); + dos.writeInt(42); + + dos.write(0); + dos.writeByte(1); + dos.writeBoolean(true); + + final byte[] array = "hello".getBytes(); + dos.write(0); + dos.writeByte(1); + dos.writeInt(array.length); + dos.write(array); + + dos.write(0); + dos.writeByte(1); + dos.writeLong(42L); + + dos.write(0); + dos.writeByte(1); + dos.writeUTF("hello"); + + final String longString = "hello"; + final byte[] longStringArray = longString.getBytes(StandardCharsets.UTF_8); + dos.write(0); + dos.writeByte(1); + dos.writeInt(longStringArray.length); + dos.write(longStringArray); + + dos.write(0); + dos.writeByte(1); + dos.writeByte(1); + dos.writeUTF("key"); + dos.writeByte(0); + + dos.writeBoolean(false); // map not present + dos.writeBoolean(true); // map present + dos.writeInt(2); // 2 entries in the map + dos.writeBoolean(true); // key present + dos.writeUTF("key1"); + dos.writeInt(2); // 2 values + dos.writeUTF("one"); + dos.writeUTF("two"); + dos.writeBoolean(false); // key not present + dos.writeInt(1); + dos.writeUTF("three"); + + dos.writeBoolean(false); + dos.writeBoolean(true); + dos.writeUTF("two"); + dos.writeInt(3); // 3 entries + dos.writeInt(1); + dos.writeInt(2); + dos.writeInt(3); + + buffer = baos.toByteArray(); + } + + try (final ByteArrayInputStream in = new ByteArrayInputStream(buffer)) { + final Record record = reader.readRecord(in); + assertNotNull(record); + + // Read everything into a map and make sure that no value is missing that has a name ending in " present" + final Map valueMap = new HashMap<>(); + for (final RecordField field : record.getSchema().getFields()) { + final Object value = record.getFieldValue(field); + if (value == null) { + assertFalse(field.getFieldName().endsWith(" present")); + continue; + } + + valueMap.put(field.getFieldName(), value); + } + + assertEquals(42, valueMap.get("int present")); + assertTrue((boolean) valueMap.get("boolean present")); + assertTrue(Arrays.equals("hello".getBytes(), (byte[]) valueMap.get("byte array present"))); + assertEquals(42L, valueMap.get("long present")); + assertEquals("hello", valueMap.get("string present")); + assertEquals("hello", valueMap.get("long string present")); + + final Record complexRecord = (Record) valueMap.get("complex present"); + assertEquals("key", complexRecord.getFieldValue(new SimpleRecordField("key", FieldType.STRING, Repetition.EXACTLY_ONE))); + assertNull(complexRecord.getFieldValue(new SimpleRecordField("value", FieldType.STRING, Repetition.EXACTLY_ONE))); + + final Map> map = (Map>) valueMap.get("map present"); + assertNotNull(map); + assertEquals(2, map.size()); + assertTrue(map.containsKey(null)); + assertTrue(map.containsKey("key1")); + + final List key1Values = Arrays.asList(new String[] {"one", "two"}); + assertEquals(key1Values, map.get("key1")); + final List nullKeyValues = Arrays.asList(new String[] {"three"}); + assertEquals(nullKeyValues, map.get(null)); + + final List unionValues = (List) valueMap.get("union present"); + assertEquals(3, unionValues.size()); + assertEquals(1, unionValues.get(0).intValue()); + assertEquals(2, unionValues.get(1).intValue()); + assertEquals(3, 
unionValues.get(2).intValue()); + } + } +} diff --git a/nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReaderWriter.java b/nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReaderWriter.java new file mode 100644 index 0000000000..18548fb2be --- /dev/null +++ b/nifi-commons/nifi-schema-utils/src/test/java/org/apache/nifi/repository/schema/TestSchemaRecordReaderWriter.java @@ -0,0 +1,178 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.repository.schema; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Test; + +public class TestSchemaRecordReaderWriter { + + @Test + @SuppressWarnings("unchecked") + public void testRoundTrip() throws IOException { + // Create a 'complex' record that contains two different types of fields - a string and an int. + final List complexFieldList1 = new ArrayList<>(); + complexFieldList1.add(createField("string field", FieldType.STRING)); + complexFieldList1.add(createField("int field", FieldType.INT)); + final ComplexRecordField complexField1 = new ComplexRecordField("complex1", Repetition.EXACTLY_ONE, complexFieldList1); + final Map complexMap1 = new LinkedHashMap<>(); + final RecordField stringField = createField("string field", FieldType.STRING); + final RecordField intField = createField("int field", FieldType.INT); + complexMap1.put(stringField, "apples"); + complexMap1.put(intField, 100); + final FieldMapRecord complexRecord1 = new FieldMapRecord(complexMap1, new RecordSchema(stringField, intField)); + + // Create another 'complex' record that contains two other types of fields - a long string and a long. 
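+ // Together with the 'complex1' record above, this gives the union field defined below one value of each possible type.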
+ final List complexFieldList2 = new ArrayList<>(); + complexFieldList2.add(createField("long string field", FieldType.LONG_STRING)); + complexFieldList2.add(createField("long field", FieldType.LONG)); + final ComplexRecordField complexField2 = new ComplexRecordField("complex2", Repetition.EXACTLY_ONE, complexFieldList2); + final Map complexMap2 = new LinkedHashMap<>(); + final RecordField longStringField = createField("long string field", FieldType.LONG_STRING); + final RecordField longField = createField("long field", FieldType.LONG); + complexMap2.put(longStringField, "oranges"); + complexMap2.put(longField, Long.MAX_VALUE); + final FieldMapRecord complexRecord2 = new FieldMapRecord(complexMap2, new RecordSchema(longStringField, longField)); + + // Create a Union Field that indicates that the type could be either 'complex 1' or 'complex 2' + final UnionRecordField unionRecordField = new UnionRecordField("union", Repetition.ZERO_OR_MORE, Arrays.asList(new RecordField[] {complexField1, complexField2})); + + // Create a Record Schema + final List fields = new ArrayList<>(); + fields.add(new SimpleRecordField("int", FieldType.INT, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("int present", FieldType.INT, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("boolean", FieldType.BOOLEAN, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("boolean present", FieldType.BOOLEAN, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("byte array", FieldType.BYTE_ARRAY, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("byte array present", FieldType.BYTE_ARRAY, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long", FieldType.LONG, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long present", FieldType.LONG, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("string", FieldType.STRING, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("string present", FieldType.STRING, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long string", FieldType.LONG_STRING, Repetition.ZERO_OR_ONE)); + fields.add(new SimpleRecordField("long string present", FieldType.LONG_STRING, Repetition.ZERO_OR_ONE)); + fields.add(new ComplexRecordField("complex present", Repetition.EXACTLY_ONE, + new SimpleRecordField("color", FieldType.STRING, Repetition.ZERO_OR_ONE), + new SimpleRecordField("fruit", FieldType.STRING, Repetition.ZERO_OR_ONE))); + fields.add(new MapRecordField("map present", + new SimpleRecordField("key", FieldType.STRING, Repetition.EXACTLY_ONE), + new SimpleRecordField("value", FieldType.INT, Repetition.EXACTLY_ONE), Repetition.ZERO_OR_ONE)); + fields.add(unionRecordField); + + final RecordSchema schema = new RecordSchema(fields); + + // Create a 'complex' record that contains two different elements. 
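+ // This color/fruit record is used below as the value of the required 'complex present' field.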
+ final RecordField colorField = createField("color", FieldType.STRING); + final RecordField fruitField = createField("fruit", FieldType.STRING); + final Map complexFieldMap = new LinkedHashMap<>(); + complexFieldMap.put(colorField, "red"); + complexFieldMap.put(fruitField, "apple"); + + // Create a simple map that can be used for a Map Field + final Map simpleMap = new HashMap<>(); + simpleMap.put("apples", 100); + + // Create a Map of record fields to values, so that we can create a Record to write out + final Map values = new LinkedHashMap<>(); + values.put(createField("int", FieldType.INT), 42); + values.put(createField("int present", FieldType.INT), 42); + values.put(createField("boolean present", FieldType.BOOLEAN), true); + values.put(createField("byte array present", FieldType.BYTE_ARRAY), "Hello".getBytes()); + values.put(createField("long present", FieldType.LONG), 42L); + values.put(createField("string present", FieldType.STRING), "Hello"); + values.put(createField("long string present", FieldType.LONG_STRING), "Long Hello"); + values.put(createField("complex present", FieldType.COMPLEX), new FieldMapRecord(complexFieldMap, new RecordSchema(colorField, fruitField))); + values.put(new MapRecordField("map present", createField("key", FieldType.STRING), createField("value", FieldType.INT), Repetition.EXACTLY_ONE), simpleMap); + values.put(unionRecordField, Arrays.asList(new NamedValue[] { + new NamedValue("complex1", complexRecord1), + new NamedValue("complex2", complexRecord2)})); + + final FieldMapRecord originalRecord = new FieldMapRecord(values, schema); + + // Write out a record and read it back in. + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream()) { + // Write the schema to the stream + schema.writeTo(baos); + + // Write the record twice, to make sure that we're able to read/write multiple sequential records + final SchemaRecordWriter writer = new SchemaRecordWriter(); + writer.writeRecord(originalRecord, baos); + writer.writeRecord(originalRecord, baos); + + try (final InputStream in = new ByteArrayInputStream(baos.toByteArray())) { + // Read the Schema from the stream and create a Record Reader for reading records, based on this schema + final RecordSchema readSchema = RecordSchema.readFrom(in); + final SchemaRecordReader reader = SchemaRecordReader.fromSchema(readSchema); + + // Read two records and verify the values. 
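+ // Both iterations should see identical values, confirming that sequential records can be read from the same stream.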
+ for (int i=0; i < 2; i++) { + final Record record = reader.readRecord(in); + + assertNotNull(record); + assertEquals(42, record.getFieldValue("int")); + assertEquals(42, record.getFieldValue("int present")); + assertEquals(true, record.getFieldValue("boolean present")); + assertTrue(Arrays.equals("Hello".getBytes(), (byte[]) record.getFieldValue("byte array present"))); + assertEquals(42L, record.getFieldValue("long present")); + assertEquals("Hello", record.getFieldValue("string present")); + assertEquals("Long Hello", record.getFieldValue("long string present")); + + final Record complexRecord = (Record) record.getFieldValue("complex present"); + assertEquals("red", complexRecord.getFieldValue("color")); + assertEquals("apple", complexRecord.getFieldValue("fruit")); + + assertEquals(simpleMap, record.getFieldValue("map present")); + + final List unionRecords = (List) record.getFieldValue("union"); + assertNotNull(unionRecords); + assertEquals(2, unionRecords.size()); + + final Record unionRecord1 = unionRecords.get(0); + assertEquals("apples", unionRecord1.getFieldValue("string field")); + assertEquals(100, unionRecord1.getFieldValue("int field")); + + final Record unionRecord2 = unionRecords.get(1); + assertEquals("oranges", unionRecord2.getFieldValue("long string field")); + assertEquals(Long.MAX_VALUE, unionRecord2.getFieldValue("long field")); + } + + // Ensure that there is no more data. + assertNull(reader.readRecord(in)); + } + } + } + + private SimpleRecordField createField(final String fieldName, final FieldType type) { + return new SimpleRecordField(fieldName, type, Repetition.ZERO_OR_ONE); + } +} diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/BufferedInputStream.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/BufferedInputStream.java index 2afaa70623..324f59f72f 100644 --- a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/BufferedInputStream.java +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/stream/io/BufferedInputStream.java @@ -16,19 +16,445 @@ */ package org.apache.nifi.stream.io; +import java.io.IOException; import java.io.InputStream; /** * This class is a slight modification of the BufferedInputStream in the java.io package. The modification is that this implementation does not provide synchronization on method calls, which means * that this class is not suitable for use by multiple threads. However, the absence of these synchronized blocks results in potentially much better performance. */ -public class BufferedInputStream extends java.io.BufferedInputStream { +public class BufferedInputStream extends InputStream { - public BufferedInputStream(final InputStream in) { - super(in); + private final InputStream in; + + private static int DEFAULT_BUFFER_SIZE = 8192; + + /** + * The maximum size of array to allocate. + * Some VMs reserve some header words in an array. + * Attempts to allocate larger arrays may result in + * OutOfMemoryError: Requested array size exceeds VM limit + */ + private static int MAX_BUFFER_SIZE = Integer.MAX_VALUE - 8; + + /** + * The internal buffer array where the data is stored. When necessary, + * it may be replaced by another array of + * a different size. + */ + protected byte buf[]; + + /** + * The index one greater than the index of the last valid byte in + * the buffer. + * This value is always + * in the range 0 through buf.length; + * elements buf[0] through buf[count-1] + * contain buffered input data obtained + * from the underlying input stream. 
+ */ + private int count; + + /** + * The current position in the buffer. This is the index of the next + * character to be read from the buf array. + *

+ * This value is always in the range 0 + * through count. If it is less + * than count, then buf[pos] + * is the next byte to be supplied as input; + * if it is equal to count, then + * the next read or skip + * operation will require more bytes to be + * read from the contained input stream. + * + * @see java.io.BufferedInputStream#buf + */ + private int pos; + + /** + * The value of the pos field at the time the last + * mark method was called. + *

+ * This value is always + * in the range -1 through pos. + * If there is no marked position in the input + * stream, this field is -1. If + * there is a marked position in the input + * stream, then buf[markpos] + * is the first byte to be supplied as input + * after a reset operation. If + * markpos is not -1, + * then all bytes from positions buf[markpos] + * through buf[pos-1] must remain + * in the buffer array (though they may be + * moved to another place in the buffer array, + * with suitable adjustments to the values + * of count, pos, + * and markpos); they may not + * be discarded unless and until the difference + * between pos and markpos + * exceeds marklimit. + * + * @see java.io.BufferedInputStream#mark(int) + * @see java.io.BufferedInputStream#pos + */ + protected int markpos = -1; + + /** + * The maximum read ahead allowed after a call to the + * mark method before subsequent calls to the + * reset method fail. + * Whenever the difference between pos + * and markpos exceeds marklimit, + * then the mark may be dropped by setting + * markpos to -1. + * + * @see java.io.BufferedInputStream#mark(int) + * @see java.io.BufferedInputStream#reset() + */ + protected int marklimit; + + /** + * Check to make sure that underlying input stream has not been + * nulled out due to close; if not return it; + */ + private InputStream getInIfOpen() throws IOException { + InputStream input = in; + if (input == null) { + throw new IOException("Stream closed"); + } + return input; } - public BufferedInputStream(final InputStream in, final int size) { - super(in, size); + /** + * Check to make sure that buffer has not been nulled out due to + * close; if not return it; + */ + private byte[] getBufIfOpen() throws IOException { + if (buf == null) { + throw new IOException("Stream closed"); + } + return buf; + } + + /** + * Creates a BufferedInputStream + * and saves its argument, the input stream + * in, for later use. An internal + * buffer array is created and stored in buf. + * + * @param in the underlying input stream. + */ + public BufferedInputStream(InputStream in) { + this(in, DEFAULT_BUFFER_SIZE); + } + + /** + * Creates a BufferedInputStream + * with the specified buffer size, + * and saves its argument, the input stream + * in, for later use. An internal + * buffer array of length size + * is created and stored in buf. + * + * @param in the underlying input stream. + * @param size the buffer size. + * @exception IllegalArgumentException if {@code size <= 0}. + */ + public BufferedInputStream(InputStream in, int size) { + this.in = in; + if (size <= 0) { + throw new IllegalArgumentException("Buffer size <= 0"); + } + buf = new byte[size]; + } + + /** + * Fills the buffer with more data, taking into account + * shuffling and other tricks for dealing with marks. + * Assumes that it is being called by a synchronized method. + * This method also assumes that all data has already been read in, + * hence pos > count. 
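+ * + * Note: unlike java.io.BufferedInputStream, this copy performs no synchronization, so callers must ensure single-threaded access.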
+ */ + private void fill() throws IOException { + byte[] buffer = getBufIfOpen(); + if (markpos < 0) { + pos = 0; /* no mark: throw away the buffer */ + } else if (pos >= buffer.length) { + if (markpos > 0) { /* can throw away early part of the buffer */ + int sz = pos - markpos; + System.arraycopy(buffer, markpos, buffer, 0, sz); + pos = sz; + markpos = 0; + } else if (buffer.length >= marklimit) { + markpos = -1; /* buffer got too big, invalidate mark */ + pos = 0; /* drop buffer contents */ + } else if (buffer.length >= MAX_BUFFER_SIZE) { + throw new OutOfMemoryError("Required array size too large"); + } else { /* grow buffer */ + int nsz = (pos <= MAX_BUFFER_SIZE - pos) ? pos * 2 : MAX_BUFFER_SIZE; + if (nsz > marklimit) { + nsz = marklimit; + } + byte nbuf[] = new byte[nsz]; + System.arraycopy(buffer, 0, nbuf, 0, pos); + buffer = nbuf; + } + } + count = pos; + int n = getInIfOpen().read(buffer, pos, buffer.length - pos); + if (n > 0) { + count = n + pos; + } + } + + /** + * See + * the general contract of the read + * method of InputStream. + * + * @return the next byte of data, or -1 if the end of the + * stream is reached. + * @exception IOException if this input stream has been closed by + * invoking its {@link #close()} method, + * or an I/O error occurs. + * @see java.io.FilterInputStream#in + */ + @Override + public int read() throws IOException { + if (pos >= count) { + fill(); + if (pos >= count) { + return -1; + } + } + return getBufIfOpen()[pos++] & 0xff; + } + + /** + * Read characters into a portion of an array, reading from the underlying + * stream at most once if necessary. + */ + private int read1(byte[] b, int off, int len) throws IOException { + int avail = count - pos; + if (avail <= 0) { + /* + * If the requested length is at least as large as the buffer, and + * if there is no mark/reset activity, do not bother to copy the + * bytes into the local buffer. In this way buffered streams will + * cascade harmlessly. + */ + if (len >= getBufIfOpen().length && markpos < 0) { + return getInIfOpen().read(b, off, len); + } + fill(); + avail = count - pos; + if (avail <= 0) { + return -1; + } + } + int cnt = (avail < len) ? avail : len; + System.arraycopy(getBufIfOpen(), pos, b, off, cnt); + pos += cnt; + return cnt; + } + + /** + * Reads bytes from this byte-input stream into the specified byte array, + * starting at the given offset. + * + *

+ * This method implements the general contract of the corresponding + * {@link InputStream#read(byte[], int, int) read} method of + * the {@link InputStream} class. As an additional + * convenience, it attempts to read as many bytes as possible by repeatedly + * invoking the read method of the underlying stream. This + * iterated read continues until one of the following + * conditions becomes true: + *

    + * + *
+ *   • The specified number of bytes have been read, + *
+ *   • The read method of the underlying stream returns + * -1, indicating end-of-file, or + *
+ *   • The available method of the underlying stream + * returns zero, indicating that further input requests would block. + *
+ * If the first read on the underlying stream returns + * -1 to indicate end-of-file then this method returns + * -1. Otherwise this method returns the number of bytes + * actually read. + *

+ * Subclasses of this class are encouraged, but not required, to + * attempt to read as many bytes as possible in the same fashion. + * + * @param b destination buffer. + * @param off offset at which to start storing bytes. + * @param len maximum number of bytes to read. + * @return the number of bytes read, or -1 if the end of + * the stream has been reached. + * @exception IOException if this input stream has been closed by + * invoking its {@link #close()} method, + * or an I/O error occurs. + */ + @Override + public int read(byte b[], int off, int len) + throws IOException { + getBufIfOpen(); // Check for closed stream + if ((off | len | (off + len) | (b.length - (off + len))) < 0) { + throw new IndexOutOfBoundsException(); + } else if (len == 0) { + return 0; + } + + int n = 0; + for (;;) { + int nread = read1(b, off + n, len - n); + if (nread <= 0) { + return (n == 0) ? nread : n; + } + n += nread; + if (n >= len) { + return n; + } + // if not closed but no bytes available, return + InputStream input = in; + if (input != null && input.available() <= 0) { + return n; + } + } + } + + /** + * See the general contract of the skip + * method of InputStream. + * + * @exception IOException if the stream does not support seek, + * or if this input stream has been closed by + * invoking its {@link #close()} method, or an + * I/O error occurs. + */ + @Override + public long skip(long n) throws IOException { + getBufIfOpen(); // Check for closed stream + if (n <= 0) { + return 0; + } + long avail = count - pos; + + if (avail <= 0) { + // If no mark position set then don't keep in buffer + if (markpos < 0) { + return getInIfOpen().skip(n); + } + + // Fill in buffer to save bytes for reset + fill(); + avail = count - pos; + if (avail <= 0) { + return 0; + } + } + + long skipped = (avail < n) ? avail : n; + pos += skipped; + return skipped; + } + + /** + * Returns an estimate of the number of bytes that can be read (or + * skipped over) from this input stream without blocking by the next + * invocation of a method for this input stream. The next invocation might be + * the same thread or another thread. A single read or skip of this + * many bytes will not block, but may read or skip fewer bytes. + *

+ * This method returns the sum of the number of bytes remaining to be read in + * the buffer (count - pos) and the result of calling the + * {@link java.io.FilterInputStream#in in}.available(). + * + * @return an estimate of the number of bytes that can be read (or skipped + * over) from this input stream without blocking. + * @exception IOException if this input stream has been closed by + * invoking its {@link #close()} method, + * or an I/O error occurs. + */ + @Override + public int available() throws IOException { + int n = count - pos; + int avail = getInIfOpen().available(); + return n > (Integer.MAX_VALUE - avail) ? Integer.MAX_VALUE : n + avail; + } + + /** + * See the general contract of the mark + * method of InputStream. + * + * @param readlimit the maximum limit of bytes that can be read before + * the mark position becomes invalid. + * @see java.io.BufferedInputStream#reset() + */ + @Override + public void mark(int readlimit) { + marklimit = readlimit; + markpos = pos; + } + + /** + * See the general contract of the reset + * method of InputStream. + *

+ * If markpos is -1 + * (no mark has been set or the mark has been + * invalidated), an IOException + * is thrown. Otherwise, pos is + * set equal to markpos. + * + * @exception IOException if this stream has not been marked or, + * if the mark has been invalidated, or the stream + * has been closed by invoking its {@link #close()} + * method, or an I/O error occurs. + * @see java.io.BufferedInputStream#mark(int) + */ + @Override + public void reset() throws IOException { + getBufIfOpen(); // Cause exception if closed + if (markpos < 0) { + throw new IOException("Resetting to invalid mark"); + } + pos = markpos; + } + + /** + * Tests if this input stream supports the mark + * and reset methods. The markSupported + * method of BufferedInputStream returns + * true. + * + * @return a boolean indicating if this stream type supports + * the mark and reset methods. + * @see java.io.InputStream#mark(int) + * @see java.io.InputStream#reset() + */ + @Override + public boolean markSupported() { + return true; + } + + /** + * Closes this input stream and releases any system resources + * associated with the stream. + * Once the stream has been closed, further read(), available(), reset(), + * or skip() invocations will throw an IOException. + * Closing a previously closed stream has no effect. + * + * @exception IOException if an I/O error occurs. + */ + @Override + public void close() throws IOException { + this.in.close(); } } diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/FormatUtils.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/FormatUtils.java index a30c14e15a..cb9ab3635a 100644 --- a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/FormatUtils.java +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/FormatUtils.java @@ -210,4 +210,42 @@ public class FormatUtils { return sb.toString(); } + /** + * Formats nanoseconds in the format: + * 3 seconds, 8 millis, 3 nanos - if includeTotalNanos = false, + * 3 seconds, 8 millis, 3 nanos (3008000003 nanos) - if includeTotalNanos = true + * + * @param nanos the number of nanoseconds to format + * @param includeTotalNanos whether or not to include the total number of nanoseconds in parentheses in the returned value + * @return a human-readable String that is a formatted representation of the given number of nanoseconds. + */ + public static String formatNanos(final long nanos, final boolean includeTotalNanos) { + final StringBuilder sb = new StringBuilder(); + + final long seconds = nanos > 1000000000L ? nanos / 1000000000L : 0L; + long millis = nanos > 1000000L ? 
nanos / 1000000L : 0L; + final long nanosLeft = nanos % 1000000L; + + if (seconds > 0) { + sb.append(seconds).append(" seconds"); + } + if (millis > 0) { + if (seconds > 0) { + sb.append(", "); + millis -= seconds * 1000L; + } + + sb.append(millis).append(" millis"); + } + if (seconds > 0 || millis > 0) { + sb.append(", "); + } + sb.append(nanosLeft).append(" nanos"); + + if (includeTotalNanos) { + sb.append(" (").append(nanos).append(" nanos)"); + } + + return sb.toString(); + } } diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/CountSizeEntityAccess.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/CountSizeEntityAccess.java new file mode 100644 index 0000000000..ddbf29b791 --- /dev/null +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/CountSizeEntityAccess.java @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.util.timebuffer; + +public class CountSizeEntityAccess implements EntityAccess { + @Override + public TimedCountSize aggregate(final TimedCountSize oldValue, final TimedCountSize toAdd) { + if (oldValue == null && toAdd == null) { + return new TimedCountSize(0L, 0L); + } else if (oldValue == null) { + return toAdd; + } else if (toAdd == null) { + return oldValue; + } + + return new TimedCountSize(oldValue.getCount() + toAdd.getCount(), oldValue.getSize() + toAdd.getSize()); + } + + @Override + public TimedCountSize createNew() { + return new TimedCountSize(0L, 0L); + } + + @Override + public long getTimestamp(final TimedCountSize entity) { + return entity == null ? 0L : entity.getTimestamp(); + } +} diff --git a/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/TimedCountSize.java b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/TimedCountSize.java new file mode 100644 index 0000000000..f1df707201 --- /dev/null +++ b/nifi-commons/nifi-utils/src/main/java/org/apache/nifi/util/timebuffer/TimedCountSize.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.util.timebuffer; + +public class TimedCountSize { + private final long count; + private final long size; + private final long timestamp = System.currentTimeMillis(); + + public TimedCountSize(final long count, final long size) { + this.count = count; + this.size = size; + } + + public long getCount() { + return count; + } + + public long getSize() { + return size; + } + + public long getTimestamp() { + return timestamp; + } +} diff --git a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java index f20f9174cb..12e2b10acb 100644 --- a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java +++ b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/MinimalLockingWriteAheadLog.java @@ -80,7 +80,7 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor private final Path partialPath; private final Path snapshotPath; - private final SerDe serde; + private final SerDeFactory serdeFactory; private final SyncListener syncListener; private final FileChannel lockChannel; private final AtomicLong transactionIdGenerator = new AtomicLong(0L); @@ -105,7 +105,15 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor private volatile boolean recovered = false; public MinimalLockingWriteAheadLog(final Path path, final int partitionCount, final SerDe serde, final SyncListener syncListener) throws IOException { - this(new TreeSet<>(Collections.singleton(path)), partitionCount, serde, syncListener); + this(new TreeSet<>(Collections.singleton(path)), partitionCount, new SingletonSerDeFactory(serde), syncListener); + } + + public MinimalLockingWriteAheadLog(final Path path, final int partitionCount, final SerDeFactory serdeFactory, final SyncListener syncListener) throws IOException { + this(new TreeSet<>(Collections.singleton(path)), partitionCount, serdeFactory, syncListener); + } + + public MinimalLockingWriteAheadLog(final SortedSet paths, final int partitionCount, final SerDe serde, final SyncListener syncListener) throws IOException { + this(paths, partitionCount, new SingletonSerDeFactory(serde), syncListener); } /** @@ -116,16 +124,16 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor * @param partitionCount the number of partitions/journals to use. 
For best * performance, this should be close to the number of threads that are * expected to update the repository simultaneously - * @param serde the serializer/deserializer for records + * @param serdeFactory the factory for the serializer/deserializer for records * @param syncListener the listener * @throws IOException if unable to initialize due to IO issue */ @SuppressWarnings("unchecked") - public MinimalLockingWriteAheadLog(final SortedSet paths, final int partitionCount, final SerDe serde, final SyncListener syncListener) throws IOException { + public MinimalLockingWriteAheadLog(final SortedSet paths, final int partitionCount, final SerDeFactory serdeFactory, final SyncListener syncListener) throws IOException { this.syncListener = syncListener; requireNonNull(paths); - requireNonNull(serde); + requireNonNull(serdeFactory); if (paths.isEmpty()) { throw new IllegalArgumentException("Paths must be non-empty"); @@ -172,7 +180,7 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor this.basePath = paths.iterator().next(); this.partialPath = basePath.resolve("snapshot.partial"); this.snapshotPath = basePath.resolve("snapshot"); - this.serde = serde; + this.serdeFactory = serdeFactory; final Path lockPath = basePath.resolve("wali.lock"); lockChannel = new FileOutputStream(lockPath.toFile()).getChannel(); @@ -189,7 +197,7 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor final Path partitionBasePath = pathIterator.next(); - partitions[i] = new Partition<>(partitionBasePath.resolve("partition-" + i), serde, i, getVersion()); + partitions[i] = new Partition<>(partitionBasePath.resolve("partition-" + i), serdeFactory, i, getVersion()); } } @@ -242,13 +250,13 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor } for (final T record : records) { - final UpdateType updateType = serde.getUpdateType(record); - final Object recordIdentifier = serde.getRecordIdentifier(record); + final UpdateType updateType = serdeFactory.getUpdateType(record); + final Object recordIdentifier = serdeFactory.getRecordIdentifier(record); if (updateType == UpdateType.DELETE) { recordMap.remove(recordIdentifier); } else if (updateType == UpdateType.SWAP_OUT) { - final String newLocation = serde.getLocation(record); + final String newLocation = serdeFactory.getLocation(record); if (newLocation == null) { logger.error("Received Record (ID=" + recordIdentifier + ") with UpdateType of SWAP_OUT but " + "no indicator of where the Record is to be Swapped Out to; these records may be " @@ -258,7 +266,7 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor this.externalLocations.add(newLocation); } } else if (updateType == UpdateType.SWAP_IN) { - final String newLocation = serde.getLocation(record); + final String newLocation = serdeFactory.getLocation(record); if (newLocation == null) { logger.error("Received Record (ID=" + recordIdentifier + ") with UpdateType of SWAP_IN but no " + "indicator of where the Record is to be Swapped In from; these records may be duplicated " @@ -360,11 +368,14 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor + waliImplementationVersion + " of the " + waliImplementationClass + " class; cannot restore using Version " + getVersion()); } - dataIn.readUTF(); // ignore serde class name for now + final String serdeEncoding = dataIn.readUTF(); // ignore serde class name for now final int serdeVersion = dataIn.readInt(); final long maxTransactionId = 
dataIn.readLong(); final int numRecords = dataIn.readInt(); + final SerDe serde = serdeFactory.createSerDe(serdeEncoding); + serde.readHeader(dataIn); + for (int i = 0; i < numRecords; i++) { final T record = serde.deserializeRecord(dataIn, serdeVersion); if (record == null) { @@ -491,6 +502,8 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor long stopTheWorldNanos = -1L; long stopTheWorldStart = -1L; try { + final List partitionStreams = new ArrayList<>(); + writeLock.lock(); try { stopTheWorldStart = System.nanoTime(); @@ -512,25 +525,48 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor swapLocations = new HashSet<>(externalLocations); for (final Partition partition : partitions) { try { - partition.rollover(); + partitionStreams.add(partition.rollover()); } catch (final Throwable t) { partition.blackList(); numberBlackListedPartitions.getAndIncrement(); throw t; } } - - // notify global sync with the write lock held. We do this because we don't want the repository to get updated - // while the listener is performing its necessary tasks - if (syncListener != null) { - syncListener.onGlobalSync(); - } } finally { writeLock.unlock(); } stopTheWorldNanos = System.nanoTime() - stopTheWorldStart; + // Close all of the Partitions' Output Streams. We do this here, instead of in Partition.rollover() + // because we want to do this outside of the write lock. Because calling close() on FileOutputStream can + // be very expensive, as it has to flush the data to disk, we don't want to prevent other Process Sessions + // from getting committed. Since rollover() transitions the partition to write to a new file already, there + // is no reason that we need to close this FileOutputStream before releasing the write lock. Also, if any Exception + // does get thrown when calling close(), we don't need to blacklist the partition, as the stream that was getting + // closed is not the stream being written to for the partition anyway. We also catch any IOException and wait until + // after we've attempted to close all streams before we throw an Exception, to avoid resource leaks if one of them + // is unable to be closed (due to out of storage space, for instance). + IOException failure = null; + for (final OutputStream partitionStream : partitionStreams) { + try { + partitionStream.close(); + } catch (final IOException e) { + failure = e; + } + } + if (failure != null) { + throw failure; + } + + // notify global sync with the write lock held. 
We do this because we don't want the repository to get updated + // while the listener is performing its necessary tasks + if (syncListener != null) { + syncListener.onGlobalSync(); + } + + final SerDe serde = serdeFactory.createSerDe(null); + // perform checkpoint, writing to .partial file fileOut = new FileOutputStream(partialPath.toFile()); dataOut = new DataOutputStream(fileOut); @@ -540,6 +576,7 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor dataOut.writeInt(serde.getVersion()); dataOut.writeLong(maxTransactionId); dataOut.writeInt(records.size()); + serde.writeHeader(dataOut); for (final T record : records) { logger.trace("Checkpointing {}", record); @@ -627,7 +664,8 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor public static final String JOURNAL_EXTENSION = ".journal"; private static final Pattern JOURNAL_FILENAME_PATTERN = Pattern.compile("\\d+\\.journal"); - private final SerDe serde; + private final SerDeFactory serdeFactory; + private SerDe serde; private final Path editDirectory; private final int writeAheadLogVersion; @@ -650,9 +688,9 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor private final Queue recoveryFiles; - public Partition(final Path path, final SerDe serde, final int partitionIndex, final int writeAheadLogVersion) throws IOException { + public Partition(final Path path, final SerDeFactory serdeFactory, final int partitionIndex, final int writeAheadLogVersion) throws IOException { this.editDirectory = path; - this.serde = serde; + this.serdeFactory = serdeFactory; final File file = path.toFile(); if (!file.exists() && !file.mkdirs()) { @@ -744,24 +782,16 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor * * @throws IOException if failure to rollover */ - public void rollover() throws IOException { + public OutputStream rollover() throws IOException { lock.lock(); try { // Note that here we are closing fileOut and NOT dataOut. See the note in the close() // method to understand the logic behind this. 
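A minimal illustrative sketch of the pattern described in the checkpoint comment above, not part of this patch; names such as RollablePartition and checkpoint are stand-ins rather than NiFi API. Each partition is rolled over to a new journal while the write lock is held, and the old journal streams are closed only after the lock is released, with the first IOException rethrown once every stream has been attempted.

    import java.io.IOException;
    import java.io.OutputStream;
    import java.util.ArrayList;
    import java.util.List;
    import java.util.concurrent.locks.ReentrantLock;

    class RolloverSketch {
        private final ReentrantLock writeLock = new ReentrantLock();

        // Hypothetical stand-in for Partition.rollover(): switches the partition to a new
        // journal file and returns the previous journal's stream without closing it.
        interface RollablePartition {
            OutputStream rollover() throws IOException;
        }

        void checkpoint(final List<RollablePartition> partitions) throws IOException {
            final List<OutputStream> oldStreams = new ArrayList<>();

            writeLock.lock();
            try {
                for (final RollablePartition partition : partitions) {
                    oldStreams.add(partition.rollover());
                }
            } finally {
                writeLock.unlock();
            }

            // Closing (and therefore flushing) the old journals can be slow, so it is done
            // outside of the lock; every stream is attempted before any failure is rethrown.
            IOException failure = null;
            for (final OutputStream oldStream : oldStreams) {
                try {
                    oldStream.close();
                } catch (final IOException e) {
                    if (failure == null) {
                        failure = e;
                    } else {
                        failure.addSuppressed(e);
                    }
                }
            }

            if (failure != null) {
                throw failure;
            }
        }
    }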
- final OutputStream out = fileOut; - if (out != null) { - try { - out.close(); - } catch (final IOException ioe) { - dataOut = null; - fileOut = null; - - blackList(); - throw ioe; - } - } + final OutputStream oldOutputStream = fileOut; + dataOut = null; + fileOut = null; + this.serde = serdeFactory.createSerDe(null); final Path editPath = getNewEditPath(); final FileOutputStream fos = new FileOutputStream(editPath.toFile()); try { @@ -770,10 +800,18 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor outStream.writeInt(writeAheadLogVersion); outStream.writeUTF(serde.getClass().getName()); outStream.writeInt(serde.getVersion()); + serde.writeHeader(outStream); + outStream.flush(); dataOut = outStream; fileOut = fos; } catch (final IOException ioe) { + try { + oldOutputStream.close(); + } catch (final IOException ioe2) { + ioe.addSuppressed(ioe2); + } + logger.error("Failed to create new journal for {} due to {}", new Object[] {this, ioe.toString()}, ioe); try { fos.close(); @@ -790,6 +828,7 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor currentJournalFilename = editPath.toFile().getName(); blackListed = false; + return oldOutputStream; } finally { lock.unlock(); } @@ -959,9 +998,11 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor + "WALI version " + waliVersion + ", but the version used to restore it is only " + writeAheadLogVersion); } - @SuppressWarnings("unused") - final String serdeClassName = recoveryIn.readUTF(); + final String serdeEncoding = recoveryIn.readUTF(); this.recoveryVersion = recoveryIn.readInt(); + serde = serdeFactory.createSerDe(serdeEncoding); + + serde.readHeader(recoveryIn); break; } @@ -1009,12 +1050,15 @@ public final class MinimalLockingWriteAheadLog implements WriteAheadRepositor final Path newEditPath = getNewEditPath(); + this.serde = serdeFactory.createSerDe(null); final FileOutputStream fos = new FileOutputStream(newEditPath.toFile()); final DataOutputStream outStream = new DataOutputStream(new BufferedOutputStream(fos)); outStream.writeUTF(MinimalLockingWriteAheadLog.class.getName()); outStream.writeInt(writeAheadLogVersion); outStream.writeUTF(serde.getClass().getName()); outStream.writeInt(serde.getVersion()); + serde.writeHeader(outStream); + outStream.flush(); dataOut = outStream; fileOut = fos; diff --git a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDe.java b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDe.java index cc984a6122..d1919e7f4c 100644 --- a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDe.java +++ b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDe.java @@ -29,6 +29,15 @@ import java.util.Map; */ public interface SerDe { + /** + * Provides the SerDe a chance to write header information to the given output stream + * + * @param out the DataOutputStream to write to + * @throws IOException if unable to write to the OutputStream + */ + default void writeHeader(DataOutputStream out) throws IOException { + } + /** *

* Serializes an Edit Record to the log via the given @@ -54,6 +63,15 @@ public interface SerDe { */ void serializeRecord(T record, DataOutputStream out) throws IOException; + /** + * Provides the SerDe the opportunity to read header information before deserializing any records + * + * @param in the InputStream to read from + * @throws IOException if unable to read from the InputStream + */ + default void readHeader(DataInputStream in) throws IOException { + } + /** *

* Reads an Edit Record from the given {@link DataInputStream} and merges @@ -65,9 +83,9 @@ public interface SerDe { * * @param in to deserialize from * @param currentRecordStates an unmodifiable map of Record ID's to the - * current state of that record + * current state of that record * @param version the version of the SerDe that was used to serialize the - * edit record + * edit record * @return deserialized record * @throws IOException if failure reading */ @@ -125,4 +143,12 @@ public interface SerDe { * @return version */ int getVersion(); + + /** + * Closes any resources that the SerDe is holding open + * + * @throws IOException if unable to close resources + */ + default void close() throws IOException { + } } diff --git a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDeFactory.java b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDeFactory.java new file mode 100644 index 0000000000..09e6f7b50c --- /dev/null +++ b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SerDeFactory.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wali; + +public interface SerDeFactory { + + /** + * Returns a new SerDe + * + * @param encodingName the name of encoding that was used when writing the serialized data, or null if + * the SerDe is to be used for serialization purposes + * @return a SerDe + */ + SerDe createSerDe(String encodingName); + + /** + * Returns the unique ID for the given record + * + * @param record to obtain identifier for + * @return identifier of record + */ + Object getRecordIdentifier(T record); + + /** + * Returns the UpdateType for the given record + * + * @param record to retrieve update type for + * @return update type + */ + UpdateType getUpdateType(T record); + + /** + * Returns the external location of the given record; this is used when a + * record is moved away from WALI or is being re-introduced to WALI. 
For + * example, WALI can be updated with a record of type + * {@link UpdateType#SWAP_OUT} that indicates a Location of + * file://tmp/external1 and can then be re-introduced to WALI by updating + * WALI with a record of type {@link UpdateType#CREATE} that indicates a + * Location of file://tmp/external1 + * + * @param record to get location of + * @return location + */ + String getLocation(T record); +} diff --git a/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SingletonSerDeFactory.java b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SingletonSerDeFactory.java new file mode 100644 index 0000000000..403f082cb6 --- /dev/null +++ b/nifi-commons/nifi-write-ahead-log/src/main/java/org/wali/SingletonSerDeFactory.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.wali; + +public class SingletonSerDeFactory implements SerDeFactory { + private final SerDe serde; + + public SingletonSerDeFactory(final SerDe serde) { + this.serde = serde; + } + + @Override + public SerDe createSerDe(final String encodingName) { + return serde; + } + + @Override + public Object getRecordIdentifier(final T record) { + return serde.getRecordIdentifier(record); + } + + @Override + public UpdateType getUpdateType(final T record) { + return serde.getUpdateType(record); + } + + @Override + public String getLocation(final T record) { + return serde.getLocation(record); + } +} diff --git a/nifi-commons/pom.xml b/nifi-commons/pom.xml index 558110abf2..27810cd297 100644 --- a/nifi-commons/pom.xml +++ b/nifi-commons/pom.xml @@ -38,5 +38,6 @@ nifi-site-to-site-client nifi-hl7-query-language nifi-hadoop-utils + nifi-schema-utils diff --git a/nifi-framework-api/src/main/java/org/apache/nifi/controller/repository/claim/ResourceClaim.java b/nifi-framework-api/src/main/java/org/apache/nifi/controller/repository/claim/ResourceClaim.java index bb788963b7..7e4495b16e 100644 --- a/nifi-framework-api/src/main/java/org/apache/nifi/controller/repository/claim/ResourceClaim.java +++ b/nifi-framework-api/src/main/java/org/apache/nifi/controller/repository/claim/ResourceClaim.java @@ -64,4 +64,28 @@ public interface ResourceClaim extends Comparable { * @return true if the Resource Claim is in use, false otherwise */ boolean isInUse(); + + + /** + * Provides the natural ordering for ResourceClaim objects. 
By default they are sorted by their id, then container, then section + * + * @param other other claim + * @return x such that x <= -1 if this is less than other; + * x=0 if this.equals(other); + * x >= 1 if this is greater than other + */ + @Override + default int compareTo(final ResourceClaim other) { + final int idComparison = getId().compareTo(other.getId()); + if (idComparison != 0) { + return idComparison; + } + + final int containerComparison = getContainer().compareTo(other.getContainer()); + if (containerComparison != 0) { + return containerComparison; + } + + return getSection().compareTo(other.getSection()); + } } diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/pom.xml b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/pom.xml index fdb5a83923..7ddae3699a 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/pom.xml +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/pom.xml @@ -47,6 +47,10 @@ org.apache.nifi nifi-utils + + org.apache.nifi + nifi-schema-utils + org.apache.nifi nifi-nar-utils diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FileSystemSwapManager.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FileSystemSwapManager.java index 350ccebf82..208bbceb09 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FileSystemSwapManager.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/FileSystemSwapManager.java @@ -17,6 +17,7 @@ package org.apache.nifi.controller; import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.EOFException; @@ -28,34 +29,31 @@ import java.io.FilenameFilter; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.nio.charset.StandardCharsets; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; -import java.util.Map; import java.util.UUID; import java.util.regex.Pattern; + import org.apache.nifi.controller.queue.FlowFileQueue; -import org.apache.nifi.controller.queue.QueueSize; import org.apache.nifi.controller.repository.FlowFileRecord; import org.apache.nifi.controller.repository.FlowFileRepository; import org.apache.nifi.controller.repository.FlowFileSwapManager; -import org.apache.nifi.controller.repository.IncompleteSwapFileException; -import org.apache.nifi.controller.repository.StandardFlowFileRecord; import org.apache.nifi.controller.repository.SwapContents; import org.apache.nifi.controller.repository.SwapManagerInitializationContext; import org.apache.nifi.controller.repository.SwapSummary; -import org.apache.nifi.controller.repository.claim.ContentClaim; -import org.apache.nifi.controller.repository.claim.ResourceClaim; import org.apache.nifi.controller.repository.claim.ResourceClaimManager; -import org.apache.nifi.controller.repository.claim.StandardContentClaim; -import org.apache.nifi.controller.swap.StandardSwapContents; -import org.apache.nifi.controller.swap.StandardSwapSummary; +import org.apache.nifi.controller.swap.SchemaSwapDeserializer; +import 
org.apache.nifi.controller.swap.SchemaSwapSerializer; +import org.apache.nifi.controller.swap.SimpleSwapDeserializer; +import org.apache.nifi.controller.swap.SwapDeserializer; +import org.apache.nifi.controller.swap.SwapSerializer; import org.apache.nifi.events.EventReporter; import org.apache.nifi.reporting.Severity; -import org.apache.nifi.stream.io.BufferedOutputStream; +import org.apache.nifi.stream.io.StreamUtils; import org.apache.nifi.util.NiFiProperties; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -83,6 +81,8 @@ public class FileSystemSwapManager implements FlowFileSwapManager { private EventReporter eventReporter; private ResourceClaimManager claimManager; + private static final byte[] MAGIC_HEADER = {'S', 'W', 'A', 'P'}; + /** * Default no args constructor for service loading only. */ @@ -116,8 +116,14 @@ public class FileSystemSwapManager implements FlowFileSwapManager { final File swapTempFile = new File(swapFile.getParentFile(), swapFile.getName() + ".part"); final String swapLocation = swapFile.getAbsolutePath(); - try (final FileOutputStream fos = new FileOutputStream(swapTempFile)) { - serializeFlowFiles(toSwap, flowFileQueue, swapLocation, fos); + final SwapSerializer serializer = new SchemaSwapSerializer(); + try (final FileOutputStream fos = new FileOutputStream(swapTempFile); + final OutputStream out = new BufferedOutputStream(fos)) { + out.write(MAGIC_HEADER); + final DataOutputStream dos = new DataOutputStream(out); + dos.writeUTF(serializer.getSerializationName()); + + serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, out); fos.getFD().sync(); } catch (final IOException ioe) { // we failed to write out the entire swap file. Delete the temporary file, if we can. @@ -154,14 +160,13 @@ public class FileSystemSwapManager implements FlowFileSwapManager { throw new FileNotFoundException("Failed to swap in FlowFiles from external storage location " + swapLocation + " into FlowFile Queue because the file could not be found"); } - final SwapContents swapContents; try (final InputStream fis = new FileInputStream(swapFile); final InputStream bis = new BufferedInputStream(fis); final DataInputStream in = new DataInputStream(bis)) { - swapContents = deserializeFlowFiles(in, swapLocation, flowFileQueue, claimManager); - } - return swapContents; + final SwapDeserializer deserializer = createSwapDeserializer(in); + return deserializer.deserializeFlowFiles(in, swapLocation, flowFileQueue, claimManager); + } } @Override @@ -210,11 +215,13 @@ public class FileSystemSwapManager implements FlowFileSwapManager { // "--.swap". If we have two dashes, then we can just check if the queue ID is equal // to the id of the queue given and if not we can just move on. 
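The swap-out path above now begins every new swap file with the literal bytes 'S','W','A','P' followed by the serializer's name written with writeUTF, and only then the serializer-specific payload; legacy swap files start directly with an int encoding version. That leading header is what lets createSwapDeserializer (shown further down) pick the right deserializer before committing to either format. A minimal sketch of the same check, assuming nothing beyond plain java.io (the class and method names here are illustrative, not part of the patch):

    import java.io.BufferedInputStream;
    import java.io.DataInputStream;
    import java.io.FileInputStream;
    import java.io.IOException;
    import java.util.Arrays;

    public class SwapHeaderPeek {
        private static final byte[] MAGIC_HEADER = {'S', 'W', 'A', 'P'};

        public static String describe(final String swapFileName) throws IOException {
            // The BufferedInputStream is what makes mark()/reset() work on the DataInputStream,
            // so the peeked bytes can be handed back to a legacy-format reader untouched.
            try (final DataInputStream dis = new DataInputStream(
                    new BufferedInputStream(new FileInputStream(swapFileName)))) {
                dis.mark(MAGIC_HEADER.length);
                final byte[] leading = new byte[MAGIC_HEADER.length];
                dis.readFully(leading);

                if (Arrays.equals(leading, MAGIC_HEADER)) {
                    // New layout: magic bytes, then the serialization name, then the payload.
                    return "schema-based swap file, serialization name = " + dis.readUTF();
                }

                // Legacy layout: no magic bytes; rewind so the old reader sees the int encoding version first.
                dis.reset();
                return "legacy swap file, encoding version = " + dis.readInt();
            }
        }
    }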
final String[] splits = swapFile.getName().split("-"); - if (splits.length == 3) { - final String queueIdentifier = splits[1]; - if (!queueIdentifier.equals(flowFileQueue.getIdentifier())) { - continue; + if (splits.length > 6) { + final String queueIdentifier = splits[1] + "-" + splits[2] + "-" + splits[3] + "-" + splits[4] + "-" + splits[5]; + if (queueIdentifier.equals(flowFileQueue.getIdentifier())) { + swapLocations.add(swapFile.getAbsolutePath()); } + + continue; } // Read the queue identifier from the swap file to check if the swap file is for this queue @@ -222,18 +229,22 @@ public class FileSystemSwapManager implements FlowFileSwapManager { final InputStream bufferedIn = new BufferedInputStream(fis); final DataInputStream in = new DataInputStream(bufferedIn)) { - final int swapEncodingVersion = in.readInt(); - if (swapEncodingVersion > SWAP_ENCODING_VERSION) { - final String errMsg = "Cannot swap FlowFiles in from " + swapFile + " because the encoding version is " - + swapEncodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)"; - + final SwapDeserializer deserializer; + try { + deserializer = createSwapDeserializer(in); + } catch (final Exception e) { + final String errMsg = "Cannot swap FlowFiles in from " + swapFile + " due to " + e; eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, errMsg); throw new IOException(errMsg); } - final String connectionId = in.readUTF(); - if (connectionId.equals(flowFileQueue.getIdentifier())) { - swapLocations.add(swapFile.getAbsolutePath()); + // If deserializer is not an instance of Simple Swap Deserializer, then it means that the serializer is new enough that + // we use the 3-element filename as illustrated above, so this is only necessary for the SimpleSwapDeserializer. + if (deserializer instanceof SimpleSwapDeserializer) { + final String connectionId = in.readUTF(); + if (connectionId.equals(flowFileQueue.getIdentifier())) { + swapLocations.add(swapFile.getAbsolutePath()); + } } } } @@ -251,353 +262,36 @@ public class FileSystemSwapManager implements FlowFileSwapManager { final InputStream bufferedIn = new BufferedInputStream(fis); final DataInputStream in = new DataInputStream(bufferedIn)) { - final int swapEncodingVersion = in.readInt(); - if (swapEncodingVersion > SWAP_ENCODING_VERSION) { - final String errMsg = "Cannot swap FlowFiles in from " + swapFile + " because the encoding version is " - + swapEncodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)"; - - eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, errMsg); - throw new IOException(errMsg); - } - - final int numRecords; - final long contentSize; - Long maxRecordId = null; - try { - in.readUTF(); // ignore Connection ID - numRecords = in.readInt(); - contentSize = in.readLong(); - - if (numRecords == 0) { - return StandardSwapSummary.EMPTY_SUMMARY; - } - - if (swapEncodingVersion > 7) { - maxRecordId = in.readLong(); - } - } catch (final EOFException eof) { - logger.warn("Found premature End-of-File when reading Swap File {}. 
EOF occurred before any FlowFiles were encountered", swapLocation); - return StandardSwapSummary.EMPTY_SUMMARY; - } - - final QueueSize queueSize = new QueueSize(numRecords, contentSize); - final SwapContents swapContents = deserializeFlowFiles(in, queueSize, maxRecordId, swapEncodingVersion, true, claimManager, swapLocation); - return swapContents.getSummary(); + final SwapDeserializer deserializer = createSwapDeserializer(in); + return deserializer.getSwapSummary(in, swapLocation, claimManager); } } - public static int serializeFlowFiles(final List toSwap, final FlowFileQueue queue, final String swapLocation, final OutputStream destination) throws IOException { - if (toSwap == null || toSwap.isEmpty()) { - return 0; - } - long contentSize = 0L; - for (final FlowFileRecord record : toSwap) { - contentSize += record.getSize(); - } + private SwapDeserializer createSwapDeserializer(final DataInputStream dis) throws IOException { + dis.mark(MAGIC_HEADER.length); - // persist record to disk via the swap file - final OutputStream bufferedOut = new BufferedOutputStream(destination); - final DataOutputStream out = new DataOutputStream(bufferedOut); + final byte[] magicHeader = new byte[MAGIC_HEADER.length]; try { - out.writeInt(SWAP_ENCODING_VERSION); - out.writeUTF(queue.getIdentifier()); - out.writeInt(toSwap.size()); - out.writeLong(contentSize); - - // get the max record id and write that out so that we know it quickly for restoration - long maxRecordId = 0L; - for (final FlowFileRecord flowFile : toSwap) { - if (flowFile.getId() > maxRecordId) { - maxRecordId = flowFile.getId(); - } - } - - out.writeLong(maxRecordId); - - for (final FlowFileRecord flowFile : toSwap) { - out.writeLong(flowFile.getId()); - out.writeLong(flowFile.getEntryDate()); - out.writeLong(flowFile.getLineageStartDate()); - out.writeLong(flowFile.getLineageStartIndex()); - out.writeLong(flowFile.getLastQueueDate()); - out.writeLong(flowFile.getQueueDateIndex()); - out.writeLong(flowFile.getSize()); - - final ContentClaim claim = flowFile.getContentClaim(); - if (claim == null) { - out.writeBoolean(false); - } else { - out.writeBoolean(true); - final ResourceClaim resourceClaim = claim.getResourceClaim(); - out.writeUTF(resourceClaim.getId()); - out.writeUTF(resourceClaim.getContainer()); - out.writeUTF(resourceClaim.getSection()); - out.writeLong(claim.getOffset()); - out.writeLong(claim.getLength()); - out.writeLong(flowFile.getContentClaimOffset()); - out.writeBoolean(resourceClaim.isLossTolerant()); - } - - final Map attributes = flowFile.getAttributes(); - out.writeInt(attributes.size()); - for (final Map.Entry entry : attributes.entrySet()) { - writeString(entry.getKey(), out); - writeString(entry.getValue(), out); - } - } - } finally { - out.flush(); - } - - logger.info("Successfully swapped out {} FlowFiles from {} to Swap File {}", toSwap.size(), queue, swapLocation); - - return toSwap.size(); - } - - private static void writeString(final String toWrite, final OutputStream out) throws IOException { - final byte[] bytes = toWrite.getBytes(StandardCharsets.UTF_8); - final int utflen = bytes.length; - - if (utflen < 65535) { - out.write(utflen >>> 8); - out.write(utflen); - out.write(bytes); - } else { - out.write(255); - out.write(255); - out.write(utflen >>> 24); - out.write(utflen >>> 16); - out.write(utflen >>> 8); - out.write(utflen); - out.write(bytes); - } - } - - static SwapContents deserializeFlowFiles(final DataInputStream in, final String swapLocation, final FlowFileQueue queue, final ResourceClaimManager 
claimManager) throws IOException { - final int swapEncodingVersion = in.readInt(); - if (swapEncodingVersion > SWAP_ENCODING_VERSION) { - throw new IOException("Cannot swap FlowFiles in from SwapFile because the encoding version is " - + swapEncodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)"); - } - - final String connectionId = in.readUTF(); // Connection ID - if (!connectionId.equals(queue.getIdentifier())) { - throw new IllegalArgumentException("Cannot deserialize FlowFiles from Swap File at location " + swapLocation - + " because those FlowFiles belong to Connection with ID " + connectionId + " and an attempt was made to swap them into a Connection with ID " + queue.getIdentifier()); - } - - int numRecords = 0; - long contentSize = 0L; - Long maxRecordId = null; - try { - numRecords = in.readInt(); - contentSize = in.readLong(); // Content Size - if (swapEncodingVersion > 7) { - maxRecordId = in.readLong(); // Max Record ID - } + StreamUtils.fillBuffer(dis, magicHeader); } catch (final EOFException eof) { - final QueueSize queueSize = new QueueSize(numRecords, contentSize); - final SwapSummary summary = new StandardSwapSummary(queueSize, maxRecordId, Collections.emptyList()); - final SwapContents partialContents = new StandardSwapContents(summary, Collections.emptyList()); - throw new IncompleteSwapFileException(swapLocation, partialContents); + throw new IOException("Failed to read swap file because the file contained less than 4 bytes of data"); } - final QueueSize queueSize = new QueueSize(numRecords, contentSize); - return deserializeFlowFiles(in, queueSize, maxRecordId, swapEncodingVersion, false, claimManager, swapLocation); - } - - private static SwapContents deserializeFlowFiles(final DataInputStream in, final QueueSize queueSize, final Long maxRecordId, - final int serializationVersion, final boolean incrementContentClaims, final ResourceClaimManager claimManager, final String location) throws IOException { - final List flowFiles = new ArrayList<>(queueSize.getObjectCount()); - final List resourceClaims = new ArrayList<>(queueSize.getObjectCount()); - Long maxId = maxRecordId; - - for (int i = 0; i < queueSize.getObjectCount(); i++) { - try { - // legacy encoding had an "action" because it used to be couple with FlowFile Repository code - if (serializationVersion < 3) { - final int action = in.read(); - if (action != 1) { - throw new IOException("Swap File is version " + serializationVersion + " but did not contain a 'UPDATE' record type"); - } - } - - final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); - final long recordId = in.readLong(); - if (maxId == null || recordId > maxId) { - maxId = recordId; - } - - ffBuilder.id(recordId); - ffBuilder.entryDate(in.readLong()); - - if (serializationVersion > 1) { - // Lineage information was added in version 2 - if (serializationVersion < 10) { - final int numLineageIdentifiers = in.readInt(); - for (int lineageIdIdx = 0; lineageIdIdx < numLineageIdentifiers; lineageIdIdx++) { - in.readUTF(); //skip each identifier - } - } - - // version 9 adds in a 'lineage start index' - final long lineageStartDate = in.readLong(); - final long lineageStartIndex; - if (serializationVersion > 8) { - lineageStartIndex = in.readLong(); - } else { - lineageStartIndex = 0L; - } - - ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); - - if (serializationVersion > 5) { - // Version 9 adds in a 'queue date index' - final long lastQueueDate = in.readLong(); - final long 
queueDateIndex; - if (serializationVersion > 8) { - queueDateIndex = in.readLong(); - } else { - queueDateIndex = 0L; - } - - ffBuilder.lastQueued(lastQueueDate, queueDateIndex); - } - } - - ffBuilder.size(in.readLong()); - - if (serializationVersion < 3) { - readString(in); // connection Id - } - - final boolean hasClaim = in.readBoolean(); - ResourceClaim resourceClaim = null; - if (hasClaim) { - final String claimId; - if (serializationVersion < 5) { - claimId = String.valueOf(in.readLong()); - } else { - claimId = in.readUTF(); - } - - final String container = in.readUTF(); - final String section = in.readUTF(); - - final long resourceOffset; - final long resourceLength; - if (serializationVersion < 6) { - resourceOffset = 0L; - resourceLength = -1L; - } else { - resourceOffset = in.readLong(); - resourceLength = in.readLong(); - } - - final long claimOffset = in.readLong(); - - final boolean lossTolerant; - if (serializationVersion >= 4) { - lossTolerant = in.readBoolean(); - } else { - lossTolerant = false; - } - - resourceClaim = claimManager.getResourceClaim(container, section, claimId); - if (resourceClaim == null) { - logger.error("Swap file indicates that FlowFile was referencing Resource Claim at container={}, section={}, claimId={}, " - + "but this Resource Claim cannot be found! Will create a temporary Resource Claim, but this may affect the framework's " - + "ability to properly clean up this resource", container, section, claimId); - resourceClaim = claimManager.newResourceClaim(container, section, claimId, lossTolerant, true); - } - - final StandardContentClaim claim = new StandardContentClaim(resourceClaim, resourceOffset); - claim.setLength(resourceLength); - - if (incrementContentClaims) { - claimManager.incrementClaimantCount(resourceClaim); - } - - ffBuilder.contentClaim(claim); - ffBuilder.contentClaimOffset(claimOffset); - } - - boolean attributesChanged = true; - if (serializationVersion < 3) { - attributesChanged = in.readBoolean(); - } - - if (attributesChanged) { - final int numAttributes = in.readInt(); - for (int j = 0; j < numAttributes; j++) { - final String key = readString(in); - final String value = readString(in); - - ffBuilder.addAttribute(key, value); - } - } - - final FlowFileRecord record = ffBuilder.build(); - if (resourceClaim != null) { - resourceClaims.add(resourceClaim); - } - - flowFiles.add(record); - } catch (final EOFException eof) { - final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxId, resourceClaims); - final SwapContents partialContents = new StandardSwapContents(swapSummary, flowFiles); - throw new IncompleteSwapFileException(location, partialContents); + if (Arrays.equals(magicHeader, MAGIC_HEADER)) { + final String serializationName = dis.readUTF(); + if (serializationName.equals(SchemaSwapDeserializer.getSerializationName())) { + return new SchemaSwapDeserializer(); } - } - final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxId, resourceClaims); - return new StandardSwapContents(swapSummary, flowFiles); - } - - private static String readString(final InputStream in) throws IOException { - final Integer numBytes = readFieldLength(in); - if (numBytes == null) { - throw new EOFException(); - } - final byte[] bytes = new byte[numBytes]; - fillBuffer(in, bytes, numBytes); - return new String(bytes, StandardCharsets.UTF_8); - } - - private static Integer readFieldLength(final InputStream in) throws IOException { - final int firstValue = in.read(); - final int secondValue = in.read(); - if (firstValue < 
0) { - return null; - } - if (secondValue < 0) { - throw new EOFException(); - } - if (firstValue == 0xff && secondValue == 0xff) { - final int ch1 = in.read(); - final int ch2 = in.read(); - final int ch3 = in.read(); - final int ch4 = in.read(); - if ((ch1 | ch2 | ch3 | ch4) < 0) { - throw new EOFException(); - } - return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4; + throw new IOException("Cannot find a suitable Deserializer for swap file, written with Serialization Name '" + serializationName + "'"); } else { - return (firstValue << 8) + secondValue; + // SimpleSwapDeserializer is old and did not write out a magic header. + dis.reset(); + return new SimpleSwapDeserializer(); } } - private static void fillBuffer(final InputStream in, final byte[] buffer, final int length) throws IOException { - int bytesRead; - int totalBytesRead = 0; - while ((bytesRead = in.read(buffer, totalBytesRead, length - totalBytesRead)) > 0) { - totalBytesRead += bytesRead; - } - if (totalBytesRead != length) { - throw new EOFException(); - } - } private void error(final String error) { logger.error(error); diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/StandardFlowFileQueue.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/StandardFlowFileQueue.java index 68af208983..e20e250524 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/StandardFlowFileQueue.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/StandardFlowFileQueue.java @@ -820,6 +820,7 @@ public class StandardFlowFileQueue implements FlowFileQueue { long swapByteCount = 0L; Long maxId = null; List resourceClaims = new ArrayList<>(); + final long startNanos = System.nanoTime(); writeLock.lock(); try { @@ -866,6 +867,11 @@ public class StandardFlowFileQueue implements FlowFileQueue { writeLock.unlock("Recover Swap Files"); } + if (!swapLocations.isEmpty()) { + final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - startNanos); + logger.info("Recovered {} swap files for {} in {} millis", swapLocations.size(), this, millis); + } + return new StandardSwapSummary(new QueueSize(swapFlowFileCount, swapByteCount), maxId, resourceClaims); } diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerde.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerde.java new file mode 100644 index 0000000000..44ed62df6d --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerde.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository; + +import java.util.Map; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.wali.SerDe; +import org.wali.UpdateType; + +public abstract class RepositoryRecordSerde implements SerDe { + private Map flowFileQueueMap = null; + + protected void setQueueMap(final Map queueMap) { + this.flowFileQueueMap = queueMap; + } + + protected Map getQueueMap() { + return flowFileQueueMap; + } + + protected FlowFileQueue getFlowFileQueue(final String queueId) { + return flowFileQueueMap.get(queueId); + } + + @Override + public Long getRecordIdentifier(final RepositoryRecord record) { + return record.getCurrent().getId(); + } + + @Override + public UpdateType getUpdateType(final RepositoryRecord record) { + switch (record.getType()) { + case CONTENTMISSING: + case DELETE: + return UpdateType.DELETE; + case CREATE: + return UpdateType.CREATE; + case UPDATE: + return UpdateType.UPDATE; + case SWAP_OUT: + return UpdateType.SWAP_OUT; + case SWAP_IN: + return UpdateType.SWAP_IN; + } + return null; + } + + @Override + public String getLocation(final RepositoryRecord record) { + return record.getSwapLocation(); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerdeFactory.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerdeFactory.java new file mode 100644 index 0000000000..c19fa94cb9 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/RepositoryRecordSerdeFactory.java @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.controller.repository; + +import java.util.Map; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.wali.SerDe; +import org.wali.SerDeFactory; +import org.wali.UpdateType; + +public class RepositoryRecordSerdeFactory implements SerDeFactory { + private final String LEGACY_SERDE_ENCODING_NAME = "org.apache.nifi.controller.repository.WriteAheadFlowFileRepository$WriteAheadRecordSerde"; + private final ResourceClaimManager resourceClaimManager; + private Map flowFileQueueMap = null; + + public RepositoryRecordSerdeFactory(final ResourceClaimManager claimManager) { + this.resourceClaimManager = claimManager; + } + + protected void setQueueMap(final Map queueMap) { + this.flowFileQueueMap = queueMap; + } + + protected Map getQueueMap() { + return flowFileQueueMap; + } + + @Override + public SerDe createSerDe(final String encodingName) { + if (encodingName == null || SchemaRepositoryRecordSerde.class.getName().equals(encodingName)) { + final SchemaRepositoryRecordSerde serde = new SchemaRepositoryRecordSerde(resourceClaimManager); + serde.setQueueMap(flowFileQueueMap); + return serde; + } + + if (WriteAheadRepositoryRecordSerde.class.getName().equals(encodingName) + || LEGACY_SERDE_ENCODING_NAME.equals(encodingName)) { + final WriteAheadRepositoryRecordSerde serde = new WriteAheadRepositoryRecordSerde(resourceClaimManager); + serde.setQueueMap(flowFileQueueMap); + return serde; + } + + throw new IllegalArgumentException("Cannot create Deserializer for Repository Records because the encoding '" + encodingName + "' is not known"); + } + + protected FlowFileQueue getFlowFileQueue(final String queueId) { + return flowFileQueueMap.get(queueId); + } + + @Override + public Long getRecordIdentifier(final RepositoryRecord record) { + return record.getCurrent().getId(); + } + + @Override + public UpdateType getUpdateType(final RepositoryRecord record) { + switch (record.getType()) { + case CONTENTMISSING: + case DELETE: + return UpdateType.DELETE; + case CREATE: + return UpdateType.CREATE; + case UPDATE: + return UpdateType.UPDATE; + case SWAP_OUT: + return UpdateType.SWAP_OUT; + case SWAP_IN: + return UpdateType.SWAP_IN; + } + return null; + } + + @Override + public String getLocation(final RepositoryRecord record) { + return record.getSwapLocation(); + } + +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/SchemaRepositoryRecordSerde.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/SchemaRepositoryRecordSerde.java new file mode 100644 index 0000000000..916fd76116 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/SchemaRepositoryRecordSerde.java @@ -0,0 +1,213 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Map; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.schema.ContentClaimFieldMap; +import org.apache.nifi.controller.repository.schema.ContentClaimSchema; +import org.apache.nifi.controller.repository.schema.FlowFileSchema; +import org.apache.nifi.controller.repository.schema.RepositoryRecordFieldMap; +import org.apache.nifi.controller.repository.schema.RepositoryRecordSchema; +import org.apache.nifi.controller.repository.schema.RepositoryRecordUpdate; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SchemaRecordReader; +import org.apache.nifi.repository.schema.SchemaRecordWriter; +import org.apache.nifi.repository.schema.SimpleRecordField; +import org.wali.SerDe; +import org.wali.UpdateType; + +public class SchemaRepositoryRecordSerde extends RepositoryRecordSerde implements SerDe { + private static final int MAX_ENCODING_VERSION = 1; + + private final RecordSchema writeSchema = RepositoryRecordSchema.REPOSITORY_RECORD_SCHEMA_V1; + private final RecordSchema contentClaimSchema = ContentClaimSchema.CONTENT_CLAIM_SCHEMA_V1; + + private final ResourceClaimManager resourceClaimManager; + private volatile RecordSchema recoverySchema; + + public SchemaRepositoryRecordSerde(final ResourceClaimManager resourceClaimManager) { + this.resourceClaimManager = resourceClaimManager; + } + + @Override + public void writeHeader(final DataOutputStream out) throws IOException { + writeSchema.writeTo(out); + } + + @Override + public void serializeEdit(final RepositoryRecord previousRecordState, final RepositoryRecord newRecordState, final DataOutputStream out) throws IOException { + serializeRecord(newRecordState, out); + } + + @Override + public void serializeRecord(final RepositoryRecord record, final DataOutputStream out) throws IOException { + final RecordSchema schema; + switch (record.getType()) { + case CREATE: + case UPDATE: + schema = RepositoryRecordSchema.CREATE_OR_UPDATE_SCHEMA_V1; + break; + case CONTENTMISSING: + case DELETE: + schema = RepositoryRecordSchema.DELETE_SCHEMA_V1; + break; + case SWAP_IN: + schema = RepositoryRecordSchema.SWAP_IN_SCHEMA_V1; + break; + case SWAP_OUT: + schema = RepositoryRecordSchema.SWAP_OUT_SCHEMA_V1; + break; + default: + throw new IllegalArgumentException("Received Repository Record with unknown Update Type: " + record.getType()); // won't happen. 
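The RepositoryRecordSerdeFactory above is the piece that maps the encoding name recorded in a journal header back to a concrete serde: null (a freshly created journal) or the schema serde's own class name yields SchemaRepositoryRecordSerde, while the class name of WriteAheadRepositoryRecordSerde or the pre-refactor inner-class name yields the serde that keeps the original byte-oriented layout, which is what keeps repositories written before this change readable after an upgrade. A rough usage sketch, assuming StandardResourceClaimManager still exposes a no-argument constructor (the example class itself is hypothetical):

    import org.apache.nifi.controller.repository.RepositoryRecord;
    import org.apache.nifi.controller.repository.RepositoryRecordSerdeFactory;
    import org.apache.nifi.controller.repository.claim.StandardResourceClaimManager;
    import org.wali.SerDe;

    public class SerdeSelectionExample {
        public static void main(final String[] args) {
            final RepositoryRecordSerdeFactory factory =
                new RepositoryRecordSerdeFactory(new StandardResourceClaimManager());

            // A journal being created right now has no recorded encoding name yet,
            // so the factory hands back the new schema-based serde.
            final SerDe<RepositoryRecord> forNewJournal = factory.createSerDe(null);

            // A journal written before this refactor recorded the old inner-class name;
            // the factory still recognizes it and returns the original byte-layout serde.
            final SerDe<RepositoryRecord> forOldJournal = factory.createSerDe(
                "org.apache.nifi.controller.repository.WriteAheadFlowFileRepository$WriteAheadRecordSerde");

            System.out.println(forNewJournal.getClass().getSimpleName()); // SchemaRepositoryRecordSerde
            System.out.println(forOldJournal.getClass().getSimpleName()); // WriteAheadRepositoryRecordSerde
        }
    }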
+ } + + final RepositoryRecordFieldMap fieldMap = new RepositoryRecordFieldMap(record, schema, contentClaimSchema); + final RepositoryRecordUpdate update = new RepositoryRecordUpdate(fieldMap, RepositoryRecordSchema.REPOSITORY_RECORD_SCHEMA_V1); + + new SchemaRecordWriter().writeRecord(update, out); + } + + @Override + public void readHeader(final DataInputStream in) throws IOException { + recoverySchema = RecordSchema.readFrom(in); + } + + @Override + public RepositoryRecord deserializeEdit(final DataInputStream in, final Map currentRecordStates, final int version) throws IOException { + return deserializeRecord(in, version); + } + + @Override + public RepositoryRecord deserializeRecord(final DataInputStream in, final int version) throws IOException { + final SchemaRecordReader reader = SchemaRecordReader.fromSchema(recoverySchema); + final Record updateRecord = reader.readRecord(in); + + // Top level is always going to be a "Repository Record Update" record because we need a 'Union' type record at the + // top level that indicates which type of record we have. + final Record record = (Record) updateRecord.getFieldValue(RepositoryRecordSchema.REPOSITORY_RECORD_UPDATE_V1); + + final String actionType = (String) record.getFieldValue(RepositoryRecordSchema.ACTION_TYPE_FIELD); + final UpdateType updateType = UpdateType.valueOf(actionType); + switch (updateType) { + case CREATE: + return createRecord(record); + case DELETE: + return deleteRecord(record); + case SWAP_IN: + return swapInRecord(record); + case SWAP_OUT: + return swapOutRecord(record); + case UPDATE: + return updateRecord(record); + default: + throw new IOException("Found unrecognized Update Type '" + actionType + "'"); + } + } + + + @SuppressWarnings("unchecked") + private StandardRepositoryRecord createRecord(final Record record) { + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); + ffBuilder.id((Long) record.getFieldValue(RepositoryRecordSchema.RECORD_ID)); + ffBuilder.entryDate((Long) record.getFieldValue(FlowFileSchema.ENTRY_DATE)); + + final Long lastQueueDate = (Long) record.getFieldValue(FlowFileSchema.QUEUE_DATE); + final Long queueDateIndex = (Long) record.getFieldValue(FlowFileSchema.QUEUE_DATE_INDEX); + ffBuilder.lastQueued(lastQueueDate, queueDateIndex); + + final Long lineageStartDate = (Long) record.getFieldValue(FlowFileSchema.LINEAGE_START_DATE); + final Long lineageStartIndex = (Long) record.getFieldValue(FlowFileSchema.LINEAGE_START_INDEX); + ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); + + populateContentClaim(ffBuilder, record); + ffBuilder.size((Long) record.getFieldValue(FlowFileSchema.FLOWFILE_SIZE)); + + ffBuilder.addAttributes((Map) record.getFieldValue(FlowFileSchema.ATTRIBUTES)); + + final FlowFileRecord flowFileRecord = ffBuilder.build(); + + final String queueId = (String) record.getFieldValue(RepositoryRecordSchema.QUEUE_IDENTIFIER); + final FlowFileQueue queue = getFlowFileQueue(queueId); + + return new StandardRepositoryRecord(queue, flowFileRecord); + } + + private void populateContentClaim(final StandardFlowFileRecord.Builder ffBuilder, final Record record) { + final Object claimMap = record.getFieldValue(FlowFileSchema.CONTENT_CLAIM); + if (claimMap == null) { + return; + } + + final Record claimRecord = (Record) claimMap; + final ContentClaim contentClaim = ContentClaimFieldMap.getContentClaim(claimRecord, resourceClaimManager); + final Long offset = ContentClaimFieldMap.getContentClaimOffset(claimRecord); + + 
ffBuilder.contentClaim(contentClaim); + ffBuilder.contentClaimOffset(offset); + } + + private RepositoryRecord updateRecord(final Record record) { + return createRecord(record); + } + + private RepositoryRecord deleteRecord(final Record record) { + final Long recordId = (Long) record.getFieldValue(RepositoryRecordSchema.RECORD_ID_FIELD); + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder().id(recordId); + final FlowFileRecord flowFileRecord = ffBuilder.build(); + + final StandardRepositoryRecord repoRecord = new StandardRepositoryRecord((FlowFileQueue) null, flowFileRecord); + repoRecord.markForDelete(); + return repoRecord; + } + + private RepositoryRecord swapInRecord(final Record record) { + final StandardRepositoryRecord repoRecord = createRecord(record); + final String swapLocation = (String) record.getFieldValue(new SimpleRecordField(RepositoryRecordSchema.SWAP_LOCATION, FieldType.STRING, Repetition.EXACTLY_ONE)); + repoRecord.setSwapLocation(swapLocation); + return repoRecord; + } + + private RepositoryRecord swapOutRecord(final Record record) { + final Long recordId = (Long) record.getFieldValue(RepositoryRecordSchema.RECORD_ID_FIELD); + final String queueId = (String) record.getFieldValue(new SimpleRecordField(RepositoryRecordSchema.QUEUE_IDENTIFIER, FieldType.STRING, Repetition.EXACTLY_ONE)); + final String swapLocation = (String) record.getFieldValue(new SimpleRecordField(RepositoryRecordSchema.SWAP_LOCATION, FieldType.STRING, Repetition.EXACTLY_ONE)); + final FlowFileQueue queue = getFlowFileQueue(queueId); + + final FlowFileRecord flowFileRecord = new StandardFlowFileRecord.Builder() + .id(recordId) + .build(); + + return new StandardRepositoryRecord(queue, flowFileRecord, swapLocation); + } + + @Override + public int getVersion() { + return MAX_ENCODING_VERSION; + } + +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardProcessSession.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardProcessSession.java index a696e790ae..10f0d8cd9c 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardProcessSession.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/StandardProcessSession.java @@ -49,8 +49,6 @@ import org.apache.nifi.controller.queue.FlowFileQueue; import org.apache.nifi.controller.queue.QueueSize; import org.apache.nifi.controller.repository.claim.ContentClaim; import org.apache.nifi.controller.repository.claim.ResourceClaim; -import org.apache.nifi.controller.repository.io.ByteCountingInputStream; -import org.apache.nifi.controller.repository.io.ByteCountingOutputStream; import org.apache.nifi.controller.repository.io.DisableOnCloseInputStream; import org.apache.nifi.controller.repository.io.DisableOnCloseOutputStream; import org.apache.nifi.controller.repository.io.FlowFileAccessInputStream; @@ -75,6 +73,8 @@ import org.apache.nifi.provenance.ProvenanceEventType; import org.apache.nifi.provenance.ProvenanceReporter; import org.apache.nifi.provenance.StandardProvenanceEventRecord; import org.apache.nifi.stream.io.BufferedOutputStream; +import org.apache.nifi.stream.io.ByteCountingInputStream; +import org.apache.nifi.stream.io.ByteCountingOutputStream; import 
org.apache.nifi.stream.io.StreamUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -121,8 +121,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE private int removedCount = 0; // number of flowfiles removed in this session private long removedBytes = 0L; // size of all flowfiles removed in this session - private final AtomicLong bytesRead = new AtomicLong(0L); - private final AtomicLong bytesWritten = new AtomicLong(0L); + private long bytesRead = 0L; + private long bytesWritten = 0L; private int flowFilesIn = 0, flowFilesOut = 0; private long contentSizeIn = 0L, contentSizeOut = 0L; @@ -975,8 +975,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE final Connectable connectable = context.getConnectable(); final StandardFlowFileEvent flowFileEvent = new StandardFlowFileEvent(connectable.getIdentifier()); - flowFileEvent.setBytesRead(bytesRead.get()); - flowFileEvent.setBytesWritten(bytesWritten.get()); + flowFileEvent.setBytesRead(bytesRead); + flowFileEvent.setBytesWritten(bytesWritten); // update event repository try { @@ -1064,8 +1064,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE flowFilesOut = 0; removedCount = 0; removedBytes = 0L; - bytesRead.set(0L); - bytesWritten.set(0L); + bytesRead = 0L; + bytesWritten = 0L; connectionCounts.clear(); createdFlowFiles.clear(); removedFlowFiles.clear(); @@ -2006,8 +2006,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE // callback for reading FlowFile 1 and if we used the same stream we'd be destroying the ability to read from FlowFile 1. if (allowCachingOfStream && recursionSet.isEmpty()) { if (currentReadClaim == claim) { - if (currentReadClaimStream != null && currentReadClaimStream.getStreamLocation() <= offset) { - final long bytesToSkip = offset - currentReadClaimStream.getStreamLocation(); + if (currentReadClaimStream != null && currentReadClaimStream.getBytesConsumed() <= offset) { + final long bytesToSkip = offset - currentReadClaimStream.getBytesConsumed(); if (bytesToSkip > 0) { StreamUtils.skip(currentReadClaimStream, bytesToSkip); } @@ -2023,7 +2023,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE } currentReadClaim = claim; - currentReadClaimStream = new ByteCountingInputStream(rawInStream, new AtomicLong(0L)); + currentReadClaimStream = new ByteCountingInputStream(rawInStream); StreamUtils.skip(currentReadClaimStream, offset); // Use a non-closeable stream because we want to keep it open after the callback has finished so that we can @@ -2270,8 +2270,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE writtenCount += footer.length; } } finally { - bytesWritten.getAndAdd(writtenCount); - bytesRead.getAndAdd(readCount); + bytesWritten += writtenCount; + bytesRead += readCount; } } catch (final ContentNotFoundException nfe) { destroyContent(newClaim); @@ -2311,8 +2311,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE validateRecordState(source); final StandardRepositoryRecord record = records.get(source); + long writtenToFlowFile = 0L; ContentClaim newClaim = null; - final AtomicLong writtenHolder = new AtomicLong(0L); try { newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant()); claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source); @@ -2320,9 +2320,14 @@ public final class StandardProcessSession implements 
ProcessSession, ProvenanceE ensureNotAppending(newClaim); try (final OutputStream stream = context.getContentRepository().write(newClaim); final OutputStream disableOnClose = new DisableOnCloseOutputStream(stream); - final OutputStream countingOut = new ByteCountingOutputStream(disableOnClose, writtenHolder)) { - recursionSet.add(source); - writer.process(new FlowFileAccessOutputStream(countingOut, source)); + final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnClose)) { + try { + recursionSet.add(source); + writer.process(new FlowFileAccessOutputStream(countingOut, source)); + } finally { + writtenToFlowFile = countingOut.getBytesWritten(); + bytesWritten += countingOut.getBytesWritten(); + } } finally { recursionSet.remove(source); } @@ -2342,8 +2347,6 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE resetWriteClaims(); // need to reset write claim before we can remove the claim destroyContent(newClaim); throw t; - } finally { - bytesWritten.getAndAdd(writtenHolder.get()); } removeTemporaryClaim(record); @@ -2351,7 +2354,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE .fromFlowFile(record.getCurrent()) .contentClaim(newClaim) .contentClaimOffset(0) - .size(writtenHolder.get()) + .size(writtenToFlowFile) .build(); record.setWorking(newFile); @@ -2379,7 +2382,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE final OutputStream rawOutStream = context.getContentRepository().write(newClaim); final OutputStream bufferedOutStream = new BufferedOutputStream(rawOutStream); - outStream = new ByteCountingOutputStream(bufferedOutStream, new AtomicLong(0L)); + outStream = new ByteCountingOutputStream(bufferedOutStream); originalByteWrittenCount = 0; appendableStreams.put(newClaim, outStream); @@ -2448,7 +2451,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE } finally { if (outStream != null) { final long bytesWrittenThisIteration = outStream.getBytesWritten() - originalByteWrittenCount; - bytesWritten.getAndAdd(bytesWrittenThisIteration); + bytesWritten += bytesWrittenThisIteration; } } @@ -2542,8 +2545,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE final StandardRepositoryRecord record = records.get(source); final ContentClaim currClaim = record.getCurrentClaim(); + long writtenToFlowFile = 0L; ContentClaim newClaim = null; - final AtomicLong writtenHolder = new AtomicLong(0L); try { newClaim = context.getContentRepository().create(context.getConnectable().isLossTolerant()); claimLog.debug("Creating ContentClaim {} for 'write' for {}", newClaim, source); @@ -2556,7 +2559,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE final InputStream countingIn = new ByteCountingInputStream(disableOnCloseIn, bytesRead); final OutputStream os = context.getContentRepository().write(newClaim); final OutputStream disableOnCloseOut = new DisableOnCloseOutputStream(os); - final OutputStream countingOut = new ByteCountingOutputStream(disableOnCloseOut, writtenHolder)) { + final ByteCountingOutputStream countingOut = new ByteCountingOutputStream(disableOnCloseOut)) { recursionSet.add(source); @@ -2574,6 +2577,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE cnfeThrown = true; throw cnfe; } finally { + writtenToFlowFile = countingOut.getBytesWritten(); recursionSet.remove(source); // if cnfeThrown is true, we don't need to re-thrown the 
Exception; it will propagate. @@ -2595,7 +2599,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE destroyContent(newClaim); throw t; } finally { - bytesWritten.getAndAdd(writtenHolder.get()); + bytesWritten += writtenToFlowFile; } removeTemporaryClaim(record); @@ -2603,7 +2607,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE .fromFlowFile(record.getCurrent()) .contentClaim(newClaim) .contentClaimOffset(0L) - .size(writtenHolder.get()) + .size(writtenToFlowFile) .build(); record.setWorking(newFile); @@ -2635,8 +2639,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE long newSize = 0L; try { newSize = context.getContentRepository().importFrom(source, newClaim); - bytesWritten.getAndAdd(newSize); - bytesRead.getAndAdd(newSize); + bytesWritten += newSize; + bytesRead += newSize; } catch (final Throwable t) { destroyContent(newClaim); throw new FlowFileAccessException("Failed to import data from " + source + " for " + destination + " due to " + t.toString(), t); @@ -2671,7 +2675,7 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE claimLog.debug("Creating ContentClaim {} for 'importFrom' for {}", newClaim, destination); newSize = context.getContentRepository().importFrom(source, newClaim); - bytesWritten.getAndAdd(newSize); + bytesWritten += newSize; } catch (final IOException e) { throw new FlowFileAccessException("Unable to create ContentClaim due to " + e.toString(), e); } @@ -2697,8 +2701,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE ensureNotAppending(record.getCurrentClaim()); final long copyCount = context.getContentRepository().exportTo(record.getCurrentClaim(), destination, append, record.getCurrentClaimOffset(), source.getSize()); - bytesRead.getAndAdd(copyCount); - bytesWritten.getAndAdd(copyCount); + bytesRead += copyCount; + bytesWritten += copyCount; } catch (final ContentNotFoundException nfe) { handleContentNotFound(nfe, record); } catch (final Throwable t) { @@ -3016,8 +3020,8 @@ public final class StandardProcessSession implements ProcessSession, ProvenanceE this.removedCount += session.removedCount; this.removedBytes += session.removedBytes; - this.bytesRead += session.bytesRead.get(); - this.bytesWritten += session.bytesWritten.get(); + this.bytesRead += session.bytesRead; + this.bytesWritten += session.bytesWritten; this.flowFilesIn += session.flowFilesIn; this.flowFilesOut += session.flowFilesOut; this.contentSizeIn += session.contentSizeIn; diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadFlowFileRepository.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadFlowFileRepository.java index 9c2a7d858a..2a323de6d0 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadFlowFileRepository.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadFlowFileRepository.java @@ -16,12 +16,7 @@ */ package org.apache.nifi.controller.repository; -import java.io.DataInputStream; -import java.io.DataOutputStream; -import java.io.EOFException; import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; import 
java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -45,16 +40,12 @@ import org.apache.nifi.controller.queue.FlowFileQueue; import org.apache.nifi.controller.repository.claim.ContentClaim; import org.apache.nifi.controller.repository.claim.ResourceClaim; import org.apache.nifi.controller.repository.claim.ResourceClaimManager; -import org.apache.nifi.controller.repository.claim.StandardContentClaim; -import org.apache.nifi.flowfile.FlowFile; import org.apache.nifi.util.FormatUtils; import org.apache.nifi.util.NiFiProperties; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.wali.MinimalLockingWriteAheadLog; -import org.wali.SerDe; import org.wali.SyncListener; -import org.wali.UpdateType; import org.wali.WriteAheadRepository; /** @@ -95,7 +86,7 @@ public class WriteAheadFlowFileRepository implements FlowFileRepository, SyncLis // effectively final private WriteAheadRepository wal; - private WriteAheadRecordSerde serde; + private RepositoryRecordSerdeFactory serdeFactory; private ResourceClaimManager claimManager; // WALI Provides the ability to register callbacks for when a Partition or the entire Repository is sync'ed with the underlying disk. @@ -153,8 +144,8 @@ public class WriteAheadFlowFileRepository implements FlowFileRepository, SyncLis // TODO: Allow for backup path that can be used if disk out of space?? Would allow a snapshot to be stored on // backup and then the data deleted from the normal location; then can move backup to normal location and // delete backup. On restore, if no files exist in partition's directory, would have to check backup directory - serde = new WriteAheadRecordSerde(claimManager); - wal = new MinimalLockingWriteAheadLog<>(flowFileRepositoryPath, numPartitions, serde, this); + serdeFactory = new RepositoryRecordSerdeFactory(claimManager); + wal = new MinimalLockingWriteAheadLog<>(flowFileRepositoryPath, numPartitions, serdeFactory, this); } @Override @@ -319,6 +310,10 @@ public class WriteAheadFlowFileRepository implements FlowFileRepository, SyncLis repoRecords.add(repoRecord); } + // TODO: We should probably update this to support bulk 'SWAP OUT' records. As-is, we have to write out a + // 'SWAP OUT' record for each FlowFile, which includes the Update Type, FlowFile ID, swap file location, and Queue ID. + // We could instead have a single record with Update Type of 'SWAP OUT' and just include swap file location, Queue ID, + // and all FlowFile ID's. // update WALI to indicate that the records were swapped out. 
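WriteAheadFlowFileRepository now hands MinimalLockingWriteAheadLog a RepositoryRecordSerdeFactory instead of a fixed serde, so the journal header, which records the log class name, the write-ahead log version, the serde's encoding name, the serde version, and then whatever serde.writeHeader() emits (for SchemaRepositoryRecordSerde, the record schema itself), is what decides which serde is instantiated when a journal is recovered. A self-contained illustration of that header round trip, using only java.io; the example class is hypothetical and the version numbers are placeholders:

    import java.io.ByteArrayInputStream;
    import java.io.ByteArrayOutputStream;
    import java.io.DataInputStream;
    import java.io.DataOutputStream;
    import java.io.IOException;

    public class JournalHeaderExample {
        public static void main(final String[] args) throws IOException {
            final ByteArrayOutputStream journal = new ByteArrayOutputStream();

            // What the write-ahead log writes at the top of a journal before any edit records.
            try (final DataOutputStream out = new DataOutputStream(journal)) {
                out.writeUTF("org.wali.MinimalLockingWriteAheadLog");   // log implementation
                out.writeInt(1);                                        // write-ahead log version
                out.writeUTF("org.apache.nifi.controller.repository.SchemaRepositoryRecordSerde");
                out.writeInt(1);                                        // serde version
                // serde.writeHeader(out) would follow here, e.g. the serialized record schema
            }

            // What recovery reads back; the encoding name is passed to SerDeFactory.createSerDe(...)
            // so the matching serde can deserialize the records that follow.
            try (final DataInputStream in = new DataInputStream(new ByteArrayInputStream(journal.toByteArray()))) {
                final String logClass = in.readUTF();
                final int logVersion = in.readInt();
                final String serdeEncoding = in.readUTF();
                final int serdeVersion = in.readInt();
                // serde.readHeader(in) would run next, before the first record is read
                System.out.println(logClass + " v" + logVersion + ", " + serdeEncoding + " v" + serdeVersion);
            }
        }
    }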
wal.update(repoRecords, true); @@ -347,9 +342,9 @@ public class WriteAheadFlowFileRepository implements FlowFileRepository, SyncLis for (final FlowFileQueue queue : queueProvider.getAllQueues()) { queueMap.put(queue.getIdentifier(), queue); } - serde.setQueueMap(queueMap); + serdeFactory.setQueueMap(queueMap); final Collection recordList = wal.recoverRecords(); - serde.setQueueMap(null); + serdeFactory.setQueueMap(null); for (final RepositoryRecord record : recordList) { final ContentClaim claim = record.getCurrentClaim(); @@ -361,7 +356,7 @@ public class WriteAheadFlowFileRepository implements FlowFileRepository, SyncLis // Determine the next sequence number for FlowFiles long maxId = minimumSequenceNumber; for (final RepositoryRecord record : recordList) { - final long recordId = serde.getRecordIdentifier(record); + final long recordId = serdeFactory.getRecordIdentifier(record); if (recordId > maxId) { maxId = recordId; } @@ -414,526 +409,4 @@ public class WriteAheadFlowFileRepository implements FlowFileRepository, SyncLis public int checkpoint() throws IOException { return wal.checkpoint(); } - - private static class WriteAheadRecordSerde implements SerDe { - - private static final int CURRENT_ENCODING_VERSION = 9; - - public static final byte ACTION_CREATE = 0; - public static final byte ACTION_UPDATE = 1; - public static final byte ACTION_DELETE = 2; - public static final byte ACTION_SWAPPED_OUT = 3; - public static final byte ACTION_SWAPPED_IN = 4; - - private Map flowFileQueueMap = null; - private long recordsRestored = 0L; - private final ResourceClaimManager claimManager; - - public WriteAheadRecordSerde(final ResourceClaimManager claimManager) { - this.claimManager = claimManager; - } - - private void setQueueMap(final Map queueMap) { - this.flowFileQueueMap = queueMap; - } - - @Override - public void serializeEdit(final RepositoryRecord previousRecordState, final RepositoryRecord record, final DataOutputStream out) throws IOException { - serializeEdit(previousRecordState, record, out, false); - } - - public void serializeEdit(final RepositoryRecord previousRecordState, final RepositoryRecord record, final DataOutputStream out, final boolean forceAttributesWritten) throws IOException { - if (record.isMarkedForAbort()) { - logger.warn("Repository Record {} is marked to be aborted; it will be persisted in the FlowFileRepository as a DELETE record", record); - out.write(ACTION_DELETE); - out.writeLong(getRecordIdentifier(record)); - serializeContentClaim(record.getCurrentClaim(), record.getCurrentClaimOffset(), out); - return; - } - - final UpdateType updateType = getUpdateType(record); - - if (updateType.equals(UpdateType.DELETE)) { - out.write(ACTION_DELETE); - out.writeLong(getRecordIdentifier(record)); - serializeContentClaim(record.getCurrentClaim(), record.getCurrentClaimOffset(), out); - return; - } - - // If there's a Destination Connection, that's the one that we want to associated with this record. - // However, on restart, we will restore the FlowFile and set this connection to its "originalConnection". 
- // If we then serialize the FlowFile again before it's transferred, it's important to allow this to happen, - // so we use the originalConnection instead - FlowFileQueue associatedQueue = record.getDestination(); - if (associatedQueue == null) { - associatedQueue = record.getOriginalQueue(); - } - - if (updateType.equals(UpdateType.SWAP_OUT)) { - out.write(ACTION_SWAPPED_OUT); - out.writeLong(getRecordIdentifier(record)); - out.writeUTF(associatedQueue.getIdentifier()); - out.writeUTF(getLocation(record)); - return; - } - - final FlowFile flowFile = record.getCurrent(); - final ContentClaim claim = record.getCurrentClaim(); - - switch (updateType) { - case UPDATE: - out.write(ACTION_UPDATE); - break; - case CREATE: - out.write(ACTION_CREATE); - break; - case SWAP_IN: - out.write(ACTION_SWAPPED_IN); - break; - default: - throw new AssertionError(); - } - - out.writeLong(getRecordIdentifier(record)); - out.writeLong(flowFile.getEntryDate()); - out.writeLong(flowFile.getLineageStartDate()); - out.writeLong(flowFile.getLineageStartIndex()); - - final Long queueDate = flowFile.getLastQueueDate(); - out.writeLong(queueDate == null ? System.currentTimeMillis() : queueDate); - out.writeLong(flowFile.getQueueDateIndex()); - out.writeLong(flowFile.getSize()); - - if (associatedQueue == null) { - logger.warn("{} Repository Record {} has no Connection associated with it; it will be destroyed on restart", - new Object[]{this, record}); - writeString("", out); - } else { - writeString(associatedQueue.getIdentifier(), out); - } - - serializeContentClaim(claim, record.getCurrentClaimOffset(), out); - - if (forceAttributesWritten || record.isAttributesChanged() || updateType == UpdateType.CREATE || updateType == UpdateType.SWAP_IN) { - out.write(1); // indicate attributes changed - final Map attributes = flowFile.getAttributes(); - out.writeInt(attributes.size()); - for (final Map.Entry entry : attributes.entrySet()) { - writeString(entry.getKey(), out); - writeString(entry.getValue(), out); - } - } else { - out.write(0); // indicate attributes did not change - } - - if (updateType == UpdateType.SWAP_IN) { - out.writeUTF(record.getSwapLocation()); - } - } - - @Override - public RepositoryRecord deserializeEdit(final DataInputStream in, final Map currentRecordStates, final int version) throws IOException { - final int action = in.read(); - final long recordId = in.readLong(); - if (action == ACTION_DELETE) { - final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder().id(recordId); - - if (version > 4) { - deserializeClaim(in, version, ffBuilder); - } - - final FlowFileRecord flowFileRecord = ffBuilder.build(); - final StandardRepositoryRecord record = new StandardRepositoryRecord((FlowFileQueue) null, flowFileRecord); - record.markForDelete(); - - return record; - } - - if (action == ACTION_SWAPPED_OUT) { - final String queueId = in.readUTF(); - final String location = in.readUTF(); - final FlowFileQueue queue = flowFileQueueMap.get(queueId); - - final FlowFileRecord flowFileRecord = new StandardFlowFileRecord.Builder() - .id(recordId) - .build(); - - return new StandardRepositoryRecord(queue, flowFileRecord, location); - } - - final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); - final RepositoryRecord record = currentRecordStates.get(recordId); - ffBuilder.id(recordId); - if (record != null) { - ffBuilder.fromFlowFile(record.getCurrent()); - } - ffBuilder.entryDate(in.readLong()); - - if (version > 1) { - // read the lineage identifiers and 
lineage start date, which were added in version 2. - if (version < 9) { - final int numLineageIds = in.readInt(); - for (int i = 0; i < numLineageIds; i++) { - in.readUTF(); //skip identifiers - } - } - final long lineageStartDate = in.readLong(); - final long lineageStartIndex; - if (version > 7) { - lineageStartIndex = in.readLong(); - } else { - lineageStartIndex = 0L; - } - ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); - - if (version > 5) { - final long lastQueueDate = in.readLong(); - final long queueDateIndex; - if (version > 7) { - queueDateIndex = in.readLong(); - } else { - queueDateIndex = 0L; - } - - ffBuilder.lastQueued(lastQueueDate, queueDateIndex); - } - } - - ffBuilder.size(in.readLong()); - final String connectionId = readString(in); - - logger.debug("{} -> {}", new Object[]{recordId, connectionId}); - - deserializeClaim(in, version, ffBuilder); - - // recover new attributes, if they changed - final int attributesChanged = in.read(); - if (attributesChanged == -1) { - throw new EOFException(); - } else if (attributesChanged == 1) { - final int numAttributes = in.readInt(); - final Map attributes = new HashMap<>(); - for (int i = 0; i < numAttributes; i++) { - final String key = readString(in); - final String value = readString(in); - attributes.put(key, value); - } - - ffBuilder.addAttributes(attributes); - } else if (attributesChanged != 0) { - throw new IOException("Attribute Change Qualifier not found in stream; found value: " - + attributesChanged + " after successfully restoring " + recordsRestored + " records. The FlowFile Repository appears to be corrupt!"); - } - - final FlowFileRecord flowFile = ffBuilder.build(); - String swapLocation = null; - if (action == ACTION_SWAPPED_IN) { - swapLocation = in.readUTF(); - } - - final StandardRepositoryRecord standardRepoRecord; - - if (flowFileQueueMap == null) { - standardRepoRecord = new StandardRepositoryRecord(null, flowFile); - } else { - final FlowFileQueue queue = flowFileQueueMap.get(connectionId); - standardRepoRecord = new StandardRepositoryRecord(queue, flowFile); - if (swapLocation != null) { - standardRepoRecord.setSwapLocation(swapLocation); - } - - if (connectionId.isEmpty()) { - logger.warn("{} does not have a Queue associated with it; this record will be discarded", flowFile); - standardRepoRecord.markForAbort(); - } else if (queue == null) { - logger.warn("{} maps to unknown Queue {}; this record will be discarded", flowFile, connectionId); - standardRepoRecord.markForAbort(); - } - } - - recordsRestored++; - return standardRepoRecord; - } - - @Override - public StandardRepositoryRecord deserializeRecord(final DataInputStream in, final int version) throws IOException { - final int action = in.read(); - if (action == -1) { - return null; - } - - final long recordId = in.readLong(); - if (action == ACTION_DELETE) { - final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder().id(recordId); - - if (version > 4) { - deserializeClaim(in, version, ffBuilder); - } - - final FlowFileRecord flowFileRecord = ffBuilder.build(); - final StandardRepositoryRecord record = new StandardRepositoryRecord((FlowFileQueue) null, flowFileRecord); - record.markForDelete(); - return record; - } - - // if action was not delete, it must be create/swap in - final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); - final long entryDate = in.readLong(); - - if (version > 1) { - // read the lineage identifiers and lineage start date, which were added in version 2. 
- if (version < 9) { - final int numLineageIds = in.readInt(); - for (int i = 0; i < numLineageIds; i++) { - in.readUTF(); //skip identifiers - } - } - - final long lineageStartDate = in.readLong(); - final long lineageStartIndex; - if (version > 7) { - lineageStartIndex = in.readLong(); - } else { - lineageStartIndex = 0L; - } - ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); - - if (version > 5) { - final long lastQueueDate = in.readLong(); - final long queueDateIndex; - if (version > 7) { - queueDateIndex = in.readLong(); - } else { - queueDateIndex = 0L; - } - - ffBuilder.lastQueued(lastQueueDate, queueDateIndex); - } - } - - final long size = in.readLong(); - final String connectionId = readString(in); - - logger.debug("{} -> {}", new Object[]{recordId, connectionId}); - - ffBuilder.id(recordId); - ffBuilder.entryDate(entryDate); - ffBuilder.size(size); - - deserializeClaim(in, version, ffBuilder); - - final int attributesChanged = in.read(); - if (attributesChanged == 1) { - final int numAttributes = in.readInt(); - final Map attributes = new HashMap<>(); - for (int i = 0; i < numAttributes; i++) { - final String key = readString(in); - final String value = readString(in); - attributes.put(key, value); - } - - ffBuilder.addAttributes(attributes); - } else if (attributesChanged == -1) { - throw new EOFException(); - } else if (attributesChanged != 0) { - throw new IOException("Attribute Change Qualifier not found in stream; found value: " - + attributesChanged + " after successfully restoring " + recordsRestored + " records"); - } - - final FlowFileRecord flowFile = ffBuilder.build(); - String swapLocation = null; - if (action == ACTION_SWAPPED_IN) { - swapLocation = in.readUTF(); - } - - final StandardRepositoryRecord record; - - if (flowFileQueueMap == null) { - record = new StandardRepositoryRecord(null, flowFile); - } else { - final FlowFileQueue queue = flowFileQueueMap.get(connectionId); - record = new StandardRepositoryRecord(queue, flowFile); - if (swapLocation != null) { - record.setSwapLocation(swapLocation); - } - - if (connectionId.isEmpty()) { - logger.warn("{} does not have a FlowFile Queue associated with it; this record will be discarded", flowFile); - record.markForAbort(); - } else if (queue == null) { - logger.warn("{} maps to unknown FlowFile Queue {}; this record will be discarded", flowFile, connectionId); - record.markForAbort(); - } - } - - recordsRestored++; - return record; - } - - @Override - public void serializeRecord(final RepositoryRecord record, final DataOutputStream out) throws IOException { - serializeEdit(null, record, out, true); - } - - private void serializeContentClaim(final ContentClaim claim, final long offset, final DataOutputStream out) throws IOException { - if (claim == null) { - out.write(0); - } else { - out.write(1); - - final ResourceClaim resourceClaim = claim.getResourceClaim(); - writeString(resourceClaim.getId(), out); - writeString(resourceClaim.getContainer(), out); - writeString(resourceClaim.getSection(), out); - out.writeLong(claim.getOffset()); - out.writeLong(claim.getLength()); - - out.writeLong(offset); - out.writeBoolean(resourceClaim.isLossTolerant()); - } - } - - private void deserializeClaim(final DataInputStream in, final int serializationVersion, final StandardFlowFileRecord.Builder ffBuilder) throws IOException { - // determine current Content Claim. 
- final int claimExists = in.read(); - if (claimExists == 1) { - final String claimId; - if (serializationVersion < 4) { - claimId = String.valueOf(in.readLong()); - } else { - claimId = readString(in); - } - - final String container = readString(in); - final String section = readString(in); - - final long resourceOffset; - final long resourceLength; - if (serializationVersion < 7) { - resourceOffset = 0L; - resourceLength = -1L; - } else { - resourceOffset = in.readLong(); - resourceLength = in.readLong(); - } - - final long claimOffset = in.readLong(); - - final boolean lossTolerant; - if (serializationVersion >= 3) { - lossTolerant = in.readBoolean(); - } else { - lossTolerant = false; - } - - final ResourceClaim resourceClaim = claimManager.newResourceClaim(container, section, claimId, lossTolerant, false); - final StandardContentClaim contentClaim = new StandardContentClaim(resourceClaim, resourceOffset); - contentClaim.setLength(resourceLength); - - ffBuilder.contentClaim(contentClaim); - ffBuilder.contentClaimOffset(claimOffset); - } else if (claimExists == -1) { - throw new EOFException(); - } else if (claimExists != 0) { - throw new IOException("Claim Existence Qualifier not found in stream; found value: " - + claimExists + " after successfully restoring " + recordsRestored + " records"); - } - } - - private void writeString(final String toWrite, final OutputStream out) throws IOException { - final byte[] bytes = toWrite.getBytes("UTF-8"); - final int utflen = bytes.length; - - if (utflen < 65535) { - out.write(utflen >>> 8); - out.write(utflen); - out.write(bytes); - } else { - out.write(255); - out.write(255); - out.write(utflen >>> 24); - out.write(utflen >>> 16); - out.write(utflen >>> 8); - out.write(utflen); - out.write(bytes); - } - } - - private String readString(final InputStream in) throws IOException { - final Integer numBytes = readFieldLength(in); - if (numBytes == null) { - throw new EOFException(); - } - final byte[] bytes = new byte[numBytes]; - fillBuffer(in, bytes, numBytes); - return new String(bytes, "UTF-8"); - } - - private Integer readFieldLength(final InputStream in) throws IOException { - final int firstValue = in.read(); - final int secondValue = in.read(); - if (firstValue < 0) { - return null; - } - if (secondValue < 0) { - throw new EOFException(); - } - if (firstValue == 0xff && secondValue == 0xff) { - final int ch1 = in.read(); - final int ch2 = in.read(); - final int ch3 = in.read(); - final int ch4 = in.read(); - if ((ch1 | ch2 | ch3 | ch4) < 0) { - throw new EOFException(); - } - return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4; - } else { - return (firstValue << 8) + secondValue; - } - } - - private void fillBuffer(final InputStream in, final byte[] buffer, final int length) throws IOException { - int bytesRead; - int totalBytesRead = 0; - while ((bytesRead = in.read(buffer, totalBytesRead, length - totalBytesRead)) > 0) { - totalBytesRead += bytesRead; - } - if (totalBytesRead != length) { - throw new EOFException(); - } - } - - @Override - public Long getRecordIdentifier(final RepositoryRecord record) { - return record.getCurrent().getId(); - } - - @Override - public UpdateType getUpdateType(final RepositoryRecord record) { - switch (record.getType()) { - case CONTENTMISSING: - case DELETE: - return UpdateType.DELETE; - case CREATE: - return UpdateType.CREATE; - case UPDATE: - return UpdateType.UPDATE; - case SWAP_OUT: - return UpdateType.SWAP_OUT; - case SWAP_IN: - return UpdateType.SWAP_IN; - } - return null; - } - - @Override - public 
int getVersion() { - return CURRENT_ENCODING_VERSION; - } - - @Override - public String getLocation(final RepositoryRecord record) { - return record.getSwapLocation(); - } - } } diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadRepositoryRecordSerde.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadRepositoryRecordSerde.java new file mode 100644 index 0000000000..e8ce44ee0c --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/WriteAheadRepositoryRecordSerde.java @@ -0,0 +1,517 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository; + +import java.io.DataInputStream; +import java.io.DataOutputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.HashMap; +import java.util.Map; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.claim.StandardContentClaim; +import org.apache.nifi.flowfile.FlowFile; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.wali.SerDe; +import org.wali.UpdateType; + +public class WriteAheadRepositoryRecordSerde extends RepositoryRecordSerde implements SerDe { + private static final Logger logger = LoggerFactory.getLogger(WriteAheadRepositoryRecordSerde.class); + + private static final int CURRENT_ENCODING_VERSION = 9; + + public static final byte ACTION_CREATE = 0; + public static final byte ACTION_UPDATE = 1; + public static final byte ACTION_DELETE = 2; + public static final byte ACTION_SWAPPED_OUT = 3; + public static final byte ACTION_SWAPPED_IN = 4; + + private long recordsRestored = 0L; + private final ResourceClaimManager claimManager; + + public WriteAheadRepositoryRecordSerde(final ResourceClaimManager claimManager) { + this.claimManager = claimManager; + } + + @Override + public void serializeEdit(final RepositoryRecord previousRecordState, final RepositoryRecord record, final DataOutputStream out) throws IOException { + serializeEdit(previousRecordState, record, out, false); + } + + public void serializeEdit(final RepositoryRecord previousRecordState, final RepositoryRecord record, final DataOutputStream out, final boolean forceAttributesWritten) throws IOException { + if (record.isMarkedForAbort()) { + 
logger.warn("Repository Record {} is marked to be aborted; it will be persisted in the FlowFileRepository as a DELETE record", record); + out.write(ACTION_DELETE); + out.writeLong(getRecordIdentifier(record)); + serializeContentClaim(record.getCurrentClaim(), record.getCurrentClaimOffset(), out); + return; + } + + final UpdateType updateType = getUpdateType(record); + + if (updateType.equals(UpdateType.DELETE)) { + out.write(ACTION_DELETE); + out.writeLong(getRecordIdentifier(record)); + serializeContentClaim(record.getCurrentClaim(), record.getCurrentClaimOffset(), out); + return; + } + + // If there's a Destination Connection, that's the one that we want to associated with this record. + // However, on restart, we will restore the FlowFile and set this connection to its "originalConnection". + // If we then serialize the FlowFile again before it's transferred, it's important to allow this to happen, + // so we use the originalConnection instead + FlowFileQueue associatedQueue = record.getDestination(); + if (associatedQueue == null) { + associatedQueue = record.getOriginalQueue(); + } + + if (updateType.equals(UpdateType.SWAP_OUT)) { + out.write(ACTION_SWAPPED_OUT); + out.writeLong(getRecordIdentifier(record)); + out.writeUTF(associatedQueue.getIdentifier()); + out.writeUTF(getLocation(record)); + return; + } + + final FlowFile flowFile = record.getCurrent(); + final ContentClaim claim = record.getCurrentClaim(); + + switch (updateType) { + case UPDATE: + out.write(ACTION_UPDATE); + break; + case CREATE: + out.write(ACTION_CREATE); + break; + case SWAP_IN: + out.write(ACTION_SWAPPED_IN); + break; + default: + throw new AssertionError(); + } + + out.writeLong(getRecordIdentifier(record)); + out.writeLong(flowFile.getEntryDate()); + out.writeLong(flowFile.getLineageStartDate()); + out.writeLong(flowFile.getLineageStartIndex()); + + final Long queueDate = flowFile.getLastQueueDate(); + out.writeLong(queueDate == null ? 
System.currentTimeMillis() : queueDate); + out.writeLong(flowFile.getQueueDateIndex()); + out.writeLong(flowFile.getSize()); + + if (associatedQueue == null) { + logger.warn("{} Repository Record {} has no Connection associated with it; it will be destroyed on restart", + new Object[] {this, record}); + writeString("", out); + } else { + writeString(associatedQueue.getIdentifier(), out); + } + + serializeContentClaim(claim, record.getCurrentClaimOffset(), out); + + if (forceAttributesWritten || record.isAttributesChanged() || updateType == UpdateType.CREATE || updateType == UpdateType.SWAP_IN) { + out.write(1); // indicate attributes changed + final Map attributes = flowFile.getAttributes(); + out.writeInt(attributes.size()); + for (final Map.Entry entry : attributes.entrySet()) { + writeString(entry.getKey(), out); + writeString(entry.getValue(), out); + } + } else { + out.write(0); // indicate attributes did not change + } + + if (updateType == UpdateType.SWAP_IN) { + out.writeUTF(record.getSwapLocation()); + } + } + + @Override + public RepositoryRecord deserializeEdit(final DataInputStream in, final Map currentRecordStates, final int version) throws IOException { + final int action = in.read(); + final long recordId = in.readLong(); + if (action == ACTION_DELETE) { + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder().id(recordId); + + if (version > 4) { + deserializeClaim(in, version, ffBuilder); + } + + final FlowFileRecord flowFileRecord = ffBuilder.build(); + final StandardRepositoryRecord record = new StandardRepositoryRecord((FlowFileQueue) null, flowFileRecord); + record.markForDelete(); + + return record; + } + + if (action == ACTION_SWAPPED_OUT) { + final String queueId = in.readUTF(); + final String location = in.readUTF(); + final FlowFileQueue queue = getFlowFileQueue(queueId); + + final FlowFileRecord flowFileRecord = new StandardFlowFileRecord.Builder() + .id(recordId) + .build(); + + return new StandardRepositoryRecord(queue, flowFileRecord, location); + } + + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); + final RepositoryRecord record = currentRecordStates.get(recordId); + ffBuilder.id(recordId); + if (record != null) { + ffBuilder.fromFlowFile(record.getCurrent()); + } + ffBuilder.entryDate(in.readLong()); + + if (version > 1) { + // read the lineage identifiers and lineage start date, which were added in version 2. 
+ if (version < 9) { + final int numLineageIds = in.readInt(); + for (int i = 0; i < numLineageIds; i++) { + in.readUTF(); //skip identifiers + } + } + final long lineageStartDate = in.readLong(); + final long lineageStartIndex; + if (version > 7) { + lineageStartIndex = in.readLong(); + } else { + lineageStartIndex = 0L; + } + ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); + + if (version > 5) { + final long lastQueueDate = in.readLong(); + final long queueDateIndex; + if (version > 7) { + queueDateIndex = in.readLong(); + } else { + queueDateIndex = 0L; + } + + ffBuilder.lastQueued(lastQueueDate, queueDateIndex); + } + } + + ffBuilder.size(in.readLong()); + final String connectionId = readString(in); + + logger.debug("{} -> {}", new Object[] {recordId, connectionId}); + + deserializeClaim(in, version, ffBuilder); + + // recover new attributes, if they changed + final int attributesChanged = in.read(); + if (attributesChanged == -1) { + throw new EOFException(); + } else if (attributesChanged == 1) { + final int numAttributes = in.readInt(); + final Map attributes = new HashMap<>(); + for (int i = 0; i < numAttributes; i++) { + final String key = readString(in); + final String value = readString(in); + attributes.put(key, value); + } + + ffBuilder.addAttributes(attributes); + } else if (attributesChanged != 0) { + throw new IOException("Attribute Change Qualifier not found in stream; found value: " + + attributesChanged + " after successfully restoring " + recordsRestored + " records. The FlowFile Repository appears to be corrupt!"); + } + + final FlowFileRecord flowFile = ffBuilder.build(); + String swapLocation = null; + if (action == ACTION_SWAPPED_IN) { + swapLocation = in.readUTF(); + } + + final FlowFileQueue queue = getFlowFileQueue(connectionId); + final StandardRepositoryRecord standardRepoRecord = new StandardRepositoryRecord(queue, flowFile); + if (swapLocation != null) { + standardRepoRecord.setSwapLocation(swapLocation); + } + + if (connectionId.isEmpty()) { + logger.warn("{} does not have a Queue associated with it; this record will be discarded", flowFile); + standardRepoRecord.markForAbort(); + } else if (queue == null) { + logger.warn("{} maps to unknown Queue {}; this record will be discarded", flowFile, connectionId); + standardRepoRecord.markForAbort(); + } + + recordsRestored++; + return standardRepoRecord; + } + + @Override + public StandardRepositoryRecord deserializeRecord(final DataInputStream in, final int version) throws IOException { + final int action = in.read(); + if (action == -1) { + return null; + } + + final long recordId = in.readLong(); + if (action == ACTION_DELETE) { + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder().id(recordId); + + if (version > 4) { + deserializeClaim(in, version, ffBuilder); + } + + final FlowFileRecord flowFileRecord = ffBuilder.build(); + final StandardRepositoryRecord record = new StandardRepositoryRecord((FlowFileQueue) null, flowFileRecord); + record.markForDelete(); + return record; + } + + // if action was not delete, it must be create/swap in + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); + final long entryDate = in.readLong(); + + if (version > 1) { + // read the lineage identifiers and lineage start date, which were added in version 2. 
+ if (version < 9) { + final int numLineageIds = in.readInt(); + for (int i = 0; i < numLineageIds; i++) { + in.readUTF(); //skip identifiers + } + } + + final long lineageStartDate = in.readLong(); + final long lineageStartIndex; + if (version > 7) { + lineageStartIndex = in.readLong(); + } else { + lineageStartIndex = 0L; + } + ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); + + if (version > 5) { + final long lastQueueDate = in.readLong(); + final long queueDateIndex; + if (version > 7) { + queueDateIndex = in.readLong(); + } else { + queueDateIndex = 0L; + } + + ffBuilder.lastQueued(lastQueueDate, queueDateIndex); + } + } + + final long size = in.readLong(); + final String connectionId = readString(in); + + logger.debug("{} -> {}", new Object[] {recordId, connectionId}); + + ffBuilder.id(recordId); + ffBuilder.entryDate(entryDate); + ffBuilder.size(size); + + deserializeClaim(in, version, ffBuilder); + + final int attributesChanged = in.read(); + if (attributesChanged == 1) { + final int numAttributes = in.readInt(); + final Map attributes = new HashMap<>(); + for (int i = 0; i < numAttributes; i++) { + final String key = readString(in); + final String value = readString(in); + attributes.put(key, value); + } + + ffBuilder.addAttributes(attributes); + } else if (attributesChanged == -1) { + throw new EOFException(); + } else if (attributesChanged != 0) { + throw new IOException("Attribute Change Qualifier not found in stream; found value: " + + attributesChanged + " after successfully restoring " + recordsRestored + " records"); + } + + final FlowFileRecord flowFile = ffBuilder.build(); + String swapLocation = null; + if (action == ACTION_SWAPPED_IN) { + swapLocation = in.readUTF(); + } + + final StandardRepositoryRecord record; + final FlowFileQueue queue = getFlowFileQueue(connectionId); + record = new StandardRepositoryRecord(queue, flowFile); + if (swapLocation != null) { + record.setSwapLocation(swapLocation); + } + + if (connectionId.isEmpty()) { + logger.warn("{} does not have a FlowFile Queue associated with it; this record will be discarded", flowFile); + record.markForAbort(); + } else if (queue == null) { + logger.warn("{} maps to unknown FlowFile Queue {}; this record will be discarded", flowFile, connectionId); + record.markForAbort(); + } + + recordsRestored++; + return record; + } + + @Override + public void serializeRecord(final RepositoryRecord record, final DataOutputStream out) throws IOException { + serializeEdit(null, record, out, true); + } + + private void serializeContentClaim(final ContentClaim claim, final long offset, final DataOutputStream out) throws IOException { + if (claim == null) { + out.write(0); + } else { + out.write(1); + + final ResourceClaim resourceClaim = claim.getResourceClaim(); + writeString(resourceClaim.getId(), out); + writeString(resourceClaim.getContainer(), out); + writeString(resourceClaim.getSection(), out); + out.writeLong(claim.getOffset()); + out.writeLong(claim.getLength()); + + out.writeLong(offset); + out.writeBoolean(resourceClaim.isLossTolerant()); + } + } + + private void deserializeClaim(final DataInputStream in, final int serializationVersion, final StandardFlowFileRecord.Builder ffBuilder) throws IOException { + // determine current Content Claim. 
+ final int claimExists = in.read(); + if (claimExists == 1) { + final String claimId; + if (serializationVersion < 4) { + claimId = String.valueOf(in.readLong()); + } else { + claimId = readString(in); + } + + final String container = readString(in); + final String section = readString(in); + + final long resourceOffset; + final long resourceLength; + if (serializationVersion < 7) { + resourceOffset = 0L; + resourceLength = -1L; + } else { + resourceOffset = in.readLong(); + resourceLength = in.readLong(); + } + + final long claimOffset = in.readLong(); + + final boolean lossTolerant; + if (serializationVersion >= 3) { + lossTolerant = in.readBoolean(); + } else { + lossTolerant = false; + } + + final ResourceClaim resourceClaim = claimManager.newResourceClaim(container, section, claimId, lossTolerant, false); + final StandardContentClaim contentClaim = new StandardContentClaim(resourceClaim, resourceOffset); + contentClaim.setLength(resourceLength); + + ffBuilder.contentClaim(contentClaim); + ffBuilder.contentClaimOffset(claimOffset); + } else if (claimExists == -1) { + throw new EOFException(); + } else if (claimExists != 0) { + throw new IOException("Claim Existence Qualifier not found in stream; found value: " + + claimExists + " after successfully restoring " + recordsRestored + " records"); + } + } + + private void writeString(final String toWrite, final OutputStream out) throws IOException { + final byte[] bytes = toWrite.getBytes("UTF-8"); + final int utflen = bytes.length; + + if (utflen < 65535) { + out.write(utflen >>> 8); + out.write(utflen); + out.write(bytes); + } else { + out.write(255); + out.write(255); + out.write(utflen >>> 24); + out.write(utflen >>> 16); + out.write(utflen >>> 8); + out.write(utflen); + out.write(bytes); + } + } + + private String readString(final InputStream in) throws IOException { + final Integer numBytes = readFieldLength(in); + if (numBytes == null) { + throw new EOFException(); + } + final byte[] bytes = new byte[numBytes]; + fillBuffer(in, bytes, numBytes); + return new String(bytes, "UTF-8"); + } + + private Integer readFieldLength(final InputStream in) throws IOException { + final int firstValue = in.read(); + final int secondValue = in.read(); + if (firstValue < 0) { + return null; + } + if (secondValue < 0) { + throw new EOFException(); + } + if (firstValue == 0xff && secondValue == 0xff) { + final int ch1 = in.read(); + final int ch2 = in.read(); + final int ch3 = in.read(); + final int ch4 = in.read(); + if ((ch1 | ch2 | ch3 | ch4) < 0) { + throw new EOFException(); + } + return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4; + } else { + return (firstValue << 8) + secondValue; + } + } + + private void fillBuffer(final InputStream in, final byte[] buffer, final int length) throws IOException { + int bytesRead; + int totalBytesRead = 0; + while ((bytesRead = in.read(buffer, totalBytesRead, length - totalBytesRead)) > 0) { + totalBytesRead += bytesRead; + } + if (totalBytesRead != length) { + throw new EOFException(); + } + } + + @Override + public int getVersion() { + return CURRENT_ENCODING_VERSION; + } +} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaim.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaim.java index 25dbaee264..7e87199e6b 100644 --- 
a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaim.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaim.java @@ -64,29 +64,6 @@ public class StandardResourceClaim implements ResourceClaim, Comparable= 1 if this is greater than other - */ - @Override - public int compareTo(final ResourceClaim other) { - final int idComparison = id.compareTo(other.getId()); - if (idComparison != 0) { - return idComparison; - } - - final int containerComparison = container.compareTo(other.getContainer()); - if (containerComparison != 0) { - return containerComparison; - } - - return section.compareTo(other.getSection()); - } - @Override public boolean equals(final Object other) { if (this == other) { diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaimManager.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaimManager.java index 7d554b1ae7..e4f060e64b 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaimManager.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/claim/StandardResourceClaimManager.java @@ -29,10 +29,9 @@ import org.slf4j.LoggerFactory; public class StandardResourceClaimManager implements ResourceClaimManager { - private static final ConcurrentMap claimantCounts = new ConcurrentHashMap<>(); private static final Logger logger = LoggerFactory.getLogger(StandardResourceClaimManager.class); - - private static final BlockingQueue destructableClaims = new LinkedBlockingQueue<>(50000); + private final ConcurrentMap claimantCounts = new ConcurrentHashMap<>(); + private final BlockingQueue destructableClaims = new LinkedBlockingQueue<>(50000); @Override public ResourceClaim newResourceClaim(final String container, final String section, final String id, final boolean lossTolerant, final boolean writable) { @@ -50,7 +49,7 @@ public class StandardResourceClaimManager implements ResourceClaimManager { return (count == null) ? null : count.getClaim(); } - private static AtomicInteger getCounter(final ResourceClaim claim) { + private AtomicInteger getCounter(final ResourceClaim claim) { if (claim == null) { return null; } diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingInputStream.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingInputStream.java deleted file mode 100644 index 7de25accec..0000000000 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingInputStream.java +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.nifi.controller.repository.io; - -import java.io.IOException; -import java.io.InputStream; -import java.util.concurrent.atomic.AtomicLong; - -public class ByteCountingInputStream extends InputStream { - - private final AtomicLong bytesReadHolder; - private final InputStream in; - private long bytesSkipped = 0L; - - public ByteCountingInputStream(final InputStream in, final AtomicLong longHolder) { - this.in = in; - this.bytesReadHolder = longHolder; - } - - @Override - public int read() throws IOException { - final int fromSuper = in.read(); - if (fromSuper >= 0) { - bytesReadHolder.getAndIncrement(); - } - return fromSuper; - } - - @Override - public int read(byte[] b, int off, int len) throws IOException { - final int fromSuper = in.read(b, off, len); - if (fromSuper >= 0) { - bytesReadHolder.getAndAdd(fromSuper); - } - - return fromSuper; - } - - @Override - public int read(byte[] b) throws IOException { - return read(b, 0, b.length); - } - - @Override - public long skip(final long n) throws IOException { - final long skipped = in.skip(n); - bytesSkipped += skipped; - return skipped; - } - - @Override - public int available() throws IOException { - return in.available(); - } - - @Override - public void mark(int readlimit) { - in.mark(readlimit); - } - - @Override - public boolean markSupported() { - return in.markSupported(); - } - - @Override - public void reset() throws IOException { - in.reset(); - } - - @Override - public void close() throws IOException { - in.close(); - } - - public long getBytesRead() { - return bytesReadHolder.get(); - } - - public long getBytesSkipped() { - return bytesSkipped; - } - - public long getStreamLocation() { - return getBytesRead() + getBytesSkipped(); - } -} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimFieldMap.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimFieldMap.java new file mode 100644 index 0000000000..76c208dd9a --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimFieldMap.java @@ -0,0 +1,92 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository.schema; + +import java.util.List; + +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.claim.StandardContentClaim; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; + +public class ContentClaimFieldMap implements Record { + private final ContentClaim contentClaim; + private final long contentClaimOffset; + private final ResourceClaimFieldMap resourceClaimFieldMap; + private final RecordSchema schema; + + public ContentClaimFieldMap(final ContentClaim contentClaim, final long contentClaimOffset, final RecordSchema schema) { + this.contentClaim = contentClaim; + this.contentClaimOffset = contentClaimOffset; + this.schema = schema; + + final List resourceClaimFields = schema.getField(ContentClaimSchema.RESOURCE_CLAIM).getSubFields(); + final RecordSchema resourceClaimSchema = new RecordSchema(resourceClaimFields); + this.resourceClaimFieldMap = new ResourceClaimFieldMap(contentClaim.getResourceClaim(), resourceClaimSchema); + } + + @Override + public Object getFieldValue(final String fieldName) { + switch (fieldName) { + case ContentClaimSchema.RESOURCE_CLAIM: + return resourceClaimFieldMap; + case ContentClaimSchema.CONTENT_CLAIM_LENGTH: + return contentClaim.getLength(); + case ContentClaimSchema.CONTENT_CLAIM_OFFSET: + return contentClaimOffset; + case ContentClaimSchema.RESOURCE_CLAIM_OFFSET: + return contentClaim.getOffset(); + default: + return null; + } + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public String toString() { + return "ContentClaimFieldMap[" + contentClaim + "]"; + } + + public static ContentClaim getContentClaim(final Record claimRecord, final ResourceClaimManager resourceClaimManager) { + final Record resourceClaimRecord = (Record) claimRecord.getFieldValue(ContentClaimSchema.RESOURCE_CLAIM); + final String container = (String) resourceClaimRecord.getFieldValue(ContentClaimSchema.CLAIM_CONTAINER); + final String section = (String) resourceClaimRecord.getFieldValue(ContentClaimSchema.CLAIM_SECTION); + final String identifier = (String) resourceClaimRecord.getFieldValue(ContentClaimSchema.CLAIM_IDENTIFIER); + final Boolean lossTolerant = (Boolean) resourceClaimRecord.getFieldValue(ContentClaimSchema.LOSS_TOLERANT); + + final Long length = (Long) claimRecord.getFieldValue(ContentClaimSchema.CONTENT_CLAIM_LENGTH); + final Long resourceOffset = (Long) claimRecord.getFieldValue(ContentClaimSchema.RESOURCE_CLAIM_OFFSET); + + final ResourceClaim resourceClaim = resourceClaimManager.newResourceClaim(container, section, identifier, lossTolerant, false); + final StandardContentClaim contentClaim = new StandardContentClaim(resourceClaim, resourceOffset); + contentClaim.setLength(length); + + return contentClaim; + } + + public static Long 
getContentClaimOffset(final Record claimRecord) { + return (Long) claimRecord.getFieldValue(ContentClaimSchema.CONTENT_CLAIM_OFFSET); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimSchema.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimSchema.java new file mode 100644 index 0000000000..c55c7586ff --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ContentClaimSchema.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository.schema; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SimpleRecordField; + +public class ContentClaimSchema { + + // resource claim fields + public static final String CLAIM_CONTAINER = "Container"; + public static final String CLAIM_SECTION = "Section"; + public static final String CLAIM_IDENTIFIER = "Identifier"; + public static final String LOSS_TOLERANT = "Loss Tolerant"; + public static final String RESOURCE_CLAIM = "Resource Claim"; + + // content claim fields + public static final String RESOURCE_CLAIM_OFFSET = "Resource Claim Offset"; // offset into resource claim where the content claim begins + public static final String CONTENT_CLAIM_OFFSET = "Content Claim Offset"; // offset into the content claim where the flowfile begins + public static final String CONTENT_CLAIM_LENGTH = "Content Claim Length"; + + public static final RecordSchema CONTENT_CLAIM_SCHEMA_V1; + public static final RecordSchema RESOURCE_CLAIM_SCHEMA_V1; + + static { + final List resourceClaimFields = new ArrayList<>(); + resourceClaimFields.add(new SimpleRecordField(CLAIM_CONTAINER, FieldType.STRING, Repetition.EXACTLY_ONE)); + resourceClaimFields.add(new SimpleRecordField(CLAIM_SECTION, FieldType.STRING, Repetition.EXACTLY_ONE)); + resourceClaimFields.add(new SimpleRecordField(CLAIM_IDENTIFIER, FieldType.STRING, Repetition.EXACTLY_ONE)); + resourceClaimFields.add(new SimpleRecordField(LOSS_TOLERANT, FieldType.BOOLEAN, Repetition.EXACTLY_ONE)); + RESOURCE_CLAIM_SCHEMA_V1 = new RecordSchema(Collections.unmodifiableList(resourceClaimFields)); + + final List contentClaimFields = new ArrayList<>(); + 
contentClaimFields.add(new ComplexRecordField(RESOURCE_CLAIM, Repetition.EXACTLY_ONE, resourceClaimFields)); + contentClaimFields.add(new SimpleRecordField(RESOURCE_CLAIM_OFFSET, FieldType.LONG, Repetition.EXACTLY_ONE)); + contentClaimFields.add(new SimpleRecordField(CONTENT_CLAIM_OFFSET, FieldType.LONG, Repetition.EXACTLY_ONE)); + contentClaimFields.add(new SimpleRecordField(CONTENT_CLAIM_LENGTH, FieldType.LONG, Repetition.EXACTLY_ONE)); + CONTENT_CLAIM_SCHEMA_V1 = new RecordSchema(Collections.unmodifiableList(contentClaimFields)); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileRecordFieldMap.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileRecordFieldMap.java new file mode 100644 index 0000000000..ff0615f59e --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileRecordFieldMap.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository.schema; + +import java.util.Map; + +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.StandardFlowFileRecord; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; + +public class FlowFileRecordFieldMap implements Record { + private final FlowFileRecord flowFile; + private final RecordSchema schema; + private final RecordSchema contentClaimSchema; + private final ContentClaimFieldMap contentClaim; + + public FlowFileRecordFieldMap(final FlowFileRecord flowFile, final RecordSchema schema) { + this.flowFile = flowFile; + this.schema = schema; + + final RecordField contentClaimField = schema.getField(FlowFileSchema.CONTENT_CLAIM); + contentClaimSchema = new RecordSchema(contentClaimField.getSubFields()); + contentClaim = flowFile.getContentClaim() == null ? 
null : new ContentClaimFieldMap(flowFile.getContentClaim(), flowFile.getContentClaimOffset(), contentClaimSchema); + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public Object getFieldValue(final String fieldName) { + switch (fieldName) { + case FlowFileSchema.ATTRIBUTES: + return flowFile.getAttributes(); + case FlowFileSchema.CONTENT_CLAIM: + return contentClaim; + case FlowFileSchema.ENTRY_DATE: + return flowFile.getEntryDate(); + case FlowFileSchema.FLOWFILE_SIZE: + return flowFile.getSize(); + case FlowFileSchema.LINEAGE_START_DATE: + return flowFile.getLineageStartDate(); + case FlowFileSchema.LINEAGE_START_INDEX: + return flowFile.getLineageStartIndex(); + case FlowFileSchema.QUEUE_DATE: + return flowFile.getLastQueueDate(); + case FlowFileSchema.QUEUE_DATE_INDEX: + return flowFile.getQueueDateIndex(); + case FlowFileSchema.RECORD_ID: + return flowFile.getId(); + } + + return null; + } + + @SuppressWarnings("unchecked") + public static FlowFileRecord getFlowFile(final Record record, final ResourceClaimManager claimManager) { + final StandardFlowFileRecord.Builder builder = new StandardFlowFileRecord.Builder(); + builder.id((Long) record.getFieldValue(FlowFileSchema.RECORD_ID)); + builder.entryDate((Long) record.getFieldValue(FlowFileSchema.ENTRY_DATE)); + builder.size((Long) record.getFieldValue(FlowFileSchema.FLOWFILE_SIZE)); + builder.addAttributes((Map) record.getFieldValue(FlowFileSchema.ATTRIBUTES)); + builder.lineageStart((Long) record.getFieldValue(FlowFileSchema.LINEAGE_START_DATE), (Long) record.getFieldValue(FlowFileSchema.LINEAGE_START_INDEX)); + builder.lastQueued((Long) record.getFieldValue(FlowFileSchema.QUEUE_DATE), (Long) record.getFieldValue(FlowFileSchema.QUEUE_DATE_INDEX)); + + final Record contentClaimRecord = (Record) record.getFieldValue(FlowFileSchema.CONTENT_CLAIM); + if (contentClaimRecord != null) { + final ContentClaim claim = ContentClaimFieldMap.getContentClaim(contentClaimRecord, claimManager); + builder.contentClaim(claim); + + final Long offset = ContentClaimFieldMap.getContentClaimOffset(contentClaimRecord); + if (offset != null) { + builder.contentClaimOffset(offset); + } + } + + return builder.build(); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileSchema.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileSchema.java new file mode 100644 index 0000000000..53eab703d7 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/FlowFileSchema.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository.schema; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.MapRecordField; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SimpleRecordField; + +public class FlowFileSchema { + + public static final String RECORD_ID = "Record ID"; + public static final String ENTRY_DATE = "Entry Date"; + public static final String LINEAGE_START_DATE = "Lineage Start Date"; + public static final String LINEAGE_START_INDEX = "Lineage Start Index"; + public static final String QUEUE_DATE = "Queued Date"; + public static final String QUEUE_DATE_INDEX = "Queued Date Index"; + public static final String FLOWFILE_SIZE = "FlowFile Size"; + public static final String CONTENT_CLAIM = "Content Claim"; + public static final String ATTRIBUTES = "Attributes"; + + // attribute fields + public static final String ATTRIBUTE_NAME = "Attribute Name"; + public static final String ATTRIBUTE_VALUE = "Attribute Value"; + + public static final RecordSchema FLOWFILE_SCHEMA_V1; + + static { + final List flowFileFields = new ArrayList<>(); + + final RecordField attributeNameField = new SimpleRecordField(ATTRIBUTE_NAME, FieldType.STRING, Repetition.EXACTLY_ONE); + final RecordField attributeValueField = new SimpleRecordField(ATTRIBUTE_VALUE, FieldType.STRING, Repetition.EXACTLY_ONE); + + flowFileFields.add(new SimpleRecordField(RECORD_ID, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new SimpleRecordField(ENTRY_DATE, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new SimpleRecordField(LINEAGE_START_DATE, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new SimpleRecordField(LINEAGE_START_INDEX, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new SimpleRecordField(QUEUE_DATE, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new SimpleRecordField(QUEUE_DATE_INDEX, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new SimpleRecordField(FLOWFILE_SIZE, FieldType.LONG, Repetition.EXACTLY_ONE)); + flowFileFields.add(new ComplexRecordField(CONTENT_CLAIM, Repetition.ZERO_OR_ONE, ContentClaimSchema.CONTENT_CLAIM_SCHEMA_V1.getFields())); + flowFileFields.add(new MapRecordField(ATTRIBUTES, attributeNameField, attributeValueField, Repetition.ZERO_OR_ONE)); + + FLOWFILE_SCHEMA_V1 = new RecordSchema(flowFileFields); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordFieldMap.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordFieldMap.java new file mode 100644 index 0000000000..9804decc88 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordFieldMap.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository.schema; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.RepositoryRecord; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordSchema; + +public class RepositoryRecordFieldMap implements Record { + private final RepositoryRecord record; + private final FlowFileRecord flowFile; + private final RecordSchema schema; + private final RecordSchema contentClaimSchema; + + public RepositoryRecordFieldMap(final RepositoryRecord record, final RecordSchema repoRecordSchema, final RecordSchema contentClaimSchema) { + this.schema = repoRecordSchema; + this.contentClaimSchema = contentClaimSchema; + this.record = record; + this.flowFile = record.getCurrent(); + } + + @Override + public Object getFieldValue(final String fieldName) { + switch (fieldName) { + case RepositoryRecordSchema.ACTION_TYPE: + return record.getType().name(); + case RepositoryRecordSchema.RECORD_ID: + return record.getCurrent().getId(); + case RepositoryRecordSchema.SWAP_LOCATION: + return record.getSwapLocation(); + case FlowFileSchema.ATTRIBUTES: + return flowFile.getAttributes(); + case FlowFileSchema.ENTRY_DATE: + return flowFile.getEntryDate(); + case FlowFileSchema.FLOWFILE_SIZE: + return flowFile.getSize(); + case FlowFileSchema.LINEAGE_START_DATE: + return flowFile.getLineageStartDate(); + case FlowFileSchema.LINEAGE_START_INDEX: + return flowFile.getLineageStartIndex(); + case FlowFileSchema.QUEUE_DATE: + return flowFile.getLastQueueDate(); + case FlowFileSchema.QUEUE_DATE_INDEX: + return flowFile.getQueueDateIndex(); + case FlowFileSchema.CONTENT_CLAIM: + final ContentClaimFieldMap contentClaimFieldMap = record.getCurrentClaim() == null ? null + : new ContentClaimFieldMap(record.getCurrentClaim(), record.getCurrentClaimOffset(), contentClaimSchema); + return contentClaimFieldMap; + case RepositoryRecordSchema.QUEUE_IDENTIFIER: + final FlowFileQueue queue = record.getDestination() == null ? 
record.getOriginalQueue() : record.getDestination(); + return queue.getIdentifier(); + default: + return null; + } + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public String toString() { + return "RepositoryRecordFieldMap[" + record + "]"; + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordSchema.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordSchema.java new file mode 100644 index 0000000000..5887c8af05 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordSchema.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.repository.schema; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SimpleRecordField; +import org.apache.nifi.repository.schema.UnionRecordField; + +public class RepositoryRecordSchema { + + public static final String REPOSITORY_RECORD_UPDATE_V1 = "Repository Record Update"; // top level field name + + // repository record fields + public static final String ACTION_TYPE = "Action"; + public static final String RECORD_ID = "Record ID"; + public static final String QUEUE_IDENTIFIER = "Queue Identifier"; + public static final String SWAP_LOCATION = "Swap Location"; + + // Update types + public static final String CREATE_OR_UPDATE_ACTION = "Create or Update"; + public static final String DELETE_ACTION = "Delete"; + public static final String SWAP_IN_ACTION = "Swap In"; + public static final String SWAP_OUT_ACTION = "Swap Out"; + + public static final RecordSchema REPOSITORY_RECORD_SCHEMA_V1; + public static final RecordSchema CREATE_OR_UPDATE_SCHEMA_V1; + public static final RecordSchema DELETE_SCHEMA_V1; + public static final RecordSchema SWAP_IN_SCHEMA_V1; + public static final RecordSchema SWAP_OUT_SCHEMA_V1; + + public static final RecordField ACTION_TYPE_FIELD = new SimpleRecordField(ACTION_TYPE, FieldType.STRING, Repetition.EXACTLY_ONE); + public static final RecordField RECORD_ID_FIELD = new SimpleRecordField(RECORD_ID, FieldType.LONG, Repetition.EXACTLY_ONE); + + static { + // Fields for "Create" or "Update" records + final List 
createOrUpdateFields = new ArrayList<>(); + createOrUpdateFields.add(ACTION_TYPE_FIELD); + createOrUpdateFields.addAll(FlowFileSchema.FLOWFILE_SCHEMA_V1.getFields()); + createOrUpdateFields.add(new SimpleRecordField(QUEUE_IDENTIFIER, FieldType.STRING, Repetition.EXACTLY_ONE)); + createOrUpdateFields.add(new SimpleRecordField(SWAP_LOCATION, FieldType.STRING, Repetition.ZERO_OR_ONE)); + final ComplexRecordField createOrUpdate = new ComplexRecordField(CREATE_OR_UPDATE_ACTION, Repetition.EXACTLY_ONE, createOrUpdateFields); + CREATE_OR_UPDATE_SCHEMA_V1 = new RecordSchema(createOrUpdateFields); + + // Fields for "Delete" records + final List deleteFields = new ArrayList<>(); + deleteFields.add(ACTION_TYPE_FIELD); + deleteFields.add(RECORD_ID_FIELD); + final ComplexRecordField delete = new ComplexRecordField(DELETE_ACTION, Repetition.EXACTLY_ONE, deleteFields); + DELETE_SCHEMA_V1 = new RecordSchema(deleteFields); + + // Fields for "Swap Out" records + final List swapOutFields = new ArrayList<>(); + swapOutFields.add(ACTION_TYPE_FIELD); + swapOutFields.add(RECORD_ID_FIELD); + swapOutFields.add(new SimpleRecordField(QUEUE_IDENTIFIER, FieldType.STRING, Repetition.EXACTLY_ONE)); + swapOutFields.add(new SimpleRecordField(SWAP_LOCATION, FieldType.STRING, Repetition.EXACTLY_ONE)); + final ComplexRecordField swapOut = new ComplexRecordField(SWAP_OUT_ACTION, Repetition.EXACTLY_ONE, swapOutFields); + SWAP_OUT_SCHEMA_V1 = new RecordSchema(swapOutFields); + + // Fields for "Swap In" records + final List swapInFields = new ArrayList<>(createOrUpdateFields); + swapInFields.add(new SimpleRecordField(SWAP_LOCATION, FieldType.STRING, Repetition.EXACTLY_ONE)); + final ComplexRecordField swapIn = new ComplexRecordField(SWAP_IN_ACTION, Repetition.EXACTLY_ONE, swapInFields); + SWAP_IN_SCHEMA_V1 = new RecordSchema(swapInFields); + + // Union Field that creates the top-level field type + final UnionRecordField repoUpdateField = new UnionRecordField(REPOSITORY_RECORD_UPDATE_V1, Repetition.EXACTLY_ONE, createOrUpdate, delete, swapOut, swapIn); + REPOSITORY_RECORD_SCHEMA_V1 = new RecordSchema(Collections.singletonList(repoUpdateField)); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordUpdate.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordUpdate.java new file mode 100644 index 0000000000..ad51f4d830 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/RepositoryRecordUpdate.java @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
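Every write-ahead-log update is serialized under the single top-level union field, and the branch name ("Create or Update", "Delete", "Swap In", "Swap Out") tells the reader which sub-schema the payload follows; RepositoryRecordUpdate, defined next, performs the forward mapping from UpdateType to branch name when writing. A deserializer needs the inverse lookup, roughly along these lines (the helper class is illustrative only, not part of the framework):

import org.apache.nifi.controller.repository.schema.RepositoryRecordSchema;
import org.wali.UpdateType;

final class ActionNameLookup {
    // Illustrative helper: map a serialized branch name back to the WAL UpdateType.
    // CREATE and UPDATE share the "Create or Update" branch, so the round trip cannot
    // tell them apart; readers simply treat that branch as an upsert.
    static UpdateType toUpdateType(final String actionName) {
        switch (actionName) {
            case RepositoryRecordSchema.CREATE_OR_UPDATE_ACTION:
                return UpdateType.UPDATE;
            case RepositoryRecordSchema.DELETE_ACTION:
                return UpdateType.DELETE;
            case RepositoryRecordSchema.SWAP_IN_ACTION:
                return UpdateType.SWAP_IN;
            case RepositoryRecordSchema.SWAP_OUT_ACTION:
                return UpdateType.SWAP_OUT;
            default:
                throw new IllegalArgumentException("Unknown action name: " + actionName);
        }
    }
}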
+ */ + +package org.apache.nifi.controller.repository.schema; + +import org.apache.nifi.repository.schema.NamedValue; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordSchema; +import org.wali.UpdateType; + +public class RepositoryRecordUpdate implements Record { + private final RecordSchema schema; + private final RepositoryRecordFieldMap fieldMap; + + public RepositoryRecordUpdate(final RepositoryRecordFieldMap fieldMap, final RecordSchema schema) { + this.schema = schema; + this.fieldMap = fieldMap; + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public Object getFieldValue(final String fieldName) { + if (RepositoryRecordSchema.REPOSITORY_RECORD_UPDATE_V1.equals(fieldName)) { + final String actionType = (String) fieldMap.getFieldValue(RepositoryRecordSchema.ACTION_TYPE); + final UpdateType updateType = UpdateType.valueOf(actionType); + + final String actionName; + switch (updateType) { + case CREATE: + case UPDATE: + actionName = RepositoryRecordSchema.CREATE_OR_UPDATE_ACTION; + break; + case DELETE: + actionName = RepositoryRecordSchema.DELETE_ACTION; + break; + case SWAP_IN: + actionName = RepositoryRecordSchema.SWAP_IN_ACTION; + break; + case SWAP_OUT: + actionName = RepositoryRecordSchema.SWAP_OUT_ACTION; + break; + default: + return null; + } + + return new NamedValue(actionName, fieldMap); + } + return null; + } + +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ResourceClaimFieldMap.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ResourceClaimFieldMap.java new file mode 100644 index 0000000000..afa19ea805 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/schema/ResourceClaimFieldMap.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.controller.repository.schema; + +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordSchema; + +public class ResourceClaimFieldMap implements Record { + private final ResourceClaim resourceClaim; + private final RecordSchema schema; + + public ResourceClaimFieldMap(final ResourceClaim resourceClaim, final RecordSchema schema) { + this.resourceClaim = resourceClaim; + this.schema = schema; + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public Object getFieldValue(final String fieldName) { + switch (fieldName) { + case ContentClaimSchema.CLAIM_CONTAINER: + return resourceClaim.getContainer(); + case ContentClaimSchema.CLAIM_SECTION: + return resourceClaim.getSection(); + case ContentClaimSchema.CLAIM_IDENTIFIER: + return resourceClaim.getId(); + case ContentClaimSchema.LOSS_TOLERANT: + return resourceClaim.isLossTolerant(); + } + + return null; + } + + public static ResourceClaim getResourceClaim(final Record record, final ResourceClaimManager claimManager) { + final String container = (String) record.getFieldValue(ContentClaimSchema.CLAIM_CONTAINER); + final String section = (String) record.getFieldValue(ContentClaimSchema.CLAIM_SECTION); + final String identifier = (String) record.getFieldValue(ContentClaimSchema.CLAIM_IDENTIFIER); + final Boolean lossTolerant = (Boolean) record.getFieldValue(ContentClaimSchema.LOSS_TOLERANT); + + return claimManager.newResourceClaim(container, section, identifier, lossTolerant, false); + } + + @Override + public int hashCode() { + return 41 + 91 * resourceClaim.hashCode(); + } + + @Override + public boolean equals(final Object obj) { + if (obj == null) { + return false; + } + if (obj == this) { + return true; + } + + if (obj.getClass() != ResourceClaimFieldMap.class) { + return false; + } + + final ResourceClaimFieldMap other = (ResourceClaimFieldMap) obj; + return resourceClaim.equals(other.resourceClaim); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapDeserializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapDeserializer.java new file mode 100644 index 0000000000..88e141527d --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapDeserializer.java @@ -0,0 +1,77 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
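ResourceClaimFieldMap implements equals() and hashCode() purely in terms of the wrapped ResourceClaim so that it can act as a map key: the swap summary written later in this patch (SwapSummaryFieldMap) relies on that to collapse many FlowFiles referencing the same backing claim into a single claim-plus-count entry. The counting pattern looks roughly like this sketch, which assumes the caller supplies the claims list and the resource-claim schema:

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.nifi.controller.repository.claim.ResourceClaim;
import org.apache.nifi.controller.repository.schema.ResourceClaimFieldMap;
import org.apache.nifi.repository.schema.RecordSchema;

final class ClaimCountSketch {
    // Count how many FlowFiles reference each distinct resource claim.
    static Map<ResourceClaimFieldMap, Integer> countClaims(final List<ResourceClaim> claims, final RecordSchema resourceClaimSchema) {
        final Map<ResourceClaimFieldMap, Integer> counts = new HashMap<>();
        for (final ResourceClaim claim : claims) {
            // Two wrappers around equal ResourceClaims hash to the same bucket and
            // compare equal, so they share one entry in the map.
            counts.merge(new ResourceClaimFieldMap(claim, resourceClaimSchema), 1, Integer::sum);
        }
        return counts;
    }
}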
+ */ + +package org.apache.nifi.controller.swap; + +import java.io.DataInputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.SwapContents; +import org.apache.nifi.controller.repository.SwapSummary; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.schema.FlowFileRecordFieldMap; +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SchemaRecordReader; + +public class SchemaSwapDeserializer implements SwapDeserializer { + + @Override + @SuppressWarnings("unchecked") + public SwapContents deserializeFlowFiles(final DataInputStream in, final String swapLocation, final FlowFileQueue queue, final ResourceClaimManager claimManager) throws IOException { + final RecordSchema schema = RecordSchema.readFrom(in); + final SchemaRecordReader reader = SchemaRecordReader.fromSchema(schema); + + final Record parentRecord = reader.readRecord(in); + final List flowFileRecords = (List) parentRecord.getFieldValue(SwapSchema.FLOWFILE_CONTENTS); + + final List flowFiles = new ArrayList<>(flowFileRecords.size()); + for (final Record record : flowFileRecords) { + flowFiles.add(FlowFileRecordFieldMap.getFlowFile(record, claimManager)); + } + + final Record summaryRecord = (Record) parentRecord.getFieldValue(SwapSchema.SWAP_SUMMARY); + final SwapSummary swapSummary = SwapSummaryFieldMap.getSwapSummary(summaryRecord, claimManager); + + return new StandardSwapContents(swapSummary, flowFiles); + } + + @Override + public SwapSummary getSwapSummary(final DataInputStream in, final String swapLocation, final ResourceClaimManager claimManager) throws IOException { + final RecordSchema schema = RecordSchema.readFrom(in); + final List summaryFields = schema.getField(SwapSchema.SWAP_SUMMARY).getSubFields(); + final RecordField summaryRecordField = new ComplexRecordField(SwapSchema.SWAP_SUMMARY, Repetition.EXACTLY_ONE, summaryFields); + final RecordSchema summarySchema = new RecordSchema(Collections.singletonList(summaryRecordField)); + + final Record summaryRecordParent = SchemaRecordReader.fromSchema(summarySchema).readRecord(in); + final Record summaryRecord = (Record) summaryRecordParent.getFieldValue(SwapSchema.SWAP_SUMMARY); + final SwapSummary swapSummary = SwapSummaryFieldMap.getSwapSummary(summaryRecord, claimManager); + return swapSummary; + } + + public static String getSerializationName() { + return SchemaSwapSerializer.SERIALIZATION_NAME; + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapSerializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapSerializer.java new file mode 100644 index 0000000000..195f55af8f --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SchemaSwapSerializer.java @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.swap; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.queue.QueueSize; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.SwapSummary; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.schema.FlowFileRecordFieldMap; +import org.apache.nifi.controller.repository.schema.FlowFileSchema; +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.FieldMapRecord; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SchemaRecordWriter; +import org.apache.nifi.repository.schema.SimpleRecordField; + +public class SchemaSwapSerializer implements SwapSerializer { + static final String SERIALIZATION_NAME = "Schema Swap Serialization"; + + private final RecordSchema schema = SwapSchema.FULL_SWAP_FILE_SCHEMA_V1; + private final RecordSchema flowFileSchema = new RecordSchema(schema.getField(SwapSchema.FLOWFILE_CONTENTS).getSubFields()); + + @Override + public void serializeFlowFiles(final List toSwap, final FlowFileQueue queue, final String swapLocation, final OutputStream out) throws IOException { + schema.writeTo(out); + + long contentSize = 0L; + long maxFlowFileId = -1L; + final List resourceClaims = new ArrayList<>(); + for (final FlowFileRecord flowFile : toSwap) { + contentSize += flowFile.getSize(); + if (flowFile.getId() > maxFlowFileId) { + maxFlowFileId = flowFile.getId(); + } + + final ContentClaim contentClaim = flowFile.getContentClaim(); + if (contentClaim != null) { + resourceClaims.add(contentClaim.getResourceClaim()); + } + } + + final QueueSize queueSize = new QueueSize(toSwap.size(), contentSize); + final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxFlowFileId, resourceClaims); + final Record summaryRecord = new SwapSummaryFieldMap(swapSummary, queue.getIdentifier(), SwapSchema.SWAP_SUMMARY_SCHEMA_V1); + + final List flowFileRecords = toSwap.stream() + .map(flowFile -> new FlowFileRecordFieldMap(flowFile, flowFileSchema)) + .collect(Collectors.toList()); + + // Create a simple record to hold the summary and the flowfile contents + final RecordField summaryField = new 
SimpleRecordField(SwapSchema.SWAP_SUMMARY, FieldType.COMPLEX, Repetition.EXACTLY_ONE); + final RecordField contentsField = new ComplexRecordField(SwapSchema.FLOWFILE_CONTENTS, Repetition.ZERO_OR_MORE, FlowFileSchema.FLOWFILE_SCHEMA_V1.getFields()); + final List fields = new ArrayList<>(2); + fields.add(summaryField); + fields.add(contentsField); + + final Map swapFileMap = new LinkedHashMap<>(); + swapFileMap.put(summaryField, summaryRecord); + swapFileMap.put(contentsField, flowFileRecords); + final Record swapFileRecord = new FieldMapRecord(swapFileMap, new RecordSchema(fields)); + + final SchemaRecordWriter writer = new SchemaRecordWriter(); + writer.writeRecord(swapFileRecord, out); + out.flush(); + } + + @Override + public String getSerializationName() { + return SERIALIZATION_NAME; + } + +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapDeserializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapDeserializer.java new file mode 100644 index 0000000000..b86d9a8a89 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapDeserializer.java @@ -0,0 +1,303 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
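As a rough usage sketch of the new serializer/deserializer pair (illustrative only, not the test class added later in this patch): MockFlowFile is the test helper introduced further down, the queue is mocked the same way the existing tests mock it, and StandardResourceClaimManager stands in for the framework's claim manager. The sketch assumes it lives in the org.apache.nifi.controller.swap test package so those classes resolve directly.

package org.apache.nifi.controller.swap;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.apache.nifi.controller.queue.FlowFileQueue;
import org.apache.nifi.controller.repository.FlowFileRecord;
import org.apache.nifi.controller.repository.SwapContents;
import org.apache.nifi.controller.repository.claim.ResourceClaimManager;
import org.apache.nifi.controller.repository.claim.StandardResourceClaimManager;
import org.mockito.Mockito;

public class SwapRoundTripSketch {
    public static void main(final String[] args) throws Exception {
        final ResourceClaimManager claimManager = new StandardResourceClaimManager();

        final FlowFileQueue queue = Mockito.mock(FlowFileQueue.class);
        Mockito.when(queue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4");

        // A few FlowFiles, each with one attribute and a content claim created by MockFlowFile.
        final List<FlowFileRecord> toSwap = new ArrayList<>();
        for (int i = 0; i < 3; i++) {
            toSwap.add(new MockFlowFile(Collections.singletonMap("i", String.valueOf(i)), 100L, claimManager));
        }

        // Swap out to a byte array: the schema is written first, then the summary and contents.
        final ByteArrayOutputStream out = new ByteArrayOutputStream();
        new SchemaSwapSerializer().serializeFlowFiles(toSwap, queue, "target/example.swap", out);

        // Swap back in from the same bytes using only the embedded schema.
        try (DataInputStream in = new DataInputStream(new ByteArrayInputStream(out.toByteArray()))) {
            final SwapContents contents = new SchemaSwapDeserializer()
                .deserializeFlowFiles(in, "target/example.swap", queue, claimManager);
            System.out.println("Swapped back in " + contents.getFlowFiles().size() + " FlowFiles");
        }
    }
}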
+ */ + +package org.apache.nifi.controller.swap; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.queue.QueueSize; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.IncompleteSwapFileException; +import org.apache.nifi.controller.repository.StandardFlowFileRecord; +import org.apache.nifi.controller.repository.SwapContents; +import org.apache.nifi.controller.repository.SwapSummary; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.claim.StandardContentClaim; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class SimpleSwapDeserializer implements SwapDeserializer { + public static final int SWAP_ENCODING_VERSION = 10; + private static final Logger logger = LoggerFactory.getLogger(SimpleSwapDeserializer.class); + + @Override + public SwapSummary getSwapSummary(final DataInputStream in, final String swapLocation, final ResourceClaimManager claimManager) throws IOException { + final int swapEncodingVersion = in.readInt(); + if (swapEncodingVersion > SWAP_ENCODING_VERSION) { + final String errMsg = "Cannot swap FlowFiles in from " + swapLocation + " because the encoding version is " + + swapEncodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)"; + + throw new IOException(errMsg); + } + + final int numRecords; + final long contentSize; + Long maxRecordId = null; + try { + in.readUTF(); // ignore Connection ID + numRecords = in.readInt(); + contentSize = in.readLong(); + + if (numRecords == 0) { + return StandardSwapSummary.EMPTY_SUMMARY; + } + + if (swapEncodingVersion > 7) { + maxRecordId = in.readLong(); + } + } catch (final EOFException eof) { + logger.warn("Found premature End-of-File when reading Swap File {}. 
EOF occurred before any FlowFiles were encountered", swapLocation); + return StandardSwapSummary.EMPTY_SUMMARY; + } + + final QueueSize queueSize = new QueueSize(numRecords, contentSize); + final SwapContents swapContents = deserializeFlowFiles(in, queueSize, maxRecordId, swapEncodingVersion, claimManager, swapLocation); + return swapContents.getSummary(); + } + + + @Override + public SwapContents deserializeFlowFiles(final DataInputStream in, final String swapLocation, final FlowFileQueue queue, final ResourceClaimManager claimManager) throws IOException { + final int swapEncodingVersion = in.readInt(); + if (swapEncodingVersion > SWAP_ENCODING_VERSION) { + throw new IOException("Cannot swap FlowFiles in from SwapFile because the encoding version is " + + swapEncodingVersion + ", which is too new (expecting " + SWAP_ENCODING_VERSION + " or less)"); + } + + final String connectionId = in.readUTF(); // Connection ID + if (!connectionId.equals(queue.getIdentifier())) { + throw new IllegalArgumentException("Cannot deserialize FlowFiles from Swap File at location " + swapLocation + + " because those FlowFiles belong to Connection with ID " + connectionId + " and an attempt was made to swap them into a Connection with ID " + queue.getIdentifier()); + } + + int numRecords = 0; + long contentSize = 0L; + Long maxRecordId = null; + try { + numRecords = in.readInt(); + contentSize = in.readLong(); // Content Size + if (swapEncodingVersion > 7) { + maxRecordId = in.readLong(); // Max Record ID + } + } catch (final EOFException eof) { + final QueueSize queueSize = new QueueSize(numRecords, contentSize); + final SwapSummary summary = new StandardSwapSummary(queueSize, maxRecordId, Collections.emptyList()); + final SwapContents partialContents = new StandardSwapContents(summary, Collections.emptyList()); + throw new IncompleteSwapFileException(swapLocation, partialContents); + } + + final QueueSize queueSize = new QueueSize(numRecords, contentSize); + return deserializeFlowFiles(in, queueSize, maxRecordId, swapEncodingVersion, claimManager, swapLocation); + } + + private static SwapContents deserializeFlowFiles(final DataInputStream in, final QueueSize queueSize, final Long maxRecordId, + final int serializationVersion, final ResourceClaimManager claimManager, final String location) throws IOException { + final List flowFiles = new ArrayList<>(queueSize.getObjectCount()); + final List resourceClaims = new ArrayList<>(queueSize.getObjectCount()); + Long maxId = maxRecordId; + + for (int i = 0; i < queueSize.getObjectCount(); i++) { + try { + // legacy encoding had an "action" because it used to be couple with FlowFile Repository code + if (serializationVersion < 3) { + final int action = in.read(); + if (action != 1) { + throw new IOException("Swap File is version " + serializationVersion + " but did not contain a 'UPDATE' record type"); + } + } + + final StandardFlowFileRecord.Builder ffBuilder = new StandardFlowFileRecord.Builder(); + final long recordId = in.readLong(); + if (maxId == null || recordId > maxId) { + maxId = recordId; + } + + ffBuilder.id(recordId); + ffBuilder.entryDate(in.readLong()); + + if (serializationVersion > 1) { + // Lineage information was added in version 2 + if (serializationVersion < 10) { + final int numLineageIdentifiers = in.readInt(); + for (int lineageIdIdx = 0; lineageIdIdx < numLineageIdentifiers; lineageIdIdx++) { + in.readUTF(); //skip each identifier + } + } + + // version 9 adds in a 'lineage start index' + final long lineageStartDate = in.readLong(); + final 
long lineageStartIndex; + if (serializationVersion > 8) { + lineageStartIndex = in.readLong(); + } else { + lineageStartIndex = 0L; + } + + ffBuilder.lineageStart(lineageStartDate, lineageStartIndex); + + if (serializationVersion > 5) { + // Version 9 adds in a 'queue date index' + final long lastQueueDate = in.readLong(); + final long queueDateIndex; + if (serializationVersion > 8) { + queueDateIndex = in.readLong(); + } else { + queueDateIndex = 0L; + } + + ffBuilder.lastQueued(lastQueueDate, queueDateIndex); + } + } + + ffBuilder.size(in.readLong()); + + if (serializationVersion < 3) { + readString(in); // connection Id + } + + final boolean hasClaim = in.readBoolean(); + ResourceClaim resourceClaim = null; + if (hasClaim) { + final String claimId; + if (serializationVersion < 5) { + claimId = String.valueOf(in.readLong()); + } else { + claimId = in.readUTF(); + } + + final String container = in.readUTF(); + final String section = in.readUTF(); + + final long resourceOffset; + final long resourceLength; + if (serializationVersion < 6) { + resourceOffset = 0L; + resourceLength = -1L; + } else { + resourceOffset = in.readLong(); + resourceLength = in.readLong(); + } + + final long claimOffset = in.readLong(); + + final boolean lossTolerant; + if (serializationVersion >= 4) { + lossTolerant = in.readBoolean(); + } else { + lossTolerant = false; + } + + resourceClaim = claimManager.getResourceClaim(container, section, claimId); + if (resourceClaim == null) { + logger.error("Swap file indicates that FlowFile was referencing Resource Claim at container={}, section={}, claimId={}, " + + "but this Resource Claim cannot be found! Will create a temporary Resource Claim, but this may affect the framework's " + + "ability to properly clean up this resource", container, section, claimId); + resourceClaim = claimManager.newResourceClaim(container, section, claimId, lossTolerant, true); + } + + final StandardContentClaim claim = new StandardContentClaim(resourceClaim, resourceOffset); + claim.setLength(resourceLength); + + ffBuilder.contentClaim(claim); + ffBuilder.contentClaimOffset(claimOffset); + } + + boolean attributesChanged = true; + if (serializationVersion < 3) { + attributesChanged = in.readBoolean(); + } + + if (attributesChanged) { + final int numAttributes = in.readInt(); + for (int j = 0; j < numAttributes; j++) { + final String key = readString(in); + final String value = readString(in); + + ffBuilder.addAttribute(key, value); + } + } + + final FlowFileRecord record = ffBuilder.build(); + if (resourceClaim != null) { + resourceClaims.add(resourceClaim); + } + + flowFiles.add(record); + } catch (final EOFException eof) { + final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxId, resourceClaims); + final SwapContents partialContents = new StandardSwapContents(swapSummary, flowFiles); + throw new IncompleteSwapFileException(location, partialContents); + } + } + + final SwapSummary swapSummary = new StandardSwapSummary(queueSize, maxId, resourceClaims); + return new StandardSwapContents(swapSummary, flowFiles); + } + + private static String readString(final InputStream in) throws IOException { + final Integer numBytes = readFieldLength(in); + if (numBytes == null) { + throw new EOFException(); + } + final byte[] bytes = new byte[numBytes]; + fillBuffer(in, bytes, numBytes); + return new String(bytes, StandardCharsets.UTF_8); + } + + private static Integer readFieldLength(final InputStream in) throws IOException { + final int firstValue = in.read(); + final int secondValue = 
in.read(); + if (firstValue < 0) { + return null; + } + if (secondValue < 0) { + throw new EOFException(); + } + if (firstValue == 0xff && secondValue == 0xff) { + final int ch1 = in.read(); + final int ch2 = in.read(); + final int ch3 = in.read(); + final int ch4 = in.read(); + if ((ch1 | ch2 | ch3 | ch4) < 0) { + throw new EOFException(); + } + return (ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4; + } else { + return (firstValue << 8) + secondValue; + } + } + + private static void fillBuffer(final InputStream in, final byte[] buffer, final int length) throws IOException { + int bytesRead; + int totalBytesRead = 0; + while ((bytesRead = in.read(buffer, totalBytesRead, length - totalBytesRead)) > 0) { + totalBytesRead += bytesRead; + } + if (totalBytesRead != length) { + throw new EOFException(); + } + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapSerializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapSerializer.java new file mode 100644 index 0000000000..ea8b99b571 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SimpleSwapSerializer.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.swap; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.List; +import java.util.Map; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * @deprecated in favor of using {@link SchemaSwapSerializer}. 
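The legacy format length-prefixes each string using the scheme decoded by readFieldLength above and encoded by writeString in SimpleSwapSerializer below: two big-endian bytes for lengths under 0xFFFF, with 0xFF 0xFF reserved as an escape that is followed by a full four-byte length. A standalone sketch of that encoding, round-tripped through a byte array:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

final class LegacyLengthEncodingSketch {
    // Lengths below 0xFFFF are written as two big-endian bytes; anything larger is written
    // as the sentinel 0xFF 0xFF followed by a four-byte big-endian length.
    static void writeLength(final int length, final OutputStream out) throws IOException {
        if (length < 0xFFFF) {
            out.write(length >>> 8);
            out.write(length);
        } else {
            out.write(0xFF);
            out.write(0xFF);
            out.write(length >>> 24);
            out.write(length >>> 16);
            out.write(length >>> 8);
            out.write(length);
        }
    }

    static int readLength(final InputStream in) throws IOException {
        final int first = in.read();
        final int second = in.read();
        if (first < 0 || second < 0) {
            throw new EOFException();
        }
        if (first == 0xFF && second == 0xFF) {
            final int b1 = in.read();
            final int b2 = in.read();
            final int b3 = in.read();
            final int b4 = in.read();
            if ((b1 | b2 | b3 | b4) < 0) {
                throw new EOFException();
            }
            return (b1 << 24) + (b2 << 16) + (b3 << 8) + b4;
        }
        return (first << 8) + second;
    }

    public static void main(final String[] args) throws IOException {
        for (final int length : new int[] {0, 300, 65_534, 65_535, 1_000_000}) {
            final ByteArrayOutputStream bytes = new ByteArrayOutputStream();
            writeLength(length, bytes);
            final int decoded = readLength(new ByteArrayInputStream(bytes.toByteArray()));
            System.out.println(length + " -> " + bytes.size() + " bytes -> " + decoded);
        }
    }
}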
+ */ +@Deprecated +public class SimpleSwapSerializer implements SwapSerializer { + private static final Logger logger = LoggerFactory.getLogger(SimpleSwapSerializer.class); + public static final int SWAP_ENCODING_VERSION = 10; + + + @Override + public void serializeFlowFiles(final List toSwap, final FlowFileQueue queue, final String swapLocation, final OutputStream destination) throws IOException { + if (toSwap == null || toSwap.isEmpty()) { + return; + } + + long contentSize = 0L; + for (final FlowFileRecord record : toSwap) { + contentSize += record.getSize(); + } + + // persist record to disk via the swap file + final DataOutputStream out = new DataOutputStream(destination); + try { + out.writeInt(SWAP_ENCODING_VERSION); + out.writeUTF(queue.getIdentifier()); + out.writeInt(toSwap.size()); + out.writeLong(contentSize); + + // get the max record id and write that out so that we know it quickly for restoration + long maxRecordId = 0L; + for (final FlowFileRecord flowFile : toSwap) { + if (flowFile.getId() > maxRecordId) { + maxRecordId = flowFile.getId(); + } + } + + out.writeLong(maxRecordId); + + for (final FlowFileRecord flowFile : toSwap) { + out.writeLong(flowFile.getId()); + out.writeLong(flowFile.getEntryDate()); + out.writeLong(flowFile.getLineageStartDate()); + out.writeLong(flowFile.getLineageStartIndex()); + out.writeLong(flowFile.getLastQueueDate()); + out.writeLong(flowFile.getQueueDateIndex()); + out.writeLong(flowFile.getSize()); + + final ContentClaim claim = flowFile.getContentClaim(); + if (claim == null) { + out.writeBoolean(false); + } else { + out.writeBoolean(true); + final ResourceClaim resourceClaim = claim.getResourceClaim(); + out.writeUTF(resourceClaim.getId()); + out.writeUTF(resourceClaim.getContainer()); + out.writeUTF(resourceClaim.getSection()); + out.writeLong(claim.getOffset()); + out.writeLong(claim.getLength()); + out.writeLong(flowFile.getContentClaimOffset()); + out.writeBoolean(resourceClaim.isLossTolerant()); + } + + final Map attributes = flowFile.getAttributes(); + out.writeInt(attributes.size()); + for (final Map.Entry entry : attributes.entrySet()) { + writeString(entry.getKey(), out); + writeString(entry.getValue(), out); + } + } + } finally { + out.flush(); + } + + logger.info("Successfully swapped out {} FlowFiles from {} to Swap File {}", toSwap.size(), queue, swapLocation); + } + + private void writeString(final String toWrite, final OutputStream out) throws IOException { + final byte[] bytes = toWrite.getBytes(StandardCharsets.UTF_8); + final int utflen = bytes.length; + + if (utflen < 65535) { + out.write(utflen >>> 8); + out.write(utflen); + out.write(bytes); + } else { + out.write(255); + out.write(255); + out.write(utflen >>> 24); + out.write(utflen >>> 16); + out.write(utflen >>> 8); + out.write(utflen); + out.write(bytes); + } + } + + @Override + public String getSerializationName() { + return "Simple Swap Serializer"; + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapDeserializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapDeserializer.java new file mode 100644 index 0000000000..a3fb30a826 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapDeserializer.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor 
license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.swap; + +import java.io.DataInputStream; +import java.io.IOException; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.SwapContents; +import org.apache.nifi.controller.repository.SwapSummary; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; + +public interface SwapDeserializer { + + SwapContents deserializeFlowFiles(DataInputStream in, String swapLocation, FlowFileQueue queue, ResourceClaimManager claimManager) throws IOException; + + SwapSummary getSwapSummary(DataInputStream in, String swapLocation, ResourceClaimManager claimManager) throws IOException; +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSchema.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSchema.java new file mode 100644 index 0000000000..70fb539272 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSchema.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
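Both deserializers stay in place so that a swap file written by either version of NiFi remains readable after an upgrade or a rollback: SchemaSwapDeserializer handles files carrying an embedded schema, while SimpleSwapDeserializer handles the older fixed-layout files. Presumably FileSystemSwapManager selects between them from the serialization name recorded when the file was written; the selection helper below is purely hypothetical and only illustrates that idea (it assumes the same org.apache.nifi.controller.swap package):

package org.apache.nifi.controller.swap;

final class SwapDeserializerSelectionSketch {
    // Hypothetical dispatch: pick a SwapDeserializer from a serialization name that was
    // read from the head of the swap file. Files written before named serializers existed
    // fall back to the legacy simple encoding.
    static SwapDeserializer forSerializationName(final String serializationName) {
        if (SchemaSwapDeserializer.getSerializationName().equals(serializationName)) {
            return new SchemaSwapDeserializer();
        }
        return new SimpleSwapDeserializer();
    }
}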
+ */ + +package org.apache.nifi.controller.swap; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +import org.apache.nifi.controller.repository.schema.ContentClaimSchema; +import org.apache.nifi.controller.repository.schema.FlowFileSchema; +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.MapRecordField; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SimpleRecordField; + +public class SwapSchema { + + public static final RecordSchema SWAP_SUMMARY_SCHEMA_V1; + public static final RecordSchema SWAP_CONTENTS_SCHEMA_V1; + public static final RecordSchema FULL_SWAP_FILE_SCHEMA_V1; + + public static final String RESOURCE_CLAIMS = "Resource Claims"; + public static final String RESOURCE_CLAIM = "Resource Claim"; + public static final String RESOURCE_CLAIM_COUNT = "Claim Count"; + + public static final String QUEUE_IDENTIFIER = "Queue Identifier"; + public static final String FLOWFILE_COUNT = "FlowFile Count"; + public static final String FLOWFILE_SIZE = "FlowFile Size"; + public static final String MAX_RECORD_ID = "Max Record ID"; + public static final String SWAP_SUMMARY = "Swap Summary"; + public static final String FLOWFILE_CONTENTS = "FlowFiles"; + + + static { + final RecordField queueIdentifier = new SimpleRecordField(QUEUE_IDENTIFIER, FieldType.STRING, Repetition.EXACTLY_ONE); + final RecordField flowFileCount = new SimpleRecordField(FLOWFILE_COUNT, FieldType.INT, Repetition.EXACTLY_ONE); + final RecordField flowFileSize = new SimpleRecordField(FLOWFILE_SIZE, FieldType.LONG, Repetition.EXACTLY_ONE); + final RecordField maxRecordId = new SimpleRecordField(MAX_RECORD_ID, FieldType.LONG, Repetition.EXACTLY_ONE); + + final RecordField resourceClaimField = new ComplexRecordField(RESOURCE_CLAIM, Repetition.EXACTLY_ONE, ContentClaimSchema.RESOURCE_CLAIM_SCHEMA_V1.getFields()); + final RecordField claimCountField = new SimpleRecordField(RESOURCE_CLAIM_COUNT, FieldType.INT, Repetition.EXACTLY_ONE); + final RecordField resourceClaims = new MapRecordField(RESOURCE_CLAIMS, resourceClaimField, claimCountField, Repetition.EXACTLY_ONE); + + final List summaryFields = new ArrayList<>(); + summaryFields.add(queueIdentifier); + summaryFields.add(flowFileCount); + summaryFields.add(flowFileSize); + summaryFields.add(maxRecordId); + summaryFields.add(resourceClaims); + SWAP_SUMMARY_SCHEMA_V1 = new RecordSchema(summaryFields); + + final RecordField flowFiles = new ComplexRecordField(FLOWFILE_CONTENTS, Repetition.ZERO_OR_MORE, FlowFileSchema.FLOWFILE_SCHEMA_V1.getFields()); + final List contentsFields = Collections.singletonList(flowFiles); + SWAP_CONTENTS_SCHEMA_V1 = new RecordSchema(contentsFields); + + final List fullSchemaFields = new ArrayList<>(); + fullSchemaFields.add(new ComplexRecordField(SWAP_SUMMARY, Repetition.EXACTLY_ONE, summaryFields)); + fullSchemaFields.add(new ComplexRecordField(FLOWFILE_CONTENTS, Repetition.ZERO_OR_MORE, FlowFileSchema.FLOWFILE_SCHEMA_V1.getFields())); + FULL_SWAP_FILE_SCHEMA_V1 = new RecordSchema(fullSchemaFields); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSerializer.java 
b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSerializer.java new file mode 100644 index 0000000000..e8439e6365 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSerializer.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.swap; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.List; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.FlowFileRecord; + +public interface SwapSerializer { + + void serializeFlowFiles(List toSwap, FlowFileQueue queue, String swapLocation, OutputStream destination) throws IOException; + + String getSerializationName(); + +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSummaryFieldMap.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSummaryFieldMap.java new file mode 100644 index 0000000000..ab58ed6319 --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/swap/SwapSummaryFieldMap.java @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.controller.swap; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.nifi.controller.queue.QueueSize; +import org.apache.nifi.controller.repository.SwapSummary; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.schema.ResourceClaimFieldMap; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; + +public class SwapSummaryFieldMap implements Record { + private final SwapSummary swapSummary; + private final RecordSchema schema; + private final String queueIdentifier; + private final Map claimCounts; + + public SwapSummaryFieldMap(final SwapSummary summary, final String queueIdentifier, final RecordSchema schema) { + this.swapSummary = summary; + this.queueIdentifier = queueIdentifier; + this.schema = schema; + + final RecordField resourceClaimField = schema.getField(SwapSchema.RESOURCE_CLAIMS).getSubFields().get(0); + final RecordSchema resourceClaimSchema = new RecordSchema(resourceClaimField.getSubFields()); + + final List resourceClaims = summary.getResourceClaims(); + claimCounts = new HashMap<>(); + for (final ResourceClaim claim : resourceClaims) { + final ResourceClaimFieldMap fieldMap = new ResourceClaimFieldMap(claim, resourceClaimSchema); + + final Integer count = claimCounts.get(fieldMap); + if (count == null) { + claimCounts.put(fieldMap, 1); + } else { + claimCounts.put(fieldMap, count + 1); + } + } + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + @Override + public Object getFieldValue(final String fieldName) { + switch (fieldName) { + case SwapSchema.MAX_RECORD_ID: + return swapSummary.getMaxFlowFileId(); + case SwapSchema.FLOWFILE_COUNT: + return swapSummary.getQueueSize().getObjectCount(); + case SwapSchema.FLOWFILE_SIZE: + return swapSummary.getQueueSize().getByteCount(); + case SwapSchema.QUEUE_IDENTIFIER: + return queueIdentifier; + case SwapSchema.RESOURCE_CLAIMS: + return claimCounts; + } + + return null; + } + + @SuppressWarnings("unchecked") + public static SwapSummary getSwapSummary(final Record record, final ResourceClaimManager claimManager) { + final int flowFileCount = (Integer) record.getFieldValue(SwapSchema.FLOWFILE_COUNT); + final long flowFileSize = (Long) record.getFieldValue(SwapSchema.FLOWFILE_SIZE); + final QueueSize queueSize = new QueueSize(flowFileCount, flowFileSize); + + final long maxFlowFileId = (Long) record.getFieldValue(SwapSchema.MAX_RECORD_ID); + + final Map resourceClaimRecords = (Map) record.getFieldValue(SwapSchema.RESOURCE_CLAIMS); + final List resourceClaims = new ArrayList<>(); + for (final Map.Entry entry : resourceClaimRecords.entrySet()) { + final Record resourceClaimRecord = entry.getKey(); + final ResourceClaim claim = ResourceClaimFieldMap.getResourceClaim(resourceClaimRecord, claimManager); + + for (int i = 0; i < entry.getValue(); i++) { + resourceClaims.add(claim); + } + } + + return new StandardSwapSummary(queueSize, maxFlowFileId, resourceClaims); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/TestFileSystemSwapManager.java 
b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/TestFileSystemSwapManager.java index 97226b2002..46bea3155f 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/TestFileSystemSwapManager.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/TestFileSystemSwapManager.java @@ -22,26 +22,20 @@ import java.io.BufferedInputStream; import java.io.DataInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.nio.file.Files; -import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicLong; import org.apache.nifi.controller.queue.FlowFileQueue; import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.FlowFileRepository; import org.apache.nifi.controller.repository.SwapContents; -import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.SwapManagerInitializationContext; import org.apache.nifi.controller.repository.claim.ResourceClaim; import org.apache.nifi.controller.repository.claim.ResourceClaimManager; -import org.apache.nifi.flowfile.FlowFile; +import org.apache.nifi.events.EventReporter; import org.junit.Test; import org.mockito.Mockito; @@ -56,7 +50,9 @@ public class TestFileSystemSwapManager { final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4"); - final SwapContents swapContents = FileSystemSwapManager.deserializeFlowFiles(in, "/src/test/resources/old-swap-file.swap", flowFileQueue, new NopResourceClaimManager()); + final FileSystemSwapManager swapManager = createSwapManager(); + final SwapContents swapContents = swapManager.peek("src/test/resources/old-swap-file.swap", flowFileQueue); + final List records = swapContents.getFlowFiles(); assertEquals(10000, records.size()); @@ -67,53 +63,32 @@ public class TestFileSystemSwapManager { } } - @Test - public void testRoundTripSerializeDeserialize() throws IOException { - final List toSwap = new ArrayList<>(10000); - final Map attrs = new HashMap<>(); - for (int i = 0; i < 10000; i++) { - attrs.put("i", String.valueOf(i)); - final FlowFileRecord ff = new TestFlowFile(attrs, i); - toSwap.add(ff); - } - final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); - Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4"); + private FileSystemSwapManager createSwapManager() { + final FileSystemSwapManager swapManager = new FileSystemSwapManager(); + final ResourceClaimManager resourceClaimManager = new NopResourceClaimManager(); + final FlowFileRepository flowfileRepo = Mockito.mock(FlowFileRepository.class); + swapManager.initialize(new SwapManagerInitializationContext() { + @Override + public ResourceClaimManager getResourceClaimManager() { + return resourceClaimManager; + } - final String swapLocation = "target/testRoundTrip.swap"; - final File swapFile = new File(swapLocation); - Files.deleteIfExists(swapFile.toPath()); + @Override + public FlowFileRepository getFlowFileRepository() { + 
return flowfileRepo; + } - try (final FileOutputStream fos = new FileOutputStream(swapFile)) { - FileSystemSwapManager.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, fos); - } + @Override + public EventReporter getEventReporter() { + return EventReporter.NO_OP; + } + }); - final SwapContents swappedIn; - try (final FileInputStream fis = new FileInputStream(swapFile); - final DataInputStream dis = new DataInputStream(fis)) { - swappedIn = FileSystemSwapManager.deserializeFlowFiles(dis, swapLocation, flowFileQueue, Mockito.mock(ResourceClaimManager.class)); - } - - assertEquals(toSwap.size(), swappedIn.getFlowFiles().size()); - for (int i = 0; i < toSwap.size(); i++) { - final FlowFileRecord pre = toSwap.get(i); - final FlowFileRecord post = swappedIn.getFlowFiles().get(i); - - assertEquals(pre.getSize(), post.getSize()); - assertEquals(pre.getAttributes(), post.getAttributes()); - assertEquals(pre.getSize(), post.getSize()); - assertEquals(pre.getId(), post.getId()); - assertEquals(pre.getContentClaim(), post.getContentClaim()); - assertEquals(pre.getContentClaimOffset(), post.getContentClaimOffset()); - assertEquals(pre.getEntryDate(), post.getEntryDate()); - assertEquals(pre.getLastQueueDate(), post.getLastQueueDate()); - assertEquals(pre.getLineageStartDate(), post.getLineageStartDate()); - assertEquals(pre.getPenaltyExpirationMillis(), post.getPenaltyExpirationMillis()); - } + return swapManager; } public class NopResourceClaimManager implements ResourceClaimManager { - @Override public ResourceClaim newResourceClaim(String container, String section, String id, boolean lossTolerant, boolean writable) { return null; @@ -165,89 +140,4 @@ public class TestFileSystemSwapManager { } } - private static class TestFlowFile implements FlowFileRecord { - - private static final AtomicLong idGenerator = new AtomicLong(0L); - - private final long id = idGenerator.getAndIncrement(); - private final long entryDate = System.currentTimeMillis(); - private final long lastQueueDate = System.currentTimeMillis(); - private final Map attributes; - private final long size; - - public TestFlowFile(final Map attributes, final long size) { - this.attributes = attributes; - this.size = size; - } - - @Override - public long getId() { - return id; - } - - @Override - public long getEntryDate() { - return entryDate; - } - - @Override - public long getLineageStartDate() { - return entryDate; - } - - @Override - public Long getLastQueueDate() { - return lastQueueDate; - } - - @Override - public boolean isPenalized() { - return false; - } - - @Override - public String getAttribute(String key) { - return attributes.get(key); - } - - @Override - public long getSize() { - return size; - } - - @Override - public Map getAttributes() { - return Collections.unmodifiableMap(attributes); - } - - @Override - public int compareTo(final FlowFile o) { - return Long.compare(id, o.getId()); - } - - @Override - public long getPenaltyExpirationMillis() { - return -1L; - } - - @Override - public ContentClaim getContentClaim() { - return null; - } - - @Override - public long getContentClaimOffset() { - return 0; - } - - @Override - public long getLineageStartIndex() { - return 0; - } - - @Override - public long getQueueDateIndex() { - return 0; - } - } } diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/repository/TestWriteAheadFlowFileRepository.java 
b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/repository/TestWriteAheadFlowFileRepository.java index b2ea0b9601..65258223bc 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/repository/TestWriteAheadFlowFileRepository.java +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/repository/TestWriteAheadFlowFileRepository.java @@ -49,6 +49,7 @@ import org.apache.nifi.controller.swap.StandardSwapContents; import org.apache.nifi.controller.swap.StandardSwapSummary; import org.apache.nifi.util.NiFiProperties; import org.apache.nifi.util.file.FileUtils; +import org.junit.After; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; @@ -64,6 +65,7 @@ public class TestWriteAheadFlowFileRepository { } @Before + @After public void clearRepo() throws IOException { final File target = new File("target"); final File testRepo = new File(target, "test-repo"); diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/MockFlowFile.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/MockFlowFile.java new file mode 100644 index 0000000000..7b5f72ca9c --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/MockFlowFile.java @@ -0,0 +1,136 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.controller.swap; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.atomic.AtomicLong; + +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.claim.StandardContentClaim; +import org.apache.nifi.flowfile.FlowFile; + +public class MockFlowFile implements FlowFileRecord { + private static final AtomicLong idGenerator = new AtomicLong(0L); + + private final long id; + private final long entryDate = System.currentTimeMillis(); + private final long lastQueueDate = System.currentTimeMillis(); + private final Map attributes; + private final long size; + private final ContentClaim contentClaim; + + public MockFlowFile(final Map attributes, final long size, final ResourceClaimManager claimManager) { + this(attributes, size, createContentClaim(String.valueOf(idGenerator.get()), claimManager)); + } + + public MockFlowFile(final Map attributes, final long size, final ContentClaim contentClaim) { + this(idGenerator.getAndIncrement(), attributes, size, contentClaim); + } + + public MockFlowFile(final long id, final Map attributes, final long size, final ContentClaim contentClaim) { + this.id = id; + this.attributes = new HashMap<>(attributes); + this.size = size; + this.contentClaim = contentClaim; + } + + public static ContentClaim createContentClaim(final String id, final ResourceClaimManager claimManager) { + final ResourceClaim resourceClaim = claimManager.newResourceClaim("container", "section", id, false, false); + claimManager.incrementClaimantCount(resourceClaim); + return new StandardContentClaim(resourceClaim, 3L); + } + + @Override + public long getId() { + return id; + } + + @Override + public long getEntryDate() { + return entryDate; + } + + @Override + public long getLineageStartDate() { + return entryDate; + } + + @Override + public Long getLastQueueDate() { + return lastQueueDate; + } + + @Override + public boolean isPenalized() { + return false; + } + + @Override + public String getAttribute(String key) { + return attributes.get(key); + } + + @Override + public long getSize() { + return size; + } + + @Override + public Map getAttributes() { + return Collections.unmodifiableMap(attributes); + } + + @Override + public int compareTo(final FlowFile o) { + return Long.compare(id, o.getId()); + } + + @Override + public long getPenaltyExpirationMillis() { + return -1L; + } + + @Override + public ContentClaim getContentClaim() { + return contentClaim; + } + + @Override + public long getContentClaimOffset() { + return 1; + } + + @Override + public long getLineageStartIndex() { + return 0; + } + + @Override + public long getQueueDateIndex() { + return 0; + } + + public static void resetIdGenerator() { + idGenerator.set(0L); + } +} \ No newline at end of file diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSchemaSwapSerializerDeserializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSchemaSwapSerializerDeserializer.java new file mode 100644 index 0000000000..8565f380a4 --- /dev/null +++ 
b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSchemaSwapSerializerDeserializer.java @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.controller.swap; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; + +import java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.SwapContents; +import org.apache.nifi.controller.repository.SwapSummary; +import org.apache.nifi.controller.repository.claim.ContentClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaim; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.claim.StandardResourceClaimManager; +import org.apache.nifi.stream.io.NullOutputStream; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.mockito.Mockito; + +public class TestSchemaSwapSerializerDeserializer { + + @Before + public void setup() { + MockFlowFile.resetIdGenerator(); + } + + @Test + public void testRoundTripSerializeDeserializeSummary() throws IOException { + final ResourceClaimManager resourceClaimManager = new StandardResourceClaimManager(); + final ResourceClaim firstResourceClaim = resourceClaimManager.newResourceClaim("container", "section", "id", true, false); + resourceClaimManager.incrementClaimantCount(firstResourceClaim); + + final List toSwap = new ArrayList<>(10000); + final Map attrs = new HashMap<>(); + long size = 0L; + final ContentClaim firstClaim = MockFlowFile.createContentClaim("id", resourceClaimManager); + for (int i = 0; i < 10000; i++) { + attrs.put("i", String.valueOf(i)); + final FlowFileRecord ff = i < 2 ? 
new MockFlowFile(attrs, i, firstClaim) : new MockFlowFile(attrs, i, resourceClaimManager); + toSwap.add(ff); + size += i; + } + + final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); + Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4"); + + final String swapLocation = "target/testRoundTrip.swap"; + final File swapFile = new File(swapLocation); + Files.deleteIfExists(swapFile.toPath()); + + final SwapSerializer serializer = new SchemaSwapSerializer(); + try (final FileOutputStream fos = new FileOutputStream(swapFile)) { + serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, fos); + } + + final SwapDeserializer deserializer = new SchemaSwapDeserializer(); + final SwapSummary swapSummary; + try (final FileInputStream fis = new FileInputStream(swapFile); + final DataInputStream dis = new DataInputStream(fis)) { + + swapSummary = deserializer.getSwapSummary(dis, swapLocation, resourceClaimManager); + } + + assertEquals(10000, swapSummary.getQueueSize().getObjectCount()); + assertEquals(size, swapSummary.getQueueSize().getByteCount()); + assertEquals(9999, swapSummary.getMaxFlowFileId().intValue()); + + final List resourceClaims = swapSummary.getResourceClaims(); + assertEquals(10000, resourceClaims.size()); + assertFalse(resourceClaims.stream().anyMatch(claim -> claim == null)); + assertEquals(2, resourceClaims.stream().filter(claim -> claim.getId().equals("id")).collect(Collectors.counting()).intValue()); + + final Set uniqueClaims = new HashSet<>(resourceClaims); + assertEquals(9999, uniqueClaims.size()); + } + + @Test + public void testRoundTripSerializeDeserializeFullSwapFile() throws IOException, InterruptedException { + final ResourceClaimManager resourceClaimManager = new StandardResourceClaimManager(); + + final List toSwap = new ArrayList<>(10000); + final Map attrs = new HashMap<>(); + long size = 0L; + for (int i = 0; i < 10000; i++) { + attrs.put("i", String.valueOf(i)); + final FlowFileRecord ff = new MockFlowFile(attrs, i, resourceClaimManager); + toSwap.add(ff); + size += i; + } + + final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); + Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4"); + + final String swapLocation = "target/testRoundTrip.swap"; + final File swapFile = new File(swapLocation); + Files.deleteIfExists(swapFile.toPath()); + + final SwapSerializer serializer = new SchemaSwapSerializer(); + try (final OutputStream fos = new FileOutputStream(swapFile); + final OutputStream out = new BufferedOutputStream(fos)) { + serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, out); + } + + final SwapContents contents; + final SwapDeserializer deserializer = new SchemaSwapDeserializer(); + try (final FileInputStream fis = new FileInputStream(swapFile); + final InputStream bufferedIn = new BufferedInputStream(fis); + final DataInputStream dis = new DataInputStream(bufferedIn)) { + + contents = deserializer.deserializeFlowFiles(dis, swapLocation, flowFileQueue, resourceClaimManager); + } + + final SwapSummary swapSummary = contents.getSummary(); + assertEquals(10000, swapSummary.getQueueSize().getObjectCount()); + assertEquals(size, swapSummary.getQueueSize().getByteCount()); + assertEquals(9999, swapSummary.getMaxFlowFileId().intValue()); + + assertEquals(10000, contents.getFlowFiles().size()); + + int counter = 0; + for (final FlowFileRecord flowFile : contents.getFlowFiles()) { + final int i = counter++; + 
assertEquals(String.valueOf(i), flowFile.getAttribute("i")); + assertEquals(i, flowFile.getSize()); + } + } + + @Test + @Ignore("For manual testing, in order to ensure that changes do not negatively impact performance") + public void testWritePerformance() throws IOException, InterruptedException { + final ResourceClaimManager resourceClaimManager = new StandardResourceClaimManager(); + + final List toSwap = new ArrayList<>(10000); + final Map attrs = new HashMap<>(); + for (int i = 0; i < 10000; i++) { + attrs.put("i", String.valueOf(i)); + final FlowFileRecord ff = new MockFlowFile(attrs, i, resourceClaimManager); + toSwap.add(ff); + } + + final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); + Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4"); + + final String swapLocation = "target/testRoundTrip.swap"; + + final int iterations = 1000; + + final long start = System.nanoTime(); + final SwapSerializer serializer = new SchemaSwapSerializer(); + for (int i = 0; i < iterations; i++) { + try (final OutputStream out = new NullOutputStream()) { + serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, out); + } + } + + final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start); + System.out.println("Wrote " + iterations + " Swap Files in " + millis + " millis"); + } +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSimpleSwapSerializerDeserializer.java b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSimpleSwapSerializerDeserializer.java new file mode 100644 index 0000000000..045833385f --- /dev/null +++ b/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/test/java/org/apache/nifi/controller/swap/TestSimpleSwapSerializerDeserializer.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.controller.swap; + +import static org.junit.Assert.assertEquals; + +import java.io.DataInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import org.apache.nifi.controller.queue.FlowFileQueue; +import org.apache.nifi.controller.repository.FlowFileRecord; +import org.apache.nifi.controller.repository.SwapContents; +import org.apache.nifi.controller.repository.claim.ResourceClaimManager; +import org.apache.nifi.controller.repository.claim.StandardResourceClaimManager; +import org.apache.nifi.stream.io.NullOutputStream; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; +import org.mockito.Mockito; + +public class TestSimpleSwapSerializerDeserializer { + @Before + public void setup() { + MockFlowFile.resetIdGenerator(); + } + + @Test + public void testRoundTripSerializeDeserialize() throws IOException { + final ResourceClaimManager resourceClaimManager = new StandardResourceClaimManager(); + + final List toSwap = new ArrayList<>(10000); + final Map attrs = new HashMap<>(); + for (int i = 0; i < 10000; i++) { + attrs.put("i", String.valueOf(i)); + final FlowFileRecord ff = new MockFlowFile(attrs, i, resourceClaimManager); + toSwap.add(ff); + } + + final String queueId = "87bb99fe-412c-49f6-a441-d1b0af4e20b4"; + final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); + Mockito.when(flowFileQueue.getIdentifier()).thenReturn(queueId); + + final String swapLocation = "target/testRoundTrip-" + queueId + ".swap"; + final File swapFile = new File(swapLocation); + + Files.deleteIfExists(swapFile.toPath()); + try { + final SimpleSwapSerializer serializer = new SimpleSwapSerializer(); + try (final FileOutputStream fos = new FileOutputStream(swapFile)) { + serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, fos); + } + + final SimpleSwapDeserializer deserializer = new SimpleSwapDeserializer(); + final SwapContents swappedIn; + try (final FileInputStream fis = new FileInputStream(swapFile); + final DataInputStream dis = new DataInputStream(fis)) { + swappedIn = deserializer.deserializeFlowFiles(dis, swapLocation, flowFileQueue, resourceClaimManager); + } + + assertEquals(toSwap.size(), swappedIn.getFlowFiles().size()); + for (int i = 0; i < toSwap.size(); i++) { + final FlowFileRecord pre = toSwap.get(i); + final FlowFileRecord post = swappedIn.getFlowFiles().get(i); + + assertEquals(pre.getSize(), post.getSize()); + assertEquals(pre.getAttributes(), post.getAttributes()); + assertEquals(pre.getSize(), post.getSize()); + assertEquals(pre.getId(), post.getId()); + assertEquals(pre.getContentClaim(), post.getContentClaim()); + assertEquals(pre.getContentClaimOffset(), post.getContentClaimOffset()); + assertEquals(pre.getEntryDate(), post.getEntryDate()); + assertEquals(pre.getLastQueueDate(), post.getLastQueueDate()); + assertEquals(pre.getLineageStartDate(), post.getLineageStartDate()); + assertEquals(pre.getPenaltyExpirationMillis(), post.getPenaltyExpirationMillis()); + } + } finally { + Files.deleteIfExists(swapFile.toPath()); + } + } + + @Test + @Ignore("For manual testing only. 
Not intended to be run as part of the automated unit tests but can " + + "be convenient for determining a baseline for performance if making modifications.") + public void testWritePerformance() throws IOException, InterruptedException { + final ResourceClaimManager resourceClaimManager = new StandardResourceClaimManager(); + + final List toSwap = new ArrayList<>(10000); + final Map attrs = new HashMap<>(); + for (int i = 0; i < 10000; i++) { + attrs.put("i", String.valueOf(i)); + final FlowFileRecord ff = new MockFlowFile(attrs, i, resourceClaimManager); + toSwap.add(ff); + } + + final FlowFileQueue flowFileQueue = Mockito.mock(FlowFileQueue.class); + Mockito.when(flowFileQueue.getIdentifier()).thenReturn("87bb99fe-412c-49f6-a441-d1b0af4e20b4"); + + final String swapLocation = "target/testRoundTrip.swap"; + + final int iterations = 1000; + + final long start = System.nanoTime(); + final SwapSerializer serializer = new SimpleSwapSerializer(); + for (int i = 0; i < iterations; i++) { + try (final OutputStream out = new NullOutputStream()) { + serializer.serializeFlowFiles(toSwap, flowFileQueue, swapLocation, out); + } + } + + final long millis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start); + System.out.println("Wrote " + iterations + " Swap Files in " + millis + " millis"); + } + +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/pom.xml b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/pom.xml index 47e7bcd65c..52e53a89cd 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/pom.xml +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/pom.xml @@ -34,6 +34,10 @@ org.apache.nifi nifi-data-provenance-utils + + org.apache.nifi + nifi-schema-utils + org.apache.nifi nifi-utils diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/AbstractRecordWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/AbstractRecordWriter.java new file mode 100644 index 0000000000..2c84861dad --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/AbstractRecordWriter.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.provenance; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.nifi.provenance.serialization.RecordWriter; +import org.apache.nifi.provenance.toc.TocWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class AbstractRecordWriter implements RecordWriter { + private static final Logger logger = LoggerFactory.getLogger(AbstractRecordWriter.class); + + private final File file; + private final TocWriter tocWriter; + private final Lock lock = new ReentrantLock(); + + private volatile boolean dirty = false; + private volatile boolean closed = false; + + private int recordsWritten = 0; + + public AbstractRecordWriter(final File file, final TocWriter writer) throws IOException { + logger.trace("Creating Record Writer for {}", file); + + this.file = file; + this.tocWriter = writer; + } + + @Override + public synchronized void close() throws IOException { + closed = true; + + logger.trace("Closing Record Writer for {}", file == null ? null : file.getName()); + + lock(); + try { + flush(); + + try { + // We want to close 'out' only if the writer is not 'dirty'. + // If the writer is dirty, then there was a failure to write + // to disk, which means that we likely have a partial record written + // to disk. + // + // If we call close() on out, it will in turn call flush() on the underlying + // output stream, which is a BufferedOutputStream. As a result, we will end + // up flushing the buffer after a partially written record, which results in + // essentially random bytes being written to the repository, which causes + // corruption and un-recoverability. Since we will close the underlying 'rawOutStream' + // below, we will still appropriately clean up the resources help by this writer, so + // we are still OK in terms of closing all resources held by the writer. + final OutputStream buffered = getBufferedOutputStream(); + if (buffered != null && !isDirty()) { + buffered.close(); + } + } finally { + final OutputStream underlying = getUnderlyingOutputStream(); + if (underlying != null) { + try { + getUnderlyingOutputStream().close(); + } finally { + if (tocWriter != null) { + tocWriter.close(); + } + } + } + } + } catch (final IOException ioe) { + markDirty(); + throw ioe; + } finally { + unlock(); + } + } + + @Override + public int getRecordsWritten() { + return recordsWritten; + } + + @Override + public File getFile() { + return file; + } + + @Override + public void lock() { + lock.lock(); + } + + @Override + public void unlock() { + lock.unlock(); + } + + @Override + public boolean tryLock() { + final boolean obtainedLock = lock.tryLock(); + if (obtainedLock && isDirty()) { + // once we have obtained the lock, we need to check if the writer + // has been marked dirty. If so, we cannot write to the underlying + // file, so we need to unlock and return false. Otherwise, it's okay + // to write to the underlying file, so return true. 
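A minimal sketch of how a caller is expected to honor the dirty-flag contract described in the comments above; it is not part of the patch. Only the RecordWriter methods shown here (tryLock, writeRecord, markDirty, unlock) and the RecordWriters.newSchemaRecordWriter factory introduced by this change are assumed; the writer, event and eventId values are placeholders.

    // Illustrative only: a writer that failed a previous write is "dirty" and
    // tryLock() refuses to hand it out again, so the partial record is never flushed.
    private long writeEvent(final RecordWriter writer, final ProvenanceEventRecord event, final long eventId) throws IOException {
        if (!writer.tryLock()) {
            return -1L; // busy or already dirty; the caller should pick another writer
        }
        try {
            return writer.writeRecord(event, eventId);
        } catch (final IOException ioe) {
            // Marking the writer dirty makes close() skip the buffered stream,
            // so the partially written record is discarded rather than flushed to disk.
            writer.markDirty();
            throw ioe;
        } finally {
            writer.unlock();
        }
    }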
+ lock.unlock(); + return false; + } + return obtainedLock; + } + + @Override + public void markDirty() { + this.dirty = true; + } + + public boolean isDirty() { + return dirty; + } + + protected void resetDirtyFlag() { + this.dirty = false; + } + + @Override + public void sync() throws IOException { + try { + if (tocWriter != null) { + tocWriter.sync(); + } + + syncUnderlyingOutputStream(); + } catch (final IOException ioe) { + markDirty(); + throw ioe; + } + } + + @Override + public TocWriter getTocWriter() { + return tocWriter; + } + + @Override + public boolean isClosed() { + return closed; + } + + protected abstract OutputStream getBufferedOutputStream(); + + protected abstract OutputStream getUnderlyingOutputStream(); + + protected abstract void syncUnderlyingOutputStream() throws IOException; +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordReader.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordReader.java new file mode 100644 index 0000000000..297f084c51 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordReader.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.provenance; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.nifi.provenance.schema.EventRecord; +import org.apache.nifi.provenance.serialization.CompressableRecordReader; +import org.apache.nifi.provenance.toc.TocReader; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.SchemaRecordReader; +import org.apache.nifi.stream.io.LimitingInputStream; +import org.apache.nifi.stream.io.StreamUtils; + + +public class ByteArraySchemaRecordReader extends CompressableRecordReader { + private RecordSchema schema; // effectively final + private SchemaRecordReader recordReader; // effectively final + + public ByteArraySchemaRecordReader(final InputStream in, final String filename, final int maxAttributeChars) throws IOException { + super(in, filename, maxAttributeChars); + } + + public ByteArraySchemaRecordReader(final InputStream in, final String filename, final TocReader tocReader, final int maxAttributeChars) throws IOException { + super(in, filename, tocReader, maxAttributeChars); + } + + private void verifySerializationVersion(final int serializationVersion) { + if (serializationVersion > ByteArraySchemaRecordWriter.SERIALIZATION_VERSION) { + throw new IllegalArgumentException("Unable to deserialize record because the version is " + serializationVersion + + " and supported versions are 1-" + ByteArraySchemaRecordWriter.SERIALIZATION_VERSION); + } + } + + @Override + protected void readHeader(final DataInputStream in, final int serializationVersion) throws IOException { + verifySerializationVersion(serializationVersion); + final int schemaLength = in.readInt(); + final byte[] buffer = new byte[schemaLength]; + StreamUtils.fillBuffer(in, buffer); + + try (final ByteArrayInputStream bais = new ByteArrayInputStream(buffer)) { + schema = RecordSchema.readFrom(bais); + } + + recordReader = SchemaRecordReader.fromSchema(schema); + } + + @Override + protected StandardProvenanceEventRecord nextRecord(final DataInputStream in, final int serializationVersion) throws IOException { + verifySerializationVersion(serializationVersion); + final long byteOffset = getBytesConsumed(); + final int recordLength = in.readInt(); + + final InputStream limitedIn = new LimitingInputStream(in, recordLength); + final Record eventRecord = recordReader.readRecord(limitedIn); + + return EventRecord.getEvent(eventRecord, getFilename(), byteOffset, getMaxAttributeLength()); + } + +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordWriter.java new file mode 100644 index 0000000000..cae2f40d86 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/ByteArraySchemaRecordWriter.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.nifi.provenance.schema.EventRecord; +import org.apache.nifi.provenance.schema.EventRecordFields; +import org.apache.nifi.provenance.schema.ProvenanceEventSchema; +import org.apache.nifi.provenance.serialization.CompressableRecordWriter; +import org.apache.nifi.provenance.toc.TocWriter; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.SchemaRecordWriter; +import org.apache.nifi.stream.io.DataOutputStream; + +public class ByteArraySchemaRecordWriter extends CompressableRecordWriter { + private static final RecordSchema eventSchema = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1; + private static final RecordSchema contentClaimSchema = new RecordSchema(eventSchema.getField(EventRecordFields.Names.CONTENT_CLAIM).getSubFields()); + public static final int SERIALIZATION_VERSION = 1; + public static final String SERIALIZATION_NAME = "ByteArraySchemaRecordWriter"; + + private final SchemaRecordWriter recordWriter = new SchemaRecordWriter(); + + public ByteArraySchemaRecordWriter(final File file, final TocWriter tocWriter, final boolean compressed, final int uncompressedBlockSize) throws IOException { + super(file, tocWriter, compressed, uncompressedBlockSize); + } + + public ByteArraySchemaRecordWriter(final OutputStream out, final TocWriter tocWriter, final boolean compressed, final int uncompressedBlockSize) throws IOException { + super(out, tocWriter, compressed, uncompressedBlockSize); + } + + @Override + protected String getSerializationName() { + return SERIALIZATION_NAME; + } + + @Override + protected int getSerializationVersion() { + return SERIALIZATION_VERSION; + } + + @Override + public void writeHeader(final long firstEventId, final DataOutputStream out) throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + eventSchema.writeTo(baos); + + out.writeInt(baos.size()); + baos.writeTo(out); + } + + protected Record createRecord(final ProvenanceEventRecord event, final long eventId) { + return new EventRecord(event, eventId, eventSchema, contentClaimSchema); + } + + @Override + protected void writeRecord(final ProvenanceEventRecord event, final long eventId, final DataOutputStream out) throws IOException { + try (final ByteArrayOutputStream baos = new ByteArrayOutputStream(256)) { + + final Record eventRecord = createRecord(event, eventId); + recordWriter.writeRecord(eventRecord, baos); + + out.writeInt(baos.size()); + baos.writeTo(out); + } + } +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/PersistentProvenanceRepository.java 
b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/PersistentProvenanceRepository.java index f70bf7dfb4..c20ce6ecc4 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/PersistentProvenanceRepository.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/PersistentProvenanceRepository.java @@ -110,6 +110,11 @@ import org.apache.nifi.util.RingBuffer; import org.apache.nifi.util.RingBuffer.ForEachEvaluator; import org.apache.nifi.util.StopWatch; import org.apache.nifi.util.Tuple; +import org.apache.nifi.util.timebuffer.CountSizeEntityAccess; +import org.apache.nifi.util.timebuffer.LongEntityAccess; +import org.apache.nifi.util.timebuffer.TimedBuffer; +import org.apache.nifi.util.timebuffer.TimedCountSize; +import org.apache.nifi.util.timebuffer.TimestampedLong; import org.apache.nifi.web.ResourceNotFoundException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -120,7 +125,6 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { private static final String FILE_EXTENSION = ".prov"; private static final String TEMP_FILE_SUFFIX = ".prov.part"; private static final long PURGE_EVENT_MILLISECONDS = 2500L; //Determines the frequency over which the task to delete old events will occur - public static final int SERIALIZATION_VERSION = 9; public static final Pattern NUMBER_PATTERN = Pattern.compile("\\d+"); public static final Pattern INDEX_PATTERN = Pattern.compile("index-\\d+"); public static final Pattern LOG_FILENAME_PATTERN = Pattern.compile("(\\d+).*\\.prov"); @@ -180,6 +184,9 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { private Authorizer authorizer; // effectively final private ProvenanceAuthorizableFactory resourceFactory; // effectively final + private final TimedBuffer updateCounts = new TimedBuffer<>(TimeUnit.SECONDS, 300, new CountSizeEntityAccess()); + private final TimedBuffer backpressurePauseMillis = new TimedBuffer<>(TimeUnit.SECONDS, 300, new LongEntityAccess()); + /** * default no args constructor for service loading only. */ @@ -401,7 +408,7 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { final File journalDirectory = new File(storageDirectory, "journals"); final File journalFile = new File(journalDirectory, String.valueOf(initialRecordId) + ".journal." + i); - writers[i] = RecordWriters.newRecordWriter(journalFile, false, false); + writers[i] = RecordWriters.newSchemaRecordWriter(journalFile, false, false); writers[i].writeHeader(initialRecordId); } @@ -762,18 +769,23 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { try { try { + long recordsWritten = 0L; for (final ProvenanceEventRecord nextRecord : records) { final long eventId = idGenerator.getAndIncrement(); bytesWritten += writer.writeRecord(nextRecord, eventId); + recordsWritten++; logger.trace("Wrote record with ID {} to {}", eventId, writer); } + writer.flush(); + if (alwaysSync) { writer.sync(); } totalJournalSize = bytesWrittenSinceRollover.addAndGet(bytesWritten); recordsWrittenSinceRollover.getAndIncrement(); + this.updateCounts.add(new TimedCountSize(recordsWritten, bytesWritten)); } catch (final Throwable t) { // We need to set the repoDirty flag before we release the lock for this journal. 
// Otherwise, another thread may write to this journal -- this is a problem because @@ -1331,14 +1343,17 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { updated = idToPathMap.compareAndSet(existingPathMap, newIdToPathMap); } - logger.info("Successfully Rolled over Provenance Event file containing {} records", recordsWritten); + final TimedCountSize countSize = updateCounts.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES)); + logger.info("Successfully Rolled over Provenance Event file containing {} records. In the past 5 minutes, " + + "{} events have been written to the Provenance Repository, totaling {}", + recordsWritten, countSize.getCount(), FormatUtils.formatDataSize(countSize.getSize())); } //if files were rolled over or if out of retries stop the future if (fileRolledOver != null || retryAttempts.decrementAndGet() == 0) { if (fileRolledOver == null && retryAttempts.get() == 0) { - logger.error("Failed to merge Journal Files {} after {} attempts. ", journalsToMerge, MAX_JOURNAL_ROLLOVER_RETRIES); + logger.error("Failed to merge Journal Files {} after {} attempts.", journalsToMerge, MAX_JOURNAL_ROLLOVER_RETRIES); } rolloverCompletions.getAndIncrement(); @@ -1387,6 +1402,8 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { // max capacity for the repo, or if we have 5 sets of journal files waiting to be merged, we will block here until // that is no longer the case. if (journalFileCount > journalCountThreshold || repoSize > sizeThreshold) { + final long stopTheWorldStart = System.nanoTime(); + logger.warn("The rate of the dataflow is exceeding the provenance recording rate. " + "Slowing down flow to accommodate. Currently, there are {} journal files ({} bytes) and " + "threshold for blocking is {} ({} bytes)", journalFileCount, repoSize, journalCountThreshold, sizeThreshold); @@ -1428,8 +1445,12 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { repoSize = getSize(getLogFiles(), 0L); } + final long stopTheWorldNanos = System.nanoTime() - stopTheWorldStart; + backpressurePauseMillis.add(new TimestampedLong(stopTheWorldNanos)); + final TimestampedLong pauseNanosLastFiveMinutes = backpressurePauseMillis.getAggregateValue(System.currentTimeMillis() - TimeUnit.MILLISECONDS.convert(5, TimeUnit.MINUTES)); logger.info("Provenance Repository has now caught up with rolling over journal files. Current number of " - + "journal files to be rolled over is {}", journalFileCount); + + "journal files to be rolled over is {}. Provenance Repository Back Pressure paused Session commits for {} ({} total in the last 5 minutes).", + journalFileCount, FormatUtils.formatNanos(stopTheWorldNanos, true), FormatUtils.formatNanos(pauseNanosLastFiveMinutes.getValue(), true)); } // we've finished rolling over successfully. Create new writers and reset state. 
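The five-minute statistics added above follow one pattern: every journal write contributes a TimedCountSize sample to a TimedBuffer, and the rollover log asks the buffer to aggregate everything newer than now minus five minutes. A stripped-down sketch of that pattern, not part of the patch; the TimedBuffer, CountSizeEntityAccess, TimedCountSize and FormatUtils types are the ones used by this change, while the field and method names are illustrative.

    private final TimedBuffer<TimedCountSize> updateCounts =
            new TimedBuffer<>(TimeUnit.SECONDS, 300, new CountSizeEntityAccess()); // roughly 300 one-second bins

    void recordJournalWrite(final long recordsWritten, final long bytesWritten) {
        updateCounts.add(new TimedCountSize(recordsWritten, bytesWritten));
    }

    String summarizeLastFiveMinutes() {
        final TimedCountSize aggregate = updateCounts.getAggregateValue(
                System.currentTimeMillis() - TimeUnit.MINUTES.toMillis(5));
        return aggregate.getCount() + " events, " + FormatUtils.formatDataSize(aggregate.getSize());
    }

The back-pressure metric in this change works the same way: each stop-the-world pause adds a TimestampedLong sample, and the aggregate over the window is reported once the repository catches up on journal rollover.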
@@ -1635,7 +1656,7 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { } if (eventReporter != null) { - eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "re " + ioe.toString()); + eventReporter.reportEvent(Severity.ERROR, EVENT_CATEGORY, "Failed to merge Journal Files due to " + ioe.toString()); } } } @@ -1696,7 +1717,7 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { // loop over each entry in the map, persisting the records to the merged file in order, and populating the map // with the next entry from the journal file from which the previous record was written. - try (final RecordWriter writer = RecordWriters.newRecordWriter(writerFile, configuration.isCompressOnRollover(), true)) { + try (final RecordWriter writer = RecordWriters.newSchemaRecordWriter(writerFile, configuration.isCompressOnRollover(), true)) { writer.writeHeader(minEventId); final IndexingAction indexingAction = createIndexingAction(); @@ -1903,13 +1924,22 @@ public class PersistentProvenanceRepository implements ProvenanceRepository { private StandardProvenanceEventRecord truncateAttributes(final StandardProvenanceEventRecord original) { boolean requireTruncation = false; - for (final Map.Entry entry : original.getAttributes().entrySet()) { - if (entry.getValue().length() > maxAttributeChars) { + for (final String updatedAttr : original.getUpdatedAttributes().values()) { + if (updatedAttr != null && updatedAttr.length() > maxAttributeChars) { requireTruncation = true; break; } } + if (!requireTruncation) { + for (final String previousAttr : original.getPreviousAttributes().values()) { + if (previousAttr != null && previousAttr.length() > maxAttributeChars) { + requireTruncation = true; + break; + } + } + } + if (!requireTruncation) { return original; } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordReader.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordReader.java index 2db9ed32f0..f018685e4c 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordReader.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordReader.java @@ -17,178 +17,36 @@ package org.apache.nifi.provenance; import java.io.DataInputStream; -import java.io.EOFException; import java.io.IOException; import java.io.InputStream; -import java.nio.charset.StandardCharsets; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.UUID; import java.util.regex.Pattern; -import java.util.zip.GZIPInputStream; -import org.apache.nifi.provenance.serialization.RecordReader; +import org.apache.nifi.provenance.serialization.CompressableRecordReader; import org.apache.nifi.provenance.toc.TocReader; -import org.apache.nifi.stream.io.BufferedInputStream; -import org.apache.nifi.stream.io.ByteCountingInputStream; -import org.apache.nifi.stream.io.LimitingInputStream; import org.apache.nifi.stream.io.StreamUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class StandardRecordReader implements RecordReader { +public class StandardRecordReader extends CompressableRecordReader { private static final Logger logger = 
LoggerFactory.getLogger(StandardRecordReader.class); private static final Pattern UUID_PATTERN = Pattern.compile("[a-fA-F0-9]{8}\\-([a-fA-F0-9]{4}\\-){3}[a-fA-F0-9]{12}"); - private final ByteCountingInputStream rawInputStream; - private final String filename; - private final int serializationVersion; - private final boolean compressed; - private final TocReader tocReader; - private final int headerLength; - private final int maxAttributeChars; - - private DataInputStream dis; - private ByteCountingInputStream byteCountingIn; - public StandardRecordReader(final InputStream in, final String filename, final int maxAttributeChars) throws IOException { this(in, filename, null, maxAttributeChars); } public StandardRecordReader(final InputStream in, final String filename, final TocReader tocReader, final int maxAttributeChars) throws IOException { + super(in, filename, tocReader, maxAttributeChars); logger.trace("Creating RecordReader for {}", filename); - - rawInputStream = new ByteCountingInputStream(in); - this.maxAttributeChars = maxAttributeChars; - - final InputStream limitedStream; - if ( tocReader == null ) { - limitedStream = rawInputStream; - } else { - final long offset1 = tocReader.getBlockOffset(1); - if ( offset1 < 0 ) { - limitedStream = rawInputStream; - } else { - limitedStream = new LimitingInputStream(rawInputStream, offset1 - rawInputStream.getBytesConsumed()); - } - } - - final InputStream readableStream; - if (filename.endsWith(".gz")) { - readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream)); - compressed = true; - } else { - readableStream = new BufferedInputStream(limitedStream); - compressed = false; - } - - byteCountingIn = new ByteCountingInputStream(readableStream); - dis = new DataInputStream(byteCountingIn); - - final String repoClassName = dis.readUTF(); - final int serializationVersion = dis.readInt(); - headerLength = repoClassName.getBytes(StandardCharsets.UTF_8).length + 2 + 4; // 2 bytes for string length, 4 for integer. 
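The headerLength arithmetic being removed here documents the legacy header layout: DataOutputStream.writeUTF prefixes the serialization name with a two-byte length, and the serialization version follows as a four-byte int. A small writer-side sketch of that layout, not part of the patch, using the class name the old format stores:

    // Legacy header: [2-byte UTF length][name bytes][4-byte serialization version]
    void writeLegacyHeader(final DataOutputStream out) throws IOException {
        final String name = "org.apache.nifi.provenance.PersistentProvenanceRepository"; // 57 ASCII bytes
        out.writeUTF(name); // 2 + 57 bytes
        out.writeInt(9);    // 4 bytes; for this name, headerLength = 57 + 2 + 4 = 63
    }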
- - if (serializationVersion < 1 || serializationVersion > 9) { - throw new IllegalArgumentException("Unable to deserialize record because the version is " + serializationVersion + " and supported versions are 1-9"); - } - - this.serializationVersion = serializationVersion; - this.filename = filename; - this.tocReader = tocReader; - } - - @Override - public void skipToBlock(final int blockIndex) throws IOException { - if ( tocReader == null ) { - throw new IllegalStateException("Cannot skip to block " + blockIndex + " for Provenance Log " + filename + " because no Table-of-Contents file was found for this Log"); - } - - if ( blockIndex < 0 ) { - throw new IllegalArgumentException("Cannot skip to block " + blockIndex + " because the value is negative"); - } - - if ( blockIndex == getBlockIndex() ) { - return; - } - - final long offset = tocReader.getBlockOffset(blockIndex); - if ( offset < 0 ) { - throw new IOException("Unable to find block " + blockIndex + " in Provenance Log " + filename); - } - - final long curOffset = rawInputStream.getBytesConsumed(); - - final long bytesToSkip = offset - curOffset; - if ( bytesToSkip >= 0 ) { - try { - StreamUtils.skip(rawInputStream, bytesToSkip); - logger.debug("Skipped stream from offset {} to {} ({} bytes skipped)", curOffset, offset, bytesToSkip); - } catch (final IOException e) { - throw new IOException("Failed to skip to offset " + offset + " for block " + blockIndex + " of Provenance Log " + filename, e); - } - - resetStreamForNextBlock(); - } - } - - private void resetStreamForNextBlock() throws IOException { - final InputStream limitedStream; - if ( tocReader == null ) { - limitedStream = rawInputStream; - } else { - final long offset = tocReader.getBlockOffset(1 + getBlockIndex()); - if ( offset < 0 ) { - limitedStream = rawInputStream; - } else { - limitedStream = new LimitingInputStream(rawInputStream, offset - rawInputStream.getBytesConsumed()); - } - } - - final InputStream readableStream; - if (compressed) { - readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream)); - } else { - readableStream = new BufferedInputStream(limitedStream); - } - - byteCountingIn = new ByteCountingInputStream(readableStream, rawInputStream.getBytesConsumed()); - dis = new DataInputStream(byteCountingIn); } - @Override - public TocReader getTocReader() { - return tocReader; - } - - @Override - public boolean isBlockIndexAvailable() { - return tocReader != null; - } - - @Override - public int getBlockIndex() { - if ( tocReader == null ) { - throw new IllegalStateException("Cannot determine Block Index because no Table-of-Contents could be found for Provenance Log " + filename); - } - - return tocReader.getBlockIndex(rawInputStream.getBytesConsumed()); - } - - @Override - public long getBytesConsumed() { - return byteCountingIn.getBytesConsumed(); - } - - private StandardProvenanceEventRecord readPreVersion6Record() throws IOException { - final long startOffset = byteCountingIn.getBytesConsumed(); - - if (!isData()) { - return null; - } - + private StandardProvenanceEventRecord readPreVersion6Record(final DataInputStream dis, final int serializationVersion) throws IOException { + final long startOffset = getBytesConsumed(); final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder(); final long eventId = dis.readLong(); @@ -254,7 +112,7 @@ public class StandardRecordReader implements RecordReader { builder.setAttributes(Collections.emptyMap(), attrs); builder.setCurrentContentClaim(null, null, null, 
null, fileSize); - builder.setStorageLocation(filename, startOffset); + builder.setStorageLocation(getFilename(), startOffset); final StandardProvenanceEventRecord record = builder.build(); record.setEventId(eventId); @@ -262,17 +120,18 @@ public class StandardRecordReader implements RecordReader { } @Override - public StandardProvenanceEventRecord nextRecord() throws IOException { + public StandardProvenanceEventRecord nextRecord(final DataInputStream dis, final int serializationVersion) throws IOException { + if (serializationVersion > StandardRecordWriter.SERIALIZATION_VERISON) { + throw new IllegalArgumentException("Unable to deserialize record because the version is " + + serializationVersion + " and supported versions are 1-" + StandardRecordWriter.SERIALIZATION_VERISON); + } + // Schema changed drastically in version 6 so we created a new method to handle old records if (serializationVersion < 6) { - return readPreVersion6Record(); + return readPreVersion6Record(dis, serializationVersion); } - final long startOffset = byteCountingIn.getBytesConsumed(); - - if (!isData()) { - return null; - } + final long startOffset = getBytesConsumed(); final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder(); @@ -288,7 +147,7 @@ public class StandardRecordReader implements RecordReader { if (serializationVersion < 9){ final int numLineageIdentifiers = dis.readInt(); for (int i = 0; i < numLineageIdentifiers; i++) { - readUUID(dis); //skip identifiers + readUUID(dis, serializationVersion); //skip identifiers } } @@ -303,7 +162,7 @@ public class StandardRecordReader implements RecordReader { builder.setComponentId(readNullableString(dis)); builder.setComponentType(readNullableString(dis)); - final String uuid = readUUID(dis); + final String uuid = readUUID(dis, serializationVersion); builder.setFlowFileUUID(uuid); builder.setDetails(readNullableString(dis)); @@ -335,12 +194,12 @@ public class StandardRecordReader implements RecordReader { if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.JOIN || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) { final int numParents = dis.readInt(); for (int i = 0; i < numParents; i++) { - builder.addParentUuid(readUUID(dis)); + builder.addParentUuid(readUUID(dis, serializationVersion)); } final int numChildren = dis.readInt(); for (int i = 0; i < numChildren; i++) { - builder.addChildUuid(readUUID(dis)); + builder.addChildUuid(readUUID(dis, serializationVersion)); } } else if (eventType == ProvenanceEventType.RECEIVE) { builder.setTransitUri(readNullableString(dis)); @@ -357,7 +216,7 @@ public class StandardRecordReader implements RecordReader { builder.setFlowFileEntryDate(flowFileEntryDate); builder.setLineageStartDate(lineageStartDate); - builder.setStorageLocation(filename, startOffset); + builder.setStorageLocation(getFilename(), startOffset); final StandardProvenanceEventRecord record = builder.build(); record.setEventId(eventId); @@ -373,8 +232,8 @@ public class StandardRecordReader implements RecordReader { final String truncatedValue; if (value == null) { truncatedValue = null; - } else if (value.length() > maxAttributeChars) { - truncatedValue = value.substring(0, maxAttributeChars); + } else if (value.length() > getMaxAttributeLength()) { + truncatedValue = value.substring(0, getMaxAttributeLength()); } else { truncatedValue = value; } @@ -385,8 +244,8 @@ public class StandardRecordReader implements RecordReader { return attrs; } - private String 
readUUID(final DataInputStream in) throws IOException { - if ( serializationVersion < 8 ) { + private String readUUID(final DataInputStream in, final int serializationVersion) throws IOException { + if (serializationVersion < 8) { final long msb = in.readLong(); final long lsb = in.readLong(); return new UUID(msb, lsb).toString(); @@ -427,80 +286,4 @@ public class StandardRecordReader implements RecordReader { StreamUtils.fillBuffer(in, strBytes); return new String(strBytes, "UTF-8"); } - - private boolean isData() throws IOException { - byteCountingIn.mark(1); - int nextByte = byteCountingIn.read(); - byteCountingIn.reset(); - - if ( nextByte < 0 ) { - try { - resetStreamForNextBlock(); - } catch (final EOFException eof) { - return false; - } - - byteCountingIn.mark(1); - nextByte = byteCountingIn.read(); - byteCountingIn.reset(); - } - - return nextByte >= 0; - } - - @Override - public long getMaxEventId() throws IOException { - if ( tocReader != null ) { - final long lastBlockOffset = tocReader.getLastBlockOffset(); - skipToBlock(tocReader.getBlockIndex(lastBlockOffset)); - } - - ProvenanceEventRecord record; - ProvenanceEventRecord lastRecord = null; - try { - while ((record = nextRecord()) != null) { - lastRecord = record; - } - } catch (final EOFException eof) { - // This can happen if we stop NIFi while the record is being written. - // This is OK, we just ignore this record. The session will not have been - // committed, so we can just process the FlowFile again. - } - - return lastRecord == null ? -1L : lastRecord.getEventId(); - } - - @Override - public void close() throws IOException { - logger.trace("Closing Record Reader for {}", filename); - - dis.close(); - rawInputStream.close(); - - if ( tocReader != null ) { - tocReader.close(); - } - } - - @Override - public void skip(final long bytesToSkip) throws IOException { - StreamUtils.skip(dis, bytesToSkip); - } - - @Override - public void skipTo(final long position) throws IOException { - // we are subtracting headerLength from the number of bytes consumed because we used to - // consider the offset of the first record "0" - now we consider it whatever position it - // it really is in the stream. 
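The comment above captures the offset convention that skipTo (now inherited from CompressableRecordReader) relies on: positions are expressed relative to the first record, so the header length is subtracted from the raw byte count before comparing. A tiny worked sketch of that arithmetic, not part of the patch, assuming the 63-byte header from the earlier example:

    // e.g. 163 raw bytes consumed with a 63-byte header means record offset 100
    void skipToSketch(final DataInputStream dis, final long bytesConsumed, final long position) throws IOException {
        final long headerLength = 63;
        final long currentPosition = bytesConsumed - headerLength;
        if (currentPosition > position) {
            throw new IOException("Cannot skip to byte offset " + position + "; already at " + currentPosition);
        }
        StreamUtils.skip(dis, position - currentPosition); // skipTo(250) from offset 100 skips 150 bytes
    }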
- final long currentPosition = byteCountingIn.getBytesConsumed() - headerLength; - if (currentPosition == position) { - return; - } - if (currentPosition > position) { - throw new IOException("Cannot skip to byte offset " + position + " in stream because already at byte offset " + currentPosition); - } - - final long toSkip = position - currentPosition; - StreamUtils.skip(dis, toSkip); - } } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordWriter.java index 35832c475f..a95bd4f960 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordWriter.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/StandardRecordWriter.java @@ -17,239 +17,142 @@ package org.apache.nifi.provenance; import java.io.File; -import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.Collection; import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReentrantLock; +import org.apache.nifi.provenance.serialization.CompressableRecordWriter; import org.apache.nifi.provenance.serialization.RecordWriter; import org.apache.nifi.provenance.toc.TocWriter; -import org.apache.nifi.stream.io.BufferedOutputStream; -import org.apache.nifi.stream.io.ByteCountingOutputStream; import org.apache.nifi.stream.io.DataOutputStream; -import org.apache.nifi.stream.io.GZIPOutputStream; -import org.apache.nifi.stream.io.NonCloseableOutputStream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -public class StandardRecordWriter implements RecordWriter { +/** + * @deprecated Deprecated in favor of SchemaRecordWriter + */ +@Deprecated +public class StandardRecordWriter extends CompressableRecordWriter implements RecordWriter { private static final Logger logger = LoggerFactory.getLogger(StandardRecordWriter.class); + public static final int SERIALIZATION_VERISON = 9; + public static final String SERIALIZATION_NAME = "org.apache.nifi.provenance.PersistentProvenanceRepository"; private final File file; - private final FileOutputStream fos; - private final ByteCountingOutputStream rawOutStream; - private final TocWriter tocWriter; - private final boolean compressed; - private final int uncompressedBlockSize; - private final AtomicBoolean dirtyFlag = new AtomicBoolean(false); - - private DataOutputStream out; - private ByteCountingOutputStream byteCountingOut; - private long lastBlockOffset = 0L; - private int recordCount = 0; - private volatile boolean closed = false; - - private final Lock lock = new ReentrantLock(); public StandardRecordWriter(final File file, final TocWriter writer, final boolean compressed, final int uncompressedBlockSize) throws IOException { + super(file, writer, compressed, uncompressedBlockSize); logger.trace("Creating Record Writer for {}", file.getName()); this.file = file; - this.compressed = compressed; - this.fos = new FileOutputStream(file); - rawOutStream = new ByteCountingOutputStream(fos); - this.uncompressedBlockSize = uncompressedBlockSize; + } - this.tocWriter = writer; + public StandardRecordWriter(final OutputStream 
out, final TocWriter tocWriter, final boolean compressed, final int uncompressedBlockSize) throws IOException { + super(out, tocWriter, compressed, uncompressedBlockSize); + this.file = null; } @Override - public synchronized File getFile() { - return file; + protected String getSerializationName() { + return SERIALIZATION_NAME; } @Override - public synchronized void writeHeader(final long firstEventId) throws IOException { - if (isDirty()) { - throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository"); - } - - try { - lastBlockOffset = rawOutStream.getBytesWritten(); - resetWriteStream(firstEventId); - - out.writeUTF(PersistentProvenanceRepository.class.getName()); - out.writeInt(PersistentProvenanceRepository.SERIALIZATION_VERSION); - out.flush(); - } catch (final IOException ioe) { - markDirty(); - throw ioe; - } - } - - - /** - * Resets the streams to prepare for a new block - * @param eventId the first id that will be written to the new block - * @throws IOException if unable to flush/close the current streams properly - */ - private void resetWriteStream(final long eventId) throws IOException { - try { - if (out != null) { - out.flush(); - } - - final long byteOffset = (byteCountingOut == null) ? rawOutStream.getBytesWritten() : byteCountingOut.getBytesWritten(); - - final OutputStream writableStream; - if ( compressed ) { - // because of the way that GZIPOutputStream works, we need to call close() on it in order for it - // to write its trailing bytes. But we don't want to close the underlying OutputStream, so we wrap - // the underlying OutputStream in a NonCloseableOutputStream - // We don't have to check if the writer is dirty because we will have already checked before calling this method. 
- if ( out != null ) { - out.close(); - } - - if ( tocWriter != null ) { - tocWriter.addBlockOffset(rawOutStream.getBytesWritten(), eventId); - } - - writableStream = new BufferedOutputStream(new GZIPOutputStream(new NonCloseableOutputStream(rawOutStream), 1), 65536); - } else { - if ( tocWriter != null ) { - tocWriter.addBlockOffset(rawOutStream.getBytesWritten(), eventId); - } - - writableStream = new BufferedOutputStream(rawOutStream, 65536); - } - - this.byteCountingOut = new ByteCountingOutputStream(writableStream, byteOffset); - this.out = new DataOutputStream(byteCountingOut); - dirtyFlag.set(false); - } catch (final IOException ioe) { - markDirty(); - throw ioe; - } + protected int getSerializationVersion() { + return SERIALIZATION_VERSION; } @Override - public synchronized long writeRecord(final ProvenanceEventRecord record, final long recordIdentifier) throws IOException { - if (isDirty()) { - throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository"); + protected void writeHeader(long firstEventId, DataOutputStream out) throws IOException { + } + + @Override + protected void writeRecord(final ProvenanceEventRecord record, final long recordIdentifier, final DataOutputStream out) throws IOException { + final ProvenanceEventType recordType = record.getEventType(); + + out.writeLong(recordIdentifier); + out.writeUTF(record.getEventType().name()); + out.writeLong(record.getEventTime()); + out.writeLong(record.getFlowFileEntryDate()); + out.writeLong(record.getEventDuration()); + out.writeLong(record.getLineageStartDate()); + + writeNullableString(out, record.getComponentId()); + writeNullableString(out, record.getComponentType()); + writeUUID(out, record.getFlowFileUuid()); + writeNullableString(out, record.getDetails()); + + // Write FlowFile attributes + final Map attrs = record.getPreviousAttributes(); + out.writeInt(attrs.size()); + for (final Map.Entry entry : attrs.entrySet()) { + writeLongString(out, entry.getKey()); + writeLongString(out, entry.getValue()); } - try { - final ProvenanceEventType recordType = record.getEventType(); - final long startBytes = byteCountingOut.getBytesWritten(); + final Map attrUpdates = record.getUpdatedAttributes(); + out.writeInt(attrUpdates.size()); + for (final Map.Entry entry : attrUpdates.entrySet()) { + writeLongString(out, entry.getKey()); + writeLongNullableString(out, entry.getValue()); + } - // add a new block to the TOC if needed. - if ( tocWriter != null && (startBytes - lastBlockOffset >= uncompressedBlockSize) ) { - lastBlockOffset = startBytes; - - if ( compressed ) { - // because of the way that GZIPOutputStream works, we need to call close() on it in order for it - // to write its trailing bytes.
But we don't want to close the underlying OutputStream, so we wrap - // the underlying OutputStream in a NonCloseableOutputStream - resetWriteStream(recordIdentifier); - } - } - - out.writeLong(recordIdentifier); - out.writeUTF(record.getEventType().name()); - out.writeLong(record.getEventTime()); - out.writeLong(record.getFlowFileEntryDate()); - out.writeLong(record.getEventDuration()); - out.writeLong(record.getLineageStartDate()); - - writeNullableString(out, record.getComponentId()); - writeNullableString(out, record.getComponentType()); - writeUUID(out, record.getFlowFileUuid()); - writeNullableString(out, record.getDetails()); - - // Write FlowFile attributes - final Map attrs = record.getPreviousAttributes(); - out.writeInt(attrs.size()); - for (final Map.Entry entry : attrs.entrySet()) { - writeLongString(out, entry.getKey()); - writeLongString(out, entry.getValue()); - } - - final Map attrUpdates = record.getUpdatedAttributes(); - out.writeInt(attrUpdates.size()); - for (final Map.Entry entry : attrUpdates.entrySet()) { - writeLongString(out, entry.getKey()); - writeLongNullableString(out, entry.getValue()); - } - - // If Content Claim Info is present, write out a 'TRUE' followed by claim info. Else, write out 'false'. - if (record.getContentClaimSection() != null && record.getContentClaimContainer() != null && record.getContentClaimIdentifier() != null) { - out.writeBoolean(true); - out.writeUTF(record.getContentClaimContainer()); - out.writeUTF(record.getContentClaimSection()); - out.writeUTF(record.getContentClaimIdentifier()); - if (record.getContentClaimOffset() == null) { - out.writeLong(0L); - } else { - out.writeLong(record.getContentClaimOffset()); - } - out.writeLong(record.getFileSize()); + // If Content Claim Info is present, write out a 'TRUE' followed by claim info. Else, write out 'false'. + if (record.getContentClaimSection() != null && record.getContentClaimContainer() != null && record.getContentClaimIdentifier() != null) { + out.writeBoolean(true); + out.writeUTF(record.getContentClaimContainer()); + out.writeUTF(record.getContentClaimSection()); + out.writeUTF(record.getContentClaimIdentifier()); + if (record.getContentClaimOffset() == null) { + out.writeLong(0L); } else { - out.writeBoolean(false); + out.writeLong(record.getContentClaimOffset()); } + out.writeLong(record.getFileSize()); + } else { + out.writeBoolean(false); + } - // If Previous Content Claim Info is present, write out a 'TRUE' followed by claim info. Else, write out 'false'. - if (record.getPreviousContentClaimSection() != null && record.getPreviousContentClaimContainer() != null && record.getPreviousContentClaimIdentifier() != null) { - out.writeBoolean(true); - out.writeUTF(record.getPreviousContentClaimContainer()); - out.writeUTF(record.getPreviousContentClaimSection()); - out.writeUTF(record.getPreviousContentClaimIdentifier()); - if (record.getPreviousContentClaimOffset() == null) { - out.writeLong(0L); - } else { - out.writeLong(record.getPreviousContentClaimOffset()); - } - - if (record.getPreviousFileSize() == null) { - out.writeLong(0L); - } else { - out.writeLong(record.getPreviousFileSize()); - } + // If Previous Content Claim Info is present, write out a 'TRUE' followed by claim info. Else, write out 'false'. 
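+ // The previous-claim block mirrors the current-claim block above: container, section, and identifier are written as UTF strings, followed by the offset and previous file size as longs, with 0L substituted when either value is null.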
+ if (record.getPreviousContentClaimSection() != null && record.getPreviousContentClaimContainer() != null && record.getPreviousContentClaimIdentifier() != null) { + out.writeBoolean(true); + out.writeUTF(record.getPreviousContentClaimContainer()); + out.writeUTF(record.getPreviousContentClaimSection()); + out.writeUTF(record.getPreviousContentClaimIdentifier()); + if (record.getPreviousContentClaimOffset() == null) { + out.writeLong(0L); } else { - out.writeBoolean(false); + out.writeLong(record.getPreviousContentClaimOffset()); } - // write out the identifier of the destination queue. - writeNullableString(out, record.getSourceQueueIdentifier()); - - // Write type-specific info - if (recordType == ProvenanceEventType.FORK || recordType == ProvenanceEventType.JOIN || recordType == ProvenanceEventType.CLONE || recordType == ProvenanceEventType.REPLAY) { - writeUUIDs(out, record.getParentUuids()); - writeUUIDs(out, record.getChildUuids()); - } else if (recordType == ProvenanceEventType.RECEIVE) { - writeNullableString(out, record.getTransitUri()); - writeNullableString(out, record.getSourceSystemFlowFileIdentifier()); - } else if (recordType == ProvenanceEventType.FETCH) { - writeNullableString(out, record.getTransitUri()); - } else if (recordType == ProvenanceEventType.SEND) { - writeNullableString(out, record.getTransitUri()); - } else if (recordType == ProvenanceEventType.ADDINFO) { - writeNullableString(out, record.getAlternateIdentifierUri()); - } else if (recordType == ProvenanceEventType.ROUTE) { - writeNullableString(out, record.getRelationship()); + if (record.getPreviousFileSize() == null) { + out.writeLong(0L); + } else { + out.writeLong(record.getPreviousFileSize()); } + } else { + out.writeBoolean(false); + } - out.flush(); - recordCount++; - return byteCountingOut.getBytesWritten() - startBytes; - } catch (final IOException ioe) { - markDirty(); - throw ioe; + // write out the identifier of the destination queue. + writeNullableString(out, record.getSourceQueueIdentifier()); + + // Write type-specific info + if (recordType == ProvenanceEventType.FORK || recordType == ProvenanceEventType.JOIN || recordType == ProvenanceEventType.CLONE || recordType == ProvenanceEventType.REPLAY) { + writeUUIDs(out, record.getParentUuids()); + writeUUIDs(out, record.getChildUuids()); + } else if (recordType == ProvenanceEventType.RECEIVE) { + writeNullableString(out, record.getTransitUri()); + writeNullableString(out, record.getSourceSystemFlowFileIdentifier()); + } else if (recordType == ProvenanceEventType.FETCH) { + writeNullableString(out, record.getTransitUri()); + } else if (recordType == ProvenanceEventType.SEND) { + writeNullableString(out, record.getTransitUri()); + } else if (recordType == ProvenanceEventType.ADDINFO) { + writeNullableString(out, record.getAlternateIdentifierUri()); + } else if (recordType == ProvenanceEventType.ROUTE) { + writeNullableString(out, record.getRelationship()); } } @@ -292,110 +195,9 @@ public class StandardRecordWriter implements RecordWriter { out.write(bytes); } - @Override - public synchronized void close() throws IOException { - closed = true; - - logger.trace("Closing Record Writer for {}", file.getName()); - - lock(); - try { - try { - // We want to close 'out' only if the writer is not 'dirty'. - // If the writer is dirty, then there was a failure to write - // to disk, which means that we likely have a partial record written - // to disk. 
- // - // If we call close() on out, it will in turn call flush() on the underlying - // output stream, which is a BufferedOutputStream. As a result, we will end - // up flushing the buffer after a partially written record, which results in - // essentially random bytes being written to the repository, which causes - // corruption and un-recoverability. Since we will close the underlying 'rawOutStream' - // below, we will still appropriately clean up the resources help by this writer, so - // we are still OK in terms of closing all resources held by the writer. - if (out != null && !isDirty()) { - out.close(); - } - } finally { - try { - rawOutStream.close(); - } finally { - if (tocWriter != null) { - tocWriter.close(); - } - } - } - } catch (final IOException ioe) { - markDirty(); - throw ioe; - } finally { - unlock(); - } - } - - @Override - public boolean isClosed() { - return closed; - } - - @Override - public synchronized int getRecordsWritten() { - return recordCount; - } - - @Override - public void lock() { - lock.lock(); - } - - @Override - public void unlock() { - lock.unlock(); - } - - @Override - public boolean tryLock() { - final boolean obtainedLock = lock.tryLock(); - if (obtainedLock && dirtyFlag.get()) { - // once we have obtained the lock, we need to check if the writer - // has been marked dirty. If so, we cannot write to the underlying - // file, so we need to unlock and return false. Otherwise, it's okay - // to write to the underlying file, so return true. - lock.unlock(); - return false; - } - return obtainedLock; - } @Override public String toString() { return "StandardRecordWriter[file=" + file + "]"; } - - @Override - public void sync() throws IOException { - try { - if ( tocWriter != null ) { - tocWriter.sync(); - } - fos.getFD().sync(); - } catch (final IOException ioe) { - markDirty(); - throw ioe; - } - } - - @Override - public TocWriter getTocWriter() { - return tocWriter; - } - - @Override - public void markDirty() { - dirtyFlag.set(true); - } - - public boolean isDirty() { - return dirtyFlag.get(); - } } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java index f47466134e..f725208b10 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/lucene/IndexingAction.java @@ -19,7 +19,6 @@ package org.apache.nifi.provenance.lucene; import java.io.IOException; import java.util.Collections; import java.util.HashSet; -import java.util.Map; import java.util.Set; import org.apache.lucene.document.Document; @@ -45,7 +44,7 @@ public class IndexingAction { } private void addField(final Document doc, final SearchableField field, final String value, final Store store) { - if (value == null || (!nonAttributeSearchableFields.contains(field) && !field.isAttribute())) { + if (value == null || (!field.isAttribute() && !nonAttributeSearchableFields.contains(field))) { return; } @@ -54,11 +53,9 @@ public class IndexingAction { public void index(final StandardProvenanceEventRecord record, final IndexWriter indexWriter, final Integer blockIndex) throws IOException { 
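+ // Builds the Lucene Document for this event; addField() skips null values as well as fields that are neither FlowFile attributes nor contained in nonAttributeSearchableFields.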
- final Map attributes = record.getAttributes(); - final Document doc = new Document(); addField(doc, SearchableFields.FlowFileUUID, record.getFlowFileUuid(), Store.NO); - addField(doc, SearchableFields.Filename, attributes.get(CoreAttributes.FILENAME.key()), Store.NO); + addField(doc, SearchableFields.Filename, record.getAttribute(CoreAttributes.FILENAME.key()), Store.NO); addField(doc, SearchableFields.ComponentID, record.getComponentId(), Store.NO); addField(doc, SearchableFields.AlternateIdentifierURI, record.getAlternateIdentifierUri(), Store.NO); addField(doc, SearchableFields.EventType, record.getEventType().name(), Store.NO); @@ -68,13 +65,10 @@ public class IndexingAction { addField(doc, SearchableFields.ContentClaimContainer, record.getContentClaimContainer(), Store.NO); addField(doc, SearchableFields.ContentClaimIdentifier, record.getContentClaimIdentifier(), Store.NO); addField(doc, SearchableFields.SourceQueueIdentifier, record.getSourceQueueIdentifier(), Store.NO); - - if (nonAttributeSearchableFields.contains(SearchableFields.TransitURI)) { - addField(doc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO); - } + addField(doc, SearchableFields.TransitURI, record.getTransitUri(), Store.NO); for (final SearchableField searchableField : attributeSearchableFields) { - addField(doc, searchableField, LuceneUtil.truncateIndexField(attributes.get(searchableField.getSearchableFieldName())), Store.NO); + addField(doc, searchableField, LuceneUtil.truncateIndexField(record.getAttribute(searchableField.getSearchableFieldName())), Store.NO); } final String storageFilename = LuceneUtil.substringBefore(record.getStorageFilename(), "."); @@ -94,19 +88,20 @@ public class IndexingAction { } // If it's event is a FORK, or JOIN, add the FlowFileUUID for all child/parent UUIDs. - if (record.getEventType() == ProvenanceEventType.FORK || record.getEventType() == ProvenanceEventType.CLONE || record.getEventType() == ProvenanceEventType.REPLAY) { + final ProvenanceEventType eventType = record.getEventType(); + if (eventType == ProvenanceEventType.FORK || eventType == ProvenanceEventType.CLONE || eventType == ProvenanceEventType.REPLAY) { for (final String uuid : record.getChildUuids()) { if (!uuid.equals(record.getFlowFileUuid())) { addField(doc, SearchableFields.FlowFileUUID, uuid, Store.NO); } } - } else if (record.getEventType() == ProvenanceEventType.JOIN) { + } else if (eventType == ProvenanceEventType.JOIN) { for (final String uuid : record.getParentUuids()) { if (!uuid.equals(record.getFlowFileUuid())) { addField(doc, SearchableFields.FlowFileUUID, uuid, Store.NO); } } - } else if (record.getEventType() == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) { + } else if (eventType == ProvenanceEventType.RECEIVE && record.getSourceSystemFlowFileIdentifier() != null) { // If we get a receive with a Source System FlowFile Identifier, we add another Document that shows the UUID // that the Source System uses to refer to the data. 
final String sourceIdentifier = record.getSourceSystemFlowFileIdentifier(); diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecord.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecord.java new file mode 100644 index 0000000000..c9e7dc8b62 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecord.java @@ -0,0 +1,198 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance.schema; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.apache.nifi.provenance.ProvenanceEventRecord; +import org.apache.nifi.provenance.ProvenanceEventType; +import org.apache.nifi.provenance.StandardProvenanceEventRecord; +import org.apache.nifi.repository.schema.FieldMapRecord; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; + +public class EventRecord implements Record { + private final RecordSchema schema; + private final ProvenanceEventRecord event; + private final long eventId; + private final Record contentClaimRecord; + private final Record previousClaimRecord; + + public EventRecord(final ProvenanceEventRecord event, final long eventId, final RecordSchema schema, final RecordSchema contentClaimSchema) { + this.schema = schema; + this.event = event; + this.eventId = eventId; + this.contentClaimRecord = createContentClaimRecord(contentClaimSchema, event.getContentClaimContainer(), event.getContentClaimSection(), + event.getContentClaimIdentifier(), event.getContentClaimOffset(), event.getFileSize()); + this.previousClaimRecord = createContentClaimRecord(contentClaimSchema, event.getPreviousContentClaimContainer(), event.getPreviousContentClaimSection(), + event.getPreviousContentClaimIdentifier(), event.getPreviousContentClaimOffset(), event.getPreviousFileSize()); + } + + @Override + public RecordSchema getSchema() { + return schema; + } + + private static Record createContentClaimRecord(final RecordSchema contentClaimSchema, final String container, final String section, + final String identifier, final Long offset, final Long size) { + if (container == null || section == null || identifier == null) { + return null; + } + + final Map fieldValues = new HashMap<>(); + fieldValues.put(EventRecordFields.CONTENT_CLAIM_CONTAINER, container); + fieldValues.put(EventRecordFields.CONTENT_CLAIM_SECTION, section); + fieldValues.put(EventRecordFields.CONTENT_CLAIM_IDENTIFIER, 
identifier); + fieldValues.put(EventRecordFields.CONTENT_CLAIM_OFFSET, offset); + fieldValues.put(EventRecordFields.CONTENT_CLAIM_SIZE, size); + return new FieldMapRecord(fieldValues, contentClaimSchema); + } + + @Override + public Object getFieldValue(final String fieldName) { + switch (fieldName) { + case EventRecordFields.Names.EVENT_IDENTIFIER: + return eventId; + case EventRecordFields.Names.ALTERNATE_IDENTIFIER: + return event.getAlternateIdentifierUri(); + case EventRecordFields.Names.CHILD_UUIDS: + return event.getChildUuids(); + case EventRecordFields.Names.COMPONENT_ID: + return event.getComponentId(); + case EventRecordFields.Names.COMPONENT_TYPE: + return event.getComponentType(); + case EventRecordFields.Names.CONTENT_CLAIM: + return contentClaimRecord; + case EventRecordFields.Names.EVENT_DETAILS: + return event.getDetails(); + case EventRecordFields.Names.EVENT_DURATION: + return event.getEventDuration(); + case EventRecordFields.Names.EVENT_TIME: + return event.getEventTime(); + case EventRecordFields.Names.EVENT_TYPE: + return event.getEventType().name(); + case EventRecordFields.Names.FLOWFILE_ENTRY_DATE: + return event.getFlowFileEntryDate(); + case EventRecordFields.Names.FLOWFILE_UUID: + return event.getFlowFileUuid(); + case EventRecordFields.Names.LINEAGE_START_DATE: + return event.getLineageStartDate(); + case EventRecordFields.Names.PARENT_UUIDS: + return event.getParentUuids(); + case EventRecordFields.Names.PREVIOUS_ATTRIBUTES: + return event.getPreviousAttributes(); + case EventRecordFields.Names.PREVIOUS_CONTENT_CLAIM: + return previousClaimRecord; + case EventRecordFields.Names.RELATIONSHIP: + return event.getRelationship(); + case EventRecordFields.Names.SOURCE_QUEUE_IDENTIFIER: + return event.getSourceQueueIdentifier(); + case EventRecordFields.Names.SOURCE_SYSTEM_FLOWFILE_IDENTIFIER: + return event.getSourceSystemFlowFileIdentifier(); + case EventRecordFields.Names.TRANSIT_URI: + return event.getTransitUri(); + case EventRecordFields.Names.UPDATED_ATTRIBUTES: + return event.getUpdatedAttributes(); + } + + return null; + } + + @SuppressWarnings("unchecked") + public static StandardProvenanceEventRecord getEvent(final Record record, final String storageFilename, final long storageByteOffset, final int maxAttributeLength) { + final StandardProvenanceEventRecord.Builder builder = new StandardProvenanceEventRecord.Builder(); + builder.setAlternateIdentifierUri((String) record.getFieldValue(EventRecordFields.Names.ALTERNATE_IDENTIFIER)); + builder.setChildUuids((List) record.getFieldValue(EventRecordFields.Names.CHILD_UUIDS)); + builder.setComponentId((String) record.getFieldValue(EventRecordFields.Names.COMPONENT_ID)); + builder.setComponentType((String) record.getFieldValue(EventRecordFields.Names.COMPONENT_TYPE)); + builder.setDetails((String) record.getFieldValue(EventRecordFields.Names.EVENT_DETAILS)); + builder.setEventDuration((Long) record.getFieldValue(EventRecordFields.Names.EVENT_DURATION)); + builder.setEventTime((Long) record.getFieldValue(EventRecordFields.Names.EVENT_TIME)); + builder.setEventType(ProvenanceEventType.valueOf((String) record.getFieldValue(EventRecordFields.Names.EVENT_TYPE))); + builder.setFlowFileEntryDate((Long) record.getFieldValue(EventRecordFields.Names.FLOWFILE_ENTRY_DATE)); + builder.setFlowFileUUID((String) record.getFieldValue(EventRecordFields.Names.FLOWFILE_UUID)); + builder.setLineageStartDate((Long) record.getFieldValue(EventRecordFields.Names.LINEAGE_START_DATE)); + builder.setParentUuids((List) 
record.getFieldValue(EventRecordFields.Names.PARENT_UUIDS)); + builder.setPreviousAttributes(truncateAttributes((Map) record.getFieldValue(EventRecordFields.Names.PREVIOUS_ATTRIBUTES), maxAttributeLength)); + builder.setEventId((Long) record.getFieldValue(EventRecordFields.Names.EVENT_IDENTIFIER)); + builder.setRelationship((String) record.getFieldValue(EventRecordFields.Names.RELATIONSHIP)); + builder.setSourceQueueIdentifier((String) record.getFieldValue(EventRecordFields.Names.SOURCE_QUEUE_IDENTIFIER)); + builder.setSourceSystemFlowFileIdentifier((String) record.getFieldValue(EventRecordFields.Names.SOURCE_SYSTEM_FLOWFILE_IDENTIFIER)); + builder.setTransitUri((String) record.getFieldValue(EventRecordFields.Names.TRANSIT_URI)); + builder.setUpdatedAttributes(truncateAttributes((Map) record.getFieldValue(EventRecordFields.Names.UPDATED_ATTRIBUTES), maxAttributeLength)); + + builder.setStorageLocation(storageFilename, storageByteOffset); + + final Record currentClaimRecord = (Record) record.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM); + if (currentClaimRecord == null) { + builder.setCurrentContentClaim(null, null, null, null, 0L); + } else { + builder.setCurrentContentClaim( + (String) currentClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_CONTAINER), + (String) currentClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_SECTION), + (String) currentClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_IDENTIFIER), + (Long) currentClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_OFFSET), + (Long) currentClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_SIZE)); + } + + final Record previousClaimRecord = (Record) record.getFieldValue(EventRecordFields.Names.PREVIOUS_CONTENT_CLAIM); + if (previousClaimRecord != null) { + builder.setPreviousContentClaim( + (String) previousClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_CONTAINER), + (String) previousClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_SECTION), + (String) previousClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_IDENTIFIER), + (Long) previousClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_OFFSET), + (Long) previousClaimRecord.getFieldValue(EventRecordFields.Names.CONTENT_CLAIM_SIZE)); + } + + return builder.build(); + } + + private static Map truncateAttributes(final Map attributes, final int maxAttributeLength) { + if (attributes == null) { + return null; + } + + // Check if any attribute value exceeds the attribute length + final boolean anyExceedsLength = attributes.values().stream() + .filter(value -> value != null) + .anyMatch(value -> value.length() > maxAttributeLength); + + if (!anyExceedsLength) { + return attributes; + } + + final Map truncated = new HashMap<>(); + for (final Map.Entry entry : attributes.entrySet()) { + final String key = entry.getKey(); + final String value = entry.getValue(); + + if (value == null || value.length() <= maxAttributeLength) { + truncated.put(key, value); + continue; + } + + truncated.put(key, value.substring(0, maxAttributeLength)); + } + + return truncated; + } +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecordFields.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecordFields.java new file mode 100644 index 0000000000..0582dd89b6 --- 
/dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/EventRecordFields.java @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance.schema; + +import static org.apache.nifi.repository.schema.Repetition.EXACTLY_ONE; +import static org.apache.nifi.repository.schema.Repetition.ZERO_OR_MORE; +import static org.apache.nifi.repository.schema.Repetition.ZERO_OR_ONE; + +import org.apache.nifi.repository.schema.ComplexRecordField; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.MapRecordField; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.SimpleRecordField; + +public class EventRecordFields { + + public static class Names { + public static final String EVENT_IDENTIFIER = "Event ID"; + public static final String EVENT_TYPE = "Event Type"; + public static final String EVENT_TIME = "Event Time"; + public static final String FLOWFILE_ENTRY_DATE = "FlowFile Entry Date"; + public static final String EVENT_DURATION = "Event Duration"; + public static final String LINEAGE_START_DATE = "Lineage Start Date"; + public static final String COMPONENT_ID = "Component ID"; + public static final String COMPONENT_TYPE = "Component Type"; + public static final String FLOWFILE_UUID = "FlowFile UUID"; + public static final String EVENT_DETAILS = "Event Details"; + public static final String SOURCE_QUEUE_IDENTIFIER = "Source Queue Identifier"; + public static final String CONTENT_CLAIM = "Content Claim"; + public static final String PREVIOUS_CONTENT_CLAIM = "Previous Content Claim"; + public static final String PARENT_UUIDS = "Parent UUIDs"; + public static final String CHILD_UUIDS = "Child UUIDs"; + + public static final String ATTRIBUTE_NAME = "Attribute Name"; + public static final String ATTRIBUTE_VALUE = "Attribute Value"; + public static final String PREVIOUS_ATTRIBUTES = "Previous Attributes"; + public static final String UPDATED_ATTRIBUTES = "Updated Attributes"; + + public static final String CONTENT_CLAIM_CONTAINER = "Content Claim Container"; + public static final String CONTENT_CLAIM_SECTION = "Content Claim Section"; + public static final String CONTENT_CLAIM_IDENTIFIER = "Content Claim Identifier"; + public static final String CONTENT_CLAIM_OFFSET = "Content Claim Offset"; + public static final String CONTENT_CLAIM_SIZE = "Content Claim Size"; + + public static final String TRANSIT_URI = "Transit URI"; + public static final String SOURCE_SYSTEM_FLOWFILE_IDENTIFIER = "Source System FlowFile Identifier"; + public static final String ALTERNATE_IDENTIFIER = "Alternate Identifier"; + public static final String 
RELATIONSHIP = "Relationship"; + } + + // General Event fields. + public static final RecordField RECORD_IDENTIFIER = new SimpleRecordField(Names.EVENT_IDENTIFIER, FieldType.LONG, EXACTLY_ONE); + public static final RecordField EVENT_TYPE = new SimpleRecordField(Names.EVENT_TYPE, FieldType.STRING, EXACTLY_ONE); + public static final RecordField EVENT_TIME = new SimpleRecordField(Names.EVENT_TIME, FieldType.LONG, EXACTLY_ONE); + public static final RecordField FLOWFILE_ENTRY_DATE = new SimpleRecordField(Names.FLOWFILE_ENTRY_DATE, FieldType.LONG, EXACTLY_ONE); + public static final RecordField EVENT_DURATION = new SimpleRecordField(Names.EVENT_DURATION, FieldType.LONG, EXACTLY_ONE); + public static final RecordField LINEAGE_START_DATE = new SimpleRecordField(Names.LINEAGE_START_DATE, FieldType.LONG, EXACTLY_ONE); + public static final RecordField COMPONENT_ID = new SimpleRecordField(Names.COMPONENT_ID, FieldType.STRING, ZERO_OR_ONE); + public static final RecordField COMPONENT_TYPE = new SimpleRecordField(Names.COMPONENT_TYPE, FieldType.STRING, ZERO_OR_ONE); + public static final RecordField FLOWFILE_UUID = new SimpleRecordField(Names.FLOWFILE_UUID, FieldType.STRING, EXACTLY_ONE); + public static final RecordField EVENT_DETAILS = new SimpleRecordField(Names.EVENT_DETAILS, FieldType.STRING, ZERO_OR_ONE); + public static final RecordField SOURCE_QUEUE_IDENTIFIER = new SimpleRecordField(Names.SOURCE_QUEUE_IDENTIFIER, FieldType.STRING, ZERO_OR_ONE); + + // Attributes + public static final RecordField ATTRIBUTE_NAME = new SimpleRecordField(Names.ATTRIBUTE_NAME, FieldType.LONG_STRING, EXACTLY_ONE); + public static final RecordField ATTRIBUTE_VALUE_REQUIRED = new SimpleRecordField(Names.ATTRIBUTE_VALUE, FieldType.LONG_STRING, EXACTLY_ONE); + public static final RecordField ATTRIBUTE_VALUE_OPTIONAL = new SimpleRecordField(Names.ATTRIBUTE_VALUE, FieldType.LONG_STRING, ZERO_OR_ONE); + + public static final RecordField PREVIOUS_ATTRIBUTES = new MapRecordField(Names.PREVIOUS_ATTRIBUTES, ATTRIBUTE_NAME, ATTRIBUTE_VALUE_REQUIRED, EXACTLY_ONE); + public static final RecordField UPDATED_ATTRIBUTES = new MapRecordField(Names.UPDATED_ATTRIBUTES, ATTRIBUTE_NAME, ATTRIBUTE_VALUE_OPTIONAL, EXACTLY_ONE); + + // Content Claims + public static final RecordField CONTENT_CLAIM_CONTAINER = new SimpleRecordField(Names.CONTENT_CLAIM_CONTAINER, FieldType.STRING, EXACTLY_ONE); + public static final RecordField CONTENT_CLAIM_SECTION = new SimpleRecordField(Names.CONTENT_CLAIM_SECTION, FieldType.STRING, EXACTLY_ONE); + public static final RecordField CONTENT_CLAIM_IDENTIFIER = new SimpleRecordField(Names.CONTENT_CLAIM_IDENTIFIER, FieldType.STRING, EXACTLY_ONE); + public static final RecordField CONTENT_CLAIM_OFFSET = new SimpleRecordField(Names.CONTENT_CLAIM_OFFSET, FieldType.LONG, EXACTLY_ONE); + public static final RecordField CONTENT_CLAIM_SIZE = new SimpleRecordField(Names.CONTENT_CLAIM_SIZE, FieldType.LONG, EXACTLY_ONE); + public static final RecordField CURRENT_CONTENT_CLAIM = new ComplexRecordField(Names.CONTENT_CLAIM, ZERO_OR_ONE, + CONTENT_CLAIM_CONTAINER, CONTENT_CLAIM_SECTION, CONTENT_CLAIM_IDENTIFIER, CONTENT_CLAIM_OFFSET, CONTENT_CLAIM_SIZE); + public static final RecordField PREVIOUS_CONTENT_CLAIM = new ComplexRecordField(Names.PREVIOUS_CONTENT_CLAIM, ZERO_OR_ONE, + CONTENT_CLAIM_CONTAINER, CONTENT_CLAIM_SECTION, CONTENT_CLAIM_IDENTIFIER, CONTENT_CLAIM_OFFSET, CONTENT_CLAIM_SIZE); + + // EventType-Specific fields + // for FORK, JOIN, CLONE, REPLAY + public static final RecordField PARENT_UUIDS = new 
SimpleRecordField(Names.PARENT_UUIDS, FieldType.STRING, ZERO_OR_MORE); + public static final RecordField CHILD_UUIDS = new SimpleRecordField(Names.CHILD_UUIDS, FieldType.STRING, ZERO_OR_MORE); + + // for SEND/RECEIVE/FETCH + public static final RecordField TRANSIT_URI = new SimpleRecordField(Names.TRANSIT_URI, FieldType.STRING, ZERO_OR_ONE); + public static final RecordField SOURCE_SYSTEM_FLOWFILE_IDENTIFIER = new SimpleRecordField(Names.SOURCE_SYSTEM_FLOWFILE_IDENTIFIER, FieldType.STRING, ZERO_OR_ONE); + + // for ADD_INFO + public static final RecordField ALTERNATE_IDENTIFIER = new SimpleRecordField(Names.ALTERNATE_IDENTIFIER, FieldType.STRING, ZERO_OR_ONE); + public static final RecordField RELATIONSHIP = new SimpleRecordField(Names.RELATIONSHIP, FieldType.STRING, ZERO_OR_ONE); +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/ProvenanceEventSchema.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/ProvenanceEventSchema.java new file mode 100644 index 0000000000..d70bd392a5 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/schema/ProvenanceEventSchema.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.nifi.provenance.schema; + +import static org.apache.nifi.provenance.schema.EventRecordFields.ALTERNATE_IDENTIFIER; +import static org.apache.nifi.provenance.schema.EventRecordFields.CHILD_UUIDS; +import static org.apache.nifi.provenance.schema.EventRecordFields.COMPONENT_ID; +import static org.apache.nifi.provenance.schema.EventRecordFields.COMPONENT_TYPE; +import static org.apache.nifi.provenance.schema.EventRecordFields.CURRENT_CONTENT_CLAIM; +import static org.apache.nifi.provenance.schema.EventRecordFields.EVENT_DETAILS; +import static org.apache.nifi.provenance.schema.EventRecordFields.EVENT_DURATION; +import static org.apache.nifi.provenance.schema.EventRecordFields.EVENT_TIME; +import static org.apache.nifi.provenance.schema.EventRecordFields.EVENT_TYPE; +import static org.apache.nifi.provenance.schema.EventRecordFields.FLOWFILE_ENTRY_DATE; +import static org.apache.nifi.provenance.schema.EventRecordFields.FLOWFILE_UUID; +import static org.apache.nifi.provenance.schema.EventRecordFields.LINEAGE_START_DATE; +import static org.apache.nifi.provenance.schema.EventRecordFields.PARENT_UUIDS; +import static org.apache.nifi.provenance.schema.EventRecordFields.PREVIOUS_ATTRIBUTES; +import static org.apache.nifi.provenance.schema.EventRecordFields.PREVIOUS_CONTENT_CLAIM; +import static org.apache.nifi.provenance.schema.EventRecordFields.RECORD_IDENTIFIER; +import static org.apache.nifi.provenance.schema.EventRecordFields.RELATIONSHIP; +import static org.apache.nifi.provenance.schema.EventRecordFields.SOURCE_QUEUE_IDENTIFIER; +import static org.apache.nifi.provenance.schema.EventRecordFields.SOURCE_SYSTEM_FLOWFILE_IDENTIFIER; +import static org.apache.nifi.provenance.schema.EventRecordFields.TRANSIT_URI; +import static org.apache.nifi.provenance.schema.EventRecordFields.UPDATED_ATTRIBUTES; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; + +public class ProvenanceEventSchema { + public static final RecordSchema PROVENANCE_EVENT_SCHEMA_V1 = buildSchemaV1(); + + private static RecordSchema buildSchemaV1() { + final List fields = new ArrayList<>(); + fields.add(RECORD_IDENTIFIER); + fields.add(EVENT_TYPE); + fields.add(EVENT_TIME); + fields.add(FLOWFILE_ENTRY_DATE); + fields.add(EVENT_DURATION); + fields.add(LINEAGE_START_DATE); + fields.add(COMPONENT_ID); + fields.add(COMPONENT_TYPE); + fields.add(FLOWFILE_UUID); + fields.add(EVENT_DETAILS); + fields.add(PREVIOUS_ATTRIBUTES); + fields.add(UPDATED_ATTRIBUTES); + fields.add(CURRENT_CONTENT_CLAIM); + fields.add(PREVIOUS_CONTENT_CLAIM); + fields.add(SOURCE_QUEUE_IDENTIFIER); + + // EventType-Specific fields + fields.add(PARENT_UUIDS); // for FORK, JOIN, CLONE, REPLAY events + fields.add(CHILD_UUIDS); // for FORK, JOIN, CLONE, REPLAY events + fields.add(TRANSIT_URI); // for SEND/RECEIVE/FETCH events + fields.add(SOURCE_SYSTEM_FLOWFILE_IDENTIFIER); // for SEND/RECEIVE events + fields.add(ALTERNATE_IDENTIFIER); // for ADD_INFO events + fields.add(RELATIONSHIP); // for ROUTE events + + final RecordSchema schema = new RecordSchema(fields); + return schema; + } +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordReader.java 
b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordReader.java new file mode 100644 index 0000000000..056829ad9f --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordReader.java @@ -0,0 +1,282 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance.serialization; + +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.util.zip.GZIPInputStream; + +import org.apache.nifi.provenance.ProvenanceEventRecord; +import org.apache.nifi.provenance.StandardProvenanceEventRecord; +import org.apache.nifi.provenance.StandardRecordReader; +import org.apache.nifi.provenance.toc.TocReader; +import org.apache.nifi.stream.io.BufferedInputStream; +import org.apache.nifi.stream.io.ByteCountingInputStream; +import org.apache.nifi.stream.io.LimitingInputStream; +import org.apache.nifi.stream.io.StreamUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class CompressableRecordReader implements RecordReader { + private static final Logger logger = LoggerFactory.getLogger(StandardRecordReader.class); + + private final ByteCountingInputStream rawInputStream; + private final String filename; + private final int serializationVersion; + private final boolean compressed; + private final TocReader tocReader; + private final int headerLength; + private final int maxAttributeChars; + + private DataInputStream dis; + private ByteCountingInputStream byteCountingIn; + + public CompressableRecordReader(final InputStream in, final String filename, final int maxAttributeChars) throws IOException { + this(in, filename, null, maxAttributeChars); + } + + public CompressableRecordReader(final InputStream in, final String filename, final TocReader tocReader, final int maxAttributeChars) throws IOException { + logger.trace("Creating RecordReader for {}", filename); + + rawInputStream = new ByteCountingInputStream(in); + this.maxAttributeChars = maxAttributeChars; + + final InputStream limitedStream; + if (tocReader == null) { + limitedStream = rawInputStream; + } else { + final long offset1 = tocReader.getBlockOffset(1); + if (offset1 < 0) { + limitedStream = rawInputStream; + } else { + limitedStream = new LimitingInputStream(rawInputStream, offset1 - rawInputStream.getBytesConsumed()); + } + } + + final InputStream readableStream; + if (filename.endsWith(".gz")) { + readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream)); + compressed = 
true; + } else { + readableStream = new BufferedInputStream(limitedStream); + compressed = false; + } + + byteCountingIn = new ByteCountingInputStream(readableStream); + dis = new DataInputStream(byteCountingIn); + + final String repoClassName = dis.readUTF(); + final int serializationVersion = dis.readInt(); + headerLength = repoClassName.getBytes(StandardCharsets.UTF_8).length + 2 + 4; // 2 bytes for string length, 4 for integer. + + this.serializationVersion = serializationVersion; + this.filename = filename; + this.tocReader = tocReader; + + readHeader(dis, serializationVersion); + } + + @Override + public void skipToBlock(final int blockIndex) throws IOException { + if (tocReader == null) { + throw new IllegalStateException("Cannot skip to block " + blockIndex + " for Provenance Log " + filename + " because no Table-of-Contents file was found for this Log"); + } + + if (blockIndex < 0) { + throw new IllegalArgumentException("Cannot skip to block " + blockIndex + " because the value is negative"); + } + + if (blockIndex == getBlockIndex()) { + return; + } + + final long offset = tocReader.getBlockOffset(blockIndex); + if (offset < 0) { + throw new IOException("Unable to find block " + blockIndex + " in Provenance Log " + filename); + } + + final long curOffset = rawInputStream.getBytesConsumed(); + + final long bytesToSkip = offset - curOffset; + if (bytesToSkip >= 0) { + try { + StreamUtils.skip(rawInputStream, bytesToSkip); + logger.debug("Skipped stream from offset {} to {} ({} bytes skipped)", curOffset, offset, bytesToSkip); + } catch (final IOException e) { + throw new IOException("Failed to skip to offset " + offset + " for block " + blockIndex + " of Provenance Log " + filename, e); + } + + resetStreamForNextBlock(); + } + } + + private void resetStreamForNextBlock() throws IOException { + final InputStream limitedStream; + if (tocReader == null) { + limitedStream = rawInputStream; + } else { + final long offset = tocReader.getBlockOffset(1 + getBlockIndex()); + if (offset < 0) { + limitedStream = rawInputStream; + } else { + limitedStream = new LimitingInputStream(rawInputStream, offset - rawInputStream.getBytesConsumed()); + } + } + + final InputStream readableStream; + if (compressed) { + readableStream = new BufferedInputStream(new GZIPInputStream(limitedStream)); + } else { + readableStream = new BufferedInputStream(limitedStream); + } + + byteCountingIn = new ByteCountingInputStream(readableStream, rawInputStream.getBytesConsumed()); + dis = new DataInputStream(byteCountingIn); + } + + + @Override + public TocReader getTocReader() { + return tocReader; + } + + @Override + public boolean isBlockIndexAvailable() { + return tocReader != null; + } + + @Override + public int getBlockIndex() { + if (tocReader == null) { + throw new IllegalStateException("Cannot determine Block Index because no Table-of-Contents could be found for Provenance Log " + filename); + } + + return tocReader.getBlockIndex(rawInputStream.getBytesConsumed()); + } + + @Override + public long getBytesConsumed() { + return byteCountingIn.getBytesConsumed(); + } + + private boolean isData() throws IOException { + byteCountingIn.mark(1); + int nextByte = byteCountingIn.read(); + byteCountingIn.reset(); + + if (nextByte < 0) { + try { + resetStreamForNextBlock(); + } catch (final EOFException eof) { + return false; + } + + byteCountingIn.mark(1); + nextByte = byteCountingIn.read(); + byteCountingIn.reset(); + } + + return nextByte >= 0; + } + + @Override + public long getMaxEventId() throws IOException { + if 
(tocReader != null) { + final long lastBlockOffset = tocReader.getLastBlockOffset(); + skipToBlock(tocReader.getBlockIndex(lastBlockOffset)); + } + + ProvenanceEventRecord record; + ProvenanceEventRecord lastRecord = null; + try { + while ((record = nextRecord()) != null) { + lastRecord = record; + } + } catch (final EOFException eof) { + // This can happen if we stop NiFi while the record is being written. + // This is OK, we just ignore this record. The session will not have been + // committed, so we can just process the FlowFile again. + } + + return lastRecord == null ? -1L : lastRecord.getEventId(); + } + + @Override + public void close() throws IOException { + logger.trace("Closing Record Reader for {}", filename); + + try { + dis.close(); + } finally { + try { + rawInputStream.close(); + } finally { + if (tocReader != null) { + tocReader.close(); + } + } + } + } + + @Override + public void skip(final long bytesToSkip) throws IOException { + StreamUtils.skip(dis, bytesToSkip); + } + + @Override + public void skipTo(final long position) throws IOException { + // we are subtracting headerLength from the number of bytes consumed because we used to + // consider the offset of the first record "0" - now we consider it whatever position it + // really is in the stream. + final long currentPosition = byteCountingIn.getBytesConsumed() - headerLength; + if (currentPosition == position) { + return; + } + if (currentPosition > position) { + throw new IOException("Cannot skip to byte offset " + position + " in stream because already at byte offset " + currentPosition); + } + + final long toSkip = position - currentPosition; + StreamUtils.skip(dis, toSkip); + } + + protected String getFilename() { + return filename; + } + + protected int getMaxAttributeLength() { + return maxAttributeChars; + } + + @Override + public StandardProvenanceEventRecord nextRecord() throws IOException { + if (isData()) { + return nextRecord(dis, serializationVersion); + } else { + return null; + } + } + + protected abstract StandardProvenanceEventRecord nextRecord(DataInputStream in, int serializationVersion) throws IOException; + + protected void readHeader(DataInputStream in, int serializationVersion) throws IOException { + } +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordWriter.java new file mode 100644 index 0000000000..fa0e390f6e --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/CompressableRecordWriter.java @@ -0,0 +1,206 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance.serialization; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStream; + +import org.apache.nifi.provenance.AbstractRecordWriter; +import org.apache.nifi.provenance.ProvenanceEventRecord; +import org.apache.nifi.provenance.toc.TocWriter; +import org.apache.nifi.stream.io.BufferedOutputStream; +import org.apache.nifi.stream.io.ByteCountingOutputStream; +import org.apache.nifi.stream.io.DataOutputStream; +import org.apache.nifi.stream.io.GZIPOutputStream; +import org.apache.nifi.stream.io.NonCloseableOutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public abstract class CompressableRecordWriter extends AbstractRecordWriter { + private static final Logger logger = LoggerFactory.getLogger(CompressableRecordWriter.class); + + private final FileOutputStream fos; + private final ByteCountingOutputStream rawOutStream; + private final boolean compressed; + private final int uncompressedBlockSize; + + private DataOutputStream out; + private ByteCountingOutputStream byteCountingOut; + private long lastBlockOffset = 0L; + private int recordCount = 0; + + + public CompressableRecordWriter(final File file, final TocWriter writer, final boolean compressed, final int uncompressedBlockSize) throws IOException { + super(file, writer); + logger.trace("Creating Record Writer for {}", file.getName()); + + this.compressed = compressed; + this.fos = new FileOutputStream(file); + rawOutStream = new ByteCountingOutputStream(fos); + this.uncompressedBlockSize = uncompressedBlockSize; + } + + public CompressableRecordWriter(final OutputStream out, final TocWriter tocWriter, final boolean compressed, final int uncompressedBlockSize) throws IOException { + super(null, tocWriter); + this.fos = null; + + this.compressed = compressed; + this.uncompressedBlockSize = uncompressedBlockSize; + this.rawOutStream = new ByteCountingOutputStream(out); + } + + + @Override + public synchronized void writeHeader(final long firstEventId) throws IOException { + if (isDirty()) { + throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository"); + } + + try { + lastBlockOffset = rawOutStream.getBytesWritten(); + resetWriteStream(firstEventId); + out.writeUTF(getSerializationName()); + out.writeInt(getSerializationVersion()); + writeHeader(firstEventId, out); + out.flush(); + lastBlockOffset = rawOutStream.getBytesWritten(); + } catch (final IOException ioe) { + markDirty(); + throw ioe; + } + } + + + + /** + * Resets the streams to prepare for a new block + * + * @param eventId the first id that will be written to the new block + * @throws IOException if unable to flush/close the current streams properly + */ + private void resetWriteStream(final long eventId) throws IOException { + try { + if (out != null) { + out.flush(); + } + + final long byteOffset = (byteCountingOut == null) ? 
rawOutStream.getBytesWritten() : byteCountingOut.getBytesWritten(); + final TocWriter tocWriter = getTocWriter(); + + final OutputStream writableStream; + if (compressed) { + // because of the way that GZIPOutputStream works, we need to call close() on it in order for it + // to write its trailing bytes. But we don't want to close the underlying OutputStream, so we wrap + // the underlying OutputStream in a NonCloseableOutputStream + // We don't have to check if the writer is dirty because we will have already checked before calling this method. + if (out != null) { + out.close(); + } + + if (tocWriter != null) { + tocWriter.addBlockOffset(rawOutStream.getBytesWritten(), eventId); + } + + writableStream = new BufferedOutputStream(new GZIPOutputStream(new NonCloseableOutputStream(rawOutStream), 1), 65536); + } else { + if (tocWriter != null) { + tocWriter.addBlockOffset(rawOutStream.getBytesWritten(), eventId); + } + + writableStream = new BufferedOutputStream(rawOutStream, 65536); + } + + this.byteCountingOut = new ByteCountingOutputStream(writableStream, byteOffset); + this.out = new DataOutputStream(byteCountingOut); + resetDirtyFlag(); + } catch (final IOException ioe) { + markDirty(); + throw ioe; + } + } + + + + @Override + public long writeRecord(final ProvenanceEventRecord record, final long recordIdentifier) throws IOException { + if (isDirty()) { + throw new IOException("Cannot update Provenance Repository because this Record Writer has already failed to write to the Repository"); + } + + try { + final long startBytes = byteCountingOut.getBytesWritten(); + + // add a new block to the TOC if needed. + if (getTocWriter() != null && (startBytes - lastBlockOffset >= uncompressedBlockSize)) { + lastBlockOffset = startBytes; + + if (compressed) { + // because of the way that GZIPOutputStream works, we need to call close() on it in order for it + // to write its trailing bytes. 
But we don't want to close the underlying OutputStream, so we wrap + // the underlying OutputStream in a NonCloseableOutputStream + resetWriteStream(recordIdentifier); + } + } + + writeRecord(record, recordIdentifier, out); + + recordCount++; + return byteCountingOut.getBytesWritten() - startBytes; + } catch (final IOException ioe) { + markDirty(); + throw ioe; + } + } + + @Override + public void flush() throws IOException { + out.flush(); + } + + @Override + public synchronized int getRecordsWritten() { + return recordCount; + } + + @Override + protected OutputStream getBufferedOutputStream() { + return out; + } + + @Override + protected OutputStream getUnderlyingOutputStream() { + return fos; + } + + @Override + protected void syncUnderlyingOutputStream() throws IOException { + if (fos != null) { + fos.getFD().sync(); + } + } + + protected abstract void writeRecord(final ProvenanceEventRecord event, final long eventId, final DataOutputStream out) throws IOException; + + protected abstract void writeHeader(final long firstEventId, final DataOutputStream out) throws IOException; + + protected abstract int getSerializationVersion(); + + protected abstract String getSerializationName(); +} diff --git a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingOutputStream.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/EmptyRecordReader.java similarity index 51% rename from nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingOutputStream.java rename to nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/EmptyRecordReader.java index 7c778a28b9..38a4cc902a 100644 --- a/nifi-nar-bundles/nifi-framework-bundle/nifi-framework/nifi-framework-core/src/main/java/org/apache/nifi/controller/repository/io/ByteCountingOutputStream.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/EmptyRecordReader.java @@ -14,50 +14,59 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.nifi.controller.repository.io; + +package org.apache.nifi.provenance.serialization; import java.io.IOException; -import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicLong; -public class ByteCountingOutputStream extends OutputStream { +import org.apache.nifi.provenance.StandardProvenanceEventRecord; +import org.apache.nifi.provenance.toc.TocReader; - private final AtomicLong bytesWrittenHolder; - private final OutputStream out; - - public ByteCountingOutputStream(final OutputStream out, final AtomicLong longHolder) { - this.out = out; - this.bytesWrittenHolder = longHolder; - } - - @Override - public void write(int b) throws IOException { - out.write(b); - bytesWrittenHolder.getAndIncrement(); - } - - @Override - public void write(byte[] b) throws IOException { - write(b, 0, b.length); - } - - @Override - public void write(byte[] b, int off, int len) throws IOException { - out.write(b, off, len); - bytesWrittenHolder.getAndAdd(len); - } - - public long getBytesWritten() { - return bytesWrittenHolder.get(); - } - - @Override - public void flush() throws IOException { - out.flush(); - } +public class EmptyRecordReader implements RecordReader { @Override public void close() throws IOException { - out.close(); + } + + @Override + public StandardProvenanceEventRecord nextRecord() throws IOException { + return null; + } + + @Override + public void skip(long bytesToSkip) throws IOException { + } + + @Override + public void skipTo(long position) throws IOException { + } + + @Override + public void skipToBlock(int blockIndex) throws IOException { + } + + @Override + public int getBlockIndex() { + return 0; + } + + @Override + public boolean isBlockIndexAvailable() { + return false; + } + + @Override + public TocReader getTocReader() { + return null; + } + + @Override + public long getBytesConsumed() { + return 0; + } + + @Override + public long getMaxEventId() throws IOException { + return 0; } } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordReaders.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordReaders.java index 7889cd6952..24efcbde7b 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordReaders.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordReaders.java @@ -16,6 +16,9 @@ */ package org.apache.nifi.provenance.serialization; +import java.io.BufferedInputStream; +import java.io.DataInputStream; +import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; @@ -23,8 +26,12 @@ import java.io.IOException; import java.io.InputStream; import java.nio.file.Path; import java.util.Collection; +import java.util.zip.GZIPInputStream; +import org.apache.nifi.provenance.ByteArraySchemaRecordReader; +import org.apache.nifi.provenance.ByteArraySchemaRecordWriter; import org.apache.nifi.provenance.StandardRecordReader; +import org.apache.nifi.provenance.StandardRecordWriter; import org.apache.nifi.provenance.lucene.LuceneUtil; import org.apache.nifi.provenance.toc.StandardTocReader; import org.apache.nifi.provenance.toc.TocReader; @@ -101,11 +108,39 @@ public class 
RecordReaders { } final File tocFile = TocUtil.getTocFile(file); - if ( tocFile.exists() ) { - final TocReader tocReader = new StandardTocReader(tocFile); - return new StandardRecordReader(fis, filename, tocReader, maxAttributeChars); - } else { - return new StandardRecordReader(fis, filename, maxAttributeChars); + + final InputStream bufferedInStream = new BufferedInputStream(fis); + final String serializationName; + try { + bufferedInStream.mark(4096); + final InputStream in = filename.endsWith(".gz") ? new GZIPInputStream(bufferedInStream) : bufferedInStream; + final DataInputStream dis = new DataInputStream(in); + serializationName = dis.readUTF(); + bufferedInStream.reset(); + } catch (final EOFException eof) { + return new EmptyRecordReader(); + } + + switch (serializationName) { + case StandardRecordWriter.SERIALIZATION_NAME: { + if (tocFile.exists()) { + final TocReader tocReader = new StandardTocReader(tocFile); + return new StandardRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars); + } else { + return new StandardRecordReader(bufferedInStream, filename, maxAttributeChars); + } + } + case ByteArraySchemaRecordWriter.SERIALIZATION_NAME: { + if (tocFile.exists()) { + final TocReader tocReader = new StandardTocReader(tocFile); + return new ByteArraySchemaRecordReader(bufferedInStream, filename, tocReader, maxAttributeChars); + } else { + return new ByteArraySchemaRecordReader(bufferedInStream, filename, maxAttributeChars); + } + } + default: { + throw new IOException("Unable to read data from file " + file + " because the file was written using an unknown Serializer: " + serializationName); + } } } catch (final IOException ioe) { if ( fis != null ) { diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriter.java index b157ccc9cb..17dd75c02d 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriter.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriter.java @@ -43,6 +43,13 @@ public interface RecordWriter extends Closeable { */ long writeRecord(ProvenanceEventRecord record, long recordIdentifier) throws IOException; + /** + * Flushes any data that is held in a buffer to the underlying storage mechanism + * + * @throws IOException if unable to flush the bytes + */ + void flush() throws IOException; + /** * @return the number of Records that have been written to this RecordWriter */ diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriters.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriters.java index cf8f7b4822..be4c9cf7f5 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriters.java +++ 
b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/main/java/org/apache/nifi/provenance/serialization/RecordWriters.java @@ -19,7 +19,7 @@ package org.apache.nifi.provenance.serialization; import java.io.File; import java.io.IOException; -import org.apache.nifi.provenance.StandardRecordWriter; +import org.apache.nifi.provenance.ByteArraySchemaRecordWriter; import org.apache.nifi.provenance.toc.StandardTocWriter; import org.apache.nifi.provenance.toc.TocUtil; import org.apache.nifi.provenance.toc.TocWriter; @@ -27,13 +27,13 @@ import org.apache.nifi.provenance.toc.TocWriter; public class RecordWriters { private static final int DEFAULT_COMPRESSION_BLOCK_SIZE = 1024 * 1024; // 1 MB - public static RecordWriter newRecordWriter(final File file, final boolean compressed, final boolean createToc) throws IOException { - return newRecordWriter(file, compressed, createToc, DEFAULT_COMPRESSION_BLOCK_SIZE); + public static RecordWriter newSchemaRecordWriter(final File file, final boolean compressed, final boolean createToc) throws IOException { + return newSchemaRecordWriter(file, compressed, createToc, DEFAULT_COMPRESSION_BLOCK_SIZE); } - public static RecordWriter newRecordWriter(final File file, final boolean compressed, final boolean createToc, final int compressionBlockBytes) throws IOException { + public static RecordWriter newSchemaRecordWriter(final File file, final boolean compressed, final boolean createToc, final int compressionBlockBytes) throws IOException { final TocWriter tocWriter = createToc ? new StandardTocWriter(TocUtil.getTocFile(file), false, false) : null; - return new StandardRecordWriter(file, tocWriter, compressed, compressionBlockBytes); + return new ByteArraySchemaRecordWriter(file, tocWriter, compressed, compressionBlockBytes); } } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/AbstractTestRecordReaderWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/AbstractTestRecordReaderWriter.java new file mode 100644 index 0000000000..bae2364f21 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/AbstractTestRecordReaderWriter.java @@ -0,0 +1,204 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.provenance; + +import static org.apache.nifi.provenance.TestUtil.createFlowFile; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; +import java.util.UUID; + +import org.apache.nifi.provenance.serialization.RecordReader; +import org.apache.nifi.provenance.serialization.RecordWriter; +import org.apache.nifi.provenance.toc.StandardTocReader; +import org.apache.nifi.provenance.toc.StandardTocWriter; +import org.apache.nifi.provenance.toc.TocReader; +import org.apache.nifi.provenance.toc.TocUtil; +import org.apache.nifi.provenance.toc.TocWriter; +import org.apache.nifi.util.file.FileUtils; +import org.junit.BeforeClass; +import org.junit.Test; + +public abstract class AbstractTestRecordReaderWriter { + @BeforeClass + public static void setLogLevel() { + System.setProperty("org.slf4j.simpleLogger.log.org.apache.nifi.provenance", "INFO"); + } + + protected ProvenanceEventRecord createEvent() { + final Map attributes = new HashMap<>(); + attributes.put("filename", "1.txt"); + attributes.put("uuid", UUID.randomUUID().toString()); + + final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder(); + builder.setEventTime(System.currentTimeMillis()); + builder.setEventType(ProvenanceEventType.RECEIVE); + builder.setTransitUri("nifi://unit-test"); + builder.fromFlowFile(createFlowFile(3L, 3000L, attributes)); + builder.setComponentId("1234"); + builder.setComponentType("dummy processor"); + final ProvenanceEventRecord record = builder.build(); + + return record; + } + + @Test + public void testSimpleWriteWithToc() throws IOException { + final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite"); + final File tocFile = TocUtil.getTocFile(journalFile); + final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); + final RecordWriter writer = createWriter(journalFile, tocWriter, false, 1024 * 1024); + + writer.writeHeader(1L); + writer.writeRecord(createEvent(), 1L); + writer.close(); + + final TocReader tocReader = new StandardTocReader(tocFile); + + try (final FileInputStream fis = new FileInputStream(journalFile); + final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) { + assertEquals(0, reader.getBlockIndex()); + reader.skipToBlock(0); + final StandardProvenanceEventRecord recovered = reader.nextRecord(); + assertNotNull(recovered); + + assertEquals("nifi://unit-test", recovered.getTransitUri()); + assertNull(reader.nextRecord()); + } + + FileUtils.deleteFile(journalFile.getParentFile(), true); + } + + + @Test + public void testSingleRecordCompressed() throws IOException { + final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz"); + final File tocFile = TocUtil.getTocFile(journalFile); + final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); + final RecordWriter writer = createWriter(journalFile, tocWriter, true, 8192); + + writer.writeHeader(1L); + writer.writeRecord(createEvent(), 1L); + writer.close(); + + final TocReader tocReader = new StandardTocReader(tocFile); + + try (final FileInputStream fis = new FileInputStream(journalFile); + final RecordReader reader = createReader(fis, 
journalFile.getName(), tocReader, 2048)) { + assertEquals(0, reader.getBlockIndex()); + reader.skipToBlock(0); + final StandardProvenanceEventRecord recovered = reader.nextRecord(); + assertNotNull(recovered); + + assertEquals("nifi://unit-test", recovered.getTransitUri()); + assertNull(reader.nextRecord()); + } + + FileUtils.deleteFile(journalFile.getParentFile(), true); + } + + + @Test + public void testMultipleRecordsSameBlockCompressed() throws IOException { + final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz"); + final File tocFile = TocUtil.getTocFile(journalFile); + final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); + // new record each 1 MB of uncompressed data + final RecordWriter writer = createWriter(journalFile, tocWriter, true, 1024 * 1024); + + writer.writeHeader(1L); + for (int i = 0; i < 10; i++) { + writer.writeRecord(createEvent(), i); + } + writer.close(); + + final TocReader tocReader = new StandardTocReader(tocFile); + + try (final FileInputStream fis = new FileInputStream(journalFile); + final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) { + for (int i = 0; i < 10; i++) { + assertEquals(0, reader.getBlockIndex()); + + // call skipToBlock half the time to ensure that we can; avoid calling it + // the other half of the time to ensure that it's okay. + if (i <= 5) { + reader.skipToBlock(0); + } + + final StandardProvenanceEventRecord recovered = reader.nextRecord(); + assertNotNull(recovered); + assertEquals("nifi://unit-test", recovered.getTransitUri()); + } + + assertNull(reader.nextRecord()); + } + + FileUtils.deleteFile(journalFile.getParentFile(), true); + } + + + @Test + public void testMultipleRecordsMultipleBlocksCompressed() throws IOException { + final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz"); + final File tocFile = TocUtil.getTocFile(journalFile); + final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); + // new block each 10 bytes + final RecordWriter writer = createWriter(journalFile, tocWriter, true, 100); + + writer.writeHeader(1L); + for (int i = 0; i < 10; i++) { + writer.writeRecord(createEvent(), i); + } + writer.close(); + + final TocReader tocReader = new StandardTocReader(tocFile); + + try (final FileInputStream fis = new FileInputStream(journalFile); + final RecordReader reader = createReader(fis, journalFile.getName(), tocReader, 2048)) { + for (int i = 0; i < 10; i++) { + final StandardProvenanceEventRecord recovered = reader.nextRecord(); + System.out.println(recovered); + assertNotNull(recovered); + assertEquals(i, recovered.getEventId()); + assertEquals("nifi://unit-test", recovered.getTransitUri()); + + final Map updatedAttrs = recovered.getUpdatedAttributes(); + assertNotNull(updatedAttrs); + assertEquals(2, updatedAttrs.size()); + assertEquals("1.txt", updatedAttrs.get("filename")); + assertTrue(updatedAttrs.containsKey("uuid")); + } + + assertNull(reader.nextRecord()); + } + + FileUtils.deleteFile(journalFile.getParentFile(), true); + } + + protected abstract RecordWriter createWriter(File file, TocWriter tocWriter, boolean compressed, int uncompressedBlockSize) throws IOException; + + protected abstract RecordReader createReader(InputStream in, String journalFilename, TocReader tocReader, int maxAttributeSize) throws IOException; +} diff --git 
a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/LoopingInputStream.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/LoopingInputStream.java new file mode 100644 index 0000000000..b6a0bfa44b --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/LoopingInputStream.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance; + +import java.io.IOException; +import java.io.InputStream; + +public class LoopingInputStream extends InputStream { + + private final byte[] buffer; + private int index; + + private final byte[] header; + private int headerIndex; + private boolean headerComplete = false; + + public LoopingInputStream(final byte[] header, final byte[] toRepeat) { + this.header = header; + this.buffer = toRepeat; + } + + @Override + public int read(byte[] b) throws IOException { + return read(b, 0, b.length); + } + + @Override + public int read() throws IOException { + if (headerComplete) { + final byte nextByte = buffer[index++]; + if (index >= buffer.length) { + index = 0; + } + + final int returnValue = nextByte & 0xFF; + return returnValue; + } else { + final byte nextByte = header[headerIndex++]; + if (headerIndex >= header.length) { + headerComplete = true; + } + + final int returnValue = nextByte & 0xFF; + return returnValue; + } + } + + @Override + public int read(final byte[] b, final int off, final int len) throws IOException { + if (headerComplete) { + final int toRead = Math.min(len, buffer.length - index); + System.arraycopy(buffer, index, b, off, toRead); + index += toRead; + if (index >= buffer.length) { + index = 0; + } + + return toRead; + } else { + final int toRead = Math.min(len, header.length - headerIndex); + System.arraycopy(header, headerIndex, b, off, toRead); + headerIndex += toRead; + if (headerIndex >= header.length) { + headerComplete = true; + } + + return toRead; + } + } + + @Override + public int available() throws IOException { + return 1; + } + + @Override + public void close() throws IOException { + } +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java index 80a67ebd4b..dec2a7bc62 100644 --- 
a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestPersistentProvenanceRepository.java @@ -38,7 +38,6 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; @@ -86,9 +85,6 @@ import org.junit.Ignore; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TestName; -import org.mockito.Mockito; -import org.mockito.invocation.InvocationOnMock; -import org.mockito.stubbing.Answer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -1161,6 +1157,7 @@ public class TestPersistentProvenanceRepository { } @Test + @Ignore("This test relies too much on timing of background events by using Thread.sleep().") public void testIndexDirectoryRemoved() throws InterruptedException, IOException, ParseException { final RepositoryConfiguration config = createConfiguration(); config.setMaxRecordLife(5, TimeUnit.MINUTES); @@ -1198,6 +1195,10 @@ public class TestPersistentProvenanceRepository { Thread.sleep(2000L); + final FileFilter indexFileFilter = file -> file.getName().startsWith("index"); + final int numIndexDirs = config.getStorageDirectories().get(0).listFiles(indexFileFilter).length; + assertEquals(1, numIndexDirs); + // add more records so that we will create a new index final long secondBatchStartTime = System.currentTimeMillis(); for (int i = 0; i < 10; i++) { @@ -1221,12 +1222,6 @@ public class TestPersistentProvenanceRepository { assertEquals(20, result.getMatchingEvents().size()); // Ensure index directories exists - final FileFilter indexFileFilter = new FileFilter() { - @Override - public boolean accept(File pathname) { - return pathname.getName().startsWith("index"); - } - }; File[] indexDirs = config.getStorageDirectories().get(0).listFiles(indexFileFilter); assertEquals(2, indexDirs.length); @@ -1777,8 +1772,12 @@ public class TestPersistentProvenanceRepository { repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS); repo.initialize(getEventReporter(), null, null); + final String maxLengthChars = "12345678901234567890123456789012345678901234567890"; final Map attributes = new HashMap<>(); attributes.put("75chars", "123456789012345678901234567890123456789012345678901234567890123456789012345"); + attributes.put("51chars", "123456789012345678901234567890123456789012345678901"); + attributes.put("50chars", "12345678901234567890123456789012345678901234567890"); + attributes.put("49chars", "1234567890123456789012345678901234567890123456789"); attributes.put("nullChar", null); final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder(); @@ -1797,11 +1796,14 @@ public class TestPersistentProvenanceRepository { final ProvenanceEventRecord retrieved = repo.getEvent(0L, null); assertNotNull(retrieved); assertEquals("12345678-0000-0000-0000-012345678912", retrieved.getAttributes().get("uuid")); - assertEquals("12345678901234567890123456789012345678901234567890", retrieved.getAttributes().get("75chars")); + assertEquals(maxLengthChars, retrieved.getAttributes().get("75chars")); + 
assertEquals(maxLengthChars, retrieved.getAttributes().get("51chars")); + assertEquals(maxLengthChars, retrieved.getAttributes().get("50chars")); + assertEquals(maxLengthChars.substring(0, 49), retrieved.getAttributes().get("49chars")); } - @Test(timeout=5000) + @Test(timeout = 15000) public void testExceptionOnIndex() throws IOException { final RepositoryConfiguration config = createConfiguration(); config.setMaxAttributeChars(50); @@ -1914,112 +1916,6 @@ public class TestPersistentProvenanceRepository { } - @Test - public void testBehaviorOnOutOfMemory() throws IOException, InterruptedException { - final RepositoryConfiguration config = createConfiguration(); - config.setMaxEventFileLife(3, TimeUnit.MINUTES); - config.setJournalCount(4); - - // Create a repository that overrides the createWriters() method so that we can return writers that will throw - // OutOfMemoryError where we want to - final AtomicBoolean causeOOME = new AtomicBoolean(false); - repo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS) { - @Override - protected RecordWriter[] createWriters(RepositoryConfiguration config, long initialRecordId) throws IOException { - final RecordWriter[] recordWriters = super.createWriters(config, initialRecordId); - - // Spy on each of the writers so that a call to writeUUID throws an OutOfMemoryError if we set the - // causeOOME flag to true - final StandardRecordWriter[] spiedWriters = new StandardRecordWriter[recordWriters.length]; - for (int i = 0; i < recordWriters.length; i++) { - final StandardRecordWriter writer = (StandardRecordWriter) recordWriters[i]; - - spiedWriters[i] = Mockito.spy(writer); - Mockito.doAnswer(new Answer() { - @Override - public Object answer(final InvocationOnMock invocation) throws Throwable { - if (causeOOME.get()) { - throw new OutOfMemoryError(); - } else { - writer.writeUUID(invocation.getArgumentAt(0, DataOutputStream.class), invocation.getArgumentAt(1, String.class)); - } - return null; - } - }).when(spiedWriters[i]).writeUUID(Mockito.any(DataOutputStream.class), Mockito.any(String.class)); - } - - // return the writers that we are spying on - return spiedWriters; - } - }; - repo.initialize(getEventReporter(), null, null); - - final Map attributes = new HashMap<>(); - attributes.put("75chars", "123456789012345678901234567890123456789012345678901234567890123456789012345"); - - final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder(); - builder.setEventTime(System.currentTimeMillis()); - builder.setEventType(ProvenanceEventType.RECEIVE); - builder.setTransitUri("nifi://unit-test"); - attributes.put("uuid", "12345678-0000-0000-0000-012345678912"); - builder.fromFlowFile(createFlowFile(3L, 3000L, attributes)); - builder.setComponentId("1234"); - builder.setComponentType("dummy processor"); - - // first make sure that we are able to write to the repo successfully. 
- for (int i = 0; i < 4; i++) { - final ProvenanceEventRecord record = builder.build(); - repo.registerEvent(record); - } - - // cause OOME to occur - causeOOME.set(true); - - // write 4 times to make sure that we mark all partitions as dirty - for (int i = 0; i < 4; i++) { - final ProvenanceEventRecord record = builder.build(); - try { - repo.registerEvent(record); - Assert.fail("Expected OutOfMemoryError but was able to register event"); - } catch (final OutOfMemoryError oome) { - } - } - - // now that all partitions are dirty, ensure that as we keep trying to write, we get an IllegalStateException - // and that we don't corrupt the repository by writing partial records - for (int i = 0; i < 8; i++) { - final ProvenanceEventRecord record = builder.build(); - try { - repo.registerEvent(record); - Assert.fail("Expected OutOfMemoryError but was able to register event"); - } catch (final IllegalStateException ise) { - } - } - - // close repo so that we can create a new one to recover records - repo.close(); - - // make sure we can recover - final PersistentProvenanceRepository recoveryRepo = new PersistentProvenanceRepository(config, DEFAULT_ROLLOVER_MILLIS) { - @Override - protected Set recoverJournalFiles() throws IOException { - try { - return super.recoverJournalFiles(); - } catch (final IOException ioe) { - Assert.fail("Failed to recover properly"); - return null; - } - } - }; - - try { - recoveryRepo.initialize(getEventReporter(), null, null); - } finally { - recoveryRepo.close(); - } - } - - private static class ReportedEvent { private final Severity severity; private final String category; diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestSchemaRecordReaderWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestSchemaRecordReaderWriter.java new file mode 100644 index 0000000000..b9bb85ecd8 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestSchemaRecordReaderWriter.java @@ -0,0 +1,315 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nifi.provenance; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; +import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; + +import org.apache.nifi.provenance.schema.EventRecord; +import org.apache.nifi.provenance.schema.EventRecordFields; +import org.apache.nifi.provenance.schema.ProvenanceEventSchema; +import org.apache.nifi.provenance.serialization.RecordReader; +import org.apache.nifi.provenance.serialization.RecordWriter; +import org.apache.nifi.provenance.toc.NopTocWriter; +import org.apache.nifi.provenance.toc.StandardTocReader; +import org.apache.nifi.provenance.toc.StandardTocWriter; +import org.apache.nifi.provenance.toc.TocReader; +import org.apache.nifi.provenance.toc.TocUtil; +import org.apache.nifi.provenance.toc.TocWriter; +import org.apache.nifi.repository.schema.FieldMapRecord; +import org.apache.nifi.repository.schema.FieldType; +import org.apache.nifi.repository.schema.Record; +import org.apache.nifi.repository.schema.RecordField; +import org.apache.nifi.repository.schema.RecordSchema; +import org.apache.nifi.repository.schema.Repetition; +import org.apache.nifi.repository.schema.SimpleRecordField; +import org.apache.nifi.stream.io.DataOutputStream; +import org.apache.nifi.stream.io.NullOutputStream; +import org.junit.Before; +import org.junit.Ignore; +import org.junit.Test; + +public class TestSchemaRecordReaderWriter extends AbstractTestRecordReaderWriter { + + private File journalFile; + private File tocFile; + + @Before + public void setup() { + journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testFieldAddedToSchema"); + tocFile = TocUtil.getTocFile(journalFile); + } + + + @Test + public void testFieldAddedToSchema() throws IOException { + final RecordField unitTestField = new SimpleRecordField("Unit Test Field", FieldType.STRING, Repetition.EXACTLY_ONE); + final Consumer> schemaModifier = fields -> fields.add(unitTestField); + + final Map toAdd = new HashMap<>(); + toAdd.put(unitTestField, "hello"); + + try (final ByteArraySchemaRecordWriter writer = createSchemaWriter(schemaModifier, toAdd)) { + writer.writeHeader(1L); + writer.writeRecord(createEvent(), 3L); + writer.writeRecord(createEvent(), 3L); + } + + try (final InputStream in = new FileInputStream(journalFile); + final TocReader tocReader = new StandardTocReader(tocFile); + final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) { + + for (int i = 0; i < 2; i++) { + final StandardProvenanceEventRecord event = reader.nextRecord(); + assertNotNull(event); + assertEquals(3L, event.getEventId()); + assertEquals("1234", event.getComponentId()); + assertEquals(ProvenanceEventType.RECEIVE, event.getEventType()); + + assertNotNull(event.getUpdatedAttributes()); + assertFalse(event.getUpdatedAttributes().isEmpty()); + } + } + } + + @Test + public void testFieldRemovedFromSchema() throws IOException { + final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); + try { + // Create a schema that has the fields modified + final RecordSchema schemaV1 = 
ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1; + final List fields = new ArrayList<>(schemaV1.getFields()); + fields.remove(new SimpleRecordField(EventRecordFields.Names.UPDATED_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE)); + fields.remove(new SimpleRecordField(EventRecordFields.Names.PREVIOUS_ATTRIBUTES, FieldType.STRING, Repetition.EXACTLY_ONE)); + final RecordSchema recordSchema = new RecordSchema(fields); + + // Create a record writer whose schema does not contain updated attributes or previous attributes. + // This means that we must also override the method that writes out attributes so that we are able + // to avoid actually writing them out. + final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, tocWriter, false, 0) { + @Override + public void writeHeader(long firstEventId, DataOutputStream out) throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + recordSchema.writeTo(baos); + + out.writeInt(baos.size()); + baos.writeTo(out); + } + + @Override + protected Record createRecord(final ProvenanceEventRecord event, final long eventId) { + final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventRecordFields.Names.CONTENT_CLAIM).getSubFields()); + return new EventRecord(event, eventId, recordSchema, contentClaimSchema); + } + }; + + try { + writer.writeHeader(1L); + writer.writeRecord(createEvent(), 3L); + writer.writeRecord(createEvent(), 3L); + } finally { + writer.close(); + } + } finally { + tocWriter.close(); + } + + // Read the records in and make sure that they have the info that we expect. + try (final InputStream in = new FileInputStream(journalFile); + final TocReader tocReader = new StandardTocReader(tocFile); + final RecordReader reader = createReader(in, journalFile.getName(), tocReader, 10000)) { + + for (int i = 0; i < 2; i++) { + final StandardProvenanceEventRecord event = reader.nextRecord(); + assertNotNull(event); + assertEquals(3L, event.getEventId()); + assertEquals(ProvenanceEventType.RECEIVE, event.getEventType()); + + // We will still have a Map for updated attributes because the + // Provenance Event Builder will create an empty map. 
+ assertNotNull(event.getUpdatedAttributes()); + assertTrue(event.getUpdatedAttributes().isEmpty()); + } + } + } + + + /** + * Creates a SchemaRecordWriter that uses a modified schema + * + * @param fieldModifier the callback for modifying the schema + * @return a SchemaRecordWriter that uses the modified schema + * @throws IOException if unable to create the writer + */ + private ByteArraySchemaRecordWriter createSchemaWriter(final Consumer> fieldModifier, final Map fieldsToAdd) throws IOException { + final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); + + // Create a schema that has the fields modified + final RecordSchema schemaV1 = ProvenanceEventSchema.PROVENANCE_EVENT_SCHEMA_V1; + final List fields = new ArrayList<>(schemaV1.getFields()); + fieldModifier.accept(fields); + + final RecordSchema recordSchema = new RecordSchema(fields); + final RecordSchema contentClaimSchema = new RecordSchema(recordSchema.getField(EventRecordFields.Names.CONTENT_CLAIM).getSubFields()); + + final ByteArraySchemaRecordWriter writer = new ByteArraySchemaRecordWriter(journalFile, tocWriter, false, 0) { + @Override + public void writeHeader(long firstEventId, DataOutputStream out) throws IOException { + final ByteArrayOutputStream baos = new ByteArrayOutputStream(); + recordSchema.writeTo(baos); + + out.writeInt(baos.size()); + baos.writeTo(out); + } + + @Override + protected Record createRecord(final ProvenanceEventRecord event, final long eventId) { + final Map values = new HashMap<>(); + + final EventRecord eventRecord = new EventRecord(event, eventId, recordSchema, contentClaimSchema); + for (final RecordField field : recordSchema.getFields()) { + final Object value = eventRecord.getFieldValue(field); + values.put(field, value); + } + + values.putAll(fieldsToAdd); + return new FieldMapRecord(values, recordSchema); + } + }; + + return writer; + } + + @Test + @Ignore("For local testing only") + public void testWritePerformance() throws IOException { + // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before + // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects + // on performance of the repository. + final ProvenanceEventRecord event = createEvent(); + + final TocWriter tocWriter = new NopTocWriter(); + + final int numEvents = 10_000_000; + final long startNanos = System.nanoTime(); + try (final OutputStream nullOut = new NullOutputStream(); + final RecordWriter writer = new ByteArraySchemaRecordWriter(nullOut, tocWriter, false, 0)) { + + writer.writeHeader(0L); + + for (int i = 0; i < numEvents; i++) { + writer.writeRecord(event, i); + } + + } + + final long nanos = System.nanoTime() - startNanos; + final long millis = TimeUnit.NANOSECONDS.toMillis(nanos); + System.out.println("Took " + millis + " millis to write " + numEvents + " events"); + } + + + @Test + @Ignore("For local performance testing only") + public void testReadPerformance() throws IOException, InterruptedException { + // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before + // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects + // on performance of the repository. 
+ final ProvenanceEventRecord event = createEvent(); + + final TocReader tocReader = null; + + final byte[] header; + try (final ByteArrayOutputStream headerOut = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(headerOut)) { + + final RecordWriter schemaWriter = new ByteArraySchemaRecordWriter(out, null, false, 0); + schemaWriter.writeHeader(1L); + + header = headerOut.toByteArray(); + } + + final byte[] serializedRecord; + try (final ByteArrayOutputStream headerOut = new ByteArrayOutputStream(); + final RecordWriter writer = new ByteArraySchemaRecordWriter(headerOut, null, false, 0)) { + + writer.writeHeader(1L); + headerOut.reset(); + + writer.writeRecord(event, 1L); + writer.flush(); + serializedRecord = headerOut.toByteArray(); + } + + final int numEvents = 10_000_000; + final int recordBytes = serializedRecord.length; + final long totalRecordBytes = (long) recordBytes * (long) numEvents; + + final long startNanos = System.nanoTime(); + try (final InputStream in = new LoopingInputStream(header, serializedRecord); + final RecordReader reader = new ByteArraySchemaRecordReader(in, "filename", tocReader, 100000)) { + + for (int i = 0; i < numEvents; i++) { + reader.nextRecord(); + } + } + + final long nanos = System.nanoTime() - startNanos; + final long millis = TimeUnit.NANOSECONDS.toMillis(nanos); + final double seconds = millis / 1000D; + final long bytesPerSecond = (long) (totalRecordBytes / seconds); + final long megaBytesPerSecond = bytesPerSecond / 1024 / 1024; + System.out.println("Took " + millis + " millis to read " + numEvents + " events or " + megaBytesPerSecond + " MB/sec"); + } + + + @Override + protected RecordWriter createWriter(File file, TocWriter tocWriter, boolean compressed, int uncompressedBlockSize) throws IOException { + return new ByteArraySchemaRecordWriter(file, tocWriter, compressed, uncompressedBlockSize); + } + + + @Override + protected RecordReader createReader(InputStream in, String journalFilename, TocReader tocReader, int maxAttributeSize) throws IOException { + final ByteArraySchemaRecordReader reader = new ByteArraySchemaRecordReader(in, journalFilename, tocReader, maxAttributeSize); + return reader; + } + + private static interface WriteRecordInterceptor { + void writeRawRecord(ProvenanceEventRecord event, long recordIdentifier, DataOutputStream out) throws IOException; + } + + private static WriteRecordInterceptor NOP_INTERCEPTOR = (event, id, out) -> {}; + private static WriteRecordInterceptor WRITE_DUMMY_STRING_INTERCEPTOR = (event, id, out) -> out.writeUTF("hello"); +} diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestStandardRecordReaderWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestStandardRecordReaderWriter.java index e11502a60b..cc69b186fa 100644 --- a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestStandardRecordReaderWriter.java +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/TestStandardRecordReaderWriter.java @@ -16,174 +16,106 @@ */ package org.apache.nifi.provenance; -import static org.apache.nifi.provenance.TestUtil.createFlowFile; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static 
org.junit.Assert.assertNull; - +import java.io.ByteArrayOutputStream; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; -import java.util.UUID; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.concurrent.TimeUnit; -import org.apache.nifi.provenance.toc.StandardTocReader; -import org.apache.nifi.provenance.toc.StandardTocWriter; +import org.apache.nifi.provenance.serialization.RecordReader; +import org.apache.nifi.provenance.serialization.RecordWriter; +import org.apache.nifi.provenance.toc.NopTocWriter; import org.apache.nifi.provenance.toc.TocReader; -import org.apache.nifi.provenance.toc.TocUtil; import org.apache.nifi.provenance.toc.TocWriter; -import org.apache.nifi.util.file.FileUtils; -import org.junit.BeforeClass; +import org.apache.nifi.stream.io.DataOutputStream; +import org.apache.nifi.stream.io.NullOutputStream; +import org.junit.Ignore; import org.junit.Test; -public class TestStandardRecordReaderWriter { - @BeforeClass - public static void setLogLevel() { - System.setProperty("org.slf4j.simpleLogger.log.org.apache.nifi.provenance", "DEBUG"); - } - - private ProvenanceEventRecord createEvent() { - final Map attributes = new HashMap<>(); - attributes.put("filename", "1.txt"); - attributes.put("uuid", UUID.randomUUID().toString()); - - final ProvenanceEventBuilder builder = new StandardProvenanceEventRecord.Builder(); - builder.setEventTime(System.currentTimeMillis()); - builder.setEventType(ProvenanceEventType.RECEIVE); - builder.setTransitUri("nifi://unit-test"); - builder.fromFlowFile(createFlowFile(3L, 3000L, attributes)); - builder.setComponentId("1234"); - builder.setComponentType("dummy processor"); - final ProvenanceEventRecord record = builder.build(); - - return record; - } - - @Test - public void testSimpleWriteWithToc() throws IOException { - final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite"); - final File tocFile = TocUtil.getTocFile(journalFile); - final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); - final StandardRecordWriter writer = new StandardRecordWriter(journalFile, tocWriter, false, 1024 * 1024); - - writer.writeHeader(1L); - writer.writeRecord(createEvent(), 1L); - writer.close(); - - final TocReader tocReader = new StandardTocReader(tocFile); - - try (final FileInputStream fis = new FileInputStream(journalFile); - final StandardRecordReader reader = new StandardRecordReader(fis, journalFile.getName(), tocReader, 2048)) { - assertEquals(0, reader.getBlockIndex()); - reader.skipToBlock(0); - final StandardProvenanceEventRecord recovered = reader.nextRecord(); - assertNotNull(recovered); - - assertEquals("nifi://unit-test", recovered.getTransitUri()); - assertNull(reader.nextRecord()); - } - - FileUtils.deleteFile(journalFile.getParentFile(), true); - } +public class TestStandardRecordReaderWriter extends AbstractTestRecordReaderWriter { @Test - public void testSingleRecordCompressed() throws IOException { - final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz"); - final File tocFile = TocUtil.getTocFile(journalFile); - final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); - final StandardRecordWriter writer = new StandardRecordWriter(journalFile, tocWriter, true, 100); + @Ignore("For local testing only") + public void testWritePerformance() throws IOException { + // This is a simple micro-benchmarking 
test so that we can determine how fast the serialization/deserialization is before + // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects + // on performance of the repository. + final ProvenanceEventRecord event = createEvent(); - writer.writeHeader(1L); - writer.writeRecord(createEvent(), 1L); - writer.close(); + final TocWriter tocWriter = new NopTocWriter(); - final TocReader tocReader = new StandardTocReader(tocFile); + final int numEvents = 10_000_000; + final long startNanos = System.nanoTime(); + try (final OutputStream nullOut = new NullOutputStream(); + final RecordWriter writer = new StandardRecordWriter(nullOut, tocWriter, false, 100000)) { - try (final FileInputStream fis = new FileInputStream(journalFile); - final StandardRecordReader reader = new StandardRecordReader(fis, journalFile.getName(), tocReader, 2048)) { - assertEquals(0, reader.getBlockIndex()); - reader.skipToBlock(0); - final StandardProvenanceEventRecord recovered = reader.nextRecord(); - assertNotNull(recovered); + writer.writeHeader(0L); - assertEquals("nifi://unit-test", recovered.getTransitUri()); - assertNull(reader.nextRecord()); - } - - FileUtils.deleteFile(journalFile.getParentFile(), true); - } - - - @Test - public void testMultipleRecordsSameBlockCompressed() throws IOException { - final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz"); - final File tocFile = TocUtil.getTocFile(journalFile); - final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); - // new record each 1 MB of uncompressed data - final StandardRecordWriter writer = new StandardRecordWriter(journalFile, tocWriter, true, 1024 * 1024); - - writer.writeHeader(1L); - for (int i=0; i < 10; i++) { - writer.writeRecord(createEvent(), i); - } - writer.close(); - - final TocReader tocReader = new StandardTocReader(tocFile); - - try (final FileInputStream fis = new FileInputStream(journalFile); - final StandardRecordReader reader = new StandardRecordReader(fis, journalFile.getName(), tocReader, 2048)) { - for (int i=0; i < 10; i++) { - assertEquals(0, reader.getBlockIndex()); - - // call skipToBlock half the time to ensure that we can; avoid calling it - // the other half of the time to ensure that it's okay. 
- if (i <= 5) { - reader.skipToBlock(0); - } - - final StandardProvenanceEventRecord recovered = reader.nextRecord(); - assertNotNull(recovered); - assertEquals("nifi://unit-test", recovered.getTransitUri()); + for (int i = 0; i < numEvents; i++) { + writer.writeRecord(event, i); } - - assertNull(reader.nextRecord()); } - FileUtils.deleteFile(journalFile.getParentFile(), true); + final long nanos = System.nanoTime() - startNanos; + final long millis = TimeUnit.NANOSECONDS.toMillis(nanos); + System.out.println("Took " + millis + " millis to write " + numEvents + " events"); } - @Test - public void testMultipleRecordsMultipleBlocksCompressed() throws IOException { - final File journalFile = new File("target/storage/" + UUID.randomUUID().toString() + "/testSimpleWrite.gz"); - final File tocFile = TocUtil.getTocFile(journalFile); - final TocWriter tocWriter = new StandardTocWriter(tocFile, false, false); - // new block each 10 bytes - final StandardRecordWriter writer = new StandardRecordWriter(journalFile, tocWriter, true, 100); + @Ignore("For local testing only") + public void testReadPerformance() throws IOException { + // This is a simple micro-benchmarking test so that we can determine how fast the serialization/deserialization is before + // making significant changes. This allows us to ensure that changes that we make do not have significant adverse effects + // on performance of the repository. + final ProvenanceEventRecord event = createEvent(); - writer.writeHeader(1L); - for (int i=0; i < 10; i++) { - writer.writeRecord(createEvent(), i); + final TocReader tocReader = null; + + final byte[] header; + try (final ByteArrayOutputStream headerOut = new ByteArrayOutputStream(); + final DataOutputStream out = new DataOutputStream(headerOut)) { + out.writeUTF(PersistentProvenanceRepository.class.getName()); + out.writeInt(9); + header = headerOut.toByteArray(); } - writer.close(); - final TocReader tocReader = new StandardTocReader(tocFile); + final byte[] serializedRecord; + try (final ByteArrayOutputStream headerOut = new ByteArrayOutputStream(); + final StandardRecordWriter writer = new StandardRecordWriter(headerOut, null, false, 0)) { - try (final FileInputStream fis = new FileInputStream(journalFile); - final StandardRecordReader reader = new StandardRecordReader(fis, journalFile.getName(), tocReader, 2048)) { - for (int i=0; i < 10; i++) { - final StandardProvenanceEventRecord recovered = reader.nextRecord(); - System.out.println(recovered); - assertNotNull(recovered); - assertEquals(i, recovered.getEventId()); - assertEquals("nifi://unit-test", recovered.getTransitUri()); + writer.writeHeader(1L); + headerOut.reset(); + + writer.writeRecord(event, 1L); + writer.flush(); + serializedRecord = headerOut.toByteArray(); + } + + final int numEvents = 10_000_000; + final long startNanos = System.nanoTime(); + try (final InputStream in = new LoopingInputStream(header, serializedRecord); + final RecordReader reader = new StandardRecordReader(in, "filename", tocReader, 100000)) { + + for (int i = 0; i < numEvents; i++) { + reader.nextRecord(); } - - assertNull(reader.nextRecord()); } - FileUtils.deleteFile(journalFile.getParentFile(), true); + final long nanos = System.nanoTime() - startNanos; + final long millis = TimeUnit.NANOSECONDS.toMillis(nanos); + System.out.println("Took " + millis + " millis to read " + numEvents + " events"); } + + @Override + protected RecordWriter createWriter(File file, TocWriter tocWriter, boolean compressed, int uncompressedBlockSize) throws IOException { + 
return new StandardRecordWriter(file, tocWriter, compressed, uncompressedBlockSize); + } + + @Override + protected RecordReader createReader(InputStream in, String journalFilename, TocReader tocReader, int maxAttributeSize) throws IOException { + return new StandardRecordReader(in, journalFilename, tocReader, maxAttributeSize); + } + } diff --git a/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/toc/NopTocWriter.java b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/toc/NopTocWriter.java new file mode 100644 index 0000000000..edf62d8504 --- /dev/null +++ b/nifi-nar-bundles/nifi-provenance-repository-bundle/nifi-persistent-provenance-repository/src/test/java/org/apache/nifi/provenance/toc/NopTocWriter.java @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nifi.provenance.toc; + +import java.io.File; +import java.io.IOException; + +public class NopTocWriter implements TocWriter { + private int blockIndex; + + @Override + public void close() throws IOException { + } + + @Override + public void addBlockOffset(long offset, long firstEventId) throws IOException { + blockIndex++; + } + + @Override + public int getCurrentBlockIndex() { + return blockIndex; + } + + @Override + public File getFile() { + return null; + } + + @Override + public void sync() throws IOException { + } + +} diff --git a/pom.xml b/pom.xml index f6eed44230..d19dba0278 100644 --- a/pom.xml +++ b/pom.xml @@ -1305,6 +1305,11 @@ language governing permissions and limitations under the License. --> nifi-processor-utils 1.1.0-SNAPSHOT + + org.apache.nifi + nifi-schema-utils + 1.1.0-SNAPSHOT + org.apache.nifi nifi-hadoop-utils
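
The RecordReaders change earlier in this patch selects a reader implementation by peeking at a serialization name that every writer now records at the head of the journal file: the stream is buffered and marked, the UTF string is read (through a GZIPInputStream when the file name ends in ".gz"), and the stream is reset so that the chosen reader can still parse the complete header itself. A minimal sketch of that peek-and-dispatch pattern follows; the helper name peekSerializationName and the 4096-byte mark limit are assumptions made for illustration, not part of the patch.

import java.io.BufferedInputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

public class SerializationNamePeek {

    /**
     * Reads the serialization name that the writer recorded via writeUTF() at the start of the
     * file, without consuming it: the stream is marked before the read and reset afterwards so
     * that the reader chosen from the name can re-read the full header.
     * Returns null if the file is empty or truncated, mirroring the EmptyRecordReader fallback.
     */
    static String peekSerializationName(final BufferedInputStream bufferedIn, final String filename) throws IOException {
        bufferedIn.mark(4096); // assumption: the header name fits comfortably within 4 KB
        try {
            final InputStream in = filename.endsWith(".gz") ? new GZIPInputStream(bufferedIn) : bufferedIn;
            return new DataInputStream(in).readUTF();
        } catch (final EOFException eof) {
            return null;
        } finally {
            bufferedIn.reset(); // rewind to the beginning of the header
        }
    }
}

In the patch itself, the name read this way is matched against StandardRecordWriter.SERIALIZATION_NAME and ByteArraySchemaRecordWriter.SERIALIZATION_NAME, so journals written before the upgrade keep using the legacy StandardRecordReader while schema-based journals are handed to ByteArraySchemaRecordReader.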
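
The writers rely on the compression-block technique that the inline comments describe: a GZIPOutputStream only emits its trailing bytes when it is closed, so to finish one block without giving up the underlying file stream, that stream is wrapped in a stream whose close() is a no-op and a fresh GZIPOutputStream is opened for the next block. The following compact sketch shows the idea in isolation, assuming invented names (BlockCompressedWriter, startNewBlock) rather than the patch's actual classes.

import java.io.BufferedOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.zip.GZIPOutputStream;

public class BlockCompressedWriter {

    /** close() is intentionally a no-op, so closing a GZIPOutputStream forces its trailer out
     *  without closing the shared underlying stream. */
    static final class NonCloseableOutputStream extends OutputStream {
        private final OutputStream delegate;

        NonCloseableOutputStream(final OutputStream delegate) {
            this.delegate = delegate;
        }

        @Override
        public void write(final int b) throws IOException {
            delegate.write(b);
        }

        @Override
        public void write(final byte[] b, final int off, final int len) throws IOException {
            delegate.write(b, off, len);
        }

        @Override
        public void flush() throws IOException {
            delegate.flush();
        }

        @Override
        public void close() {
            // do not close the delegate; the writer keeps using it for the next block
        }
    }

    private final OutputStream fileOut; // shared across all compression blocks
    private OutputStream blockOut;      // stream for the block currently being written

    public BlockCompressedWriter(final OutputStream fileOut) {
        this.fileOut = fileOut;
    }

    /** Ends the current block, which writes the GZIP trailer, then starts a new GZIP member on
     *  the same underlying stream for the next block. */
    public OutputStream startNewBlock() throws IOException {
        if (blockOut != null) {
            blockOut.close(); // flushes buffered data and emits the trailer for the finished block
        }
        blockOut = new BufferedOutputStream(new GZIPOutputStream(new NonCloseableOutputStream(fileOut)), 65536);
        return blockOut;
    }
}

Each new block starts at a known byte offset in the file, which is what the calls to TocWriter.addBlockOffset(rawOutStream.getBytesWritten(), eventId) record, so a reader can later skipToBlock() and decompress only the block containing the requested event.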