diff --git a/nifi-assembly/NOTICE b/nifi-assembly/NOTICE
index d3b9461f79..46e6cb6344 100644
--- a/nifi-assembly/NOTICE
+++ b/nifi-assembly/NOTICE
@@ -286,8 +286,23 @@ The following binary components are provided under the Apache Software License v
(ASLv2) Apache Tika
The following NOTICE information applies:
- Apache Tika Core
- Copyright 2007-2015 The Apache Software Foundation
+ Apache Tika
+ Copyright 2015 The Apache Software Foundation
+
+ This product includes software developed at
+ The Apache Software Foundation (http://www.apache.org/).
+
+ Copyright 1993-2010 University Corporation for Atmospheric Research/Unidata
+ This software contains code derived from UCAR/Unidata's NetCDF library.
+
+ Tika-server component uses CDDL-licensed dependencies: jersey (http://jersey.java.net/) and
+ Grizzly (http://grizzly.java.net/)
+
+ Tika-parsers component uses CDDL/LGPL dual-licensed dependency: jhighlight (https://github.com/codelibs/jhighlight)
+
+ OpenCSV: Copyright 2005 Bytecode Pty Ltd. Licensed under the Apache License, Version 2.0
+
+ IPTC Photo Metadata descriptions Copyright 2010 International Press Telecommunications Council.
(ASLv2) Apache Jakarta Commons Digester
The following NOTICE information applies:
diff --git a/nifi-assembly/pom.xml b/nifi-assembly/pom.xml
index 0ae6237e5a..6a75018ebe 100644
--- a/nifi-assembly/pom.xml
+++ b/nifi-assembly/pom.xml
@@ -244,7 +244,7 @@ language governing permissions and limitations under the License. -->
org.apache.nifi
- nifi-image-nar
+ nifi-media-nar
nar
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/src/main/resources/META-INF/NOTICE
deleted file mode 100644
index e1c6736437..0000000000
--- a/nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/src/main/resources/META-INF/NOTICE
+++ /dev/null
@@ -1,16 +0,0 @@
-nifi-image-nar
-Copyright 2015-2016 The Apache Software Foundation
-
-This product includes software developed at
-The Apache Software Foundation (http://www.apache.org/).
-
-******************
-Apache Software License v2
-******************
-
-The following binary components are provided under the Apache Software License v2
-
- (ASLv2) Metadata-Extractor
- The following NOTICE information applies:
- Metadata-Extractor
- Copyright 2002-2015 Drew Noakes
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/pom.xml b/nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/pom.xml
similarity index 97%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/pom.xml
rename to nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/pom.xml
index e5e31c567b..71a6bd7df8 100755
--- a/nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/pom.xml
+++ b/nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/pom.xml
@@ -17,7 +17,7 @@
4.0.0
org.apache.nifi
- nifi-image-bundle
+ nifi-media-bundle
1.0.0-SNAPSHOT
nifi-image-viewer
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/java/org/apache/nifi/web/ImageViewerController.java b/nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/java/org/apache/nifi/web/ImageViewerController.java
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/java/org/apache/nifi/web/ImageViewerController.java
rename to nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/java/org/apache/nifi/web/ImageViewerController.java
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/webapp/META-INF/nifi-content-viewer b/nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/webapp/META-INF/nifi-content-viewer
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/webapp/META-INF/nifi-content-viewer
rename to nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/webapp/META-INF/nifi-content-viewer
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/jsp/image.jsp b/nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/jsp/image.jsp
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/jsp/image.jsp
rename to nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/jsp/image.jsp
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/web.xml b/nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/web.xml
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/web.xml
rename to nifi-nar-bundles/nifi-media-bundle/nifi-image-viewer/src/main/webapp/WEB-INF/web.xml
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/pom.xml b/nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/pom.xml
similarity index 90%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/pom.xml
rename to nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/pom.xml
index fb9dd71053..13819f856b 100644
--- a/nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/pom.xml
+++ b/nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/pom.xml
@@ -18,12 +18,12 @@
org.apache.nifi
- nifi-image-bundle
+ nifi-media-bundle
1.0.0-SNAPSHOT
- nifi-image-nar
- 1.0.0-SNAPSHOT
+ nifi-media-nar
+ 1.0.0-SNAPSHOT
nar
true
@@ -33,7 +33,7 @@
org.apache.nifi
- nifi-image-processors
+ nifi-media-processors
1.0.0-SNAPSHOT
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/src/main/resources/META-INF/LICENSE b/nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/src/main/resources/META-INF/LICENSE
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-nar/src/main/resources/META-INF/LICENSE
rename to nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/src/main/resources/META-INF/LICENSE
diff --git a/nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/src/main/resources/META-INF/NOTICE b/nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/src/main/resources/META-INF/NOTICE
new file mode 100644
index 0000000000..95481c9e64
--- /dev/null
+++ b/nifi-nar-bundles/nifi-media-bundle/nifi-media-nar/src/main/resources/META-INF/NOTICE
@@ -0,0 +1,35 @@
+nifi-media-nar
+Copyright 2015-2016 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+******************
+Apache Software License v2
+******************
+
+The following binary components are provided under the Apache Software License v2
+
+ (ASLv2) Metadata-Extractor
+ The following NOTICE information applies:
+ Metadata-Extractor
+ Copyright 2002-2015 Drew Noakes
+
+ (ASLv2) Apache Tika
+ Apache Tika
+ Copyright 2015 The Apache Software Foundation
+
+ This product includes software developed at
+ The Apache Software Foundation (http://www.apache.org/).
+
+ Copyright 1993-2010 University Corporation for Atmospheric Research/Unidata
+ This software contains code derived from UCAR/Unidata's NetCDF library.
+
+ Tika-server component uses CDDL-licensed dependencies: jersey (http://jersey.java.net/) and
+ Grizzly (http://grizzly.java.net/)
+
+ Tika-parsers component uses CDDL/LGPL dual-licensed dependency: jhighlight (https://github.com/codelibs/jhighlight)
+
+ OpenCSV: Copyright 2005 Bytecode Pty Ltd. Licensed under the Apache License, Version 2.0
+
+ IPTC Photo Metadata descriptions Copyright 2010 International Press Telecommunications Council.
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/pom.xml b/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/pom.xml
similarity index 78%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/pom.xml
rename to nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/pom.xml
index 125d50a6e2..929b89bfeb 100644
--- a/nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/pom.xml
+++ b/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/pom.xml
@@ -18,11 +18,11 @@
org.apache.nifi
- nifi-image-bundle
+ nifi-media-bundle
1.0.0-SNAPSHOT
- nifi-image-processors
+ nifi-media-processors
jar
@@ -42,7 +42,17 @@
com.drewnoakes
metadata-extractor
- 2.7.2
+ 2.8.0
+
+
+ org.apache.tika
+ tika-core
+ 1.8
+
+
+ org.apache.tika
+ tika-parsers
+ 1.8
@@ -54,6 +64,8 @@
src/test/resources/notImage.txt
+ src/test/resources/textFile.txt
+ src/test/resources/textFileBig.txt
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/src/main/java/org/apache/nifi/processors/image/ExtractImageMetadata.java b/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/image/ExtractImageMetadata.java
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/src/main/java/org/apache/nifi/processors/image/ExtractImageMetadata.java
rename to nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/image/ExtractImageMetadata.java
diff --git a/nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/src/main/java/org/apache/nifi/processors/image/ResizeImage.java b/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/image/ResizeImage.java
similarity index 100%
rename from nifi-nar-bundles/nifi-image-bundle/nifi-image-processors/src/main/java/org/apache/nifi/processors/image/ResizeImage.java
rename to nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/image/ResizeImage.java
diff --git a/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/media/ExtractMediaMetadata.java b/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/media/ExtractMediaMetadata.java
new file mode 100644
index 0000000000..ab7e6ed146
--- /dev/null
+++ b/nifi-nar-bundles/nifi-media-bundle/nifi-media-processors/src/main/java/org/apache/nifi/processors/media/ExtractMediaMetadata.java
@@ -0,0 +1,251 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nifi.processors.media;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.regex.Pattern;
+
+import org.apache.nifi.annotation.behavior.InputRequirement;
+import org.apache.nifi.annotation.behavior.InputRequirement.Requirement;
+import org.apache.nifi.annotation.behavior.SupportsBatching;
+import org.apache.nifi.annotation.behavior.WritesAttribute;
+import org.apache.nifi.annotation.behavior.WritesAttributes;
+import org.apache.nifi.annotation.documentation.CapabilityDescription;
+import org.apache.nifi.annotation.documentation.Tags;
+import org.apache.nifi.annotation.lifecycle.OnScheduled;
+import org.apache.nifi.components.PropertyDescriptor;
+import org.apache.nifi.flowfile.FlowFile;
+import org.apache.nifi.logging.ComponentLog;
+import org.apache.nifi.processor.AbstractProcessor;
+import org.apache.nifi.processor.ProcessContext;
+import org.apache.nifi.processor.ProcessSession;
+import org.apache.nifi.processor.ProcessorInitializationContext;
+import org.apache.nifi.processor.Relationship;
+import org.apache.nifi.processor.exception.ProcessException;
+import org.apache.nifi.processor.io.InputStreamCallback;
+import org.apache.nifi.processor.util.StandardValidators;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+@InputRequirement(Requirement.INPUT_REQUIRED)
+@Tags({"media", "file", "format", "metadata", "audio", "video", "image", "document", "pdf"})
+@CapabilityDescription("Extract the content metadata from flowfiles containing audio, video, image, and other file "
+ + "types. This processor relies on the Apache Tika project for file format detection and parsing. It "
+ + "extracts a long list of metadata types for media files including audio, video, and print media "
+ + "formats. "
+ + "NOTE: the attribute names and content extracted may vary across upgrades because parsing is performed by "
+ + "the external Tika tools which in turn depend on other projects for metadata extraction. For more "
+ + "details and the list of supported file types, visit the library's website at http://tika.apache.org/.")
+@WritesAttributes({@WritesAttribute(attribute = "<Metadata Key Prefix><attribute>", description = "The extracted content metadata "
+ + "will be inserted with the attribute name \"<Metadata Key Prefix><attribute>\", or \"<attribute>\" if "
+ + "\"Metadata Key Prefix\" is not provided.")})
+@SupportsBatching
+public class ExtractMediaMetadata extends AbstractProcessor {
+
+ static final PropertyDescriptor MAX_NUMBER_OF_ATTRIBUTES = new PropertyDescriptor.Builder() // optional; description must agree with the default below
+ .name("Max Number of Attributes")
+ .description("Specify the max number of attributes to add to the flowfile. There is no guarantee in what order"
+ + " the tags will be processed. The default limit is 100.")
+ .required(false)
+ .defaultValue("100")
+ .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+ .build();
+
+ private static final PropertyDescriptor MAX_ATTRIBUTE_LENGTH = new PropertyDescriptor.Builder() // required; non-negative integer, default 100
+ .name("Max Attribute Length")
+ .description("Specifies the maximum length of a single attribute value. When a metadata item has multiple"
+ + " values, they will be merged until this length is reached and then \", ...\" will be added as"
+ + " an indicator that additional values were dropped. If a single value is longer than this, it"
+ + " will be truncated and \"(truncated)\" appended to indicate that truncation occurred.")
+ .required(true)
+ .defaultValue("100")
+ .addValidator(StandardValidators.NON_NEGATIVE_INTEGER_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor METADATA_KEY_FILTER = new PropertyDescriptor.Builder() // optional regex; compiled to a Pattern in onScheduled()
+ .name("Metadata Key Filter")
+ .description("A regular expression identifying which metadata keys received from the parser should be"
+ + " added to the flowfile attributes. If left blank, all metadata keys parsed will be added to the"
+ + " flowfile attributes.")
+ .required(false)
+ .addValidator(StandardValidators.REGULAR_EXPRESSION_VALIDATOR)
+ .build();
+
+ static final PropertyDescriptor METADATA_KEY_PREFIX = new PropertyDescriptor.Builder() // optional; supports expression language
+ .name("Metadata Key Prefix")
+ .description("Text to be prefixed to metadata keys as they are added to the flowfile attributes. It is"
+ + " recommended to end with a separator character like '.' or '-', this is not automatically"
+ + " added by the processor.")
+ .required(false)
+ .addValidator(StandardValidators.ATTRIBUTE_KEY_VALIDATOR)
+ .expressionLanguageSupported(true)
+ .build();
+
+ static final Relationship SUCCESS = new Relationship.Builder() // added to the relationship set in init()
+ .name("success")
+ .description("Any FlowFile that successfully has media metadata extracted will be routed to success")
+ .build();
+
+ static final Relationship FAILURE = new Relationship.Builder() // added to the relationship set in init()
+ .name("failure")
+ .description("Any FlowFile that fails to have media metadata extracted will be routed to failure")
+ .build();
+
+ private Set<Relationship> relationships; // assigned in init() as an unmodifiable set
+ private List<PropertyDescriptor> properties; // assigned in init() as an unmodifiable list
+
+ private final AtomicReference<Pattern> metadataKeyFilterRef = new AtomicReference<>(); // set in onScheduled(); holds null when no filter is configured
+
+ private volatile AutoDetectParser autoDetectParser; // replaced with a new instance in onScheduled()
+
+ @Override
+ protected void init(final ProcessorInitializationContext context) {
+ // Builds the supported property list and the relationship set, then stores unmodifiable views of both.
+ final List<PropertyDescriptor> properties = new ArrayList<>();
+ properties.add(MAX_NUMBER_OF_ATTRIBUTES);
+ properties.add(MAX_ATTRIBUTE_LENGTH);
+ properties.add(METADATA_KEY_FILTER);
+ properties.add(METADATA_KEY_PREFIX);
+ this.properties = Collections.unmodifiableList(properties);
+
+ final Set<Relationship> relationships = new HashSet<>();
+ relationships.add(SUCCESS);
+ relationships.add(FAILURE);
+ this.relationships = Collections.unmodifiableSet(relationships);
+ }
+
+ @Override
+ public Set<Relationship> getRelationships() {
+ return this.relationships; // the unmodifiable set assigned in init()
+ }
+
+ @Override
+ protected List<PropertyDescriptor> getSupportedPropertyDescriptors() {
+ return this.properties; // the unmodifiable list assigned in init()
+ }
+
+ @SuppressWarnings("unused")
+ @OnScheduled
+ public void onScheduled(ProcessContext context) {
+ // Compile the optional metadata key filter here so the regex is not re-parsed elsewhere;
+ // an unset or empty property value clears any previously stored pattern.
+ final String filterRegex = context.getProperty(METADATA_KEY_FILTER).getValue();
+ final boolean filterConfigured = filterRegex != null && !filterRegex.isEmpty();
+ metadataKeyFilterRef.set(filterConfigured ? Pattern.compile(filterRegex) : null);
+
+ // A new parser instance is created each time this method runs.
+ autoDetectParser = new AutoDetectParser();
+ }
+
+ @Override
+ public void onTrigger(final ProcessContext context, final ProcessSession session) throws ProcessException {
+ FlowFile flowFile = session.get();
+ if (flowFile == null) {
+ return;
+ }
+
+ final ComponentLog logger = this.getLogger();
+ final AtomicReference
org.apache.nifi
- nifi-image-nar
+ nifi-media-nar
1.0.0-SNAPSHOT
nar