NIFI-1617 Add source filename metadata to IdentifyMimeType

Signed-off-by: Matt Burgess <mattyb149@apache.org>
This commit is contained in:
Joey Frazee 2016-03-10 16:49:59 -06:00 committed by Matt Burgess
parent 8f40d2b181
commit 3a4546c08a
4 changed files with 21 additions and 10 deletions

View File

@ -1,13 +1,13 @@
<?xml version="1.0"?> <?xml version="1.0"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
license agreements. See the NOTICE file distributed with this work for additional license agreements. See the NOTICE file distributed with this work for additional
information regarding copyright ownership. The ASF licenses this file to information regarding copyright ownership. The ASF licenses this file to
You under the Apache License, Version 2.0 (the "License"); you may not use You under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of this file except in compliance with the License. You may obtain a copy of
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
by applicable law or agreed to in writing, software distributed under the by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
OF ANY KIND, either express or implied. See the License for the specific OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. --> language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
@ -222,7 +222,7 @@ language governing permissions and limitations under the License. -->
<scope>test</scope> <scope>test</scope>
</dependency> </dependency>
</dependencies> </dependencies>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
@ -244,6 +244,7 @@ language governing permissions and limitations under the License. -->
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude> <exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude> <exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude> <exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
<exclude>src/test/resources/TestIdentifyMimeType/1.csv</exclude>
<exclude>src/test/resources/TestJson/json-sample.json</exclude> <exclude>src/test/resources/TestJson/json-sample.json</exclude>
<exclude>src/test/resources/TestJson/control-characters.json</exclude> <exclude>src/test/resources/TestJson/control-characters.json</exclude>
<exclude>src/test/resources/TestMergeContent/demarcate</exclude> <exclude>src/test/resources/TestMergeContent/demarcate</exclude>

View File

@ -45,6 +45,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector; import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream; import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType; import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException; import org.apache.tika.mime.MimeTypeException;
@ -117,6 +118,7 @@ public class IdentifyMimeType extends AbstractProcessor {
final ProcessorLog logger = getLogger(); final ProcessorLog logger = getLogger();
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null); final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
session.read(flowFile, new InputStreamCallback() { session.read(flowFile, new InputStreamCallback() {
@Override @Override
@ -124,6 +126,10 @@ public class IdentifyMimeType extends AbstractProcessor {
try (final InputStream in = new BufferedInputStream(stream)) { try (final InputStream in = new BufferedInputStream(stream)) {
TikaInputStream tikaStream = TikaInputStream.get(in); TikaInputStream tikaStream = TikaInputStream.get(in);
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
// Supply the flowfile's filename, when present, as Tika resource-name metadata so the detector can take it into account
if (filename != null) {
metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
}
// Get mime type // Get mime type
MediaType mediatype = detector.detect(tikaStream, metadata); MediaType mediatype = detector.detect(tikaStream, metadata);
mimeTypeRef.set(mediatype.toString()); mimeTypeRef.set(mediatype.toString());

View File

@ -58,6 +58,7 @@ public class TestIdentifyMimeType {
expectedMimeTypes.put("1.7z", "application/x-7z-compressed"); expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
expectedMimeTypes.put("1.mdb", "application/x-msaccess"); expectedMimeTypes.put("1.mdb", "application/x-msaccess");
expectedMimeTypes.put("1.txt", "text/plain"); expectedMimeTypes.put("1.txt", "text/plain");
expectedMimeTypes.put("1.csv", "text/csv");
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2"); expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
expectedMimeTypes.put("1.txt.gz", "application/gzip"); expectedMimeTypes.put("1.txt.gz", "application/gzip");
expectedMimeTypes.put("1.zip", "application/zip"); expectedMimeTypes.put("1.zip", "application/zip");
@ -76,6 +77,7 @@ public class TestIdentifyMimeType {
expectedExtensions.put("1.7z", ".7z"); expectedExtensions.put("1.7z", ".7z");
expectedExtensions.put("1.mdb", ".mdb"); expectedExtensions.put("1.mdb", ".mdb");
expectedExtensions.put("1.txt", ".txt"); expectedExtensions.put("1.txt", ".txt");
expectedExtensions.put("1.csv", ".csv");
expectedExtensions.put("1.txt.bz2", ".bz2"); expectedExtensions.put("1.txt.bz2", ".bz2");
expectedExtensions.put("1.txt.gz", ".gz"); expectedExtensions.put("1.txt.gz", ".gz");
expectedExtensions.put("1.zip", ".zip"); expectedExtensions.put("1.zip", ".zip");

View File

@ -0,0 +1,2 @@
id,name
1,"Jane Smith"
1 id name
2 1 Jane Smith