NIFI-1617 Add source filename metadata to IdentifyMimeType

Signed-off-by: Matt Burgess <mattyb149@apache.org>
This commit is contained in:
Joey Frazee 2016-03-10 16:49:59 -06:00 committed by Matt Burgess
parent 8f40d2b181
commit 3a4546c08a
4 changed files with 21 additions and 10 deletions

View File

@ -1,13 +1,13 @@
<?xml version="1.0"?>
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
license agreements. See the NOTICE file distributed with this work for additional
information regarding copyright ownership. The ASF licenses this file to
You under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
OF ANY KIND, either express or implied. See the License for the specific
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
license agreements. See the NOTICE file distributed with this work for additional
information regarding copyright ownership. The ASF licenses this file to
You under the Apache License, Version 2.0 (the "License"); you may not use
this file except in compliance with the License. You may obtain a copy of
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
by applicable law or agreed to in writing, software distributed under the
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
OF ANY KIND, either express or implied. See the License for the specific
language governing permissions and limitations under the License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
@ -222,7 +222,7 @@ language governing permissions and limitations under the License. -->
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
@ -244,6 +244,7 @@ language governing permissions and limitations under the License. -->
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
<exclude>src/test/resources/TestIdentifyMimeType/1.csv</exclude>
<exclude>src/test/resources/TestJson/json-sample.json</exclude>
<exclude>src/test/resources/TestJson/control-characters.json</exclude>
<exclude>src/test/resources/TestMergeContent/demarcate</exclude>

View File

@ -45,6 +45,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
@ -117,6 +118,7 @@ public class IdentifyMimeType extends AbstractProcessor {
final ProcessorLog logger = getLogger();
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
session.read(flowFile, new InputStreamCallback() {
@Override
@ -124,6 +126,10 @@ public class IdentifyMimeType extends AbstractProcessor {
try (final InputStream in = new BufferedInputStream(stream)) {
TikaInputStream tikaStream = TikaInputStream.get(in);
Metadata metadata = new Metadata();
// Record the FlowFile's filename, when it has one, as the resource name in the metadata handed to the detector
if (filename != null) {
metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
}
// Detect the MIME type from the stream content together with the metadata
MediaType mediatype = detector.detect(tikaStream, metadata);
mimeTypeRef.set(mediatype.toString());

View File

@ -58,6 +58,7 @@ public class TestIdentifyMimeType {
expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
expectedMimeTypes.put("1.mdb", "application/x-msaccess");
expectedMimeTypes.put("1.txt", "text/plain");
expectedMimeTypes.put("1.csv", "text/csv");
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
expectedMimeTypes.put("1.txt.gz", "application/gzip");
expectedMimeTypes.put("1.zip", "application/zip");
@ -76,6 +77,7 @@ public class TestIdentifyMimeType {
expectedExtensions.put("1.7z", ".7z");
expectedExtensions.put("1.mdb", ".mdb");
expectedExtensions.put("1.txt", ".txt");
expectedExtensions.put("1.csv", ".csv");
expectedExtensions.put("1.txt.bz2", ".bz2");
expectedExtensions.put("1.txt.gz", ".gz");
expectedExtensions.put("1.zip", ".zip");

View File

@ -0,0 +1,2 @@
id,name
1,"Jane Smith"
1 id name
2 1 Jane Smith