mirror of https://github.com/apache/nifi.git
NIFI-1617 Add source filename metadata to IdentifyMimeType
Signed-off-by: Matt Burgess <mattyb149@apache.org>
This commit is contained in:
parent
8f40d2b181
commit
3a4546c08a
|
@ -1,13 +1,13 @@
|
||||||
<?xml version="1.0"?>
|
<?xml version="1.0"?>
|
||||||
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
|
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
|
||||||
license agreements. See the NOTICE file distributed with this work for additional
|
license agreements. See the NOTICE file distributed with this work for additional
|
||||||
information regarding copyright ownership. The ASF licenses this file to
|
information regarding copyright ownership. The ASF licenses this file to
|
||||||
You under the Apache License, Version 2.0 (the "License"); you may not use
|
You under the Apache License, Version 2.0 (the "License"); you may not use
|
||||||
this file except in compliance with the License. You may obtain a copy of
|
this file except in compliance with the License. You may obtain a copy of
|
||||||
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
|
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
|
||||||
by applicable law or agreed to in writing, software distributed under the
|
by applicable law or agreed to in writing, software distributed under the
|
||||||
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
|
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
|
||||||
OF ANY KIND, either express or implied. See the License for the specific
|
OF ANY KIND, either express or implied. See the License for the specific
|
||||||
language governing permissions and limitations under the License. -->
|
language governing permissions and limitations under the License. -->
|
||||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||||
<modelVersion>4.0.0</modelVersion>
|
<modelVersion>4.0.0</modelVersion>
|
||||||
|
@ -222,7 +222,7 @@ language governing permissions and limitations under the License. -->
|
||||||
<scope>test</scope>
|
<scope>test</scope>
|
||||||
</dependency>
|
</dependency>
|
||||||
</dependencies>
|
</dependencies>
|
||||||
|
|
||||||
<build>
|
<build>
|
||||||
<plugins>
|
<plugins>
|
||||||
<plugin>
|
<plugin>
|
||||||
|
@ -244,6 +244,7 @@ language governing permissions and limitations under the License. -->
|
||||||
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
|
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
|
||||||
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
|
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
|
||||||
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
|
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
|
||||||
|
<exclude>src/test/resources/TestIdentifyMimeType/1.csv</exclude>
|
||||||
<exclude>src/test/resources/TestJson/json-sample.json</exclude>
|
<exclude>src/test/resources/TestJson/json-sample.json</exclude>
|
||||||
<exclude>src/test/resources/TestJson/control-characters.json</exclude>
|
<exclude>src/test/resources/TestJson/control-characters.json</exclude>
|
||||||
<exclude>src/test/resources/TestMergeContent/demarcate</exclude>
|
<exclude>src/test/resources/TestMergeContent/demarcate</exclude>
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.tika.config.TikaConfig;
|
||||||
import org.apache.tika.detect.Detector;
|
import org.apache.tika.detect.Detector;
|
||||||
import org.apache.tika.io.TikaInputStream;
|
import org.apache.tika.io.TikaInputStream;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.TikaMetadataKeys;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.mime.MimeType;
|
import org.apache.tika.mime.MimeType;
|
||||||
import org.apache.tika.mime.MimeTypeException;
|
import org.apache.tika.mime.MimeTypeException;
|
||||||
|
@ -117,6 +118,7 @@ public class IdentifyMimeType extends AbstractProcessor {
|
||||||
|
|
||||||
final ProcessorLog logger = getLogger();
|
final ProcessorLog logger = getLogger();
|
||||||
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
|
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
|
||||||
|
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
|
||||||
|
|
||||||
session.read(flowFile, new InputStreamCallback() {
|
session.read(flowFile, new InputStreamCallback() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,6 +126,10 @@ public class IdentifyMimeType extends AbstractProcessor {
|
||||||
try (final InputStream in = new BufferedInputStream(stream)) {
|
try (final InputStream in = new BufferedInputStream(stream)) {
|
||||||
TikaInputStream tikaStream = TikaInputStream.get(in);
|
TikaInputStream tikaStream = TikaInputStream.get(in);
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
|
// Add filename if it exists
|
||||||
|
if (filename != null) {
|
||||||
|
metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
|
||||||
|
}
|
||||||
// Get mime type
|
// Get mime type
|
||||||
MediaType mediatype = detector.detect(tikaStream, metadata);
|
MediaType mediatype = detector.detect(tikaStream, metadata);
|
||||||
mimeTypeRef.set(mediatype.toString());
|
mimeTypeRef.set(mediatype.toString());
|
||||||
|
|
|
@ -58,6 +58,7 @@ public class TestIdentifyMimeType {
|
||||||
expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
|
expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
|
||||||
expectedMimeTypes.put("1.mdb", "application/x-msaccess");
|
expectedMimeTypes.put("1.mdb", "application/x-msaccess");
|
||||||
expectedMimeTypes.put("1.txt", "text/plain");
|
expectedMimeTypes.put("1.txt", "text/plain");
|
||||||
|
expectedMimeTypes.put("1.csv", "text/csv");
|
||||||
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
|
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
|
||||||
expectedMimeTypes.put("1.txt.gz", "application/gzip");
|
expectedMimeTypes.put("1.txt.gz", "application/gzip");
|
||||||
expectedMimeTypes.put("1.zip", "application/zip");
|
expectedMimeTypes.put("1.zip", "application/zip");
|
||||||
|
@ -76,6 +77,7 @@ public class TestIdentifyMimeType {
|
||||||
expectedExtensions.put("1.7z", ".7z");
|
expectedExtensions.put("1.7z", ".7z");
|
||||||
expectedExtensions.put("1.mdb", ".mdb");
|
expectedExtensions.put("1.mdb", ".mdb");
|
||||||
expectedExtensions.put("1.txt", ".txt");
|
expectedExtensions.put("1.txt", ".txt");
|
||||||
|
expectedExtensions.put("1.csv", ".csv");
|
||||||
expectedExtensions.put("1.txt.bz2", ".bz2");
|
expectedExtensions.put("1.txt.bz2", ".bz2");
|
||||||
expectedExtensions.put("1.txt.gz", ".gz");
|
expectedExtensions.put("1.txt.gz", ".gz");
|
||||||
expectedExtensions.put("1.zip", ".zip");
|
expectedExtensions.put("1.zip", ".zip");
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
id,name
|
||||||
|
1,"Jane Smith"
|
|
Loading…
Reference in New Issue