mirror of https://github.com/apache/nifi.git
NIFI-1617 Add source filename metadata to IdentifyMimeType
Signed-off-by: Matt Burgess <mattyb149@apache.org>
This commit is contained in:
parent
8f40d2b181
commit
3a4546c08a
|
@ -1,13 +1,13 @@
|
|||
<?xml version="1.0"?>
|
||||
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
|
||||
license agreements. See the NOTICE file distributed with this work for additional
|
||||
information regarding copyright ownership. The ASF licenses this file to
|
||||
You under the Apache License, Version 2.0 (the "License"); you may not use
|
||||
this file except in compliance with the License. You may obtain a copy of
|
||||
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
|
||||
by applicable law or agreed to in writing, software distributed under the
|
||||
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
|
||||
OF ANY KIND, either express or implied. See the License for the specific
|
||||
<!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor
|
||||
license agreements. See the NOTICE file distributed with this work for additional
|
||||
information regarding copyright ownership. The ASF licenses this file to
|
||||
You under the Apache License, Version 2.0 (the "License"); you may not use
|
||||
this file except in compliance with the License. You may obtain a copy of
|
||||
the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required
|
||||
by applicable law or agreed to in writing, software distributed under the
|
||||
License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
|
||||
OF ANY KIND, either express or implied. See the License for the specific
|
||||
language governing permissions and limitations under the License. -->
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
@ -222,7 +222,7 @@ language governing permissions and limitations under the License. -->
|
|||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
|
@ -244,6 +244,7 @@ language governing permissions and limitations under the License. -->
|
|||
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
|
||||
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
|
||||
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
|
||||
<exclude>src/test/resources/TestIdentifyMimeType/1.csv</exclude>
|
||||
<exclude>src/test/resources/TestJson/json-sample.json</exclude>
|
||||
<exclude>src/test/resources/TestJson/control-characters.json</exclude>
|
||||
<exclude>src/test/resources/TestMergeContent/demarcate</exclude>
|
||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.tika.config.TikaConfig;
|
|||
import org.apache.tika.detect.Detector;
|
||||
import org.apache.tika.io.TikaInputStream;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.metadata.TikaMetadataKeys;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.mime.MimeType;
|
||||
import org.apache.tika.mime.MimeTypeException;
|
||||
|
@ -117,6 +118,7 @@ public class IdentifyMimeType extends AbstractProcessor {
|
|||
|
||||
final ProcessorLog logger = getLogger();
|
||||
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
|
||||
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
|
||||
|
||||
session.read(flowFile, new InputStreamCallback() {
|
||||
@Override
|
||||
|
@ -124,6 +126,10 @@ public class IdentifyMimeType extends AbstractProcessor {
|
|||
try (final InputStream in = new BufferedInputStream(stream)) {
|
||||
TikaInputStream tikaStream = TikaInputStream.get(in);
|
||||
Metadata metadata = new Metadata();
|
||||
// Add filename if it exists
|
||||
if (filename != null) {
|
||||
metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
|
||||
}
|
||||
// Get mime type
|
||||
MediaType mediatype = detector.detect(tikaStream, metadata);
|
||||
mimeTypeRef.set(mediatype.toString());
|
||||
|
|
|
@ -58,6 +58,7 @@ public class TestIdentifyMimeType {
|
|||
expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
|
||||
expectedMimeTypes.put("1.mdb", "application/x-msaccess");
|
||||
expectedMimeTypes.put("1.txt", "text/plain");
|
||||
expectedMimeTypes.put("1.csv", "text/csv");
|
||||
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
|
||||
expectedMimeTypes.put("1.txt.gz", "application/gzip");
|
||||
expectedMimeTypes.put("1.zip", "application/zip");
|
||||
|
@ -76,6 +77,7 @@ public class TestIdentifyMimeType {
|
|||
expectedExtensions.put("1.7z", ".7z");
|
||||
expectedExtensions.put("1.mdb", ".mdb");
|
||||
expectedExtensions.put("1.txt", ".txt");
|
||||
expectedExtensions.put("1.csv", ".csv");
|
||||
expectedExtensions.put("1.txt.bz2", ".bz2");
|
||||
expectedExtensions.put("1.txt.gz", ".gz");
|
||||
expectedExtensions.put("1.zip", ".zip");
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
id,name
|
||||
1,"Jane Smith"
|
|
Loading…
Reference in New Issue