mirror of https://github.com/apache/nifi.git
NIFI-1617 Add source filename metadata to IdentifyMimeType
Signed-off-by: Matt Burgess <mattyb149@apache.org>
This commit is contained in:
parent
8f40d2b181
commit
3a4546c08a
|
@ -244,6 +244,7 @@ language governing permissions and limitations under the License. -->
|
||||||
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
|
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
|
||||||
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
|
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
|
||||||
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
|
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
|
||||||
|
<exclude>src/test/resources/TestIdentifyMimeType/1.csv</exclude>
|
||||||
<exclude>src/test/resources/TestJson/json-sample.json</exclude>
|
<exclude>src/test/resources/TestJson/json-sample.json</exclude>
|
||||||
<exclude>src/test/resources/TestJson/control-characters.json</exclude>
|
<exclude>src/test/resources/TestJson/control-characters.json</exclude>
|
||||||
<exclude>src/test/resources/TestMergeContent/demarcate</exclude>
|
<exclude>src/test/resources/TestMergeContent/demarcate</exclude>
|
||||||
|
|
|
@ -45,6 +45,7 @@ import org.apache.tika.config.TikaConfig;
|
||||||
import org.apache.tika.detect.Detector;
|
import org.apache.tika.detect.Detector;
|
||||||
import org.apache.tika.io.TikaInputStream;
|
import org.apache.tika.io.TikaInputStream;
|
||||||
import org.apache.tika.metadata.Metadata;
|
import org.apache.tika.metadata.Metadata;
|
||||||
|
import org.apache.tika.metadata.TikaMetadataKeys;
|
||||||
import org.apache.tika.mime.MediaType;
|
import org.apache.tika.mime.MediaType;
|
||||||
import org.apache.tika.mime.MimeType;
|
import org.apache.tika.mime.MimeType;
|
||||||
import org.apache.tika.mime.MimeTypeException;
|
import org.apache.tika.mime.MimeTypeException;
|
||||||
|
@ -117,6 +118,7 @@ public class IdentifyMimeType extends AbstractProcessor {
|
||||||
|
|
||||||
final ProcessorLog logger = getLogger();
|
final ProcessorLog logger = getLogger();
|
||||||
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
|
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
|
||||||
|
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
|
||||||
|
|
||||||
session.read(flowFile, new InputStreamCallback() {
|
session.read(flowFile, new InputStreamCallback() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -124,6 +126,10 @@ public class IdentifyMimeType extends AbstractProcessor {
|
||||||
try (final InputStream in = new BufferedInputStream(stream)) {
|
try (final InputStream in = new BufferedInputStream(stream)) {
|
||||||
TikaInputStream tikaStream = TikaInputStream.get(in);
|
TikaInputStream tikaStream = TikaInputStream.get(in);
|
||||||
Metadata metadata = new Metadata();
|
Metadata metadata = new Metadata();
|
||||||
|
// Add filename if it exists
|
||||||
|
if (filename != null) {
|
||||||
|
metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
|
||||||
|
}
|
||||||
// Get mime type
|
// Get mime type
|
||||||
MediaType mediatype = detector.detect(tikaStream, metadata);
|
MediaType mediatype = detector.detect(tikaStream, metadata);
|
||||||
mimeTypeRef.set(mediatype.toString());
|
mimeTypeRef.set(mediatype.toString());
|
||||||
|
|
|
@ -58,6 +58,7 @@ public class TestIdentifyMimeType {
|
||||||
expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
|
expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
|
||||||
expectedMimeTypes.put("1.mdb", "application/x-msaccess");
|
expectedMimeTypes.put("1.mdb", "application/x-msaccess");
|
||||||
expectedMimeTypes.put("1.txt", "text/plain");
|
expectedMimeTypes.put("1.txt", "text/plain");
|
||||||
|
expectedMimeTypes.put("1.csv", "text/csv");
|
||||||
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
|
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
|
||||||
expectedMimeTypes.put("1.txt.gz", "application/gzip");
|
expectedMimeTypes.put("1.txt.gz", "application/gzip");
|
||||||
expectedMimeTypes.put("1.zip", "application/zip");
|
expectedMimeTypes.put("1.zip", "application/zip");
|
||||||
|
@ -76,6 +77,7 @@ public class TestIdentifyMimeType {
|
||||||
expectedExtensions.put("1.7z", ".7z");
|
expectedExtensions.put("1.7z", ".7z");
|
||||||
expectedExtensions.put("1.mdb", ".mdb");
|
expectedExtensions.put("1.mdb", ".mdb");
|
||||||
expectedExtensions.put("1.txt", ".txt");
|
expectedExtensions.put("1.txt", ".txt");
|
||||||
|
expectedExtensions.put("1.csv", ".csv");
|
||||||
expectedExtensions.put("1.txt.bz2", ".bz2");
|
expectedExtensions.put("1.txt.bz2", ".bz2");
|
||||||
expectedExtensions.put("1.txt.gz", ".gz");
|
expectedExtensions.put("1.txt.gz", ".gz");
|
||||||
expectedExtensions.put("1.zip", ".zip");
|
expectedExtensions.put("1.zip", ".zip");
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
id,name
|
||||||
|
1,"Jane Smith"
|
|
Loading…
Reference in New Issue