NIFI-1617 Add source filename metadata to IdentifyMimeType

Signed-off-by: Matt Burgess <mattyb149@apache.org>
This commit is contained in:
Joey Frazee 2016-03-10 16:49:59 -06:00 committed by Matt Burgess
parent 8f40d2b181
commit 3a4546c08a
4 changed files with 21 additions and 10 deletions

View File

@ -244,6 +244,7 @@ language governing permissions and limitations under the License. -->
<exclude>src/test/resources/TestEncryptContent/text.txt</exclude> <exclude>src/test/resources/TestEncryptContent/text.txt</exclude>
<exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude> <exclude>src/test/resources/TestEncryptContent/text.txt.asc</exclude>
<exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude> <exclude>src/test/resources/TestIdentifyMimeType/1.txt</exclude>
<exclude>src/test/resources/TestIdentifyMimeType/1.csv</exclude>
<exclude>src/test/resources/TestJson/json-sample.json</exclude> <exclude>src/test/resources/TestJson/json-sample.json</exclude>
<exclude>src/test/resources/TestJson/control-characters.json</exclude> <exclude>src/test/resources/TestJson/control-characters.json</exclude>
<exclude>src/test/resources/TestMergeContent/demarcate</exclude> <exclude>src/test/resources/TestMergeContent/demarcate</exclude>

View File

@ -45,6 +45,7 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector; import org.apache.tika.detect.Detector;
import org.apache.tika.io.TikaInputStream; import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata; import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MediaType; import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeType; import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException; import org.apache.tika.mime.MimeTypeException;
@ -117,6 +118,7 @@ public class IdentifyMimeType extends AbstractProcessor {
final ProcessorLog logger = getLogger(); final ProcessorLog logger = getLogger();
final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null); final ObjectHolder<String> mimeTypeRef = new ObjectHolder<>(null);
final String filename = flowFile.getAttribute(CoreAttributes.FILENAME.key());
session.read(flowFile, new InputStreamCallback() { session.read(flowFile, new InputStreamCallback() {
@Override @Override
@ -124,6 +126,10 @@ public class IdentifyMimeType extends AbstractProcessor {
try (final InputStream in = new BufferedInputStream(stream)) { try (final InputStream in = new BufferedInputStream(stream)) {
TikaInputStream tikaStream = TikaInputStream.get(in); TikaInputStream tikaStream = TikaInputStream.get(in);
Metadata metadata = new Metadata(); Metadata metadata = new Metadata();
// Record the flowfile's filename, when it has one, in the Tika metadata passed to the detector
if (filename != null) {
metadata.add(TikaMetadataKeys.RESOURCE_NAME_KEY, filename);
}
// Get mime type // Get mime type
MediaType mediatype = detector.detect(tikaStream, metadata); MediaType mediatype = detector.detect(tikaStream, metadata);
mimeTypeRef.set(mediatype.toString()); mimeTypeRef.set(mediatype.toString());

View File

@ -58,6 +58,7 @@ public class TestIdentifyMimeType {
expectedMimeTypes.put("1.7z", "application/x-7z-compressed"); expectedMimeTypes.put("1.7z", "application/x-7z-compressed");
expectedMimeTypes.put("1.mdb", "application/x-msaccess"); expectedMimeTypes.put("1.mdb", "application/x-msaccess");
expectedMimeTypes.put("1.txt", "text/plain"); expectedMimeTypes.put("1.txt", "text/plain");
expectedMimeTypes.put("1.csv", "text/csv");
expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2"); expectedMimeTypes.put("1.txt.bz2", "application/x-bzip2");
expectedMimeTypes.put("1.txt.gz", "application/gzip"); expectedMimeTypes.put("1.txt.gz", "application/gzip");
expectedMimeTypes.put("1.zip", "application/zip"); expectedMimeTypes.put("1.zip", "application/zip");
@ -76,6 +77,7 @@ public class TestIdentifyMimeType {
expectedExtensions.put("1.7z", ".7z"); expectedExtensions.put("1.7z", ".7z");
expectedExtensions.put("1.mdb", ".mdb"); expectedExtensions.put("1.mdb", ".mdb");
expectedExtensions.put("1.txt", ".txt"); expectedExtensions.put("1.txt", ".txt");
expectedExtensions.put("1.csv", ".csv");
expectedExtensions.put("1.txt.bz2", ".bz2"); expectedExtensions.put("1.txt.bz2", ".bz2");
expectedExtensions.put("1.txt.gz", ".gz"); expectedExtensions.put("1.txt.gz", ".gz");
expectedExtensions.put("1.zip", ".zip"); expectedExtensions.put("1.zip", ".zip");

View File

@ -0,0 +1,2 @@
id,name
1,"Jane Smith"
1 id name
2 1 Jane Smith