From d5969bb33a5d35a348cc88b16333bce4ebdf2667 Mon Sep 17 00:00:00 2001 From: javanna Date: Wed, 10 Feb 2016 12:38:39 +0100 Subject: [PATCH] Attachment Processor: setFieldValue only once as a map --- .../attachment/AttachmentProcessor.java | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java index 93f9ceeadb2..f7edb46f4db 100644 --- a/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java +++ b/plugins/ingest-attachment/src/main/java/org/elasticsearch/ingest/attachment/AttachmentProcessor.java @@ -32,6 +32,7 @@ import org.elasticsearch.ingest.core.IngestDocument; import java.io.IOException; import java.util.Arrays; import java.util.EnumSet; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -66,66 +67,69 @@ public final class AttachmentProcessor extends AbstractProcessor { @Override public void execute(IngestDocument ingestDocument) { String base64Input = ingestDocument.getFieldValue(sourceField, String.class); + Map additionalFields = new HashMap<>(); - Metadata metadata = new Metadata(); try { byte[] decodedContent = Base64.decode(base64Input.getBytes(UTF_8)); + Metadata metadata = new Metadata(); String parsedContent = TikaImpl.parse(decodedContent, metadata, indexedChars); if (fields.contains(Field.CONTENT) && Strings.hasLength(parsedContent)) { // somehow tika seems to append a newline at the end automatically, lets remove that again - ingestDocument.setFieldValue(targetField + "." + Field.CONTENT.toLowerCase(), parsedContent.trim()); + additionalFields.put(Field.CONTENT.toLowerCase(), parsedContent.trim()); } if (fields.contains(Field.LANGUAGE) && Strings.hasLength(parsedContent)) { LanguageIdentifier identifier = new LanguageIdentifier(parsedContent); String language = identifier.getLanguage(); - ingestDocument.setFieldValue(targetField + "." + Field.LANGUAGE.toLowerCase(), language); + additionalFields.put(Field.LANGUAGE.toLowerCase(), language); } if (fields.contains(Field.DATE)) { String createdDate = metadata.get(TikaCoreProperties.CREATED); if (createdDate != null) { - ingestDocument.setFieldValue(targetField + "." + Field.DATE.toLowerCase(), createdDate); + additionalFields.put(Field.DATE.toLowerCase(), createdDate); } } if (fields.contains(Field.TITLE)) { String title = metadata.get(TikaCoreProperties.TITLE); if (Strings.hasLength(title)) { - ingestDocument.setFieldValue(targetField + "." + Field.TITLE.toLowerCase(), title); + additionalFields.put(Field.TITLE.toLowerCase(), title); } } if (fields.contains(Field.AUTHOR)) { String author = metadata.get("Author"); if (Strings.hasLength(author)) { - ingestDocument.setFieldValue(targetField + "." + Field.AUTHOR.toLowerCase(), author); + additionalFields.put(Field.AUTHOR.toLowerCase(), author); } } if (fields.contains(Field.KEYWORDS)) { String keywords = metadata.get("Keywords"); if (Strings.hasLength(keywords)) { - ingestDocument.setFieldValue(targetField + "." + Field.KEYWORDS.toLowerCase(), keywords); + additionalFields.put(Field.KEYWORDS.toLowerCase(), keywords); } } if (fields.contains(Field.CONTENT_TYPE)) { String contentType = metadata.get(Metadata.CONTENT_TYPE); if (Strings.hasLength(contentType)) { - ingestDocument.setFieldValue(targetField + "." + Field.CONTENT_TYPE.toLowerCase(), contentType); + additionalFields.put(Field.CONTENT_TYPE.toLowerCase(), contentType); } } if (fields.contains(Field.CONTENT_LENGTH)) { String contentLength = metadata.get(Metadata.CONTENT_LENGTH); String length = Strings.hasLength(contentLength) ? contentLength : String.valueOf(parsedContent.length()); - ingestDocument.setFieldValue(targetField + "." + Field.CONTENT_LENGTH.toLowerCase(), length); + additionalFields.put(Field.CONTENT_LENGTH.toLowerCase(), length); } } catch (Throwable e) { throw new ElasticsearchParseException("Error parsing document in field [{}]", e, sourceField); } + + ingestDocument.setFieldValue(targetField, additionalFields); } @Override