Adds content-length as number
If you run Elasticsearch with the ingest-attachment plugin:

```sh
gradle plugins:ingest-attachment:run
```

And then you use it on a document:

```js
PUT _ingest/pipeline/attachment
{
  "description" : "Extract attachment information",
  "processors" : [
    {
      "attachment" : {
        "field" : "data"
      }
    }
  ]
}

PUT my_index/my_type/my_id?pipeline=attachment
{
  "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
}

GET my_index/my_type/my_id
```

Before this commit, you were getting this back (note that `content_length` is a string):

```js
# PUT _ingest/pipeline/attachment
{
  "acknowledged": true
}

# PUT my_index/my_type/my_id?pipeline=attachment
{
  "_index": "my_index",
  "_type": "my_type",
  "_id": "my_id",
  "_version": 2,
  "result": "updated",
  "_shards": {
    "total": 2,
    "successful": 1,
    "failed": 0
  },
  "created": false
}

# GET my_index/my_type/my_id
{
  "_index": "my_index",
  "_type": "my_type",
  "_id": "my_id",
  "_version": 2,
  "found": true,
  "_source": {
    "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=",
    "attachment": {
      "content_type": "application/rtf",
      "language": "ro",
      "content": "Lorem ipsum dolor sit amet",
      "content_length": "28"
    }
  }
}
```

With this commit, you are now getting `content_length` as a number:

```js
# GET my_index/my_type/my_id
{
  "_index": "my_index",
  "_type": "my_type",
  "_id": "my_id",
  "_version": 2,
  "found": true,
  "_source": {
    "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=",
    "attachment": {
      "content_type": "application/rtf",
      "language": "ro",
      "content": "Lorem ipsum dolor sit amet",
      "content_length": 28
    }
  }
}
```

Closes #19924
This commit is contained in:
parent
90dbce9682
commit
905684fe73
|
@ -119,7 +119,12 @@ public final class AttachmentProcessor extends AbstractProcessor {
|
||||||
|
|
||||||
if (properties.contains(Property.CONTENT_LENGTH)) {
|
if (properties.contains(Property.CONTENT_LENGTH)) {
|
||||||
String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
|
String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
|
||||||
String length = Strings.hasLength(contentLength) ? contentLength : String.valueOf(parsedContent.length());
|
long length;
|
||||||
|
if (Strings.hasLength(contentLength)) {
|
||||||
|
length = Long.parseLong(contentLength);
|
||||||
|
} else {
|
||||||
|
length = parsedContent.length();
|
||||||
|
}
|
||||||
additionalFields.put(Property.CONTENT_LENGTH.toLowerCase(), length);
|
additionalFields.put(Property.CONTENT_LENGTH.toLowerCase(), length);
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
|
|
@ -33,7 +33,7 @@
|
||||||
- length: { _source.attachment: 4 }
|
- length: { _source.attachment: 4 }
|
||||||
- match: { _source.attachment.content: "This is an english text to test if the pipeline works" }
|
- match: { _source.attachment.content: "This is an english text to test if the pipeline works" }
|
||||||
- match: { _source.attachment.language: "en" }
|
- match: { _source.attachment.language: "en" }
|
||||||
- match: { _source.attachment.content_length: "54" }
|
- match: { _source.attachment.content_length: 54 }
|
||||||
- match: { _source.attachment.content_type: "text/plain; charset=ISO-8859-1" }
|
- match: { _source.attachment.content_type: "text/plain; charset=ISO-8859-1" }
|
||||||
|
|
||||||
---
|
---
|
||||||
|
@ -111,4 +111,4 @@
|
||||||
- length: { _source.attachment: 4 }
|
- length: { _source.attachment: 4 }
|
||||||
- match: { _source.attachment.content: "This is an english text to tes" }
|
- match: { _source.attachment.content: "This is an english text to tes" }
|
||||||
- match: { _source.attachment.language: "en" }
|
- match: { _source.attachment.language: "en" }
|
||||||
- match: { _source.attachment.content_length: "30" }
|
- match: { _source.attachment.content_length: 30 }
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
- match: { _source.attachment.language: "et" }
|
- match: { _source.attachment.language: "et" }
|
||||||
- match: { _source.attachment.author: "David Pilato" }
|
- match: { _source.attachment.author: "David Pilato" }
|
||||||
- match: { _source.attachment.date: "2016-03-10T08:25:00Z" }
|
- match: { _source.attachment.date: "2016-03-10T08:25:00Z" }
|
||||||
- match: { _source.attachment.content_length: "19" }
|
- match: { _source.attachment.content_length: 19 }
|
||||||
- match: { _source.attachment.content_type: "application/msword" }
|
- match: { _source.attachment.content_type: "application/msword" }
|
||||||
|
|
||||||
|
|
||||||
|
@ -74,6 +74,6 @@
|
||||||
- match: { _source.attachment.language: "et" }
|
- match: { _source.attachment.language: "et" }
|
||||||
- match: { _source.attachment.author: "David Pilato" }
|
- match: { _source.attachment.author: "David Pilato" }
|
||||||
- match: { _source.attachment.date: "2016-03-10T08:24:00Z" }
|
- match: { _source.attachment.date: "2016-03-10T08:24:00Z" }
|
||||||
- match: { _source.attachment.content_length: "19" }
|
- match: { _source.attachment.content_length: 19 }
|
||||||
- match: { _source.attachment.content_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }
|
- match: { _source.attachment.content_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue