Adds content-length as a number

If you run Elasticsearch with the ingest-attachment plugin:

```sh
gradle plugins:ingest-attachment:run
```

And then you use it on a document:

```js
 PUT _ingest/pipeline/attachment
 {
   "description" : "Extract attachment information",
   "processors" : [
     {
       "attachment" : {
         "field" : "data"
       }
     }
   ]
 }
 PUT my_index/my_type/my_id?pipeline=attachment
 {
   "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0="
 }
 GET my_index/my_type/my_id
```

Before this commit, you were getting this back:

```js
 # PUT _ingest/pipeline/attachment
 {
   "acknowledged": true
 }

 # PUT my_index/my_type/my_id?pipeline=attachment
 {
   "_index": "my_index",
   "_type": "my_type",
   "_id": "my_id",
   "_version": 2,
   "result": "updated",
   "_shards": {
     "total": 2,
     "successful": 1,
     "failed": 0
   },
   "created": false
 }

 # GET my_index/my_type/my_id
 {
   "_index": "my_index",
   "_type": "my_type",
   "_id": "my_id",
   "_version": 2,
   "found": true,
   "_source": {
     "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=",
     "attachment": {
       "content_type": "application/rtf",
       "language": "ro",
       "content": "Lorem ipsum dolor sit amet",
       "content_length": "28"
     }
   }
 }
```

With this commit, `content_length` is now returned as a number:

```js
 # GET my_index/my_type/my_id
 {
   "_index": "my_index",
   "_type": "my_type",
   "_id": "my_id",
   "_version": 2,
   "found": true,
   "_source": {
     "data": "e1xydGYxXGFuc2kNCkxvcmVtIGlwc3VtIGRvbG9yIHNpdCBhbWV0DQpccGFyIH0=",
     "attachment": {
       "content_type": "application/rtf",
       "language": "ro",
       "content": "Lorem ipsum dolor sit amet",
       "content_length": 28
     }
   }
 }
```

Closes #19924
This commit is contained in:
David Pilato 2016-08-10 18:27:09 +02:00
parent 90dbce9682
commit 905684fe73
3 changed files with 10 additions and 5 deletions

View File

@ -119,7 +119,12 @@ public final class AttachmentProcessor extends AbstractProcessor {
if (properties.contains(Property.CONTENT_LENGTH)) {
String contentLength = metadata.get(Metadata.CONTENT_LENGTH);
-String length = Strings.hasLength(contentLength) ? contentLength : String.valueOf(parsedContent.length());
+long length;
+if (Strings.hasLength(contentLength)) {
+length = Long.parseLong(contentLength);
+} else {
+length = parsedContent.length();
+}
additionalFields.put(Property.CONTENT_LENGTH.toLowerCase(), length);
}
} catch (Exception e) {

View File

@ -33,7 +33,7 @@
- length: { _source.attachment: 4 }
- match: { _source.attachment.content: "This is an english text to test if the pipeline works" }
- match: { _source.attachment.language: "en" }
-- match: { _source.attachment.content_length: "54" }
+- match: { _source.attachment.content_length: 54 }
- match: { _source.attachment.content_type: "text/plain; charset=ISO-8859-1" }
---
@ -111,4 +111,4 @@
- length: { _source.attachment: 4 }
- match: { _source.attachment.content: "This is an english text to tes" }
- match: { _source.attachment.language: "en" }
-- match: { _source.attachment.content_length: "30" }
+- match: { _source.attachment.content_length: 30 }

View File

@ -34,7 +34,7 @@
- match: { _source.attachment.language: "et" }
- match: { _source.attachment.author: "David Pilato" }
- match: { _source.attachment.date: "2016-03-10T08:25:00Z" }
-- match: { _source.attachment.content_length: "19" }
+- match: { _source.attachment.content_length: 19 }
- match: { _source.attachment.content_type: "application/msword" }
@ -74,6 +74,6 @@
- match: { _source.attachment.language: "et" }
- match: { _source.attachment.author: "David Pilato" }
- match: { _source.attachment.date: "2016-03-10T08:24:00Z" }
-- match: { _source.attachment.content_length: "19" }
+- match: { _source.attachment.content_length: 19 }
- match: { _source.attachment.content_type: "application/vnd.openxmlformats-officedocument.wordprocessingml.document" }