From 94cf1411088e78eef1c7fb6cf8bc59fbde2f0720 Mon Sep 17 00:00:00 2001 From: David Pilato Date: Tue, 3 Jun 2014 10:09:21 +0200 Subject: [PATCH] Use `_language` field instead of `language` When we want to force a language instead of using Tika language detection, we set the `language` field in documents. To be consistent with other forced fields, `_content_type` and `_name`, we should prefix the `language` field with an underscore `_`. So `language` becomes `_language`. We first deprecate `language` in version 2.1.0 and we remove it in 2.3.0. Closes #68. (cherry picked from commit 2f46343) --- README.md | 14 ++++++++++++-- .../index/mapper/attachment/AttachmentMapper.java | 5 ++++- .../LanguageDetectionAttachmentMapperTests.java | 2 +- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e418a61622d..f9f6dc70906 100644 --- a/README.md +++ b/README.md @@ -46,13 +46,14 @@ In this case, the JSON to index can be: } ``` -Or it is possible to use more elaborated JSON if content type or resource name need to be set explicitly: +Or it is possible to use more elaborated JSON if content type, resource name or language need to be set explicitly: ```javascript { "my_attachment" : { "_content_type" : "application/pdf", "_name" : "resource/name/of/my.pdf", + "_language" : "en", "content" : "... base64 encoded attachment ..." } } @@ -121,7 +122,16 @@ By default, language detection is disabled (`false`) as it could come with a cos This default value can be changed by setting the `index.mapping.attachment.detect_language` setting. It can also be provided on a per document indexed using the `_detect_language` parameter. -Note, this feature is supported since `2.0.0` version. +Note that you can force language using `_language` field when sending your actual document: + +```javascript +{ + "my_attachment" : { + "_language" : "en", + "content" : "... base64 encoded attachment ..." 
+ } +} +``` Highlighting attachments ------------------------ diff --git a/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java b/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java index e1d4963575f..2096936fbda 100644 --- a/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java @@ -352,8 +352,11 @@ public class AttachmentMapper implements Mapper { } else if ("_name".equals(currentFieldName)) { name = parser.text(); } else if ("language".equals(currentFieldName)) { - // TODO should be _language + // TODO deprecated form. Will be removed in 2.3 language = parser.text(); + logger.debug("`language` is now deprecated. Use `_language`. See https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/68"); + } else if ("_language".equals(currentFieldName)) { + language = parser.text(); } } else if (token == XContentParser.Token.VALUE_NUMBER) { if ("_indexed_chars".equals(currentFieldName) || "_indexedChars".equals(currentFieldName)) { diff --git a/src/test/java/org/elasticsearch/index/mapper/xcontent/LanguageDetectionAttachmentMapperTests.java b/src/test/java/org/elasticsearch/index/mapper/xcontent/LanguageDetectionAttachmentMapperTests.java index 46f82a711dd..602eb8cd02a 100644 --- a/src/test/java/org/elasticsearch/index/mapper/xcontent/LanguageDetectionAttachmentMapperTests.java +++ b/src/test/java/org/elasticsearch/index/mapper/xcontent/LanguageDetectionAttachmentMapperTests.java @@ -74,7 +74,7 @@ public class LanguageDetectionAttachmentMapperTests extends ElasticsearchTestCas .field("content", html); if (forcedLanguage.length > 0) { - xcb.field("language", forcedLanguage[0]); + xcb.field("_language", forcedLanguage[0]); } xcb.endObject().endObject();