Use `_language` field instead of `language`
When we want to force a language instead of using Tika language detection, we set the `language` field in documents. To be consistent with the other forced fields, `_content_type` and `_name`, we should prefix the `language` field with an underscore `_`. So `language` becomes `_language`. We first deprecate `language` in version 2.1.0 and will remove it in 2.3.0. Closes #68. (cherry picked from commit 2f46343)
This commit is contained in:
parent
7c1c2011bc
commit
94cf141108
14
README.md
14
README.md
|
@ -46,13 +46,14 @@ In this case, the JSON to index can be:
|
|||
}
|
||||
```
|
||||
|
||||
Or it is possible to use more elaborated JSON if content type or resource name need to be set explicitly:
|
||||
Or it is possible to use more elaborate JSON if the content type, resource name or language needs to be set explicitly:
|
||||
|
||||
```javascript
|
||||
{
|
||||
"my_attachment" : {
|
||||
"_content_type" : "application/pdf",
|
||||
"_name" : "resource/name/of/my.pdf",
|
||||
"_language" : "en",
|
||||
"content" : "... base64 encoded attachment ..."
|
||||
}
|
||||
}
|
||||
|
@ -121,7 +122,16 @@ By default, language detection is disabled (`false`) as it could come with a cos
|
|||
This default value can be changed by setting the `index.mapping.attachment.detect_language` setting.
|
||||
It can also be provided on a per-document basis at index time using the `_detect_language` parameter.
|
||||
|
||||
Note that this feature has been supported since version `2.0.0`.
|
||||
Note that you can force the language by using the `_language` field when sending your actual document:
|
||||
|
||||
```javascript
|
||||
{
|
||||
"my_attachment" : {
|
||||
"_language" : "en",
|
||||
"content" : "... base64 encoded attachment ..."
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Highlighting attachments
|
||||
------------------------
|
||||
|
|
|
@ -352,8 +352,11 @@ public class AttachmentMapper implements Mapper {
|
|||
} else if ("_name".equals(currentFieldName)) {
|
||||
name = parser.text();
|
||||
} else if ("language".equals(currentFieldName)) {
|
||||
// TODO should be _language
|
||||
// TODO deprecated form. Will be removed in 2.3
|
||||
language = parser.text();
|
||||
logger.debug("`language` is now deprecated. Use `_language`. See https://github.com/elasticsearch/elasticsearch-mapper-attachments/issues/68");
|
||||
} else if ("_language".equals(currentFieldName)) {
|
||||
language = parser.text();
|
||||
}
|
||||
} else if (token == XContentParser.Token.VALUE_NUMBER) {
|
||||
if ("_indexed_chars".equals(currentFieldName) || "_indexedChars".equals(currentFieldName)) {
|
||||
|
|
|
@ -74,7 +74,7 @@ public class LanguageDetectionAttachmentMapperTests extends ElasticsearchTestCas
|
|||
.field("content", html);
|
||||
|
||||
if (forcedLanguage.length > 0) {
|
||||
xcb.field("language", forcedLanguage[0]);
|
||||
xcb.field("_language", forcedLanguage[0]);
|
||||
}
|
||||
|
||||
xcb.endObject().endObject();
|
||||
|
|
Loading…
Reference in New Issue