Add support for multi-fields
Now that https://github.com/elasticsearch/elasticsearch/pull/6867 has been merged into the Elasticsearch core code (branch 1.x, i.e. es 1.4), we can support multi fields in the mapper attachment plugin.

```
DELETE /test
PUT /test
{
  "settings": {
    "number_of_shards": 1
  }
}
PUT /test/person/_mapping
{
  "person": {
    "properties": {
      "file": {
        "type": "attachment",
        "path": "full",
        "fields": {
          "file": {
            "type": "string",
            "fields": {
              "store": {
                "type": "string",
                "store": true
              }
            }
          },
          "content_type": {
            "type": "string",
            "fields": {
              "store": {
                "type": "string",
                "store": true
              },
              "untouched": {
                "type": "string",
                "index": "not_analyzed",
                "store": true
              }
            }
          }
        }
      }
    }
  }
}
PUT /test/person/1?refresh=true
{
  "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}
GET /test/person/_search
{
  "fields": [ "file.store", "file.content_type.store" ],
  "aggs": {
    "store": {
      "terms": {
        "field": "file.content_type.store"
      }
    },
    "untouched": {
      "terms": {
        "field": "file.content_type.untouched"
      }
    }
  }
}
```

It gives:

```js
{
  "took": 3,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 1,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": 1,
        "fields": {
          "file.store": [
            "\"God Save the Queen\" (alternatively \"God Save the King\"\n"
          ],
          "file.content_type.store": [
            "text/plain; charset=ISO-8859-1"
          ]
        }
      }
    ]
  },
  "aggregations": {
    "store": {
      "doc_count_error_upper_bound": 0,
      "buckets": [
        { "key": "1", "doc_count": 1 },
        { "key": "8859", "doc_count": 1 },
        { "key": "charset", "doc_count": 1 },
        { "key": "iso", "doc_count": 1 },
        { "key": "plain", "doc_count": 1 },
        { "key": "text", "doc_count": 1 }
      ]
    },
    "untouched": {
      "doc_count_error_upper_bound": 0,
      "buckets": [
        { "key": "text/plain; charset=ISO-8859-1", "doc_count": 1 }
      ]
    }
  }
}
```

Note that using the shorter definition works as well:

```
DELETE /test
PUT /test
{
  "settings": {
    "number_of_shards": 1
  }
}
PUT /test/person/_mapping
{
  "person": {
    "properties": {
      "file": {
        "type": "attachment"
      }
    }
  }
}
PUT /test/person/1?refresh=true
{
  "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}
GET /test/person/_search
{
  "query": {
    "match": {
      "file": "king"
    }
  }
}
```

gives:

```js
{
  "took": 53,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.095891505,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": 0.095891505,
        "_source": {
          "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
        }
      }
    ]
  }
}
```

Closes #57.

(cherry picked from commit 432d7c0)
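The same multi-field pattern also works on the extracted metadata fields. As a minimal sketch following the mapping above (the `untouched` sub-field name is just an illustrative choice; `author` is one of the metadata fields the plugin extracts):

```
PUT /test/person/_mapping
{
  "person": {
    "properties": {
      "file": {
        "type": "attachment",
        "fields": {
          "author": {
            "type": "string",
            "fields": {
              "untouched": { "type": "string", "index": "not_analyzed" }
            }
          }
        }
      }
    }
  }
}
```

A terms aggregation on `file.author.untouched` would then bucket on the exact author string rather than on analyzed tokens, just as `file.content_type.untouched` does above.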
This commit is contained in: parent 663d4eaddb, commit ad986eb2fc
README.md

````
@@ -25,11 +25,14 @@ Please read documentation relative to the version you are using:

The `attachment` type allows to index different "attachment" type field (encoded as `base64`), for example,
microsoft office formats, open document formats, ePub, HTML, and so on (full list can be found [here](http://tika.apache.org/1.5/formats.html)).

The `attachment` type is provided as a plugin extension. The plugin is a simple zip file that can be downloaded and placed under `$ES_HOME/plugins` location. It will be automatically detected and the `attachment` type will be added.
The `attachment` type is provided as a plugin extension. The plugin is a simple zip file that can be downloaded and
placed under `$ES_HOME/plugins/mapper-attachments` location. When the node will start, it will be automatically detected
and the `attachment` type will be added.

Using the attachment type is simple, in your mapping JSON, simply set a certain JSON element as attachment, for example:

```javascript
PUT /test/person/_mapping
{
    "person" : {
        "properties" : {

@@ -42,6 +45,7 @@ Using the attachment type is simple, in your mapping JSON, simply set a certain

In this case, the JSON to index can be:

```javascript
PUT /test/person/1
{
    "my_attachment" : "... base64 encoded attachment ..."
}

@@ -49,7 +53,8 @@ In this case, the JSON to index can be:

Or it is possible to use more elaborated JSON if content type, resource name or language need to be set explicitly:

```javascript
```
PUT /test/person/1
{
    "my_attachment" : {
        "_content_type" : "application/pdf",

@@ -60,7 +65,8 @@ Or it is possible to use more elaborated JSON if content type, resource name or
    }
```

The `attachment` type not only indexes the content of the doc, but also automatically adds meta data on the attachment as well (when available).
The `attachment` type not only indexes the content of the doc, but also automatically adds meta data on the attachment
as well (when available).

The metadata supported are:

@@ -75,9 +81,11 @@ The metadata supported are:

They can be queried using the "dot notation", for example: `my_attachment.author`.

Both the meta data and the actual content are simple core type mappers (string, date, ...), thus, they can be controlled in the mappings. For example:
Both the meta data and the actual content are simple core type mappers (string, date, ...), thus, they can be controlled
in the mappings. For example:

```javascript
PUT /test/person/_mapping
{
    "person" : {
        "properties" : {

@@ -99,12 +107,98 @@ Both the meta data and the actual content are simple core type mappers (string,
    }
```

In the above example, the actual content indexed is mapped under `fields` name `file`, and we decide not to index it, so it will only be available in the `_all` field. The other fields map to their respective metadata names, but there is no need to specify the `type` (like `string` or `date`) since it is already known.
In the above example, the actual content indexed is mapped under `fields` name `file`, and we decide not to index it, so
it will only be available in the `_all` field. The other fields map to their respective metadata names, but there is no
need to specify the `type` (like `string` or `date`) since it is already known.

Querying or accessing metadata
------------------------------

If you need to query on metadata fields, use the attachment field name dot the metadata field. For example:

```
DELETE /test
PUT /test
PUT /test/person/_mapping
{
  "person": {
    "properties": {
      "file": {
        "type": "attachment",
        "path": "full",
        "fields": {
          "content_type": {
            "type": "string",
            "store": true
          }
        }
      }
    }
  }
}
PUT /test/person/1?refresh=true
{
  "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}
GET /test/person/_search
{
  "fields": [ "file.content_type" ],
  "query": {
    "match": {
      "file.content_type": "text plain"
    }
  }
}
```

Will give you:

```
{
  "took": 2,
  "timed_out": false,
  "_shards": {
    "total": 5,
    "successful": 5,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.16273327,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": 0.16273327,
        "fields": {
          "file.content_type": [
            "text/plain; charset=ISO-8859-1"
          ]
        }
      }
    ]
  }
}
```

Indexed Characters
------------------

By default, `100000` characters are extracted when indexing the content. This default value can be changed by setting the `index.mapping.attachment.indexed_chars` setting. It can also be provided on a per document indexed using the `_indexed_chars` parameter. `-1` can be set to extract all text, but note that all the text needs to be allowed to be represented in memory.
By default, `100000` characters are extracted when indexing the content. This default value can be changed by setting
the `index.mapping.attachment.indexed_chars` setting. It can also be provided on a per document indexed using the
`_indexed_chars` parameter. `-1` can be set to extract all text, but note that all the text needs to be allowed to be
represented in memory:

```
PUT /test/person/1
{
  "my_attachment" : {
    "_indexed_chars" : -1,
    "_content" : "... base64 encoded attachment ..."
  }
}
```

Metadata parsing error handling
-------------------------------

@@ -135,22 +229,79 @@ Note that you can force language using `_language` field when sending your actua

Highlighting attachments
------------------------

If you want to highlight your attachment content, you will need to store your file content and set `term_vector` as follow:
If you want to highlight your attachment content, you will need to set `"store": true` and `"term_vector":"with_positions_offsets"`
for your attachment field. Here is a full script which does it:

```
PUT test/my_type/_mapping
DELETE /test
PUT /test
PUT /test/person/_mapping
{
  "my_type" : {
    "properties" : {
      "my_html_file" : {
        "type" : "attachment",
        "fields" : {
          "title" : { "store" : "yes" },
          "my_html_file" : { "term_vector":"with_positions_offsets", "store":"yes" }
        }
      }
  "person": {
    "properties": {
      "file": {
        "type": "attachment",
        "path": "full",
        "fields": {
          "file": {
            "type": "string",
            "term_vector":"with_positions_offsets",
            "store": true
          }
        }
      }
    }
  }
}
PUT /test/person/1?refresh=true
{
  "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}
GET /test/person/_search
{
  "fields": [],
  "query": {
    "match": {
      "file": "king queen"
    }
  },
  "highlight": {
    "fields": {
      "file": {
      }
    }
  }
}
```

It gives back:

```js
{
  "took": 9,
  "timed_out": false,
  "_shards": {
    "total": 1,
    "successful": 1,
    "failed": 0
  },
  "hits": {
    "total": 1,
    "max_score": 0.13561106,
    "hits": [
      {
        "_index": "test",
        "_type": "person",
        "_id": "1",
        "_score": 0.13561106,
        "highlight": {
          "file": [
            "\"God Save the <em>Queen</em>\" (alternatively \"God Save the <em>King</em>\"\n"
          ]
        }
      }
    ]
  }
}
```
````
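The Indexed Characters section above refers to the `index.mapping.attachment.indexed_chars` index setting. A minimal sketch of setting it when creating the index (the value `500000` is an arbitrary illustration, not a recommendation):

```
PUT /test
{
  "settings": {
    "index.mapping.attachment.indexed_chars": 500000
  }
}
```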
AttachmentMapper.java

```
@@ -19,22 +19,26 @@

package org.elasticsearch.index.mapper.attachment;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.IntegerFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import static org.elasticsearch.index.mapper.MapperBuilders.*;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parsePathType;
import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika;

@@ -57,7 +61,7 @@ import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika
 * _content_length = Specify the maximum amount of characters to extract from the attachment. If not specified, then the default for
 * tika is 100,000 characters. Caution is required when setting large values as this can cause memory issues.
 */
public class AttachmentMapper implements Mapper {
public class AttachmentMapper extends AbstractFieldMapper<Object> {

    private static ESLogger logger = ESLoggerFactory.getLogger(AttachmentMapper.class.getName());

@@ -67,7 +71,18 @@ public class AttachmentMapper implements Mapper {
        public static final ContentPath.Type PATH_TYPE = ContentPath.Type.FULL;
    }

    public static class Builder extends Mapper.Builder<Builder, AttachmentMapper> {
    public static class FieldNames {
        public static final String TITLE = "title";
        public static final String NAME = "name";
        public static final String AUTHOR = "author";
        public static final String KEYWORDS = "keywords";
        public static final String DATE = "date";
        public static final String CONTENT_TYPE = "content_type";
        public static final String CONTENT_LENGTH = "content_length";
        public static final String LANGUAGE = "language";
    }

    public static class Builder extends AbstractFieldMapper.Builder<Builder, AttachmentMapper> {

        private ContentPath.Type pathType = Defaults.PATH_TYPE;

@@ -79,24 +94,24 @@ public class AttachmentMapper implements Mapper {

        private Mapper.Builder contentBuilder;

        private Mapper.Builder titleBuilder = stringField("title");
        private Mapper.Builder titleBuilder = stringField(FieldNames.TITLE);

        private Mapper.Builder nameBuilder = stringField("name");
        private Mapper.Builder nameBuilder = stringField(FieldNames.NAME);

        private Mapper.Builder authorBuilder = stringField("author");
        private Mapper.Builder authorBuilder = stringField(FieldNames.AUTHOR);

        private Mapper.Builder keywordsBuilder = stringField("keywords");
        private Mapper.Builder keywordsBuilder = stringField(FieldNames.KEYWORDS);

        private Mapper.Builder dateBuilder = dateField("date");
        private Mapper.Builder dateBuilder = dateField(FieldNames.DATE);

        private Mapper.Builder contentTypeBuilder = stringField("content_type");
        private Mapper.Builder contentTypeBuilder = stringField(FieldNames.CONTENT_TYPE);

        private Mapper.Builder contentLengthBuilder = integerField("content_length");
        private Mapper.Builder contentLengthBuilder = integerField(FieldNames.CONTENT_LENGTH);

        private Mapper.Builder languageBuilder = stringField("language");
        private Mapper.Builder languageBuilder = stringField(FieldNames.LANGUAGE);

        public Builder(String name) {
            super(name);
            super(name, new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE));
            this.builder = this;
            this.contentBuilder = stringField(name);
        }

@@ -194,7 +209,9 @@ public class AttachmentMapper implements Mapper {
                langDetect = Boolean.FALSE;
            }

            return new AttachmentMapper(name, pathType, defaultIndexedChars, ignoreErrors, langDetect, contentMapper, dateMapper, titleMapper, nameMapper, authorMapper, keywordsMapper, contentTypeMapper, contentLength, language);
            return new AttachmentMapper(buildNames(context), pathType, defaultIndexedChars, ignoreErrors, langDetect, contentMapper,
                    dateMapper, titleMapper, nameMapper, authorMapper, keywordsMapper, contentTypeMapper, contentLength,
                    language, multiFieldsBuilder.build(this, context), copyTo);
        }
    }

@@ -221,6 +238,20 @@ public class AttachmentMapper implements Mapper {
     */
    public static class TypeParser implements Mapper.TypeParser {

        private Mapper.Builder<?, ?> findMapperBuilder(Map<String, Object> propNode, String propName, ParserContext parserContext) {
            String type;
            Object typeNode = propNode.get("type");
            if (typeNode != null) {
                type = typeNode.toString();
            } else {
                type = "string";
            }
            Mapper.TypeParser typeParser = parserContext.typeParser(type);
            Mapper.Builder<?, ?> mapperBuilder = typeParser.parse(propName, (Map<String, Object>) propNode, parserContext);

            return mapperBuilder;
        }

        @SuppressWarnings({"unchecked"})
        @Override
        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {

@@ -235,37 +266,41 @@ public class AttachmentMapper implements Mapper {
                    Map<String, Object> fieldsNode = (Map<String, Object>) fieldNode;
                    for (Map.Entry<String, Object> entry1 : fieldsNode.entrySet()) {
                        String propName = entry1.getKey();
                        Object propNode = entry1.getValue();
                        Map<String, Object> propNode = (Map<String, Object>) entry1.getValue();

                        boolean isString = false;
                        if (propNode != null && propNode instanceof Map) {
                            Object oType = ((Map<String, Object>) propNode).get("type");
                            if (oType != null && oType.equals(StringFieldMapper.CONTENT_TYPE)) {
                                isString = true;
                        Mapper.Builder<?, ?> mapperBuilder = findMapperBuilder(propNode, propName, parserContext);
                        parseMultiField((AbstractFieldMapper.Builder) mapperBuilder, fieldName, (Map<String, Object>) fieldNode, parserContext, propName, propNode);

                        if (propName.equals(name)) {
                            builder.content(mapperBuilder);
                        } else {
                            switch (propName) {
                                case FieldNames.DATE:
                                    builder.date(mapperBuilder);
                                    break;
                                case FieldNames.AUTHOR:
                                    builder.author(mapperBuilder);
                                    break;
                                case FieldNames.CONTENT_LENGTH:
                                    builder.contentLength(mapperBuilder);
                                    break;
                                case FieldNames.CONTENT_TYPE:
                                    builder.contentType(mapperBuilder);
                                    break;
                                case FieldNames.KEYWORDS:
                                    builder.keywords(mapperBuilder);
                                    break;
                                case FieldNames.LANGUAGE:
                                    builder.language(mapperBuilder);
                                    break;
                                case FieldNames.TITLE:
                                    builder.title(mapperBuilder);
                                    break;
                                case FieldNames.NAME:
                                    builder.name(mapperBuilder);
                                    break;
                            }
                        }

                        if (name.equals(propName)) {
                            // that is the content
                            builder.content(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse(name, (Map<String, Object>) propNode, parserContext));
                        } else if ("date".equals(propName)) {
                            // If a specific format is already defined here, we should use it
                            builder.date(parserContext.typeParser(isString ? StringFieldMapper.CONTENT_TYPE : DateFieldMapper.CONTENT_TYPE).parse("date", (Map<String, Object>) propNode, parserContext));
                        } else if ("title".equals(propName)) {
                            builder.title(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("title", (Map<String, Object>) propNode, parserContext));
                        } else if ("name".equals(propName)) {
                            builder.name(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("name", (Map<String, Object>) propNode, parserContext));
                        } else if ("author".equals(propName)) {
                            builder.author(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("author", (Map<String, Object>) propNode, parserContext));
                        } else if ("keywords".equals(propName)) {
                            builder.keywords(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("keywords", (Map<String, Object>) propNode, parserContext));
                        } else if ("content_type".equals(propName)) {
                            builder.contentType(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("content_type", (Map<String, Object>) propNode, parserContext));
                        } else if ("content_length".equals(propName)) {
                            builder.contentLength(parserContext.typeParser(IntegerFieldMapper.CONTENT_TYPE).parse("content_length", (Map<String, Object>) propNode, parserContext));
                        } else if ("language".equals(propName)) {
                            builder.language(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("language", (Map<String, Object>) propNode, parserContext));
                        }
                    }
                }
            }

@@ -274,8 +309,6 @@ public class AttachmentMapper implements Mapper {
        }
    }

    private final String name;

    private final ContentPath.Type pathType;

    private final int defaultIndexedChars;

@@ -302,10 +335,13 @@ public class AttachmentMapper implements Mapper {

    private final Mapper languageMapper;

    public AttachmentMapper(String name, ContentPath.Type pathType, int defaultIndexedChars, Boolean ignoreErrors, Boolean defaultLangDetect, Mapper contentMapper,
    public AttachmentMapper(Names names, ContentPath.Type pathType, int defaultIndexedChars, Boolean ignoreErrors,
                            Boolean defaultLangDetect, Mapper contentMapper,
                            Mapper dateMapper, Mapper titleMapper, Mapper nameMapper, Mapper authorMapper,
                            Mapper keywordsMapper, Mapper contentTypeMapper, Mapper contentLengthMapper, Mapper languageMapper) {
        this.name = name;
                            Mapper keywordsMapper, Mapper contentTypeMapper, Mapper contentLengthMapper,
                            Mapper languageMapper, MultiFields multiFields, CopyTo copyTo) {
        super(names, 1.0f, AbstractFieldMapper.Defaults.FIELD_TYPE, false, null, null, null, null, null, null, null,
                ImmutableSettings.EMPTY, multiFields, copyTo);
        this.pathType = pathType;
        this.defaultIndexedChars = defaultIndexedChars;
        this.ignoreErrors = ignoreErrors;

@@ -322,8 +358,18 @@ public class AttachmentMapper implements Mapper {
    }

    @Override
    public String name() {
        return name;
    public Object value(Object value) {
        return null;
    }

    @Override
    public FieldType defaultFieldType() {
        return AbstractFieldMapper.Defaults.FIELD_TYPE;
    }

    @Override
    public FieldDataType defaultFieldDataType() {
        return null;
    }

    @Override

@@ -393,7 +439,7 @@ public class AttachmentMapper implements Mapper {
            return;
        }

        context.externalValue(parsedContent);
        context = context.createExternalValueContext(parsedContent);
        contentMapper.parse(context);

        if (langDetect) {

@@ -404,78 +450,99 @@ public class AttachmentMapper implements Mapper {
                    LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
                    language = identifier.getLanguage();
                }
                context.externalValue(language);
                context = context.createExternalValueContext(language);
                languageMapper.parse(context);
            } catch(Throwable t) {
                logger.warn("Cannot detect language: {}", t.getMessage());
            }
        }

        try {
            context.externalValue(name);
            nameMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing name: {}", e.getMessage());
        }

        try {
            context.externalValue(metadata.get(Metadata.DATE));
            dateMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing date: {}: {}", e.getMessage(), context.externalValue());
        }

        try {
            context.externalValue(metadata.get(Metadata.TITLE));
            titleMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing title: {}: {}", e.getMessage(), context.externalValue());
        }

        try {
            context.externalValue(metadata.get(Metadata.AUTHOR));
            authorMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing author: {}: {}", e.getMessage(), context.externalValue());
        }

        try {
            context.externalValue(metadata.get(Metadata.KEYWORDS));
            keywordsMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing keywords: {}: {}", e.getMessage(), context.externalValue());
        }

        try {
            if (contentType != null) {
                context.externalValue(contentType);
            } else {
                context.externalValue(metadata.get(Metadata.CONTENT_TYPE));
        if (name != null) {
            try {
                context = context.createExternalValueContext(name);
                nameMapper.parse(context);
            } catch(MapperParsingException e){
                if (!ignoreErrors) throw e;
                if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing name: {}", e.getMessage());
            }
            contentTypeMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing content_type: {}: {}", e.getMessage(), context.externalValue());
        }

        if (metadata.get(Metadata.DATE) != null) {
            try {
                context = context.createExternalValueContext(metadata.get(Metadata.DATE));
                dateMapper.parse(context);
            } catch(MapperParsingException e){
                if (!ignoreErrors) throw e;
                if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing date: {}: {}", e.getMessage(), context.externalValue());
            }
        }

        if (metadata.get(Metadata.TITLE) != null) {
            try {
                context = context.createExternalValueContext(metadata.get(Metadata.TITLE));
                titleMapper.parse(context);
            } catch(MapperParsingException e){
                if (!ignoreErrors) throw e;
                if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing title: {}: {}", e.getMessage(), context.externalValue());
            }
        }

        if (metadata.get(Metadata.AUTHOR) != null) {
            try {
                context = context.createExternalValueContext(metadata.get(Metadata.AUTHOR));
                authorMapper.parse(context);
            } catch(MapperParsingException e){
                if (!ignoreErrors) throw e;
                if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing author: {}: {}", e.getMessage(), context.externalValue());
            }
        }

        if (metadata.get(Metadata.KEYWORDS) != null) {
            try {
                context = context.createExternalValueContext(metadata.get(Metadata.KEYWORDS));
                keywordsMapper.parse(context);
            } catch(MapperParsingException e){
                if (!ignoreErrors) throw e;
                if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing keywords: {}: {}", e.getMessage(), context.externalValue());
            }
        }

        if (contentType == null) {
            contentType = metadata.get(Metadata.CONTENT_TYPE);
        }
        if (contentType != null) {
            try {
                context = context.createExternalValueContext(contentType);
                contentTypeMapper.parse(context);
            } catch(MapperParsingException e){
                if (!ignoreErrors) throw e;
                if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing content_type: {}: {}", e.getMessage(), context.externalValue());
            }
        }

        int length = content.length;
        // If we have CONTENT_LENGTH from Tika we use it
        if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
            length = Integer.parseInt(metadata.get(Metadata.CONTENT_LENGTH));
        }

        try {
            if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
                // We try to get CONTENT_LENGTH from Tika first
                context.externalValue(metadata.get(Metadata.CONTENT_LENGTH));
            } else {
                // Otherwise, we use our byte[] length
                context.externalValue(content.length);
            }
            context = context.createExternalValueContext(length);
            contentLengthMapper.parse(context);
        } catch(MapperParsingException e){
            if (!ignoreErrors) throw e;
            if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing content_length: {}: {}", e.getMessage(), context.externalValue());
        }

        // multiFields.parse(this, context);
        if (copyTo != null) {
            copyTo.parse(context);
        }
    }

    @Override
    protected void parseCreateField(ParseContext parseContext, List<Field> fields) throws IOException {

    }

    @Override

@@ -515,7 +582,7 @@ public class AttachmentMapper implements Mapper {

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(name);
        builder.startObject(name());
        builder.field("type", CONTENT_TYPE);
        builder.field("path", pathType.name().toLowerCase());

@@ -529,9 +596,16 @@ public class AttachmentMapper implements Mapper {
        contentTypeMapper.toXContent(builder, params);
        contentLengthMapper.toXContent(builder, params);
        languageMapper.toXContent(builder, params);
        multiFields.toXContent(builder, params);
        builder.endObject();

        multiFields.toXContent(builder, params);
        builder.endObject();
        return builder;
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }
}
```
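In the reworked `parse()` above, an explicitly supplied content type wins and Tika's detected `Metadata.CONTENT_TYPE` is only used as a fallback when `contentType` is null. On the document side this corresponds to the elaborate JSON form from the README; a small sketch (the base64 payload is elided, and the field name `file` follows the mappings used in this commit):

```
PUT /test/person/1
{
  "file": {
    "_content_type": "text/plain",
    "_content": "... base64 encoded attachment ..."
  }
}
```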
MapperTestUtils.java (new file)

```
@@ -0,0 +1,82 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.mapper.xcontent;

import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatService;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.index.similarity.SimilarityLookupService;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.indices.fielddata.breaker.NoneCircuitBreakerService;

public class MapperTestUtils {

    public static DocumentMapperParser newParser() {
        return new DocumentMapperParser(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS, newAnalysisService(), new PostingsFormatService(new Index("test")),
                new DocValuesFormatService(new Index("test")), newSimilarityLookupService(), null);
    }

    public static DocumentMapperParser newParser(Settings indexSettings) {
        return new DocumentMapperParser(new Index("test"), indexSettings, newAnalysisService(indexSettings), new PostingsFormatService(new Index("test")),
                new DocValuesFormatService(new Index("test")), newSimilarityLookupService(), null);
    }

    public static MapperService newMapperService() {
        return newMapperService(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS);
    }

    public static MapperService newMapperService(Index index, Settings indexSettings) {
        return new MapperService(index, indexSettings, new Environment(), newAnalysisService(), new IndexFieldDataService(index, new NoneCircuitBreakerService()),
                new PostingsFormatService(index), new DocValuesFormatService(index), newSimilarityLookupService(), null);
    }

    public static AnalysisService newAnalysisService() {
        return newAnalysisService(ImmutableSettings.Builder.EMPTY_SETTINGS);
    }

    public static AnalysisService newAnalysisService(Settings indexSettings) {
        Injector parentInjector = new ModulesBuilder().add(new SettingsModule(indexSettings), new EnvironmentModule(new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS)), new IndicesAnalysisModule()).createInjector();
        Injector injector = new ModulesBuilder().add(
                new IndexSettingsModule(new Index("test"), indexSettings),
                new IndexNameModule(new Index("test")),
                new AnalysisModule(indexSettings, parentInjector.getInstance(IndicesAnalysisService.class))).createChildInjector(parentInjector);

        return injector.getInstance(AnalysisService.class);
    }

    public static SimilarityLookupService newSimilarityLookupService() {
        return new SimilarityLookupService(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS);
    }
}
```
MultifieldAttachmentMapperTests.java

```
@@ -19,11 +19,15 @@

package org.elasticsearch.index.mapper.xcontent;

import org.elasticsearch.common.Base64;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;

@@ -32,7 +36,7 @@ import org.junit.Before;
import org.junit.Test;

import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.*;

/**
 *

@@ -74,4 +78,71 @@ public class MultifieldAttachmentMapperTests extends ElasticsearchTestCase {
        assertThat(docMapper.mappers().fullName("file.content_type").mapper(), instanceOf(StringFieldMapper.class));
        assertThat(docMapper.mappers().fullName("file.content_type.suggest").mapper(), instanceOf(StringFieldMapper.class));
    }

    @Test
    public void testExternalValues() throws Exception {
        String originalText = "This is an elasticsearch mapper attachment test.";
        String contentType = "text/plain; charset=ISO-8859-1";
        String forcedName = "dummyname.txt";

        String bytes = Base64.encodeBytes(originalText.getBytes());

        MapperService mapperService = MapperTestUtils.newMapperService();
        mapperService.documentMapperParser().putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());

        String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/multifield/multifield-mapping.json");

        DocumentMapper documentMapper = mapperService.documentMapperParser().parse(mapping);

        ParsedDocument doc = documentMapper.parse("person", "1", XContentFactory.jsonBuilder()
                .startObject()
                    .field("file", bytes)
                .endObject()
                .bytes());

        assertThat(doc.rootDoc().getField("file"), notNullValue());
        assertThat(doc.rootDoc().getField("file").stringValue(), is(originalText + "\n"));

        assertThat(doc.rootDoc().getField("file.content_type"), notNullValue());
        assertThat(doc.rootDoc().getField("file.content_type").stringValue(), is(contentType));
        assertThat(doc.rootDoc().getField("file.content_type.suggest"), notNullValue());
        assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), is(contentType));
        assertThat(doc.rootDoc().getField("file.content_length"), notNullValue());
        assertThat(doc.rootDoc().getField("file.content_length").numericValue().intValue(), is(originalText.length()));

        assertThat(doc.rootDoc().getField("file.suggest"), notNullValue());
        assertThat(doc.rootDoc().getField("file.suggest").stringValue(), is(originalText + "\n"));

        // Let's force some values
        doc = documentMapper.parse("person", "1", XContentFactory.jsonBuilder()
                .startObject()
                    .startObject("file")
                        .field("content", bytes)
                        .field("_name", forcedName)
                    .endObject()
                .endObject()
                .bytes());

        assertThat(doc.rootDoc().getField("file"), notNullValue());
        assertThat(doc.rootDoc().getField("file").stringValue(), is(originalText + "\n"));

        assertThat(doc.rootDoc().getField("file.content_type"), notNullValue());
        assertThat(doc.rootDoc().getField("file.content_type").stringValue(), is(contentType));
        assertThat(doc.rootDoc().getField("file.content_type.suggest"), notNullValue());
        assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), is(contentType));
        assertThat(doc.rootDoc().getField("file.content_length"), notNullValue());
        assertThat(doc.rootDoc().getField("file.content_length").numericValue().intValue(), is(originalText.length()));

        assertThat(doc.rootDoc().getField("file.suggest"), notNullValue());
        assertThat(doc.rootDoc().getField("file.suggest").stringValue(), is(originalText + "\n"));

        assertThat(doc.rootDoc().getField("file.name"), notNullValue());
        assertThat(doc.rootDoc().getField("file.name").stringValue(), is(forcedName));
        // In mapping we have default store:false
        assertThat(doc.rootDoc().getField("file.name").fieldType().stored(), is(false));
        assertThat(doc.rootDoc().getField("file.name.suggest"), notNullValue());
        assertThat(doc.rootDoc().getField("file.name.suggest").stringValue(), is(forcedName));
        // In mapping we set store:true for suggest subfield
        assertThat(doc.rootDoc().getField("file.name.suggest").fieldType().stored(), is(true));
    }
}
```
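The second half of the test forces the resource name through the `_name` field and then checks that `file.name` is indexed but not stored, while `file.name.suggest` is stored. A rough REST equivalent of the document the test builds (the base64 payload is elided; `content` is used because that is the key the test sends):

```
PUT /test/person/1
{
  "file": {
    "content": "... base64 encoded attachment ...",
    "_name": "dummyname.txt"
  }
}
```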
SimpleAttachmentIntegrationTests.java

```
@@ -20,8 +20,8 @@
package org.elasticsearch.plugin.mapper.attachments.test;

import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.plugins.PluginsService;

@@ -32,7 +32,6 @@ import org.junit.Test;
import static org.elasticsearch.client.Requests.putMappingRequest;
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.queryString;
import static org.hamcrest.Matchers.equalTo;

@@ -58,13 +57,6 @@ public class SimpleAttachmentIntegrationTests extends ElasticsearchIntegrationTe
        createIndex("test");
    }

    @Override
    public Settings indexSettings() {
        return settingsBuilder()
                .put("index.numberOfReplicas", 0)
                .build();
    }

    @Test
    public void testSimpleAttachment() throws Exception {
        String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json");
```
multifield-mapping.json (test mapping)

```
@@ -26,7 +26,10 @@
        "name": {
          "type": "string",
          "fields": {
            "suggest": { "type": "string" }
            "suggest": {
              "type": "string",
              "store": true
            }
          }
        },
        "author": {
```
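With `store: true` now set on the `suggest` sub-field in this mapping, its value can be fetched back directly at search time. A minimal sketch against the field names used here (index and type names are illustrative):

```
GET /test/person/_search
{
  "fields": [ "file.name.suggest" ],
  "query": {
    "match_all": {}
  }
}
```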