Add support for multi-fields

Now that https://github.com/elasticsearch/elasticsearch/pull/6867 has been merged into the elasticsearch core code (branch 1.x - es 1.4),
we can support multi-fields in the mapper attachment plugin.

```
DELETE /test
PUT /test
{
  "settings": {
    "number_of_shards": 1
  }
}
PUT /test/person/_mapping
{
  "person": {
    "properties": {
      "file": {
        "type": "attachment",
        "path": "full",
        "fields": {
          "file": {
            "type": "string",
            "fields": {
              "store": {
                "type": "string",
                "store": true
              }
            }
          },
          "content_type": {
            "type": "string",
            "fields": {
              "store": {
                "type": "string",
                "store": true
              },
              "untouched": {
                "type": "string",
                "index": "not_analyzed",
                "store": true
              }
            }
          }
        }
      }
    }
  }
}

PUT /test/person/1?refresh=true
{
  "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}

GET /test/person/_search
{
  "fields": [
    "file.store",
    "file.content_type.store"
  ],
  "aggs": {
    "store": {
      "terms": {
        "field": "file.content_type.store"
      }
    },
    "untouched": {
      "terms": {
        "field": "file.content_type.untouched"
      }
    }
  }
}
```

It gives:

```js
{
   "took": 3,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 1,
      "hits": [
         {
            "_index": "test",
            "_type": "person",
            "_id": "1",
            "_score": 1,
            "fields": {
               "file.store": [
                  "\"God Save the Queen\" (alternatively \"God Save the King\"\n"
               ],
               "file.content_type.store": [
                  "text/plain; charset=ISO-8859-1"
               ]
            }
         }
      ]
   },
   "aggregations": {
      "store": {
         "doc_count_error_upper_bound": 0,
         "buckets": [
            {
               "key": "1",
               "doc_count": 1
            },
            {
               "key": "8859",
               "doc_count": 1
            },
            {
               "key": "charset",
               "doc_count": 1
            },
            {
               "key": "iso",
               "doc_count": 1
            },
            {
               "key": "plain",
               "doc_count": 1
            },
            {
               "key": "text",
               "doc_count": 1
            }
         ]
      },
      "untouched": {
         "doc_count_error_upper_bound": 0,
         "buckets": [
            {
               "key": "text/plain; charset=ISO-8859-1",
               "doc_count": 1
            }
         ]
      }
   }
}
```

Note that using a shorter definition works as well:

```
DELETE /test
PUT /test
{
  "settings": {
    "number_of_shards": 1
  }
}
PUT /test/person/_mapping
{
  "person": {
    "properties": {
      "file": {
        "type": "attachment"
      }
    }
  }
}
PUT /test/person/1?refresh=true
{
  "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}

GET /test/person/_search
{
  "query": {
    "match": {
      "file": "king"
    }
  }
}
```

gives:

```js
{
   "took": 53,
   "timed_out": false,
   "_shards": {
      "total": 1,
      "successful": 1,
      "failed": 0
   },
   "hits": {
      "total": 1,
      "max_score": 0.095891505,
      "hits": [
         {
            "_index": "test",
            "_type": "person",
            "_id": "1",
            "_score": 0.095891505,
            "_source": {
               "file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
            }
         }
      ]
   }
}
```

Closes #57.

(cherry picked from commit 432d7c0)
This commit is contained in:
David Pilato 2014-07-26 00:03:28 +02:00
parent 663d4eaddb
commit ad986eb2fc
6 changed files with 510 additions and 137 deletions

185
README.md
View File

@ -25,11 +25,14 @@ Please read documentation relative to the version you are using:
The `attachment` type allows to index different "attachment" type field (encoded as `base64`), for example,
microsoft office formats, open document formats, ePub, HTML, and so on (full list can be found [here](http://tika.apache.org/1.5/formats.html)).
The `attachment` type is provided as a plugin extension. The plugin is a simple zip file that can be downloaded and placed under `$ES_HOME/plugins` location. It will be automatically detected and the `attachment` type will be added.
The `attachment` type is provided as a plugin extension. The plugin is a simple zip file that can be downloaded and
placed under `$ES_HOME/plugins/mapper-attachments` location. When the node will start, it will be automatically detected
and the `attachment` type will be added.
Using the attachment type is simple, in your mapping JSON, simply set a certain JSON element as attachment, for example:
```javascript
PUT /test/person/_mapping
{
"person" : {
"properties" : {
@ -42,6 +45,7 @@ Using the attachment type is simple, in your mapping JSON, simply set a certain
In this case, the JSON to index can be:
```javascript
PUT /test/person/1
{
"my_attachment" : "... base64 encoded attachment ..."
}
@ -49,7 +53,8 @@ In this case, the JSON to index can be:
Or it is possible to use more elaborated JSON if content type, resource name or language need to be set explicitly:
```javascript
```
PUT /test/person/1
{
"my_attachment" : {
"_content_type" : "application/pdf",
@ -60,7 +65,8 @@ Or it is possible to use more elaborated JSON if content type, resource name or
}
```
The `attachment` type not only indexes the content of the doc, but also automatically adds meta data on the attachment as well (when available).
The `attachment` type not only indexes the content of the doc, but also automatically adds meta data on the attachment
as well (when available).
The metadata supported are:
@ -75,9 +81,11 @@ The metadata supported are:
They can be queried using the "dot notation", for example: `my_attachment.author`.
Both the meta data and the actual content are simple core type mappers (string, date, ...), thus, they can be controlled in the mappings. For example:
Both the meta data and the actual content are simple core type mappers (string, date, ...), thus, they can be controlled
in the mappings. For example:
```javascript
PUT /test/person/_mapping
{
"person" : {
"properties" : {
@ -99,12 +107,98 @@ Both the meta data and the actual content are simple core type mappers (string,
}
```
In the above example, the actual content indexed is mapped under `fields` name `file`, and we decide not to index it, so it will only be available in the `_all` field. The other fields map to their respective metadata names, but there is no need to specify the `type` (like `string` or `date`) since it is already known.
In the above example, the actual content indexed is mapped under `fields` name `file`, and we decide not to index it, so
it will only be available in the `_all` field. The other fields map to their respective metadata names, but there is no
need to specify the `type` (like `string` or `date`) since it is already known.
Querying or accessing metadata
------------------------------
If you need to query on metadata fields, use the attachment field name dot the metadata field. For example:
```
DELETE /test
PUT /test
PUT /test/person/_mapping
{
"person": {
"properties": {
"file": {
"type": "attachment",
"path": "full",
"fields": {
"content_type": {
"type": "string",
"store": true
}
}
}
}
}
}
PUT /test/person/1?refresh=true
{
"file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}
GET /test/person/_search
{
"fields": [ "file.content_type" ],
"query": {
"match": {
"file.content_type": "text plain"
}
}
}
```
Will give you:
```
{
"took": 2,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.16273327,
"hits": [
{
"_index": "test",
"_type": "person",
"_id": "1",
"_score": 0.16273327,
"fields": {
"file.content_type": [
"text/plain; charset=ISO-8859-1"
]
}
}
]
}
}
```
Indexed Characters
------------------
By default, `100000` characters are extracted when indexing the content. This default value can be changed by setting the `index.mapping.attachment.indexed_chars` setting. It can also be provided on a per document indexed using the `_indexed_chars` parameter. `-1` can be set to extract all text, but note that all the text needs to be allowed to be represented in memory.
By default, `100000` characters are extracted when indexing the content. This default value can be changed by setting
the `index.mapping.attachment.indexed_chars` setting. It can also be provided on a per-document basis using the
`_indexed_chars` parameter. `-1` can be set to extract all text, but note that all of the text then needs to fit
in memory:
```
PUT /test/person/1
{
"my_attachment" : {
"_indexed_chars" : -1,
"_content" : "... base64 encoded attachment ..."
}
}
```
Metadata parsing error handling
-------------------------------
@ -135,22 +229,79 @@ Note that you can force language using `_language` field when sending your actua
Highlighting attachments
------------------------
If you want to highlight your attachment content, you will need to store your file content and set `term_vector` as follow:
If you want to highlight your attachment content, you will need to set `"store": true` and `"term_vector":"with_positions_offsets"`
for your attachment field. Here is a full script which does it:
```
PUT test/my_type/_mapping
DELETE /test
PUT /test
PUT /test/person/_mapping
{
"my_type" : {
"properties" : {
"my_html_file" : {
"type" : "attachment",
"fields" : {
"title" : { "store" : "yes" },
"my_html_file" : { "term_vector":"with_positions_offsets", "store":"yes" }
}
}
"person": {
"properties": {
"file": {
"type": "attachment",
"path": "full",
"fields": {
"file": {
"type": "string",
"term_vector":"with_positions_offsets",
"store": true
}
}
}
}
}
}
PUT /test/person/1?refresh=true
{
"file": "IkdvZCBTYXZlIHRoZSBRdWVlbiIgKGFsdGVybmF0aXZlbHkgIkdvZCBTYXZlIHRoZSBLaW5nIg=="
}
GET /test/person/_search
{
"fields": [],
"query": {
"match": {
"file": "king queen"
}
},
"highlight": {
"fields": {
"file": {
}
}
}
}
```
It gives back:
```js
{
"took": 9,
"timed_out": false,
"_shards": {
"total": 1,
"successful": 1,
"failed": 0
},
"hits": {
"total": 1,
"max_score": 0.13561106,
"hits": [
{
"_index": "test",
"_type": "person",
"_id": "1",
"_score": 0.13561106,
"highlight": {
"file": [
"\"God Save the <em>Queen</em>\" (alternatively \"God Save the <em>King</em>\"\n"
]
}
}
]
}
}
```

View File

@ -19,22 +19,26 @@
package org.elasticsearch.index.mapper.attachment;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.tika.language.LanguageIdentifier;
import org.apache.tika.metadata.Metadata;
import org.elasticsearch.common.io.stream.BytesStreamInput;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.ESLoggerFactory;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.fielddata.FieldDataType;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.IntegerFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import static org.elasticsearch.index.mapper.MapperBuilders.*;
import static org.elasticsearch.index.mapper.core.TypeParsers.parseMultiField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parsePathType;
import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika;
@ -57,7 +61,7 @@ import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika
* _content_length = Specify the maximum amount of characters to extract from the attachment. If not specified, then the default for
* tika is 100,000 characters. Caution is required when setting large values as this can cause memory issues.
*/
public class AttachmentMapper implements Mapper {
public class AttachmentMapper extends AbstractFieldMapper<Object> {
private static ESLogger logger = ESLoggerFactory.getLogger(AttachmentMapper.class.getName());
@ -67,7 +71,18 @@ public class AttachmentMapper implements Mapper {
public static final ContentPath.Type PATH_TYPE = ContentPath.Type.FULL;
}
public static class Builder extends Mapper.Builder<Builder, AttachmentMapper> {
public static class FieldNames {
public static final String TITLE = "title";
public static final String NAME = "name";
public static final String AUTHOR = "author";
public static final String KEYWORDS = "keywords";
public static final String DATE = "date";
public static final String CONTENT_TYPE = "content_type";
public static final String CONTENT_LENGTH = "content_length";
public static final String LANGUAGE = "language";
}
public static class Builder extends AbstractFieldMapper.Builder<Builder, AttachmentMapper> {
private ContentPath.Type pathType = Defaults.PATH_TYPE;
@ -79,24 +94,24 @@ public class AttachmentMapper implements Mapper {
private Mapper.Builder contentBuilder;
private Mapper.Builder titleBuilder = stringField("title");
private Mapper.Builder titleBuilder = stringField(FieldNames.TITLE);
private Mapper.Builder nameBuilder = stringField("name");
private Mapper.Builder nameBuilder = stringField(FieldNames.NAME);
private Mapper.Builder authorBuilder = stringField("author");
private Mapper.Builder authorBuilder = stringField(FieldNames.AUTHOR);
private Mapper.Builder keywordsBuilder = stringField("keywords");
private Mapper.Builder keywordsBuilder = stringField(FieldNames.KEYWORDS);
private Mapper.Builder dateBuilder = dateField("date");
private Mapper.Builder dateBuilder = dateField(FieldNames.DATE);
private Mapper.Builder contentTypeBuilder = stringField("content_type");
private Mapper.Builder contentTypeBuilder = stringField(FieldNames.CONTENT_TYPE);
private Mapper.Builder contentLengthBuilder = integerField("content_length");
private Mapper.Builder contentLengthBuilder = integerField(FieldNames.CONTENT_LENGTH);
private Mapper.Builder languageBuilder = stringField("language");
private Mapper.Builder languageBuilder = stringField(FieldNames.LANGUAGE);
public Builder(String name) {
super(name);
super(name, new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE));
this.builder = this;
this.contentBuilder = stringField(name);
}
@ -194,7 +209,9 @@ public class AttachmentMapper implements Mapper {
langDetect = Boolean.FALSE;
}
return new AttachmentMapper(name, pathType, defaultIndexedChars, ignoreErrors, langDetect, contentMapper, dateMapper, titleMapper, nameMapper, authorMapper, keywordsMapper, contentTypeMapper, contentLength, language);
return new AttachmentMapper(buildNames(context), pathType, defaultIndexedChars, ignoreErrors, langDetect, contentMapper,
dateMapper, titleMapper, nameMapper, authorMapper, keywordsMapper, contentTypeMapper, contentLength,
language, multiFieldsBuilder.build(this, context), copyTo);
}
}
@ -221,6 +238,20 @@ public class AttachmentMapper implements Mapper {
*/
public static class TypeParser implements Mapper.TypeParser {
/**
 * Resolves the type parser for a sub-field definition and builds its mapper.
 *
 * @param propNode      the field definition node; its "type" entry selects the
 *                      parser, defaulting to "string" when absent
 * @param propName      the name of the sub-field being parsed
 * @param parserContext context used to look up the registered type parser
 * @return the mapper builder produced by the resolved type parser
 */
private Mapper.Builder<?, ?> findMapperBuilder(Map<String, Object> propNode, String propName, ParserContext parserContext) {
    Object typeNode = propNode.get("type");
    // Fall back to "string" when no explicit type is given, mirroring the core mapping default.
    String type = typeNode != null ? typeNode.toString() : "string";
    Mapper.TypeParser typeParser = parserContext.typeParser(type);
    // propNode is already declared as Map<String, Object>; the previous unchecked cast was redundant.
    return typeParser.parse(propName, propNode, parserContext);
}
@SuppressWarnings({"unchecked"})
@Override
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
@ -235,37 +266,41 @@ public class AttachmentMapper implements Mapper {
Map<String, Object> fieldsNode = (Map<String, Object>) fieldNode;
for (Map.Entry<String, Object> entry1 : fieldsNode.entrySet()) {
String propName = entry1.getKey();
Object propNode = entry1.getValue();
Map<String, Object> propNode = (Map<String, Object>) entry1.getValue();
boolean isString = false;
if (propNode != null && propNode instanceof Map) {
Object oType = ((Map<String, Object>) propNode).get("type");
if (oType != null && oType.equals(StringFieldMapper.CONTENT_TYPE)) {
isString = true;
Mapper.Builder<?, ?> mapperBuilder = findMapperBuilder(propNode, propName, parserContext);
parseMultiField((AbstractFieldMapper.Builder) mapperBuilder, fieldName, (Map<String, Object>) fieldNode, parserContext, propName, propNode);
if (propName.equals(name)) {
builder.content(mapperBuilder);
} else {
switch (propName) {
case FieldNames.DATE:
builder.date(mapperBuilder);
break;
case FieldNames.AUTHOR:
builder.author(mapperBuilder);
break;
case FieldNames.CONTENT_LENGTH:
builder.contentLength(mapperBuilder);
break;
case FieldNames.CONTENT_TYPE:
builder.contentType(mapperBuilder);
break;
case FieldNames.KEYWORDS:
builder.keywords(mapperBuilder);
break;
case FieldNames.LANGUAGE:
builder.language(mapperBuilder);
break;
case FieldNames.TITLE:
builder.title(mapperBuilder);
break;
case FieldNames.NAME:
builder.name(mapperBuilder);
break;
}
}
if (name.equals(propName)) {
// that is the content
builder.content(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse(name, (Map<String, Object>) propNode, parserContext));
} else if ("date".equals(propName)) {
// If a specific format is already defined here, we should use it
builder.date(parserContext.typeParser(isString ? StringFieldMapper.CONTENT_TYPE : DateFieldMapper.CONTENT_TYPE).parse("date", (Map<String, Object>) propNode, parserContext));
} else if ("title".equals(propName)) {
builder.title(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("title", (Map<String, Object>) propNode, parserContext));
} else if ("name".equals(propName)) {
builder.name(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("name", (Map<String, Object>) propNode, parserContext));
} else if ("author".equals(propName)) {
builder.author(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("author", (Map<String, Object>) propNode, parserContext));
} else if ("keywords".equals(propName)) {
builder.keywords(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("keywords", (Map<String, Object>) propNode, parserContext));
} else if ("content_type".equals(propName)) {
builder.contentType(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("content_type", (Map<String, Object>) propNode, parserContext));
} else if ("content_length".equals(propName)) {
builder.contentLength(parserContext.typeParser(IntegerFieldMapper.CONTENT_TYPE).parse("content_length", (Map<String, Object>) propNode, parserContext));
} else if ("language".equals(propName)) {
builder.language(parserContext.typeParser(StringFieldMapper.CONTENT_TYPE).parse("language", (Map<String, Object>) propNode, parserContext));
}
}
}
}
@ -274,8 +309,6 @@ public class AttachmentMapper implements Mapper {
}
}
private final String name;
private final ContentPath.Type pathType;
private final int defaultIndexedChars;
@ -302,10 +335,13 @@ public class AttachmentMapper implements Mapper {
private final Mapper languageMapper;
public AttachmentMapper(String name, ContentPath.Type pathType, int defaultIndexedChars, Boolean ignoreErrors, Boolean defaultLangDetect, Mapper contentMapper,
public AttachmentMapper(Names names, ContentPath.Type pathType, int defaultIndexedChars, Boolean ignoreErrors,
Boolean defaultLangDetect, Mapper contentMapper,
Mapper dateMapper, Mapper titleMapper, Mapper nameMapper, Mapper authorMapper,
Mapper keywordsMapper, Mapper contentTypeMapper, Mapper contentLengthMapper, Mapper languageMapper) {
this.name = name;
Mapper keywordsMapper, Mapper contentTypeMapper, Mapper contentLengthMapper,
Mapper languageMapper, MultiFields multiFields, CopyTo copyTo) {
super(names, 1.0f, AbstractFieldMapper.Defaults.FIELD_TYPE, false, null, null, null, null, null, null, null,
ImmutableSettings.EMPTY, multiFields, copyTo);
this.pathType = pathType;
this.defaultIndexedChars = defaultIndexedChars;
this.ignoreErrors = ignoreErrors;
@ -322,8 +358,18 @@ public class AttachmentMapper implements Mapper {
}
@Override
public String name() {
return name;
public Object value(Object value) {
return null;
}
@Override
public FieldType defaultFieldType() {
return AbstractFieldMapper.Defaults.FIELD_TYPE;
}
@Override
public FieldDataType defaultFieldDataType() {
return null;
}
@Override
@ -393,7 +439,7 @@ public class AttachmentMapper implements Mapper {
return;
}
context.externalValue(parsedContent);
context = context.createExternalValueContext(parsedContent);
contentMapper.parse(context);
if (langDetect) {
@ -404,78 +450,99 @@ public class AttachmentMapper implements Mapper {
LanguageIdentifier identifier = new LanguageIdentifier(parsedContent);
language = identifier.getLanguage();
}
context.externalValue(language);
context = context.createExternalValueContext(language);
languageMapper.parse(context);
} catch(Throwable t) {
logger.warn("Cannot detect language: {}", t.getMessage());
}
}
try {
context.externalValue(name);
nameMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing name: {}", e.getMessage());
}
try {
context.externalValue(metadata.get(Metadata.DATE));
dateMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing date: {}: {}", e.getMessage(), context.externalValue());
}
try {
context.externalValue(metadata.get(Metadata.TITLE));
titleMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing title: {}: {}", e.getMessage(), context.externalValue());
}
try {
context.externalValue(metadata.get(Metadata.AUTHOR));
authorMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing author: {}: {}", e.getMessage(), context.externalValue());
}
try {
context.externalValue(metadata.get(Metadata.KEYWORDS));
keywordsMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing keywords: {}: {}", e.getMessage(), context.externalValue());
}
try {
if (contentType != null) {
context.externalValue(contentType);
} else {
context.externalValue(metadata.get(Metadata.CONTENT_TYPE));
if (name != null) {
try {
context = context.createExternalValueContext(name);
nameMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing name: {}", e.getMessage());
}
contentTypeMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing content_type: {}: {}", e.getMessage(), context.externalValue());
}
if (metadata.get(Metadata.DATE) != null) {
try {
context = context.createExternalValueContext(metadata.get(Metadata.DATE));
dateMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing date: {}: {}", e.getMessage(), context.externalValue());
}
}
if (metadata.get(Metadata.TITLE) != null) {
try {
context = context.createExternalValueContext(metadata.get(Metadata.TITLE));
titleMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing title: {}: {}", e.getMessage(), context.externalValue());
}
}
if (metadata.get(Metadata.AUTHOR) != null) {
try {
context = context.createExternalValueContext(metadata.get(Metadata.AUTHOR));
authorMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing author: {}: {}", e.getMessage(), context.externalValue());
}
}
if (metadata.get(Metadata.KEYWORDS) != null) {
try {
context = context.createExternalValueContext(metadata.get(Metadata.KEYWORDS));
keywordsMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing keywords: {}: {}", e.getMessage(), context.externalValue());
}
}
if (contentType == null) {
contentType = metadata.get(Metadata.CONTENT_TYPE);
}
if (contentType != null) {
try {
context = context.createExternalValueContext(contentType);
contentTypeMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing content_type: {}: {}", e.getMessage(), context.externalValue());
}
}
int length = content.length;
// If we have CONTENT_LENGTH from Tika we use it
if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
length = Integer.parseInt(metadata.get(Metadata.CONTENT_LENGTH));
}
try {
if (metadata.get(Metadata.CONTENT_LENGTH) != null) {
// We try to get CONTENT_LENGTH from Tika first
context.externalValue(metadata.get(Metadata.CONTENT_LENGTH));
} else {
// Otherwise, we use our byte[] length
context.externalValue(content.length);
}
context = context.createExternalValueContext(length);
contentLengthMapper.parse(context);
} catch(MapperParsingException e){
if (!ignoreErrors) throw e;
if (logger.isDebugEnabled()) logger.debug("Ignoring MapperParsingException catch while parsing content_length: {}: {}", e.getMessage(), context.externalValue());
}
// multiFields.parse(this, context);
if (copyTo != null) {
copyTo.parse(context);
}
}
@Override
// Intentionally a no-op: this mapper does not emit Lucene fields itself.
// NOTE(review): presumably field creation is delegated to the sub-mappers
// (content, metadata, ...) invoked from parse() — confirm against the full class.
protected void parseCreateField(ParseContext parseContext, List<Field> fields) throws IOException {
}
@Override
@ -515,7 +582,7 @@ public class AttachmentMapper implements Mapper {
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(name);
builder.startObject(name());
builder.field("type", CONTENT_TYPE);
builder.field("path", pathType.name().toLowerCase());
@ -529,9 +596,16 @@ public class AttachmentMapper implements Mapper {
contentTypeMapper.toXContent(builder, params);
contentLengthMapper.toXContent(builder, params);
languageMapper.toXContent(builder, params);
multiFields.toXContent(builder, params);
builder.endObject();
multiFields.toXContent(builder, params);
builder.endObject();
return builder;
}
@Override
protected String contentType() {
return CONTENT_TYPE;
}
}

View File

@ -0,0 +1,82 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.xcontent;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsModule;
import org.elasticsearch.env.Environment;
import org.elasticsearch.env.EnvironmentModule;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexNameModule;
import org.elasticsearch.index.analysis.AnalysisModule;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.codec.docvaluesformat.DocValuesFormatService;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatService;
import org.elasticsearch.index.fielddata.IndexFieldDataService;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.settings.IndexSettingsModule;
import org.elasticsearch.index.similarity.SimilarityLookupService;
import org.elasticsearch.indices.analysis.IndicesAnalysisModule;
import org.elasticsearch.indices.analysis.IndicesAnalysisService;
import org.elasticsearch.indices.fielddata.breaker.NoneCircuitBreakerService;
/**
 * Test helpers that wire up minimal mapper infrastructure (document mapper
 * parsers, analysis services and mapper services) against an in-memory "test"
 * index, so mapper unit tests can run without starting a full node.
 */
public class MapperTestUtils {

    // Utility class: static factories only, never instantiated.
    private MapperTestUtils() {
    }

    /** Creates a document mapper parser for the "test" index with empty settings. */
    public static DocumentMapperParser newParser() {
        // Delegate to the settings-aware variant to avoid duplicating the wiring.
        return newParser(ImmutableSettings.Builder.EMPTY_SETTINGS);
    }

    /** Creates a document mapper parser for the "test" index with the given settings. */
    public static DocumentMapperParser newParser(Settings indexSettings) {
        Index index = new Index("test");
        return new DocumentMapperParser(index, indexSettings, newAnalysisService(indexSettings),
                new PostingsFormatService(index), new DocValuesFormatService(index),
                newSimilarityLookupService(), null);
    }

    /** Creates a mapper service for the "test" index with empty settings. */
    public static MapperService newMapperService() {
        return newMapperService(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS);
    }

    /** Creates a mapper service for the given index and settings. */
    public static MapperService newMapperService(Index index, Settings indexSettings) {
        return new MapperService(index, indexSettings, new Environment(), newAnalysisService(),
                new IndexFieldDataService(index, new NoneCircuitBreakerService()),
                new PostingsFormatService(index), new DocValuesFormatService(index),
                newSimilarityLookupService(), null);
    }

    /** Creates an analysis service with empty settings. */
    public static AnalysisService newAnalysisService() {
        return newAnalysisService(ImmutableSettings.Builder.EMPTY_SETTINGS);
    }

    /** Creates an analysis service bound to the "test" index with the given settings. */
    public static AnalysisService newAnalysisService(Settings indexSettings) {
        // The parent injector provides the node-level analysis registry; the child
        // injector layers the index-scoped modules on top of it.
        Injector parentInjector = new ModulesBuilder().add(
                new SettingsModule(indexSettings),
                new EnvironmentModule(new Environment(ImmutableSettings.Builder.EMPTY_SETTINGS)),
                new IndicesAnalysisModule()).createInjector();
        Injector injector = new ModulesBuilder().add(
                new IndexSettingsModule(new Index("test"), indexSettings),
                new IndexNameModule(new Index("test")),
                new AnalysisModule(indexSettings, parentInjector.getInstance(IndicesAnalysisService.class)))
                .createChildInjector(parentInjector);
        return injector.getInstance(AnalysisService.class);
    }

    /** Creates a similarity lookup service for the "test" index with empty settings. */
    public static SimilarityLookupService newSimilarityLookupService() {
        return new SimilarityLookupService(new Index("test"), ImmutableSettings.Builder.EMPTY_SETTINGS);
    }
}

View File

@ -19,11 +19,15 @@
package org.elasticsearch.index.mapper.xcontent;
import org.elasticsearch.common.Base64;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
@@ -32,7 +36,7 @@ import org.junit.Before;
import org.junit.Test;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.*;
/**
*
@@ -74,4 +78,71 @@ public class MultifieldAttachmentMapperTests extends ElasticsearchTestCase {
assertThat(docMapper.mappers().fullName("file.content_type").mapper(), instanceOf(StringFieldMapper.class));
assertThat(docMapper.mappers().fullName("file.content_type.suggest").mapper(), instanceOf(StringFieldMapper.class));
}
@Test
public void testExternalValues() throws Exception {
    String originalText = "This is an elasticsearch mapper attachment test.";
    String contentType = "text/plain; charset=ISO-8859-1";
    String forcedName = "dummyname.txt";

    String bytes = Base64.encodeBytes(originalText.getBytes());

    MapperService mapperService = MapperTestUtils.newMapperService();
    mapperService.documentMapperParser().putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());

    String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/multifield/multifield-mapping.json");
    DocumentMapper documentMapper = mapperService.documentMapperParser().parse(mapping);

    // Plain case: the attachment is sent as a bare base64 string.
    ParsedDocument doc = documentMapper.parse("person", "1", XContentFactory.jsonBuilder()
            .startObject()
                .field("file", bytes)
            .endObject()
            .bytes());
    assertExtractedAttachmentFields(doc, originalText, contentType);

    // Forced case: the attachment is an object which also forces the file name.
    doc = documentMapper.parse("person", "1", XContentFactory.jsonBuilder()
            .startObject()
                .startObject("file")
                    .field("content", bytes)
                    .field("_name", forcedName)
                .endObject()
            .endObject()
            .bytes());
    assertExtractedAttachmentFields(doc, originalText, contentType);

    assertThat(doc.rootDoc().getField("file.name"), notNullValue());
    assertThat(doc.rootDoc().getField("file.name").stringValue(), is(forcedName));
    // In mapping we have default store:false
    assertThat(doc.rootDoc().getField("file.name").fieldType().stored(), is(false));
    assertThat(doc.rootDoc().getField("file.name.suggest"), notNullValue());
    assertThat(doc.rootDoc().getField("file.name.suggest").stringValue(), is(forcedName));
    // In mapping we set store:true for suggest subfield
    assertThat(doc.rootDoc().getField("file.name.suggest").fieldType().stored(), is(true));
}

/**
 * Asserts the fields that both parse variants (bare base64 string and
 * object with forced name) are expected to produce: extracted content,
 * content type plus its suggest sub-field, content length, and the
 * content suggest sub-field.
 */
private void assertExtractedAttachmentFields(ParsedDocument doc, String originalText, String contentType) {
    assertThat(doc.rootDoc().getField("file"), notNullValue());
    assertThat(doc.rootDoc().getField("file").stringValue(), is(originalText + "\n"));
    assertThat(doc.rootDoc().getField("file.content_type"), notNullValue());
    assertThat(doc.rootDoc().getField("file.content_type").stringValue(), is(contentType));
    assertThat(doc.rootDoc().getField("file.content_type.suggest"), notNullValue());
    assertThat(doc.rootDoc().getField("file.content_type.suggest").stringValue(), is(contentType));
    assertThat(doc.rootDoc().getField("file.content_length"), notNullValue());
    assertThat(doc.rootDoc().getField("file.content_length").numericValue().intValue(), is(originalText.length()));
    assertThat(doc.rootDoc().getField("file.suggest"), notNullValue());
    assertThat(doc.rootDoc().getField("file.suggest").stringValue(), is(originalText + "\n"));
}
}

View File

@@ -20,8 +20,8 @@
package org.elasticsearch.plugin.mapper.attachments.test;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.plugins.PluginsService;
@@ -32,7 +32,6 @@ import org.junit.Test;
import static org.elasticsearch.client.Requests.putMappingRequest;
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.queryString;
import static org.hamcrest.Matchers.equalTo;
@@ -58,13 +57,6 @@ public class SimpleAttachmentIntegrationTests extends ElasticsearchIntegrationTe
createIndex("test");
}
// NOTE(review): "index.numberOfReplicas" is the camelCase form; the canonical
// setting key is "index.number_of_replicas" — confirm ES actually resolves the
// camelCase variant, otherwise this override has no effect.
@Override
public Settings indexSettings() {
return settingsBuilder()
.put("index.numberOfReplicas", 0)
.build();
}
@Test
public void testSimpleAttachment() throws Exception {
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json");

View File

@@ -26,7 +26,10 @@
"name": {
"type": "string",
"fields": {
"suggest": { "type": "string" }
"suggest": {
"type": "string",
"store": true
}
}
},
"author": {