commit c4a12754758897119ad780e57d9fa3c076f7d506 Author: Shay Banon Date: Mon Dec 5 14:05:14 2011 +0200 first commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000000..06a1e6fedb6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +/data +/work +/logs +/.idea +/target +.DS_Store +*.iml diff --git a/README.md b/README.md new file mode 100644 index 00000000000..3aecb9886e4 --- /dev/null +++ b/README.md @@ -0,0 +1,15 @@ +Mapper Attachments Type for ElasticSearch +================================== + +The mapper attachments plugin adds the `attachment` type to ElasticSearch using Tika. + +In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-mapper-attachments/1.0.0`. + + --------------------------------------- + | Mapper Attachments Plugin | ElasticSearch | + --------------------------------------- + | master | 0.18 -> master | + --------------------------------------- + | 1.0.0 | 0.18 -> master | + --------------------------------------- + diff --git a/pom.xml b/pom.xml new file mode 100644 index 00000000000..4a20fe7c4a4 --- /dev/null +++ b/pom.xml @@ -0,0 +1,140 @@ + + + elasticsearch-mapper-attachments + 4.0.0 + org.elasticsearch + elasticsearch-mapper-attachments + 1.0.0 + jar + Mapper Attachments Type for ElasticSearch + 2009 + + + The Apache Software License, Version 2.0 + http://www.apache.org/licenses/LICENSE-2.0.txt + repo + + + + scm:git:git@github.com:elasticsearch/elasticsearch-mapper-attachments.git + scm:git:git@github.com:elasticsearch/elasticsearch-mapper-attachments.git + + http://github.com/elasticsearch/elasticsearch-mapper-attachments + + + + org.sonatype.oss + oss-parent + 7 + + + + 0.18.5 + + + + + + + + org.elasticsearch + elasticsearch + ${elasticsearch.version} + compile + + + + org.apache.tika + tika-app + 0.10 + compile + + + + log4j + log4j + 1.2.16 + runtime + + + + org.testng + testng + 6.3.1 + test + + + + org.hamcrest + hamcrest-core + 1.3.RC2 + test + + + + org.hamcrest 
hamcrest-library + 1.3.RC2 + test + + + + + + + + ${basedir}/src/test/java + + **/*.json + **/*.yml + **/*.html + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.3.2 + + 1.6 + 1.6 + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.11 + + + **/*Tests.java + + + + + org.apache.maven.plugins + maven-source-plugin + 2.1.2 + + + attach-sources + + jar + + + + + + maven-assembly-plugin + + + ${basedir}/src/main/assemblies/plugin.xml + + + + + + \ No newline at end of file diff --git a/src/main/assemblies/plugin.xml b/src/main/assemblies/plugin.xml new file mode 100644 index 00000000000..1e1324cf636 --- /dev/null +++ b/src/main/assemblies/plugin.xml @@ -0,0 +1,26 @@ + + + + + zip + + false + + + / + true + true + + org.elasticsearch:elasticsearch + + + + / + true + true + + org.apache.tika:tika-app + + + + \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java b/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java new file mode 100644 index 00000000000..6d698009a88 --- /dev/null +++ b/src/main/java/org/elasticsearch/index/mapper/attachment/AttachmentMapper.java @@ -0,0 +1,344 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.mapper.attachment; + +import org.apache.tika.exception.TikaException; +import org.apache.tika.metadata.Metadata; +import org.elasticsearch.common.io.FastByteArrayInputStream; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.*; +import org.elasticsearch.index.mapper.core.DateFieldMapper; +import org.elasticsearch.index.mapper.core.StringFieldMapper; + +import java.io.IOException; +import java.util.Map; + +import static org.elasticsearch.index.mapper.MapperBuilders.dateField; +import static org.elasticsearch.index.mapper.MapperBuilders.stringField; +import static org.elasticsearch.index.mapper.core.TypeParsers.parsePathType; +import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika; + +/** + *
+ *      field1 : "..."
+ * 
+ *

Or: + *

+ * {
+ *      file1 : {
+ *          _content_type : "application/pdf",
+ *          _name : "..../something.pdf",
+ *          content : ""
+ *      }
+ * }
+ * 
+ * Mapper for the {@code attachment} type: reads the binary content (and, in the object form, the optional {@code _content_type} and {@code _name} values) from the document, runs it through Tika, and hands the extracted text plus the date, title, author, keywords and content type metadata to the corresponding sub field mappers. + * NOTE(review): {@code parse} passes {@code content} to the input stream without a null check, so an object without a {@code content} value looks like it would fail - confirm. + */ +public class AttachmentMapper implements Mapper { + + public static final String CONTENT_TYPE = "attachment"; + + public static class Defaults { + public static final ContentPath.Type PATH_TYPE = ContentPath.Type.FULL; + } + + public static class Builder extends Mapper.Builder { + + private ContentPath.Type pathType = Defaults.PATH_TYPE; + + private StringFieldMapper.Builder contentBuilder; + + private StringFieldMapper.Builder titleBuilder = stringField("title"); + + private StringFieldMapper.Builder authorBuilder = stringField("author"); + + private StringFieldMapper.Builder keywordsBuilder = stringField("keywords"); + + private DateFieldMapper.Builder dateBuilder = dateField("date"); + + private StringFieldMapper.Builder contentTypeBuilder = stringField("content_type"); + + public Builder(String name) { + super(name); + this.builder = this; + this.contentBuilder = stringField(name); + } + + public Builder pathType(ContentPath.Type pathType) { + this.pathType = pathType; + return this; + } + + public Builder content(StringFieldMapper.Builder content) { + this.contentBuilder = content; + return this; + } + + public Builder date(DateFieldMapper.Builder date) { + this.dateBuilder = date; + return this; + } + + public Builder author(StringFieldMapper.Builder author) { + this.authorBuilder = author; + return this; + } + + public Builder title(StringFieldMapper.Builder title) { + this.titleBuilder = title; + return this; + } + + public Builder keywords(StringFieldMapper.Builder keywords) { + this.keywordsBuilder = keywords; + return this; + } + + public Builder contentType(StringFieldMapper.Builder contentType) { + this.contentTypeBuilder = contentType; + return this; + } + + @Override + public AttachmentMapper build(BuilderContext context) { + ContentPath.Type origPathType = context.path().pathType(); + context.path().pathType(pathType); + + // create the content mapper under the actual name + StringFieldMapper contentMapper = 
contentBuilder.build(context); + + // create the DC one under the name + context.path().add(name); + DateFieldMapper dateMapper = dateBuilder.build(context); + StringFieldMapper authorMapper = authorBuilder.build(context); + StringFieldMapper titleMapper = titleBuilder.build(context); + StringFieldMapper keywordsMapper = keywordsBuilder.build(context); + StringFieldMapper contentTypeMapper = contentTypeBuilder.build(context); + context.path().remove(); + + context.path().pathType(origPathType); + + return new AttachmentMapper(name, pathType, contentMapper, dateMapper, titleMapper, authorMapper, keywordsMapper, contentTypeMapper); + } + } + + /** + *
+     *  field1 : { type : "attachment" }
+     * 
+ * Or: + *
+     *  field1 : {
+     *      type : "attachment",
+     *      fields : {
+     *          field1 : {type : "binary"},
+     *          title : {store : "yes"},
+     *          date : {store : "yes"}
+     *      }
+     * }
+     * 
+ * Builds an {@link AttachmentMapper.Builder} from a mapping node: {@code path} sets the path type, and each entry under {@code fields} configures the content mapper (the entry named like the field itself) or the date, title, author, keywords or content_type sub mapper; other entries are ignored. + * + */ + public static class TypeParser implements Mapper.TypeParser { + + @SuppressWarnings({"unchecked"}) + @Override + public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { + AttachmentMapper.Builder builder = new AttachmentMapper.Builder(name); + + for (Map.Entry entry : node.entrySet()) { + String fieldName = entry.getKey(); + Object fieldNode = entry.getValue(); + if (fieldName.equals("path")) { + builder.pathType(parsePathType(name, fieldNode.toString())); + } else if (fieldName.equals("fields")) { + Map fieldsNode = (Map) fieldNode; + for (Map.Entry entry1 : fieldsNode.entrySet()) { + String propName = entry1.getKey(); + Object propNode = entry1.getValue(); + + if (name.equals(propName)) { + // that is the content + builder.content((StringFieldMapper.Builder) parserContext.typeParser("string").parse(name, (Map) propNode, parserContext)); + } else if ("date".equals(propName)) { + builder.date((DateFieldMapper.Builder) parserContext.typeParser("date").parse("date", (Map) propNode, parserContext)); + } else if ("title".equals(propName)) { + builder.title((StringFieldMapper.Builder) parserContext.typeParser("string").parse("title", (Map) propNode, parserContext)); + } else if ("author".equals(propName)) { + builder.author((StringFieldMapper.Builder) parserContext.typeParser("string").parse("author", (Map) propNode, parserContext)); + } else if ("keywords".equals(propName)) { + builder.keywords((StringFieldMapper.Builder) parserContext.typeParser("string").parse("keywords", (Map) propNode, parserContext)); + } else if ("content_type".equals(propName)) { + builder.contentType((StringFieldMapper.Builder) parserContext.typeParser("string").parse("content_type", (Map) propNode, parserContext)); + } + } + } + } + + return builder; + } + } + + private final String name; + + private final ContentPath.Type pathType; + + private final StringFieldMapper contentMapper; + + private final DateFieldMapper 
dateMapper; + + private final StringFieldMapper authorMapper; + + private final StringFieldMapper titleMapper; + + private final StringFieldMapper keywordsMapper; + + private final StringFieldMapper contentTypeMapper; + + public AttachmentMapper(String name, ContentPath.Type pathType, StringFieldMapper contentMapper, + DateFieldMapper dateMapper, StringFieldMapper titleMapper, StringFieldMapper authorMapper, + StringFieldMapper keywordsMapper, StringFieldMapper contentTypeMapper) { + this.name = name; + this.pathType = pathType; + this.contentMapper = contentMapper; + this.dateMapper = dateMapper; + this.titleMapper = titleMapper; + this.authorMapper = authorMapper; + this.keywordsMapper = keywordsMapper; + this.contentTypeMapper = contentTypeMapper; + } + + @Override + public String name() { + return name; + } + + @Override + public void parse(ParseContext context) throws IOException { + byte[] content = null; + String contentType = null; + String name = null; + + XContentParser parser = context.parser(); + XContentParser.Token token = parser.currentToken(); + if (token == XContentParser.Token.VALUE_STRING) { + content = parser.binaryValue(); + } else { + String currentFieldName = null; + while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { + if (token == XContentParser.Token.FIELD_NAME) { + currentFieldName = parser.currentName(); + } else if (token == XContentParser.Token.VALUE_STRING) { + if ("content".equals(currentFieldName)) { + content = parser.binaryValue(); + } else if ("_content_type".equals(currentFieldName)) { + contentType = parser.text(); + } else if ("_name".equals(currentFieldName)) { + name = parser.text(); + } + } + } + } + + Metadata metadata = new Metadata(); + if (contentType != null) { + metadata.add(Metadata.CONTENT_TYPE, contentType); + } + if (name != null) { + metadata.add(Metadata.RESOURCE_NAME_KEY, name); + } + + String parsedContent; + try { + parsedContent = tika().parseToString(new 
FastByteArrayInputStream(content), metadata); + } catch (TikaException e) { + throw new MapperParsingException("Failed to extract text for [" + name + "]", e); + } + + context.externalValue(parsedContent); + contentMapper.parse(context); + + context.externalValue(metadata.get(Metadata.DATE)); + dateMapper.parse(context); + + context.externalValue(metadata.get(Metadata.TITLE)); + titleMapper.parse(context); + + context.externalValue(metadata.get(Metadata.AUTHOR)); + authorMapper.parse(context); + + context.externalValue(metadata.get(Metadata.KEYWORDS)); + keywordsMapper.parse(context); + + context.externalValue(metadata.get(Metadata.CONTENT_TYPE)); + contentTypeMapper.parse(context); + } + + @Override + public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException { + // ignore this for now + } + + @Override + public void traverse(FieldMapperListener fieldMapperListener) { + contentMapper.traverse(fieldMapperListener); + dateMapper.traverse(fieldMapperListener); + titleMapper.traverse(fieldMapperListener); + authorMapper.traverse(fieldMapperListener); + keywordsMapper.traverse(fieldMapperListener); + contentTypeMapper.traverse(fieldMapperListener); + } + + @Override + public void traverse(ObjectMapperListener objectMapperListener) { + } + + @Override + public void close() { + contentMapper.close(); + dateMapper.close(); + titleMapper.close(); + authorMapper.close(); + keywordsMapper.close(); + contentTypeMapper.close(); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(name); + builder.field("type", CONTENT_TYPE); + builder.field("path", pathType.name().toLowerCase()); + + builder.startObject("fields"); + contentMapper.toXContent(builder, params); + authorMapper.toXContent(builder, params); + titleMapper.toXContent(builder, params); + dateMapper.toXContent(builder, params); + keywordsMapper.toXContent(builder, params); + 
contentTypeMapper.toXContent(builder, params); + builder.endObject(); + + builder.endObject(); + return builder; + } +} diff --git a/src/main/java/org/elasticsearch/index/mapper/attachment/RegisterAttachmentType.java b/src/main/java/org/elasticsearch/index/mapper/attachment/RegisterAttachmentType.java new file mode 100644 index 00000000000..b889e9a604e --- /dev/null +++ b/src/main/java/org/elasticsearch/index/mapper/attachment/RegisterAttachmentType.java @@ -0,0 +1,40 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.mapper.attachment; + +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.AbstractIndexComponent; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.settings.IndexSettings; + +/** + * Index component that, when constructed, registers {@link AttachmentMapper.TypeParser} under the {@code attachment} type name on the injected mapper service's document mapper parser. + */ +public class RegisterAttachmentType extends AbstractIndexComponent { + + @Inject + public RegisterAttachmentType(Index index, @IndexSettings Settings indexSettings, MapperService mapperService) { + super(index, indexSettings); + + mapperService.documentMapperParser().putTypeParser("attachment", new AttachmentMapper.TypeParser()); + } +} diff --git a/src/main/java/org/elasticsearch/plugin/mapper/attachments/AttachmentsIndexModule.java b/src/main/java/org/elasticsearch/plugin/mapper/attachments/AttachmentsIndexModule.java new file mode 100644 index 00000000000..7368604c701 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/mapper/attachments/AttachmentsIndexModule.java @@ -0,0 +1,34 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.plugin.mapper.attachments; + +import org.elasticsearch.common.inject.AbstractModule; +import org.elasticsearch.index.mapper.attachment.RegisterAttachmentType; + +/** + * Index-level injection module that binds {@link RegisterAttachmentType} as an eager singleton. + */ +public class AttachmentsIndexModule extends AbstractModule { + + @Override + protected void configure() { + bind(RegisterAttachmentType.class).asEagerSingleton(); + } +} diff --git a/src/main/java/org/elasticsearch/plugin/mapper/attachments/MapperAttachmentsPlugin.java b/src/main/java/org/elasticsearch/plugin/mapper/attachments/MapperAttachmentsPlugin.java new file mode 100644 index 00000000000..3bdfdfd4a03 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/mapper/attachments/MapperAttachmentsPlugin.java @@ -0,0 +1,50 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.plugin.mapper.attachments; + +import org.elasticsearch.common.inject.Module; +import org.elasticsearch.plugins.AbstractPlugin; + +import java.util.Collection; + +import static org.elasticsearch.common.collect.Lists.newArrayList; + +/** + * Plugin entry point named {@code mapper-attachments}; contributes {@link AttachmentsIndexModule} as its only index module. + */ +public class MapperAttachmentsPlugin extends AbstractPlugin { + + @Override + public String name() { + return "mapper-attachments"; + } + + @Override + public String description() { + return "Adds the attachment type allowing to parse different attachment formats"; + } + + @Override + public Collection> indexModules() { + Collection> modules = newArrayList(); + modules.add(AttachmentsIndexModule.class); + return modules; + } +} diff --git a/src/main/java/org/elasticsearch/plugin/mapper/attachments/tika/TikaInstance.java b/src/main/java/org/elasticsearch/plugin/mapper/attachments/tika/TikaInstance.java new file mode 100644 index 00000000000..c5847491312 --- /dev/null +++ b/src/main/java/org/elasticsearch/plugin/mapper/attachments/tika/TikaInstance.java @@ -0,0 +1,34 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.plugin.mapper.attachments.tika; + +import org.apache.tika.Tika; + +/** + * Holder for the single static {@link Tika} instance, which is created when the class is initialized and returned by {@link #tika()}. + */ +public class TikaInstance { + + private static final Tika tika = new Tika(); + + public static Tika tika() { + return tika; + } +} diff --git a/src/main/resources/es-plugin.properties b/src/main/resources/es-plugin.properties new file mode 100644 index 00000000000..08c6c9331d4 --- /dev/null +++ b/src/main/resources/es-plugin.properties @@ -0,0 +1 @@ +plugin=org.elasticsearch.plugin.mapper.attachments.MapperAttachmentsPlugin diff --git a/src/test/java/org/elasticsearch/index/mapper/xcontent/SimpleAttachmentMapperTests.java b/src/test/java/org/elasticsearch/index/mapper/xcontent/SimpleAttachmentMapperTests.java new file mode 100644 index 00000000000..c86296a0ec7 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/mapper/xcontent/SimpleAttachmentMapperTests.java @@ -0,0 +1,78 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.mapper.xcontent; + +import org.apache.lucene.document.Document; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.analysis.AnalysisService; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.DocumentMapperParser; +import org.elasticsearch.index.mapper.attachment.AttachmentMapper; +import org.testng.annotations.BeforeClass; +import org.testng.annotations.Test; + +import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath; +import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; + +/** + * Parses the XHTML fixture through a mapping with an {@code attachment} field and checks the extracted content type, title and text, first with the mapping from test-mapping.json and then again with the mapping rebuilt from the mapper's own mapping source. + */ +@Test +public class SimpleAttachmentMapperTests { + + private DocumentMapperParser mapperParser; + + @BeforeClass + public void setupMapperParser() { + mapperParser = new DocumentMapperParser(new Index("test"), new AnalysisService(new Index("test"))); + mapperParser.putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser()); + } + + @Test + public void testSimpleMappings() throws Exception { + String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json"); + DocumentMapper docMapper = mapperParser.parse(mapping); + byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/testXHTML.html"); + + byte[] json = jsonBuilder().startObject().field("_id", 1).field("file", html).endObject().copiedBytes(); + + Document doc = docMapper.parse(json).rootDoc(); + + assertThat(doc.get(docMapper.mappers().smartName("file.content_type").mapper().names().indexName()), equalTo("application/xhtml+xml")); + assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("XHTML test document")); + 
assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("This document tests the ability of Apache Tika to extract content")); + + // re-parse it + String builtMapping = docMapper.mappingSource().string(); + docMapper = mapperParser.parse(builtMapping); + + json = jsonBuilder().startObject().field("_id", 1).field("file", html).endObject().copiedBytes(); + + doc = docMapper.parse(json).rootDoc(); + + assertThat(doc.get(docMapper.mappers().smartName("file.content_type").mapper().names().indexName()), equalTo("application/xhtml+xml")); + assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("XHTML test document")); + assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("This document tests the ability of Apache Tika to extract content")); + } +} diff --git a/src/test/java/org/elasticsearch/index/mapper/xcontent/test-mapping.json b/src/test/java/org/elasticsearch/index/mapper/xcontent/test-mapping.json new file mode 100644 index 00000000000..4f2a9288a7b --- /dev/null +++ b/src/test/java/org/elasticsearch/index/mapper/xcontent/test-mapping.json @@ -0,0 +1,9 @@ +{ + person:{ + properties:{ + "file":{ + type:"attachment" + } + } + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/index/mapper/xcontent/testXHTML.html b/src/test/java/org/elasticsearch/index/mapper/xcontent/testXHTML.html new file mode 100644 index 00000000000..bdccf83ba50 --- /dev/null +++ b/src/test/java/org/elasticsearch/index/mapper/xcontent/testXHTML.html @@ -0,0 +1,29 @@ + + + + XHTML test document + + + + +

+ This document tests the ability of Apache Tika to extract content + from an XHTML document. +

+ + \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/plugin/mapper/attachments/test/SimpleAttachmentIntegrationTests.java b/src/test/java/org/elasticsearch/plugin/mapper/attachments/test/SimpleAttachmentIntegrationTests.java new file mode 100644 index 00000000000..aac92dcbf1d --- /dev/null +++ b/src/test/java/org/elasticsearch/plugin/mapper/attachments/test/SimpleAttachmentIntegrationTests.java @@ -0,0 +1,98 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.plugin.mapper.attachments.test; + +import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; +import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; +import org.elasticsearch.action.count.CountResponse; +import org.elasticsearch.common.logging.ESLogger; +import org.elasticsearch.common.logging.Loggers; +import org.elasticsearch.common.network.NetworkUtils; +import org.elasticsearch.node.Node; +import org.testng.annotations.*; + +import static org.elasticsearch.client.Requests.*; +import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath; +import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; +import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder; +import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.index.query.QueryBuilders.fieldQuery; +import static org.elasticsearch.node.NodeBuilder.nodeBuilder; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.equalTo; + +/** + * Starts a local node for the class, creates and deletes the {@code test} index around each test method, and checks that an XHTML attachment indexed under the attachment mapping is matched once by count queries on {@code file.title} and on {@code file}. + */ +@Test +public class SimpleAttachmentIntegrationTests { + + private final ESLogger logger = Loggers.getLogger(getClass()); + + private Node node; + + @BeforeClass + public void setupServer() { + node = nodeBuilder().local(true).settings(settingsBuilder() + .put("path.data", "target/data") + .put("cluster.name", "test-cluster-" + NetworkUtils.getLocalAddress()) + .put("gateway.type", "none")).node(); + } + + @AfterClass + public void closeServer() { + node.close(); + } + + @BeforeMethod + public void createIndex() { + logger.info("creating index [test]"); + node.client().admin().indices().create(createIndexRequest("test").settings(settingsBuilder().put("index.numberOfReplicas", 0))).actionGet(); + logger.info("Running Cluster Health"); + ClusterHealthResponse clusterHealth = 
node.client().admin().cluster().health(clusterHealthRequest().waitForGreenStatus()).actionGet(); + logger.info("Done Cluster Health, status " + clusterHealth.status()); + assertThat(clusterHealth.timedOut(), equalTo(false)); + assertThat(clusterHealth.status(), equalTo(ClusterHealthStatus.GREEN)); + } + + @AfterMethod + public void deleteIndex() { + logger.info("deleting index [test]"); + node.client().admin().indices().delete(deleteIndexRequest("test")).actionGet(); + } + + @Test + public void testSimpleAttachment() throws Exception { + String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json"); + byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/testXHTML.html"); + + node.client().admin().indices().putMapping(putMappingRequest("test").type("person").source(mapping)).actionGet(); + + node.client().index(indexRequest("test").type("person") + .source(jsonBuilder().startObject().field("file", html).endObject())).actionGet(); + node.client().admin().indices().refresh(refreshRequest()).actionGet(); + + CountResponse countResponse = node.client().count(countRequest("test").query(fieldQuery("file.title", "test document"))).actionGet(); + assertThat(countResponse.count(), equalTo(1l)); + + countResponse = node.client().count(countRequest("test").query(fieldQuery("file", "tests the ability"))).actionGet(); + assertThat(countResponse.count(), equalTo(1l)); + } +} \ No newline at end of file diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties new file mode 100644 index 00000000000..497c97f9959 --- /dev/null +++ b/src/test/resources/log4j.properties @@ -0,0 +1,5 @@ +log4j.rootLogger=INFO, out + +log4j.appender.out=org.apache.log4j.ConsoleAppender +log4j.appender.out.layout=org.apache.log4j.PatternLayout +log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n