first commit
This commit is contained in:
commit
c4a1275475
|
@ -0,0 +1,7 @@
|
|||
/data
|
||||
/work
|
||||
/logs
|
||||
/.idea
|
||||
/target
|
||||
.DS_Store
|
||||
*.iml
|
|
@ -0,0 +1,15 @@
|
|||
Mapper Attachments Type for ElasticSearch
|
||||
==================================
|
||||
|
||||
The mapper attachments plugin adds the `attachment` type to ElasticSearch using Tika.
|
||||
|
||||
In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-mapper-attachments/1.0.0`.
|
||||
|
||||
---------------------------------------
|
||||
| Mapper Attachments Plugin | ElasticSearch |
|
||||
---------------------------------------
|
||||
| master | 0.18 -> master |
|
||||
---------------------------------------
|
||||
| 1.0.0 | 0.18 -> master |
|
||||
---------------------------------------
|
||||
|
|
@ -0,0 +1,140 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<name>elasticsearch-mapper-attachments</name>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.elasticsearch</groupId>
|
||||
<artifactId>elasticsearch-mapper-attachments</artifactId>
|
||||
<version>1.0.0</version>
|
||||
<packaging>jar</packaging>
|
||||
<description>Mapper Attachments Type for ElasticSearch</description>
|
||||
<inceptionYear>2009</inceptionYear>
|
||||
<licenses>
|
||||
<license>
|
||||
<name>The Apache Software License, Version 2.0</name>
|
||||
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
|
||||
<distribution>repo</distribution>
|
||||
</license>
|
||||
</licenses>
|
||||
<scm>
|
||||
<connection>scm:git:git@github.com:elasticsearch/elasticsearch-mapper-attachments.git</connection>
|
||||
<developerConnection>scm:git:git@github.com:elasticsearch/elasticsearch-mapper-attachments.git
|
||||
</developerConnection>
|
||||
<url>http://github.com/elasticsearch/elasticsearch-mapper-attachments</url>
|
||||
</scm>
|
||||
|
||||
<parent>
|
||||
<groupId>org.sonatype.oss</groupId>
|
||||
<artifactId>oss-parent</artifactId>
|
||||
<version>7</version>
|
||||
</parent>
|
||||
|
||||
<properties>
|
||||
<elasticsearch.version>0.18.5</elasticsearch.version>
|
||||
</properties>
|
||||
|
||||
<repositories>
|
||||
</repositories>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.elasticsearch</groupId>
|
||||
<artifactId>elasticsearch</artifactId>
|
||||
<version>${elasticsearch.version}</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-app</artifactId>
|
||||
<version>0.10</version>
|
||||
<scope>compile</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>log4j</groupId>
|
||||
<artifactId>log4j</artifactId>
|
||||
<version>1.2.16</version>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.testng</groupId>
|
||||
<artifactId>testng</artifactId>
|
||||
<version>6.3.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.hamcrest</groupId>
|
||||
<artifactId>hamcrest-core</artifactId>
|
||||
<version>1.3.RC2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.hamcrest</groupId>
|
||||
<artifactId>hamcrest-library</artifactId>
|
||||
<version>1.3.RC2</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
<testResources>
|
||||
<testResource>
|
||||
<directory>${basedir}/src/test/java</directory>
|
||||
<includes>
|
||||
<include>**/*.json</include>
|
||||
<include>**/*.yml</include>
|
||||
<include>**/*.html</include>
|
||||
</includes>
|
||||
</testResource>
|
||||
</testResources>
|
||||
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<version>2.3.2</version>
|
||||
<configuration>
|
||||
<source>1.6</source>
|
||||
<target>1.6</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<version>2.11</version>
|
||||
<configuration>
|
||||
<includes>
|
||||
<include>**/*Tests.java</include>
|
||||
</includes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-source-plugin</artifactId>
|
||||
<version>2.1.2</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>attach-sources</id>
|
||||
<goals>
|
||||
<goal>jar</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<configuration>
|
||||
<descriptors>
|
||||
<descriptor>${basedir}/src/main/assemblies/plugin.xml</descriptor>
|
||||
</descriptors>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
</project>
|
|
@ -0,0 +1,26 @@
|
|||
<?xml version="1.0"?>
|
||||
<assembly>
|
||||
<id></id>
|
||||
<formats>
|
||||
<format>zip</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>true</useProjectArtifact>
|
||||
<useTransitiveFiltering>true</useTransitiveFiltering>
|
||||
<excludes>
|
||||
<exclude>org.elasticsearch:elasticsearch</exclude>
|
||||
</excludes>
|
||||
</dependencySet>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>true</useProjectArtifact>
|
||||
<useTransitiveFiltering>true</useTransitiveFiltering>
|
||||
<includes>
|
||||
<include>org.apache.tika:tika-app</include>
|
||||
</includes>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -0,0 +1,344 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.mapper.attachment;
|
||||
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.elasticsearch.common.io.FastByteArrayInputStream;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentParser;
|
||||
import org.elasticsearch.index.mapper.*;
|
||||
import org.elasticsearch.index.mapper.core.DateFieldMapper;
|
||||
import org.elasticsearch.index.mapper.core.StringFieldMapper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.elasticsearch.index.mapper.MapperBuilders.dateField;
|
||||
import static org.elasticsearch.index.mapper.MapperBuilders.stringField;
|
||||
import static org.elasticsearch.index.mapper.core.TypeParsers.parsePathType;
|
||||
import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika;
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* field1 : "..."
|
||||
* </pre>
|
||||
* <p>Or:
|
||||
* <pre>
|
||||
* {
|
||||
* file1 : {
|
||||
* _content_type : "application/pdf",
|
||||
* _name : "..../something.pdf",
|
||||
* content : ""
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
public class AttachmentMapper implements Mapper {
|
||||
|
||||
public static final String CONTENT_TYPE = "attachment";
|
||||
|
||||
public static class Defaults {
|
||||
public static final ContentPath.Type PATH_TYPE = ContentPath.Type.FULL;
|
||||
}
|
||||
|
||||
public static class Builder extends Mapper.Builder<Builder, AttachmentMapper> {
|
||||
|
||||
private ContentPath.Type pathType = Defaults.PATH_TYPE;
|
||||
|
||||
private StringFieldMapper.Builder contentBuilder;
|
||||
|
||||
private StringFieldMapper.Builder titleBuilder = stringField("title");
|
||||
|
||||
private StringFieldMapper.Builder authorBuilder = stringField("author");
|
||||
|
||||
private StringFieldMapper.Builder keywordsBuilder = stringField("keywords");
|
||||
|
||||
private DateFieldMapper.Builder dateBuilder = dateField("date");
|
||||
|
||||
private StringFieldMapper.Builder contentTypeBuilder = stringField("content_type");
|
||||
|
||||
public Builder(String name) {
|
||||
super(name);
|
||||
this.builder = this;
|
||||
this.contentBuilder = stringField(name);
|
||||
}
|
||||
|
||||
public Builder pathType(ContentPath.Type pathType) {
|
||||
this.pathType = pathType;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder content(StringFieldMapper.Builder content) {
|
||||
this.contentBuilder = content;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder date(DateFieldMapper.Builder date) {
|
||||
this.dateBuilder = date;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder author(StringFieldMapper.Builder author) {
|
||||
this.authorBuilder = author;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder title(StringFieldMapper.Builder title) {
|
||||
this.titleBuilder = title;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder keywords(StringFieldMapper.Builder keywords) {
|
||||
this.keywordsBuilder = keywords;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder contentType(StringFieldMapper.Builder contentType) {
|
||||
this.contentTypeBuilder = contentType;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public AttachmentMapper build(BuilderContext context) {
|
||||
ContentPath.Type origPathType = context.path().pathType();
|
||||
context.path().pathType(pathType);
|
||||
|
||||
// create the content mapper under the actual name
|
||||
StringFieldMapper contentMapper = contentBuilder.build(context);
|
||||
|
||||
// create the DC one under the name
|
||||
context.path().add(name);
|
||||
DateFieldMapper dateMapper = dateBuilder.build(context);
|
||||
StringFieldMapper authorMapper = authorBuilder.build(context);
|
||||
StringFieldMapper titleMapper = titleBuilder.build(context);
|
||||
StringFieldMapper keywordsMapper = keywordsBuilder.build(context);
|
||||
StringFieldMapper contentTypeMapper = contentTypeBuilder.build(context);
|
||||
context.path().remove();
|
||||
|
||||
context.path().pathType(origPathType);
|
||||
|
||||
return new AttachmentMapper(name, pathType, contentMapper, dateMapper, titleMapper, authorMapper, keywordsMapper, contentTypeMapper);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* field1 : { type : "attachment" }
|
||||
* </pre>
|
||||
* Or:
|
||||
* <pre>
|
||||
* field1 : {
|
||||
* type : "attachment",
|
||||
* fields : {
|
||||
* field1 : {type : "binary"},
|
||||
* title : {store : "yes"},
|
||||
* date : {store : "yes"}
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
*
|
||||
*/
|
||||
public static class TypeParser implements Mapper.TypeParser {
|
||||
|
||||
@SuppressWarnings({"unchecked"})
|
||||
@Override
|
||||
public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
|
||||
AttachmentMapper.Builder builder = new AttachmentMapper.Builder(name);
|
||||
|
||||
for (Map.Entry<String, Object> entry : node.entrySet()) {
|
||||
String fieldName = entry.getKey();
|
||||
Object fieldNode = entry.getValue();
|
||||
if (fieldName.equals("path")) {
|
||||
builder.pathType(parsePathType(name, fieldNode.toString()));
|
||||
} else if (fieldName.equals("fields")) {
|
||||
Map<String, Object> fieldsNode = (Map<String, Object>) fieldNode;
|
||||
for (Map.Entry<String, Object> entry1 : fieldsNode.entrySet()) {
|
||||
String propName = entry1.getKey();
|
||||
Object propNode = entry1.getValue();
|
||||
|
||||
if (name.equals(propName)) {
|
||||
// that is the content
|
||||
builder.content((StringFieldMapper.Builder) parserContext.typeParser("string").parse(name, (Map<String, Object>) propNode, parserContext));
|
||||
} else if ("date".equals(propName)) {
|
||||
builder.date((DateFieldMapper.Builder) parserContext.typeParser("date").parse("date", (Map<String, Object>) propNode, parserContext));
|
||||
} else if ("title".equals(propName)) {
|
||||
builder.title((StringFieldMapper.Builder) parserContext.typeParser("string").parse("title", (Map<String, Object>) propNode, parserContext));
|
||||
} else if ("author".equals(propName)) {
|
||||
builder.author((StringFieldMapper.Builder) parserContext.typeParser("string").parse("author", (Map<String, Object>) propNode, parserContext));
|
||||
} else if ("keywords".equals(propName)) {
|
||||
builder.keywords((StringFieldMapper.Builder) parserContext.typeParser("string").parse("keywords", (Map<String, Object>) propNode, parserContext));
|
||||
} else if ("content_type".equals(propName)) {
|
||||
builder.contentType((StringFieldMapper.Builder) parserContext.typeParser("string").parse("content_type", (Map<String, Object>) propNode, parserContext));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
||||
|
||||
private final String name;
|
||||
|
||||
private final ContentPath.Type pathType;
|
||||
|
||||
private final StringFieldMapper contentMapper;
|
||||
|
||||
private final DateFieldMapper dateMapper;
|
||||
|
||||
private final StringFieldMapper authorMapper;
|
||||
|
||||
private final StringFieldMapper titleMapper;
|
||||
|
||||
private final StringFieldMapper keywordsMapper;
|
||||
|
||||
private final StringFieldMapper contentTypeMapper;
|
||||
|
||||
public AttachmentMapper(String name, ContentPath.Type pathType, StringFieldMapper contentMapper,
|
||||
DateFieldMapper dateMapper, StringFieldMapper titleMapper, StringFieldMapper authorMapper,
|
||||
StringFieldMapper keywordsMapper, StringFieldMapper contentTypeMapper) {
|
||||
this.name = name;
|
||||
this.pathType = pathType;
|
||||
this.contentMapper = contentMapper;
|
||||
this.dateMapper = dateMapper;
|
||||
this.titleMapper = titleMapper;
|
||||
this.authorMapper = authorMapper;
|
||||
this.keywordsMapper = keywordsMapper;
|
||||
this.contentTypeMapper = contentTypeMapper;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void parse(ParseContext context) throws IOException {
|
||||
byte[] content = null;
|
||||
String contentType = null;
|
||||
String name = null;
|
||||
|
||||
XContentParser parser = context.parser();
|
||||
XContentParser.Token token = parser.currentToken();
|
||||
if (token == XContentParser.Token.VALUE_STRING) {
|
||||
content = parser.binaryValue();
|
||||
} else {
|
||||
String currentFieldName = null;
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if (token == XContentParser.Token.VALUE_STRING) {
|
||||
if ("content".equals(currentFieldName)) {
|
||||
content = parser.binaryValue();
|
||||
} else if ("_content_type".equals(currentFieldName)) {
|
||||
contentType = parser.text();
|
||||
} else if ("_name".equals(currentFieldName)) {
|
||||
name = parser.text();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Metadata metadata = new Metadata();
|
||||
if (contentType != null) {
|
||||
metadata.add(Metadata.CONTENT_TYPE, contentType);
|
||||
}
|
||||
if (name != null) {
|
||||
metadata.add(Metadata.RESOURCE_NAME_KEY, name);
|
||||
}
|
||||
|
||||
String parsedContent;
|
||||
try {
|
||||
parsedContent = tika().parseToString(new FastByteArrayInputStream(content), metadata);
|
||||
} catch (TikaException e) {
|
||||
throw new MapperParsingException("Failed to extract text for [" + name + "]", e);
|
||||
}
|
||||
|
||||
context.externalValue(parsedContent);
|
||||
contentMapper.parse(context);
|
||||
|
||||
context.externalValue(metadata.get(Metadata.DATE));
|
||||
dateMapper.parse(context);
|
||||
|
||||
context.externalValue(metadata.get(Metadata.TITLE));
|
||||
titleMapper.parse(context);
|
||||
|
||||
context.externalValue(metadata.get(Metadata.AUTHOR));
|
||||
authorMapper.parse(context);
|
||||
|
||||
context.externalValue(metadata.get(Metadata.KEYWORDS));
|
||||
keywordsMapper.parse(context);
|
||||
|
||||
context.externalValue(metadata.get(Metadata.CONTENT_TYPE));
|
||||
contentTypeMapper.parse(context);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
|
||||
// ignore this for now
|
||||
}
|
||||
|
||||
@Override
|
||||
public void traverse(FieldMapperListener fieldMapperListener) {
|
||||
contentMapper.traverse(fieldMapperListener);
|
||||
dateMapper.traverse(fieldMapperListener);
|
||||
titleMapper.traverse(fieldMapperListener);
|
||||
authorMapper.traverse(fieldMapperListener);
|
||||
keywordsMapper.traverse(fieldMapperListener);
|
||||
contentTypeMapper.traverse(fieldMapperListener);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void traverse(ObjectMapperListener objectMapperListener) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
contentMapper.close();
|
||||
dateMapper.close();
|
||||
titleMapper.close();
|
||||
authorMapper.close();
|
||||
keywordsMapper.close();
|
||||
contentTypeMapper.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject(name);
|
||||
builder.field("type", CONTENT_TYPE);
|
||||
builder.field("path", pathType.name().toLowerCase());
|
||||
|
||||
builder.startObject("fields");
|
||||
contentMapper.toXContent(builder, params);
|
||||
authorMapper.toXContent(builder, params);
|
||||
titleMapper.toXContent(builder, params);
|
||||
dateMapper.toXContent(builder, params);
|
||||
keywordsMapper.toXContent(builder, params);
|
||||
contentTypeMapper.toXContent(builder, params);
|
||||
builder.endObject();
|
||||
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.mapper.attachment;
|
||||
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class RegisterAttachmentType extends AbstractIndexComponent {
|
||||
|
||||
@Inject
|
||||
public RegisterAttachmentType(Index index, @IndexSettings Settings indexSettings, MapperService mapperService) {
|
||||
super(index, indexSettings);
|
||||
|
||||
mapperService.documentMapperParser().putTypeParser("attachment", new AttachmentMapper.TypeParser());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.mapper.attachments;
|
||||
|
||||
import org.elasticsearch.common.inject.AbstractModule;
|
||||
import org.elasticsearch.index.mapper.attachment.RegisterAttachmentType;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class AttachmentsIndexModule extends AbstractModule {
|
||||
|
||||
@Override
|
||||
protected void configure() {
|
||||
bind(RegisterAttachmentType.class).asEagerSingleton();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,50 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.mapper.attachments;
|
||||
|
||||
import org.elasticsearch.common.inject.Module;
|
||||
import org.elasticsearch.plugins.AbstractPlugin;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import static org.elasticsearch.common.collect.Lists.newArrayList;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class MapperAttachmentsPlugin extends AbstractPlugin {
|
||||
|
||||
@Override
|
||||
public String name() {
|
||||
return "mapper-attachments";
|
||||
}
|
||||
|
||||
@Override
|
||||
public String description() {
|
||||
return "Adds the attachment type allowing to parse difference attachment formats";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<Class<? extends Module>> indexModules() {
|
||||
Collection<Class<? extends Module>> modules = newArrayList();
|
||||
modules.add(AttachmentsIndexModule.class);
|
||||
return modules;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.mapper.attachments.tika;
|
||||
|
||||
import org.apache.tika.Tika;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class TikaInstance {
|
||||
|
||||
private static final Tika tika = new Tika();
|
||||
|
||||
public static Tika tika() {
|
||||
return tika;
|
||||
}
|
||||
}
|
|
@ -0,0 +1 @@
|
|||
plugin=org.elasticsearch.plugin.mapper.attachments.MapperAttachmentsPlugin
|
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.mapper.xcontent;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.analysis.AnalysisService;
|
||||
import org.elasticsearch.index.mapper.DocumentMapper;
|
||||
import org.elasticsearch.index.mapper.DocumentMapperParser;
|
||||
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
|
||||
import org.testng.annotations.BeforeClass;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
|
||||
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.containsString;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public class SimpleAttachmentMapperTests {
|
||||
|
||||
private DocumentMapperParser mapperParser;
|
||||
|
||||
@BeforeClass
|
||||
public void setupMapperParser() {
|
||||
mapperParser = new DocumentMapperParser(new Index("test"), new AnalysisService(new Index("test")));
|
||||
mapperParser.putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleMappings() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json");
|
||||
DocumentMapper docMapper = mapperParser.parse(mapping);
|
||||
byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/testXHTML.html");
|
||||
|
||||
byte[] json = jsonBuilder().startObject().field("_id", 1).field("file", html).endObject().copiedBytes();
|
||||
|
||||
Document doc = docMapper.parse(json).rootDoc();
|
||||
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file.content_type").mapper().names().indexName()), equalTo("application/xhtml+xml"));
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("XHTML test document"));
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("This document tests the ability of Apache Tika to extract content"));
|
||||
|
||||
// re-parse it
|
||||
String builtMapping = docMapper.mappingSource().string();
|
||||
docMapper = mapperParser.parse(builtMapping);
|
||||
|
||||
json = jsonBuilder().startObject().field("_id", 1).field("file", html).endObject().copiedBytes();
|
||||
|
||||
doc = docMapper.parse(json).rootDoc();
|
||||
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file.content_type").mapper().names().indexName()), equalTo("application/xhtml+xml"));
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("XHTML test document"));
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("This document tests the ability of Apache Tika to extract content"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,9 @@
|
|||
{
|
||||
person:{
|
||||
properties:{
|
||||
"file":{
|
||||
type:"attachment"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>XHTML test document</title>
|
||||
<meta name="Author" content="Tika Developers"/>
|
||||
<meta http-equiv="refresh" content="5"/>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
This document tests the ability of Apache Tika to extract content
|
||||
from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.mapper.attachments.test;
|
||||
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.action.count.CountResponse;
|
||||
import org.elasticsearch.common.logging.ESLogger;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.common.network.NetworkUtils;
|
||||
import org.elasticsearch.node.Node;
|
||||
import org.testng.annotations.*;
|
||||
|
||||
import static org.elasticsearch.client.Requests.*;
|
||||
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
|
||||
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
|
||||
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
|
||||
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
|
||||
import static org.elasticsearch.index.query.QueryBuilders.fieldQuery;
|
||||
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
@Test
|
||||
public class SimpleAttachmentIntegrationTests {
|
||||
|
||||
private final ESLogger logger = Loggers.getLogger(getClass());
|
||||
|
||||
private Node node;
|
||||
|
||||
@BeforeClass
|
||||
public void setupServer() {
|
||||
node = nodeBuilder().local(true).settings(settingsBuilder()
|
||||
.put("path.data", "target/data")
|
||||
.put("cluster.name", "test-cluster-" + NetworkUtils.getLocalAddress())
|
||||
.put("gateway.type", "none")).node();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public void closeServer() {
|
||||
node.close();
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void createIndex() {
|
||||
logger.info("creating index [test]");
|
||||
node.client().admin().indices().create(createIndexRequest("test").settings(settingsBuilder().put("index.numberOfReplicas", 0))).actionGet();
|
||||
logger.info("Running Cluster Health");
|
||||
ClusterHealthResponse clusterHealth = node.client().admin().cluster().health(clusterHealthRequest().waitForGreenStatus()).actionGet();
|
||||
logger.info("Done Cluster Health, status " + clusterHealth.status());
|
||||
assertThat(clusterHealth.timedOut(), equalTo(false));
|
||||
assertThat(clusterHealth.status(), equalTo(ClusterHealthStatus.GREEN));
|
||||
}
|
||||
|
||||
@AfterMethod
|
||||
public void deleteIndex() {
|
||||
logger.info("deleting index [test]");
|
||||
node.client().admin().indices().delete(deleteIndexRequest("test")).actionGet();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleAttachment() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json");
|
||||
byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/testXHTML.html");
|
||||
|
||||
node.client().admin().indices().putMapping(putMappingRequest("test").type("person").source(mapping)).actionGet();
|
||||
|
||||
node.client().index(indexRequest("test").type("person")
|
||||
.source(jsonBuilder().startObject().field("file", html).endObject())).actionGet();
|
||||
node.client().admin().indices().refresh(refreshRequest()).actionGet();
|
||||
|
||||
CountResponse countResponse = node.client().count(countRequest("test").query(fieldQuery("file.title", "test document"))).actionGet();
|
||||
assertThat(countResponse.count(), equalTo(1l));
|
||||
|
||||
countResponse = node.client().count(countRequest("test").query(fieldQuery("file", "tests the ability"))).actionGet();
|
||||
assertThat(countResponse.count(), equalTo(1l));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,5 @@
|
|||
log4j.rootLogger=INFO, out
|
||||
|
||||
log4j.appender.out=org.apache.log4j.ConsoleAppender
|
||||
log4j.appender.out.layout=org.apache.log4j.PatternLayout
|
||||
log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n
|
Loading…
Reference in New Issue