first commit

This commit is contained in:
Shay Banon 2011-12-05 14:05:14 +02:00
commit c4a1275475
15 changed files with 910 additions and 0 deletions

7
.gitignore vendored Normal file
View File

@ -0,0 +1,7 @@
/data
/work
/logs
/.idea
/target
.DS_Store
*.iml

15
README.md Normal file
View File

@ -0,0 +1,15 @@
Mapper Attachments Type for ElasticSearch
==================================
The mapper attachments plugin adds the `attachment` type to ElasticSearch using Tika.
In order to install the plugin, simply run: `bin/plugin -install elasticsearch/elasticsearch-mapper-attachments/1.0.0`.
---------------------------------------
| Mapper Attachments Plugin | ElasticSearch |
---------------------------------------
| master | 0.18 -> master |
---------------------------------------
| 1.0.0 | 0.18 -> master |
---------------------------------------

140
pom.xml Normal file
View File

@ -0,0 +1,140 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<name>elasticsearch-mapper-attachments</name>
<modelVersion>4.0.0</modelVersion>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch-mapper-attachments</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<description>Mapper Attachments Type for ElasticSearch</description>
<inceptionYear>2009</inceptionYear>
<licenses>
<license>
<name>The Apache Software License, Version 2.0</name>
<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
<distribution>repo</distribution>
</license>
</licenses>
<scm>
<connection>scm:git:git@github.com:elasticsearch/elasticsearch-mapper-attachments.git</connection>
<developerConnection>scm:git:git@github.com:elasticsearch/elasticsearch-mapper-attachments.git
</developerConnection>
<url>http://github.com/elasticsearch/elasticsearch-mapper-attachments</url>
</scm>
<parent>
<groupId>org.sonatype.oss</groupId>
<artifactId>oss-parent</artifactId>
<version>7</version>
</parent>
<properties>
<elasticsearch.version>0.18.5</elasticsearch.version>
</properties>
<repositories>
</repositories>
<dependencies>
<dependency>
<groupId>org.elasticsearch</groupId>
<artifactId>elasticsearch</artifactId>
<version>${elasticsearch.version}</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-app</artifactId>
<version>0.10</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.16</version>
<scope>runtime</scope>
</dependency>
<dependency>
<groupId>org.testng</groupId>
<artifactId>testng</artifactId>
<version>6.3.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-core</artifactId>
<version>1.3.RC2</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.hamcrest</groupId>
<artifactId>hamcrest-library</artifactId>
<version>1.3.RC2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<testResources>
<testResource>
<directory>${basedir}/src/test/java</directory>
<includes>
<include>**/*.json</include>
<include>**/*.yml</include>
<include>**/*.html</include>
</includes>
</testResource>
</testResources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>2.3.2</version>
<configuration>
<source>1.6</source>
<target>1.6</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<version>2.11</version>
<configuration>
<includes>
<include>**/*Tests.java</include>
</includes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.1.2</version>
<executions>
<execution>
<id>attach-sources</id>
<goals>
<goal>jar</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>${basedir}/src/main/assemblies/plugin.xml</descriptor>
</descriptors>
</configuration>
</plugin>
</plugins>
</build>
</project>

View File

@ -0,0 +1,26 @@
<?xml version="1.0"?>
<assembly>
<id></id>
<formats>
<format>zip</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<excludes>
<exclude>org.elasticsearch:elasticsearch</exclude>
</excludes>
</dependencySet>
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>true</useProjectArtifact>
<useTransitiveFiltering>true</useTransitiveFiltering>
<includes>
<include>org.apache.tika:tika-app</include>
</includes>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,344 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.attachment;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.elasticsearch.common.io.FastByteArrayInputStream;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.*;
import org.elasticsearch.index.mapper.core.DateFieldMapper;
import org.elasticsearch.index.mapper.core.StringFieldMapper;
import java.io.IOException;
import java.util.Locale;
import java.util.Map;
import static org.elasticsearch.index.mapper.MapperBuilders.dateField;
import static org.elasticsearch.index.mapper.MapperBuilders.stringField;
import static org.elasticsearch.index.mapper.core.TypeParsers.parsePathType;
import static org.elasticsearch.plugin.mapper.attachments.tika.TikaInstance.tika;
/**
 * Mapper for the {@code attachment} type. It runs the attachment bytes through Tika and
 * feeds the extracted text and metadata to a set of sub field mappers: the content itself
 * plus {@code date}, {@code title}, {@code author}, {@code keywords} and
 * {@code content_type}.
 *
 * <p>The field value may be given directly:
 * <pre>
 * field1 : "..."
 * </pre>
 * <p>Or as an object carrying optional hints that are passed to Tika:
 * <pre>
 * {
 *     file1 : {
 *         _content_type : "application/pdf",
 *         _name : "..../something.pdf",
 *         content : ""
 *     }
 * }
 * </pre>
 */
public class AttachmentMapper implements Mapper {

    /** Type name under which this mapper is registered in mappings. */
    public static final String CONTENT_TYPE = "attachment";

    /** Default settings applied when the mapping does not override them. */
    public static class Defaults {
        public static final ContentPath.Type PATH_TYPE = ContentPath.Type.FULL;
    }

    /**
     * Builder for {@link AttachmentMapper}. The content sub field is named after the
     * attachment field itself; the metadata sub fields default to {@code title},
     * {@code author}, {@code keywords}, {@code date} and {@code content_type}.
     */
    public static class Builder extends Mapper.Builder<Builder, AttachmentMapper> {

        private ContentPath.Type pathType = Defaults.PATH_TYPE;

        private StringFieldMapper.Builder contentBuilder;

        private StringFieldMapper.Builder titleBuilder = stringField("title");

        private StringFieldMapper.Builder authorBuilder = stringField("author");

        private StringFieldMapper.Builder keywordsBuilder = stringField("keywords");

        private DateFieldMapper.Builder dateBuilder = dateField("date");

        private StringFieldMapper.Builder contentTypeBuilder = stringField("content_type");

        /**
         * @param name name of the attachment field; also used as the name of the content sub field
         */
        public Builder(String name) {
            super(name);
            this.builder = this;
            this.contentBuilder = stringField(name);
        }

        public Builder pathType(ContentPath.Type pathType) {
            this.pathType = pathType;
            return this;
        }

        public Builder content(StringFieldMapper.Builder content) {
            this.contentBuilder = content;
            return this;
        }

        public Builder date(DateFieldMapper.Builder date) {
            this.dateBuilder = date;
            return this;
        }

        public Builder author(StringFieldMapper.Builder author) {
            this.authorBuilder = author;
            return this;
        }

        public Builder title(StringFieldMapper.Builder title) {
            this.titleBuilder = title;
            return this;
        }

        public Builder keywords(StringFieldMapper.Builder keywords) {
            this.keywordsBuilder = keywords;
            return this;
        }

        public Builder contentType(StringFieldMapper.Builder contentType) {
            this.contentTypeBuilder = contentType;
            return this;
        }

        /**
         * Builds the content mapper at the current path and the metadata mappers one
         * level below it (under this field's name), then restores the context's
         * original path type.
         */
        @Override
        public AttachmentMapper build(BuilderContext context) {
            ContentPath.Type origPathType = context.path().pathType();
            context.path().pathType(pathType);

            // create the content mapper under the actual name
            StringFieldMapper contentMapper = contentBuilder.build(context);

            // create the DC one under the name
            context.path().add(name);
            DateFieldMapper dateMapper = dateBuilder.build(context);
            StringFieldMapper authorMapper = authorBuilder.build(context);
            StringFieldMapper titleMapper = titleBuilder.build(context);
            StringFieldMapper keywordsMapper = keywordsBuilder.build(context);
            StringFieldMapper contentTypeMapper = contentTypeBuilder.build(context);
            context.path().remove();

            context.path().pathType(origPathType);

            return new AttachmentMapper(name, pathType, contentMapper, dateMapper, titleMapper, authorMapper, keywordsMapper, contentTypeMapper);
        }
    }

    /**
     * Parses the mapping definition of an attachment field into a {@link Builder}.
     * <pre>
     * field1 : { type : "attachment" }
     * </pre>
     * Or:
     * <pre>
     * field1 : {
     *     type : "attachment",
     *     fields : {
     *         field1 : {type : "binary"},
     *         title : {store : "yes"},
     *         date : {store : "yes"}
     *     }
     * }
     * </pre>
     * Only the {@code path} and {@code fields} keys are read; inside {@code fields},
     * entries other than the content field (named like the attachment field),
     * {@code date}, {@code title}, {@code author}, {@code keywords} and
     * {@code content_type} are ignored.
     */
    public static class TypeParser implements Mapper.TypeParser {

        @SuppressWarnings({"unchecked"})
        @Override
        public Mapper.Builder parse(String name, Map<String, Object> node, ParserContext parserContext) throws MapperParsingException {
            AttachmentMapper.Builder builder = new AttachmentMapper.Builder(name);

            for (Map.Entry<String, Object> entry : node.entrySet()) {
                String fieldName = entry.getKey();
                Object fieldNode = entry.getValue();
                if (fieldName.equals("path")) {
                    builder.pathType(parsePathType(name, fieldNode.toString()));
                } else if (fieldName.equals("fields")) {
                    Map<String, Object> fieldsNode = (Map<String, Object>) fieldNode;
                    for (Map.Entry<String, Object> entry1 : fieldsNode.entrySet()) {
                        String propName = entry1.getKey();
                        Object propNode = entry1.getValue();

                        if (name.equals(propName)) {
                            // that is the content
                            builder.content((StringFieldMapper.Builder) parserContext.typeParser("string").parse(name, (Map<String, Object>) propNode, parserContext));
                        } else if ("date".equals(propName)) {
                            builder.date((DateFieldMapper.Builder) parserContext.typeParser("date").parse("date", (Map<String, Object>) propNode, parserContext));
                        } else if ("title".equals(propName)) {
                            builder.title((StringFieldMapper.Builder) parserContext.typeParser("string").parse("title", (Map<String, Object>) propNode, parserContext));
                        } else if ("author".equals(propName)) {
                            builder.author((StringFieldMapper.Builder) parserContext.typeParser("string").parse("author", (Map<String, Object>) propNode, parserContext));
                        } else if ("keywords".equals(propName)) {
                            builder.keywords((StringFieldMapper.Builder) parserContext.typeParser("string").parse("keywords", (Map<String, Object>) propNode, parserContext));
                        } else if ("content_type".equals(propName)) {
                            builder.contentType((StringFieldMapper.Builder) parserContext.typeParser("string").parse("content_type", (Map<String, Object>) propNode, parserContext));
                        }
                    }
                }
            }

            return builder;
        }
    }

    private final String name;

    private final ContentPath.Type pathType;

    private final StringFieldMapper contentMapper;

    private final DateFieldMapper dateMapper;

    private final StringFieldMapper authorMapper;

    private final StringFieldMapper titleMapper;

    private final StringFieldMapper keywordsMapper;

    private final StringFieldMapper contentTypeMapper;

    /**
     * @param name              name of the attachment field
     * @param pathType          path type written back by {@link #toXContent}
     * @param contentMapper     receives the text extracted by Tika
     * @param dateMapper        receives the Tika {@code DATE} metadata value
     * @param titleMapper       receives the Tika {@code TITLE} metadata value
     * @param authorMapper      receives the Tika {@code AUTHOR} metadata value
     * @param keywordsMapper    receives the Tika {@code KEYWORDS} metadata value
     * @param contentTypeMapper receives the Tika {@code CONTENT_TYPE} metadata value
     */
    public AttachmentMapper(String name, ContentPath.Type pathType, StringFieldMapper contentMapper,
                            DateFieldMapper dateMapper, StringFieldMapper titleMapper, StringFieldMapper authorMapper,
                            StringFieldMapper keywordsMapper, StringFieldMapper contentTypeMapper) {
        this.name = name;
        this.pathType = pathType;
        this.contentMapper = contentMapper;
        this.dateMapper = dateMapper;
        this.titleMapper = titleMapper;
        this.authorMapper = authorMapper;
        this.keywordsMapper = keywordsMapper;
        this.contentTypeMapper = contentTypeMapper;
    }

    @Override
    public String name() {
        return name;
    }

    /**
     * Reads the attachment from the current parser position, extracts its text and
     * metadata with Tika and passes each extracted value to the matching sub mapper
     * as an external value.
     *
     * @throws MapperParsingException if no content was supplied or Tika fails to
     *                                extract text from it
     * @throws IOException            if reading from the parser or the content fails
     */
    @Override
    public void parse(ParseContext context) throws IOException {
        byte[] content = null;
        String contentType = null;
        // Deliberately not called "name": that would shadow this mapper's field name.
        String resourceName = null;

        XContentParser parser = context.parser();
        XContentParser.Token token = parser.currentToken();
        if (token == XContentParser.Token.VALUE_STRING) {
            // Short form: the field value is the attachment itself.
            content = parser.binaryValue();
        } else {
            // Object form: pick up the content and the optional hints for Tika.
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token == XContentParser.Token.VALUE_STRING) {
                    if ("content".equals(currentFieldName)) {
                        content = parser.binaryValue();
                    } else if ("_content_type".equals(currentFieldName)) {
                        contentType = parser.text();
                    } else if ("_name".equals(currentFieldName)) {
                        resourceName = parser.text();
                    }
                }
            }
        }

        // Fail with a mapping error instead of a NullPointerException further down.
        if (content == null) {
            throw new MapperParsingException("No content is provided for attachment field [" + name + "]");
        }

        Metadata metadata = new Metadata();
        if (contentType != null) {
            metadata.add(Metadata.CONTENT_TYPE, contentType);
        }
        if (resourceName != null) {
            metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
        }

        String parsedContent;
        try {
            parsedContent = tika().parseToString(new FastByteArrayInputStream(content), metadata);
        } catch (TikaException e) {
            // Fall back to the field name so the message never reads "[null]".
            String source = resourceName != null ? resourceName : name;
            throw new MapperParsingException("Failed to extract text for [" + source + "]", e);
        }

        context.externalValue(parsedContent);
        contentMapper.parse(context);

        context.externalValue(metadata.get(Metadata.DATE));
        dateMapper.parse(context);

        context.externalValue(metadata.get(Metadata.TITLE));
        titleMapper.parse(context);

        context.externalValue(metadata.get(Metadata.AUTHOR));
        authorMapper.parse(context);

        context.externalValue(metadata.get(Metadata.KEYWORDS));
        keywordsMapper.parse(context);

        context.externalValue(metadata.get(Metadata.CONTENT_TYPE));
        contentTypeMapper.parse(context);
    }

    @Override
    public void merge(Mapper mergeWith, MergeContext mergeContext) throws MergeMappingException {
        // ignore this for now
    }

    /** Forwards the listener to every sub field mapper. */
    @Override
    public void traverse(FieldMapperListener fieldMapperListener) {
        contentMapper.traverse(fieldMapperListener);
        dateMapper.traverse(fieldMapperListener);
        titleMapper.traverse(fieldMapperListener);
        authorMapper.traverse(fieldMapperListener);
        keywordsMapper.traverse(fieldMapperListener);
        contentTypeMapper.traverse(fieldMapperListener);
    }

    /** No-op: this mapper does not forward the listener to anything. */
    @Override
    public void traverse(ObjectMapperListener objectMapperListener) {
    }

    /** Closes every sub field mapper. */
    @Override
    public void close() {
        contentMapper.close();
        dateMapper.close();
        titleMapper.close();
        authorMapper.close();
        keywordsMapper.close();
        contentTypeMapper.close();
    }

    /**
     * Writes this mapper's definition: its type, its path type and, under
     * {@code fields}, the definition of every sub field mapper.
     */
    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(name);
        builder.field("type", CONTENT_TYPE);
        // Locale.ROOT keeps the emitted value independent of the JVM's default locale.
        builder.field("path", pathType.name().toLowerCase(Locale.ROOT));

        builder.startObject("fields");
        contentMapper.toXContent(builder, params);
        authorMapper.toXContent(builder, params);
        titleMapper.toXContent(builder, params);
        dateMapper.toXContent(builder, params);
        keywordsMapper.toXContent(builder, params);
        contentTypeMapper.toXContent(builder, params);
        builder.endObject();

        builder.endObject();
        return builder;
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.attachment;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.settings.IndexSettings;
/**
 * Index-level component whose only job is to register the attachment type parser
 * with the index's {@link MapperService}; the registration happens in the constructor.
 */
public class RegisterAttachmentType extends AbstractIndexComponent {

    /**
     * @param index         the index this component belongs to
     * @param indexSettings the settings of that index
     * @param mapperService the mapper service whose document mapper parser receives
     *                      the attachment type parser
     */
    @Inject
    public RegisterAttachmentType(Index index, @IndexSettings Settings indexSettings, MapperService mapperService) {
        super(index, indexSettings);

        // Use the mapper's own constant so the registered type name cannot drift
        // from the one the mapper writes into its mapping definition.
        mapperService.documentMapperParser().putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());
    }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments;
import org.elasticsearch.common.inject.AbstractModule;
import org.elasticsearch.index.mapper.attachment.RegisterAttachmentType;
/**
 * Injection module contributed by the attachments plugin. It binds
 * {@link RegisterAttachmentType} as an eager singleton; that class's constructor
 * registers the attachment type parser with the mapper service.
 */
public class AttachmentsIndexModule extends AbstractModule {

    @Override
    protected void configure() {
        bind(RegisterAttachmentType.class)
                .asEagerSingleton();
    }
}

View File

@ -0,0 +1,50 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments;
import org.elasticsearch.common.inject.Module;
import org.elasticsearch.plugins.AbstractPlugin;
import java.util.Collection;
import static org.elasticsearch.common.collect.Lists.newArrayList;
/**
 * Plugin entry point for the mapper attachments plugin. It contributes
 * {@link AttachmentsIndexModule} as an index module.
 */
public class MapperAttachmentsPlugin extends AbstractPlugin {

    /**
     * @return the plugin name, {@code mapper-attachments}
     */
    @Override
    public String name() {
        return "mapper-attachments";
    }

    /**
     * @return a one-line, human-readable description of the plugin
     */
    @Override
    public String description() {
        return "Adds the attachment type allowing to parse different attachment formats";
    }

    /**
     * @return a new collection containing the single index module this plugin adds
     */
    @Override
    public Collection<Class<? extends Module>> indexModules() {
        Collection<Class<? extends Module>> modules = newArrayList();
        modules.add(AttachmentsIndexModule.class);
        return modules;
    }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments.tika;
import org.apache.tika.Tika;
/**
 * Holder for the single {@link Tika} facade that is created once when this class
 * is initialized and handed out to every caller of {@link #tika()}.
 */
public class TikaInstance {

    private static final Tika SHARED_TIKA = new Tika();

    /**
     * @return the shared {@link Tika} instance (always the same object)
     */
    public static Tika tika() {
        return SHARED_TIKA;
    }
}

View File

@ -0,0 +1 @@
plugin=org.elasticsearch.plugin.mapper.attachments.MapperAttachmentsPlugin

View File

@ -0,0 +1,78 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.mapper.xcontent;
import org.apache.lucene.document.Document;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.DocumentMapperParser;
import org.elasticsearch.index.mapper.attachment.AttachmentMapper;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
/**
 * Parses an XHTML attachment through a mapping that declares an {@code attachment}
 * field and checks the extracted content type, title and text, both with the mapping
 * loaded from the classpath and with the mapping re-built from the parsed mapper.
 */
@Test
public class SimpleAttachmentMapperTests {

    private DocumentMapperParser mapperParser;

    @BeforeClass
    public void setupMapperParser() {
        Index testIndex = new Index("test");
        mapperParser = new DocumentMapperParser(testIndex, new AnalysisService(new Index("test")));
        mapperParser.putTypeParser(AttachmentMapper.CONTENT_TYPE, new AttachmentMapper.TypeParser());
    }

    @Test
    public void testSimpleMappings() throws Exception {
        String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json");
        DocumentMapper docMapper = mapperParser.parse(mapping);
        byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/testXHTML.html");

        assertAttachmentExtracted(docMapper, html);

        // re-parse it
        String builtMapping = docMapper.mappingSource().string();
        DocumentMapper rebuiltMapper = mapperParser.parse(builtMapping);

        assertAttachmentExtracted(rebuiltMapper, html);
    }

    // Private on purpose: the class-level @Test would turn a public helper into a test method.
    private static void assertAttachmentExtracted(DocumentMapper docMapper, byte[] html) throws Exception {
        byte[] json = jsonBuilder().startObject().field("_id", 1).field("file", html).endObject().copiedBytes();
        Document doc = docMapper.parse(json).rootDoc();

        assertThat(doc.get(indexNameOf(docMapper, "file.content_type")), equalTo("application/xhtml+xml"));
        assertThat(doc.get(indexNameOf(docMapper, "file.title")), equalTo("XHTML test document"));
        assertThat(doc.get(indexNameOf(docMapper, "file")), containsString("This document tests the ability of Apache Tika to extract content"));
    }

    // Resolves a smart field name to the index name used in the Lucene document.
    private static String indexNameOf(DocumentMapper docMapper, String smartName) {
        return docMapper.mappers().smartName(smartName).mapper().names().indexName();
    }
}

View File

@ -0,0 +1,9 @@
{
person:{
properties:{
"file":{
type:"attachment"
}
}
}
}

View File

@ -0,0 +1,29 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>XHTML test document</title>
<meta name="Author" content="Tika Developers"/>
<meta http-equiv="refresh" content="5"/>
</head>
<body>
<p>
This document tests the ability of Apache Tika to extract content
from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
</p>
</body>
</html>

View File

@ -0,0 +1,98 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.mapper.attachments.test;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.network.NetworkUtils;
import org.elasticsearch.node.Node;
import org.testng.annotations.*;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath;
import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.fieldQuery;
import static org.elasticsearch.node.NodeBuilder.nodeBuilder;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
/**
 * Starts a local node, indexes a document carrying an XHTML attachment and checks
 * that both the extracted title and the extracted text can be found by query.
 */
@Test
public class SimpleAttachmentIntegrationTests {

    private final ESLogger logger = Loggers.getLogger(getClass());

    private Node node;

    @BeforeClass
    public void setupServer() {
        node = nodeBuilder()
                .local(true)
                .settings(settingsBuilder()
                        .put("path.data", "target/data")
                        .put("cluster.name", "test-cluster-" + NetworkUtils.getLocalAddress())
                        .put("gateway.type", "none"))
                .node();
    }

    @AfterClass
    public void closeServer() {
        node.close();
    }

    @BeforeMethod
    public void createIndex() {
        logger.info("creating index [test]");
        node.client().admin().indices()
                .create(createIndexRequest("test").settings(settingsBuilder().put("index.numberOfReplicas", 0)))
                .actionGet();

        logger.info("Running Cluster Health");
        ClusterHealthResponse health = node.client().admin().cluster()
                .health(clusterHealthRequest().waitForGreenStatus())
                .actionGet();
        logger.info("Done Cluster Health, status " + health.status());

        assertThat(health.timedOut(), equalTo(false));
        assertThat(health.status(), equalTo(ClusterHealthStatus.GREEN));
    }

    @AfterMethod
    public void deleteIndex() {
        logger.info("deleting index [test]");
        node.client().admin().indices().delete(deleteIndexRequest("test")).actionGet();
    }

    @Test
    public void testSimpleAttachment() throws Exception {
        String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/xcontent/test-mapping.json");
        byte[] html = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/xcontent/testXHTML.html");

        node.client().admin().indices()
                .putMapping(putMappingRequest("test").type("person").source(mapping))
                .actionGet();

        node.client()
                .index(indexRequest("test").type("person")
                        .source(jsonBuilder().startObject().field("file", html).endObject()))
                .actionGet();

        // Make the freshly indexed document visible to the count queries below.
        node.client().admin().indices().refresh(refreshRequest()).actionGet();

        assertThat(countInTestIndex("file.title", "test document"), equalTo(1L));
        assertThat(countInTestIndex("file", "tests the ability"), equalTo(1L));
    }

    // Private on purpose: the class-level @Test would turn a public helper into a test method.
    private long countInTestIndex(String field, String text) {
        return node.client().count(countRequest("test").query(fieldQuery(field, text))).actionGet().count();
    }
}

View File

@ -0,0 +1,5 @@
log4j.rootLogger=INFO, out
log4j.appender.out=org.apache.log4j.ConsoleAppender
log4j.appender.out.layout=org.apache.log4j.PatternLayout
log4j.appender.out.layout.conversionPattern=[%d{ISO8601}][%-5p][%-25c] %m%n