More work on attachments: the basic field types now support externally set values

This commit is contained in:
kimchy 2010-03-29 00:19:33 +03:00
parent b42245df53
commit 913a486f99
25 changed files with 847 additions and 65 deletions

View File

@ -20,6 +20,7 @@
<entry name="?*.json" />
<entry name="?*.yml" />
<entry name="?*.txt" />
<entry name="?*.pdf" />
</wildcardResourcePatterns>
<annotationProcessing enabled="false" useClasspath="true" />
</component>

View File

@ -49,6 +49,7 @@
<w>streamable</w>
<w>successul</w>
<w>throwable</w>
<w>tika</w>
<w>timestamp</w>
<w>translog</w>
<w>traslog</w>

View File

@ -4,6 +4,10 @@
<root url="jar://$GRADLE_REPOSITORY$/org.apache.tika/tika-app/bundles/tika-app-0.6.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
<SOURCES>
<root url="file://$PROJECT_DIR$/../../../opt/tika/0.6/tika-parsers/src/main/java" />
<root url="file://$PROJECT_DIR$/../../../opt/tika/0.6/tika-core/src/main/java" />
<root url="file://$PROJECT_DIR$/../../../opt/tika/0.6/tika-app/src/main/java" />
</SOURCES>
</library>
</component>

View File

@ -13,6 +13,9 @@
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="module" module-name="elasticsearch" />
<orderEntry type="library" name="tika" level="project" />
<orderEntry type="library" scope="TEST" name="testng" level="project" />
<orderEntry type="library" scope="TEST" name="hamcrest" level="project" />
<orderEntry type="module" module-name="test-testng" scope="TEST" />
</component>
</module>

View File

@ -154,17 +154,26 @@ public class JsonDateFieldMapper extends JsonNumberFieldMapper<Long> {
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
String dateAsString;
if (jsonContext.externalValueSet()) {
dateAsString = (String) jsonContext.externalValue();
if (dateAsString == null) {
dateAsString = nullValue;
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
dateAsString = nullValue;
} else {
dateAsString = jsonContext.jp().getText();
}
}
if (dateAsString == null) {
return null;
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), dateAsString, boost);
}
long value = dateTimeFormatter.parser().parseMillis(dateAsString);
Field field = null;
if (stored()) {

View File

@ -127,12 +127,26 @@ public class JsonDoubleFieldMapper extends JsonNumberFieldMapper<Double> {
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
double value;
if (jsonContext.externalValueSet()) {
Object externalValue = jsonContext.externalValue();
if (externalValue == null) {
if (nullValue == null) {
return null;
}
value = nullValue;
} else {
value = ((Number) externalValue).doubleValue();
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), Double.toString(value), boost);
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
if (nullValue == null) {
return null;
}
value = nullValue;
if (includeInAll == null || includeInAll) {
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
}
} else {
@ -145,6 +159,8 @@ public class JsonDoubleFieldMapper extends JsonNumberFieldMapper<Double> {
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
}
}
}
Field field = null;
if (stored()) {
field = new Field(names.indexName(), Numbers.doubleToBytes(value), store);

View File

@ -127,12 +127,26 @@ public class JsonFloatFieldMapper extends JsonNumberFieldMapper<Float> {
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
float value;
if (jsonContext.externalValueSet()) {
Object externalValue = jsonContext.externalValue();
if (externalValue == null) {
if (nullValue == null) {
return null;
}
value = nullValue;
} else {
value = ((Number) externalValue).floatValue();
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), Float.toString(value), boost);
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
if (nullValue == null) {
return null;
}
value = nullValue;
if (includeInAll == null || includeInAll) {
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
}
} else {
@ -145,6 +159,8 @@ public class JsonFloatFieldMapper extends JsonNumberFieldMapper<Float> {
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
}
}
}
Field field = null;
if (stored()) {
field = new Field(names.indexName(), Numbers.floatToBytes(value), store);

View File

@ -126,12 +126,26 @@ public class JsonIntegerFieldMapper extends JsonNumberFieldMapper<Integer> {
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
int value;
if (jsonContext.externalValueSet()) {
Object externalValue = jsonContext.externalValue();
if (externalValue == null) {
if (nullValue == null) {
return null;
}
value = nullValue;
} else {
value = ((Number) externalValue).intValue();
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), Integer.toString(value), boost);
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
if (nullValue == null) {
return null;
}
value = nullValue;
if (includeInAll == null || includeInAll) {
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
}
} else {
@ -144,6 +158,8 @@ public class JsonIntegerFieldMapper extends JsonNumberFieldMapper<Integer> {
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
}
}
}
Field field = null;
if (stored()) {
field = new Field(names.indexName(), Numbers.intToBytes(value), store);

View File

@ -126,12 +126,26 @@ public class JsonLongFieldMapper extends JsonNumberFieldMapper<Long> {
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
long value;
if (jsonContext.externalValueSet()) {
Object externalValue = jsonContext.externalValue();
if (externalValue == null) {
if (nullValue == null) {
return null;
}
value = nullValue;
} else {
value = ((Number) externalValue).longValue();
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), Long.toString(value), boost);
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
if (nullValue == null) {
return null;
}
value = nullValue;
if (includeInAll == null || includeInAll) {
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
}
} else {
@ -144,6 +158,8 @@ public class JsonLongFieldMapper extends JsonNumberFieldMapper<Long> {
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
}
}
}
Field field = null;
if (stored()) {
field = new Field(names.indexName(), Numbers.longToBytes(value), store);

View File

@ -55,6 +55,10 @@ public class JsonParseContext {
private boolean mappersAdded = false;
private boolean externalValueSet;
private Object externalValue;
private AllEntries allEntries = new AllEntries();
public JsonParseContext(JsonDocumentMapper docMapper, JsonPath path) {
@ -144,6 +148,20 @@ public class JsonParseContext {
return this.allEntries;
}
/**
 * Hands the context a value that was produced outside of the JSON stream and marks it
 * as pending, so that {@link #externalValueSet()} reports {@code true} until it is read.
 *
 * @param externalValue the value to expose to the next reader; may be {@code null}
 */
public void externalValue(Object externalValue) {
    this.externalValue = externalValue;
    this.externalValueSet = true;
}
/**
 * Reports whether an external value has been supplied and not yet read.
 *
 * @return {@code true} while a value set through {@code externalValue(Object)} is pending
 */
public boolean externalValueSet() {
    return externalValueSet;
}
/**
 * Returns the pending external value and clears the "set" flag, so a later call to
 * {@link #externalValueSet()} returns {@code false}. The stored reference itself is kept.
 *
 * @return the value last supplied through {@code externalValue(Object)}; may be {@code null}
 */
public Object externalValue() {
    final Object pending = this.externalValue;
    this.externalValueSet = false;
    return pending;
}
/**
* A string builder that can be used to construct complex names for example.
* Its better to reuse the.

View File

@ -125,25 +125,41 @@ public class JsonShortFieldMapper extends JsonNumberFieldMapper<Short> {
}
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
int value;
short value;
if (jsonContext.externalValueSet()) {
Object externalValue = jsonContext.externalValue();
if (externalValue == null) {
if (nullValue == null) {
return null;
}
value = nullValue;
} else {
value = ((Number) externalValue).shortValue();
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), Short.toString(value), boost);
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
if (nullValue == null) {
return null;
}
value = nullValue;
if (includeInAll == null || includeInAll) {
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_STRING) {
value = Integer.parseInt(jsonContext.jp().getText());
value = Short.parseShort(jsonContext.jp().getText());
} else {
value = jsonContext.jp().getIntValue();
value = jsonContext.jp().getShortValue();
}
if (includeInAll == null || includeInAll) {
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
}
}
}
Field field = null;
if (stored()) {
field = new Field(names.indexName(), Numbers.shortToBytes(value), store);

View File

@ -98,11 +98,18 @@ public class JsonStringFieldMapper extends JsonFieldMapper<String> implements Js
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
String value;
if (jsonContext.externalValueSet()) {
value = (String) jsonContext.externalValue();
if (value == null) {
value = nullValue;
}
} else {
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
value = nullValue;
} else {
value = jsonContext.jp().getText();
}
}
if (value == null) {
return null;
}

View File

@ -98,7 +98,6 @@ public abstract class JsonQueryBuilders {
* a single field.
*
* @param name The name of the field
* @param query The query string
*/
public static FieldJsonQueryBuilder fieldQuery(String name, String query) {
return new FieldJsonQueryBuilder(name, query);

View File

@ -32,6 +32,8 @@ public interface Plugin {
String name();
String description();
Collection<Class<? extends Module>> modules();
Collection<Class<? extends LifecycleComponent>> services();

View File

@ -64,4 +64,8 @@ public class AllTokenFilter extends TokenFilter {
}
return true;
}
/**
 * Returns the string form of the {@code allEntries} this filter reads from.
 */
@Override public String toString() {
return allEntries.toString();
}
}

View File

@ -0,0 +1,33 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments;
import com.google.inject.AbstractModule;
import org.elasticsearch.plugin.attachments.index.mapper.JsonAttachmentMapperInjector;
/**
 * Guice module contributed by the attachments plugin. Its only job is to bind
 * {@link JsonAttachmentMapperInjector} as an eager singleton.
 *
 * @author kimchy (shay.banon)
 */
public class AttachmentsIndexModule extends AbstractModule {
// Eager binding: the injector component has no callers of its own, so it must be
// instantiated when the module is installed for its constructor to take effect.
@Override protected void configure() {
bind(JsonAttachmentMapperInjector.class).asEagerSingleton();
}
}

View File

@ -19,8 +19,13 @@
package org.elasticsearch.plugin.attachments;
import com.google.inject.Module;
import org.elasticsearch.plugins.AbstractPlugin;
import java.util.Collection;
import static com.google.common.collect.Lists.*;
/**
* @author kimchy (shay.banon)
*/
@ -29,4 +34,14 @@ public class AttachmentsPlugin extends AbstractPlugin {
@Override public String name() {
return "attachments";
}
/**
 * Returns a short, human-readable summary of what this plugin adds.
 */
@Override public String description() {
    // Fixed wording: "difference" -> "different".
    return "Adds the attachment type allowing to parse different attachment formats";
}
/**
 * Lists the modules this plugin contributes at the index level.
 *
 * @return a new mutable collection holding only {@link AttachmentsIndexModule}
 */
@Override public Collection<Class<? extends Module>> indexModules() {
    Collection<Class<? extends Module>> perIndexModules = newArrayList();
    perIndexModules.add(AttachmentsIndexModule.class);
    return perIndexModules;
}
}

View File

@ -0,0 +1,249 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments.index.mapper;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.elasticsearch.index.mapper.FieldMapperListener;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.MergeMappingException;
import org.elasticsearch.index.mapper.json.*;
import org.elasticsearch.util.io.FastByteArrayInputStream;
import org.elasticsearch.util.json.JsonBuilder;
import java.io.IOException;
import static org.elasticsearch.index.mapper.json.JsonMapperBuilders.*;
import static org.elasticsearch.plugin.attachments.tika.TikaInstance.*;
/**
 * Mapper for fields of type {@code attachment}. The binary document is run through Tika and the
 * extracted text plus a few metadata values (date, title, author, keywords) are handed to regular
 * string/date field mappers as external values.
 *
 * <p>The field value is either the binary content itself, written as a JSON string:
 * <pre>
 * field1 : "..."
 * </pre>
 * <p>Or an object carrying the content together with optional hints for Tika:
 * <pre>
 * {
 *     file1 : {
 *         _content_type : "application/pdf",
 *         _name : "..../something.pdf",
 *         content : ""
 *     }
 * }
 * </pre>
 *
 * <p>A JSON {@code null} value is treated as "no attachment" and indexes nothing. An object without
 * a {@code content} entry, or any other kind of value, is rejected with a
 * {@link MapperParsingException}.
 *
 * @author kimchy (shay.banon)
 */
public class JsonAttachmentMapper implements JsonMapper {

    /** The mapping {@code type} value that selects this mapper. */
    public static final String JSON_TYPE = "attachment";

    public static class Defaults {
        public static final JsonPath.Type PATH_TYPE = JsonPath.Type.FULL;
    }

    /**
     * Builder for {@link JsonAttachmentMapper}. The content mapper defaults to a string field named
     * after the attachment field; the metadata mappers default to {@code title}, {@code author},
     * {@code keywords} and {@code date}.
     */
    public static class Builder extends JsonMapper.Builder<Builder, JsonAttachmentMapper> {

        private JsonPath.Type pathType = Defaults.PATH_TYPE;

        private JsonStringFieldMapper.Builder contentBuilder;

        private JsonStringFieldMapper.Builder titleBuilder = stringField("title");

        private JsonStringFieldMapper.Builder authorBuilder = stringField("author");

        private JsonStringFieldMapper.Builder keywordsBuilder = stringField("keywords");

        private JsonDateFieldMapper.Builder dateBuilder = dateField("date");

        public Builder(String name) {
            super(name);
            this.builder = this;
            this.contentBuilder = stringField(name);
        }

        public Builder pathType(JsonPath.Type pathType) {
            this.pathType = pathType;
            return this;
        }

        public Builder content(JsonStringFieldMapper.Builder content) {
            this.contentBuilder = content;
            return this;
        }

        public Builder date(JsonDateFieldMapper.Builder date) {
            this.dateBuilder = date;
            return this;
        }

        public Builder author(JsonStringFieldMapper.Builder author) {
            this.authorBuilder = author;
            return this;
        }

        public Builder title(JsonStringFieldMapper.Builder title) {
            this.titleBuilder = title;
            return this;
        }

        public Builder keywords(JsonStringFieldMapper.Builder keywords) {
            this.keywordsBuilder = keywords;
            return this;
        }

        /**
         * Builds the mapper. The content mapper is built at the current path (so it lives under the
         * attachment field's own name), while the metadata mappers are built with the attachment
         * name pushed onto the path. The context's original path type is restored afterwards.
         */
        @Override public JsonAttachmentMapper build(BuilderContext context) {
            JsonPath.Type origPathType = context.path().pathType();
            context.path().pathType(pathType);

            // create the content mapper under the actual name
            JsonStringFieldMapper contentMapper = contentBuilder.build(context);

            // create the metadata mappers one level down, under the attachment name
            context.path().add(name);
            JsonDateFieldMapper dateMapper = dateBuilder.build(context);
            JsonStringFieldMapper authorMapper = authorBuilder.build(context);
            JsonStringFieldMapper titleMapper = titleBuilder.build(context);
            JsonStringFieldMapper keywordsMapper = keywordsBuilder.build(context);
            context.path().remove();

            context.path().pathType(origPathType);

            return new JsonAttachmentMapper(name, pathType, contentMapper, dateMapper, titleMapper, authorMapper, keywordsMapper);
        }
    }

    private final String name;

    private final JsonPath.Type pathType;

    private final JsonStringFieldMapper contentMapper;

    private final JsonDateFieldMapper dateMapper;

    private final JsonStringFieldMapper authorMapper;

    private final JsonStringFieldMapper titleMapper;

    private final JsonStringFieldMapper keywordsMapper;

    public JsonAttachmentMapper(String name, JsonPath.Type pathType, JsonStringFieldMapper contentMapper,
                                JsonDateFieldMapper dateMapper, JsonStringFieldMapper titleMapper, JsonStringFieldMapper authorMapper,
                                JsonStringFieldMapper keywordsMapper) {
        this.name = name;
        this.pathType = pathType;
        this.contentMapper = contentMapper;
        this.dateMapper = dateMapper;
        this.titleMapper = titleMapper;
        this.authorMapper = authorMapper;
        this.keywordsMapper = keywordsMapper;
    }

    @Override public String name() {
        return name;
    }

    /**
     * Reads the attachment from the current parser position, extracts its text with Tika and feeds
     * the text and selected metadata to the content/date/title/author/keywords mappers.
     *
     * @param jsonContext the parse context; its parser must be positioned on the attachment value
     * @throws IOException            if reading the JSON or the content stream fails
     * @throws MapperParsingException if the value has an unsupported shape, no content was
     *                                provided, or Tika fails to extract text
     */
    @Override public void parse(JsonParseContext jsonContext) throws IOException {
        byte[] content = null;
        String contentType = null;
        // Deliberately not called "name": that would shadow the mapper's own field name.
        String resourceName = null;

        JsonParser jp = jsonContext.jp();
        JsonToken token = jp.getCurrentToken();
        if (token == JsonToken.VALUE_NULL) {
            // Nothing to extract. Returning here also keeps us from reading past this value
            // into the enclosing object.
            return;
        }
        if (token == JsonToken.VALUE_STRING) {
            content = jp.getBinaryValue();
        } else if (token == JsonToken.START_OBJECT) {
            String currentFieldName = null;
            while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
                if (token == JsonToken.FIELD_NAME) {
                    currentFieldName = jp.getCurrentName();
                } else if (token == JsonToken.VALUE_STRING) {
                    if ("content".equals(currentFieldName)) {
                        content = jp.getBinaryValue();
                    } else if ("_content_type".equals(currentFieldName)) {
                        contentType = jp.getText();
                    } else if ("_name".equals(currentFieldName)) {
                        resourceName = jp.getText();
                    }
                } else if (token == JsonToken.START_OBJECT || token == JsonToken.START_ARRAY) {
                    // Unknown nested structure: skip it as a whole so that its closing token is
                    // not mistaken for the end of the attachment object.
                    jp.skipChildren();
                }
            }
        } else {
            throw new MapperParsingException("Attachment field [" + name + "] expects a string or an object value, but found [" + token + "]");
        }

        if (content == null) {
            // Fail with a clear mapping error instead of a NullPointerException further down.
            throw new MapperParsingException("No content is provided for attachment field [" + name + "]");
        }

        Metadata metadata = new Metadata();
        if (contentType != null) {
            metadata.add(Metadata.CONTENT_TYPE, contentType);
        }
        if (resourceName != null) {
            metadata.add(Metadata.RESOURCE_NAME_KEY, resourceName);
        }

        String parsedContent;
        try {
            parsedContent = tika().parseToString(new FastByteArrayInputStream(content), metadata);
        } catch (TikaException e) {
            // Prefer the resource name when one was given, otherwise identify the field.
            String label = resourceName != null ? resourceName : name;
            throw new MapperParsingException("Failed to extract text for [" + label + "]", e);
        }

        // Each sub-mapper picks up the value set on the context right before it is invoked.
        jsonContext.externalValue(parsedContent);
        contentMapper.parse(jsonContext);

        jsonContext.externalValue(metadata.get(Metadata.DATE));
        dateMapper.parse(jsonContext);

        jsonContext.externalValue(metadata.get(Metadata.TITLE));
        titleMapper.parse(jsonContext);

        jsonContext.externalValue(metadata.get(Metadata.AUTHOR));
        authorMapper.parse(jsonContext);

        jsonContext.externalValue(metadata.get(Metadata.KEYWORDS));
        keywordsMapper.parse(jsonContext);
    }

    @Override public void merge(JsonMapper mergeWith, JsonMergeContext mergeContext) throws MergeMappingException {
        // ignore this for now
    }

    /** Visits the content mapper and every metadata mapper with the given listener. */
    @Override public void traverse(FieldMapperListener fieldMapperListener) {
        contentMapper.traverse(fieldMapperListener);
        dateMapper.traverse(fieldMapperListener);
        titleMapper.traverse(fieldMapperListener);
        authorMapper.traverse(fieldMapperListener);
        keywordsMapper.traverse(fieldMapperListener);
    }

    /**
     * Writes this mapping as an object named after the field, with {@code type}, {@code pathType}
     * and a {@code fields} object containing the sub-mappers.
     */
    @Override public void toJson(JsonBuilder builder, Params params) throws IOException {
        builder.startObject(name);
        builder.field("type", JSON_TYPE);
        builder.field("pathType", pathType.name().toLowerCase());

        builder.startObject("fields");
        contentMapper.toJson(builder, params);
        authorMapper.toJson(builder, params);
        titleMapper.toJson(builder, params);
        dateMapper.toJson(builder, params);
        keywordsMapper.toJson(builder, params);
        builder.endObject();

        builder.endObject();
    }
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments.index.mapper;
import com.google.inject.Inject;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.json.JsonDocumentMapperParser;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.settings.Settings;
/**
 * Index-level component whose construction registers the attachment type parser with the
 * index's JSON document mapper parser.
 *
 * @author kimchy (shay.banon)
 */
public class JsonAttachmentMapperInjector extends AbstractIndexComponent {

    /**
     * @param index         the index this component belongs to
     * @param indexSettings the settings of that index
     * @param mapperService the mapper service whose document mapper parser receives the
     *                      attachment type parser; the parser is cast to
     *                      {@link JsonDocumentMapperParser}
     */
    @Inject public JsonAttachmentMapperInjector(Index index, @IndexSettings Settings indexSettings, MapperService mapperService) {
        super(index, indexSettings);

        JsonDocumentMapperParser jsonParser = (JsonDocumentMapperParser) mapperService.documentMapperParser();
        // Register under the mapper's own type constant so the name cannot drift from the one
        // the mapper writes out in its mapping.
        jsonParser.putTypeParser(JsonAttachmentMapper.JSON_TYPE, new JsonAttachmentTypeParser());
    }
}

View File

@ -0,0 +1,91 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments.index.mapper;
import org.codehaus.jackson.JsonNode;
import org.codehaus.jackson.node.ObjectNode;
import org.elasticsearch.index.mapper.MapperParsingException;
import org.elasticsearch.index.mapper.json.JsonDateFieldMapper;
import org.elasticsearch.index.mapper.json.JsonMapper;
import org.elasticsearch.index.mapper.json.JsonStringFieldMapper;
import org.elasticsearch.index.mapper.json.JsonTypeParser;
import java.util.Iterator;
import java.util.Map;
import static org.elasticsearch.index.mapper.json.JsonTypeParsers.*;
/**
 * Turns the mapping definition of an {@code attachment} field into a
 * {@link JsonAttachmentMapper.Builder}. The minimal form is:
 * <pre>
 * field1 : { type : "attachment" }
 * </pre>
 * Sub-fields can be tuned through a {@code fields} object. The entry named like the attachment
 * field configures the extracted content; {@code date}, {@code title}, {@code author} and
 * {@code keywords} configure the metadata fields:
 * <pre>
 * field1 : {
 *     type : "attachment",
 *     fields : {
 *         field1 : {type : "binary"},
 *         title : {store : "yes"},
 *         date : {store : "yes"}
 *     }
 * }
 * </pre>
 * Entries with any other name are ignored. A {@code pathType} entry sets the builder's path type.
 *
 * @author kimchy (shay.banon)
 */
public class JsonAttachmentTypeParser implements JsonTypeParser {

    @Override public JsonMapper.Builder parse(String name, JsonNode node, ParserContext parserContext) throws MapperParsingException {
        ObjectNode definition = (ObjectNode) node;
        JsonAttachmentMapper.Builder attachmentBuilder = new JsonAttachmentMapper.Builder(name);

        Iterator<Map.Entry<String, JsonNode>> settings = definition.getFields();
        while (settings.hasNext()) {
            Map.Entry<String, JsonNode> setting = settings.next();
            String settingName = setting.getKey();
            JsonNode settingNode = setting.getValue();

            if ("pathType".equals(settingName)) {
                attachmentBuilder.pathType(parsePathType(name, settingNode.getValueAsText()));
            } else if ("fields".equals(settingName)) {
                parseFields(name, (ObjectNode) settingNode, attachmentBuilder, parserContext);
            }
        }
        return attachmentBuilder;
    }

    /**
     * Applies every recognized entry of the {@code fields} object to the builder.
     */
    private void parseFields(String name, ObjectNode fieldsNode, JsonAttachmentMapper.Builder attachmentBuilder,
                             ParserContext parserContext) throws MapperParsingException {
        Iterator<Map.Entry<String, JsonNode>> props = fieldsNode.getFields();
        while (props.hasNext()) {
            Map.Entry<String, JsonNode> prop = props.next();
            String propName = prop.getKey();
            JsonNode propNode = prop.getValue();

            if (name.equals(propName)) {
                // the entry named after the attachment itself describes the content field
                attachmentBuilder.content(parseStringBuilder(name, propNode, parserContext));
            } else if ("date".equals(propName)) {
                attachmentBuilder.date((JsonDateFieldMapper.Builder) parserContext.typeParser("date").parse("date", propNode, parserContext));
            } else if ("title".equals(propName)) {
                attachmentBuilder.title(parseStringBuilder("title", propNode, parserContext));
            } else if ("author".equals(propName)) {
                attachmentBuilder.author(parseStringBuilder("author", propNode, parserContext));
            } else if ("keywords".equals(propName)) {
                attachmentBuilder.keywords(parseStringBuilder("keywords", propNode, parserContext));
            }
        }
    }

    /**
     * Parses {@code propNode} with the registered {@code string} type parser under the given name.
     */
    private JsonStringFieldMapper.Builder parseStringBuilder(String fieldName, JsonNode propNode,
                                                             ParserContext parserContext) throws MapperParsingException {
        return (JsonStringFieldMapper.Builder) parserContext.typeParser("string").parse(fieldName, propNode, parserContext);
    }
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments.tika;
import org.apache.tika.Tika;
/**
 * Holder for a single {@link Tika} facade that is created once, when this class is
 * initialized, and handed out to every caller of {@link #tika()}.
 *
 * @author kimchy (shay.banon)
 */
public class TikaInstance {

    private static final Tika INSTANCE = new Tika();

    /**
     * @return the shared {@link Tika} instance; the same object on every call
     */
    public static Tika tika() {
        return INSTANCE;
    }
}

View File

@ -0,0 +1,69 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments.index.mapper;
import org.apache.lucene.document.Document;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.analysis.AnalysisService;
import org.elasticsearch.index.mapper.json.JsonDocumentMapper;
import org.elasticsearch.index.mapper.json.JsonDocumentMapperParser;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;
import static org.elasticsearch.util.io.Streams.*;
import static org.elasticsearch.util.json.JsonBuilder.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
 * Checks that an attachment mapping parses an XHTML document into the expected title and
 * content fields, both for the mapping read from the classpath and for the mapping source
 * regenerated from the parsed mapper.
 *
 * @author kimchy (shay.banon)
 */
@Test
public class SimpleAttachmentMapperTests {

    private static final String MAPPING_RESOURCE = "/org/elasticsearch/plugin/attachments/index/mapper/test-mapping.json";

    private static final String XHTML_RESOURCE = "/org/elasticsearch/plugin/attachments/index/mapper/testXHTML.html";

    private JsonDocumentMapperParser mapperParser;

    @BeforeTest public void setupMapperParser() {
        mapperParser = new JsonDocumentMapperParser(new AnalysisService(new Index("test")));
        mapperParser.putTypeParser(JsonAttachmentMapper.JSON_TYPE, new JsonAttachmentTypeParser());
    }

    @Test public void testSimpleMappings() throws Exception {
        String mapping = copyToStringFromClasspath(MAPPING_RESOURCE);
        JsonDocumentMapper docMapper = (JsonDocumentMapper) mapperParser.parse(mapping);
        assertXhtmlFieldsExtracted(docMapper);

        // re-parse it: the mapping source generated by the mapper must behave the same
        String builtMapping = docMapper.buildSource();
        JsonDocumentMapper rebuiltMapper = (JsonDocumentMapper) mapperParser.parse(builtMapping);
        assertXhtmlFieldsExtracted(rebuiltMapper);
    }

    /**
     * Indexes the XHTML fixture through the given mapper and verifies the title and content fields.
     */
    private void assertXhtmlFieldsExtracted(JsonDocumentMapper docMapper) throws Exception {
        byte[] json = jsonBuilder()
                .startObject()
                .field("_id", 1)
                .field("file", copyToBytesFromClasspath(XHTML_RESOURCE))
                .endObject()
                .copiedBytes();

        Document doc = docMapper.parse(json).doc();

        String titleIndexName = docMapper.mappers().smartName("file.title").mapper().names().indexName();
        assertThat(doc.get(titleIndexName), equalTo("XHTML test document"));

        String contentIndexName = docMapper.mappers().smartName("file").mapper().names().indexName();
        assertThat(doc.get(contentIndexName), containsString("This document tests the ability of Apache Tika to extract content"));
    }
}

View File

@ -0,0 +1,7 @@
{
person : {
properties : {
"file" : { type : "attachment" }
}
}
}

View File

@ -0,0 +1,29 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>XHTML test document</title>
<meta name="Author" content="Tika Developers"/>
<meta http-equiv="refresh" content="5"/>
</head>
<body>
<p>
This document tests the ability of Apache Tika to extract content
from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
</p>
</body>
</html>

View File

@ -0,0 +1,87 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.plugin.attachments.test;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.count.CountResponse;
import org.elasticsearch.server.Server;
import org.elasticsearch.util.logging.Loggers;
import org.slf4j.Logger;
import org.testng.annotations.*;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.index.query.json.JsonQueryBuilders.*;
import static org.elasticsearch.server.ServerBuilder.*;
import static org.elasticsearch.util.io.Streams.*;
import static org.elasticsearch.util.json.JsonBuilder.*;
import static org.elasticsearch.util.settings.ImmutableSettings.*;
import static org.hamcrest.MatcherAssert.*;
import static org.hamcrest.Matchers.*;
/**
 * End-to-end check against a local node: puts the attachment mapping, indexes the XHTML
 * fixture and counts hits on the extracted title and content.
 *
 * @author kimchy (shay.banon)
 */
@Test
public class SimpleAttachmentIntegrationTests {

    private static final String MAPPING_RESOURCE = "/org/elasticsearch/plugin/attachments/index/mapper/test-mapping.json";

    private static final String XHTML_RESOURCE = "/org/elasticsearch/plugin/attachments/index/mapper/testXHTML.html";

    private final Logger logger = Loggers.getLogger(getClass());

    private Server server;

    @BeforeClass public void setupServer() {
        server = serverBuilder().settings(settingsBuilder().put("node.local", true)).server();
    }

    @AfterClass public void closeServer() {
        server.close();
    }

    /** Creates the {@code test} index without replicas and waits for the cluster to turn green. */
    @BeforeMethod public void createIndex() {
        logger.info("creating index [test]");
        server.client().admin().indices()
                .create(createIndexRequest("test").settings(settingsBuilder().put("index.numberOfReplicas", 0)))
                .actionGet();

        logger.info("Running Cluster Health");
        ClusterHealthResponse health = server.client().admin().cluster()
                .health(clusterHealth().waitForGreenStatus())
                .actionGet();
        logger.info("Done Cluster Health, status " + health.status());

        assertThat(health.timedOut(), equalTo(false));
        assertThat(health.status(), equalTo(ClusterHealthStatus.GREEN));
    }

    @AfterMethod public void deleteIndex() {
        logger.info("deleting index [test]");
        server.client().admin().indices().delete(deleteIndexRequest("test")).actionGet();
    }

    @Test public void testSimpleAttachment() throws Exception {
        String mapping = copyToStringFromClasspath(MAPPING_RESOURCE);
        server.client().admin().indices().putMapping(putMappingRequest("test").mappingSource(mapping)).actionGet();

        server.client().index(indexRequest("test").type("person")
                .source(jsonBuilder().startObject().field("file", copyToBytesFromClasspath(XHTML_RESOURCE)).endObject()))
                .actionGet();
        server.client().admin().indices().refresh(refreshRequest()).actionGet();

        assertThat(countMatching("file.title", "test document"), equalTo(1L));
        assertThat(countMatching("file", "tests the ability"), equalTo(1L));
    }

    /**
     * Runs a field query against the {@code test} index and returns the reported hit count.
     */
    private long countMatching(String field, String query) {
        CountResponse countResponse = server.client()
                .count(countRequest("test").querySource(fieldQuery(field, query)))
                .actionGet();
        return countResponse.count();
    }
}