more work on attachments, have basic types support external set values
This commit is contained in:
parent
b42245df53
commit
913a486f99
|
@ -20,6 +20,7 @@
|
|||
<entry name="?*.json" />
|
||||
<entry name="?*.yml" />
|
||||
<entry name="?*.txt" />
|
||||
<entry name="?*.pdf" />
|
||||
</wildcardResourcePatterns>
|
||||
<annotationProcessing enabled="false" useClasspath="true" />
|
||||
</component>
|
||||
|
|
|
@ -49,6 +49,7 @@
|
|||
<w>streamable</w>
|
||||
<w>successul</w>
|
||||
<w>throwable</w>
|
||||
<w>tika</w>
|
||||
<w>timestamp</w>
|
||||
<w>translog</w>
|
||||
<w>traslog</w>
|
||||
|
|
|
@ -4,6 +4,10 @@
|
|||
<root url="jar://$GRADLE_REPOSITORY$/org.apache.tika/tika-app/bundles/tika-app-0.6.jar!/" />
|
||||
</CLASSES>
|
||||
<JAVADOC />
|
||||
<SOURCES />
|
||||
<SOURCES>
|
||||
<root url="file://$PROJECT_DIR$/../../../opt/tika/0.6/tika-parsers/src/main/java" />
|
||||
<root url="file://$PROJECT_DIR$/../../../opt/tika/0.6/tika-core/src/main/java" />
|
||||
<root url="file://$PROJECT_DIR$/../../../opt/tika/0.6/tika-app/src/main/java" />
|
||||
</SOURCES>
|
||||
</library>
|
||||
</component>
|
|
@ -13,6 +13,9 @@
|
|||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
<orderEntry type="module" module-name="elasticsearch" />
|
||||
<orderEntry type="library" name="tika" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="testng" level="project" />
|
||||
<orderEntry type="library" scope="TEST" name="hamcrest" level="project" />
|
||||
<orderEntry type="module" module-name="test-testng" scope="TEST" />
|
||||
</component>
|
||||
</module>
|
||||
|
||||
|
|
|
@ -154,17 +154,26 @@ public class JsonDateFieldMapper extends JsonNumberFieldMapper<Long> {
|
|||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
String dateAsString;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
dateAsString = (String) jsonContext.externalValue();
|
||||
if (dateAsString == null) {
|
||||
dateAsString = nullValue;
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
dateAsString = nullValue;
|
||||
} else {
|
||||
dateAsString = jsonContext.jp().getText();
|
||||
}
|
||||
}
|
||||
|
||||
if (dateAsString == null) {
|
||||
return null;
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), dateAsString, boost);
|
||||
}
|
||||
|
||||
long value = dateTimeFormatter.parser().parseMillis(dateAsString);
|
||||
Field field = null;
|
||||
if (stored()) {
|
||||
|
|
|
@ -127,12 +127,26 @@ public class JsonDoubleFieldMapper extends JsonNumberFieldMapper<Double> {
|
|||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
double value;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
Object externalValue = jsonContext.externalValue();
|
||||
if (externalValue == null) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
} else {
|
||||
value = ((Number) externalValue).doubleValue();
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), Double.toString(value), boost);
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
if (includeInAll == null || includeInAll) {
|
||||
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
|
||||
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
|
||||
}
|
||||
} else {
|
||||
|
@ -145,6 +159,8 @@ public class JsonDoubleFieldMapper extends JsonNumberFieldMapper<Double> {
|
|||
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Field field = null;
|
||||
if (stored()) {
|
||||
field = new Field(names.indexName(), Numbers.doubleToBytes(value), store);
|
||||
|
|
|
@ -127,12 +127,26 @@ public class JsonFloatFieldMapper extends JsonNumberFieldMapper<Float> {
|
|||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
float value;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
Object externalValue = jsonContext.externalValue();
|
||||
if (externalValue == null) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
} else {
|
||||
value = ((Number) externalValue).floatValue();
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), Float.toString(value), boost);
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
if (includeInAll == null || includeInAll) {
|
||||
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
|
||||
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
|
||||
}
|
||||
} else {
|
||||
|
@ -145,6 +159,8 @@ public class JsonFloatFieldMapper extends JsonNumberFieldMapper<Float> {
|
|||
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Field field = null;
|
||||
if (stored()) {
|
||||
field = new Field(names.indexName(), Numbers.floatToBytes(value), store);
|
||||
|
|
|
@ -126,12 +126,26 @@ public class JsonIntegerFieldMapper extends JsonNumberFieldMapper<Integer> {
|
|||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
int value;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
Object externalValue = jsonContext.externalValue();
|
||||
if (externalValue == null) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
} else {
|
||||
value = ((Number) externalValue).intValue();
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), Integer.toString(value), boost);
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
if (includeInAll == null || includeInAll) {
|
||||
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
|
||||
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
|
||||
}
|
||||
} else {
|
||||
|
@ -144,6 +158,8 @@ public class JsonIntegerFieldMapper extends JsonNumberFieldMapper<Integer> {
|
|||
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Field field = null;
|
||||
if (stored()) {
|
||||
field = new Field(names.indexName(), Numbers.intToBytes(value), store);
|
||||
|
|
|
@ -126,12 +126,26 @@ public class JsonLongFieldMapper extends JsonNumberFieldMapper<Long> {
|
|||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
long value;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
Object externalValue = jsonContext.externalValue();
|
||||
if (externalValue == null) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
} else {
|
||||
value = ((Number) externalValue).longValue();
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), Long.toString(value), boost);
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
if (includeInAll == null || includeInAll) {
|
||||
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
|
||||
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
|
||||
}
|
||||
} else {
|
||||
|
@ -144,6 +158,8 @@ public class JsonLongFieldMapper extends JsonNumberFieldMapper<Long> {
|
|||
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Field field = null;
|
||||
if (stored()) {
|
||||
field = new Field(names.indexName(), Numbers.longToBytes(value), store);
|
||||
|
|
|
@ -55,6 +55,10 @@ public class JsonParseContext {
|
|||
|
||||
private boolean mappersAdded = false;
|
||||
|
||||
private boolean externalValueSet;
|
||||
|
||||
private Object externalValue;
|
||||
|
||||
private AllEntries allEntries = new AllEntries();
|
||||
|
||||
public JsonParseContext(JsonDocumentMapper docMapper, JsonPath path) {
|
||||
|
@ -144,6 +148,20 @@ public class JsonParseContext {
|
|||
return this.allEntries;
|
||||
}
|
||||
|
||||
public void externalValue(Object externalValue) {
|
||||
this.externalValueSet = true;
|
||||
this.externalValue = externalValue;
|
||||
}
|
||||
|
||||
public boolean externalValueSet() {
|
||||
return this.externalValueSet;
|
||||
}
|
||||
|
||||
public Object externalValue() {
|
||||
externalValueSet = false;
|
||||
return externalValue;
|
||||
}
|
||||
|
||||
/**
|
||||
* A string builder that can be used to construct complex names for example.
|
||||
* It's better to reuse the builder.
|
||||
|
|
|
@ -125,25 +125,41 @@ public class JsonShortFieldMapper extends JsonNumberFieldMapper<Short> {
|
|||
}
|
||||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
int value;
|
||||
short value;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
Object externalValue = jsonContext.externalValue();
|
||||
if (externalValue == null) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
} else {
|
||||
value = ((Number) externalValue).shortValue();
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), Short.toString(value), boost);
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
if (nullValue == null) {
|
||||
return null;
|
||||
}
|
||||
value = nullValue;
|
||||
if (includeInAll == null || includeInAll) {
|
||||
if (nullValueAsString != null && (includeInAll == null || includeInAll)) {
|
||||
jsonContext.allEntries().addText(names.fullName(), nullValueAsString, boost);
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_STRING) {
|
||||
value = Integer.parseInt(jsonContext.jp().getText());
|
||||
value = Short.parseShort(jsonContext.jp().getText());
|
||||
} else {
|
||||
value = jsonContext.jp().getIntValue();
|
||||
value = jsonContext.jp().getShortValue();
|
||||
}
|
||||
if (includeInAll == null || includeInAll) {
|
||||
jsonContext.allEntries().addText(names.fullName(), jsonContext.jp().getText(), boost);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Field field = null;
|
||||
if (stored()) {
|
||||
field = new Field(names.indexName(), Numbers.shortToBytes(value), store);
|
||||
|
|
|
@ -98,11 +98,18 @@ public class JsonStringFieldMapper extends JsonFieldMapper<String> implements Js
|
|||
|
||||
@Override protected Field parseCreateField(JsonParseContext jsonContext) throws IOException {
|
||||
String value;
|
||||
if (jsonContext.externalValueSet()) {
|
||||
value = (String) jsonContext.externalValue();
|
||||
if (value == null) {
|
||||
value = nullValue;
|
||||
}
|
||||
} else {
|
||||
if (jsonContext.jp().getCurrentToken() == JsonToken.VALUE_NULL) {
|
||||
value = nullValue;
|
||||
} else {
|
||||
value = jsonContext.jp().getText();
|
||||
}
|
||||
}
|
||||
if (value == null) {
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -98,7 +98,6 @@ public abstract class JsonQueryBuilders {
|
|||
* a single field.
|
||||
*
|
||||
* @param name The name of the field
|
||||
* @param query The query string
|
||||
*/
|
||||
public static FieldJsonQueryBuilder fieldQuery(String name, String query) {
|
||||
return new FieldJsonQueryBuilder(name, query);
|
||||
|
|
|
@ -32,6 +32,8 @@ public interface Plugin {
|
|||
|
||||
String name();
|
||||
|
||||
String description();
|
||||
|
||||
Collection<Class<? extends Module>> modules();
|
||||
|
||||
Collection<Class<? extends LifecycleComponent>> services();
|
||||
|
|
|
@ -64,4 +64,8 @@ public class AllTokenFilter extends TokenFilter {
|
|||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override public String toString() {
|
||||
return allEntries.toString();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,33 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments;
|
||||
|
||||
import com.google.inject.AbstractModule;
|
||||
import org.elasticsearch.plugin.attachments.index.mapper.JsonAttachmentMapperInjector;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class AttachmentsIndexModule extends AbstractModule {
|
||||
|
||||
@Override protected void configure() {
|
||||
bind(JsonAttachmentMapperInjector.class).asEagerSingleton();
|
||||
}
|
||||
}
|
|
@ -19,8 +19,13 @@
|
|||
|
||||
package org.elasticsearch.plugin.attachments;
|
||||
|
||||
import com.google.inject.Module;
|
||||
import org.elasticsearch.plugins.AbstractPlugin;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
import static com.google.common.collect.Lists.*;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
|
@ -29,4 +34,14 @@ public class AttachmentsPlugin extends AbstractPlugin {
|
|||
@Override public String name() {
|
||||
return "attachments";
|
||||
}
|
||||
|
||||
@Override public String description() {
|
||||
return "Adds the attachment type allowing to parse difference attachment formats";
|
||||
}
|
||||
|
||||
@Override public Collection<Class<? extends Module>> indexModules() {
|
||||
Collection<Class<? extends Module>> modules = newArrayList();
|
||||
modules.add(AttachmentsIndexModule.class);
|
||||
return modules;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,249 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments.index.mapper;
|
||||
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.codehaus.jackson.JsonParser;
|
||||
import org.codehaus.jackson.JsonToken;
|
||||
import org.elasticsearch.index.mapper.FieldMapperListener;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.mapper.MergeMappingException;
|
||||
import org.elasticsearch.index.mapper.json.*;
|
||||
import org.elasticsearch.util.io.FastByteArrayInputStream;
|
||||
import org.elasticsearch.util.json.JsonBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.elasticsearch.index.mapper.json.JsonMapperBuilders.*;
|
||||
import static org.elasticsearch.plugin.attachments.tika.TikaInstance.*;
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* field1 : "..."
|
||||
* </pre>
|
||||
* <p>Or:
|
||||
* <pre>
|
||||
* {
|
||||
* file1 : {
|
||||
* _content_type : "application/pdf",
|
||||
* _name : "..../something.pdf",
|
||||
* content : ""
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class JsonAttachmentMapper implements JsonMapper {
|
||||
|
||||
public static final String JSON_TYPE = "attachment";
|
||||
|
||||
public static class Defaults {
|
||||
public static final JsonPath.Type PATH_TYPE = JsonPath.Type.FULL;
|
||||
}
|
||||
|
||||
public static class Builder extends JsonMapper.Builder<Builder, JsonAttachmentMapper> {
|
||||
|
||||
private JsonPath.Type pathType = Defaults.PATH_TYPE;
|
||||
|
||||
private JsonStringFieldMapper.Builder contentBuilder;
|
||||
|
||||
private JsonStringFieldMapper.Builder titleBuilder = stringField("title");
|
||||
|
||||
private JsonStringFieldMapper.Builder authorBuilder = stringField("author");
|
||||
|
||||
private JsonStringFieldMapper.Builder keywordsBuilder = stringField("keywords");
|
||||
|
||||
private JsonDateFieldMapper.Builder dateBuilder = dateField("date");
|
||||
|
||||
public Builder(String name) {
|
||||
super(name);
|
||||
this.builder = this;
|
||||
this.contentBuilder = stringField(name);
|
||||
}
|
||||
|
||||
public Builder pathType(JsonPath.Type pathType) {
|
||||
this.pathType = pathType;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder content(JsonStringFieldMapper.Builder content) {
|
||||
this.contentBuilder = content;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder date(JsonDateFieldMapper.Builder date) {
|
||||
this.dateBuilder = date;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder author(JsonStringFieldMapper.Builder author) {
|
||||
this.authorBuilder = author;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder title(JsonStringFieldMapper.Builder title) {
|
||||
this.titleBuilder = title;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder keywords(JsonStringFieldMapper.Builder keywords) {
|
||||
this.keywordsBuilder = keywords;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override public JsonAttachmentMapper build(BuilderContext context) {
|
||||
JsonPath.Type origPathType = context.path().pathType();
|
||||
context.path().pathType(pathType);
|
||||
|
||||
// create the content mapper under the actual name
|
||||
JsonStringFieldMapper contentMapper = contentBuilder.build(context);
|
||||
|
||||
// create the DC one under the name
|
||||
context.path().add(name);
|
||||
JsonDateFieldMapper dateMapper = dateBuilder.build(context);
|
||||
JsonStringFieldMapper authorMapper = authorBuilder.build(context);
|
||||
JsonStringFieldMapper titleMapper = titleBuilder.build(context);
|
||||
JsonStringFieldMapper keywordsMapper = keywordsBuilder.build(context);
|
||||
context.path().remove();
|
||||
|
||||
context.path().pathType(origPathType);
|
||||
|
||||
return new JsonAttachmentMapper(name, pathType, contentMapper, dateMapper, titleMapper, authorMapper, keywordsMapper);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private final String name;
|
||||
|
||||
private final JsonPath.Type pathType;
|
||||
|
||||
private final JsonStringFieldMapper contentMapper;
|
||||
|
||||
private final JsonDateFieldMapper dateMapper;
|
||||
|
||||
private final JsonStringFieldMapper authorMapper;
|
||||
|
||||
private final JsonStringFieldMapper titleMapper;
|
||||
|
||||
private final JsonStringFieldMapper keywordsMapper;
|
||||
|
||||
public JsonAttachmentMapper(String name, JsonPath.Type pathType, JsonStringFieldMapper contentMapper,
|
||||
JsonDateFieldMapper dateMapper, JsonStringFieldMapper titleMapper, JsonStringFieldMapper authorMapper,
|
||||
JsonStringFieldMapper keywordsMapper) {
|
||||
this.name = name;
|
||||
this.pathType = pathType;
|
||||
this.contentMapper = contentMapper;
|
||||
this.dateMapper = dateMapper;
|
||||
this.titleMapper = titleMapper;
|
||||
this.authorMapper = authorMapper;
|
||||
this.keywordsMapper = keywordsMapper;
|
||||
}
|
||||
|
||||
@Override public String name() {
|
||||
return name;
|
||||
}
|
||||
|
||||
@Override public void parse(JsonParseContext jsonContext) throws IOException {
|
||||
byte[] content = null;
|
||||
String contentType = null;
|
||||
String name = null;
|
||||
|
||||
JsonParser jp = jsonContext.jp();
|
||||
JsonToken token = jp.getCurrentToken();
|
||||
if (token == JsonToken.VALUE_STRING) {
|
||||
content = jp.getBinaryValue();
|
||||
} else {
|
||||
String currentFieldName = null;
|
||||
while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
|
||||
if (token == JsonToken.FIELD_NAME) {
|
||||
currentFieldName = jp.getCurrentName();
|
||||
} else if (token == JsonToken.VALUE_STRING) {
|
||||
if ("content".equals(currentFieldName)) {
|
||||
content = jp.getBinaryValue();
|
||||
} else if ("_content_type".equals(currentFieldName)) {
|
||||
contentType = jp.getText();
|
||||
} else if ("_name".equals(currentFieldName)) {
|
||||
name = jp.getText();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Metadata metadata = new Metadata();
|
||||
if (contentType != null) {
|
||||
metadata.add(Metadata.CONTENT_TYPE, contentType);
|
||||
}
|
||||
if (name != null) {
|
||||
metadata.add(Metadata.RESOURCE_NAME_KEY, name);
|
||||
}
|
||||
|
||||
String parsedContent;
|
||||
try {
|
||||
parsedContent = tika().parseToString(new FastByteArrayInputStream(content), metadata);
|
||||
} catch (TikaException e) {
|
||||
throw new MapperParsingException("Failed to extract text for [" + name + "]", e);
|
||||
}
|
||||
|
||||
jsonContext.externalValue(parsedContent);
|
||||
contentMapper.parse(jsonContext);
|
||||
|
||||
jsonContext.externalValue(metadata.get(Metadata.DATE));
|
||||
dateMapper.parse(jsonContext);
|
||||
|
||||
jsonContext.externalValue(metadata.get(Metadata.TITLE));
|
||||
titleMapper.parse(jsonContext);
|
||||
|
||||
jsonContext.externalValue(metadata.get(Metadata.AUTHOR));
|
||||
authorMapper.parse(jsonContext);
|
||||
|
||||
jsonContext.externalValue(metadata.get(Metadata.KEYWORDS));
|
||||
keywordsMapper.parse(jsonContext);
|
||||
}
|
||||
|
||||
@Override public void merge(JsonMapper mergeWith, JsonMergeContext mergeContext) throws MergeMappingException {
|
||||
// ignore this for now
|
||||
}
|
||||
|
||||
@Override public void traverse(FieldMapperListener fieldMapperListener) {
|
||||
contentMapper.traverse(fieldMapperListener);
|
||||
dateMapper.traverse(fieldMapperListener);
|
||||
titleMapper.traverse(fieldMapperListener);
|
||||
authorMapper.traverse(fieldMapperListener);
|
||||
keywordsMapper.traverse(fieldMapperListener);
|
||||
}
|
||||
|
||||
@Override public void toJson(JsonBuilder builder, Params params) throws IOException {
|
||||
builder.startObject(name);
|
||||
builder.field("type", JSON_TYPE);
|
||||
builder.field("pathType", pathType.name().toLowerCase());
|
||||
|
||||
builder.startObject("fields");
|
||||
contentMapper.toJson(builder, params);
|
||||
authorMapper.toJson(builder, params);
|
||||
titleMapper.toJson(builder, params);
|
||||
dateMapper.toJson(builder, params);
|
||||
keywordsMapper.toJson(builder, params);
|
||||
builder.endObject();
|
||||
|
||||
builder.endObject();
|
||||
}
|
||||
}
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments.index.mapper;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import org.elasticsearch.index.AbstractIndexComponent;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.mapper.json.JsonDocumentMapperParser;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
import org.elasticsearch.util.settings.Settings;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class JsonAttachmentMapperInjector extends AbstractIndexComponent {
|
||||
|
||||
@Inject public JsonAttachmentMapperInjector(Index index, @IndexSettings Settings indexSettings, MapperService mapperService) {
|
||||
super(index, indexSettings);
|
||||
|
||||
((JsonDocumentMapperParser) mapperService.documentMapperParser()).putTypeParser("attachment", new JsonAttachmentTypeParser());
|
||||
}
|
||||
}
|
|
@ -0,0 +1,91 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments.index.mapper;
|
||||
|
||||
import org.codehaus.jackson.JsonNode;
|
||||
import org.codehaus.jackson.node.ObjectNode;
|
||||
import org.elasticsearch.index.mapper.MapperParsingException;
|
||||
import org.elasticsearch.index.mapper.json.JsonDateFieldMapper;
|
||||
import org.elasticsearch.index.mapper.json.JsonMapper;
|
||||
import org.elasticsearch.index.mapper.json.JsonStringFieldMapper;
|
||||
import org.elasticsearch.index.mapper.json.JsonTypeParser;
|
||||
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
|
||||
import static org.elasticsearch.index.mapper.json.JsonTypeParsers.*;
|
||||
|
||||
/**
|
||||
* <pre>
|
||||
* field1 : { type : "attachment" }
|
||||
* </pre>
|
||||
* Or:
|
||||
* <pre>
|
||||
* field1 : {
|
||||
* type : "attachment",
|
||||
* fields : {
|
||||
* field1 : {type : "binary"},
|
||||
* title : {store : "yes"},
|
||||
* date : {store : "yes"}
|
||||
* }
|
||||
* }
|
||||
* </pre>
|
||||
*
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class JsonAttachmentTypeParser implements JsonTypeParser {
|
||||
|
||||
@Override public JsonMapper.Builder parse(String name, JsonNode node, ParserContext parserContext) throws MapperParsingException {
|
||||
ObjectNode attachmentNode = (ObjectNode) node;
|
||||
JsonAttachmentMapper.Builder builder = new JsonAttachmentMapper.Builder(name);
|
||||
|
||||
for (Iterator<Map.Entry<String, JsonNode>> fieldsIt = attachmentNode.getFields(); fieldsIt.hasNext();) {
|
||||
Map.Entry<String, JsonNode> entry = fieldsIt.next();
|
||||
String fieldName = entry.getKey();
|
||||
JsonNode fieldNode = entry.getValue();
|
||||
if (fieldName.equals("pathType")) {
|
||||
builder.pathType(parsePathType(name, fieldNode.getValueAsText()));
|
||||
} else if (fieldName.equals("fields")) {
|
||||
ObjectNode fieldsNode = (ObjectNode) fieldNode;
|
||||
for (Iterator<Map.Entry<String, JsonNode>> propsIt = fieldsNode.getFields(); propsIt.hasNext();) {
|
||||
Map.Entry<String, JsonNode> entry1 = propsIt.next();
|
||||
String propName = entry1.getKey();
|
||||
JsonNode propNode = entry1.getValue();
|
||||
|
||||
if (name.equals(propName)) {
|
||||
// that is the content
|
||||
builder.content((JsonStringFieldMapper.Builder) parserContext.typeParser("string").parse(name, propNode, parserContext));
|
||||
} else if ("date".equals(propName)) {
|
||||
builder.date((JsonDateFieldMapper.Builder) parserContext.typeParser("date").parse("date", propNode, parserContext));
|
||||
} else if ("title".equals(propName)) {
|
||||
builder.title((JsonStringFieldMapper.Builder) parserContext.typeParser("string").parse("title", propNode, parserContext));
|
||||
} else if ("author".equals(propName)) {
|
||||
builder.author((JsonStringFieldMapper.Builder) parserContext.typeParser("string").parse("author", propNode, parserContext));
|
||||
} else if ("keywords".equals(propName)) {
|
||||
builder.keywords((JsonStringFieldMapper.Builder) parserContext.typeParser("string").parse("keywords", propNode, parserContext));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return builder;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments.tika;
|
||||
|
||||
import org.apache.tika.Tika;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
public class TikaInstance {
|
||||
|
||||
private static final Tika tika = new Tika();
|
||||
|
||||
public static Tika tika() {
|
||||
return tika;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments.index.mapper;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.analysis.AnalysisService;
|
||||
import org.elasticsearch.index.mapper.json.JsonDocumentMapper;
|
||||
import org.elasticsearch.index.mapper.json.JsonDocumentMapperParser;
|
||||
import org.testng.annotations.BeforeTest;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.elasticsearch.util.io.Streams.*;
|
||||
import static org.elasticsearch.util.json.JsonBuilder.*;
|
||||
import static org.hamcrest.MatcherAssert.*;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
@Test
|
||||
public class SimpleAttachmentMapperTests {
|
||||
|
||||
private JsonDocumentMapperParser mapperParser;
|
||||
|
||||
@BeforeTest public void setupMapperParser() {
|
||||
mapperParser = new JsonDocumentMapperParser(new AnalysisService(new Index("test")));
|
||||
mapperParser.putTypeParser(JsonAttachmentMapper.JSON_TYPE, new JsonAttachmentTypeParser());
|
||||
}
|
||||
|
||||
@Test public void testSimpleMappings() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/plugin/attachments/index/mapper/test-mapping.json");
|
||||
JsonDocumentMapper docMapper = (JsonDocumentMapper) mapperParser.parse(mapping);
|
||||
byte[] json = jsonBuilder().startObject().field("_id", 1).field("file", copyToBytesFromClasspath("/org/elasticsearch/plugin/attachments/index/mapper/testXHTML.html")).endObject().copiedBytes();
|
||||
|
||||
Document doc = docMapper.parse(json).doc();
|
||||
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("XHTML test document"));
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("This document tests the ability of Apache Tika to extract content"));
|
||||
|
||||
// re-parse it
|
||||
String builtMapping = docMapper.buildSource();
|
||||
docMapper = (JsonDocumentMapper) mapperParser.parse(builtMapping);
|
||||
|
||||
json = jsonBuilder().startObject().field("_id", 1).field("file", copyToBytesFromClasspath("/org/elasticsearch/plugin/attachments/index/mapper/testXHTML.html")).endObject().copiedBytes();
|
||||
|
||||
doc = docMapper.parse(json).doc();
|
||||
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file.title").mapper().names().indexName()), equalTo("XHTML test document"));
|
||||
assertThat(doc.get(docMapper.mappers().smartName("file").mapper().names().indexName()), containsString("This document tests the ability of Apache Tika to extract content"));
|
||||
}
|
||||
}
|
|
@ -0,0 +1,7 @@
|
|||
{
|
||||
person : {
|
||||
properties : {
|
||||
"file" : { type : "attachment" }
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html xmlns="http://www.w3.org/1999/xhtml">
|
||||
<head>
|
||||
<title>XHTML test document</title>
|
||||
<meta name="Author" content="Tika Developers"/>
|
||||
<meta http-equiv="refresh" content="5"/>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
This document tests the ability of Apache Tika to extract content
|
||||
from an <a href="http://www.w3.org/TR/xhtml1/">XHTML document</a>.
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,87 @@
|
|||
/*
|
||||
* Licensed to Elastic Search and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Elastic Search licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.plugin.attachments.test;
|
||||
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse;
|
||||
import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus;
|
||||
import org.elasticsearch.action.count.CountResponse;
|
||||
import org.elasticsearch.server.Server;
|
||||
import org.elasticsearch.util.logging.Loggers;
|
||||
import org.slf4j.Logger;
|
||||
import org.testng.annotations.*;
|
||||
|
||||
import static org.elasticsearch.client.Requests.*;
|
||||
import static org.elasticsearch.index.query.json.JsonQueryBuilders.*;
|
||||
import static org.elasticsearch.server.ServerBuilder.*;
|
||||
import static org.elasticsearch.util.io.Streams.*;
|
||||
import static org.elasticsearch.util.json.JsonBuilder.*;
|
||||
import static org.elasticsearch.util.settings.ImmutableSettings.*;
|
||||
import static org.hamcrest.MatcherAssert.*;
|
||||
import static org.hamcrest.Matchers.*;
|
||||
|
||||
/**
|
||||
* @author kimchy (shay.banon)
|
||||
*/
|
||||
@Test
|
||||
public class SimpleAttachmentIntegrationTests {
|
||||
|
||||
private final Logger logger = Loggers.getLogger(getClass());
|
||||
|
||||
private Server server;
|
||||
|
||||
@BeforeClass public void setupServer() {
|
||||
server = serverBuilder().settings(settingsBuilder().put("node.local", true)).server();
|
||||
}
|
||||
|
||||
@AfterClass public void closeServer() {
|
||||
server.close();
|
||||
}
|
||||
|
||||
@BeforeMethod public void createIndex() {
|
||||
logger.info("creating index [test]");
|
||||
server.client().admin().indices().create(createIndexRequest("test").settings(settingsBuilder().put("index.numberOfReplicas", 0))).actionGet();
|
||||
logger.info("Running Cluster Health");
|
||||
ClusterHealthResponse clusterHealth = server.client().admin().cluster().health(clusterHealth().waitForGreenStatus()).actionGet();
|
||||
logger.info("Done Cluster Health, status " + clusterHealth.status());
|
||||
assertThat(clusterHealth.timedOut(), equalTo(false));
|
||||
assertThat(clusterHealth.status(), equalTo(ClusterHealthStatus.GREEN));
|
||||
}
|
||||
|
||||
@AfterMethod public void deleteIndex() {
|
||||
logger.info("deleting index [test]");
|
||||
server.client().admin().indices().delete(deleteIndexRequest("test")).actionGet();
|
||||
}
|
||||
|
||||
@Test public void testSimpleAttachment() throws Exception {
|
||||
String mapping = copyToStringFromClasspath("/org/elasticsearch/plugin/attachments/index/mapper/test-mapping.json");
|
||||
|
||||
server.client().admin().indices().putMapping(putMappingRequest("test").mappingSource(mapping)).actionGet();
|
||||
|
||||
server.client().index(indexRequest("test").type("person")
|
||||
.source(jsonBuilder().startObject().field("file", copyToBytesFromClasspath("/org/elasticsearch/plugin/attachments/index/mapper/testXHTML.html")).endObject())).actionGet();
|
||||
server.client().admin().indices().refresh(refreshRequest()).actionGet();
|
||||
|
||||
CountResponse countResponse = server.client().count(countRequest("test").querySource(fieldQuery("file.title", "test document"))).actionGet();
|
||||
assertThat(countResponse.count(), equalTo(1l));
|
||||
|
||||
countResponse = server.client().count(countRequest("test").querySource(fieldQuery("file", "tests the ability"))).actionGet();
|
||||
assertThat(countResponse.count(), equalTo(1l));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue