Analysis: Add detail response support

Add an `explain` option to the analyze API.
Fix a char_filter bug: char filters parsed from the request body were passed to `tokenFilters()` instead of `charFilters()`.

Closes #11076 #15257
Jun Ohtani 2015-06-15 16:32:44 +09:00
parent 1ef24d2a85
commit fab44398d9
10 changed files with 1055 additions and 43 deletions

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.action.admin.indices.analyze;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
import org.elasticsearch.common.Strings;
@ -46,6 +47,10 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
private String field;
private boolean explain = false;
private String[] attributes = Strings.EMPTY_ARRAY;
public AnalyzeRequest() {
}
@ -86,6 +91,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
}
public AnalyzeRequest tokenFilters(String... tokenFilters) {
if (tokenFilters == null) {
throw new IllegalArgumentException("token filters must not be null");
}
this.tokenFilters = tokenFilters;
return this;
}
@ -95,6 +103,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
}
public AnalyzeRequest charFilters(String... charFilters) {
if (charFilters == null) {
throw new IllegalArgumentException("char filters must not be null");
}
this.charFilters = charFilters;
return this;
}
@ -112,18 +123,33 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
return this.field;
}
public AnalyzeRequest explain(boolean explain) {
this.explain = explain;
return this;
}
public boolean explain() {
return this.explain;
}
public AnalyzeRequest attributes(String... attributes) {
if (attributes == null) {
throw new IllegalArgumentException("attributes must not be null");
}
this.attributes = attributes;
return this;
}
public String[] attributes() {
return this.attributes;
}
@Override
public ActionRequestValidationException validate() {
ActionRequestValidationException validationException = null;
if (text == null || text.length == 0) {
validationException = addValidationError("text is missing", validationException);
}
if (tokenFilters == null) {
validationException = addValidationError("token filters must not be null", validationException);
}
if (charFilters == null) {
validationException = addValidationError("char filters must not be null", validationException);
}
return validationException;
}
@ -136,6 +162,10 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
tokenFilters = in.readStringArray();
charFilters = in.readStringArray();
field = in.readOptionalString();
if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
explain = in.readBoolean();
attributes = in.readStringArray();
}
}
@Override
@ -147,5 +177,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
out.writeStringArray(tokenFilters);
out.writeStringArray(charFilters);
out.writeOptionalString(field);
if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
out.writeBoolean(explain);
out.writeStringArray(attributes);
}
}
}

View File

@ -78,6 +78,22 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
return this;
}
/**
* Sets whether the request should return a detailed (explain) response
*/
public AnalyzeRequestBuilder setExplain(boolean explain) {
request.explain(explain);
return this;
}
/**
* Sets the token attributes to be included in the response
*/
public AnalyzeRequestBuilder setAttributes(String... attributes) {
request.attributes(attributes);
return this;
}
/**
* Sets texts to analyze
*/
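For reference, a minimal sketch of how a caller might exercise the new builder options, mirroring the integration test added further down (it assumes a connected `client` and the classes in this package):

    AnalyzeResponse response = client.admin().indices()
            .prepareAnalyze("This is troubled")        // text to analyze
            .setTokenizer("standard")
            .setTokenFilters("snowball")
            .setExplain(true)                          // ask for the detailed breakdown
            .setAttributes("keyword")                  // only return the "keyword" attribute
            .get();

    // With explain=true the flat token list is left unset and detail() carries the result.
    DetailAnalyzeResponse detail = response.detail();
    for (DetailAnalyzeResponse.AnalyzeTokenList tokenfilter : detail.tokenfilters()) {
        System.out.println(tokenfilter.getName() + ": " + tokenfilter.getTokens().length + " tokens");
    }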

View File

@ -18,6 +18,7 @@
*/
package org.elasticsearch.action.admin.indices.analyze;
import org.elasticsearch.Version;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
@ -30,28 +31,32 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
*
*/
public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeResponse.AnalyzeToken>, ToXContent {
public static class AnalyzeToken implements Streamable {
public static class AnalyzeToken implements Streamable, ToXContent {
private String term;
private int startOffset;
private int endOffset;
private int position;
private Map<String, Object> attributes;
private String type;
AnalyzeToken() {
}
public AnalyzeToken(String term, int position, int startOffset, int endOffset, String type) {
public AnalyzeToken(String term, int position, int startOffset, int endOffset, String type,
Map<String, Object> attributes) {
this.term = term;
this.position = position;
this.startOffset = startOffset;
this.endOffset = endOffset;
this.type = type;
this.attributes = attributes;
}
public String getTerm() {
@ -74,6 +79,27 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
return this.type;
}
public Map<String, Object> getAttributes(){
return this.attributes;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(Fields.TOKEN, term);
builder.field(Fields.START_OFFSET, startOffset);
builder.field(Fields.END_OFFSET, endOffset);
builder.field(Fields.TYPE, type);
builder.field(Fields.POSITION, position);
if (attributes != null && !attributes.isEmpty()) {
for (Map.Entry<String, Object> entity : attributes.entrySet()) {
builder.field(entity.getKey(), entity.getValue());
}
}
builder.endObject();
return builder;
}
public static AnalyzeToken readAnalyzeToken(StreamInput in) throws IOException {
AnalyzeToken analyzeToken = new AnalyzeToken();
analyzeToken.readFrom(in);
@ -87,6 +113,9 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
endOffset = in.readInt();
position = in.readVInt();
type = in.readOptionalString();
if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
attributes = (Map<String, Object>) in.readGenericValue();
}
}
@Override
@ -96,22 +125,32 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
out.writeInt(endOffset);
out.writeVInt(position);
out.writeOptionalString(type);
if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
out.writeGenericValue(attributes);
}
}
}
private DetailAnalyzeResponse detail;
private List<AnalyzeToken> tokens;
AnalyzeResponse() {
}
public AnalyzeResponse(List<AnalyzeToken> tokens) {
public AnalyzeResponse(List<AnalyzeToken> tokens, DetailAnalyzeResponse detail) {
this.tokens = tokens;
this.detail = detail;
}
public List<AnalyzeToken> getTokens() {
return this.tokens;
}
public DetailAnalyzeResponse detail() {
return this.detail;
}
@Override
public Iterator<AnalyzeToken> iterator() {
return tokens.iterator();
@ -119,17 +158,19 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startArray(Fields.TOKENS);
for (AnalyzeToken token : tokens) {
builder.startObject();
builder.field(Fields.TOKEN, token.getTerm());
builder.field(Fields.START_OFFSET, token.getStartOffset());
builder.field(Fields.END_OFFSET, token.getEndOffset());
builder.field(Fields.TYPE, token.getType());
builder.field(Fields.POSITION, token.getPosition());
if (tokens != null) {
builder.startArray(Fields.TOKENS);
for (AnalyzeToken token : tokens) {
token.toXContent(builder, params);
}
builder.endArray();
}
if (detail != null) {
builder.startObject(Fields.DETAIL);
detail.toXContent(builder, params);
builder.endObject();
}
builder.endArray();
return builder;
}
@ -141,14 +182,24 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
for (int i = 0; i < size; i++) {
tokens.add(AnalyzeToken.readAnalyzeToken(in));
}
if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
detail = in.readOptionalStreamable(DetailAnalyzeResponse::new);
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
super.writeTo(out);
out.writeVInt(tokens.size());
for (AnalyzeToken token : tokens) {
token.writeTo(out);
if (tokens != null) {
out.writeVInt(tokens.size());
for (AnalyzeToken token : tokens) {
token.writeTo(out);
}
} else {
out.writeVInt(0);
}
if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
out.writeOptionalStreamable(detail);
}
}
@ -159,5 +210,6 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
static final XContentBuilderString END_OFFSET = new XContentBuilderString("end_offset");
static final XContentBuilderString TYPE = new XContentBuilderString("type");
static final XContentBuilderString POSITION = new XContentBuilderString("position");
static final XContentBuilderString DETAIL = new XContentBuilderString("detail");
}
}

View File

@ -0,0 +1,319 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.admin.indices.analyze;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Streamable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentBuilderString;
import java.io.IOException;
public class DetailAnalyzeResponse implements Streamable, ToXContent {
DetailAnalyzeResponse() {
}
private boolean customAnalyzer = false;
private AnalyzeTokenList analyzer;
private CharFilteredText[] charfilters;
private AnalyzeTokenList tokenizer;
private AnalyzeTokenList[] tokenfilters;
public DetailAnalyzeResponse(AnalyzeTokenList analyzer) {
this(false, analyzer, null, null, null);
}
public DetailAnalyzeResponse(CharFilteredText[] charfilters, AnalyzeTokenList tokenizer, AnalyzeTokenList[] tokenfilters) {
this(true, null, charfilters, tokenizer, tokenfilters);
}
public DetailAnalyzeResponse(boolean customAnalyzer,
AnalyzeTokenList analyzer,
CharFilteredText[] charfilters,
AnalyzeTokenList tokenizer,
AnalyzeTokenList[] tokenfilters) {
this.customAnalyzer = customAnalyzer;
this.analyzer = analyzer;
this.charfilters = charfilters;
this.tokenizer = tokenizer;
this.tokenfilters = tokenfilters;
}
public AnalyzeTokenList analyzer() {
return this.analyzer;
}
public DetailAnalyzeResponse analyzer(AnalyzeTokenList analyzer) {
this.analyzer = analyzer;
return this;
}
public CharFilteredText[] charfilters() {
return this.charfilters;
}
public DetailAnalyzeResponse charfilters(CharFilteredText[] charfilters) {
this.charfilters = charfilters;
return this;
}
public AnalyzeTokenList tokenizer() {
return tokenizer;
}
public DetailAnalyzeResponse tokenizer(AnalyzeTokenList tokenizer) {
this.tokenizer = tokenizer;
return this;
}
public AnalyzeTokenList[] tokenfilters() {
return tokenfilters;
}
public DetailAnalyzeResponse tokenfilters(AnalyzeTokenList[] tokenfilters) {
this.tokenfilters = tokenfilters;
return this;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.field(Fields.CUSTOM_ANALYZER, customAnalyzer);
if (analyzer != null) {
builder.startObject(Fields.ANALYZER);
analyzer.toXContentWithoutObject(builder, params);
builder.endObject();
}
if (charfilters != null) {
builder.startArray(Fields.CHARFILTERS);
for (CharFilteredText charfilter : charfilters) {
charfilter.toXContent(builder, params);
}
builder.endArray();
}
if (tokenizer != null) {
builder.startObject(Fields.TOKENIZER);
tokenizer.toXContentWithoutObject(builder, params);
builder.endObject();
}
if (tokenfilters != null) {
builder.startArray(Fields.TOKENFILTERS);
for (AnalyzeTokenList tokenfilter : tokenfilters) {
tokenfilter.toXContent(builder, params);
}
builder.endArray();
}
return builder;
}
static final class Fields {
static final XContentBuilderString NAME = new XContentBuilderString("name");
static final XContentBuilderString FILTERED_TEXT = new XContentBuilderString("filtered_text");
static final XContentBuilderString CUSTOM_ANALYZER = new XContentBuilderString("custom_analyzer");
static final XContentBuilderString ANALYZER = new XContentBuilderString("analyzer");
static final XContentBuilderString CHARFILTERS = new XContentBuilderString("charfilters");
static final XContentBuilderString TOKENIZER = new XContentBuilderString("tokenizer");
static final XContentBuilderString TOKENFILTERS = new XContentBuilderString("tokenfilters");
}
@Override
public void readFrom(StreamInput in) throws IOException {
this.customAnalyzer = in.readBoolean();
if (customAnalyzer) {
tokenizer = AnalyzeTokenList.readAnalyzeTokenList(in);
int size = in.readVInt();
if (size > 0) {
charfilters = new CharFilteredText[size];
for (int i = 0; i < size; i++) {
charfilters[i] = CharFilteredText.readCharFilteredText(in);
}
}
size = in.readVInt();
if (size > 0) {
tokenfilters = new AnalyzeTokenList[size];
for (int i = 0; i < size; i++) {
tokenfilters[i] = AnalyzeTokenList.readAnalyzeTokenList(in);
}
}
} else {
analyzer = AnalyzeTokenList.readAnalyzeTokenList(in);
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBoolean(customAnalyzer);
if (customAnalyzer) {
tokenizer.writeTo(out);
if (charfilters != null) {
out.writeVInt(charfilters.length);
for (CharFilteredText charfilter : charfilters) {
charfilter.writeTo(out);
}
} else {
out.writeVInt(0);
}
if (tokenfilters != null) {
out.writeVInt(tokenfilters.length);
for (AnalyzeTokenList tokenfilter : tokenfilters) {
tokenfilter.writeTo(out);
}
} else {
out.writeVInt(0);
}
} else {
analyzer.writeTo(out);
}
}
public static class AnalyzeTokenList implements Streamable, ToXContent {
private String name;
private AnalyzeResponse.AnalyzeToken[] tokens;
AnalyzeTokenList() {
}
public AnalyzeTokenList(String name, AnalyzeResponse.AnalyzeToken[] tokens) {
this.name = name;
this.tokens = tokens;
}
public String getName() {
return name;
}
public AnalyzeResponse.AnalyzeToken[] getTokens() {
return tokens;
}
public static AnalyzeTokenList readAnalyzeTokenList(StreamInput in) throws IOException {
AnalyzeTokenList list = new AnalyzeTokenList();
list.readFrom(in);
return list;
}
public XContentBuilder toXContentWithoutObject(XContentBuilder builder, Params params) throws IOException {
builder.field(Fields.NAME, this.name);
builder.startArray(AnalyzeResponse.Fields.TOKENS);
for (AnalyzeResponse.AnalyzeToken token : tokens) {
token.toXContent(builder, params);
}
builder.endArray();
return builder;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(Fields.NAME, this.name);
builder.startArray(AnalyzeResponse.Fields.TOKENS);
for (AnalyzeResponse.AnalyzeToken token : tokens) {
token.toXContent(builder, params);
}
builder.endArray();
builder.endObject();
return builder;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
int size = in.readVInt();
if (size > 0) {
tokens = new AnalyzeResponse.AnalyzeToken[size];
for (int i = 0; i < size; i++) {
tokens[i] = AnalyzeResponse.AnalyzeToken.readAnalyzeToken(in);
}
}
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
if (tokens != null) {
out.writeVInt(tokens.length);
for (AnalyzeResponse.AnalyzeToken token : tokens) {
token.writeTo(out);
}
} else {
out.writeVInt(0);
}
}
}
public static class CharFilteredText implements Streamable, ToXContent {
private String name;
private String[] texts;
CharFilteredText() {
}
public CharFilteredText(String name, String[] texts) {
this.name = name;
if (texts != null) {
this.texts = texts;
} else {
this.texts = Strings.EMPTY_ARRAY;
}
}
public String getName() {
return name;
}
public String[] getTexts() {
return texts;
}
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.field(Fields.NAME, name);
builder.field(Fields.FILTERED_TEXT, texts);
builder.endObject();
return builder;
}
public static CharFilteredText readCharFilteredText(StreamInput in) throws IOException {
CharFilteredText text = new CharFilteredText();
text.readFrom(in);
return text;
}
@Override
public void readFrom(StreamInput in) throws IOException {
name = in.readString();
texts = in.readStringArray();
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeString(name);
out.writeStringArray(texts);
}
}
}

View File

@ -20,10 +20,15 @@ package org.elasticsearch.action.admin.indices.analyze;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeReflector;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.support.ActionFilters;
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
@ -33,6 +38,7 @@ import org.elasticsearch.cluster.block.ClusterBlockException;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.routing.ShardsIterator;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexService;
@ -46,8 +52,8 @@ import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.io.Reader;
import java.util.*;
/**
* Transport action used to execute analyze requests
@ -222,6 +228,23 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
throw new IllegalArgumentException("failed to find analyzer");
}
List<AnalyzeResponse.AnalyzeToken> tokens = null;
DetailAnalyzeResponse detail = null;
if (request.explain()) {
detail = detailAnalyze(request, analyzer, field);
} else {
tokens = simpleAnalyze(request, analyzer, field);
}
if (closeAnalyzer) {
analyzer.close();
}
return new AnalyzeResponse(tokens, detail);
}
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
int lastPosition = -1;
int lastOffset = 0;
@ -238,7 +261,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
if (increment > 0) {
lastPosition = lastPosition + increment;
}
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type()));
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type(), null));
}
stream.end();
@ -251,11 +274,211 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
throw new ElasticsearchException("failed to analyze", e);
}
}
return tokens;
}
if (closeAnalyzer) {
analyzer.close();
private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
DetailAnalyzeResponse detailResponse;
final Set<String> includeAttributes = new HashSet<>();
if (request.attributes() != null) {
for (String attribute : request.attributes()) {
includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
}
}
return new AnalyzeResponse(tokens);
CustomAnalyzer customAnalyzer = null;
if (analyzer instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) analyzer;
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
}
if (customAnalyzer != null) {
// custom analyzer: break it down into its char filters, tokenizer and token filters
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
TokenListCreator tokenizerTokenListCreator = new TokenListCreator();
for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
String charFilteredSource = request.text()[textIndex];
Reader reader = new FastStringReader(charFilteredSource);
if (charFilterFactories != null) {
for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
reader = charFilterFactories[charFilterIndex].create(reader);
Reader readerForWriteOut = new FastStringReader(charFilteredSource);
readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
charFilteredSource = writeCharStream(readerForWriteOut);
charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
}
}
// analyze with the tokenizer only
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(reader);
tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes);
// analyze the output after each token filter
if (tokenFilterFactories != null) {
for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator();
}
TokenStream stream = createStackedTokenStream(request.text()[textIndex],
charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes);
}
}
}
DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length];
if (charFilterFactories != null) {
for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText(
charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
}
}
DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
if (tokenFilterFactories != null) {
for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList(
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
}
}
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
} else {
String name;
if (analyzer instanceof NamedAnalyzer) {
name = ((NamedAnalyzer) analyzer).name();
} else {
name = analyzer.getClass().getName();
}
TokenListCreator tokenListCreator = new TokenListCreator();
for (String text : request.text()) {
tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field,
includeAttributes);
}
detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
}
return detailResponse;
}
private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) {
Reader reader = new FastStringReader(source);
for (CharFilterFactory charFilterFactory : charFilterFactories) {
reader = charFilterFactory.create(reader);
}
Tokenizer tokenizer = tokenizerFactory.create();
tokenizer.setReader(reader);
TokenStream tokenStream = tokenizer;
for (int i = 0; i < current; i++) {
tokenStream = tokenFilterFactories[i].create(tokenStream);
}
return tokenStream;
}
private static String writeCharStream(Reader input) {
final int BUFFER_SIZE = 1024;
char[] buf = new char[BUFFER_SIZE];
int len;
StringBuilder sb = new StringBuilder();
do {
try {
len = input.read(buf, 0, BUFFER_SIZE);
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze (charFiltering)", e);
}
if (len > 0)
sb.append(buf, 0, len);
} while (len == BUFFER_SIZE);
return sb.toString();
}
private static class TokenListCreator {
int lastPosition = -1;
int lastOffset = 0;
List<AnalyzeResponse.AnalyzeToken> tokens;
TokenListCreator() {
tokens = new ArrayList<>();
}
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
try {
stream.reset();
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
TypeAttribute type = stream.addAttribute(TypeAttribute.class);
while (stream.incrementToken()) {
int increment = posIncr.getPositionIncrement();
if (increment > 0) {
lastPosition = lastPosition + increment;
}
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
}
stream.end();
lastOffset += offset.endOffset();
lastPosition += posIncr.getPositionIncrement();
lastPosition += analyzer.getPositionIncrementGap(field);
lastOffset += analyzer.getOffsetGap(field);
} catch (IOException e) {
throw new ElasticsearchException("failed to analyze", e);
} finally {
IOUtils.closeWhileHandlingException(stream);
}
}
private AnalyzeResponse.AnalyzeToken[] getArrayTokens() {
return tokens.toArray(new AnalyzeResponse.AnalyzeToken[tokens.size()]);
}
}
/**
* Extracts the remaining token attributes from the stream, skipping the
* standard term, position increment, offset and type attributes.
*
* @param stream current TokenStream
* @param includeAttributes if non-empty, only attribute keys in this set (case-insensitive) are extracted
* @return map of attribute key to value
*/
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
final Map<String, Object> extendedAttributes = new TreeMap<>();
stream.reflectWith(new AttributeReflector() {
@Override
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
if (CharTermAttribute.class.isAssignableFrom(attClass))
return;
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
return;
if (OffsetAttribute.class.isAssignableFrom(attClass))
return;
if (TypeAttribute.class.isAssignableFrom(attClass))
return;
if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
if (value instanceof BytesRef) {
final BytesRef p = (BytesRef) value;
value = p.toString();
}
extendedAttributes.put(key, value);
}
}
});
return extendedAttributes;
}
}

View File

@ -21,7 +21,8 @@ package org.elasticsearch.rest.action.admin.indices.analyze;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
@ -47,6 +48,17 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
*/
public class RestAnalyzeAction extends BaseRestHandler {
public static class Fields {
public static final ParseField ANALYZER = new ParseField("analyzer");
public static final ParseField TEXT = new ParseField("text");
public static final ParseField FIELD = new ParseField("field");
public static final ParseField TOKENIZER = new ParseField("tokenizer");
public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters");
public static final ParseField CHAR_FILTERS = new ParseField("char_filters");
public static final ParseField EXPLAIN = new ParseField("explain");
public static final ParseField ATTRIBUTES = new ParseField("attributes");
}
@Inject
public RestAnalyzeAction(Settings settings, RestController controller, Client client) {
super(settings, controller, client);
@ -68,6 +80,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
analyzeRequest.tokenizer(request.param("tokenizer"));
analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
analyzeRequest.explain(request.paramAsBoolean("explain", false));
analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
if (RestActions.hasBodyContent(request)) {
XContentType type = RestActions.guessBodyContentType(request);
@ -78,14 +92,14 @@ public class RestAnalyzeAction extends BaseRestHandler {
}
} else {
// NOTE: if a rest request with an xcontent body also has request parameters, the parameters do not override the xcontent values
buildFromContent(RestActions.getRestContent(request), analyzeRequest);
buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
}
}
client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
}
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest) {
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
try (XContentParser parser = XContentHelper.createParser(content)) {
if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
throw new IllegalArgumentException("Malforrmed content, must start with an object");
@ -95,9 +109,9 @@ public class RestAnalyzeAction extends BaseRestHandler {
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
} else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.text(parser.text());
} else if ("text".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
} else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.START_ARRAY) {
List<String> texts = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
@ -105,14 +119,14 @@ public class RestAnalyzeAction extends BaseRestHandler {
}
texts.add(parser.text());
}
analyzeRequest.text(texts.toArray(Strings.EMPTY_ARRAY));
} else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.text(texts.toArray(new String[texts.size()]));
} else if (parseFieldMatcher.match(currentFieldName, Fields.ANALYZER) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.analyzer(parser.text());
} else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
} else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.field(parser.text());
} else if ("tokenizer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) {
analyzeRequest.tokenizer(parser.text());
} else if (("token_filters".equals(currentFieldName) || "filters".equals(currentFieldName)) && token == XContentParser.Token.START_ARRAY) {
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) {
List<String> filters = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
@ -120,8 +134,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
}
filters.add(parser.text());
}
analyzeRequest.tokenFilters(filters.toArray(Strings.EMPTY_ARRAY));
} else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
analyzeRequest.tokenFilters(filters.toArray(new String[filters.size()]));
} else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) {
List<String> charFilters = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
@ -129,7 +143,18 @@ public class RestAnalyzeAction extends BaseRestHandler {
}
charFilters.add(parser.text());
}
analyzeRequest.tokenFilters(charFilters.toArray(Strings.EMPTY_ARRAY));
analyzeRequest.charFilters(charFilters.toArray(new String[charFilters.size()]));
} else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN) && token == XContentParser.Token.VALUE_BOOLEAN) {
analyzeRequest.explain(parser.booleanValue());
} else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY){
List<String> attributes = new ArrayList<>();
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token.isValue() == false) {
throw new IllegalArgumentException(currentFieldName + " array element should only contain attribute name");
}
attributes.add(parser.text());
}
analyzeRequest.attributes(attributes.toArray(new String[attributes.size()]));
} else {
throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
}

View File

@ -22,11 +22,14 @@ import org.elasticsearch.action.admin.indices.alias.Alias;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.rest.action.admin.indices.analyze.RestAnalyzeAction;
import org.elasticsearch.test.ESIntegTestCase;
import org.hamcrest.core.IsNull;
import java.io.IOException;
@ -36,8 +39,10 @@ import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.is;
import static org.hamcrest.Matchers.notNullValue;
import static org.hamcrest.Matchers.startsWith;
/**
*
*/
@ -201,7 +206,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
RestAnalyzeAction.buildFromContent(content, analyzeRequest);
RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
assertThat(analyzeRequest.text().length, equalTo(1));
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
@ -213,7 +218,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
try {
RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest);
RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
fail("shouldn't get here");
} catch (Exception e) {
assertThat(e, instanceOf(IllegalArgumentException.class));
@ -230,7 +235,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
.endObject().bytes();
try {
RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest);
RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
fail("shouldn't get here");
} catch (Exception e) {
assertThat(e, instanceOf(IllegalArgumentException.class));
@ -267,4 +272,235 @@ public class AnalyzeActionIT extends ESIntegTestCase {
}
public void testDetailAnalyze() throws Exception {
assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
.setSettings(
settingsBuilder()
.put("index.analysis.char_filter.my_mapping.type", "mapping")
.putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
.put("index.analysis.analyzer.test_analyzer.type", "custom")
.put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
.put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
.putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
.putArray("index.analysis.analyzer.test_analyzer.filter", "snowball")));
ensureGreen();
for (int i = 0; i < 10; i++) {
AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
.setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get();
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
//charfilters: the my_mapping char filter rewrites "PH" to "F"
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH"));
//tokenizer
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH"));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
//tokenfilters
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15));
}
}
public void testDetailAnalyzeWithNoIndex() throws Exception {
//analyzer only
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
.setExplain(true).setAnalyzer("simple").get();
assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue());
assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue());
assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue());
assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
}
public void testDetailAnalyzeCustomAnalyzerWithNoIndex() throws Exception {
//analyzer only
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
.setExplain(true).setAnalyzer("simple").get();
assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue());
assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue());
assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue());
assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
//custom analyzer
analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>")
.setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get();
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
//charfilters: html_strip removes the surrounding tags
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("\nTHIS IS A TEST\n"));
//tokenizer
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("\nTHIS IS A TEST\n"));
//tokenfilters
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n"));
//check other attributes
analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get();
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
String[] expectedAttributesKey = {
"bytes",
"positionLength",
"keyword"};
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
Object extendedAttribute;
for (String key : expectedAttributesKey) {
extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
assertThat(extendedAttribute, notNullValue());
}
}
public void testDetailAnalyzeSpecifyAttributes() throws Exception {
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get();
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
String[] expectedAttributesKey = {
"keyword"};
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
Object extendedAttribute;
for (String key : expectedAttributesKey) {
extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
assertThat(extendedAttribute, notNullValue());
}
}
public void testDetailAnalyzeWithMultiValues() throws Exception {
assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
ensureGreen();
client().admin().indices().preparePutMapping("test")
.setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();
String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"};
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
.setExplain(true).setField("simple").setText(texts).execute().get();
assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(7));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().analyzer().getTokens()[3];
assertThat(token.getTerm(), equalTo("test"));
assertThat(token.getPosition(), equalTo(3));
assertThat(token.getStartOffset(), equalTo(10));
assertThat(token.getEndOffset(), equalTo(14));
token = analyzeResponse.detail().analyzer().getTokens()[5];
assertThat(token.getTerm(), equalTo("second"));
assertThat(token.getPosition(), equalTo(105));
assertThat(token.getStartOffset(), equalTo(19));
assertThat(token.getEndOffset(), equalTo(25));
}
public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception {
assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
.setSettings(
settingsBuilder()
.put("index.analysis.char_filter.my_mapping.type", "mapping")
.putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
.put("index.analysis.analyzer.test_analyzer.type", "custom")
.put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
.put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
.putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
.putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase")));
ensureGreen();
client().admin().indices().preparePutMapping("test")
.setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();
// analyzer only
String[] texts = new String[]{"this is a PHISH", "the troubled text"};
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
.setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get();
// charfilter
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH"));
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text"));
// tokenizer
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard"));
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7));
AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3];
assertThat(token.getTerm(), equalTo("FISH"));
assertThat(token.getPosition(), equalTo(3));
assertThat(token.getStartOffset(), equalTo(10));
assertThat(token.getEndOffset(), equalTo(15));
token = analyzeResponse.detail().tokenizer().getTokens()[5];
assertThat(token.getTerm(), equalTo("troubled"));
assertThat(token.getPosition(), equalTo(105));
assertThat(token.getStartOffset(), equalTo(20));
assertThat(token.getEndOffset(), equalTo(28));
// tokenfilter(snowball)
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7));
token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3];
assertThat(token.getTerm(), equalTo("FISH"));
assertThat(token.getPosition(), equalTo(3));
assertThat(token.getStartOffset(), equalTo(10));
assertThat(token.getEndOffset(), equalTo(15));
token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
assertThat(token.getTerm(), equalTo("troubl"));
assertThat(token.getPosition(), equalTo(105));
assertThat(token.getStartOffset(), equalTo(20));
assertThat(token.getEndOffset(), equalTo(28));
// tokenfilter(lowercase)
assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase"));
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7));
token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3];
assertThat(token.getTerm(), equalTo("fish"));
assertThat(token.getPosition(), equalTo(3));
assertThat(token.getStartOffset(), equalTo(10));
assertThat(token.getEndOffset(), equalTo(15));
token = analyzeResponse.detail().tokenfilters()[1].getTokens()[5];
assertThat(token.getTerm(), equalTo("troubl"));
assertThat(token.getPosition(), equalTo(105));
assertThat(token.getStartOffset(), equalTo(20));
assertThat(token.getEndOffset(), equalTo(28));
}
}

View File

@ -100,3 +100,74 @@ provided it doesn't start with `{` :
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
--------------------------------------------------
=== Explain Analyze
If you want more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token.
You can filter the token attributes you want to output by setting the `attributes` option.
experimental[The format of the additional detail information is experimental and can change at any time]
[source,js]
--------------------------------------------------
GET test/_analyze
{
"tokenizer" : "standard",
"token_filters" : ["snowball"],
"text" : "detailed output",
"explain" : true,
"attributes" : ["keyword"] <1>
}
--------------------------------------------------
// AUTOSENSE
<1> Set "keyword" to output "keyword" attribute only
coming[2.0.0, body based parameters were added in 2.0.0]
The request returns the following result:
[source,js]
--------------------------------------------------
{
"detail" : {
"custom_analyzer" : true,
"charfilters" : [ ],
"tokenizer" : {
"name" : "standard",
"tokens" : [ {
"token" : "detailed",
"start_offset" : 0,
"end_offset" : 8,
"type" : "<ALPHANUM>",
"position" : 0
}, {
"token" : "output",
"start_offset" : 9,
"end_offset" : 15,
"type" : "<ALPHANUM>",
"position" : 1
} ]
},
"tokenfilters" : [ {
"name" : "snowball",
"tokens" : [ {
"token" : "detail",
"start_offset" : 0,
"end_offset" : 8,
"type" : "<ALPHANUM>",
"position" : 0,
"keyword" : false <1>
}, {
"token" : "output",
"start_offset" : 9,
"end_offset" : 15,
"type" : "<ALPHANUM>",
"position" : 1,
"keyword" : false <1>
} ]
} ]
}
}
--------------------------------------------------
<1> Output only "keyword" attribute, since specify "attributes" in the request.

View File

@ -44,6 +44,14 @@
"type" : "string",
"description" : "The name of the tokenizer to use for the analysis"
},
"detail": {
"type" : "boolean",
"description" : "With `true`, outputs more advanced details. (default: false)"
},
"attributes": {
"type" : "list",
"description" : "A comma-separated list of token attributes to output, this parameter works only with `detail=true`"
},
"format": {
"type": "enum",
"options" : ["detailed","text"],

View File

@ -71,3 +71,31 @@ setup:
- length: {tokens: 2 }
- match: { tokens.0.token: foo bar }
- match: { tokens.1.token: baz }
---
"Detail response with Analyzer":
- do:
indices.analyze:
body: {"text": "This is troubled", "analyzer": standard, "explain": true}
- length: { detail.analyzer.tokens: 3 }
- match: { detail.analyzer.name: standard }
- match: { detail.analyzer.tokens.0.token: this }
- match: { detail.analyzer.tokens.1.token: is }
- match: { detail.analyzer.tokens.2.token: troubled }
---
"Detail output spcified attribute":
- do:
indices.analyze:
body: {"text": "<text>This is troubled</text>", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
- length: { detail.charfilters: 1 }
- length: { detail.tokenizer.tokens: 3 }
- length: { detail.tokenfilters.0.tokens: 3 }
- match: { detail.tokenizer.name: standard }
- match: { detail.tokenizer.tokens.0.token: This }
- match: { detail.tokenizer.tokens.1.token: is }
- match: { detail.tokenizer.tokens.2.token: troubled }
- match: { detail.tokenfilters.0.name: snowball }
- match: { detail.tokenfilters.0.tokens.0.token: This }
- match: { detail.tokenfilters.0.tokens.1.token: is }
- match: { detail.tokenfilters.0.tokens.2.token: troubl }
- match: { detail.tokenfilters.0.tokens.2.keyword: false }