From fab44398d9d48f12319bc018d4b436f723b6508e Mon Sep 17 00:00:00 2001 From: Jun Ohtani Date: Mon, 15 Jun 2015 16:32:44 +0900 Subject: [PATCH] Analysis: Add detail response support add explain option fix char_filter bug Closes #11076 #15257 --- .../admin/indices/analyze/AnalyzeRequest.java | 46 ++- .../analyze/AnalyzeRequestBuilder.java | 16 + .../indices/analyze/AnalyzeResponse.java | 82 ++++- .../analyze/DetailAnalyzeResponse.java | 319 ++++++++++++++++++ .../analyze/TransportAnalyzeAction.java | 235 ++++++++++++- .../indices/analyze/RestAnalyzeAction.java | 51 ++- .../indices/analyze/AnalyzeActionIT.java | 242 ++++++++++++- docs/reference/indices/analyze.asciidoc | 71 ++++ .../rest-api-spec/api/indices.analyze.json | 8 + .../test/indices.analyze/10_analyze.yaml | 28 ++ 10 files changed, 1055 insertions(+), 43 deletions(-) create mode 100644 core/src/main/java/org/elasticsearch/action/admin/indices/analyze/DetailAnalyzeResponse.java diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java index 6482e340d1a..db1a03efbc6 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java @@ -18,6 +18,7 @@ */ package org.elasticsearch.action.admin.indices.analyze; +import org.elasticsearch.Version; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.support.single.shard.SingleShardRequest; import org.elasticsearch.common.Strings; @@ -46,6 +47,10 @@ public class AnalyzeRequest extends SingleShardRequest { private String field; + private boolean explain = false; + + private String[] attributes = Strings.EMPTY_ARRAY; + public AnalyzeRequest() { } @@ -86,6 +91,9 @@ public class AnalyzeRequest extends SingleShardRequest { } public AnalyzeRequest tokenFilters(String... tokenFilters) { + if (tokenFilters == null) { + throw new IllegalArgumentException("token filters must not be null"); + } this.tokenFilters = tokenFilters; return this; } @@ -95,6 +103,9 @@ public class AnalyzeRequest extends SingleShardRequest { } public AnalyzeRequest charFilters(String... charFilters) { + if (charFilters == null) { + throw new IllegalArgumentException("char filters must not be null"); + } this.charFilters = charFilters; return this; } @@ -112,18 +123,33 @@ public class AnalyzeRequest extends SingleShardRequest { return this.field; } + public AnalyzeRequest explain(boolean explain) { + this.explain = explain; + return this; + } + + public boolean explain() { + return this.explain; + } + + public AnalyzeRequest attributes(String... 
attributes) { + if (attributes == null) { + throw new IllegalArgumentException("attributes must not be null"); + } + this.attributes = attributes; + return this; + } + + public String[] attributes() { + return this.attributes; + } + @Override public ActionRequestValidationException validate() { ActionRequestValidationException validationException = null; if (text == null || text.length == 0) { validationException = addValidationError("text is missing", validationException); } - if (tokenFilters == null) { - validationException = addValidationError("token filters must not be null", validationException); - } - if (charFilters == null) { - validationException = addValidationError("char filters must not be null", validationException); - } return validationException; } @@ -136,6 +162,10 @@ public class AnalyzeRequest extends SingleShardRequest { tokenFilters = in.readStringArray(); charFilters = in.readStringArray(); field = in.readOptionalString(); + if (in.getVersion().onOrAfter(Version.V_2_2_0)) { + explain = in.readBoolean(); + attributes = in.readStringArray(); + } } @Override @@ -147,5 +177,9 @@ public class AnalyzeRequest extends SingleShardRequest { out.writeStringArray(tokenFilters); out.writeStringArray(charFilters); out.writeOptionalString(field); + if (out.getVersion().onOrAfter(Version.V_2_2_0)) { + out.writeBoolean(explain); + out.writeStringArray(attributes); + } } } diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java index 9ed02e6be1c..23c1739d771 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java @@ -78,6 +78,22 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder, ToXContent { - public static class AnalyzeToken implements Streamable { + public static class AnalyzeToken implements Streamable, ToXContent { private String term; private int startOffset; private int endOffset; private int position; + private Map attributes; private String type; AnalyzeToken() { } - public AnalyzeToken(String term, int position, int startOffset, int endOffset, String type) { + public AnalyzeToken(String term, int position, int startOffset, int endOffset, String type, + Map attributes) { this.term = term; this.position = position; this.startOffset = startOffset; this.endOffset = endOffset; this.type = type; + this.attributes = attributes; } public String getTerm() { @@ -74,6 +79,27 @@ public class AnalyzeResponse extends ActionResponse implements Iterable getAttributes(){ + return this.attributes; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(Fields.TOKEN, term); + builder.field(Fields.START_OFFSET, startOffset); + builder.field(Fields.END_OFFSET, endOffset); + builder.field(Fields.TYPE, type); + builder.field(Fields.POSITION, position); + if (attributes != null && !attributes.isEmpty()) { + for (Map.Entry entity : attributes.entrySet()) { + builder.field(entity.getKey(), entity.getValue()); + } + } + builder.endObject(); + return builder; + } + public static AnalyzeToken readAnalyzeToken(StreamInput in) throws IOException { AnalyzeToken analyzeToken = new AnalyzeToken(); analyzeToken.readFrom(in); @@ -87,6 +113,9 @@ public class AnalyzeResponse extends ActionResponse 
implements Iterable) in.readGenericValue(); + } } @Override @@ -96,22 +125,32 @@ public class AnalyzeResponse extends ActionResponse implements Iterable tokens; AnalyzeResponse() { } - public AnalyzeResponse(List tokens) { + public AnalyzeResponse(List tokens, DetailAnalyzeResponse detail) { this.tokens = tokens; + this.detail = detail; } public List getTokens() { return this.tokens; } + public DetailAnalyzeResponse detail() { + return this.detail; + } + @Override public Iterator iterator() { return tokens.iterator(); @@ -119,17 +158,19 @@ public class AnalyzeResponse extends ActionResponse implements Iterable 0) { + charfilters = new CharFilteredText[size]; + for (int i = 0; i < size; i++) { + charfilters[i] = CharFilteredText.readCharFilteredText(in); + } + } + size = in.readVInt(); + if (size > 0) { + tokenfilters = new AnalyzeTokenList[size]; + for (int i = 0; i < size; i++) { + tokenfilters[i] = AnalyzeTokenList.readAnalyzeTokenList(in); + } + } + } else { + analyzer = AnalyzeTokenList.readAnalyzeTokenList(in); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeBoolean(customAnalyzer); + if (customAnalyzer) { + tokenizer.writeTo(out); + if (charfilters != null) { + out.writeVInt(charfilters.length); + for (CharFilteredText charfilter : charfilters) { + charfilter.writeTo(out); + } + } else { + out.writeVInt(0); + } + if (tokenfilters != null) { + out.writeVInt(tokenfilters.length); + for (AnalyzeTokenList tokenfilter : tokenfilters) { + tokenfilter.writeTo(out); + } + } else { + out.writeVInt(0); + } + } else { + analyzer.writeTo(out); + } + } + + public static class AnalyzeTokenList implements Streamable, ToXContent { + private String name; + private AnalyzeResponse.AnalyzeToken[] tokens; + + AnalyzeTokenList() { + } + + public AnalyzeTokenList(String name, AnalyzeResponse.AnalyzeToken[] tokens) { + this.name = name; + this.tokens = tokens; + } + + public String getName() { + return name; + } + + public AnalyzeResponse.AnalyzeToken[] getTokens() { + return tokens; + } + + public static AnalyzeTokenList readAnalyzeTokenList(StreamInput in) throws IOException { + AnalyzeTokenList list = new AnalyzeTokenList(); + list.readFrom(in); + return list; + } + + public XContentBuilder toXContentWithoutObject(XContentBuilder builder, Params params) throws IOException { + builder.field(Fields.NAME, this.name); + builder.startArray(AnalyzeResponse.Fields.TOKENS); + for (AnalyzeResponse.AnalyzeToken token : tokens) { + token.toXContent(builder, params); + } + builder.endArray(); + return builder; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(Fields.NAME, this.name); + builder.startArray(AnalyzeResponse.Fields.TOKENS); + for (AnalyzeResponse.AnalyzeToken token : tokens) { + token.toXContent(builder, params); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + name = in.readString(); + int size = in.readVInt(); + if (size > 0) { + tokens = new AnalyzeResponse.AnalyzeToken[size]; + for (int i = 0; i < size; i++) { + tokens[i] = AnalyzeResponse.AnalyzeToken.readAnalyzeToken(in); + } + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + if (tokens != null) { + out.writeVInt(tokens.length); + for (AnalyzeResponse.AnalyzeToken token : tokens) { + token.writeTo(out); + } + } else { + out.writeVInt(0); + } + 
} + } + + public static class CharFilteredText implements Streamable, ToXContent { + private String name; + private String[] texts; + CharFilteredText() { + } + + public CharFilteredText(String name, String[] texts) { + this.name = name; + if (texts != null) { + this.texts = texts; + } else { + this.texts = Strings.EMPTY_ARRAY; + } + } + + public String getName() { + return name; + } + + public String[] getTexts() { + return texts; + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(Fields.NAME, name); + builder.field(Fields.FILTERED_TEXT, texts); + builder.endObject(); + return builder; + } + + public static CharFilteredText readCharFilteredText(StreamInput in) throws IOException { + CharFilteredText text = new CharFilteredText(); + text.readFrom(in); + return text; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + name = in.readString(); + texts = in.readStringArray(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(name); + out.writeStringArray(texts); + } + } +} diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index ba49c33a15d..ecdf977b923 100644 --- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -20,10 +20,15 @@ package org.elasticsearch.action.admin.indices.analyze; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.single.shard.TransportSingleShardAction; @@ -33,6 +38,7 @@ import org.elasticsearch.cluster.block.ClusterBlockException; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.routing.ShardsIterator; import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexService; @@ -46,8 +52,8 @@ import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import java.io.Reader; +import java.util.*; /** * Transport action used to execute analyze requests @@ -222,6 +228,23 @@ public class TransportAnalyzeAction extends TransportSingleShardAction tokens = null; + DetailAnalyzeResponse detail = null; + + if (request.explain()) { + detail = detailAnalyze(request, analyzer, field); + } else { + tokens = simpleAnalyze(request, analyzer, field); + } + + if (closeAnalyzer) { + analyzer.close(); + } + + 
return new AnalyzeResponse(tokens, detail); + } + + private static List simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) { List tokens = new ArrayList<>(); int lastPosition = -1; int lastOffset = 0; @@ -238,7 +261,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction 0) { lastPosition = lastPosition + increment; } - tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type())); + tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type(), null)); } stream.end(); @@ -251,11 +274,211 @@ public class TransportAnalyzeAction extends TransportSingleShardAction includeAttributes = new HashSet<>(); + if (request.attributes() != null) { + for (String attribute : request.attributes()) { + includeAttributes.add(attribute.toLowerCase(Locale.ROOT)); + } } - return new AnalyzeResponse(tokens); + CustomAnalyzer customAnalyzer = null; + if (analyzer instanceof CustomAnalyzer) { + customAnalyzer = (CustomAnalyzer) analyzer; + } else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) { + customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer(); + } + + if (customAnalyzer != null) { + // customAnalyzer = divide charfilter, tokenizer tokenfilters + CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters(); + TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory(); + TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters(); + + String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length]; + TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? 
tokenFilterFactories.length : 0]; + + TokenListCreator tokenizerTokenListCreator = new TokenListCreator(); + + for (int textIndex = 0; textIndex < request.text().length; textIndex++) { + String charFilteredSource = request.text()[textIndex]; + + Reader reader = new FastStringReader(charFilteredSource); + if (charFilterFactories != null) { + + for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) { + reader = charFilterFactories[charFilterIndex].create(reader); + Reader readerForWriteOut = new FastStringReader(charFilteredSource); + readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut); + charFilteredSource = writeCharStream(readerForWriteOut); + charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource; + } + } + + // analyzing only tokenizer + Tokenizer tokenizer = tokenizerFactory.create(); + tokenizer.setReader(reader); + tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes); + + // analyzing each tokenfilter + if (tokenFilterFactories != null) { + for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) { + if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) { + tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator(); + } + TokenStream stream = createStackedTokenStream(request.text()[textIndex], + charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1); + tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes); + } + } + } + + DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length]; + if (charFilterFactories != null) { + for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) { + charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText( + charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]); + } + } + DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length]; + if (tokenFilterFactories != null) { + for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) { + tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList( + tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens()); + } + } + detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists); + } else { + String name; + if (analyzer instanceof NamedAnalyzer) { + name = ((NamedAnalyzer) analyzer).name(); + } else { + name = analyzer.getClass().getName(); + } + + TokenListCreator tokenListCreator = new TokenListCreator(); + for (String text : request.text()) { + tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field, + includeAttributes); + } + detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens())); + } + return detailResponse; + } + + private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) { + Reader reader = new FastStringReader(source); + for (CharFilterFactory 
charFilterFactory : charFilterFactories) { + reader = charFilterFactory.create(reader); + } + Tokenizer tokenizer = tokenizerFactory.create(); + tokenizer.setReader(reader); + TokenStream tokenStream = tokenizer; + for (int i = 0; i < current; i++) { + tokenStream = tokenFilterFactories[i].create(tokenStream); + } + return tokenStream; + } + + private static String writeCharStream(Reader input) { + final int BUFFER_SIZE = 1024; + char[] buf = new char[BUFFER_SIZE]; + int len; + StringBuilder sb = new StringBuilder(); + do { + try { + len = input.read(buf, 0, BUFFER_SIZE); + } catch (IOException e) { + throw new ElasticsearchException("failed to analyze (charFiltering)", e); + } + if (len > 0) + sb.append(buf, 0, len); + } while (len == BUFFER_SIZE); + return sb.toString(); + } + + private static class TokenListCreator { + int lastPosition = -1; + int lastOffset = 0; + List tokens; + + TokenListCreator() { + tokens = new ArrayList<>(); + } + + private void analyze(TokenStream stream, Analyzer analyzer, String field, Set includeAttributes) { + try { + stream.reset(); + CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); + OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); + TypeAttribute type = stream.addAttribute(TypeAttribute.class); + + while (stream.incrementToken()) { + int increment = posIncr.getPositionIncrement(); + if (increment > 0) { + lastPosition = lastPosition + increment; + } + tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), + lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes))); + + } + stream.end(); + lastOffset += offset.endOffset(); + lastPosition += posIncr.getPositionIncrement(); + + lastPosition += analyzer.getPositionIncrementGap(field); + lastOffset += analyzer.getOffsetGap(field); + + } catch (IOException e) { + throw new ElasticsearchException("failed to analyze", e); + } finally { + IOUtils.closeWhileHandlingException(stream); + } + } + + private AnalyzeResponse.AnalyzeToken[] getArrayTokens() { + return tokens.toArray(new AnalyzeResponse.AnalyzeToken[tokens.size()]); + } + + } + + /** + * other attribute extract object. 
+ * Extracted object group by AttributeClassName + * + * @param stream current TokenStream + * @param includeAttributes filtering attributes + * @return Map<key value> + */ + private static Map extractExtendedAttributes(TokenStream stream, final Set includeAttributes) { + final Map extendedAttributes = new TreeMap<>(); + + stream.reflectWith(new AttributeReflector() { + @Override + public void reflect(Class attClass, String key, Object value) { + if (CharTermAttribute.class.isAssignableFrom(attClass)) + return; + if (PositionIncrementAttribute.class.isAssignableFrom(attClass)) + return; + if (OffsetAttribute.class.isAssignableFrom(attClass)) + return; + if (TypeAttribute.class.isAssignableFrom(attClass)) + return; + if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) { + if (value instanceof BytesRef) { + final BytesRef p = (BytesRef) value; + value = p.toString(); + } + extendedAttributes.put(key, value); + } + } + }); + + return extendedAttributes; } } diff --git a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java index 57ceb21f41e..3a86911f464 100644 --- a/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java +++ b/core/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java @@ -21,7 +21,8 @@ package org.elasticsearch.rest.action.admin.indices.analyze; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.client.Client; -import org.elasticsearch.common.Strings; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; @@ -47,6 +48,17 @@ import static org.elasticsearch.rest.RestRequest.Method.POST; */ public class RestAnalyzeAction extends BaseRestHandler { + public static class Fields { + public static final ParseField ANALYZER = new ParseField("analyzer"); + public static final ParseField TEXT = new ParseField("text"); + public static final ParseField FIELD = new ParseField("field"); + public static final ParseField TOKENIZER = new ParseField("tokenizer"); + public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters"); + public static final ParseField CHAR_FILTERS = new ParseField("char_filters"); + public static final ParseField EXPLAIN = new ParseField("explain"); + public static final ParseField ATTRIBUTES = new ParseField("attributes"); + } + @Inject public RestAnalyzeAction(Settings settings, RestController controller, Client client) { super(settings, controller, client); @@ -68,6 +80,8 @@ public class RestAnalyzeAction extends BaseRestHandler { analyzeRequest.tokenizer(request.param("tokenizer")); analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters()))); analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters())); + analyzeRequest.explain(request.paramAsBoolean("explain", false)); + analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes())); if (RestActions.hasBodyContent(request)) { XContentType type = 
RestActions.guessBodyContentType(request); @@ -78,14 +92,14 @@ public class RestAnalyzeAction extends BaseRestHandler { } } else { // NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values - buildFromContent(RestActions.getRestContent(request), analyzeRequest); + buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher); } } client.admin().indices().analyze(analyzeRequest, new RestToXContentListener(channel)); } - public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest) { + public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) { try (XContentParser parser = XContentHelper.createParser(content)) { if (parser.nextToken() != XContentParser.Token.START_OBJECT) { throw new IllegalArgumentException("Malforrmed content, must start with an object"); @@ -95,9 +109,9 @@ public class RestAnalyzeAction extends BaseRestHandler { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); - } else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { + } else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.VALUE_STRING) { analyzeRequest.text(parser.text()); - } else if ("text".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) { + } else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.START_ARRAY) { List texts = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token.isValue() == false) { @@ -105,14 +119,14 @@ public class RestAnalyzeAction extends BaseRestHandler { } texts.add(parser.text()); } - analyzeRequest.text(texts.toArray(Strings.EMPTY_ARRAY)); - } else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { + analyzeRequest.text(texts.toArray(new String[texts.size()])); + } else if (parseFieldMatcher.match(currentFieldName, Fields.ANALYZER) && token == XContentParser.Token.VALUE_STRING) { analyzeRequest.analyzer(parser.text()); - } else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { + } else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) { analyzeRequest.field(parser.text()); - } else if ("tokenizer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { + } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) { analyzeRequest.tokenizer(parser.text()); - } else if (("token_filters".equals(currentFieldName) || "filters".equals(currentFieldName)) && token == XContentParser.Token.START_ARRAY) { + } else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) { List filters = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token.isValue() == false) { @@ -120,8 +134,8 @@ public class RestAnalyzeAction extends BaseRestHandler { } filters.add(parser.text()); } - analyzeRequest.tokenFilters(filters.toArray(Strings.EMPTY_ARRAY)); - } else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) { + analyzeRequest.tokenFilters(filters.toArray(new String[filters.size()])); + } else 
if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) { List charFilters = new ArrayList<>(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { if (token.isValue() == false) { @@ -129,7 +143,18 @@ public class RestAnalyzeAction extends BaseRestHandler { } charFilters.add(parser.text()); } - analyzeRequest.tokenFilters(charFilters.toArray(Strings.EMPTY_ARRAY)); + analyzeRequest.charFilters(charFilters.toArray(new String[charFilters.size()])); + } else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN) && token == XContentParser.Token.VALUE_BOOLEAN) { + analyzeRequest.explain(parser.booleanValue()); + } else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY){ + List attributes = new ArrayList<>(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token.isValue() == false) { + throw new IllegalArgumentException(currentFieldName + " array element should only contain attribute name"); + } + attributes.add(parser.text()); + } + analyzeRequest.attributes(attributes.toArray(new String[attributes.size()])); } else { throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] "); } diff --git a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java index 9f4f2b58e70..80993229bec 100644 --- a/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java +++ b/core/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionIT.java @@ -22,11 +22,14 @@ import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; +import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.rest.action.admin.indices.analyze.RestAnalyzeAction; import org.elasticsearch.test.ESIntegTestCase; +import org.hamcrest.core.IsNull; import java.io.IOException; @@ -36,8 +39,10 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.instanceOf; import static org.hamcrest.Matchers.is; +import static org.hamcrest.Matchers.notNullValue; import static org.hamcrest.Matchers.startsWith; + /** * */ @@ -201,7 +206,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test"); - RestAnalyzeAction.buildFromContent(content, analyzeRequest); + RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY)); assertThat(analyzeRequest.text().length, equalTo(1)); assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"})); @@ -213,7 +218,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test"); try { - RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest); + RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest, new 
ParseFieldMatcher(Settings.EMPTY)); fail("shouldn't get here"); } catch (Exception e) { assertThat(e, instanceOf(IllegalArgumentException.class)); @@ -230,7 +235,7 @@ public class AnalyzeActionIT extends ESIntegTestCase { .endObject().bytes(); try { - RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest); + RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY)); fail("shouldn't get here"); } catch (Exception e) { assertThat(e, instanceOf(IllegalArgumentException.class)); @@ -267,4 +272,235 @@ public class AnalyzeActionIT extends ESIntegTestCase { } + public void testDetailAnalyze() throws Exception { + assertAcked(prepareCreate("test").addAlias(new Alias("alias")) + .setSettings( + settingsBuilder() + .put("index.analysis.char_filter.my_mapping.type", "mapping") + .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F") + .put("index.analysis.analyzer.test_analyzer.type", "custom") + .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100") + .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard") + .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping") + .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball"))); + ensureGreen(); + + for (int i = 0; i < 10; i++) { + AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH") + .setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get(); + + assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue()); + //charfilters + // global charfilter is not change text. + assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping")); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH")); + //tokenizer + assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword")); + assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH")); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15)); + //tokenfilters + assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15)); + } + } + + public void testDetailAnalyzeWithNoIndex() throws Exception { + //analyzer only + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST") + .setExplain(true).setAnalyzer("simple").get(); + + assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue()); + 
assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue()); + assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue()); + assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple")); + assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4)); + } + + public void testDetailAnalyzeCustomAnalyzerWithNoIndex() throws Exception { + //analyzer only + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST") + .setExplain(true).setAnalyzer("simple").get(); + + assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue()); + assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue()); + assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue()); + assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple")); + assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4)); + + //custom analyzer + analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST") + .setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get(); + assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue()); + //charfilters + // global charfilter is not change text. + assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip")); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("\nTHIS IS A TEST\n")); + //tokenizer + assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword")); + assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("\nTHIS IS A TEST\n")); + //tokenfilters + assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n")); + + + //check other attributes + analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") + .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get(); + + assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl")); + String[] expectedAttributesKey = { + "bytes", + "positionLength", + "keyword"}; + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length)); + Object extendedAttribute; + + for (String key : expectedAttributesKey) { + extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key); + assertThat(extendedAttribute, notNullValue()); + } + } + + public void testDetailAnalyzeSpecifyAttributes() throws Exception { + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") + 
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get(); + + assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl")); + String[] expectedAttributesKey = { + "keyword"}; + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length)); + Object extendedAttribute; + + for (String key : expectedAttributesKey) { + extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key); + assertThat(extendedAttribute, notNullValue()); + } + } + + public void testDetailAnalyzeWithMultiValues() throws Exception { + assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); + ensureGreen(); + client().admin().indices().preparePutMapping("test") + .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); + + String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"}; + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts) + .setExplain(true).setField("simple").setText(texts).execute().get(); + + assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple")); + assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(7)); + AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().analyzer().getTokens()[3]; + + assertThat(token.getTerm(), equalTo("test")); + assertThat(token.getPosition(), equalTo(3)); + assertThat(token.getStartOffset(), equalTo(10)); + assertThat(token.getEndOffset(), equalTo(14)); + + token = analyzeResponse.detail().analyzer().getTokens()[5]; + assertThat(token.getTerm(), equalTo("second")); + assertThat(token.getPosition(), equalTo(105)); + assertThat(token.getStartOffset(), equalTo(19)); + assertThat(token.getEndOffset(), equalTo(25)); + } + + public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception { + assertAcked(prepareCreate("test").addAlias(new Alias("alias")) + .setSettings( + settingsBuilder() + .put("index.analysis.char_filter.my_mapping.type", "mapping") + .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F") + .put("index.analysis.analyzer.test_analyzer.type", "custom") + .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100") + .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard") + .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping") + .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase"))); + ensureGreen(); + + client().admin().indices().preparePutMapping("test") + .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); + + //only analyzer = + String[] texts = new String[]{"this is a PHISH", "the troubled text"}; + AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts) + .setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get(); + + // charfilter + assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); + assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping")); + 
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2)); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH")); + assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text")); + + // tokenizer + assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard")); + assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7)); + AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3]; + + assertThat(token.getTerm(), equalTo("FISH")); + assertThat(token.getPosition(), equalTo(3)); + assertThat(token.getStartOffset(), equalTo(10)); + assertThat(token.getEndOffset(), equalTo(15)); + + token = analyzeResponse.detail().tokenizer().getTokens()[5]; + assertThat(token.getTerm(), equalTo("troubled")); + assertThat(token.getPosition(), equalTo(105)); + assertThat(token.getStartOffset(), equalTo(20)); + assertThat(token.getEndOffset(), equalTo(28)); + + // tokenfilter(snowball) + assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2)); + assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); + assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7)); + token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3]; + + assertThat(token.getTerm(), equalTo("FISH")); + assertThat(token.getPosition(), equalTo(3)); + assertThat(token.getStartOffset(), equalTo(10)); + assertThat(token.getEndOffset(), equalTo(15)); + + token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5]; + assertThat(token.getTerm(), equalTo("troubl")); + assertThat(token.getPosition(), equalTo(105)); + assertThat(token.getStartOffset(), equalTo(20)); + assertThat(token.getEndOffset(), equalTo(28)); + + // tokenfilter(lowercase) + assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase")); + assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7)); + token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3]; + + assertThat(token.getTerm(), equalTo("fish")); + assertThat(token.getPosition(), equalTo(3)); + assertThat(token.getStartOffset(), equalTo(10)); + assertThat(token.getEndOffset(), equalTo(15)); + + token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5]; + assertThat(token.getTerm(), equalTo("troubl")); + assertThat(token.getPosition(), equalTo(105)); + assertThat(token.getStartOffset(), equalTo(20)); + assertThat(token.getEndOffset(), equalTo(28)); + + + } + } diff --git a/docs/reference/indices/analyze.asciidoc b/docs/reference/indices/analyze.asciidoc index 1a256a6330a..1e8cd77ef09 100644 --- a/docs/reference/indices/analyze.asciidoc +++ b/docs/reference/indices/analyze.asciidoc @@ -100,3 +100,74 @@ provided it doesn't start with `{` : -------------------------------------------------- curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a test' -------------------------------------------------- + +=== Explain Analyze + +If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It will output all token attributes for each token. +You can filter token attributes you want to output by setting `attributes` option. 
+
+experimental[The format of the additional detail information is experimental and can change at any time]
+
+[source,js]
+--------------------------------------------------
+GET test/_analyze
+{
+  "tokenizer" : "standard",
+  "token_filters" : ["snowball"],
+  "text" : "detailed output",
+  "explain" : true,
+  "attributes" : ["keyword"] <1>
+}
+--------------------------------------------------
+// AUTOSENSE
+<1> Set "keyword" to output only the "keyword" attribute
+
+coming[2.0.0, body-based parameters were added in 2.0.0]
+
+The request returns the following result:
+
+[source,js]
+--------------------------------------------------
+{
+  "detail" : {
+    "custom_analyzer" : true,
+    "charfilters" : [ ],
+    "tokenizer" : {
+      "name" : "standard",
+      "tokens" : [ {
+        "token" : "detailed",
+        "start_offset" : 0,
+        "end_offset" : 8,
+        "type" : "<ALPHANUM>",
+        "position" : 0
+      }, {
+        "token" : "output",
+        "start_offset" : 9,
+        "end_offset" : 15,
+        "type" : "<ALPHANUM>",
+        "position" : 1
+      } ]
+    },
+    "tokenfilters" : [ {
+      "name" : "snowball",
+      "tokens" : [ {
+        "token" : "detail",
+        "start_offset" : 0,
+        "end_offset" : 8,
+        "type" : "<ALPHANUM>",
+        "position" : 0,
+        "keyword" : false <1>
+      }, {
+        "token" : "output",
+        "start_offset" : 9,
+        "end_offset" : 15,
+        "type" : "<ALPHANUM>",
+        "position" : 1,
+        "keyword" : false <1>
+      } ]
+    } ]
+  }
+}
+--------------------------------------------------
+<1> Only the "keyword" attribute is output, because "attributes" was specified in the request.
+
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.analyze.json b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.analyze.json
index 00b0ec13a5c..9fe9bfe3cad 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.analyze.json
+++ b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.analyze.json
@@ -44,6 +44,14 @@
         "type" : "string",
         "description" : "The name of the tokenizer to use for the analysis"
       },
+      "explain": {
+        "type" : "boolean",
+        "description" : "With `true`, outputs more advanced details. 
(default: false)"
+      },
+      "attributes": {
+        "type" : "list",
+        "description" : "A comma-separated list of token attributes to output, this parameter works only with `explain=true`"
+      },
       "format": {
         "type": "enum",
         "options" : ["detailed","text"],
diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml
index 49420672861..0b1a090303e 100644
--- a/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml
+++ b/rest-api-spec/src/main/resources/rest-api-spec/test/indices.analyze/10_analyze.yaml
@@ -71,3 +71,31 @@ setup:
     - length: {tokens: 2 }
     - match: { tokens.0.token: foo bar }
     - match: { tokens.1.token: baz }
+---
+"Detail response with Analyzer":
+  - do:
+      indices.analyze:
+        body: {"text": "This is troubled", "analyzer": standard, "explain": true}
+  - length: { detail.analyzer.tokens: 3 }
+  - match: { detail.analyzer.name: standard }
+  - match: { detail.analyzer.tokens.0.token: this }
+  - match: { detail.analyzer.tokens.1.token: is }
+  - match: { detail.analyzer.tokens.2.token: troubled }
+---
+"Detail output specified attribute":
+  - do:
+      indices.analyze:
+        body: {"text": "This is troubled", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
+  - length: { detail.charfilters: 1 }
+  - length: { detail.tokenizer.tokens: 3 }
+  - length: { detail.tokenfilters.0.tokens: 3 }
+  - match: { detail.tokenizer.name: standard }
+  - match: { detail.tokenizer.tokens.0.token: This }
+  - match: { detail.tokenizer.tokens.1.token: is }
+  - match: { detail.tokenizer.tokens.2.token: troubled }
+  - match: { detail.tokenfilters.0.name: snowball }
+  - match: { detail.tokenfilters.0.tokens.0.token: This }
+  - match: { detail.tokenfilters.0.tokens.1.token: is }
+  - match: { detail.tokenfilters.0.tokens.2.token: troubl }
+  - match: { detail.tokenfilters.0.tokens.2.keyword: false }
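
For a quick manual check of the new options against a local node, a request along the following lines should return the `detail` object documented above rather than the flat `tokens` array; the sample text and the analysis chain (keyword tokenizer plus lowercase filter, with `attributes=keyword`) are illustrative only:

--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&explain=true&attributes=keyword' -d 'this is a test'
--------------------------------------------------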