Analysis: Add detail response support

Add explain option and fix char_filter bug. Closes #11076, #15257

parent 1ef24d2a85
commit fab44398d9
@@ -18,6 +18,7 @@
 */
package org.elasticsearch.action.admin.indices.analyze;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.support.single.shard.SingleShardRequest;
import org.elasticsearch.common.Strings;

@@ -46,6 +47,10 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {

    private String field;

    private boolean explain = false;

    private String[] attributes = Strings.EMPTY_ARRAY;

    public AnalyzeRequest() {
    }

@@ -86,6 +91,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
    }

    public AnalyzeRequest tokenFilters(String... tokenFilters) {
        if (tokenFilters == null) {
            throw new IllegalArgumentException("token filters must not be null");
        }
        this.tokenFilters = tokenFilters;
        return this;
    }

@@ -95,6 +103,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
    }

    public AnalyzeRequest charFilters(String... charFilters) {
        if (charFilters == null) {
            throw new IllegalArgumentException("char filters must not be null");
        }
        this.charFilters = charFilters;
        return this;
    }

@@ -112,18 +123,33 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
        return this.field;
    }

    public AnalyzeRequest explain(boolean explain) {
        this.explain = explain;
        return this;
    }

    public boolean explain() {
        return this.explain;
    }

    public AnalyzeRequest attributes(String... attributes) {
        if (attributes == null) {
            throw new IllegalArgumentException("attributes must not be null");
        }
        this.attributes = attributes;
        return this;
    }

    public String[] attributes() {
        return this.attributes;
    }

    @Override
    public ActionRequestValidationException validate() {
        ActionRequestValidationException validationException = null;
        if (text == null || text.length == 0) {
            validationException = addValidationError("text is missing", validationException);
        }
        if (tokenFilters == null) {
            validationException = addValidationError("token filters must not be null", validationException);
        }
        if (charFilters == null) {
            validationException = addValidationError("char filters must not be null", validationException);
        }
        return validationException;
    }

@@ -136,6 +162,10 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
        tokenFilters = in.readStringArray();
        charFilters = in.readStringArray();
        field = in.readOptionalString();
        if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
            explain = in.readBoolean();
            attributes = in.readStringArray();
        }
    }

    @Override

@@ -147,5 +177,9 @@ public class AnalyzeRequest extends SingleShardRequest<AnalyzeRequest> {
        out.writeStringArray(tokenFilters);
        out.writeStringArray(charFilters);
        out.writeOptionalString(field);
        if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
            out.writeBoolean(explain);
            out.writeStringArray(attributes);
        }
    }
}
@@ -78,6 +78,22 @@ public class AnalyzeRequestBuilder extends SingleShardOperationRequestBuilder<An
        return this;
    }

    /**
     * Sets whether to return a detailed (explain) response
     */
    public AnalyzeRequestBuilder setExplain(boolean explain) {
        request.explain(explain);
        return this;
    }

    /**
     * Sets the token attributes to include in the detailed response
     */
    public AnalyzeRequestBuilder setAttributes(String attributes) {
        request.attributes(attributes);
        return this;
    }

    /**
     * Sets texts to analyze
     */
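Note (usage sketch, not part of this diff): the new builder options combine with the existing analyze API roughly as below; the setter chain mirrors the methods added above and the AnalyzeActionIT tests further down.

    // Ask for a detailed analysis and keep only the "keyword" token attribute.
    AnalyzeResponse response = client.admin().indices().prepareAnalyze("This is troubled")
            .setTokenizer("standard")
            .setTokenFilters("snowball")
            .setExplain(true)          // request the per-stage detail instead of the flat token list
            .setAttributes("keyword")  // restrict which extra token attributes are returned
            .get();

    // With explain=true the flat token list is null and detail() carries the result.
    DetailAnalyzeResponse detail = response.detail();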
@@ -18,6 +18,7 @@
 */
package org.elasticsearch.action.admin.indices.analyze;

import org.elasticsearch.Version;
import org.elasticsearch.action.ActionResponse;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;

@@ -30,28 +31,32 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 *
 */
public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeResponse.AnalyzeToken>, ToXContent {

    public static class AnalyzeToken implements Streamable {
    public static class AnalyzeToken implements Streamable, ToXContent {
        private String term;
        private int startOffset;
        private int endOffset;
        private int position;
        private Map<String, Object> attributes;
        private String type;

        AnalyzeToken() {
        }

        public AnalyzeToken(String term, int position, int startOffset, int endOffset, String type) {
        public AnalyzeToken(String term, int position, int startOffset, int endOffset, String type,
                            Map<String, Object> attributes) {
            this.term = term;
            this.position = position;
            this.startOffset = startOffset;
            this.endOffset = endOffset;
            this.type = type;
            this.attributes = attributes;
        }

        public String getTerm() {

@@ -74,6 +79,27 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
            return this.type;
        }

        public Map<String, Object> getAttributes() {
            return this.attributes;
        }

        @Override
        public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
            builder.startObject();
            builder.field(Fields.TOKEN, term);
            builder.field(Fields.START_OFFSET, startOffset);
            builder.field(Fields.END_OFFSET, endOffset);
            builder.field(Fields.TYPE, type);
            builder.field(Fields.POSITION, position);
            if (attributes != null && !attributes.isEmpty()) {
                for (Map.Entry<String, Object> entity : attributes.entrySet()) {
                    builder.field(entity.getKey(), entity.getValue());
                }
            }
            builder.endObject();
            return builder;
        }

        public static AnalyzeToken readAnalyzeToken(StreamInput in) throws IOException {
            AnalyzeToken analyzeToken = new AnalyzeToken();
            analyzeToken.readFrom(in);

@@ -87,6 +113,9 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
            endOffset = in.readInt();
            position = in.readVInt();
            type = in.readOptionalString();
            if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
                attributes = (Map<String, Object>) in.readGenericValue();
            }
        }

        @Override

@@ -96,22 +125,32 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
            out.writeInt(endOffset);
            out.writeVInt(position);
            out.writeOptionalString(type);
            if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
                out.writeGenericValue(attributes);
            }
        }
    }

    private DetailAnalyzeResponse detail;

    private List<AnalyzeToken> tokens;

    AnalyzeResponse() {
    }

    public AnalyzeResponse(List<AnalyzeToken> tokens) {
    public AnalyzeResponse(List<AnalyzeToken> tokens, DetailAnalyzeResponse detail) {
        this.tokens = tokens;
        this.detail = detail;
    }

    public List<AnalyzeToken> getTokens() {
        return this.tokens;
    }

    public DetailAnalyzeResponse detail() {
        return this.detail;
    }

    @Override
    public Iterator<AnalyzeToken> iterator() {
        return tokens.iterator();

@@ -119,17 +158,19 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR

    @Override
    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
        if (tokens != null) {
            builder.startArray(Fields.TOKENS);
            for (AnalyzeToken token : tokens) {
                builder.startObject();
                builder.field(Fields.TOKEN, token.getTerm());
                builder.field(Fields.START_OFFSET, token.getStartOffset());
                builder.field(Fields.END_OFFSET, token.getEndOffset());
                builder.field(Fields.TYPE, token.getType());
                builder.field(Fields.POSITION, token.getPosition());
                builder.endObject();
                token.toXContent(builder, params);
            }
            builder.endArray();
        }

        if (detail != null) {
            builder.startObject(Fields.DETAIL);
            detail.toXContent(builder, params);
            builder.endObject();
        }
        return builder;
    }

@@ -141,15 +182,25 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
        for (int i = 0; i < size; i++) {
            tokens.add(AnalyzeToken.readAnalyzeToken(in));
        }
        if (in.getVersion().onOrAfter(Version.V_2_2_0)) {
            detail = in.readOptionalStreamable(DetailAnalyzeResponse::new);
        }
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        if (tokens != null) {
            out.writeVInt(tokens.size());
            for (AnalyzeToken token : tokens) {
                token.writeTo(out);
            }
        } else {
            out.writeVInt(0);
        }
        if (out.getVersion().onOrAfter(Version.V_2_2_0)) {
            out.writeOptionalStreamable(detail);
        }
    }

    static final class Fields {

@@ -159,5 +210,6 @@ public class AnalyzeResponse extends ActionResponse implements Iterable<AnalyzeR
        static final XContentBuilderString END_OFFSET = new XContentBuilderString("end_offset");
        static final XContentBuilderString TYPE = new XContentBuilderString("type");
        static final XContentBuilderString POSITION = new XContentBuilderString("position");
        static final XContentBuilderString DETAIL = new XContentBuilderString("detail");
    }
}
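Note (consumer-side sketch, not part of this diff): for a plain, non-explain request the response is still iterable as before; getAttributes() is only populated on the detail path, so it is null for tokens produced by simpleAnalyze().

    for (AnalyzeResponse.AnalyzeToken token : response) {
        String term = token.getTerm();
        Map<String, Object> extra = token.getAttributes(); // null for non-explain responses
    }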
@ -0,0 +1,319 @@
|
|||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.action.admin.indices.analyze;
|
||||
|
||||
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.io.stream.StreamInput;
|
||||
import org.elasticsearch.common.io.stream.StreamOutput;
|
||||
import org.elasticsearch.common.io.stream.Streamable;
|
||||
import org.elasticsearch.common.xcontent.ToXContent;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilderString;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class DetailAnalyzeResponse implements Streamable, ToXContent {
|
||||
|
||||
DetailAnalyzeResponse() {
|
||||
}
|
||||
|
||||
private boolean customAnalyzer = false;
|
||||
private AnalyzeTokenList analyzer;
|
||||
private CharFilteredText[] charfilters;
|
||||
private AnalyzeTokenList tokenizer;
|
||||
private AnalyzeTokenList[] tokenfilters;
|
||||
|
||||
public DetailAnalyzeResponse(AnalyzeTokenList analyzer) {
|
||||
this(false, analyzer, null, null, null);
|
||||
}
|
||||
|
||||
public DetailAnalyzeResponse(CharFilteredText[] charfilters, AnalyzeTokenList tokenizer, AnalyzeTokenList[] tokenfilters) {
|
||||
this(true, null, charfilters, tokenizer, tokenfilters);
|
||||
}
|
||||
|
||||
public DetailAnalyzeResponse(boolean customAnalyzer,
|
||||
AnalyzeTokenList analyzer,
|
||||
CharFilteredText[] charfilters,
|
||||
AnalyzeTokenList tokenizer,
|
||||
AnalyzeTokenList[] tokenfilters) {
|
||||
this.customAnalyzer = customAnalyzer;
|
||||
this.analyzer = analyzer;
|
||||
this.charfilters = charfilters;
|
||||
this.tokenizer = tokenizer;
|
||||
this.tokenfilters = tokenfilters;
|
||||
}
|
||||
|
||||
public AnalyzeTokenList analyzer() {
|
||||
return this.analyzer;
|
||||
}
|
||||
|
||||
public DetailAnalyzeResponse analyzer(AnalyzeTokenList analyzer) {
|
||||
this.analyzer = analyzer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public CharFilteredText[] charfilters() {
|
||||
return this.charfilters;
|
||||
}
|
||||
|
||||
public DetailAnalyzeResponse charfilters(CharFilteredText[] charfilters) {
|
||||
this.charfilters = charfilters;
|
||||
return this;
|
||||
}
|
||||
|
||||
public AnalyzeTokenList tokenizer() {
|
||||
return tokenizer;
|
||||
}
|
||||
|
||||
public DetailAnalyzeResponse tokenizer(AnalyzeTokenList tokenizer) {
|
||||
this.tokenizer = tokenizer;
|
||||
return this;
|
||||
}
|
||||
|
||||
public AnalyzeTokenList[] tokenfilters() {
|
||||
return tokenfilters;
|
||||
}
|
||||
|
||||
public DetailAnalyzeResponse tokenfilters(AnalyzeTokenList[] tokenfilters) {
|
||||
this.tokenfilters = tokenfilters;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(Fields.CUSTOM_ANALYZER, customAnalyzer);
|
||||
|
||||
if (analyzer != null) {
|
||||
builder.startObject(Fields.ANALYZER);
|
||||
analyzer.toXContentWithoutObject(builder, params);
|
||||
builder.endObject();
|
||||
}
|
||||
|
||||
if (charfilters != null) {
|
||||
builder.startArray(Fields.CHARFILTERS);
|
||||
for (CharFilteredText charfilter : charfilters) {
|
||||
charfilter.toXContent(builder, params);
|
||||
}
|
||||
builder.endArray();
|
||||
}
|
||||
|
||||
if (tokenizer != null) {
|
||||
builder.startObject(Fields.TOKENIZER);
|
||||
tokenizer.toXContentWithoutObject(builder, params);
|
||||
builder.endObject();
|
||||
}
|
||||
|
||||
if (tokenfilters != null) {
|
||||
builder.startArray(Fields.TOKENFILTERS);
|
||||
for (AnalyzeTokenList tokenfilter : tokenfilters) {
|
||||
tokenfilter.toXContent(builder, params);
|
||||
}
|
||||
builder.endArray();
|
||||
}
|
||||
return builder;
|
||||
}
|
||||
|
||||
static final class Fields {
|
||||
static final XContentBuilderString NAME = new XContentBuilderString("name");
|
||||
static final XContentBuilderString FILTERED_TEXT = new XContentBuilderString("filtered_text");
|
||||
static final XContentBuilderString CUSTOM_ANALYZER = new XContentBuilderString("custom_analyzer");
|
||||
static final XContentBuilderString ANALYZER = new XContentBuilderString("analyzer");
|
||||
static final XContentBuilderString CHARFILTERS = new XContentBuilderString("charfilters");
|
||||
static final XContentBuilderString TOKENIZER = new XContentBuilderString("tokenizer");
|
||||
static final XContentBuilderString TOKENFILTERS = new XContentBuilderString("tokenfilters");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
this.customAnalyzer = in.readBoolean();
|
||||
if (customAnalyzer) {
|
||||
tokenizer = AnalyzeTokenList.readAnalyzeTokenList(in);
|
||||
int size = in.readVInt();
|
||||
if (size > 0) {
|
||||
charfilters = new CharFilteredText[size];
|
||||
for (int i = 0; i < size; i++) {
|
||||
charfilters[i] = CharFilteredText.readCharFilteredText(in);
|
||||
}
|
||||
}
|
||||
size = in.readVInt();
|
||||
if (size > 0) {
|
||||
tokenfilters = new AnalyzeTokenList[size];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tokenfilters[i] = AnalyzeTokenList.readAnalyzeTokenList(in);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
analyzer = AnalyzeTokenList.readAnalyzeTokenList(in);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeBoolean(customAnalyzer);
|
||||
if (customAnalyzer) {
|
||||
tokenizer.writeTo(out);
|
||||
if (charfilters != null) {
|
||||
out.writeVInt(charfilters.length);
|
||||
for (CharFilteredText charfilter : charfilters) {
|
||||
charfilter.writeTo(out);
|
||||
}
|
||||
} else {
|
||||
out.writeVInt(0);
|
||||
}
|
||||
if (tokenfilters != null) {
|
||||
out.writeVInt(tokenfilters.length);
|
||||
for (AnalyzeTokenList tokenfilter : tokenfilters) {
|
||||
tokenfilter.writeTo(out);
|
||||
}
|
||||
} else {
|
||||
out.writeVInt(0);
|
||||
}
|
||||
} else {
|
||||
analyzer.writeTo(out);
|
||||
}
|
||||
}
|
||||
|
||||
public static class AnalyzeTokenList implements Streamable, ToXContent {
|
||||
private String name;
|
||||
private AnalyzeResponse.AnalyzeToken[] tokens;
|
||||
|
||||
AnalyzeTokenList() {
|
||||
}
|
||||
|
||||
public AnalyzeTokenList(String name, AnalyzeResponse.AnalyzeToken[] tokens) {
|
||||
this.name = name;
|
||||
this.tokens = tokens;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public AnalyzeResponse.AnalyzeToken[] getTokens() {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
public static AnalyzeTokenList readAnalyzeTokenList(StreamInput in) throws IOException {
|
||||
AnalyzeTokenList list = new AnalyzeTokenList();
|
||||
list.readFrom(in);
|
||||
return list;
|
||||
}
|
||||
|
||||
public XContentBuilder toXContentWithoutObject(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.field(Fields.NAME, this.name);
|
||||
builder.startArray(AnalyzeResponse.Fields.TOKENS);
|
||||
for (AnalyzeResponse.AnalyzeToken token : tokens) {
|
||||
token.toXContent(builder, params);
|
||||
}
|
||||
builder.endArray();
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject();
|
||||
builder.field(Fields.NAME, this.name);
|
||||
builder.startArray(AnalyzeResponse.Fields.TOKENS);
|
||||
for (AnalyzeResponse.AnalyzeToken token : tokens) {
|
||||
token.toXContent(builder, params);
|
||||
}
|
||||
builder.endArray();
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
name = in.readString();
|
||||
int size = in.readVInt();
|
||||
if (size > 0) {
|
||||
tokens = new AnalyzeResponse.AnalyzeToken[size];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tokens[i] = AnalyzeResponse.AnalyzeToken.readAnalyzeToken(in);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(name);
|
||||
if (tokens != null) {
|
||||
out.writeVInt(tokens.length);
|
||||
for (AnalyzeResponse.AnalyzeToken token : tokens) {
|
||||
token.writeTo(out);
|
||||
}
|
||||
} else {
|
||||
out.writeVInt(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class CharFilteredText implements Streamable, ToXContent {
|
||||
private String name;
|
||||
private String[] texts;
|
||||
CharFilteredText() {
|
||||
}
|
||||
|
||||
public CharFilteredText(String name, String[] texts) {
|
||||
this.name = name;
|
||||
if (texts != null) {
|
||||
this.texts = texts;
|
||||
} else {
|
||||
this.texts = Strings.EMPTY_ARRAY;
|
||||
}
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
public String[] getTexts() {
|
||||
return texts;
|
||||
}
|
||||
|
||||
@Override
|
||||
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
|
||||
builder.startObject();
|
||||
builder.field(Fields.NAME, name);
|
||||
builder.field(Fields.FILTERED_TEXT, texts);
|
||||
builder.endObject();
|
||||
return builder;
|
||||
}
|
||||
|
||||
public static CharFilteredText readCharFilteredText(StreamInput in) throws IOException {
|
||||
CharFilteredText text = new CharFilteredText();
|
||||
text.readFrom(in);
|
||||
return text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void readFrom(StreamInput in) throws IOException {
|
||||
name = in.readString();
|
||||
texts = in.readStringArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void writeTo(StreamOutput out) throws IOException {
|
||||
out.writeString(name);
|
||||
out.writeStringArray(texts);
|
||||
}
|
||||
}
|
||||
}
|
|
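Note (construction sketch, not part of this diff; the token arrays are placeholders): the two convenience constructors above map directly onto the custom_analyzer flag that readFrom/writeTo switch on.

    // Named or built-in analyzer: only the analyzer-level token list is set (custom_analyzer = false).
    DetailAnalyzeResponse simple = new DetailAnalyzeResponse(
            new DetailAnalyzeResponse.AnalyzeTokenList("standard", analyzerTokens));

    // Custom analyzer: char filter texts, tokenizer tokens and one token list per token filter (custom_analyzer = true).
    DetailAnalyzeResponse custom = new DetailAnalyzeResponse(
            new DetailAnalyzeResponse.CharFilteredText[] {
                    new DetailAnalyzeResponse.CharFilteredText("html_strip", filteredTexts) },
            new DetailAnalyzeResponse.AnalyzeTokenList("keyword", tokenizerTokens),
            new DetailAnalyzeResponse.AnalyzeTokenList[] {
                    new DetailAnalyzeResponse.AnalyzeTokenList("lowercase", filterTokens) });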
@ -20,10 +20,15 @@ package org.elasticsearch.action.admin.indices.analyze;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeReflector;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.action.support.ActionFilters;
|
||||
import org.elasticsearch.action.support.single.shard.TransportSingleShardAction;
|
||||
|
@ -33,6 +38,7 @@ import org.elasticsearch.cluster.block.ClusterBlockException;
|
|||
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
|
||||
import org.elasticsearch.cluster.routing.ShardsIterator;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.io.FastStringReader;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
import org.elasticsearch.index.IndexService;
|
||||
|
@ -46,8 +52,8 @@ import org.elasticsearch.threadpool.ThreadPool;
|
|||
import org.elasticsearch.transport.TransportService;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.io.Reader;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Transport action used to execute analyze requests
|
||||
|
@ -222,6 +228,23 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
throw new IllegalArgumentException("failed to find analyzer");
|
||||
}
|
||||
|
||||
List<AnalyzeResponse.AnalyzeToken> tokens = null;
|
||||
DetailAnalyzeResponse detail = null;
|
||||
|
||||
if (request.explain()) {
|
||||
detail = detailAnalyze(request, analyzer, field);
|
||||
} else {
|
||||
tokens = simpleAnalyze(request, analyzer, field);
|
||||
}
|
||||
|
||||
if (closeAnalyzer) {
|
||||
analyzer.close();
|
||||
}
|
||||
|
||||
return new AnalyzeResponse(tokens, detail);
|
||||
}
|
||||
|
||||
private static List<AnalyzeResponse.AnalyzeToken> simpleAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
|
||||
List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
|
||||
int lastPosition = -1;
|
||||
int lastOffset = 0;
|
||||
|
@ -238,7 +261,7 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
if (increment > 0) {
|
||||
lastPosition = lastPosition + increment;
|
||||
}
|
||||
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type()));
|
||||
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type(), null));
|
||||
|
||||
}
|
||||
stream.end();
|
||||
|
@ -251,11 +274,211 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
throw new ElasticsearchException("failed to analyze", e);
|
||||
}
|
||||
}
|
||||
|
||||
if (closeAnalyzer) {
|
||||
analyzer.close();
|
||||
return tokens;
|
||||
}
|
||||
|
||||
return new AnalyzeResponse(tokens);
|
||||
private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analyzer analyzer, String field) {
|
||||
DetailAnalyzeResponse detailResponse;
|
||||
final Set<String> includeAttributes = new HashSet<>();
|
||||
if (request.attributes() != null) {
|
||||
for (String attribute : request.attributes()) {
|
||||
includeAttributes.add(attribute.toLowerCase(Locale.ROOT));
|
||||
}
|
||||
}
|
||||
|
||||
CustomAnalyzer customAnalyzer = null;
|
||||
if (analyzer instanceof CustomAnalyzer) {
|
||||
customAnalyzer = (CustomAnalyzer) analyzer;
|
||||
} else if (analyzer instanceof NamedAnalyzer && ((NamedAnalyzer) analyzer).analyzer() instanceof CustomAnalyzer) {
|
||||
customAnalyzer = (CustomAnalyzer) ((NamedAnalyzer) analyzer).analyzer();
|
||||
}
|
||||
|
||||
if (customAnalyzer != null) {
|
||||
// custom analyzer: split it into its char filters, tokenizer and token filters so each stage can be analyzed separately
|
||||
CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters();
|
||||
TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory();
|
||||
TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters();
|
||||
|
||||
String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length];
|
||||
TokenListCreator[] tokenFiltersTokenListCreator = new TokenListCreator[tokenFilterFactories != null ? tokenFilterFactories.length : 0];
|
||||
|
||||
TokenListCreator tokenizerTokenListCreator = new TokenListCreator();
|
||||
|
||||
for (int textIndex = 0; textIndex < request.text().length; textIndex++) {
|
||||
String charFilteredSource = request.text()[textIndex];
|
||||
|
||||
Reader reader = new FastStringReader(charFilteredSource);
|
||||
if (charFilterFactories != null) {
|
||||
|
||||
for (int charFilterIndex = 0; charFilterIndex < charFilterFactories.length; charFilterIndex++) {
|
||||
reader = charFilterFactories[charFilterIndex].create(reader);
|
||||
Reader readerForWriteOut = new FastStringReader(charFilteredSource);
|
||||
readerForWriteOut = charFilterFactories[charFilterIndex].create(readerForWriteOut);
|
||||
charFilteredSource = writeCharStream(readerForWriteOut);
|
||||
charFiltersTexts[charFilterIndex][textIndex] = charFilteredSource;
|
||||
}
|
||||
}
|
||||
|
||||
// analyzing only tokenizer
|
||||
Tokenizer tokenizer = tokenizerFactory.create();
|
||||
tokenizer.setReader(reader);
|
||||
tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes);
|
||||
|
||||
// analyzing each tokenfilter
|
||||
if (tokenFilterFactories != null) {
|
||||
for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFilterFactories.length; tokenFilterIndex++) {
|
||||
if (tokenFiltersTokenListCreator[tokenFilterIndex] == null) {
|
||||
tokenFiltersTokenListCreator[tokenFilterIndex] = new TokenListCreator();
|
||||
}
|
||||
TokenStream stream = createStackedTokenStream(request.text()[textIndex],
|
||||
charFilterFactories, tokenizerFactory, tokenFilterFactories, tokenFilterIndex + 1);
|
||||
tokenFiltersTokenListCreator[tokenFilterIndex].analyze(stream, customAnalyzer, field, includeAttributes);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DetailAnalyzeResponse.CharFilteredText[] charFilteredLists = new DetailAnalyzeResponse.CharFilteredText[charFiltersTexts.length];
|
||||
if (charFilterFactories != null) {
|
||||
for (int charFilterIndex = 0; charFilterIndex < charFiltersTexts.length; charFilterIndex++) {
|
||||
charFilteredLists[charFilterIndex] = new DetailAnalyzeResponse.CharFilteredText(
|
||||
charFilterFactories[charFilterIndex].name(), charFiltersTexts[charFilterIndex]);
|
||||
}
|
||||
}
|
||||
DetailAnalyzeResponse.AnalyzeTokenList[] tokenFilterLists = new DetailAnalyzeResponse.AnalyzeTokenList[tokenFiltersTokenListCreator.length];
|
||||
if (tokenFilterFactories != null) {
|
||||
for (int tokenFilterIndex = 0; tokenFilterIndex < tokenFiltersTokenListCreator.length; tokenFilterIndex++) {
|
||||
tokenFilterLists[tokenFilterIndex] = new DetailAnalyzeResponse.AnalyzeTokenList(
|
||||
tokenFilterFactories[tokenFilterIndex].name(), tokenFiltersTokenListCreator[tokenFilterIndex].getArrayTokens());
|
||||
}
|
||||
}
|
||||
detailResponse = new DetailAnalyzeResponse(charFilteredLists, new DetailAnalyzeResponse.AnalyzeTokenList(tokenizerFactory.name(), tokenizerTokenListCreator.getArrayTokens()), tokenFilterLists);
|
||||
} else {
|
||||
String name;
|
||||
if (analyzer instanceof NamedAnalyzer) {
|
||||
name = ((NamedAnalyzer) analyzer).name();
|
||||
} else {
|
||||
name = analyzer.getClass().getName();
|
||||
}
|
||||
|
||||
TokenListCreator tokenListCreator = new TokenListCreator();
|
||||
for (String text : request.text()) {
|
||||
tokenListCreator.analyze(analyzer.tokenStream(field, text), analyzer, field,
|
||||
includeAttributes);
|
||||
}
|
||||
detailResponse = new DetailAnalyzeResponse(new DetailAnalyzeResponse.AnalyzeTokenList(name, tokenListCreator.getArrayTokens()));
|
||||
}
|
||||
return detailResponse;
|
||||
}
|
||||
|
||||
private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) {
|
||||
Reader reader = new FastStringReader(source);
|
||||
for (CharFilterFactory charFilterFactory : charFilterFactories) {
|
||||
reader = charFilterFactory.create(reader);
|
||||
}
|
||||
Tokenizer tokenizer = tokenizerFactory.create();
|
||||
tokenizer.setReader(reader);
|
||||
TokenStream tokenStream = tokenizer;
|
||||
for (int i = 0; i < current; i++) {
|
||||
tokenStream = tokenFilterFactories[i].create(tokenStream);
|
||||
}
|
||||
return tokenStream;
|
||||
}
|
||||
|
||||
private static String writeCharStream(Reader input) {
|
||||
final int BUFFER_SIZE = 1024;
|
||||
char[] buf = new char[BUFFER_SIZE];
|
||||
int len;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
do {
|
||||
try {
|
||||
len = input.read(buf, 0, BUFFER_SIZE);
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchException("failed to analyze (charFiltering)", e);
|
||||
}
|
||||
if (len > 0)
|
||||
sb.append(buf, 0, len);
|
||||
} while (len == BUFFER_SIZE);
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private static class TokenListCreator {
|
||||
int lastPosition = -1;
|
||||
int lastOffset = 0;
|
||||
List<AnalyzeResponse.AnalyzeToken> tokens;
|
||||
|
||||
TokenListCreator() {
|
||||
tokens = new ArrayList<>();
|
||||
}
|
||||
|
||||
private void analyze(TokenStream stream, Analyzer analyzer, String field, Set<String> includeAttributes) {
|
||||
try {
|
||||
stream.reset();
|
||||
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
|
||||
OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
|
||||
TypeAttribute type = stream.addAttribute(TypeAttribute.class);
|
||||
|
||||
while (stream.incrementToken()) {
|
||||
int increment = posIncr.getPositionIncrement();
|
||||
if (increment > 0) {
|
||||
lastPosition = lastPosition + increment;
|
||||
}
|
||||
tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(),
|
||||
lastOffset +offset.endOffset(), type.type(), extractExtendedAttributes(stream, includeAttributes)));
|
||||
|
||||
}
|
||||
stream.end();
|
||||
lastOffset += offset.endOffset();
|
||||
lastPosition += posIncr.getPositionIncrement();
|
||||
|
||||
lastPosition += analyzer.getPositionIncrementGap(field);
|
||||
lastOffset += analyzer.getOffsetGap(field);
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchException("failed to analyze", e);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
|
||||
private AnalyzeResponse.AnalyzeToken[] getArrayTokens() {
|
||||
return tokens.toArray(new AnalyzeResponse.AnalyzeToken[tokens.size()]);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Extracts the additional (non-core) token attributes from the stream, grouped by attribute key.
 *
 * @param stream current TokenStream
 * @param includeAttributes attribute names to include; if null or empty, all attributes are returned
 * @return a map from attribute key to attribute value
 */
|
||||
private static Map<String, Object> extractExtendedAttributes(TokenStream stream, final Set<String> includeAttributes) {
|
||||
final Map<String, Object> extendedAttributes = new TreeMap<>();
|
||||
|
||||
stream.reflectWith(new AttributeReflector() {
|
||||
@Override
|
||||
public void reflect(Class<? extends Attribute> attClass, String key, Object value) {
|
||||
if (CharTermAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (PositionIncrementAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (OffsetAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (TypeAttribute.class.isAssignableFrom(attClass))
|
||||
return;
|
||||
if (includeAttributes == null || includeAttributes.isEmpty() || includeAttributes.contains(key.toLowerCase(Locale.ROOT))) {
|
||||
if (value instanceof BytesRef) {
|
||||
final BytesRef p = (BytesRef) value;
|
||||
value = p.toString();
|
||||
}
|
||||
extendedAttributes.put(key, value);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
return extendedAttributes;
|
||||
}
|
||||
}
|
||||
|
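Note (illustration, not part of this diff; the chain below is hypothetical): detailAnalyze() above captures the output after each token filter by rebuilding the full chain once per filter via createStackedTokenStream(), passing current = tokenFilterIndex + 1.

    // custom chain: char_filter=[my_mapping], tokenizer=standard, filter=[lowercase, snowball]
    //   current = 1  ->  my_mapping -> standard -> lowercase              (tokens after "lowercase")
    //   current = 2  ->  my_mapping -> standard -> lowercase -> snowball  (tokens after "snowball")
    TokenStream afterLowercase = createStackedTokenStream(text, charFilterFactories, tokenizerFactory, tokenFilterFactories, 1);
    TokenStream afterSnowball = createStackedTokenStream(text, charFilterFactories, tokenizerFactory, tokenFilterFactories, 2);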
|
|
@ -21,7 +21,8 @@ package org.elasticsearch.rest.action.admin.indices.analyze;
|
|||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
|
||||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
||||
import org.elasticsearch.client.Client;
|
||||
import org.elasticsearch.common.Strings;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
|
@ -47,6 +48,17 @@ import static org.elasticsearch.rest.RestRequest.Method.POST;
|
|||
*/
|
||||
public class RestAnalyzeAction extends BaseRestHandler {
|
||||
|
||||
public static class Fields {
|
||||
public static final ParseField ANALYZER = new ParseField("analyzer");
|
||||
public static final ParseField TEXT = new ParseField("text");
|
||||
public static final ParseField FIELD = new ParseField("field");
|
||||
public static final ParseField TOKENIZER = new ParseField("tokenizer");
|
||||
public static final ParseField TOKEN_FILTERS = new ParseField("token_filters", "filters");
|
||||
public static final ParseField CHAR_FILTERS = new ParseField("char_filters");
|
||||
public static final ParseField EXPLAIN = new ParseField("explain");
|
||||
public static final ParseField ATTRIBUTES = new ParseField("attributes");
|
||||
}
|
||||
|
||||
@Inject
|
||||
public RestAnalyzeAction(Settings settings, RestController controller, Client client) {
|
||||
super(settings, controller, client);
|
||||
|
@ -68,6 +80,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
|||
analyzeRequest.tokenizer(request.param("tokenizer"));
|
||||
analyzeRequest.tokenFilters(request.paramAsStringArray("token_filters", request.paramAsStringArray("filters", analyzeRequest.tokenFilters())));
|
||||
analyzeRequest.charFilters(request.paramAsStringArray("char_filters", analyzeRequest.charFilters()));
|
||||
analyzeRequest.explain(request.paramAsBoolean("explain", false));
|
||||
analyzeRequest.attributes(request.paramAsStringArray("attributes", analyzeRequest.attributes()));
|
||||
|
||||
if (RestActions.hasBodyContent(request)) {
|
||||
XContentType type = RestActions.guessBodyContentType(request);
|
||||
|
@ -78,14 +92,14 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
|||
}
|
||||
} else {
|
||||
// NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values
|
||||
buildFromContent(RestActions.getRestContent(request), analyzeRequest);
|
||||
buildFromContent(RestActions.getRestContent(request), analyzeRequest, parseFieldMatcher);
|
||||
}
|
||||
}
|
||||
|
||||
client.admin().indices().analyze(analyzeRequest, new RestToXContentListener<AnalyzeResponse>(channel));
|
||||
}
|
||||
|
||||
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest) {
|
||||
public static void buildFromContent(BytesReference content, AnalyzeRequest analyzeRequest, ParseFieldMatcher parseFieldMatcher) {
|
||||
try (XContentParser parser = XContentHelper.createParser(content)) {
|
||||
if (parser.nextToken() != XContentParser.Token.START_OBJECT) {
|
||||
throw new IllegalArgumentException("Malforrmed content, must start with an object");
|
||||
|
@ -95,9 +109,9 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
|||
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
|
||||
if (token == XContentParser.Token.FIELD_NAME) {
|
||||
currentFieldName = parser.currentName();
|
||||
} else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.text(parser.text());
|
||||
} else if ("text".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TEXT) && token == XContentParser.Token.START_ARRAY) {
|
||||
List<String> texts = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
|
@ -105,14 +119,14 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
|||
}
|
||||
texts.add(parser.text());
|
||||
}
|
||||
analyzeRequest.text(texts.toArray(Strings.EMPTY_ARRAY));
|
||||
} else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.text(texts.toArray(new String[texts.size()]));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.ANALYZER) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.analyzer(parser.text());
|
||||
} else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.FIELD) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.field(parser.text());
|
||||
} else if ("tokenizer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKENIZER) && token == XContentParser.Token.VALUE_STRING) {
|
||||
analyzeRequest.tokenizer(parser.text());
|
||||
} else if (("token_filters".equals(currentFieldName) || "filters".equals(currentFieldName)) && token == XContentParser.Token.START_ARRAY) {
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.TOKEN_FILTERS) && token == XContentParser.Token.START_ARRAY) {
|
||||
List<String> filters = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
|
@ -120,8 +134,8 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
|||
}
|
||||
filters.add(parser.text());
|
||||
}
|
||||
analyzeRequest.tokenFilters(filters.toArray(Strings.EMPTY_ARRAY));
|
||||
} else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) {
|
||||
analyzeRequest.tokenFilters(filters.toArray(new String[filters.size()]));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.CHAR_FILTERS) && token == XContentParser.Token.START_ARRAY) {
|
||||
List<String> charFilters = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
|
@ -129,7 +143,18 @@ public class RestAnalyzeAction extends BaseRestHandler {
|
|||
}
|
||||
charFilters.add(parser.text());
|
||||
}
|
||||
analyzeRequest.tokenFilters(charFilters.toArray(Strings.EMPTY_ARRAY));
|
||||
analyzeRequest.charFilters(charFilters.toArray(new String[charFilters.size()]));
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.EXPLAIN) && token == XContentParser.Token.VALUE_BOOLEAN) {
|
||||
analyzeRequest.explain(parser.booleanValue());
|
||||
} else if (parseFieldMatcher.match(currentFieldName, Fields.ATTRIBUTES) && token == XContentParser.Token.START_ARRAY){
|
||||
List<String> attributes = new ArrayList<>();
|
||||
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
|
||||
if (token.isValue() == false) {
|
||||
throw new IllegalArgumentException(currentFieldName + " array element should only contain attribute name");
|
||||
}
|
||||
attributes.add(parser.text());
|
||||
}
|
||||
analyzeRequest.attributes(attributes.toArray(new String[attributes.size()]));
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] ");
|
||||
}
|
||||
|
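Note (request-body sketch, not part of this diff) matching the fields parsed above; "token_filters" and its alias "filters" are both accepted via the TOKEN_FILTERS ParseField.

    GET _analyze
    {
      "text" : "<b>This is troubled</b>",
      "tokenizer" : "standard",
      "char_filters" : ["html_strip"],
      "token_filters" : ["snowball"],
      "explain" : true,
      "attributes" : ["keyword"]
    }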
|
|
@ -22,11 +22,14 @@ import org.elasticsearch.action.admin.indices.alias.Alias;
|
|||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
|
||||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder;
|
||||
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
import org.elasticsearch.common.bytes.BytesArray;
|
||||
import org.elasticsearch.common.bytes.BytesReference;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.rest.action.admin.indices.analyze.RestAnalyzeAction;
|
||||
import org.elasticsearch.test.ESIntegTestCase;
|
||||
import org.hamcrest.core.IsNull;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -36,8 +39,10 @@ import static org.hamcrest.Matchers.equalTo;
|
|||
import static org.hamcrest.Matchers.hasSize;
|
||||
import static org.hamcrest.Matchers.instanceOf;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
import static org.hamcrest.Matchers.notNullValue;
|
||||
import static org.hamcrest.Matchers.startsWith;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -201,7 +206,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|||
|
||||
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
||||
|
||||
RestAnalyzeAction.buildFromContent(content, analyzeRequest);
|
||||
RestAnalyzeAction.buildFromContent(content, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
||||
|
||||
assertThat(analyzeRequest.text().length, equalTo(1));
|
||||
assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"}));
|
||||
|
@ -213,7 +218,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|||
AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test");
|
||||
|
||||
try {
|
||||
RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest);
|
||||
RestAnalyzeAction.buildFromContent(new BytesArray("{invalid_json}"), analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
||||
fail("shouldn't get here");
|
||||
} catch (Exception e) {
|
||||
assertThat(e, instanceOf(IllegalArgumentException.class));
|
||||
|
@ -230,7 +235,7 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|||
.endObject().bytes();
|
||||
|
||||
try {
|
||||
RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest);
|
||||
RestAnalyzeAction.buildFromContent(invalidContent, analyzeRequest, new ParseFieldMatcher(Settings.EMPTY));
|
||||
fail("shouldn't get here");
|
||||
} catch (Exception e) {
|
||||
assertThat(e, instanceOf(IllegalArgumentException.class));
|
||||
|
@ -267,4 +272,235 @@ public class AnalyzeActionIT extends ESIntegTestCase {
|
|||
|
||||
}
|
||||
|
||||
public void testDetailAnalyze() throws Exception {
|
||||
assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
||||
.setSettings(
|
||||
settingsBuilder()
|
||||
.put("index.analysis.char_filter.my_mapping.type", "mapping")
|
||||
.putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
|
||||
.put("index.analysis.analyzer.test_analyzer.type", "custom")
|
||||
.put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
|
||||
.put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
|
||||
.putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
|
||||
.putArray("index.analysis.analyzer.test_analyzer.filter", "snowball")));
|
||||
ensureGreen();
|
||||
|
||||
for (int i = 0; i < 10; i++) {
|
||||
AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH")
|
||||
.setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
||||
//charfilters
|
||||
// global charfilter is not change text.
|
||||
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH"));
|
||||
//tokenizer
|
||||
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15));
|
||||
//tokenfilters
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15));
|
||||
}
|
||||
}
|
||||
|
||||
public void testDetailAnalyzeWithNoIndex() throws Exception {
|
||||
//analyzer only
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
|
||||
.setExplain(true).setAnalyzer("simple").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue());
|
||||
assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue());
|
||||
assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue());
|
||||
assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
|
||||
assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
|
||||
}
|
||||
|
||||
public void testDetailAnalyzeCustomAnalyzerWithNoIndex() throws Exception {
|
||||
//analyzer only
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST")
|
||||
.setExplain(true).setAnalyzer("simple").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenizer(), IsNull.nullValue());
|
||||
assertThat(analyzeResponse.detail().tokenfilters(), IsNull.nullValue());
|
||||
assertThat(analyzeResponse.detail().charfilters(), IsNull.nullValue());
|
||||
assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
|
||||
assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4));
|
||||
|
||||
//custom analyzer
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>")
|
||||
.setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get();
|
||||
assertThat(analyzeResponse.detail().analyzer(), IsNull.nullValue());
|
||||
//charfilters
|
||||
// global charfilter is not change text.
|
||||
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("\nTHIS IS A TEST\n"));
|
||||
//tokenizer
|
||||
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("\nTHIS IS A TEST\n"));
|
||||
//tokenfilters
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n"));
|
||||
|
||||
|
||||
//check other attributes
|
||||
analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
||||
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
|
||||
String[] expectedAttributesKey = {
|
||||
"bytes",
|
||||
"positionLength",
|
||||
"keyword"};
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
||||
Object extendedAttribute;
|
||||
|
||||
for (String key : expectedAttributesKey) {
|
||||
extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
|
||||
assertThat(extendedAttribute, notNullValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void testDetailAnalyzeSpecifyAttributes() throws Exception {
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled")
|
||||
.setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get();
|
||||
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl"));
|
||||
String[] expectedAttributesKey = {
|
||||
"keyword"};
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length));
|
||||
Object extendedAttribute;
|
||||
|
||||
for (String key : expectedAttributesKey) {
|
||||
extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key);
|
||||
assertThat(extendedAttribute, notNullValue());
|
||||
}
|
||||
}
|
||||
|
||||
public void testDetailAnalyzeWithMultiValues() throws Exception {
|
||||
assertAcked(prepareCreate("test").addAlias(new Alias("alias")));
|
||||
ensureGreen();
|
||||
client().admin().indices().preparePutMapping("test")
|
||||
.setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();
|
||||
|
||||
String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"};
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
|
||||
.setExplain(true).setField("simple").setText(texts).execute().get();
|
||||
|
||||
assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple"));
|
||||
assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(7));
|
||||
AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().analyzer().getTokens()[3];
|
||||
|
||||
assertThat(token.getTerm(), equalTo("test"));
|
||||
assertThat(token.getPosition(), equalTo(3));
|
||||
assertThat(token.getStartOffset(), equalTo(10));
|
||||
assertThat(token.getEndOffset(), equalTo(14));
|
||||
|
||||
token = analyzeResponse.detail().analyzer().getTokens()[5];
|
||||
assertThat(token.getTerm(), equalTo("second"));
|
||||
assertThat(token.getPosition(), equalTo(105));
|
||||
assertThat(token.getStartOffset(), equalTo(19));
|
||||
assertThat(token.getEndOffset(), equalTo(25));
|
||||
}
|
||||
|
||||
public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception {
|
||||
assertAcked(prepareCreate("test").addAlias(new Alias("alias"))
|
||||
.setSettings(
|
||||
settingsBuilder()
|
||||
.put("index.analysis.char_filter.my_mapping.type", "mapping")
|
||||
.putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F")
|
||||
.put("index.analysis.analyzer.test_analyzer.type", "custom")
|
||||
.put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100")
|
||||
.put("index.analysis.analyzer.test_analyzer.tokenizer", "standard")
|
||||
.putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping")
|
||||
.putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase")));
|
||||
ensureGreen();
|
||||
|
||||
client().admin().indices().preparePutMapping("test")
|
||||
.setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get();
|
||||
|
||||
//only analyzer =
|
||||
String[] texts = new String[]{"this is a PHISH", "the troubled text"};
|
||||
AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts)
|
||||
.setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get();
|
||||
|
||||
// charfilter
|
||||
assertThat(analyzeResponse.detail().charfilters().length, equalTo(1));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH"));
|
||||
assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text"));
|
||||
|
||||
// tokenizer
|
||||
assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard"));
|
||||
assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7));
|
||||
AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3];
|
||||
|
||||
assertThat(token.getTerm(), equalTo("FISH"));
|
||||
assertThat(token.getPosition(), equalTo(3));
|
||||
assertThat(token.getStartOffset(), equalTo(10));
|
||||
assertThat(token.getEndOffset(), equalTo(15));
|
||||
|
||||
token = analyzeResponse.detail().tokenizer().getTokens()[5];
|
||||
assertThat(token.getTerm(), equalTo("troubled"));
|
||||
assertThat(token.getPosition(), equalTo(105));
|
||||
assertThat(token.getStartOffset(), equalTo(20));
|
||||
assertThat(token.getEndOffset(), equalTo(28));
|
||||
|
||||
// tokenfilter(snowball)
|
||||
assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7));
|
||||
token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3];
|
||||
|
||||
assertThat(token.getTerm(), equalTo("FISH"));
|
||||
assertThat(token.getPosition(), equalTo(3));
|
||||
assertThat(token.getStartOffset(), equalTo(10));
|
||||
assertThat(token.getEndOffset(), equalTo(15));
|
||||
|
||||
token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
|
||||
assertThat(token.getTerm(), equalTo("troubl"));
|
||||
assertThat(token.getPosition(), equalTo(105));
|
||||
assertThat(token.getStartOffset(), equalTo(20));
|
||||
assertThat(token.getEndOffset(), equalTo(28));
|
||||
|
||||
// tokenfilter(lowercase)
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase"));
|
||||
assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7));
|
||||
token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3];
|
||||
|
||||
assertThat(token.getTerm(), equalTo("fish"));
|
||||
assertThat(token.getPosition(), equalTo(3));
|
||||
assertThat(token.getStartOffset(), equalTo(10));
|
||||
assertThat(token.getEndOffset(), equalTo(15));
|
||||
|
||||
token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5];
|
||||
assertThat(token.getTerm(), equalTo("troubl"));
|
||||
assertThat(token.getPosition(), equalTo(105));
|
||||
assertThat(token.getStartOffset(), equalTo(20));
|
||||
assertThat(token.getEndOffset(), equalTo(28));
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
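Note (illustration, not part of this diff): the multi-value tests above expect position 105 and start offset 19 for "second" because TokenListCreator carries lastPosition/lastOffset across values, adding the analyzer's position_increment_gap (100 in the test mapping) and its offset gap between them.

    // value 1: "THIS IS A TEST"   -> this(0) is(1) a(2) test(3), offsets 0..14
    // gap      : +100 positions (position_increment_gap) plus the analyzer's offset gap
    // value 2: "THE SECOND TEXT"  -> the(104) second(105) text(106), "second" at offsets 19..25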
|
|
@@ -100,3 +100,74 @@ provided it doesn't start with `{` :
--------------------------------------------------
curl -XGET 'localhost:9200/_analyze?tokenizer=keyword&token_filters=lowercase&char_filters=html_strip' -d 'this is a <b>test</b>'
--------------------------------------------------

=== Explain Analyze

If you want to get more advanced details, set `explain` to `true` (defaults to `false`). It outputs all token attributes for each token.
You can filter the token attributes you want in the output by setting the `attributes` option.

experimental[The format of the additional detail information is experimental and can change at any time]

[source,js]
--------------------------------------------------
GET test/_analyze
{
  "tokenizer" : "standard",
  "token_filters" : ["snowball"],
  "text" : "detailed output",
  "explain" : true,
  "attributes" : ["keyword"] <1>
}
--------------------------------------------------
// AUTOSENSE
<1> Set "keyword" to output only the "keyword" attribute

coming[2.0.0, body based parameters were added in 2.0.0]

The request returns the following result:

[source,js]
--------------------------------------------------
{
  "detail" : {
    "custom_analyzer" : true,
    "charfilters" : [ ],
    "tokenizer" : {
      "name" : "standard",
      "tokens" : [ {
        "token" : "detailed",
        "start_offset" : 0,
        "end_offset" : 8,
        "type" : "<ALPHANUM>",
        "position" : 0
      }, {
        "token" : "output",
        "start_offset" : 9,
        "end_offset" : 15,
        "type" : "<ALPHANUM>",
        "position" : 1
      } ]
    },
    "tokenfilters" : [ {
      "name" : "snowball",
      "tokens" : [ {
        "token" : "detail",
        "start_offset" : 0,
        "end_offset" : 8,
        "type" : "<ALPHANUM>",
        "position" : 0,
        "keyword" : false <1>
      }, {
        "token" : "output",
        "start_offset" : 9,
        "end_offset" : 15,
        "type" : "<ALPHANUM>",
        "position" : 1,
        "keyword" : false <1>
      } ]
    } ]
  }
}
--------------------------------------------------
<1> Only the "keyword" attribute is output, since "attributes" was specified in the request.
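Note (a sketch, not part of this diff): the same request can also be issued with the `explain` and `attributes` URL parameters that the REST handler accepts:

    curl -XGET 'localhost:9200/_analyze?tokenizer=standard&token_filters=snowball&explain=true&attributes=keyword' -d 'detailed output'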
@@ -44,6 +44,14 @@
        "type" : "string",
        "description" : "The name of the tokenizer to use for the analysis"
      },
      "detail": {
        "type" : "boolean",
        "description" : "With `true`, outputs more advanced details. (default: false)"
      },
      "attributes": {
        "type" : "list",
        "description" : "A comma-separated list of token attributes to output, this parameter works only with `detail=true`"
      },
      "format": {
        "type": "enum",
        "options" : ["detailed","text"],
@@ -71,3 +71,31 @@ setup:
  - length: { tokens: 2 }
  - match: { tokens.0.token: foo bar }
  - match: { tokens.1.token: baz }
---
"Detail response with Analyzer":
  - do:
      indices.analyze:
        body: {"text": "This is troubled", "analyzer": standard, "explain": true}
  - length: { detail.analyzer.tokens: 3 }
  - match: { detail.analyzer.name: standard }
  - match: { detail.analyzer.tokens.0.token: this }
  - match: { detail.analyzer.tokens.1.token: is }
  - match: { detail.analyzer.tokens.2.token: troubled }
---
"Detail output specified attribute":
  - do:
      indices.analyze:
        body: {"text": "<text>This is troubled</text>", "char_filters": ["html_strip"], "filters": ["snowball"], "tokenizer": standard, "explain": true, "attributes": ["keyword"]}
  - length: { detail.charfilters: 1 }
  - length: { detail.tokenizer.tokens: 3 }
  - length: { detail.tokenfilters.0.tokens: 3 }
  - match: { detail.tokenizer.name: standard }
  - match: { detail.tokenizer.tokens.0.token: This }
  - match: { detail.tokenizer.tokens.1.token: is }
  - match: { detail.tokenizer.tokens.2.token: troubled }
  - match: { detail.tokenfilters.0.name: snowball }
  - match: { detail.tokenfilters.0.tokens.0.token: This }
  - match: { detail.tokenfilters.0.tokens.1.token: is }
  - match: { detail.tokenfilters.0.tokens.2.token: troubl }
  - match: { detail.tokenfilters.0.tokens.2.keyword: false }