diff --git a/docs/reference/indices/analyze.asciidoc b/docs/reference/indices/analyze.asciidoc index 393aa4a9ad7..1026a113f1e 100644 --- a/docs/reference/indices/analyze.asciidoc +++ b/docs/reference/indices/analyze.asciidoc @@ -18,6 +18,19 @@ curl -XGET 'localhost:9200/_analyze' -d ' coming[2.0.0, body based parameters were added in 2.0.0] +If the `text` parameter is provided as an array of strings, it is analyzed as a multi-valued field. + +[source,js] +-------------------------------------------------- +curl -XGET 'localhost:9200/_analyze' -d ' +{ + "analyzer" : "standard", + "text" : ["this is a test", "the second text"] +}' +-------------------------------------------------- + +coming[2.0.0, body based parameters were added in 2.0.0] + Or by building a custom transient analyzer out of tokenizers, token filters and char filters. Token filters can use the shorter 'filters' parameter name: diff --git a/rest-api-spec/api/indices.analyze.json b/rest-api-spec/api/indices.analyze.json index 372693b794a..00b0ec13a5c 100644 --- a/rest-api-spec/api/indices.analyze.json +++ b/rest-api-spec/api/indices.analyze.json @@ -37,7 +37,7 @@ "description" : "With `true`, specify that a local shard should be used if available, with `false`, use a random shard (default: true)" }, "text": { - "type" : "string", + "type" : "list", "description" : "The text on which the analysis should be performed (when request body is not used)" }, "tokenizer": { diff --git a/rest-api-spec/test/indices.analyze/10_analyze.yaml b/rest-api-spec/test/indices.analyze/10_analyze.yaml index 16ab85a72e5..49420672861 100644 --- a/rest-api-spec/test/indices.analyze/10_analyze.yaml +++ b/rest-api-spec/test/indices.analyze/10_analyze.yaml @@ -63,3 +63,11 @@ setup: body: { "text": "Bar Foo", "filters": ["lowercase"], "tokenizer": keyword } - length: {tokens: 1 } - match: { tokens.0.token: bar foo } +--- +"Array text": + - do: + indices.analyze: + body: { "text": ["Foo Bar", "Baz"], "filters": ["lowercase"], "tokenizer": 
keyword } + - length: {tokens: 2 } + - match: { tokens.0.token: foo bar } + - match: { tokens.1.token: baz } diff --git a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java index cabc75c559b..d631f8b8d0a 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequest.java @@ -36,7 +36,7 @@ import static org.elasticsearch.action.ValidateActions.addValidationError; */ public class AnalyzeRequest extends SingleCustomOperationRequest { - private String text; + private String[] text; private String analyzer; @@ -61,11 +61,11 @@ public class AnalyzeRequest extends SingleCustomOperationRequest this.index(index); } - public String text() { + public String[] text() { return this.text; } - public AnalyzeRequest text(String text) { + public AnalyzeRequest text(String... text) { this.text = text; return this; } @@ -118,7 +118,7 @@ public class AnalyzeRequest extends SingleCustomOperationRequest @Override public ActionRequestValidationException validate() { ActionRequestValidationException validationException = super.validate(); - if (text == null) { + if (text == null || text.length == 0) { validationException = addValidationError("text is missing", validationException); } if (tokenFilters == null) { @@ -133,7 +133,7 @@ public class AnalyzeRequest extends SingleCustomOperationRequest @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); - text = in.readString(); + text = in.readStringArray(); analyzer = in.readOptionalString(); tokenizer = in.readOptionalString(); tokenFilters = in.readStringArray(); @@ -144,7 +144,7 @@ public class AnalyzeRequest extends SingleCustomOperationRequest @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); - out.writeString(text); + 
out.writeStringArray(text); out.writeOptionalString(analyzer); out.writeOptionalString(tokenizer); out.writeStringArray(tokenFilters); diff --git a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java index b9d057f0cfe..2707419e304 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/analyze/AnalyzeRequestBuilder.java @@ -30,7 +30,7 @@ public class AnalyzeRequestBuilder extends SingleCustomOperationRequestBuilder tokens = Lists.newArrayList(); TokenStream stream = null; - try { - stream = analyzer.tokenStream(field, request.text()); - stream.reset(); - CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); - PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); - OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); - TypeAttribute type = stream.addAttribute(TypeAttribute.class); + int lastPosition = -1; + int lastOffset = 0; + for (String text : request.text()) { + try { + stream = analyzer.tokenStream(field, text); + stream.reset(); + CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); + OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class); + TypeAttribute type = stream.addAttribute(TypeAttribute.class); + + while (stream.incrementToken()) { + int increment = posIncr.getPositionIncrement(); + if (increment > 0) { + lastPosition = lastPosition + increment; + } + tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), lastPosition, lastOffset + offset.startOffset(), lastOffset + offset.endOffset(), type.type())); - int position = -1; - while (stream.incrementToken()) { - int increment = posIncr.getPositionIncrement(); - if (increment > 0) 
{ - position = position + increment; } - tokens.add(new AnalyzeResponse.AnalyzeToken(term.toString(), position, offset.startOffset(), offset.endOffset(), type.type())); - } - stream.end(); - } catch (IOException e) { - throw new ElasticsearchException("failed to analyze", e); - } finally { - if (stream != null) { - try { - stream.close(); - } catch (IOException e) { - // ignore - } - } - if (closeAnalyzer) { - analyzer.close(); + stream.end(); + lastOffset += offset.endOffset(); + lastPosition += posIncr.getPositionIncrement(); + + lastPosition += analyzer.getPositionIncrementGap(field); + lastOffset += analyzer.getOffsetGap(field); + + } catch (IOException e) { + throw new ElasticsearchException("failed to analyze", e); + } finally { + IOUtils.closeWhileHandlingException(stream); } } + if (closeAnalyzer) { + analyzer.close(); + } + return new AnalyzeResponse(tokens); } } diff --git a/src/main/java/org/elasticsearch/client/IndicesAdminClient.java b/src/main/java/org/elasticsearch/client/IndicesAdminClient.java index bd1af4586a7..239f48e3b60 100644 --- a/src/main/java/org/elasticsearch/client/IndicesAdminClient.java +++ b/src/main/java/org/elasticsearch/client/IndicesAdminClient.java @@ -587,6 +587,12 @@ public interface IndicesAdminClient extends ElasticsearchClient { */ AnalyzeRequestBuilder prepareAnalyze(String text); + /** + * Analyze text/texts. + * + */ + AnalyzeRequestBuilder prepareAnalyze(); + /** * Puts an index template. 
*/ diff --git a/src/main/java/org/elasticsearch/client/support/AbstractClient.java b/src/main/java/org/elasticsearch/client/support/AbstractClient.java index edec9af848a..703b03f0dc5 100644 --- a/src/main/java/org/elasticsearch/client/support/AbstractClient.java +++ b/src/main/java/org/elasticsearch/client/support/AbstractClient.java @@ -1478,6 +1478,11 @@ public abstract class AbstractClient extends AbstractComponent implements Client return new AnalyzeRequestBuilder(this, AnalyzeAction.INSTANCE, null, text); } + @Override + public AnalyzeRequestBuilder prepareAnalyze() { + return new AnalyzeRequestBuilder(this, AnalyzeAction.INSTANCE); + } + @Override public ActionFuture putTemplate(final PutIndexTemplateRequest request) { return execute(PutIndexTemplateAction.INSTANCE, request); diff --git a/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java b/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java index 5ce5eaef4ac..2c7d2eed465 100644 --- a/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java +++ b/src/main/java/org/elasticsearch/rest/action/admin/indices/analyze/RestAnalyzeAction.java @@ -22,6 +22,7 @@ import com.google.common.collect.Lists; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.client.Client; +import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; @@ -55,10 +56,10 @@ public class RestAnalyzeAction extends BaseRestHandler { @Override public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) { - String text = request.param("text"); + String[] texts = request.paramAsStringArrayOrEmptyIfAll("text"); AnalyzeRequest analyzeRequest = new 
AnalyzeRequest(request.param("index")); - analyzeRequest.text(text); + analyzeRequest.text(texts); analyzeRequest.preferLocal(request.paramAsBoolean("prefer_local", analyzeRequest.preferLocalShard())); analyzeRequest.analyzer(request.param("analyzer")); analyzeRequest.field(request.param("field")); @@ -69,9 +70,9 @@ public class RestAnalyzeAction extends BaseRestHandler { if (RestActions.hasBodyContent(request)) { XContentType type = RestActions.guessBodyContentType(request); if (type == null) { - if (text == null) { - text = RestActions.getRestContent(request).toUtf8(); - analyzeRequest.text(text); + if (texts == null || texts.length == 0) { + texts = new String[]{ RestActions.getRestContent(request).toUtf8() }; + analyzeRequest.text(texts); } } else { // NOTE: if rest request with xcontent body has request parameters, the parameters does not override xcontent values @@ -95,7 +96,16 @@ public class RestAnalyzeAction extends BaseRestHandler { } else if ("prefer_local".equals(currentFieldName) && token == XContentParser.Token.VALUE_BOOLEAN) { analyzeRequest.preferLocal(parser.booleanValue()); } else if ("text".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { - analyzeRequest.text(parser.text()); + analyzeRequest.text(parser.text()); + } else if ("text".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) { + List texts = Lists.newArrayList(); + while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { + if (token.isValue() == false) { + throw new IllegalArgumentException(currentFieldName + " array element should only contain text"); + } + texts.add(parser.text()); + } + analyzeRequest.text(texts.toArray(Strings.EMPTY_ARRAY)); } else if ("analyzer".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { analyzeRequest.analyzer(parser.text()); } else if ("field".equals(currentFieldName) && token == XContentParser.Token.VALUE_STRING) { @@ -110,7 +120,7 @@ public class RestAnalyzeAction extends 
BaseRestHandler { } filters.add(parser.text()); } - analyzeRequest.tokenFilters(filters.toArray(new String[0])); + analyzeRequest.tokenFilters(filters.toArray(Strings.EMPTY_ARRAY)); } else if ("char_filters".equals(currentFieldName) && token == XContentParser.Token.START_ARRAY) { List charFilters = Lists.newArrayList(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { @@ -119,7 +129,7 @@ public class RestAnalyzeAction extends BaseRestHandler { } charFilters.add(parser.text()); } - analyzeRequest.tokenFilters(charFilters.toArray(new String[0])); + analyzeRequest.charFilters(charFilters.toArray(Strings.EMPTY_ARRAY)); /* char_filters belong in charFilters, not tokenFilters */ } else { throw new IllegalArgumentException("Unknown parameter [" + currentFieldName + "] in request body or parameter is of the wrong type[" + token + "] "); } diff --git a/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java b/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java index bf169d254e6..436483b6869 100644 --- a/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java +++ b/src/test/java/org/elasticsearch/indices/analyze/AnalyzeActionTests.java @@ -158,18 +158,7 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest { ensureGreen(); client().admin().indices().preparePutMapping("test") - .setType("document").setSource( - "{\n" + - " \"document\":{\n" + - " \"properties\":{\n" + - " \"simple\":{\n" + - " \"type\":\"string\",\n" + - " \"analyzer\": \"simple\"\n" + - " }\n" + - " }\n" + - " }\n" + - "}" - ).get(); + .setType("document").setSource("simple", "type=string,analyzer=simple").get(); for (int i = 0; i < 10; i++) { final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze("THIS IS A TEST"); @@ -220,7 +209,8 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest { RestAnalyzeAction.buildFromContent(content, analyzeRequest); - assertThat(analyzeRequest.text(), equalTo("THIS IS A TEST")); + 
assertThat(analyzeRequest.text().length, equalTo(1)); + assertThat(analyzeRequest.text(), equalTo(new String[]{"THIS IS A TEST"})); assertThat(analyzeRequest.tokenizer(), equalTo("keyword")); assertThat(analyzeRequest.tokenFilters(), equalTo(new String[]{"lowercase"})); } @@ -239,7 +229,6 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest { } } - @Test public void testParseXContentForAnalyzeRequestWithUnknownParamThrowsException() throws Exception { AnalyzeRequest analyzeRequest = new AnalyzeRequest("for test"); @@ -258,4 +247,35 @@ public class AnalyzeActionTests extends ElasticsearchIntegrationTest { } } + @Test + public void analyzerWithMultiValues() throws Exception { + + assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); + ensureGreen(); + + client().admin().indices().preparePutMapping("test") + .setType("document").setSource("simple", "type=string,analyzer=simple,position_offset_gap=100").get(); + + String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"}; + + final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze(); + requestBuilder.setText(texts); + requestBuilder.setIndex(indexOrAlias()); + requestBuilder.setField("simple"); + AnalyzeResponse analyzeResponse = requestBuilder.get(); + assertThat(analyzeResponse.getTokens().size(), equalTo(7)); + AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(3); + assertThat(token.getTerm(), equalTo("test")); + assertThat(token.getPosition(), equalTo(3)); + assertThat(token.getStartOffset(), equalTo(10)); + assertThat(token.getEndOffset(), equalTo(14)); + + token = analyzeResponse.getTokens().get(5); + assertThat(token.getTerm(), equalTo("second")); + assertThat(token.getPosition(), equalTo(105)); + assertThat(token.getStartOffset(), equalTo(19)); + assertThat(token.getEndOffset(), equalTo(25)); + + } + }