Remove ldjson support and document ndjson for bulk/msearch (#23049)

This commit removes support for the `application/x-ldjson` Content-Type header as this was only used in the first draft
of the spec and had very little uptake. Additionally, the docs for bulk and msearch have been updated to specifically
call out ndjson and mention that the newline character may be preceded by a carriage return.

Finally, the bulk request handling of the carriage return has been improved to remove this character from the source.

Closes #23025
This commit is contained in:
Jay Modi 2017-02-08 11:55:50 -05:00 committed by GitHub
parent 40f40d7676
commit 7f3769c745
7 changed files with 37 additions and 24 deletions

View File

@ -438,23 +438,25 @@ public class BulkRequest extends ActionRequest implements CompositeIndicesReques
if ("index".equals(action)) { if ("index".equals(action)) {
if (opType == null) { if (opType == null) {
internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType) internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType)
.setPipeline(pipeline).source(data.slice(from, nextMarker - from), xContentType), payload); .setPipeline(pipeline)
.source(sliceTrimmingCarriageReturn(data, from, nextMarker,xContentType), xContentType), payload);
} else { } else {
internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType) internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType)
.create("create".equals(opType)).setPipeline(pipeline) .create("create".equals(opType)).setPipeline(pipeline)
.source(data.slice(from, nextMarker - from), xContentType), payload); .source(sliceTrimmingCarriageReturn(data, from, nextMarker, xContentType), xContentType), payload);
} }
} else if ("create".equals(action)) { } else if ("create".equals(action)) {
internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType) internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType)
.create(true).setPipeline(pipeline) .create(true).setPipeline(pipeline)
.source(data.slice(from, nextMarker - from), xContentType), payload); .source(sliceTrimmingCarriageReturn(data, from, nextMarker, xContentType), xContentType), payload);
} else if ("update".equals(action)) { } else if ("update".equals(action)) {
UpdateRequest updateRequest = new UpdateRequest(index, type, id).routing(routing).parent(parent).retryOnConflict(retryOnConflict) UpdateRequest updateRequest = new UpdateRequest(index, type, id).routing(routing).parent(parent).retryOnConflict(retryOnConflict)
.version(version).versionType(versionType) .version(version).versionType(versionType)
.routing(routing) .routing(routing)
.parent(parent); .parent(parent);
// EMPTY is safe here because we never call namedObject // EMPTY is safe here because we never call namedObject
try (XContentParser sliceParser = xContent.createParser(NamedXContentRegistry.EMPTY, data.slice(from, nextMarker - from))) { try (XContentParser sliceParser = xContent.createParser(NamedXContentRegistry.EMPTY,
sliceTrimmingCarriageReturn(data, from, nextMarker, xContentType))) {
updateRequest.fromXContent(sliceParser); updateRequest.fromXContent(sliceParser);
} }
if (fetchSourceContext != null) { if (fetchSourceContext != null) {
@ -485,6 +487,20 @@ public class BulkRequest extends ActionRequest implements CompositeIndicesReques
return this; return this;
} }
/**
* Returns the sliced {@link BytesReference}. If the {@link XContentType} is JSON, the byte preceding the marker is checked to see
* if it is a carriage return and if so, the BytesReference is sliced so that the carriage return is ignored
*/
private BytesReference sliceTrimmingCarriageReturn(BytesReference bytesReference, int from, int nextMarker, XContentType xContentType) {
final int length;
if (XContentType.JSON == xContentType && bytesReference.get(nextMarker - 1) == (byte) '\r') {
length = nextMarker - from - 1;
} else {
length = nextMarker - from;
}
return bytesReference.slice(from, length);
}
/** /**
* Sets the number of shard copies that must be active before proceeding with the write. * Sets the number of shard copies that must be active before proceeding with the write.
* See {@link ReplicationRequest#waitForActiveShards(ActiveShardCount)} for details. * See {@link ReplicationRequest#waitForActiveShards(ActiveShardCount)} for details.

View File

@ -249,10 +249,8 @@ public class RestController extends AbstractComponent {
"in a supported format."); "in a supported format.");
} else if (restHandler != null && restHandler.supportsContentStream() && restRequest.header("Content-Type") != null) { } else if (restHandler != null && restHandler.supportsContentStream() && restRequest.header("Content-Type") != null) {
final String lowercaseMediaType = restRequest.header("Content-Type").toLowerCase(Locale.ROOT); final String lowercaseMediaType = restRequest.header("Content-Type").toLowerCase(Locale.ROOT);
// we also support line-delimited JSON, which isn't official and has a few variations // we also support newline delimited JSON: http://specs.okfnlabs.org/ndjson/
// http://specs.okfnlabs.org/ndjson/ if (lowercaseMediaType.equals("application/x-ndjson")) {
// https://github.com/ndjson/ndjson-spec/blob/48ea03cea6796b614cfbff4d4eb921f0b1d35c26/specification.md
if (lowercaseMediaType.equals("application/x-ldjson") || lowercaseMediaType.equals("application/x-ndjson")) {
restRequest.setXContentType(XContentType.JSON); restRequest.setXContentType(XContentType.JSON);
} else if (isContentTypeRequired) { } else if (isContentTypeRequired) {
return false; return false;

View File

@ -181,9 +181,7 @@ public abstract class RestRequest implements ToXContent.Params {
/** /**
* Sets the {@link XContentType} * Sets the {@link XContentType}
* @deprecated this is only used to allow BWC with content-type detection
*/ */
@Deprecated
final void setXContentType(XContentType xContentType) { final void setXContentType(XContentType xContentType) {
this.xContentType.set(xContentType); this.xContentType.set(xContentType);
} }

View File

@ -19,7 +19,6 @@
package org.elasticsearch.action.bulk; package org.elasticsearch.action.bulk;
import org.apache.lucene.util.Constants;
import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.DocWriteRequest; import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.delete.DeleteRequest; import org.elasticsearch.action.delete.DeleteRequest;
@ -28,7 +27,6 @@ import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
import org.elasticsearch.action.update.UpdateRequest; import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.Requests; import org.elasticsearch.client.Requests;
import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.BytesStreamOutput;
@ -57,10 +55,6 @@ import static org.hamcrest.Matchers.notNullValue;
public class BulkRequestTests extends ESTestCase { public class BulkRequestTests extends ESTestCase {
public void testSimpleBulk1() throws Exception { public void testSimpleBulk1() throws Exception {
String bulkAction = copyToStringFromClasspath("/org/elasticsearch/action/bulk/simple-bulk.json"); String bulkAction = copyToStringFromClasspath("/org/elasticsearch/action/bulk/simple-bulk.json");
// translate Windows line endings (\r\n) to standard ones (\n)
if (Constants.WINDOWS) {
bulkAction = Strings.replace(bulkAction, "\r\n", "\n");
}
BulkRequest bulkRequest = new BulkRequest(); BulkRequest bulkRequest = new BulkRequest();
bulkRequest.add(bulkAction.getBytes(StandardCharsets.UTF_8), 0, bulkAction.length(), null, null, XContentType.JSON); bulkRequest.add(bulkAction.getBytes(StandardCharsets.UTF_8), 0, bulkAction.length(), null, null, XContentType.JSON);
assertThat(bulkRequest.numberOfActions(), equalTo(3)); assertThat(bulkRequest.numberOfActions(), equalTo(3));
@ -74,7 +68,7 @@ public class BulkRequestTests extends ESTestCase {
BulkRequest bulkRequest = new BulkRequest(); BulkRequest bulkRequest = new BulkRequest();
bulkRequest.add(bulkAction.getBytes(StandardCharsets.UTF_8), 0, bulkAction.length(), null, null, XContentType.JSON); bulkRequest.add(bulkAction.getBytes(StandardCharsets.UTF_8), 0, bulkAction.length(), null, null, XContentType.JSON);
assertThat(bulkRequest.numberOfActions(), equalTo(1)); assertThat(bulkRequest.numberOfActions(), equalTo(1));
assertThat(((IndexRequest) bulkRequest.requests().get(0)).source(), equalTo(new BytesArray("{ \"field1\" : \"value1\" }\r"))); assertThat(((IndexRequest) bulkRequest.requests().get(0)).source(), equalTo(new BytesArray("{ \"field1\" : \"value1\" }")));
Map<String, Object> sourceMap = XContentHelper.convertToMap(((IndexRequest) bulkRequest.requests().get(0)).source(), Map<String, Object> sourceMap = XContentHelper.convertToMap(((IndexRequest) bulkRequest.requests().get(0)).source(),
false, XContentType.JSON).v2(); false, XContentType.JSON).v2();
assertEquals("value1", sourceMap.get("field1")); assertEquals("value1", sourceMap.get("field1"));

View File

@ -312,7 +312,7 @@ public class RestControllerTests extends ESTestCase {
} }
public void testDispatchWorksWithNewlineDelimitedJson() { public void testDispatchWorksWithNewlineDelimitedJson() {
final String mimeType = randomFrom("application/x-ldjson", "application/x-ndjson"); final String mimeType = "application/x-ndjson";
String content = randomAsciiOfLengthBetween(1, BREAKER_LIMIT.bytesAsInt()); String content = randomAsciiOfLengthBetween(1, BREAKER_LIMIT.bytesAsInt());
FakeRestRequest fakeRestRequest = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY) FakeRestRequest fakeRestRequest = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
.withContent(new BytesArray(content), null).withPath("/foo") .withContent(new BytesArray(content), null).withPath("/foo")

View File

@ -21,8 +21,8 @@ Python::
********************************************* *********************************************
The REST API endpoint is `/_bulk`, and it expects the following JSON The REST API endpoint is `/_bulk`, and it expects the following newline delimited JSON
structure: (NDJSON) structure:
[source,js] [source,js]
-------------------------------------------------- --------------------------------------------------
@ -36,7 +36,9 @@ optional_source\n
-------------------------------------------------- --------------------------------------------------
// NOTCONSOLE // NOTCONSOLE
*NOTE*: the final line of data must end with a newline character `\n`. *NOTE*: the final line of data must end with a newline character `\n`. Each newline character
may be preceded by a carriage return `\r`. When sending requests to this endpoint the
`Content-Type` header should be set to `application/x-ndjson`.
The possible actions are `index`, `create`, `delete` and `update`. The possible actions are `index`, `create`, `delete` and `update`.
`index` and `create` expect a source on the next `index` and `create` expect a source on the next

View File

@ -4,9 +4,10 @@
The multi search API allows to execute several search requests within The multi search API allows to execute several search requests within
the same API. The endpoint for it is `_msearch`. the same API. The endpoint for it is `_msearch`.
The format of the request is similar to the bulk API format, and the The format of the request is similar to the bulk API format and makes
structure is as follows (the structure is specifically optimized to use of the newline delimited JSON (NDJSON) format. the structure is as
reduce parsing if a specific search ends up redirected to another node): follows (the structure is specifically optimized to reduce parsing if
a specific search ends up redirected to another node):
[source,js] [source,js]
-------------------------------------------------- --------------------------------------------------
@ -17,6 +18,10 @@ body\n
-------------------------------------------------- --------------------------------------------------
// NOTCONSOLE // NOTCONSOLE
*NOTE*: the final line of data must end with a newline character `\n`. Each newline character
may be preceded by a carriage return `\r`. When sending requests to this endpoint the
`Content-Type` header should be set to `application/x-ndjson`.
The header part includes which index / indices to search on, optional The header part includes which index / indices to search on, optional
(mapping) types to search on, the `search_type`, `preference`, and (mapping) types to search on, the `search_type`, `preference`, and
`routing`. The body includes the typical search body request (including `routing`. The body includes the typical search body request (including