Remove ldjson support and document ndjson for bulk/msearch (#23049)

This commit removes support for the `application/x-ldjson` Content-Type header as this was only used in the first draft
of the spec and had very little uptake. Additionally, the docs for bulk and msearch have been updated to specifically
call out ndjson and mention that the newline character may be preceded by a carriage return.

Finally, the bulk request handling of the carriage return has been improved to remove this character from the source.

Closes #23025
This commit is contained in:
Jay Modi 2017-02-08 11:55:50 -05:00 committed by GitHub
parent 40f40d7676
commit 7f3769c745
7 changed files with 37 additions and 24 deletions

View File

@ -438,23 +438,25 @@ public class BulkRequest extends ActionRequest implements CompositeIndicesReques
if ("index".equals(action)) {
if (opType == null) {
internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType)
.setPipeline(pipeline).source(data.slice(from, nextMarker - from), xContentType), payload);
.setPipeline(pipeline)
.source(sliceTrimmingCarriageReturn(data, from, nextMarker,xContentType), xContentType), payload);
} else {
internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType)
.create("create".equals(opType)).setPipeline(pipeline)
.source(data.slice(from, nextMarker - from), xContentType), payload);
.source(sliceTrimmingCarriageReturn(data, from, nextMarker, xContentType), xContentType), payload);
}
} else if ("create".equals(action)) {
internalAdd(new IndexRequest(index, type, id).routing(routing).parent(parent).version(version).versionType(versionType)
.create(true).setPipeline(pipeline)
.source(data.slice(from, nextMarker - from), xContentType), payload);
.source(sliceTrimmingCarriageReturn(data, from, nextMarker, xContentType), xContentType), payload);
} else if ("update".equals(action)) {
UpdateRequest updateRequest = new UpdateRequest(index, type, id).routing(routing).parent(parent).retryOnConflict(retryOnConflict)
.version(version).versionType(versionType)
.routing(routing)
.parent(parent);
// EMPTY is safe here because we never call namedObject
try (XContentParser sliceParser = xContent.createParser(NamedXContentRegistry.EMPTY, data.slice(from, nextMarker - from))) {
try (XContentParser sliceParser = xContent.createParser(NamedXContentRegistry.EMPTY,
sliceTrimmingCarriageReturn(data, from, nextMarker, xContentType))) {
updateRequest.fromXContent(sliceParser);
}
if (fetchSourceContext != null) {
@ -485,6 +487,20 @@ public class BulkRequest extends ActionRequest implements CompositeIndicesReques
return this;
}
/**
 * Returns the slice of {@code bytesReference} that starts at {@code from} and ends just before {@code nextMarker}.
 * If the {@link XContentType} is JSON and the last byte of that slice is a carriage return, the slice is shortened
 * by one byte so that the carriage return is not part of the returned bytes.
 *
 * @param bytesReference the bytes to slice
 * @param from index of the first byte of the slice
 * @param nextMarker index of the marker that terminates the slice (exclusive end)
 * @param xContentType content type of the data; trimming is only applied for {@link XContentType#JSON}
 * @return the bytes in {@code [from, nextMarker)}, without a trailing carriage return for JSON content
 */
private BytesReference sliceTrimmingCarriageReturn(BytesReference bytesReference, int from, int nextMarker, XContentType xContentType) {
    final int length = nextMarker - from;
    // Only inspect the byte before the marker when it actually belongs to the slice. For an empty slice that byte
    // lies outside of [from, nextMarker): reading it could hit index -1 or yield a negative slice length.
    if (length > 0 && XContentType.JSON == xContentType && bytesReference.get(nextMarker - 1) == (byte) '\r') {
        return bytesReference.slice(from, length - 1);
    }
    return bytesReference.slice(from, length);
}
/**
* Sets the number of shard copies that must be active before proceeding with the write.
* See {@link ReplicationRequest#waitForActiveShards(ActiveShardCount)} for details.

View File

@ -249,10 +249,8 @@ public class RestController extends AbstractComponent {
"in a supported format.");
} else if (restHandler != null && restHandler.supportsContentStream() && restRequest.header("Content-Type") != null) {
final String lowercaseMediaType = restRequest.header("Content-Type").toLowerCase(Locale.ROOT);
// we also support line-delimited JSON, which isn't official and has a few variations
// http://specs.okfnlabs.org/ndjson/
// https://github.com/ndjson/ndjson-spec/blob/48ea03cea6796b614cfbff4d4eb921f0b1d35c26/specification.md
if (lowercaseMediaType.equals("application/x-ldjson") || lowercaseMediaType.equals("application/x-ndjson")) {
// we also support newline delimited JSON: http://specs.okfnlabs.org/ndjson/
if (lowercaseMediaType.equals("application/x-ndjson")) {
restRequest.setXContentType(XContentType.JSON);
} else if (isContentTypeRequired) {
return false;

View File

@ -181,9 +181,7 @@ public abstract class RestRequest implements ToXContent.Params {
/**
* Sets the {@link XContentType}
* @param xContentType the content type to store in this request's {@code xContentType} holder
* @deprecated this is only used to allow BWC with content-type detection
*/
@Deprecated
final void setXContentType(XContentType xContentType) {
this.xContentType.set(xContentType);
}

View File

@ -19,7 +19,6 @@
package org.elasticsearch.action.bulk;
import org.apache.lucene.util.Constants;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.DocWriteRequest;
import org.elasticsearch.action.delete.DeleteRequest;
@ -28,7 +27,6 @@ import org.elasticsearch.action.support.WriteRequest.RefreshPolicy;
import org.elasticsearch.action.update.UpdateRequest;
import org.elasticsearch.client.Requests;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
@ -57,10 +55,6 @@ import static org.hamcrest.Matchers.notNullValue;
public class BulkRequestTests extends ESTestCase {
public void testSimpleBulk1() throws Exception {
String bulkAction = copyToStringFromClasspath("/org/elasticsearch/action/bulk/simple-bulk.json");
// translate Windows line endings (\r\n) to standard ones (\n)
if (Constants.WINDOWS) {
bulkAction = Strings.replace(bulkAction, "\r\n", "\n");
}
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.add(bulkAction.getBytes(StandardCharsets.UTF_8), 0, bulkAction.length(), null, null, XContentType.JSON);
assertThat(bulkRequest.numberOfActions(), equalTo(3));
@ -74,7 +68,7 @@ public class BulkRequestTests extends ESTestCase {
BulkRequest bulkRequest = new BulkRequest();
bulkRequest.add(bulkAction.getBytes(StandardCharsets.UTF_8), 0, bulkAction.length(), null, null, XContentType.JSON);
assertThat(bulkRequest.numberOfActions(), equalTo(1));
assertThat(((IndexRequest) bulkRequest.requests().get(0)).source(), equalTo(new BytesArray("{ \"field1\" : \"value1\" }\r")));
assertThat(((IndexRequest) bulkRequest.requests().get(0)).source(), equalTo(new BytesArray("{ \"field1\" : \"value1\" }")));
Map<String, Object> sourceMap = XContentHelper.convertToMap(((IndexRequest) bulkRequest.requests().get(0)).source(),
false, XContentType.JSON).v2();
assertEquals("value1", sourceMap.get("field1"));

View File

@ -312,7 +312,7 @@ public class RestControllerTests extends ESTestCase {
}
public void testDispatchWorksWithNewlineDelimitedJson() {
final String mimeType = randomFrom("application/x-ldjson", "application/x-ndjson");
final String mimeType = "application/x-ndjson";
String content = randomAsciiOfLengthBetween(1, BREAKER_LIMIT.bytesAsInt());
FakeRestRequest fakeRestRequest = new FakeRestRequest.Builder(NamedXContentRegistry.EMPTY)
.withContent(new BytesArray(content), null).withPath("/foo")

View File

@ -21,8 +21,8 @@ Python::
*********************************************
The REST API endpoint is `/_bulk`, and it expects the following JSON
structure:
The REST API endpoint is `/_bulk`, and it expects the following newline delimited JSON
(NDJSON) structure:
[source,js]
--------------------------------------------------
@ -36,7 +36,9 @@ optional_source\n
--------------------------------------------------
// NOTCONSOLE
*NOTE*: the final line of data must end with a newline character `\n`.
*NOTE*: the final line of data must end with a newline character `\n`. Each newline character
may be preceded by a carriage return `\r`. When sending requests to this endpoint, the
`Content-Type` header should be set to `application/x-ndjson`.
The possible actions are `index`, `create`, `delete` and `update`.
`index` and `create` expect a source on the next

View File

@ -4,9 +4,10 @@
The multi search API allows executing several search requests within
the same API. The endpoint for it is `_msearch`.
The format of the request is similar to the bulk API format, and the
structure is as follows (the structure is specifically optimized to
reduce parsing if a specific search ends up redirected to another node):
The format of the request is similar to the bulk API format and makes
use of the newline delimited JSON (NDJSON) format. The structure is as
follows (the structure is specifically optimized to reduce parsing if
a specific search ends up redirected to another node):
[source,js]
--------------------------------------------------
@ -17,6 +18,10 @@ body\n
--------------------------------------------------
// NOTCONSOLE
*NOTE*: the final line of data must end with a newline character `\n`. Each newline character
may be preceded by a carriage return `\r`. When sending requests to this endpoint, the
`Content-Type` header should be set to `application/x-ndjson`.
The header part includes which index / indices to search on, optional
(mapping) types to search on, the `search_type`, `preference`, and
`routing`. The body includes the typical search body request (including