Fix source filtering in reindex-from-remote (#22514)

Reindex-from-remote was accepting source filtering in the request
but ignoring it and setting `_source=true` on the search URI. This
fixes the filtering so it is piped through to the remote node and
adds tests for that.

Closes #22507
This commit is contained in:
Nik Everett 2017-01-10 09:00:12 -05:00 committed by GitHub
parent 28273e0a52
commit 5ef78fd015
7 changed files with 140 additions and 18 deletions

View File

@ -71,6 +71,9 @@ public class ReindexRequest extends AbstractBulkIndexByScrollRequest<ReindexRequ
if (getSearchRequest().indices() == null || getSearchRequest().indices().length == 0) { if (getSearchRequest().indices() == null || getSearchRequest().indices().length == 0) {
e = addValidationError("use _all if you really want to copy from all existing indexes", e); e = addValidationError("use _all if you really want to copy from all existing indexes", e);
} }
if (getSearchRequest().source().fetchSource() != null && getSearchRequest().source().fetchSource().fetchSource() == false) {
e = addValidationError("_source:false is not supported in this context", e);
}
/* /*
* Note that we don't call index's validator - it won't work because * Note that we don't call index's validator - it won't work because
* we'll be filling in portions of it as we receive the docs. But we can * we'll be filling in portions of it as we receive the docs. But we can

View File

@ -102,27 +102,30 @@ final class RemoteRequestBuilders {
String storedFieldsParamName = remoteVersion.before(Version.V_5_0_0_alpha4) ? "fields" : "stored_fields"; String storedFieldsParamName = remoteVersion.before(Version.V_5_0_0_alpha4) ? "fields" : "stored_fields";
params.put(storedFieldsParamName, fields.toString()); params.put(storedFieldsParamName, fields.toString());
} }
// We always want the _source document and this will force it to be returned.
params.put("_source", "true");
return params; return params;
} }
static HttpEntity initialSearchEntity(BytesReference query) { static HttpEntity initialSearchEntity(SearchRequest searchRequest, BytesReference query) {
// EMPTY is safe here because we're not calling namedObject // EMPTY is safe here because we're not calling namedObject
try (XContentBuilder entity = JsonXContent.contentBuilder(); try (XContentBuilder entity = JsonXContent.contentBuilder();
XContentParser queryParser = XContentHelper.createParser(NamedXContentRegistry.EMPTY, query)) { XContentParser queryParser = XContentHelper.createParser(NamedXContentRegistry.EMPTY, query)) {
entity.startObject(); entity.startObject();
entity.field("query");
/* entity.field("query"); {
* We're intentionally a bit paranoid here - copying the query as xcontent rather than writing a raw field. We don't want poorly /* We're intentionally a bit paranoid here - copying the query as xcontent rather than writing a raw field. We don't want
* written queries to escape. Ever. * poorly written queries to escape. Ever. */
*/ entity.copyCurrentStructure(queryParser);
entity.copyCurrentStructure(queryParser); XContentParser.Token shouldBeEof = queryParser.nextToken();
XContentParser.Token shouldBeEof = queryParser.nextToken(); if (shouldBeEof != null) {
if (shouldBeEof != null) { throw new ElasticsearchException(
throw new ElasticsearchException( "query was more than a single object. This first token after the object is [" + shouldBeEof + "]");
"query was more than a single object. This first token after the object is [" + shouldBeEof + "]"); }
} }
if (searchRequest.source().fetchSource() != null) {
entity.field("_source", searchRequest.source().fetchSource());
}
entity.endObject(); entity.endObject();
BytesRef bytes = entity.bytes().toBytesRef(); BytesRef bytes = entity.bytes().toBytesRef();
return new ByteArrayEntity(bytes.bytes, bytes.offset, bytes.length, ContentType.APPLICATION_JSON); return new ByteArrayEntity(bytes.bytes, bytes.offset, bytes.length, ContentType.APPLICATION_JSON);

View File

@ -101,7 +101,7 @@ public class RemoteScrollableHitSource extends ScrollableHitSource {
lookupRemoteVersion(version -> { lookupRemoteVersion(version -> {
remoteVersion = version; remoteVersion = version;
execute("POST", initialSearchPath(searchRequest), initialSearchParams(searchRequest, version), execute("POST", initialSearchPath(searchRequest), initialSearchParams(searchRequest, version),
initialSearchEntity(query), RESPONSE_PARSER, r -> onStartResponse(onResponse, r)); initialSearchEntity(searchRequest, query), RESPONSE_PARSER, r -> onStartResponse(onResponse, r));
}); });
} }

View File

@ -152,20 +152,29 @@ public class RemoteRequestBuildersTests extends ESTestCase {
assertThat(params, scroll == null ? not(hasKey("scroll")) : hasEntry("scroll", scroll.toString())); assertThat(params, scroll == null ? not(hasKey("scroll")) : hasEntry("scroll", scroll.toString()));
assertThat(params, hasEntry("size", Integer.toString(size))); assertThat(params, hasEntry("size", Integer.toString(size)));
assertThat(params, fetchVersion == null || fetchVersion == true ? hasEntry("version", null) : not(hasEntry("version", null))); assertThat(params, fetchVersion == null || fetchVersion == true ? hasEntry("version", null) : not(hasEntry("version", null)));
assertThat(params, hasEntry("_source", "true"));
} }
public void testInitialSearchEntity() throws IOException { public void testInitialSearchEntity() throws IOException {
SearchRequest searchRequest = new SearchRequest();
searchRequest.source(new SearchSourceBuilder());
String query = "{\"match_all\":{}}"; String query = "{\"match_all\":{}}";
HttpEntity entity = initialSearchEntity(new BytesArray(query)); HttpEntity entity = initialSearchEntity(searchRequest, new BytesArray(query));
assertEquals(ContentType.APPLICATION_JSON.toString(), entity.getContentType().getValue()); assertEquals(ContentType.APPLICATION_JSON.toString(), entity.getContentType().getValue());
assertEquals("{\"query\":" + query + "}", assertEquals("{\"query\":" + query + "}",
Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8))); Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8)));
// Source filtering is included if set up
searchRequest.source().fetchSource(new String[] {"in1", "in2"}, new String[] {"out"});
entity = initialSearchEntity(searchRequest, new BytesArray(query));
assertEquals(ContentType.APPLICATION_JSON.toString(), entity.getContentType().getValue());
assertEquals("{\"query\":" + query + ",\"_source\":{\"includes\":[\"in1\",\"in2\"],\"excludes\":[\"out\"]}}",
Streams.copyToString(new InputStreamReader(entity.getContent(), StandardCharsets.UTF_8)));
// Invalid XContent fails // Invalid XContent fails
RuntimeException e = expectThrows(RuntimeException.class, () -> initialSearchEntity(new BytesArray("{}, \"trailing\": {}"))); RuntimeException e = expectThrows(RuntimeException.class,
() -> initialSearchEntity(searchRequest, new BytesArray("{}, \"trailing\": {}")));
assertThat(e.getCause().getMessage(), containsString("Unexpected character (',' (code 44))")); assertThat(e.getCause().getMessage(), containsString("Unexpected character (',' (code 44))"));
e = expectThrows(RuntimeException.class, () -> initialSearchEntity(new BytesArray("{"))); e = expectThrows(RuntimeException.class, () -> initialSearchEntity(searchRequest, new BytesArray("{")));
assertThat(e.getCause().getMessage(), containsString("Unexpected end-of-input")); assertThat(e.getCause().getMessage(), containsString("Unexpected end-of-input"));
} }

View File

@ -416,3 +416,42 @@
type: foo type: foo
id: 1 id: 1
- match: { _source: {} } - match: { _source: {} }
---
"Reindex with source filtering":
- do:
index:
index: source
type: foo
id: 1
body: { "text": "test", "filtered": "removed" }
refresh: true
- do:
reindex:
refresh: true
body:
source:
index: source
_source:
excludes:
- filtered
dest:
index: dest
- match: {created: 1}
- match: {updated: 0}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
- is_false: task
- is_false: deleted
- do:
get:
index: dest
type: foo
id: 1
- match: { _source.text: "test" }
- is_false: _source.filtered

View File

@ -295,3 +295,21 @@
index: test index: test
dest: dest:
index: dest index: dest
---
"_source:false is rejected":
- do:
index:
index: source
type: foo
id: 1
body: { "text": "test" }
- do:
catch: /_source:false is not supported in this context/
reindex:
body:
source:
index: source
_source: false
dest:
index: dest

View File

@ -311,3 +311,53 @@
index: source index: source
dest: dest:
index: dest index: dest
---
"Reindex from remote with source filtering":
- do:
index:
index: source
type: foo
id: 1
body: { "text": "test", "filtered": "removed" }
refresh: true
# Fetch the http host. We use the host of the master because we know there will always be a master.
- do:
cluster.state: {}
- set: { master_node: master }
- do:
nodes.info:
metric: [ http ]
- is_true: nodes.$master.http.publish_address
- set: {nodes.$master.http.publish_address: host}
- do:
reindex:
refresh: true
body:
source:
remote:
host: http://${host}
index: source
_source:
excludes:
- filtered
dest:
index: dest
- match: {created: 1}
- match: {updated: 0}
- match: {version_conflicts: 0}
- match: {batches: 1}
- match: {failures: []}
- match: {throttled_millis: 0}
- gte: { took: 0 }
- is_false: task
- is_false: deleted
- do:
get:
index: dest
type: foo
id: 1
- match: { _source.text: "test" }
- is_false: _source.filtered