From 5706858722452b13465b15930e4f4cb2e8286449 Mon Sep 17 00:00:00 2001 From: Britta Weber Date: Thu, 17 Jul 2014 11:13:16 +0200 Subject: [PATCH] Add parameter to GET for checking if generated fields can be retrieved Fields of type `token_count`, `murmur3`, `_all` and `_field_names` are generated only when indexing. If a GET request accesses the transaction log (because no refresh happened between indexing and the GET request) then these fields cannot be retrieved at all. Previously the behavior was as follows: `_all, _field_names`: The field was silently ignored `murmur3, token_count`: `NumberFormatException` because GET tried to parse the values from the source. In addition, if these fields were not stored, the same behavior occurred if the fields were retrieved with GET after a `refresh()` because here also the source was used to get the fields. Now, GET accepts a parameter `ignore_errors_on_generated_fields` which has the following effect: - Throw exception with meaningful error message explaining the problem if set to false (default) - Ignore the field if set to true - Always ignore the field if it was not set to stored This changes the behavior for `_all` and `_field_names` as now an Exception is thrown if a user tries to GET them before a `refresh()`. 
closes #6676 closes #6973 --- docs/reference/docs/get.asciidoc | 12 +- docs/reference/docs/multi-get.asciidoc | 8 + .../explain/TransportExplainAction.java | 2 +- .../elasticsearch/action/get/GetRequest.java | 19 +- .../action/get/GetRequestBuilder.java | 5 + .../action/get/MultiGetRequest.java | 14 + .../action/get/MultiGetRequestBuilder.java | 5 + .../action/get/MultiGetShardRequest.java | 18 +- .../action/get/TransportGetAction.java | 2 +- .../action/get/TransportMultiGetAction.java | 1 + .../get/TransportShardMultiGetAction.java | 2 +- .../action/update/UpdateHelper.java | 2 +- .../index/get/ShardGetService.java | 52 +- .../index/mapper/FieldMapper.java | 8 + .../mapper/core/AbstractFieldMapper.java | 7 + .../index/mapper/core/Murmur3FieldMapper.java | 5 + .../mapper/core/TokenCountFieldMapper.java | 6 + .../index/mapper/internal/AllFieldMapper.java | 5 + .../internal/FieldNamesFieldMapper.java | 5 + .../termvectors/ShardTermVectorService.java | 2 +- .../rest/action/get/RestGetAction.java | 1 + .../rest/action/get/RestMultiGetAction.java | 1 + .../org/elasticsearch/get/GetActionTests.java | 449 +++++++++++++++++- .../test/ElasticsearchIntegrationTest.java | 13 + 24 files changed, 618 insertions(+), 26 deletions(-) diff --git a/docs/reference/docs/get.asciidoc b/docs/reference/docs/get.asciidoc index 59d91112ea6..53e33591076 100644 --- a/docs/reference/docs/get.asciidoc +++ b/docs/reference/docs/get.asciidoc @@ -124,6 +124,15 @@ Field values fetched from the document it self are always returned as an array. Also only leaf fields can be returned via the `field` option. So object fields can't be returned and such requests will fail. +[float] +[[generated-fields]] +=== Generated fields +added[1.4.0] + +If no refresh occurred between indexing and the GET request, GET will access the transaction log to fetch the document. However, some fields are generated only when indexing. 
+If you try to access a field that is only generated when indexing, you will get an exception (default). You can choose to ignore fields that are generated if the transaction log is accessed by setting `ignore_errors_on_generated_fields=true`. + + [float] [[_source]] === Getting the _source directly @@ -223,4 +232,5 @@ it's current version is equal to the specified one. This behavior is the same for all version types with the exception of version type `FORCE` which always retrieves the document. -Note that Elasticsearch do not store older versions of documents. Only the current version can be retrieved. \ No newline at end of file +Note that Elasticsearch do not store older versions of documents. Only the current version can be retrieved. + diff --git a/docs/reference/docs/multi-get.asciidoc b/docs/reference/docs/multi-get.asciidoc index 74be948cbc9..1feb4410ab0 100644 --- a/docs/reference/docs/multi-get.asciidoc +++ b/docs/reference/docs/multi-get.asciidoc @@ -180,6 +180,14 @@ curl 'localhost:9200/_mget' -d '{ }' -------------------------------------------------- +[float] +[[generated-fields]] +=== Generated fields + +added[1.4.0] + +See <<generated-fields>> for fields that are generated only when indexing. + [float] [[mget-routing]] === Routing diff --git a/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java b/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java index 6cab7b1ce25..1bfdfc3fd15 100644 --- a/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java +++ b/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java @@ -138,7 +138,7 @@ public class TransportExplainAction extends TransportShardSingleOperationAction< // Advantage is that we're not opening a second searcher to retrieve the _source. Also // because we are working in the same searcher in engineGetResult we can be sure that a // doc isn't deleted between the initial get and this call. 
- GetResult getResult = indexShard.getService().get(result, request.id(), request.type(), request.fields(), request.fetchSourceContext()); + GetResult getResult = indexShard.getService().get(result, request.id(), request.type(), request.fields(), request.fetchSourceContext(), false); return new ExplainResponse(true, explanation, getResult); } else { return new ExplainResponse(true, explanation); diff --git a/src/main/java/org/elasticsearch/action/get/GetRequest.java b/src/main/java/org/elasticsearch/action/get/GetRequest.java index baa846a846c..8ed7ae5ca8b 100644 --- a/src/main/java/org/elasticsearch/action/get/GetRequest.java +++ b/src/main/java/org/elasticsearch/action/get/GetRequest.java @@ -19,6 +19,7 @@ package org.elasticsearch.action.get; +import org.elasticsearch.Version; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.ValidateActions; import org.elasticsearch.action.support.single.shard.SingleShardOperationRequest; @@ -59,6 +60,7 @@ public class GetRequest extends SingleShardOperationRequest { private VersionType versionType = VersionType.INTERNAL; private long version = Versions.MATCH_ANY; + private boolean ignoreErrorsOnGeneratedFields; GetRequest() { type = "_all"; @@ -240,10 +242,19 @@ public class GetRequest extends SingleShardOperationRequest { return this; } + public GetRequest ignoreErrorsOnGeneratedFields(boolean ignoreErrorsOnGeneratedFields) { + this.ignoreErrorsOnGeneratedFields = ignoreErrorsOnGeneratedFields; + return this; + } + public VersionType versionType() { return this.versionType; } + public boolean ignoreErrorsOnGeneratedFields() { + return ignoreErrorsOnGeneratedFields; + } + @Override public void readFrom(StreamInput in) throws IOException { super.readFrom(in); @@ -265,6 +276,9 @@ public class GetRequest extends SingleShardOperationRequest { } else if (realtime == 1) { this.realtime = true; } + if(in.getVersion().onOrAfter(Version.V_1_4_0)) { + this.ignoreErrorsOnGeneratedFields = 
in.readBoolean(); + } this.versionType = VersionType.fromValue(in.readByte()); this.version = Versions.readVersionWithVLongForBW(in); @@ -296,7 +310,9 @@ public class GetRequest extends SingleShardOperationRequest { } else { out.writeByte((byte) 1); } - + if(out.getVersion().onOrAfter(Version.V_1_4_0)) { + out.writeBoolean(ignoreErrorsOnGeneratedFields); + } out.writeByte(versionType.getValue()); Versions.writeVersionWithVLongForBW(version, out); @@ -307,4 +323,5 @@ public class GetRequest extends SingleShardOperationRequest { public String toString() { return "get [" + index + "][" + type + "][" + id + "]: routing [" + routing + "]"; } + } diff --git a/src/main/java/org/elasticsearch/action/get/GetRequestBuilder.java b/src/main/java/org/elasticsearch/action/get/GetRequestBuilder.java index 47687b59f9e..f305630608c 100644 --- a/src/main/java/org/elasticsearch/action/get/GetRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/get/GetRequestBuilder.java @@ -174,6 +174,11 @@ public class GetRequestBuilder extends SingleShardOperationRequestBuilder implements I String preference; Boolean realtime; boolean refresh; + public boolean ignoreErrorsOnGeneratedFields = false; List items = new ArrayList<>(); @@ -323,6 +325,12 @@ public class MultiGetRequest extends ActionRequest implements I return this; } + + public MultiGetRequest ignoreErrorsOnGeneratedFields(boolean ignoreErrorsOnGeneratedFields) { + this.ignoreErrorsOnGeneratedFields = ignoreErrorsOnGeneratedFields; + return this; + } + public MultiGetRequest add(@Nullable String defaultIndex, @Nullable String defaultType, @Nullable String[] defaultFields, @Nullable FetchSourceContext defaultFetchSource, byte[] data, int from, int length) throws Exception { return add(defaultIndex, defaultType, defaultFields, defaultFetchSource, new BytesArray(data, from, length), true); } @@ -495,6 +503,9 @@ public class MultiGetRequest extends ActionRequest implements I } else if (realtime == 1) { this.realtime = true; } + 
if(in.getVersion().onOrAfter(Version.V_1_4_0)) { + ignoreErrorsOnGeneratedFields = in.readBoolean(); + } int size = in.readVInt(); items = new ArrayList<>(size); @@ -515,6 +526,9 @@ public class MultiGetRequest extends ActionRequest implements I } else { out.writeByte((byte) 1); } + if(out.getVersion().onOrAfter(Version.V_1_4_0)) { + out.writeBoolean(ignoreErrorsOnGeneratedFields); + } out.writeVInt(items.size()); for (Item item : items) { diff --git a/src/main/java/org/elasticsearch/action/get/MultiGetRequestBuilder.java b/src/main/java/org/elasticsearch/action/get/MultiGetRequestBuilder.java index 9aac19162f1..cc8389d8d0d 100644 --- a/src/main/java/org/elasticsearch/action/get/MultiGetRequestBuilder.java +++ b/src/main/java/org/elasticsearch/action/get/MultiGetRequestBuilder.java @@ -82,6 +82,11 @@ public class MultiGetRequestBuilder extends ActionRequestBuilder listener) { client.multiGet(request, listener); diff --git a/src/main/java/org/elasticsearch/action/get/MultiGetShardRequest.java b/src/main/java/org/elasticsearch/action/get/MultiGetShardRequest.java index 454dd051729..cb1853600b8 100644 --- a/src/main/java/org/elasticsearch/action/get/MultiGetShardRequest.java +++ b/src/main/java/org/elasticsearch/action/get/MultiGetShardRequest.java @@ -21,6 +21,7 @@ package org.elasticsearch.action.get; import com.carrotsearch.hppc.IntArrayList; import com.carrotsearch.hppc.LongArrayList; +import org.elasticsearch.Version; import org.elasticsearch.action.support.single.shard.SingleShardOperationRequest; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; @@ -39,6 +40,7 @@ public class MultiGetShardRequest extends SingleShardOperationRequest types; @@ -91,6 +93,11 @@ public class MultiGetShardRequest extends SingleShardOperationRequest * Note: Call must release engine searcher associated with engineGetResult! 
*/ - public GetResult get(Engine.GetResult engineGetResult, String id, String type, String[] fields, FetchSourceContext fetchSourceContext) { + public GetResult get(Engine.GetResult engineGetResult, String id, String type, String[] fields, FetchSourceContext fetchSourceContext, boolean ignoreErrorsOnGeneratedFields) { if (!engineGetResult.exists()) { return new GetResult(shardId.index().name(), type, id, -1, false, null, null); } @@ -135,7 +135,7 @@ public class ShardGetService extends AbstractIndexShardComponent { return new GetResult(shardId.index().name(), type, id, -1, false, null, null); } fetchSourceContext = normalizeFetchSourceContent(fetchSourceContext, fields); - GetResult getResult = innerGetLoadFromStoredFields(type, id, fields, fetchSourceContext, engineGetResult, docMapper); + GetResult getResult = innerGetLoadFromStoredFields(type, id, fields, fetchSourceContext, engineGetResult, docMapper, ignoreErrorsOnGeneratedFields); if (getResult.isExists()) { existsMetric.inc(System.nanoTime() - now); } else { @@ -165,7 +165,7 @@ public class ShardGetService extends AbstractIndexShardComponent { return FetchSourceContext.DO_NOT_FETCH_SOURCE; } - public GetResult innerGet(String type, String id, String[] gFields, boolean realtime, long version, VersionType versionType, FetchSourceContext fetchSourceContext) throws ElasticsearchException { + public GetResult innerGet(String type, String id, String[] gFields, boolean realtime, long version, VersionType versionType, FetchSourceContext fetchSourceContext, boolean ignoreErrorsOnGeneratedFields) throws ElasticsearchException { fetchSourceContext = normalizeFetchSourceContent(fetchSourceContext, gFields); boolean loadSource = (gFields != null && gFields.length > 0) || fetchSourceContext.fetchSource(); @@ -207,7 +207,7 @@ public class ShardGetService extends AbstractIndexShardComponent { try { // break between having loaded it from translog (so we only have _source), and having a document to load if 
(get.docIdAndVersion() != null) { - return innerGetLoadFromStoredFields(type, id, gFields, fetchSourceContext, get, docMapper); + return innerGetLoadFromStoredFields(type, id, gFields, fetchSourceContext, get, docMapper, ignoreErrorsOnGeneratedFields); } else { Translog.Source source = get.source(); @@ -241,20 +241,21 @@ public class ShardGetService extends AbstractIndexShardComponent { searchLookup.source().setNextSource(source.source); } - FieldMapper x = docMapper.mappers().smartNameFieldMapper(field); - if (x == null) { + FieldMapper fieldMapper = docMapper.mappers().smartNameFieldMapper(field); + if (fieldMapper == null) { if (docMapper.objectMappers().get(field) != null) { // Only fail if we know it is a object field, missing paths / fields shouldn't fail. throw new ElasticsearchIllegalArgumentException("field [" + field + "] isn't a leaf field"); } - } else if (docMapper.sourceMapper().enabled() || x.fieldType().stored()) { + } else if (shouldGetFromSource(ignoreErrorsOnGeneratedFields, docMapper, fieldMapper)) { List values = searchLookup.source().extractRawValues(field); if (!values.isEmpty()) { for (int i = 0; i < values.size(); i++) { - values.set(i, x.valueForSearch(values.get(i))); + values.set(i, fieldMapper.valueForSearch(values.get(i))); } value = values; } + } } if (value != null) { @@ -312,7 +313,27 @@ public class ShardGetService extends AbstractIndexShardComponent { } } - private GetResult innerGetLoadFromStoredFields(String type, String id, String[] gFields, FetchSourceContext fetchSourceContext, Engine.GetResult get, DocumentMapper docMapper) { + protected boolean shouldGetFromSource(boolean ignoreErrorsOnGeneratedFields, DocumentMapper docMapper, FieldMapper fieldMapper) { + if (!fieldMapper.isGenerated()) { + //if the field is always there we check if either source mapper is enabled, in which case we get the field + // from source, or, if the field is stored, in which case we have to get if from source here also (we are in the translog 
phase, doc not indexed yet, we annot access the stored fields) + return docMapper.sourceMapper().enabled() || fieldMapper.fieldType().stored(); + } else { + if (!fieldMapper.fieldType().stored()) { + //if it is not stored, user will not get the generated field back + return false; + } else { + if (ignoreErrorsOnGeneratedFields) { + return false; + } else { + throw new ElasticsearchException("Cannot access field " + fieldMapper.name() + " from transaction log. You can only get this field after refresh() has been called."); + } + } + + } + } + + private GetResult innerGetLoadFromStoredFields(String type, String id, String[] gFields, FetchSourceContext fetchSourceContext, Engine.GetResult get, DocumentMapper docMapper, boolean ignoreErrorsOnGeneratedFields) { Map fields = null; BytesReference source = null; Versions.DocIdAndVersion docIdAndVersion = get.docIdAndVersion(); @@ -335,17 +356,18 @@ public class ShardGetService extends AbstractIndexShardComponent { } // now, go and do the script thingy if needed + if (gFields != null && gFields.length > 0) { SearchLookup searchLookup = null; for (String field : gFields) { Object value = null; - FieldMappers x = docMapper.mappers().smartName(field); - if (x == null) { + FieldMappers fieldMapper = docMapper.mappers().smartName(field); + if (fieldMapper == null) { if (docMapper.objectMappers().get(field) != null) { // Only fail if we know it is a object field, missing paths / fields shouldn't fail. 
throw new ElasticsearchIllegalArgumentException("field [" + field + "] isn't a leaf field"); } - } else if (!x.mapper().fieldType().stored()) { + } else if (!fieldMapper.mapper().fieldType().stored() && !fieldMapper.mapper().isGenerated()) { if (searchLookup == null) { searchLookup = new SearchLookup(mapperService, fieldDataService, new String[]{type}); searchLookup.setNextReader(docIdAndVersion.context); @@ -356,7 +378,7 @@ public class ShardGetService extends AbstractIndexShardComponent { List values = searchLookup.source().extractRawValues(field); if (!values.isEmpty()) { for (int i = 0; i < values.size(); i++) { - values.set(i, x.mapper().valueForSearch(values.get(i))); + values.set(i, fieldMapper.mapper().valueForSearch(values.get(i))); } value = values; } diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 59a07ad0333..7b4e23883fd 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -292,4 +292,12 @@ public interface FieldMapper extends Mapper { Loading normsLoading(Loading defaultLoading); + /** + * Fields might not be available before indexing, for example _all, token_count,... + * When get is called and these fields are requested, this case needs special treatment. + * + * @return If the field is available before indexing or not. 
+ * */ + public boolean isGenerated(); + } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index 689c9a994f7..6a1f3dec477 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -1133,4 +1133,11 @@ public abstract class AbstractFieldMapper implements FieldMapper { } + /** + * Returns if this field is only generated when indexing. For example, the field of type token_count + */ + public boolean isGenerated() { + return false; + } + } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/Murmur3FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/Murmur3FieldMapper.java index 52676a04a9d..1b8ba8a9a45 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/Murmur3FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/Murmur3FieldMapper.java @@ -107,4 +107,9 @@ public class Murmur3FieldMapper extends LongFieldMapper { } + @Override + public boolean isGenerated() { + return true; + } + } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapper.java index 9d57c7bf92e..7beff6ffee9 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapper.java @@ -197,4 +197,10 @@ public class TokenCountFieldMapper extends IntegerFieldMapper { builder.field("analyzer", analyzer()); } + + @Override + public boolean isGenerated() { + return true; + } + } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java index ca62dce9e99..9e3f824c577 100644 --- 
a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java @@ -351,4 +351,9 @@ public class AllFieldMapper extends AbstractFieldMapper implements Inter public boolean hasDocValues() { return false; } + + @Override + public boolean isGenerated() { + return true; + } } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java index 804105201a7..c795e0c7dcc 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/FieldNamesFieldMapper.java @@ -249,4 +249,9 @@ public class FieldNamesFieldMapper extends AbstractFieldMapper implement } return super.toXContent(builder, params); } + + @Override + public boolean isGenerated() { + return true; + } } diff --git a/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java b/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java index 66fa992d847..c225bfcbeab 100644 --- a/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java +++ b/src/main/java/org/elasticsearch/index/termvectors/ShardTermVectorService.java @@ -131,7 +131,7 @@ public class ShardTermVectorService extends AbstractIndexShardComponent { } // TODO: support for fetchSourceContext? 
GetResult getResult = indexShard.getService().get( - get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null); + get, request.id(), request.type(), validFields.toArray(Strings.EMPTY_ARRAY), null, false); generatedTermVectors = generateTermVectors(getResult.getFields().values(), request.offsets()); } finally { get.release(); diff --git a/src/main/java/org/elasticsearch/rest/action/get/RestGetAction.java b/src/main/java/org/elasticsearch/rest/action/get/RestGetAction.java index 8a4808b9823..88b72b6e270 100644 --- a/src/main/java/org/elasticsearch/rest/action/get/RestGetAction.java +++ b/src/main/java/org/elasticsearch/rest/action/get/RestGetAction.java @@ -57,6 +57,7 @@ public class RestGetAction extends BaseRestHandler { getRequest.parent(request.param("parent")); getRequest.preference(request.param("preference")); getRequest.realtime(request.paramAsBoolean("realtime", null)); + getRequest.ignoreErrorsOnGeneratedFields(request.paramAsBoolean("ignore_errors_on_generated_fields", false)); String sField = request.param("fields"); if (sField != null) { diff --git a/src/main/java/org/elasticsearch/rest/action/get/RestMultiGetAction.java b/src/main/java/org/elasticsearch/rest/action/get/RestMultiGetAction.java index 81128e5692e..a8cb7f667fb 100644 --- a/src/main/java/org/elasticsearch/rest/action/get/RestMultiGetAction.java +++ b/src/main/java/org/elasticsearch/rest/action/get/RestMultiGetAction.java @@ -57,6 +57,7 @@ public class RestMultiGetAction extends BaseRestHandler { multiGetRequest.refresh(request.paramAsBoolean("refresh", multiGetRequest.refresh())); multiGetRequest.preference(request.param("preference")); multiGetRequest.realtime(request.paramAsBoolean("realtime", null)); + multiGetRequest.ignoreErrorsOnGeneratedFields(request.paramAsBoolean("ignore_errors_on_generated_fields", false)); String[] sFields = null; String sField = request.param("fields"); diff --git a/src/test/java/org/elasticsearch/get/GetActionTests.java 
b/src/test/java/org/elasticsearch/get/GetActionTests.java index a2780a9decd..b28c07fc255 100644 --- a/src/test/java/org/elasticsearch/get/GetActionTests.java +++ b/src/test/java/org/elasticsearch/get/GetActionTests.java @@ -19,16 +19,16 @@ package org.elasticsearch.get; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.action.ShardOperationFailedException; import org.elasticsearch.action.admin.cluster.health.ClusterHealthResponse; import org.elasticsearch.action.admin.cluster.health.ClusterHealthStatus; import org.elasticsearch.action.admin.indices.flush.FlushResponse; import org.elasticsearch.action.delete.DeleteResponse; -import org.elasticsearch.action.get.GetResponse; -import org.elasticsearch.action.get.MultiGetRequest; -import org.elasticsearch.action.get.MultiGetResponse; +import org.elasticsearch.action.get.*; import org.elasticsearch.common.Base64; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; @@ -39,10 +39,12 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest; import org.elasticsearch.test.junit.annotations.TestLogging; import org.junit.Test; +import java.io.IOException; import java.util.Map; import static org.elasticsearch.client.Requests.clusterHealthRequest; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.Matchers.*; public class GetActionTests extends ElasticsearchIntegrationTest { @@ -909,4 +911,445 @@ public class GetActionTests extends ElasticsearchIntegrationTest { assertNotNull(getResponse.getField("_all").getValue()); assertThat(getResponse.getField("_all").getValue().toString(), equalTo("some text" + " ")); } + + @Test + public void 
testUngeneratedFieldsThatAreNeverStored() throws IOException { + String createIndexSource = "{\n" + + " \"settings\": {\n" + + " \"index.translog.disable_flush\": true,\n" + + " \"refresh_interval\": \"-1\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"doc\": {\n" + + " \"_source\": {\n" + + " \"enabled\": \"" + randomBoolean() + "\"\n" + + " },\n" + + " \"properties\": {\n" + + " \"suggest\": {\n" + + " \"type\": \"completion\"\n" + + " }\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + assertAcked(prepareCreate("testidx").setSource(createIndexSource)); + ensureGreen(); + String doc = "{\n" + + " \"suggest\": {\n" + + " \"input\": [\n" + + " \"Nevermind\",\n" + + " \"Nirvana\"\n" + + " ],\n" + + " \"output\": \"Nirvana - Nevermind\"\n" + + " }\n" + + "}"; + + index("testidx", "doc", "1", doc); + String[] fieldsList = {"suggest"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + } + + @Test + public void testUngeneratedFieldsThatAreAlwaysStored() throws IOException { + String storedString = randomBoolean() ? 
"yes" : "no"; + String createIndexSource = "{\n" + + " \"settings\": {\n" + + " \"index.translog.disable_flush\": true,\n" + + " \"refresh_interval\": \"-1\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"parentdoc\": {},\n" + + " \"doc\": {\n" + + " \"_source\": {\n" + + " \"enabled\": " + randomBoolean() + "\n" + + " },\n" + + " \"_parent\": {\n" + + " \"type\": \"parentdoc\",\n" + + " \"store\": \"" + storedString + "\"\n" + + " },\n" + + " \"_ttl\": {\n" + + " \"enabled\": true,\n" + + " \"store\": \"" + storedString + "\"\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + assertAcked(prepareCreate("testidx").setSource(createIndexSource)); + ensureGreen(); + String doc = "{\n" + + " \"_ttl\": \"1h\"\n" + + "}"; + + client().prepareIndex("testidx", "doc").setId("1").setSource(doc).setParent("1").execute().actionGet(); + + String[] fieldsList = {"_ttl", "_parent"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList, "1"); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList, "1"); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList, "1"); + } + + @Test + public void testUngeneratedFieldsPartOfSourceUnstoredSourceDisabled() throws IOException { + indexSingleDocumentWithUngeneratedFieldsThatArePartOf_source(false, false); + String[] fieldsList = {"my_boost"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + } + + @Test + public void 
testUngeneratedFieldsPartOfSourceEitherStoredOrSourceEnabled() throws IOException { + boolean stored = randomBoolean(); + boolean sourceEnabled = true; + if (stored) { + sourceEnabled = randomBoolean(); + } + indexSingleDocumentWithUngeneratedFieldsThatArePartOf_source(stored, sourceEnabled); + String[] fieldsList = {"my_boost"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + } + + void indexSingleDocumentWithUngeneratedFieldsThatArePartOf_source(boolean stored, boolean sourceEnabled) { + String storedString = stored ? "yes" : "no"; + String createIndexSource = "{\n" + + " \"settings\": {\n" + + " \"index.translog.disable_flush\": true,\n" + + " \"refresh_interval\": \"-1\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"doc\": {\n" + + " \"_source\": {\n" + + " \"enabled\": " + sourceEnabled + "\n" + + " },\n" + + " \"_boost\": {\n" + + " \"name\": \"my_boost\",\n" + + " \"null_value\": 1,\n" + + " \"store\": \"" + storedString + "\"\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + assertAcked(prepareCreate("testidx").setSource(createIndexSource)); + ensureGreen(); + String doc = "{\n" + + " \"my_boost\": 5.0,\n" + + " \"_ttl\": \"1h\"\n" + + "}\n"; + + client().prepareIndex("testidx", "doc").setId("1").setSource(doc).setRouting("1").execute().actionGet(); + } + + + @Test + public void testUngeneratedFieldsNotPartOfSourceUnstored() throws IOException { + indexSingleDocumentWithUngeneratedFieldsThatAreNeverPartOf_source(false, randomBoolean()); + String[] fieldsList = {"_timestamp", "_size", "_routing"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList, 
"1"); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList, "1"); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList, "1"); + } + + @Test + public void testUngeneratedFieldsNotPartOfSourceStored() throws IOException { + indexSingleDocumentWithUngeneratedFieldsThatAreNeverPartOf_source(true, randomBoolean()); + String[] fieldsList = {"_timestamp", "_size", "_routing"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList, "1"); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList, "1"); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList, "1"); + } + + void indexSingleDocumentWithUngeneratedFieldsThatAreNeverPartOf_source(boolean stored, boolean sourceEnabled) { + String storedString = stored ? 
"yes" : "no"; + String createIndexSource = "{\n" + + " \"settings\": {\n" + + " \"index.translog.disable_flush\": true,\n" + + " \"refresh_interval\": \"-1\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"parentdoc\": {},\n" + + " \"doc\": {\n" + + " \"_timestamp\": {\n" + + " \"store\": \"" + storedString + "\",\n" + + " \"enabled\": true\n" + + " },\n" + + " \"_routing\": {\n" + + " \"store\": \"" + storedString + "\"\n" + + " },\n" + + " \"_size\": {\n" + + " \"store\": \"" + storedString + "\",\n" + + " \"enabled\": true\n" + + " }\n" + + " }\n" + + " }\n" + + "}"; + + assertAcked(prepareCreate("testidx").setSource(createIndexSource)); + ensureGreen(); + String doc = "{\n" + + " \"text\": \"some text.\"\n" + + "}\n"; + client().prepareIndex("testidx", "doc").setId("1").setSource(doc).setRouting("1").execute().actionGet(); + } + + + @Test + public void testGeneratedStringFieldsUnstored() throws IOException { + indexSingleDocumentWithStringFieldsGeneratedFromText(false, randomBoolean()); + String[] fieldsList = {"_all", "_field_names"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + flush(); + //after flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + } + + @Test + public void testGeneratedStringFieldsStored() throws IOException { + indexSingleDocumentWithStringFieldsGeneratedFromText(true, randomBoolean()); + String[] fieldsList = {"_all", "_field_names"}; + // before refresh - document is only in translog + assertGetFieldsNull("testidx", "doc", "1", fieldsList); + assertGetFieldsException("testidx", "doc", "1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + flush(); + //after 
flush - document is in not anymore translog - only indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + } + + void indexSingleDocumentWithStringFieldsGeneratedFromText(boolean stored, boolean sourceEnabled) { + + String storedString = stored ? "yes" : "no"; + String createIndexSource = "{\n" + + " \"settings\": {\n" + + " \"index.translog.disable_flush\": true,\n" + + " \"refresh_interval\": \"-1\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"doc\": {\n" + + " \"_source\" : {\"enabled\" : " + sourceEnabled + "}," + + " \"_all\" : {\"enabled\" : true, \"store\":\"" + storedString + "\" }," + + " \"_field_names\" : {\"store\":\"" + storedString + "\" }" + + " }\n" + + " }\n" + + "}"; + + assertAcked(prepareCreate("testidx").setSource(createIndexSource)); + ensureGreen(); + String doc = "{\n" + + " \"text1\": \"some text.\"\n," + + " \"text2\": \"more text.\"\n" + + "}\n"; + index("testidx", "doc", "1", doc); + } + + + @Test + public void testGeneratedNumberFieldsUnstored() throws IOException { + indexSingleDocumentWithNumericFieldsGeneratedFromText(false, randomBoolean()); + String[] fieldsList = {"token_count", "text.token_count", "murmur", "text.murmur"}; + // before refresh - document is only in translog + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + flush(); + //after flush - document is no longer in the translog - only indexed + assertGetFieldsAlwaysNull("testidx", "doc", "1", fieldsList); + } + + @Test + public void testGeneratedNumberFieldsStored() throws IOException { + indexSingleDocumentWithNumericFieldsGeneratedFromText(true, randomBoolean()); + String[] fieldsList = {"token_count", "text.token_count", "murmur", "text.murmur"}; + // before refresh - document is only in translog: with errors ignored the fields are null, otherwise get throws and multi get reports a failure + assertGetFieldsNull("testidx", "doc", "1", fieldsList); + assertGetFieldsException("testidx", "doc", 
"1", fieldsList); + refresh(); + //after refresh - document is in translog and also indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + flush(); + //after flush - document is no longer in the translog - only indexed + assertGetFieldsAlwaysWorks("testidx", "doc", "1", fieldsList); + } + + void indexSingleDocumentWithNumericFieldsGeneratedFromText(boolean stored, boolean sourceEnabled) { + String storedString = stored ? "yes" : "no"; + String createIndexSource = "{\n" + + " \"settings\": {\n" + + " \"index.translog.disable_flush\": true,\n" + + " \"refresh_interval\": \"-1\"\n" + + " },\n" + + " \"mappings\": {\n" + + " \"doc\": {\n" + + " \"_source\" : {\"enabled\" : " + sourceEnabled + "}," + + " \"properties\": {\n" + + " \"token_count\": {\n" + + " \"type\": \"token_count\",\n" + + " \"analyzer\": \"standard\",\n" + + " \"store\": \"" + storedString + "\"" + + " },\n" + + " \"murmur\": {\n" + + " \"type\": \"murmur3\",\n" + + " \"store\": \"" + storedString + "\"" + + " },\n" + + " \"text\": {\n" + + " \"type\": \"string\",\n" + + " \"fields\": {\n" + + " \"token_count\": {\n" + + " \"type\": \"token_count\",\n" + + " \"analyzer\": \"standard\",\n" + + " \"store\": \"" + storedString + "\"" + + " },\n" + + " \"murmur\": {\n" + + " \"type\": \"murmur3\",\n" + + " \"store\": \"" + storedString + "\"" + + " }\n" + + " }\n" + + " }" + + " }\n" + + " }\n" + + " }\n" + + "}"; + + assertAcked(prepareCreate("testidx").setSource(createIndexSource)); + ensureGreen(); + String doc = "{\n" + + " \"murmur\": \"Some value that can be hashed\",\n" + + " \"token_count\": \"A text with five words.\",\n" + + " \"text\": \"A text with five words.\"\n" + + "}\n"; + index("testidx", "doc", "1", doc); + } + + private void assertGetFieldsAlwaysWorks(String index, String type, String docId, String[] fields) { + assertGetFieldsAlwaysWorks(index, type, docId, fields, null); + } + + private void assertGetFieldsAlwaysWorks(String index, String type, String docId, 
String[] fields, @Nullable String routing) { + for (String field : fields) {/* each field must be retrievable both with errors on generated fields reported (false) and ignored (true) */ + assertGetFieldWorks(index, type, docId, field, false, routing); + assertGetFieldWorks(index, type, docId, field, true, routing); + } + } + + private void assertGetFieldWorks(String index, String type, String docId, String field, boolean ignoreErrors, @Nullable String routing) { + GetResponse response = getDocument(index, type, docId, field, ignoreErrors, routing); + assertThat(response.getId(), equalTo(docId)); + assertTrue(response.isExists()); + assertNotNull(response.getField(field)); + response = multiGetDocument(index, type, docId, field, ignoreErrors, routing); + assertThat(response.getId(), equalTo(docId)); + assertTrue(response.isExists()); + assertNotNull(response.getField(field)); + } + + protected void assertGetFieldsException(String index, String type, String docId, String[] fields) { + for (String field : fields) { + assertGetFieldException(index, type, docId, field); + } + } + + private void assertGetFieldException(String index, String type, String docId, String field) { + try { + client().prepareGet().setIndex(index).setType(type).setId(docId).setFields(field).setIgnoreErrorsOnGeneratedFields(false).get(); + fail();/* reached only when the get above did not throw */ + } catch (ElasticsearchException e) { + assertTrue(e.getMessage().contains("You can only get this field after refresh() has been called.")); + } + MultiGetResponse multiGetResponse = client().prepareMultiGet().add(new MultiGetRequest.Item(index, type, docId).fields(field)).setIgnoreErrorsOnGeneratedFields(false).get();/* multi get is expected to report the error as a per-item failure instead of throwing */ + assertNull(multiGetResponse.getResponses()[0].getResponse()); + assertTrue(multiGetResponse.getResponses()[0].getFailure().getMessage().contains("You can only get this field after refresh() has been called.")); + } + + protected void assertGetFieldsNull(String index, String type, String docId, String[] fields) { + assertGetFieldsNull(index, type, docId, fields, null); + } + + protected void assertGetFieldsNull(String index, String type, String 
docId, String[] fields, @Nullable String routing) { + for (String field : fields) {/* checked only with errors on generated fields ignored (ignoreErrors = true) */ + assertGetFieldNull(index, type, docId, field, true, routing); + } + } + + protected void assertGetFieldsAlwaysNull(String index, String type, String docId, String[] fields) { + assertGetFieldsAlwaysNull(index, type, docId, fields, null); + } + + protected void assertGetFieldsAlwaysNull(String index, String type, String docId, String[] fields, @Nullable String routing) { + for (String field : fields) { + assertGetFieldNull(index, type, docId, field, true, routing); + assertGetFieldNull(index, type, docId, field, false, routing); + } + } + + protected void assertGetFieldNull(String index, String type, String docId, String field, boolean ignoreErrors, @Nullable String routing) { + //for get: the document must exist while the requested field is absent + GetResponse response = getDocument(index, type, docId, field, ignoreErrors, routing); + assertTrue(response.isExists()); + assertNull(response.getField(field)); + assertThat(response.getId(), equalTo(docId)); + //same expectations for multi get + response = multiGetDocument(index, type, docId, field, ignoreErrors, routing); + assertNull(response.getField(field)); + assertThat(response.getId(), equalTo(docId)); + assertTrue(response.isExists()); + } + + private GetResponse multiGetDocument(String index, String type, String docId, String field, boolean ignoreErrors, @Nullable String routing) { + MultiGetRequest.Item getItem = new MultiGetRequest.Item(index, type, docId).fields(field); + if (routing != null) { + getItem.routing(routing); + } + MultiGetRequestBuilder multiGetRequestBuilder = client().prepareMultiGet().add(getItem).setIgnoreErrorsOnGeneratedFields(ignoreErrors); + MultiGetResponse multiGetResponse = multiGetRequestBuilder.get(); + assertThat(multiGetResponse.getResponses().length, equalTo(1)); + return multiGetResponse.getResponses()[0].getResponse(); + } + + private GetResponse getDocument(String index, String type, String docId, String field, boolean ignoreErrors, @Nullable String routing) { + 
GetRequestBuilder getRequestBuilder = client().prepareGet().setIndex(index).setType(type).setId(docId).setFields(field).setIgnoreErrorsOnGeneratedFields(ignoreErrors); + if (routing != null) { + getRequestBuilder.setRouting(routing); + } + return getRequestBuilder.get(); + } } diff --git a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java index cdaf8a4f649..3507301e09b 100644 --- a/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java +++ b/src/test/java/org/elasticsearch/test/ElasticsearchIntegrationTest.java @@ -1040,6 +1040,19 @@ public abstract class ElasticsearchIntegrationTest extends ElasticsearchTestCase return client().prepareIndex(index, type, id).setSource(source).execute().actionGet(); } + /** + * Syntactic sugar for: + * + * <pre>
+     *   return client().prepareIndex(index, type, id).setSource(source).execute().actionGet();
+     * </pre>
+ * + * where source is a String. + */ + protected final IndexResponse index(String index, String type, String id, String source) { + return client().prepareIndex(index, type, id).setSource(source).execute().actionGet(); + } + /** * Waits for relocations and refreshes all indices in the cluster. *