Remove collect payloads parameter

The collect_payloads parameter of the span_near query was previously
deprecated with the intention to be removed. This commit removes this
parameter.

Relates #20385
This commit is contained in:
Jason Tedor 2016-09-08 09:37:36 -04:00 committed by GitHub
parent 4fb7ac8254
commit 8e7dfae7d1
22 changed files with 18 additions and 2603 deletions

View File

@ -48,7 +48,6 @@ public class SpanNearQueryBuilder extends AbstractQueryBuilder<SpanNearQueryBuil
public static boolean DEFAULT_IN_ORDER = true; public static boolean DEFAULT_IN_ORDER = true;
private static final ParseField SLOP_FIELD = new ParseField("slop"); private static final ParseField SLOP_FIELD = new ParseField("slop");
private static final ParseField COLLECT_PAYLOADS_FIELD = new ParseField("collect_payloads").withAllDeprecated("no longer supported");
private static final ParseField CLAUSES_FIELD = new ParseField("clauses"); private static final ParseField CLAUSES_FIELD = new ParseField("clauses");
private static final ParseField IN_ORDER_FIELD = new ParseField("in_order"); private static final ParseField IN_ORDER_FIELD = new ParseField("in_order");
@ -175,8 +174,6 @@ public class SpanNearQueryBuilder extends AbstractQueryBuilder<SpanNearQueryBuil
} else if (token.isValue()) { } else if (token.isValue()) {
if (parseContext.getParseFieldMatcher().match(currentFieldName, IN_ORDER_FIELD)) { if (parseContext.getParseFieldMatcher().match(currentFieldName, IN_ORDER_FIELD)) {
inOrder = parser.booleanValue(); inOrder = parser.booleanValue();
} else if (parseContext.getParseFieldMatcher().match(currentFieldName, COLLECT_PAYLOADS_FIELD)) {
// Deprecated in 3.0.0
} else if (parseContext.getParseFieldMatcher().match(currentFieldName, SLOP_FIELD)) { } else if (parseContext.getParseFieldMatcher().match(currentFieldName, SLOP_FIELD)) {
slop = parser.intValue(); slop = parser.intValue();
} else if (parseContext.getParseFieldMatcher().match(currentFieldName, AbstractQueryBuilder.BOOST_FIELD)) { } else if (parseContext.getParseFieldMatcher().match(currentFieldName, AbstractQueryBuilder.BOOST_FIELD)) {

View File

@ -24,11 +24,13 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanQuery;
import org.elasticsearch.Version; import org.elasticsearch.Version;
import org.elasticsearch.common.ParseFieldMatcher; import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.test.AbstractQueryTestCase; import org.elasticsearch.test.AbstractQueryTestCase;
import java.io.IOException; import java.io.IOException;
import java.util.Iterator; import java.util.Iterator;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.instanceOf;
@ -112,8 +114,7 @@ public class SpanNearQueryBuilderTests extends AbstractQueryTestCase<SpanNearQue
assertEquals(json, false, parsed.inOrder()); assertEquals(json, false, parsed.inOrder());
} }
public void testCollectPayloadsDeprecated() throws Exception { public void testCollectPayloadsNoLongerSupported() throws Exception {
assertEquals("We can remove support for ignoring collect_payloads in 6.0.0", 5, Version.CURRENT.major);
String json = String json =
"{\n" + "{\n" +
" \"span_near\" : {\n" + " \"span_near\" : {\n" +
@ -146,6 +147,9 @@ public class SpanNearQueryBuilderTests extends AbstractQueryTestCase<SpanNearQue
" }\n" + " }\n" +
"}"; "}";
parseQuery(json, ParseFieldMatcher.EMPTY); // Just don't throw an error and we're fine final ParsingException e = expectThrows(
ParsingException.class,
() -> parseQuery(json, ParseFieldMatcher.EMPTY));
assertThat(e.getMessage(), containsString("[span_near] query does not support [collect_payloads]"));
} }
} }

View File

@ -8,15 +8,15 @@ your application from one version of Elasticsearch to another.
As a general rule: As a general rule:
* Migration between minor versions -- e.g. `5.x` to `5.y` -- can be * Migration between minor versions -- e.g. `6.x` to `6.y` -- can be
performed by <<rolling-upgrades,upgrading one node at a time>>. performed by <<rolling-upgrades,upgrading one node at a time>>.
* Migration between consecutive major versions -- e.g. `2.x` to `5.x` -- * Migration between consecutive major versions -- e.g. `5.x` to `6.x` --
requires a <<restart-upgrade,full cluster restart>>. requires a <<restart-upgrade,full cluster restart>>.
* Migration between non-consecutive major versions -- e.g. `1.x` to `5.x` -- * Migration between non-consecutive major versions -- e.g. `2.x` to `6.x` --
is not supported. is not supported.
See <<setup-upgrade>> for more info. See <<setup-upgrade>> for more info.
-- --
include::migrate_5_0.asciidoc[] include::migrate_6_0.asciidoc[]

View File

@ -1,129 +0,0 @@
[[breaking-changes-5.0]]
== Breaking changes in 5.0
This section discusses the changes that you need to be aware of when migrating
your application to Elasticsearch 5.0.
[IMPORTANT]
.Known networking bug in 5.0.0-alpha5
======================================================
There is a bug in the new Netty4 implementation in this release which affects any REST request with
a body that is sent in two requests, the first with an `Expect: 100-continue` header. This bug will
manifest with an exception similar to the following:
[source,txt]
----
[WARN ][http.netty4] [wtOV9Vb] caught exception while handling client http traffic, closing connection [id: 0x1320b717, L:/0:0:0:0:0:0:0:1:9200 - R:/0:0:0:0:0:0:0:1:54732]
java.lang.UnsupportedOperationException: unsupported message type: DefaultFullHttpResponse (expected: ByteBuf, FileRegion)
----
This is due to incorrect handling of the `Expect` HTTP header, and it can be
worked around in one of three ways:
* Use a client which does not add `Expect` headers (including the official clients).
* Pass a blank `Expect` header, e.g.
+
[source,sh]
----
curl -H 'Expect:' ...
----
* Use Netty3 for the HTTP layer by passing the following setting at startup:
+
[source,sh]
----
./bin/elasticsearch -Ehttp.type=netty3
----
======================================================
[float]
[[migration-plugin]]
=== Migration Plugin
The https://github.com/elastic/elasticsearch-migration/blob/2.x/README.asciidoc[`elasticsearch-migration` plugin]
(compatible with Elasticsearch 2.3.0 and above) will help you to find issues
that need to be addressed when upgrading to Elasticsearch 5.0.
[float]
=== Indices created before 5.0
Elasticsearch 5.0 can read indices created in version 2.0 or above. An
Elasticsearch 5.0 node will not start in the presence of indices created in a
version of Elasticsearch before 2.0.
[IMPORTANT]
.Reindex indices from Elasticsearch 1.x or before
=========================================
Indices created in Elasticsearch 1.x or before will need to be reindexed with
Elasticsearch 2.x in order to be readable by Elasticsearch 5.x. It is not
sufficient to use the <<indices-upgrade,`upgrade`>> API. The easiest
way to reindex old indices is to upgrade to Elasticsearch 2.3 or later and to use the
`reindex` API, or the reindex UI provided by the <<migration-plugin,Migration Plugin>>.
=========================================
The first time Elasticsearch 5.0 starts, it will automatically rename index
folders to use the index UUID instead of the index name. If you are using
<<indices-shadow-replicas,shadow replicas>> with shared data folders, first
start a single node with access to all data folders, and let it rename all
index folders before starting other nodes in the cluster.
[float]
=== Also see:
* <<breaking_50_search_changes>>
* <<breaking_50_mapping_changes>>
* <<breaking_50_percolator>>
* <<breaking_50_suggester>>
* <<breaking_50_index_apis>>
* <<breaking_50_document_api_changes>>
* <<breaking_50_settings_changes>>
* <<breaking_50_allocation>>
* <<breaking_50_http_changes>>
* <<breaking_50_rest_api_changes>>
* <<breaking_50_cat_api>>
* <<breaking_50_java_api_changes>>
* <<breaking_50_packaging>>
* <<breaking_50_plugins>>
* <<breaking_50_fs>>
* <<breaking_50_aggregations_changes>>
* <<breaking_50_scripting>>
include::migrate_5_0/search.asciidoc[]
include::migrate_5_0/mapping.asciidoc[]
include::migrate_5_0/percolator.asciidoc[]
include::migrate_5_0/suggest.asciidoc[]
include::migrate_5_0/index-apis.asciidoc[]
include::migrate_5_0/docs.asciidoc[]
include::migrate_5_0/settings.asciidoc[]
include::migrate_5_0/allocation.asciidoc[]
include::migrate_5_0/http.asciidoc[]
include::migrate_5_0/rest.asciidoc[]
include::migrate_5_0/cat.asciidoc[]
include::migrate_5_0/java.asciidoc[]
include::migrate_5_0/packaging.asciidoc[]
include::migrate_5_0/plugins.asciidoc[]
include::migrate_5_0/fs.asciidoc[]
include::migrate_5_0/aggregations.asciidoc[]
include::migrate_5_0/scripting.asciidoc[]

View File

@ -1,33 +0,0 @@
[[breaking_50_aggregations_changes]]
=== Aggregation changes
==== Significant terms on numeric fields
Numeric fields have been refactored to use a different data structure that
performs better for range queries. However, since this data structure does
not record document frequencies, numeric fields need to fall back to running
queries in order to estimate the number of matching documents in the
background set, which may incur a performance degradation.
It is recommended to use <<keyword,`keyword`>> fields instead, either directly
or through a <<multi-fields,multi-field>> if the numeric representation is
still needed for sorting, range queries or numeric aggregations like
<<search-aggregations-metrics-stats-aggregation,`stats` aggregations>>.
==== `ip_range` aggregations
Now that Elasticsearch supports `ipv6`, `ip` addresses are encoded in the index
using a binary representation rather than a numeric representation. As a
consequence, the output of `ip_range` aggregations does not give numeric values
for `from` and `to` anymore.
==== `size: 0` on Terms, Significant Terms and Geohash Grid Aggregations
`size: 0` is no longer valid for the terms, significant terms and geohash grid
aggregations. Instead a size should be explicitly specified with a number greater
than zero.
==== Fractional time values
Fractional time values (e.g., 0.5s) are no longer supported. For example, this means when setting
date histogram intervals "1.5h" will be rejected and should instead be input as "90m".

View File

@ -1,59 +0,0 @@
[[breaking_50_allocation]]
=== Allocation changes
==== Primary shard allocation
Previously, primary shards were only assigned if a quorum of shard copies were
found (configurable using `index.recovery.initial_shards`, now deprecated). In
the case where a primary had only a single replica, quorum was defined to be a
single shard. This meant that any shard copy of an index with replication
factor 1 could become primary, even if it was a stale copy of the data on disk.
This is now fixed thanks to shard allocation IDs.
Allocation IDs assign unique identifiers to shard copies. This allows the
cluster to differentiate between multiple copies of the same data and track
which shards have been active so that, after a cluster restart, only shard
copies containing the most recent data can become primaries.
==== Indices Shard Stores command
By using allocation IDs instead of version numbers to identify shard copies
for primary shard allocation, the former versioning scheme has become
obsolete. This is reflected in the
<<indices-shards-stores,Indices Shard Stores API>>.
A new `allocation_id` field replaces the former `version` field in the result
of the Indices Shard Stores command. This field is available for all shard
copies that have been either created with the current version of Elasticsearch
or have been active in a cluster running a current version of Elasticsearch.
For legacy shard copies that have not been active in a current version of
Elasticsearch, a `legacy_version` field is available instead (equivalent to
the former `version` field).
==== Reroute commands
The reroute command `allocate` has been split into two distinct commands
`allocate_replica` and `allocate_empty_primary`. This was done as we
introduced a new `allocate_stale_primary` command. The new `allocate_replica`
command corresponds to the old `allocate` command with `allow_primary` set to
false. The new `allocate_empty_primary` command corresponds to the old
`allocate` command with `allow_primary` set to true.
==== Custom Reroute Commands
Elasticsearch no longer supports plugins registering custom allocation
commands. It was unused and hopefully unneeded.
==== `index.shared_filesystem.recover_on_any_node` changes
The behavior of `index.shared_filesystem.recover_on_any_node: true` has been
changed. Previously, in the case where no shard copies could be found, an
arbitrary node was chosen by potentially ignoring allocation deciders. Now, we
take balancing into account but don't assign the shard if the allocation
deciders are not satisfied.
The behavior has also changed in the case where shard copies can be found.
Previously, a node not holding the shard copy was chosen if none of the nodes
holding shard copies were satisfying the allocation deciders. Now, the shard
will be assigned to a node having a shard copy, even if none of the nodes
holding a shard copy satisfy the allocation deciders.

View File

@ -1,47 +0,0 @@
[[breaking_50_cat_api]]
=== CAT API changes
==== Use Accept header for specifying response media type
Previous versions of Elasticsearch accepted the Content-type header
field for controlling the media type of the response in the cat API.
This is in opposition to the HTTP spec which specifies the Accept
header field for this purpose. Elasticsearch now uses the Accept header
field and support for using the Content-Type header field for this
purpose has been removed.
==== Host field removed from the cat nodes API
The `host` field has been removed from the cat nodes API as its value
is always equal to the `ip` field. The `name` field is available in the
cat nodes API and should be used instead of the `host` field.
==== Changes to cat recovery API
The fields `bytes_recovered` and `files_recovered` have been added to
the cat recovery API. These fields, respectively, indicate the total
number of bytes and files that have been recovered.
The fields `total_files` and `total_bytes` have been renamed to
`files_total` and `bytes_total`, respectively.
Additionally, the field `translog` has been renamed to
`translog_ops_recovered`, the field `translog_total` to
`translog_ops` and the field `translog_percent` to
`translog_ops_percent`. The short aliases for these fields are `tor`,
`to`, and `top`, respectively.
==== Changes to cat nodes API
The cat nodes endpoint returns `m` for master eligible, `d` for data,
and `i` for ingest. A node with no explicit roles will be a coordinating
only node and marked with `-`. A node can have multiple roles. The
master column has been adapted to return only whether a node is the
current master (`*`) or not (`-`).
==== Changes to cat field data API
The cat field data endpoint adds a row per field instead of a column per field.
The `total` field has been removed from the field data API. Total field data usage per node
can be obtained from the cat nodes API.

View File

@ -1,71 +0,0 @@
[[breaking_50_document_api_changes]]
=== Document API changes
==== `?refresh` no longer supports truthy and falsy values
The `?refresh` request parameter used to accept any value other than `false`,
`0`, `off`, and `no` to mean "make the changes from this request visible for
search immediately." Now it only accepts `?refresh` and `?refresh=true` to
mean that. You can set it to `?refresh=false` and the request will take no
refresh-related action. The same is true if you leave `refresh` off of the
url entirely. If you add `?refresh=wait_for` Elasticsearch will wait for the
changes to become visible before replying to the request but won't take any
immediate refresh related action. See <<docs-refresh>>.
==== `created` field deprecated in the Index API
The `created` field has been deprecated in the Index API. It now returns
`operation`, returning `"operation": "create"` when it created a document and
`"operation": "index"` when it updated the document. This is also true for
`index` bulk operations.
==== `found` field deprecated in the Delete API
The `found` field has been deprecated in the Delete API. It now returns
`operation`, returning `"operation": "deleted"` when it deleted a document and
`"operation": "noop"` when it didn't find the document. This is also true for
`index` bulk operations.
==== Reindex and Update By Query
Before 5.0.0 `_reindex` and `_update_by_query` only retried bulk failures so
they used the following response format:
[source,js]
----------------------
{
...
"retries": 10
...
}
----------------------
Where `retries` counts the number of bulk retries. Now they retry on search
failures as well and use this response format:
[source,js]
----------------------
{
...
"retries": {
"bulk": 10,
"search": 1
}
...
}
----------------------
Where `bulk` counts the number of bulk retries and `search` counts the number
of search retries.
==== get API
As of 5.0.0 the get API will issue a refresh if the requested document has
been changed since the last refresh but the change hasn't been refreshed yet. This
will also make all other changes visible immediately. This can have an impact on
performance if the same document is updated very frequently using a read modify update
pattern since it might create many small segments. This behavior can be disabled by
passing `realtime=false` to the get request.
==== version type 'force' removed
In 5.0.0 document modification operations may no longer specify the
`version_type` of `force` to override any previous version checks.

View File

@ -1,31 +0,0 @@
[[breaking_50_fs]]
=== Filesystem related changes
Only a subset of index files were open with `mmap` on Elasticsearch 2.x. As of
Elasticsearch 5.0, all index files will be open with `mmap` on 64-bit systems.
While this may increase the amount of virtual memory used by Elasticsearch,
there is nothing to worry about since this is only address space consumption
and the actual memory usage of Elasticsearch will stay similar to what it was
in 2.x. See http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html
for more information.
=== Path to data on disk
In prior versions of Elasticsearch, the `path.data` directory included a folder
for the cluster name, so that data was in a folder such as
`$DATA_DIR/$CLUSTER_NAME/nodes/$nodeOrdinal`. In 5.0 the cluster name as a
directory is deprecated. Data will now be stored in
`$DATA_DIR/nodes/$nodeOrdinal` if there is no existing data. Upon startup,
Elasticsearch will check to see if the cluster folder exists and has data, and
will read from it if necessary. In Elasticsearch 6.0 this backwards-compatible
behavior will be removed.
If you are using a multi-cluster setup with both instances of Elasticsearch
pointing to the same data path, you will need to add the cluster name to the
data path so that different clusters do not overwrite data.
==== Local files
Prior to 5.0, nodes that were marked with both `node.data: false` and `node.master: false` (or the now removed `node.client: true`)
didn't write any files or folders to disk. 5.x added persistent node ids, requiring nodes to store that information. As such, all
node types will write a small state file to their data folders.

View File

@ -1,9 +0,0 @@
[[breaking_50_http_changes]]
=== HTTP changes
==== Compressed HTTP requests are always accepted
Before 5.0, Elasticsearch accepted compressed HTTP requests only if the setting
`http.compressed` was set to `true`. Elasticsearch accepts compressed requests
now but will continue to send compressed responses only if `http.compressed`
is set to `true`.

View File

@ -1,71 +0,0 @@
[[breaking_50_index_apis]]
=== Index APIs changes
==== Closing / deleting indices while running snapshot
In previous versions of Elasticsearch, closing or deleting an index during a
full snapshot would make the snapshot fail. In 5.0, the close/delete index
request will fail instead. The behavior for partial snapshots remains
unchanged: Closing or deleting an index during a partial snapshot is still
possible. The snapshot result is then marked as partial.
==== Warmers
Thanks to several changes like doc values by default and disk-based norms,
warmers are no longer useful. As a consequence, warmers and the warmer API
have been removed: it is no longer possible to register queries that will run
before a new IndexSearcher is published.
Don't worry if you have warmers defined on your indices, they will simply be
ignored when upgrading to 5.0.
==== System CPU stats
The recent CPU usage (as a percent) has been added to the OS stats
reported under the node stats API and the cat nodes API. The breaking
change here is that there is a new object in the `os` object in the node
stats response. This object is called `cpu` and includes `percent` and
`load_average` as fields. This moves the `load_average` field that was
previously a top-level field in the `os` object to the `cpu` object. The
format of the `load_average` field has changed to an object with fields
`1m`, `5m`, and `15m` representing the one-minute, five-minute and
fifteen-minute loads respectively. If any of these fields are not present,
it indicates that the corresponding value is not available.
In the cat nodes API response, the `cpu` field is output by default. The
previous `load` field has been removed and is replaced by `load_1m`,
`load_5m`, and `load_15m` which represent the one-minute, five-minute
and fifteen-minute loads respectively. The field will be null if the
corresponding value is not available.
Finally, the API for `org.elasticsearch.monitor.os.OsStats` has
changed. The `getLoadAverage` method has been removed. The value for
this can now be obtained from `OsStats.Cpu#getLoadAverage` but it is no
longer a double and is instead an object encapsulating the one-minute,
five-minute and fifteen-minute load averages. Additionally, the recent
CPU usage can be obtained from `OsStats.Cpu#getPercent`.
==== Suggest stats
Suggest stats exposed through `suggest` in indices stats has been merged
with `search` stats. `suggest` stats is exposed as part of `search` stats.
==== Creating indices starting with '-' or '+'
Elasticsearch no longer allows indices to be created starting with '-' or '+', so
that the multi-index matching and expansion is not confused. It was previously
possible (but a really bad idea) to create indices starting with a hyphen or
plus sign. Any index already existing with these preceding characters will
continue to work normally.
==== Aliases API
The `/_aliases` API no longer supports `indexRouting` and `index-routing`, only
`index_routing`. It also no longer supports `searchRouting` and `search-routing`,
only `search_routing`. These were removed because they were untested and we
prefer there to be only one (obvious) way to do things like this.
==== OpType Create without an ID
As of 5.0 indexing a document with `op_type=create` without specifying an ID is not
supported anymore.

View File

@ -1,417 +0,0 @@
[[breaking_50_java_api_changes]]
=== Java API changes
==== Transport client has been moved
The Java transport client has been moved to its own module which can be referenced using:
[source,xml]
-----
<dependency>
<groupId>org.elasticsearch.client</groupId>
<artifactId>transport</artifactId>
<version>5.0.0-alpha5</version>
</dependency>
-----
The transport client is now created using the following snippet:
[source,java]
-----
TransportClient client = new PreBuiltTransportClient(Settings.EMPTY)
.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("host1"), 9300))
.addTransportAddress(new InetSocketTransportAddress(InetAddress.getByName("host2"), 9300));
-----
For more information please see the {javaclient}java-api.html[Java client documentation]
==== Count api has been removed
The deprecated count api has been removed from the Java api, use the search api instead and set size to 0.
The following call
[source,java]
-----
client.prepareCount(indices).setQuery(query).get();
-----
can be replaced with
[source,java]
-----
client.prepareSearch(indices).setSource(new SearchSourceBuilder().size(0).query(query)).get();
-----
==== Suggest api has been removed
The suggest api has been removed from the Java api, use the suggest option in search api, it has been optimized
for suggest-only request.
The following call
[source,java]
-----
client.prepareSuggest(indices).addSuggestion("foo", SuggestBuilders.completionSuggestion("field").text("s")).get();
-----
can be replaced with
[source,java]
-----
client.prepareSearch(indices).suggest(new SuggestBuilder().addSuggestion("foo", SuggestBuilders.completionSuggestion("field").text("s"))).get();
-----
==== Elasticsearch will no longer detect logging implementations
Elasticsearch now logs using Log4j 2. Previously if Log4j wasn't on the
classpath it made some effort to degrade to SLF4J or Java logging. Now it will
fail to work without the Log4j 2 API. The log4j-over-slf4j bridge ought to work
when using the Java client. The log4j-1.2-api bridge is used for third-party
dependencies that still use the Log4j 1 API. The Elasticsearch server now only
supports Log4j 2 as configured by `log4j2.properties` and will fail if Log4j
isn't present.
==== Groovy dependencies
In previous versions of Elasticsearch, the Groovy scripting capabilities
depended on the `org.codehaus.groovy:groovy-all` artifact. In addition
to pulling in the Groovy language, this pulls in a very large set of
functionality, none of which is needed for scripting within
Elasticsearch. Aside from the inherent difficulties in managing such a
large set of dependencies, this also increases the surface area for
security issues. This dependency has been reduced to the core Groovy
language `org.codehaus.groovy:groovy` artifact.
==== DocumentAlreadyExistsException removed
`DocumentAlreadyExistsException` is removed and a `VersionConflictException` is thrown instead (with a better
error description). This will influence code that uses the `IndexRequest.opType()` or `IndexRequest.create()`
to index a document only if it doesn't already exist.
==== writeConsistencyLevel removed on write requests
In previous versions of Elasticsearch, the various write requests had a
`setWriteConsistencyLevel` method to set the shard consistency level for
write operations. However, the semantics of write consistency were ambiguous
as this is just a pre-operation check to ensure the specified number of
shards were available before the operation commenced. The write consistency
level did not guarantee that the data would be replicated to those number
of copies by the time the operation finished. The `setWriteConsistencyLevel`
method on these write requests has been changed to `setWaitForActiveShards`,
which can take a numerical value up to the total number of shard copies or
`ActiveShardCount.ALL` for all shard copies. The default is to just wait
for the primary shard to be active before proceeding with the operation.
See the section on <<index-wait-for-active-shards,wait for active shards>>
for more details.
This change affects `IndexRequest`, `IndexRequestBuilder`, `BulkRequest`,
`BulkRequestBuilder`, `UpdateRequest`, `UpdateRequestBuilder`, `DeleteRequest`,
and `DeleteRequestBuilder`.
==== Changes to Query Builders
===== BoostingQueryBuilder
Removed setters for mandatory positive/negative query. Both arguments now have
to be supplied at construction time already and have to be non-null.
===== SpanContainingQueryBuilder
Removed setters for mandatory big/little inner span queries. Both arguments now have
to be supplied at construction time already and have to be non-null. Updated
static factory methods in QueryBuilders accordingly.
===== SpanOrQueryBuilder
Making sure that query contains at least one clause by making initial clause mandatory
in constructor.
Renaming method to add clauses from `clause(SpanQueryBuilder)` to `addClause(SpanQueryBuilder)`.
===== SpanNearQueryBuilder
Removed setter for mandatory slop parameter, needs to be set in constructor now. Also
making sure that query contains at least one clause by making initial clause mandatory
in constructor. Updated the static factory methods in QueryBuilders accordingly.
Renaming method to add clauses from `clause(SpanQueryBuilder)` to `addClause(SpanQueryBuilder)`.
===== SpanNotQueryBuilder
Removed setter for mandatory include/exclude span query clause, needs to be set in constructor now.
Updated the static factory methods in QueryBuilders and tests accordingly.
===== SpanWithinQueryBuilder
Removed setters for mandatory big/little inner span queries. Both arguments now have
to be supplied at construction time already and have to be non-null. Updated
static factory methods in QueryBuilders accordingly.
===== WrapperQueryBuilder
Removed `wrapperQueryBuilder(byte[] source, int offset, int length)`. Instead simply
use `wrapperQueryBuilder(byte[] source)`. Updated the static factory methods in
QueryBuilders accordingly.
===== QueryStringQueryBuilder
Removed ability to pass in boost value using `field(String field)` method in form e.g. `field^2`.
Use the `field(String, float)` method instead.
===== Operator
Removed the enums called `Operator` from `MatchQueryBuilder`, `QueryStringQueryBuilder`,
`SimpleQueryStringBuilder`, and `CommonTermsQueryBuilder` in favour of using the enum
defined in `org.elasticsearch.index.query.Operator` in an effort to consolidate the
codebase and avoid duplication.
===== queryName and boost support
Support for `queryName` and `boost` has been streamlined to all of the queries. That is
a breaking change till queries get sent over the network as serialized json rather
than in `Streamable` format. In fact whenever additional fields are added to the json
representation of the query, older nodes might throw an error when they find unknown fields.
===== InnerHitsBuilder
InnerHitsBuilder now has dedicated addParentChildInnerHits and addNestedInnerHits methods
to differentiate between inner hits for nested vs. parent / child documents. This change
makes the type / path parameter mandatory.
===== MatchQueryBuilder
Moving MatchQueryBuilder.Type and MatchQueryBuilder.ZeroTermsQuery enum to MatchQuery.Type.
Also reusing new Operator enum.
===== MoreLikeThisQueryBuilder
Removed `MoreLikeThisQueryBuilder.Item#id(String id)`, `Item#doc(BytesReference doc)`,
`Item#doc(XContentBuilder doc)`. Use provided constructors instead.
Removed `MoreLikeThisQueryBuilder#addLike` in favor of texts and/or items being provided
at construction time. Using arrays there instead of lists now.
Removed `MoreLikeThisQueryBuilder#addUnlike` in favor of using the `unlike` methods
which take arrays as arguments now rather than the lists used before.
The deprecated `docs(Item... docs)`, `ignoreLike(Item... docs)`,
`ignoreLike(String... likeText)`, `addItem(Item... likeItems)` have been removed.
===== GeoDistanceQueryBuilder
Removing individual setters for lon() and lat() values, both values should be set together
using point(lon, lat).
===== GeoDistanceRangeQueryBuilder
Removing setters for to(Object ...) and from(Object ...) in favour of the only two allowed input
arguments (String, Number). Removing setter for center point (point(), geohash()) because parameter
is mandatory and should already be set in constructor.
Also removing setters for lt(), lte(), gt(), gte() since they can all be replaced by equivalent
calls to to/from() and includeLower()/includeUpper().
===== GeoPolygonQueryBuilder
Require shell of polygon already to be specified in constructor instead of adding it pointwise.
This enables validation, but makes it necessary to remove the addPoint() methods.
===== MultiMatchQueryBuilder
Moving MultiMatchQueryBuilder.ZeroTermsQuery enum to MatchQuery.ZeroTermsQuery.
Also reusing new Operator enum.
Removed ability to pass in boost value using `field(String field)` method in form e.g. `field^2`.
Use the `field(String, float)` method instead.
===== MissingQueryBuilder
The MissingQueryBuilder which was deprecated in 2.2.0 is removed. As a replacement use ExistsQueryBuilder
inside a mustNot() clause. So instead of using `new MissingQueryBuilder(name)` now use
`new BoolQueryBuilder().mustNot(new ExistsQueryBuilder(name))`.
===== NotQueryBuilder
The NotQueryBuilder which was deprecated in 2.1.0 is removed. As a replacement use BoolQueryBuilder
with added mustNot() clause. So instead of using `new NotQueryBuilder(filter)` now use
`new BoolQueryBuilder().mustNot(filter)`.
===== TermsQueryBuilder
Remove the setter for `termsLookup()`, making it only possible to either use a TermsLookup object or
individual values at construction time. Also moving individual settings for the TermsLookup (lookupIndex,
lookupType, lookupId, lookupPath) to the separate TermsLookup class, using constructor only and moving
checks for validation there. Removed `TermsLookupQueryBuilder` in favour of `TermsQueryBuilder`.
===== FunctionScoreQueryBuilder
`add` methods have been removed, all filters and functions must be provided as constructor arguments by
creating an array of `FunctionScoreQueryBuilder.FilterFunctionBuilder` objects, containing one element
for each filter/function pair.
`scoreMode` and `boostMode` can only be provided using corresponding enum members instead
of string values: see `FilterFunctionScoreQuery.ScoreMode` and `CombineFunction`.
`CombineFunction.MULT` has been renamed to `MULTIPLY`.
===== IdsQueryBuilder
For simplicity, only one way of adding the ids to the existing list (empty by default) is left: `addIds(String...)`
===== ShapeBuilders
`InternalLineStringBuilder` is removed in favour of `LineStringBuilder`, `InternalPolygonBuilder` in favour of `PolygonBuilder` and `Ring` has been replaced with `LineStringBuilder`. Also the abstract base classes `BaseLineStringBuilder` and `BasePolygonBuilder` have been merged with their corresponding implementations.
===== RescoreBuilder
`RescoreBuilder.Rescorer` was merged with `RescoreBuilder`, which now is an abstract superclass. QueryRescoreBuilder currently is its only implementation.
===== PhraseSuggestionBuilder
The inner DirectCandidateGenerator class has been moved out to its own class called DirectCandidateGeneratorBuilder.
===== SortBuilders
The `sortMode` setter in `FieldSortBuilder`, `GeoDistanceSortBuilder` and `ScriptSortBuilder` now
accept a `SortMode` enum instead of a String constant. Also the getter returns the same enum type.
===== SuggestBuilder
The `setText` method has been changed to `setGlobalText` to make the intent more clear, and a `getGlobalText` method has been added.
The `addSuggestion` method now requires the user specified suggestion name, previously used in the ctor of each suggestion.
===== SuggestionBuilder
The `field` setter has been deleted. Instead the field name needs to be specified as constructor argument.
==== SearchSourceBuilder
All methods which take an `XContentBuilder`, `BytesReference`, `Map<String, Object>` or `byte[]` have been removed in favor of providing the
relevant builder object for that feature (e.g. `HighlightBuilder`, `AggregationBuilder`, `SuggestBuilder`). This means that all search requests
can now be validated at call time which results in much clearer errors.
The `defaultRescoreWindowSize(int)` method has been removed. The window size should be set explicitly on all `RescoreBuilder` objects.
==== SearchRequestBuilder
All methods which take an `XContentBuilder`, `BytesReference`, `Map<String, Object>` or `byte[]` have been removed in favor of providing the
relevant builder object for that feature (e.g. `HighlightBuilder`, `AggregationBuilder`, `SuggestBuilder`). This means that all search requests
can now be validated at call time which results in much clearer errors.
All highlighter methods have been removed in favor of a single `highlighter(HighlightBuilder)` method.
The `setExtraSource(SearchSourceBuilder)` method has been removed.
The `setTemplateSource(String)` and `setTemplateSource(BytesReference)` methods have been removed. Use `setTemplate(Template)` instead.
`setRescorer(Rescorer)` and `setRescorer(Rescorer, int)` have been removed in favor of `setRescorer(RescoreBuilder)` and `setRescorer(RescoreBuilder, int)`.
==== SearchRequest
All `source` methods have been removed in favor of a single `source(SearchSourceBuilder)` method. This means that all search requests can now be validated
at call time which results in much clearer errors.
All `extraSource` methods have been removed.
All `template` methods have been removed in favor of a new Search Template API. A new `SearchTemplateRequest` now accepts a template and
a `SearchRequest` and must be executed using the new `SearchTemplateAction` action.
==== SearchResponse
Sort values for `string` fields are now returned as `java.lang.String` objects rather than `org.elasticsearch.common.text.Text`.
==== AggregationBuilder
All methods which take an `XContentBuilder`, `BytesReference`, `Map<String, Object>` or `byte[]` have been removed in favor of providing the
relevant builder object (i.e. `subAggregation(AggregationBuilder)` or `subAggregation(PipelineAggregationBuilder)`). This means that all
requests can now be validated at call time which results in much clearer errors.
==== ValidateQueryRequest
`source(QuerySourceBuilder)`, `source(Map)`, `source(XContentBuilder)`, `source(String)`, `source(byte[])`, `source(byte[], int, int)`,
`source(BytesReference)` and `source()` have been removed in favor of using `query(QueryBuilder)` and `query()`
==== ValidateQueryRequestBuilder
`setSource()` methods have been removed in favor of using `setQuery(QueryBuilder)`
==== ExplainRequest
`source(QuerySourceBuilder)`, `source(Map)`, `source(BytesReference)` and `source()` have been removed in favor of using
`query(QueryBuilder)` and `query()`
==== ExplainRequestBuilder
The `setQuery(BytesReference)` method has been removed in favor of using `setQuery(QueryBuilder)`
==== ClusterStatsResponse
Removed the `getMemoryAvailable` method from `OsStats`, which could be previously accessed calling
`clusterStatsResponse.getNodesStats().getOs().getMemoryAvailable()`. It is now replaced with
`clusterStatsResponse.getNodesStats().getOs().getMem()` which exposes `getTotal()`, `getFree()`,
`getUsed()`, `getFreePercent()` and `getUsedPercent()`.
==== setRefresh(boolean) has been removed
`setRefresh(boolean)` has been removed in favor of `setRefreshPolicy(RefreshPolicy)` because there
are now three options (NONE, IMMEDIATE, and WAIT_FOR). `setRefreshPolicy(IMMEDIATE)` has the same behavior
as `setRefresh(true)` used to have. See `setRefreshPolicy`'s javadoc for more.
==== Remove properties support
Some Java APIs (e.g., `IndicesAdminClient#setSettings`) would support Java properties syntax
(line-delimited key=value pairs). This support has been removed.
==== Render Search Template Java API has been removed
The Render Search Template Java API including `RenderSearchTemplateAction`, `RenderSearchTemplateRequest` and
`RenderSearchTemplateResponse` has been removed in favor of a new `simulate` option in the Search Template Java API.
This Search Template API is now included in the `lang-mustache` module and the `simulate` flag must be set on the
`SearchTemplateRequest` object.
==== AnalyzeRequest
The `tokenFilters(String...)` and `charFilters(String...)` methods have been removed
in favor of using `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)` for each filter.
==== AnalyzeRequestBuilder
The `setTokenFilters(String...)` and `setCharFilters(String...)` methods have been removed
in favor of using `addTokenFilter(String)`/`addTokenFilter(Map)` and `addCharFilter(String)`/`addCharFilter(Map)` for each filter.
==== ClusterHealthRequest
The `waitForRelocatingShards(int)` method has been removed in favor of `waitForNoRelocatingShards(boolean)`
which instead uses a boolean flag to denote whether the cluster health operation should wait for there to
be no relocating shards in the cluster before returning.
==== ClusterHealthRequestBuilder
The `setWaitForRelocatingShards(int)` method has been removed in favor of `setWaitForNoRelocatingShards(boolean)`
which instead uses a boolean flag to denote whether the cluster health operation should wait for there to
be no relocating shards in the cluster before returning.
==== BlobContainer Interface for Snapshot/Restore
Some methods have been removed from the `BlobContainer` interface for Snapshot/Restore repositories. In particular,
the following three methods have been removed:
1. `deleteBlobs(Collection<String>)` (use `deleteBlob(String)` instead)
2. `deleteBlobsByPrefix(String)` (use `deleteBlob(String)` instead)
3. `writeBlob(String, BytesReference)` (use `writeBlob(String, InputStream, long)` instead)
The `deleteBlob` methods that took multiple blobs as arguments were deleted because no atomic guarantees can be made about either deleting all blobs or deleting none of them, and exception handling in such a situation is ambiguous and best left to the caller. Hence, all delete blob calls use the singular `deleteBlob(String)` method.
The extra `writeBlob` method offered no real advantage to the interface and all calls to `writeBlob(blobName, bytesRef)` can be replaced with:
[source,java]
-----
try (InputStream stream = bytesRef.streamInput()) {
blobContainer.writeBlob(blobName, stream, bytesRef.length());
}
-----
For any custom implementation of the `BlobContainer` interface, these three methods must be removed.

View File

@ -1,268 +0,0 @@
[[breaking_50_mapping_changes]]
=== Mapping changes
==== `string` fields replaced by `text`/`keyword` fields
The `string` field datatype has been replaced by the `text` field for full
text analyzed content, and the `keyword` field for not-analyzed exact string
values. For backwards compatibility purposes, during the 5.x series:
* `string` fields on pre-5.0 indices will function as before.
* New `string` fields can be added to pre-5.0 indices as before.
* `text` and `keyword` fields can also be added to pre-5.0 indices.
* When adding a `string` field to a new index, the field mapping will be
rewritten as a `text` or `keyword` field if possible, otherwise
an exception will be thrown. Certain configurations that were possible
with `string` fields are no longer possible with `text`/`keyword` fields
such as enabling `term_vectors` on a not-analyzed `keyword` field.
==== Default string mappings
String mappings now have the following default mappings:
[source,js]
---------------
{
"type": "text",
"fields": {
"keyword": {
"type": "keyword",
"ignore_above": 256
}
}
}
---------------
This allows to perform full-text search on the original field name and to sort
and run aggregations on the sub keyword field.
==== Numeric fields
Numeric fields are now indexed with a completely different data-structure, called
BKD tree, that is expected to require less disk space and be faster for range
queries than the previous way that numerics were indexed.
Term queries will return constant scores now, while they used to return higher
scores for rare terms due to the contribution of the document frequency, which
this new BKD structure does not record. If scoring is needed, then it is advised
to map the numeric fields as <<keyword,`keyword`s>> too.
Note that this <<keyword,`keyword`>> mapping does not need to replace the numeric
mapping. For instance if you need both sorting and scoring on your numeric field,
you could map it both as a number and a `keyword` using <<multi-fields>>:
[source,js]
--------------------------------------------------
PUT my_index
{
"mappings": {
"my_type": {
"properties": {
"my_number": {
"type": "long",
"fields": {
"keyword": {
"type": "keyword"
}
}
}
}
}
}
}
--------------------------------------------------
// CONSOLE
Also the `precision_step` parameter is now irrelevant and will be rejected on
indices that are created on or after 5.0.
==== `_timestamp` and `_ttl`
The `_timestamp` and `_ttl` fields were deprecated and are now removed. As a
replacement for `_timestamp`, you should populate a regular date field with the
current timestamp on application side. For `_ttl`, you should either use
time-based indices when applicable, or cron a delete-by-query with a range
query on a timestamp field.
==== `index` property
On all field datatypes (except for the deprecated `string` field), the `index`
property now only accepts `true`/`false` instead of `not_analyzed`/`no`. The
`string` field still accepts `analyzed`/`not_analyzed`/`no`.
==== Doc values on unindexed fields
Previously, setting a field to `index:no` would also disable doc-values. Now,
doc-values are always enabled on numeric and boolean fields unless
`doc_values` is set to `false`.
==== Floating points use `float` instead of `double`
When dynamically mapping a field containing a floating point number, the field
now defaults to using `float` instead of `double`. The reasoning is that
floats should be more than enough for most cases but would decrease storage
requirements significantly.
==== `norms`
`norms` now take a boolean instead of an object. This boolean is the replacement
for `norms.enabled`. There is no replacement for `norms.loading` since eager
loading of norms is not useful anymore now that norms are disk-based.
==== `fielddata.format`
Setting `fielddata.format: doc_values` in the mappings used to implicitly
enable doc-values on a field. This no longer works: the only way to enable or
disable doc-values is by using the `doc_values` property of mappings.
==== `fielddata.filter.regex`
Regex filters are not supported anymore and will be dropped on upgrade.
==== Source-transform removed
The source `transform` feature has been removed. Instead, use an ingest pipeline.
==== `_parent` field no longer indexed
The join between parent and child documents no longer relies on indexed fields
and therefore from 5.0.0 onwards the `_parent` field is no longer indexed. In
order to find documents that refer to a specific parent id, the new
`parent_id` query can be used. The GET response and hits inside the search
response still include the parent id under the `_parent` key.
==== Source `format` option
The `_source` mapping no longer supports the `format` option. It will still be
accepted for indices created before the upgrade to 5.0 for backwards
compatibility, but it will have no effect. Indices created on or after 5.0
will reject this option.
==== Object notation
Core types no longer support the object notation, which was used to provide
per document boosts as follows:
[source,js]
---------------
{
"value": "field_value",
"boost": 42
}
---------------
==== Boost accuracy for queries on `_all`
Per-field boosts on the `_all` are now compressed into a single byte instead
of the 4 bytes used previously. While this will make the index much more
space-efficient, it also means that index time boosts will be less accurately
encoded.
==== `_ttl` and `_timestamp` cannot be created
You can no longer create indexes with `_ttl` or `_timestamp` enabled. Indexes
with them enabled created before 5.0 will continue to work.
You should replace `_timestamp` in new indexes by adding a field to your source
either in the application producing the data or with an ingest pipeline like
this one:
[source,js]
---------------
PUT _ingest/pipeline/timestamp
{
"description" : "Adds a timestamp field at the current time",
"processors" : [ {
"set" : {
"field": "timestamp",
"value": "{{_ingest.timestamp}}"
}
} ]
}
PUT newindex/type/1?pipeline=timestamp
{
"example": "data"
}
GET newindex/type/1
---------------
// CONSOLE
Which produces
[source,js]
---------------
{
"_source": {
"example": "data",
"timestamp": "2016-06-21T18:48:55.560+0000"
},
...
}
---------------
// TESTRESPONSE[s/\.\.\./"found": true, "_id": "1", "_index": "newindex", "_type": "type", "_version": 1/]
// TESTRESPONSE[s/"2016-06-21T18:48:55.560\+0000"/"$body._source.timestamp"/]
If you have an old index created with 2.x that has `_timestamp` enabled then
you can migrate it to a new index with a `timestamp` field in the source
with reindex:
[source,js]
---------------
POST _reindex
{
"source": {
"index": "oldindex"
},
"dest": {
"index": "newindex"
},
"script": {
"lang": "painless",
"inline": "ctx._source.timestamp = ctx._timestamp; ctx._timestamp = null"
}
}
---------------
// CONSOLE
// TEST[s/^/PUT oldindex\n/]
You can replace `_ttl` with time based index names (preferred) or by adding a
cron job which runs a delete-by-query on a timestamp field in the source
document. If you had documents like this:
[source,js]
---------------
POST index/type/_bulk
{"index":{"_id":1}}
{"example": "data", "timestamp": "2016-06-21T18:48:55.560+0000" }
{"index":{"_id":2}}
{"example": "data", "timestamp": "2016-04-21T18:48:55.560+0000" }
---------------
// CONSOLE
Then you could delete all of the documents from before May 1st with:
[source,js]
---------------
POST index/type/_delete_by_query
{
"query": {
"range" : {
"timestamp" : {
"lt" : "2016-05-01"
}
}
}
}
---------------
// CONSOLE
// TEST[continued]
IMPORTANT: Keep in mind that deleting documents from an index is very expensive
compared to deleting whole indexes. That is why time based indexes are
recommended over this sort of thing and why `_ttl` was deprecated in the first
place.
==== Blank field names is not supported
Blank field names in mappings are not allowed after 5.0.

View File

@ -1,65 +0,0 @@
[[breaking_50_packaging]]
=== Packaging
==== Default logging using systemd (since Elasticsearch 2.2.0)
In previous versions of Elasticsearch, the default logging
configuration routed standard output to /dev/null and standard error to
the journal. However, there are often critical error messages at
startup that are logged to standard output rather than standard error
and these error messages would be lost to the nether. The default has
changed to now route standard output to the journal and standard error
to inherit this setting (these are the defaults for systemd). These
settings can be modified by editing the elasticsearch.service file.
==== Longer startup times
In Elasticsearch 5.0.0 the `-XX:+AlwaysPreTouch` flag has been added to the JVM
startup options. This option touches all memory pages used by the JVM heap
during initialization of the HotSpot VM to reduce the chance of having to commit
a memory page during GC time. This will increase the startup time of
Elasticsearch as well as increasing the initial resident memory usage of the
Java process.
==== JVM options
Arguments to the Java Virtual Machine have been centralized and moved
to a new configuration file jvm.options. This centralization allows for
simpler end-user management of JVM options.
This migration removes all previous mechanisms of setting JVM options
via the environment variables `ES_MIN_MEM`, `ES_MAX_MEM`,
`ES_HEAP_SIZE`, `ES_HEAP_NEWSIZE`, `ES_DIRECT_SIZE`, `ES_USE_IPV4`,
`ES_GC_OPTS`, `ES_GC_LOG_FILE`, and `JAVA_OPTS`.
The default location for this file is in config/jvm.options if installing
from the tar or zip distributions, and /etc/elasticsearch/jvm.options if installing
from the Debian or RPM packages. You can specify an alternative location by setting
the environment variable `ES_JVM_OPTIONS` to the path to the file.
==== /bin/bash is now required
Previously, the scripts used to start Elasticsearch and run plugin
commands only required a Bourne-compatible shell. Starting in
Elasticsearch 5.0.0, the bash shell is now required and `/bin/bash` is a
hard-dependency for the RPM and Debian packages.
==== Environmental Settings
Previously, Elasticsearch could be configured via environment variables
in two ways: first by using the placeholder syntax
`${env.ENV_VAR_NAME}` and the second by using the same syntax without
the `env` prefix: `${ENV_VAR_NAME}`. The first method has been removed
from Elasticsearch.
Additionally, it was previously possible to set any setting in
Elasticsearch via JVM system properties. This has been removed from
Elasticsearch.
==== Dying on fatal errors
Previous versions of Elasticsearch would not halt the JVM if out of memory errors or other fatal
errors were encountered during the life of the Elasticsearch instance. Because such errors leave
the JVM in a questionable state, the best course of action is to halt the JVM when this occurs.
Starting in Elasticsearch 5.x, this is now the case. Operators should consider configuring their
Elasticsearch services so that they respawn automatically in the case of such a fatal crash.

View File

@ -1,131 +0,0 @@
[[breaking_50_percolator]]
=== Percolator changes
==== Percolator is near-real time
Previously percolators were activated in real-time, i.e. as soon as they were
indexed. Now, changes to the `percolate` query are visible in near-real time,
as soon as the index has been refreshed. This change was required because, in
indices created from 5.0 onwards, the terms used in a percolator query are
automatically indexed to allow for more efficient query selection during
percolation.
==== Percolate and multi percolator APIs
Percolator and multi percolate APIs have been deprecated and will be removed in the next major release. These APIs have
been replaced by the `percolate` query that can be used in the search and multi search APIs.
==== Percolator field mapping
The `.percolator` type can no longer be used to index percolator queries.
Instead a <<percolator,percolator field type>> must be configured prior to indexing percolator queries.
Indices with a `.percolator` type created on a version before 5.0.0 can still be used,
but new indices no longer accept the `.percolator` type.
However it is strongly recommended to reindex any indices containing percolator queries created prior to
upgrading to Elasticsearch 5. By doing this the `percolate` query can utilize the terms that the `percolator`
field type extracted from the percolator queries and potentially execute many times faster.
==== Percolate document mapping
The `percolate` query no longer modifies the mappings. Before the percolate API
could be used to dynamically introduce new fields to the mappings based on the
fields in the document being percolated. This no longer works, because these
unmapped fields are not persisted in the mapping.
==== Percolator documents returned by search
Documents with the `.percolator` type were previously excluded from the search
response, unless the `.percolator` type was specified explicitly in the search
request. Now, percolator documents are treated in the same way as any other
document and are returned by search requests.
==== Percolating existing document
When percolating an existing document then also specifying a document as source in the
`percolate` query is not allowed any more. Before the percolate API allowed and ignored
the existing document.
==== Percolate Stats
The percolate stats have been removed. This is because the percolator no longer caches the percolator queries.
==== Percolator queries containing range queries with now ranges
The percolator no longer accepts percolator queries containing `range` queries with ranges that are based on current
time (using `now`).
==== Percolator queries containing scripts.
Percolator queries that contain scripts (For example: `script` query or a `function_score` query script function) that
have no explicit language specified will use the Painless scripting language from version 5.0 and up.
Scripts with no explicit language set in percolator queries stored in indices created prior to version 5.0
will use the language that has been configured in the `script.legacy.default_lang` setting. This setting defaults to
the Groovy scripting language, which was the default for versions prior to 5.0. If your default scripting language was
different then set the `script.legacy.default_lang` setting to the language you used before.
In order to make use of the new `percolator` field type all percolator queries should be reindexed into a new index.
When reindexing percolator queries with scripts that have no explicit language defined into a new index, one of the
following two things should be done in order to make the scripts work:
* (Recommended approach) While reindexing the percolator documents, migrate the scripts to the Painless scripting language.
* or add the `lang` parameter on the script and set it to the language these scripts were written in.
==== Java client
The percolator is no longer part of the core elasticsearch dependency. It has moved to the percolator module.
Therefore when using the percolator feature from the Java client the new percolator module should also be on the
classpath. Also the transport client should load the percolator module as plugin:
[source,java]
--------------------------------------------------
TransportClient transportClient = TransportClient.builder()
.settings(Settings.builder().put("node.name", "node"))
.addPlugin(PercolatorPlugin.class)
.build();
transportClient.addTransportAddress(
new InetSocketTransportAddress(new InetSocketAddress(InetAddresses.forString("127.0.0.1"), 9300))
);
--------------------------------------------------
The percolator and multi percolate related methods from the `Client` interface have been removed. These APIs have been
deprecated and it is recommended to use the `percolate` query in either the search or multi search APIs. However the
percolate and multi percolate APIs can still be used from the Java client.
Using percolate request:
[source,java]
--------------------------------------------------
PercolateRequest request = new PercolateRequest();
// set stuff and then execute:
PercolateResponse response = transportClient.execute(PercolateAction.INSTANCE, request).actionGet();
--------------------------------------------------
Using percolate request builder:
[source,java]
--------------------------------------------------
PercolateRequestBuilder builder = new PercolateRequestBuilder(transportClient, PercolateAction.INSTANCE);
// set stuff and then execute:
PercolateResponse response = builder.get();
--------------------------------------------------
Using multi percolate request:
[source,java]
--------------------------------------------------
MultiPercolateRequest request = new MultiPercolateRequest();
// set stuff and then execute:
MultiPercolateResponse response = transportClient.execute(MultiPercolateAction.INSTANCE, request).get();
--------------------------------------------------
Using multi percolate request builder:
[source,java]
--------------------------------------------------
MultiPercolateRequestBuilder builder = new MultiPercolateRequestBuilder(transportClient, MultiPercolateAction.INSTANCE);
// set stuff and then execute:
MultiPercolateResponse response = builder.get();
--------------------------------------------------

View File

@ -1,161 +0,0 @@
[[breaking_50_plugins]]
=== Plugin changes
The command `bin/plugin` has been renamed to `bin/elasticsearch-plugin`. The
structure of the plugin ZIP archive has changed. All the plugin files must be
contained in a top-level directory called `elasticsearch`. If you use the
gradle build, this structure is automatically generated.
==== Plugins isolation
`isolated` option has been removed. Each plugin will have its own classloader.
==== Site plugins removed
Site plugins have been removed. Site plugins should be reimplemented as Kibana
plugins.
==== Multicast plugin removed
Multicast has been removed. Use unicast discovery, or one of the cloud
discovery plugins.
==== Plugins with custom query implementations
Plugins implementing custom queries need to implement the `fromXContent(QueryParseContext)` method in their
`QueryParser` subclass rather than `parse`. This method will take care of parsing the query from `XContent` format
into an intermediate query representation that can be streamed between the nodes in binary format, effectively the
query object used in the java api. Also, the query parser needs to implement the `getBuilderPrototype` method that
returns a prototype of the `NamedWriteable` query, which allows to deserialize an incoming query by calling
`readFrom(StreamInput)` against it, which will create a new object, see usages of `Writeable`. The `QueryParser`
also needs to declare the generic type of the query that it supports and it's able to parse.
The query object can then transform itself into a lucene query through the new `toQuery(QueryShardContext)` method,
which returns a lucene query to be executed on the data node.
Similarly, plugins implementing custom score functions need to implement the `fromXContent(QueryParseContext)`
method in their `ScoreFunctionParser` subclass rather than `parse`. This method will take care of parsing
the function from `XContent` format into an intermediate function representation that can be streamed between
the nodes in binary format, effectively the function object used in the java api. Also, the function parser needs
to implement the `getBuilderPrototype` method that returns a prototype of the `NamedWriteable` function, which
allows to deserialize an incoming function by calling `readFrom(StreamInput)` against it, which will create a
new object, see usages of `Writeable`. The `ScoreFunctionParser` also needs to declare the generic type of the
function that it supports and it's able to parse. The function object can then transform itself into a lucene
function through the new `toFunction(QueryShardContext)` method, which returns a lucene function to be executed
on the data node.
==== Cloud AWS plugin changes
Cloud AWS plugin has been split in two plugins:
* {plugins}/discovery-ec2.html[Discovery EC2 plugin]
* {plugins}/repository-s3.html[Repository S3 plugin]
Proxy settings for both plugins have been renamed:
* from `cloud.aws.proxy_host` to `cloud.aws.proxy.host`
* from `cloud.aws.ec2.proxy_host` to `cloud.aws.ec2.proxy.host`
* from `cloud.aws.s3.proxy_host` to `cloud.aws.s3.proxy.host`
* from `cloud.aws.proxy_port` to `cloud.aws.proxy.port`
* from `cloud.aws.ec2.proxy_port` to `cloud.aws.ec2.proxy.port`
* from `cloud.aws.s3.proxy_port` to `cloud.aws.s3.proxy.port`
==== Cloud Azure plugin changes
Cloud Azure plugin has been split in three plugins:
* {plugins}/discovery-azure-classic.html[Discovery Azure plugin]
* {plugins}/repository-azure.html[Repository Azure plugin]
* {plugins}/store-smb.html[Store SMB plugin]
If you were using the `cloud-azure` plugin for snapshot and restore, you had in `elasticsearch.yml`:
[source,yaml]
-----
cloud:
azure:
storage:
account: your_azure_storage_account
key: your_azure_storage_key
-----
You need to give a unique id to the storage details now as you can define multiple storage accounts:
[source,yaml]
-----
cloud:
azure:
storage:
my_account:
account: your_azure_storage_account
key: your_azure_storage_key
-----
==== Cloud GCE plugin changes
Cloud GCE plugin has been renamed to {plugins}/discovery-gce.html[Discovery GCE plugin].
==== Delete-By-Query plugin removed
The Delete-By-Query plugin has been removed in favor of a new <<docs-delete-by-query,Delete By Query API>>
implementation in core. It now supports throttling, retries and cancellation but no longer supports timeouts.
Instead use the <<docs-delete-by-query-cancel-task-api,cancel API>> to cancel deletes that run too long.
==== Mapper Attachments plugin deprecated
Mapper attachments has been deprecated. Users should now use the {plugins}/ingest-attachment.html[`ingest-attachment`]
plugin.
==== Passing of Java System Properties
Previously, Java system properties could be passed to the plugin
command by passing `-D` style arguments directly to the plugin script.
This is no longer permitted and such system properties must be passed
via ES_JAVA_OPTS.
==== Custom plugins path
The ability to specify a custom plugins path via `path.plugins` has
been removed.
==== ScriptPlugin
Plugins that register custom scripts should implement `ScriptPlugin` and remove
their `onModule(ScriptModule)` implementation.
==== AnalysisPlugin
Plugins that register custom analysis components should implement
`AnalysisPlugin` and remove their `onModule(AnalysisModule)` implementation.
==== MapperPlugin
Plugins that register custom mappers should implement
`MapperPlugin` and remove their `onModule(IndicesModule)` implementation.
==== ActionPlugin
Plugins that register custom actions should implement `ActionPlugin` and
remove their `onModule(ActionModule)` implementation.
Plugins that register custom `RestHandler`s should implement `ActionPlugin` and
remove their `onModule(NetworkModule)` implementation.
==== SearchPlugin
Plugins that register custom search time behavior (`Query`, `Suggester`,
`ScoreFunction`, `FetchSubPhase`, `Highlighter`, etc) should implement
`SearchPlugin` and remove their `onModule(SearchModule)` implementation.
==== Testing Custom Plugins
`ESIntegTestCase#pluginList` has been removed. Use `Arrays.asList` instead. It
isn't needed now that all plugins require Java 1.8.
==== Mapper-Size plugin
The metadata field `_size` is not accessible in aggregations, scripts and when
sorting for indices created in 2.x even if the index has been upgraded using the <<indices-upgrade,`upgrade`>> API.
If these features are needed in your application it is required to reindex the data with Elasticsearch 5.x.
The easiest way to reindex old indices is to use the `reindex` API, or the reindex UI provided by
the <<migration-plugin,Migration Plugin>>.

View File

@ -1,100 +0,0 @@
[[breaking_50_rest_api_changes]]
=== REST API changes
==== id values longer than 512 bytes are rejected
When specifying an `_id` value longer than 512 bytes, the request will be
rejected.
==== `/_optimize` endpoint removed
The deprecated `/_optimize` endpoint has been removed. The `/_forcemerge`
endpoint should be used in lieu of optimize.
The `GET` HTTP verb for `/_forcemerge` is no longer supported, please use the
`POST` HTTP verb.
==== Index creation endpoint only accepts `PUT`
It used to be possible to create an index by either calling `PUT index_name`
or `POST index_name`. Only the former is now supported.
==== `HEAD {index}/{type}` replaced with `HEAD {index}/_mapping/{type}`
The endpoint for checking whether a type exists has been changed from
`{index}/{type}` to `{index}/_mapping/{type}` in order to prepare for the
removal of types when `HEAD {index}/{id}` will be used to check whether a
document exists in an index. The old endpoint will keep working until 6.0.
==== Removed `mem` section from `/_cluster/stats` response
The `mem` section contained only the `total` value, which was actually the
memory available throughout all nodes in the cluster. The section now contains
`total`, `free`, `used`, `used_percent` and `free_percent`.
==== Revised node roles aggregate returned by `/_cluster/stats`
The `client`, `master_only`, `data_only` and `master_data` fields have been
removed in favor of `master`, `data`, `ingest` and `coordinating_only`. A
node can contribute to multiple counts as it can have multiple roles. Every
node is implicitly a coordinating node, so whenever a node has no explicit
roles, it will be counted as coordinating only.
==== Removed shard `version` information from `/_cluster/state` routing table
We now store allocation id's of shards in the cluster state and use that to
select primary shards instead of the version information.
==== Node roles are not part of node attributes anymore
Node roles are now returned in a specific section, called `roles`, as part of
nodes stats and nodes info response. The new section is an array that holds all
the different roles that each node fulfills. In case the array is returned
empty, that means that the node is a coordinating only node.
==== Forbid unquoted JSON
Previously, JSON documents were allowed with unquoted field names, which isn't
strictly JSON and broke some Elasticsearch clients. If documents were already
indexed with unquoted fields in a previous version of Elasticsearch, some
operations may throw errors. To accompany this, a commented out JVM option has
been added to the `jvm.options` file:
`-Delasticsearch.json.allow_unquoted_field_names`.
Note that this option is provided solely for migration purposes and will be
removed in Elasticsearch 6.0.0.
==== Analyze API changes
The `filters` and `char_filters` parameters have been renamed `filter` and `char_filter`.
The `token_filters` parameter has been removed. Use `filter` instead.
==== `DELETE /_query` endpoint removed
The `DELETE /_query` endpoint provided by the Delete-By-Query plugin has been
removed and replaced by the <<docs-delete-by-query,Delete By Query API>>.
==== Create stored script endpoint removed
The `PUT /_scripts/{lang}/{id}/_create` endpoint that previously allowed to create
indexed scripts has been removed. Indexed scripts have been replaced
by <<modules-scripting-stored-scripts,stored scripts>>.
==== Create stored template endpoint removed
The `PUT /_search/template/{id}/_create` endpoint that previously allowed to create
indexed template has been removed. Indexed templates have been replaced
by <<pre-registered-templates, Pre-registered templates>>.
==== Remove properties support
Some REST endpoints (e.g., cluster update index settings) supported detecting content in the Java
properties format (line-delimited key=value pairs). This support has been removed.
==== `wait_for_relocating_shards` is now `wait_for_no_relocating_shards` in `/_cluster/health`
The `wait_for_relocating_shards` parameter that used to take a number is now simply a boolean
flag `wait_for_no_relocating_shards`, which if set to true, means the request will wait (up
until the configured timeout) for the cluster to have no shard relocations before returning.
Defaults to false, which means the operation will not wait.

View File

@ -1,392 +0,0 @@
[[breaking_50_scripting]]
=== Script related changes
==== Switched Default Language from Groovy to Painless
The default scripting language for Elasticsearch is now Painless. Painless is a custom-built language with syntax
similar to Groovy designed to be fast as well as secure. Many Groovy scripts will be identical to Painless scripts
to help make the transition between languages as simple as possible.
Documentation for Painless can be found at <<modules-scripting-painless,Painless Scripting Language>>
One common difference to note between Groovy and Painless is the use of parameters -- all parameters in Painless
must be prefixed with `params.` now. The following example shows the difference:
Groovy:
[source,js]
-----------------------------------
{
"script_score": {
"script": {
"lang": "groovy",
"inline": "Math.log(_score * 2) + my_modifier",
"params": {
"my_modifier": 8
}
}
}
}
-----------------------------------
Painless (`my_modifier` is prefixed with `params`):
[source,js]
-----------------------------------
{
"script_score": {
"script": {
"lang": "painless",
"inline": "Math.log(_score * 2) + params.my_modifier",
"params": {
"my_modifier": 8
}
}
}
}
-----------------------------------
The `script.default_lang` setting has been removed. It is no longer possible to set the default scripting language. If a
different language than `painless` is used then this should be explicitly specified on the script itself.
For scripts with no explicit language defined, that are part of already stored percolator queries, the default language
can be controlled with the `script.legacy.default_lang` setting.
==== Removed 1.x script and template syntax
The deprecated 1.x syntax of defining inline scripts / templates and referring to file or index base scripts / templates
have been removed.
The `script` and `params` string parameters can no longer be used and instead the `script` object syntax must be used.
This applies for the update api, script sort, `script_score` function, `script` query, `scripted_metric` aggregation and
`script_heuristic` aggregation.
So this usage of inline scripts is no longer allowed:
[source,js]
-----------------------------------
{
"script_score": {
"lang": "groovy",
"script": "Math.log(_score * 2) + my_modifier",
"params": {
"my_modifier": 8
}
}
}
-----------------------------------
and instead this syntax must be used:
[source,js]
-----------------------------------
{
"script_score": {
"script": {
"lang": "groovy",
"inline": "Math.log(_score * 2) + my_modifier",
"params": {
"my_modifier": 8
}
}
}
}
-----------------------------------
The `script` or `script_file` parameter can no longer be used to refer to file based scripts and templates and instead
`file` must be used.
This usage of referring to file based scripts is no longer valid:
[source,js]
-----------------------------------
{
"script_score": {
"script": "calculate-score",
"params": {
"my_modifier": 8
}
}
}
-----------------------------------
This usage is valid:
[source,js]
-----------------------------------
{
"script_score": {
"script": {
"lang": "groovy",
"file": "calculate-score",
"params": {
"my_modifier": 8
}
}
}
}
-----------------------------------
The `script_id` parameter can no longer be used to refer to indexed scripts and templates and instead `id` must
be used.
This usage of referring to indexed scripts is no longer valid:
[source,js]
-----------------------------------
{
"script_score": {
"script_id": "indexedCalculateScore",
"params": {
"my_modifier": 8
}
}
}
-----------------------------------
This usage is valid:
[source,js]
-----------------------------------
{
"script_score": {
"script": {
"id": "indexedCalculateScore",
"lang" : "groovy",
"params": {
"my_modifier": 8
}
}
}
}
-----------------------------------
==== Template query
The `query` field in the `template` query can no longer be used.
This 1.x syntax can no longer be used:
[source,js]
-----------------------------------
{
"query": {
"template": {
"query": {"match_{{template}}": {}},
"params" : {
"template" : "all"
}
}
}
}
-----------------------------------
and instead the following syntax should be used:
[source,js]
-----------------------------------
{
"query": {
"template": {
"inline": {"match_{{template}}": {}},
"params" : {
"template" : "all"
}
}
}
}
-----------------------------------
==== Search templates
The top level `template` field in the search template api has been replaced with consistent template / script object
syntax. This 1.x syntax can no longer be used:
[source,js]
-----------------------------------
{
"template" : {
"query": { "match" : { "{{my_field}}" : "{{my_value}}" } },
"size" : "{{my_size}}"
},
"params" : {
"my_field" : "foo",
"my_value" : "bar",
"my_size" : 5
}
}
-----------------------------------
and instead the following syntax should be used:
[source,js]
-----------------------------------
{
"inline" : {
"query": { "match" : { "{{my_field}}" : "{{my_value}}" } },
"size" : "{{my_size}}"
},
"params" : {
"my_field" : "foo",
"my_value" : "bar",
"my_size" : 5
}
}
-----------------------------------
==== Indexed scripts and templates
Indexed scripts and templates have been replaced by <<modules-scripting-stored-scripts,stored scripts>>
which stores the scripts and templates in the cluster state instead of a dedicated `.scripts` index.
For the size of stored scripts there is a soft limit of 65535 bytes. If scripts exceed that size then
the `script.max_size_in_bytes` setting can be added to elasticsearch.yml to change the soft limit to a higher value.
If scripts are really large, other options like native scripts should be considered.
Previously indexed scripts in the `.scripts` index will not be used any more as
Elasticsearch will now try to fetch the scripts from the cluster state. Upon upgrading
to 5.x the `.scripts` index will remain to exist, so it can be used by a script to migrate
the stored scripts from the `.scripts` index into the cluster state. The current format of the scripts
and templates hasn't been changed, only the 1.x format has been removed.
===== Python migration script
The following Python script can be used to import your indexed scripts into the cluster state
as stored scripts:
[source,python]
-----------------------------------
from elasticsearch import Elasticsearch,helpers
es = Elasticsearch([
{'host': 'localhost'}
])
for doc in helpers.scan(es, index=".scripts", preserve_order=True):
es.put_script(lang=doc['_type'], id=doc['_id'], body=doc['_source'])
-----------------------------------
This script makes use of the official Elasticsearch Python client and
therefore you need to make sure that you have installed the client in your
environment. For more information on this please see
https://www.elastic.co/guide/en/elasticsearch/client/python-api/current/index.html[`elasticsearch-py`].
===== Perl migration script
The following Perl script can be used to import your indexed scripts into the cluster state
as stored scripts:
[source,perl]
-----------------------------------
use Search::Elasticsearch;
my $es = Search::Elasticsearch->new( nodes => 'localhost:9200');
my $scroll = $es->scroll_helper( index => '.scripts', sort => '_doc');
while (my $doc = $scroll->next) {
$es->put_script(
lang => $doc->{_type},
id => $doc->{_id},
body => $doc->{_source}
);
}
-----------------------------------
This script makes use of the official Elasticsearch Perl client and
therefore you need to make sure that you have installed the client in your
environment. For more information on this please see
https://metacpan.org/pod/Search::Elasticsearch[`Search::Elasticsearch`].
===== Verifying script migration
After you have moved the scripts via the provided script or otherwise then you can verify with the following
request if the migration has happened successfully:
[source,js]
-----------------------------------
GET _cluster/state?filter_path=metadata.stored_scripts
-----------------------------------
The response should include all your scripts from the `.scripts` index.
After you have verified that all your scripts have been moved, optionally as a last step,
you can delete the `.scripts` index as Elasticsearch no longer uses it.
==== Indexed scripts Java APIs
All the methods related to interacting with indexed scripts have been removed.
The Java API methods for interacting with stored scripts have been added under `ClusterAdminClient` class.
The sugar methods that used to exist on the indexed scripts API methods don't exist on the methods for
stored scripts. The only way to provide scripts is by using `BytesReference` implementation, if a string needs to be
provided the `BytesArray` class should be used.
==== Scripting engines now register only a single language
Prior to 5.0.0, script engines could register multiple languages. The Javascript
script engine in particular registered both `"lang": "js"` and `"lang":
"javascript"`. Script engines can now only register a single language. All
references to `"lang": "js"` should be changed to `"lang": "javascript"` for
existing users of the lang-javascript plugin.
==== Scripting engines now register only a single extension
Prior to 5.0.0 scripting engines could register multiple extensions. The only
engine doing this was the Javascript engine, which registered "js" and
"javascript". It now only registers the "js" file extension for on-disk scripts.
==== `.javascript` files are no longer supported (use `.js`)
The Javascript engine previously registered "js" and "javascript". It now only
registers the "js" file extension for on-disk scripts.
==== Removed scripting query string parameters from update rest api
The `script`, `script_id` and `scripting_upsert` query string parameters have been removed from the update api.
==== Java transport client
The `TemplateQueryBuilder` has been moved to the `lang-mustache` module.
Therefore, when using the `TemplateQueryBuilder` from the Java native client the
lang-mustache module should be on the classpath. Also the transport client
should load the lang-mustache module as plugin:
[source,java]
--------------------------------------------------
TransportClient transportClient = TransportClient.builder()
.settings(Settings.builder().put("node.name", "node"))
.addPlugin(MustachePlugin.class)
.build();
transportClient.addTransportAddress(
new InetSocketTransportAddress(new InetSocketAddress(InetAddresses.forString("127.0.0.1"), 9300))
);
--------------------------------------------------
Also the helper methods in `QueryBuilders` class that create a `TemplateQueryBuilder` instance have been removed,
instead the constructors on `TemplateQueryBuilder` should be used.
==== Template query
The `template` query has been deprecated in favour of the search template api. The `template` query is scheduled
to be removed in the next major version.
==== GeoPoint scripts
The following helper methods have been removed from GeoPoint scripting:
* `factorDistance`
* `factorDistanceWithDefault`
* `factorDistance02`
* `factorDistance13`
* `arcDistanceInKm`
* `arcDistanceInKmWithDefault`
* `arcDistanceInMiles`
* `arcDistanceInMilesWithDefault`
* `distanceWithDefault`
* `distanceInKm`
* `distanceInKmWithDefault`
* `distanceInMiles`
* `distanceInMilesWithDefault`
* `geohashDistanceInKm`
* `geohashDistanceInMiles`
Instead use `arcDistance`, `arcDistanceWithDefault`, `planeDistance`, `planeDistanceWithDefault`, `geohashDistance`,
`geohashDistanceWithDefault` and convert from default units (meters) to desired units using the appropriate constant
(e.g., multiply by `0.001` to convert to Km).

View File

@ -1,212 +0,0 @@
[[breaking_50_search_changes]]
=== Search and Query DSL changes
==== `search_type`
===== `search_type=count` removed
The `count` search type was deprecated since version 2.0.0 and is now removed.
In order to get the same benefits, you just need to set the value of the `size`
parameter to `0`.
For instance, the following request:
[source,sh]
---------------
GET /my_index/_search?search_type=count
{
"aggs": {
"my_terms": {
"terms": {
"field": "foo"
}
}
}
}
---------------
can be replaced with:
[source,sh]
---------------
GET /my_index/_search
{
"size": 0,
"aggs": {
"my_terms": {
"terms": {
"field": "foo"
}
}
}
}
---------------
===== `search_type=scan` removed
The `scan` search type was deprecated since version 2.1.0 and is now removed.
All benefits from this search type can now be achieved by doing a scroll
request that sorts documents in `_doc` order, for instance:
[source,sh]
---------------
GET /my_index/_search?scroll=2m
{
"sort": [
"_doc"
]
}
---------------
Scroll requests sorted by `_doc` have been optimized to more efficiently resume
from where the previous request stopped, so this will have the same performance
characteristics as the former `scan` search type.
==== `fields` parameter
The `fields` parameter has been replaced by `stored_fields`.
The `stored_fields` parameter will only return stored fields
-- it will no longer extract values from the `_source`.
==== `fielddata_fields` parameter
The `fielddata_fields` parameter has been deprecated; use the `docvalue_fields` parameter instead.
==== search-exists API removed
The search exists api has been removed in favour of using the search api with
`size` set to `0` and `terminate_after` set to `1`.
==== Deprecated queries removed
The following deprecated queries have been removed:
`filtered`:: Use `bool` query instead, which supports `filter` clauses too.
`and`:: Use `must` clauses in a `bool` query instead.
`or`:: Use `should` clauses in a `bool` query instead.
`limit`:: Use the `terminate_after` parameter instead.
`fquery`:: Is obsolete after filters and queries have been merged.
`query`:: Is obsolete after filters and queries have been merged.
`query_binary`:: Was undocumented and has been removed.
`filter_binary`:: Was undocumented and has been removed.
==== Changes to queries
* Unsupported queries such as term queries on `geo_point` fields will now fail
rather than returning no hits.
* Removed support for fuzzy queries on numeric, date and ip fields, use range
queries instead.
* Removed support for range and prefix queries on `_uid` and `_id` fields.
* Querying an unindexed field will now fail rather than returning no hits.
* Removed support for the deprecated `min_similarity` parameter in `fuzzy
query`, in favour of `fuzziness`.
* Removed support for the deprecated `fuzzy_min_sim` parameter in
`query_string` query, in favour of `fuzziness`.
* Removed support for the deprecated `edit_distance` parameter in completion
suggester, in favour of `fuzziness`.
* Removed support for the deprecated `filter` and `no_match_filter` fields in `indices` query,
in favour of `query` and `no_match_query`.
* Removed support for the deprecated `filter` fields in `nested` query, in favour of `query`.
* Removed support for the deprecated `minimum_should_match` and
`disable_coord` in `terms` query, use `bool` query instead. Also removed
support for the deprecated `execution` parameter.
* Removed support for the top level `filter` element in `function_score` query, replaced by `query`.
* The `collect_payloads` parameter of the `span_near` query has been deprecated. Payloads will be loaded when needed.
* The `score_type` parameter to the `nested` and `has_child` queries has been
removed in favour of `score_mode`. The `score_mode` parameter to `has_parent`
has been deprecated in favour of the `score` boolean parameter. Also, the
`total` score mode has been removed in favour of the `sum` mode.
* When the `max_children` parameter was set to `0` on the `has_child` query
then there was no upper limit on how many child documents were allowed to
match. Now, `0` really means that zero child documents are allowed. If no
upper limit is needed then the `max_children` parameter shouldn't be specified
at all.
* The `exists` query will now fail if the `_field_names` field is disabled.
* The `multi_match` query will fail if `fuzziness` is used for `cross_fields`, `phrase` or `phrase_prefix` type.
This parameter was undocumented and silently ignored before for these types of `multi_match`.
* Deprecated support for the coerce, normalize, ignore_malformed parameters in GeoPolygonQuery. Use parameter validation_method instead.
* Deprecated support for the coerce, normalize, ignore_malformed parameters in GeoDistanceRangeQuery. Use parameter validation_method instead.
* Deprecated support for the coerce, normalize, ignore_malformed parameters in GeoDistanceQuery. Use parameter validation_method instead.
* Deprecated support for the coerce, normalize, ignore_malformed parameters in GeoBoundingBoxQuery. Use parameter validation_method instead.
==== Top level `filter` parameter
Removed support for the deprecated top level `filter` in the search api,
replaced by `post_filter`.
==== Highlighters
Removed support for multiple highlighter names, the only supported ones are:
`plain`, `fvh` and `postings`.
==== Term vectors API
The term vectors APIs no longer persist unmapped fields in the mappings.
The `dfs` parameter to the term vectors API has been removed completely. Term
vectors don't support distributed document frequencies anymore.
==== Sort
The `reverse` parameter has been removed, in favour of explicitly
specifying the sort order with the `order` option.
The `coerce` and `ignore_malformed` parameters were deprecated in favour of `validation_method`.
==== Inner hits
* Top level inner hits syntax has been removed. Inner hits can now only be specified as part of the `nested`,
`has_child` and `has_parent` queries. Use cases previously only possible with top level inner hits can now be done
with inner hits defined inside the query dsl.
* Source filtering for inner hits inside nested queries requires full field names instead of relative field names.
This is now consistent for source filtering on other places in the search API.
* Nested inner hits will now no longer include `_index`, `_type` and `_id` keys. For nested inner hits these values
are always the same as the `_index`, `_type` and `_id` keys of the root search hit.
* Parent/child inner hits will now no longer include the `_index` key. For parent/child inner hits the `_index` key is
always the same as the parent search hit.
==== Query Profiler
In the response for profiling queries, the `query_type` has been renamed to `type` and `lucene` has been renamed to
`description`. These changes have been made so the response format is more friendly to supporting other types of profiling
in the future.
==== Search preferences
The <<search-request-preference,search preference>> `_only_node` has
been removed. The same behavior can be achieved by using `_only_nodes`
and specifying a single node ID.
The <<search-request-preference,search preference>> `_prefer_node` has
been superseded by `_prefer_nodes`. By specifying a single node,
`_prefer_nodes` provides the same functionality as `_prefer_node` but
also supports specifying multiple nodes.
==== Default similarity
The default similarity has been changed to `BM25`.

View File

@ -1,323 +0,0 @@
[[breaking_50_settings_changes]]
=== Settings changes
From Elasticsearch 5.0 on all settings are validated before they are applied.
Node level and default index level settings are validated on node startup,
dynamic cluster and index settings are validated before they are updated/added
to the cluster state.
Every setting must be a *known* setting. All settings must have been
registered with the node or transport client they are used with. This implies
that plugins that define custom settings must register all of their settings
during plugin loading using the `SettingsModule#registerSettings(Setting)`
method.
==== Index Level Settings
In previous versions Elasticsearch allowed to specify index level setting
as _defaults_ on the node level, inside the `elasticsearch.yaml` file or even via
command-line parameters. From Elasticsearch 5.0 on only selected settings like
for instance `index.codec` can be set on the node level. All other settings must be
set on each individual index. To set default values on every index, index templates
should be used instead.
==== Node settings
The `name` setting has been removed and is replaced by `node.name`. Usage of
`-Dname=some_node_name` is not supported anymore.
The `node.add_id_to_custom_path` was renamed to `add_lock_id_to_custom_path`.
The default for the `node.name` setting is now the first 7 characters of the node id,
which is in turn a randomly generated UUID.
The settings `node.mode` and `node.local` are removed. Local mode should be configured via
`discovery.type: local` and `transport.type: local`. In order to disable _http_, please use `http.enabled: false`.
==== Node attribute settings
Node level attributes used for allocation filtering, forced awareness or other node identification / grouping
must be prefixed with `node.attr`. In previous versions it was possible to specify node attributes with the `node.`
prefix. All node attributes except `node.master`, `node.data` and `node.ingest` must be moved to the new `node.attr.`
namespace.
==== Node types settings
The `node.client` setting has been removed. A node with such a setting set will not
start up. Instead, each node role needs to be set separately using the existing
`node.master`, `node.data` and `node.ingest` supported static settings.
==== Gateway settings
The `gateway.format` setting for configuring global and index state serialization
format has been removed. By default, `smile` is used as the format.
==== Transport Settings
All settings with a `netty` infix have been replaced by their already existing
`transport` synonyms. For instance `transport.netty.bind_host` is no longer
supported and should be replaced by the superseding setting
`transport.bind_host`.
==== Security manager settings
The option to disable the security manager `security.manager.enabled` has been
removed. In order to grant special permissions to elasticsearch, users must
edit the local Java Security Policy.
==== Network settings
The `_non_loopback_` value for settings like `network.host` would arbitrarily
pick the first interface not marked as loopback. Instead, specify by address
scope (e.g. `_local_,_site_` for all loopback and private network addresses)
or by explicit interface names, hostnames, or addresses.
The `netty.epollBugWorkaround` setting has been removed. This setting allowed people to enable
a netty work around for https://github.com/netty/netty/issues/327[a high CPU usage issue] with early JVM versions.
This bug was http://bugs.java.com/view_bug.do?bug_id=6403933[fixed in Java 7]. Since Elasticsearch 5.0 requires Java 8, the setting has been removed. Note that if the workaround needs to be reintroduced you can still set the `org.jboss.netty.epollBugWorkaround` system property to control Netty directly.
==== Forbid changing of thread pool types
Previously, <<modules-threadpool,thread pool types>> could be dynamically
adjusted. The thread pool type effectively controls the backing queue for the
thread pool and modifying this is an expert setting with minimal practical
benefits and high risk of being misused. The ability to change the thread pool
type for any thread pool has been removed. It is still possible to adjust
relevant thread pool parameters for each of the thread pools (e.g., depending
on the thread pool type, `keep_alive`, `queue_size`, etc.).
==== Threadpool settings
The `suggest` threadpool has been removed, now suggest requests use the
`search` threadpool.
The prefix on all thread pool settings has been changed from
`threadpool` to `thread_pool`.
The minimum size setting for a scaling thread pool has been changed
from `min` to `core`.
The maximum size setting for a scaling thread pool has been changed
from `size` to `max`.
The queue size setting for a fixed thread pool must be `queue_size`
(all other variants that were previously supported are no longer
supported).
Thread pool settings are now node-level settings. As such, it is not
possible to update thread pool settings via the cluster settings API.
==== Analysis settings
The `index.analysis.analyzer.default_index` analyzer is not supported anymore.
If you wish to change the analyzer to use for indexing, change the
`index.analysis.analyzer.default` analyzer instead.
==== Ping settings
Previously, there were three settings for the ping timeout:
`discovery.zen.initial_ping_timeout`, `discovery.zen.ping.timeout` and
`discovery.zen.ping_timeout`. The former two have been removed and the only
setting key for the ping timeout is now `discovery.zen.ping_timeout`. The
default value for ping timeouts remains at three seconds.
`discovery.zen.master_election.filter_client` and `discovery.zen.master_election.filter_data` have
been removed in favor of the new `discovery.zen.master_election.ignore_non_master_pings`. This setting controls how ping responses
are interpreted during master election and should be used with care and only in extreme cases. See documentation for details.
==== Recovery settings
Recovery settings deprecated in 1.x have been removed:
* `index.shard.recovery.translog_size` is superseded by `indices.recovery.translog_size`
* `index.shard.recovery.translog_ops` is superseded by `indices.recovery.translog_ops`
* `index.shard.recovery.file_chunk_size` is superseded by `indices.recovery.file_chunk_size`
* `index.shard.recovery.concurrent_streams` is superseded by `indices.recovery.concurrent_streams`
* `index.shard.recovery.concurrent_small_file_streams` is superseded by `indices.recovery.concurrent_small_file_streams`
* `indices.recovery.max_size_per_sec` is superseded by `indices.recovery.max_bytes_per_sec`
If you are using any of these settings please take the time to review their
purpose. All of the settings above are considered _expert settings_ and should
only be used if absolutely necessary. If you have set any of the above settings
as persistent cluster settings please use the settings update API and set
their superseding keys accordingly.
The following settings have been removed without replacement
* `indices.recovery.concurrent_small_file_streams` - recoveries are now single threaded. The number of concurrent outgoing recoveries is throttled via allocation deciders.
* `indices.recovery.concurrent_file_streams` - recoveries are now single threaded. The number of concurrent outgoing recoveries is throttled via allocation deciders.
==== Translog settings
The `index.translog.flush_threshold_ops` setting is not supported anymore. In
order to control flushes based on the transaction log growth use
`index.translog.flush_threshold_size` instead.
Changing the translog type with `index.translog.fs.type` is not supported
anymore, the `buffered` implementation is now the only available option and
uses a fixed `8kb` buffer.
The translog by default is fsynced after every `index`, `create`, `update`,
`delete`, or `bulk` request. The ability to fsync on every operation is not
necessary anymore. In fact, it can be a performance bottleneck and it's trappy
since it is enabled by a special value set on `index.translog.sync_interval`.
Now, `index.translog.sync_interval` doesn't accept a value less than `100ms`
which prevents fsyncing too often if async durability is enabled. The special
value `0` is no longer supported.
`index.translog.interval` has been removed.
==== Request Cache Settings
The deprecated settings `index.cache.query.enable` and
`indices.cache.query.size` have been removed and are replaced with
`index.requests.cache.enable` and `indices.requests.cache.size` respectively.
`indices.requests.cache.clean_interval` has been replaced with
`indices.cache.clean_interval` and is no longer supported.
==== Field Data Cache Settings
The `indices.fielddata.cache.clean_interval` setting has been replaced with
`indices.cache.clean_interval`.
==== Allocation settings
The `cluster.routing.allocation.concurrent_recoveries` setting has been
replaced with `cluster.routing.allocation.node_concurrent_recoveries`.
==== Similarity settings
The 'default' similarity has been renamed to 'classic'.
==== Indexing settings
The `indices.memory.min_shard_index_buffer_size` and
`indices.memory.max_shard_index_buffer_size` have been removed as
Elasticsearch now allows any one shard to use any amount of heap as long as the
total indexing buffer heap used across all shards is below the node's
`indices.memory.index_buffer_size` (defaults to 10% of the JVM heap).
==== Removed es.max-open-files
Setting the system property es.max-open-files to true to get
Elasticsearch to print the number of maximum open files for the
Elasticsearch process has been removed. This same information can be
obtained from the <<cluster-nodes-info>> API, and a warning is logged
on startup if it is set too low.
==== Removed es.netty.gathering
Disabling Netty from using NIO gathering could be done via the escape
hatch of setting the system property "es.netty.gathering" to "false".
Time has proven enabling gathering by default is a non-issue and this
non-documented setting has been removed.
==== Removed es.useLinkedTransferQueue
The system property `es.useLinkedTransferQueue` could be used to
control the queue implementation used in the cluster service and the
handling of ping responses during discovery. This was an undocumented
setting and has been removed.
==== Cache concurrency level settings removed
Two cache concurrency level settings,
`indices.requests.cache.concurrency_level` and
`indices.fielddata.cache.concurrency_level`, have been removed because they no longer apply to
the cache implementation used for the request cache and the field data cache.
==== Using system properties to configure Elasticsearch
Elasticsearch can no longer be configured by setting system properties.
Instead, use `-Ename.of.setting=value.of.setting`.
==== Removed using double-dashes to configure Elasticsearch
Elasticsearch could previously be configured on the command line by
setting settings via `--name.of.setting value.of.setting`. This feature
has been removed. Instead, use `-Ename.of.setting=value.of.setting`.
==== Remove support for .properties config files
The Elasticsearch configuration and logging configuration can no longer be stored in the Java
properties file format (line-delimited key=value pairs with a `.properties` extension).
==== Discovery Settings
The `discovery.zen.minimum_master_nodes` must be set for nodes that have
`network.host`, `network.bind_host`, `network.publish_host`,
`transport.host`, `transport.bind_host`, or `transport.publish_host`
configuration options set. We see those nodes as in "production" mode
and thus require the setting.
==== Realtime get setting
The `action.get.realtime` setting has been removed. This setting was
a fallback realtime setting for the get and mget APIs when realtime
wasn't specified. Now if the parameter isn't specified we always
default to true.
=== Script settings
==== Indexed script settings
Because indexed scripts have been replaced by stored
scripts, the following settings have been renamed:
* `script.indexed` has been replaced by `script.stored`
* `script.engine.*.indexed.aggs` has been replaced by `script.engine.*.stored.aggs` (where `*` represents the script language, like `groovy`, `mustache`, `painless` etc.)
* `script.engine.*.indexed.mapping` has been replaced by `script.engine.*.stored.mapping` (where `*` represents the script language, like `groovy`, `mustache`, `painless` etc.)
* `script.engine.*.indexed.search` has been replaced by `script.engine.*.stored.search` (where `*` represents the script language, like `groovy`, `mustache`, `painless` etc.)
* `script.engine.*.indexed.update` has been replaced by `script.engine.*.stored.update` (where `*` represents the script language, like `groovy`, `mustache`, `painless` etc.)
* `script.engine.*.indexed.plugin` has been replaced by `script.engine.*.stored.plugin` (where `*` represents the script language, like `groovy`, `mustache`, `painless` etc.)
==== Script mode settings
Previously script mode settings (e.g., "script.inline: true",
"script.engine.groovy.inline.aggs: false", etc.) accepted a wide range of
"truthy" or "falsy" values. This is now much stricter and supports only the
`true` and `false` options.
==== Script sandbox settings removed
Prior to 5.0 a third option could be specified for the `script.inline` and
`script.stored` settings ("sandbox"). This has been removed. You can now only
set `script.inline: true` or `script.stored: true`.
==== Search settings
The setting `index.query.bool.max_clause_count` has been removed. In order to
set the maximum number of boolean clauses `indices.query.bool.max_clause_count`
should be used instead.
==== Memory lock settings
The setting `bootstrap.mlockall` has been renamed to
`bootstrap.memory_lock`.
==== Snapshot settings
The default setting `include_global_state` for restoring snapshots has been
changed from `true` to `false`. It has not been changed for taking snapshots and
still defaults to `true` in that case.
==== Time value parsing
The unit 'w' representing weeks is no longer supported.
Fractional time values (e.g., 0.5s) are no longer supported. For example, this means when setting
timeouts "0.5s" will be rejected and should instead be input as "500ms".
==== Node max local storage nodes
Previous versions of Elasticsearch defaulted to allowing multiple nodes to share the same data
directory (up to 50). This can be confusing where users accidentally start up multiple nodes and end
up thinking that they've lost data because the second node will start with an empty data directory.
While the default of allowing multiple nodes is friendly to playing with forming a small cluster on
a laptop, and end-users do sometimes run multiple nodes on the same host, this tends to be the
exception. Keeping with Elasticsearch's continual movement towards safer out-of-the-box defaults,
and optimizing for the norm instead of the exception, the default for
`node.max_local_storage_nodes` is now one.

View File

@ -1,74 +0,0 @@
[[breaking_50_suggester]]
=== Suggester changes
The completion suggester has undergone a complete rewrite. This means that the
syntax and data structure for fields of type `completion` have changed, as
have the syntax and response of completion suggester requests. See
<<search-suggesters-completion,completion suggester>> for details.
For indices created before Elasticsearch 5.0.0, `completion` fields and the
completion suggester will continue to work as they did in Elasticsearch 2.x.
However, it is not possible to run a completion suggester query across indices
created in 2.x and indices created in 5.x.
It is strongly recommended to reindex indices containing 2.x `completion`
fields in 5.x to take advantage of the new features listed below.
NOTE: You will need to change the structure of the completion field values
when reindexing.
==== Completion suggester is near-real time
Previously, deleted suggestions could be included in results even
after refreshing an index. Now, deletions are visible in near-real
time, i.e. as soon as the index has been refreshed. This applies
to suggestion entries for both context and completion suggesters.
==== Completion suggester is document-oriented
Suggestions are aware of the document they belong to. Now, associated
documents (`_source`) are returned as part of `completion` suggestions.
IMPORTANT: `_source` meta-field must be enabled, which is the default behavior,
to enable returning `_source` with suggestions.
Previously, `context` and `completion` suggesters supported an index-time
`payloads` option, which was used to store and return metadata with suggestions.
Now metadata can be stored as part of the same document as the
suggestion for retrieval at query-time. The support for index-time `payloads`
has been removed to avoid bloating the in-memory index with suggestion metadata.
==== Simpler completion indexing
As suggestions are document-oriented, suggestion metadata (e.g. `output`)
should now be specified as a field in the document. The support for specifying
`output` when indexing suggestion entries has been removed. Now a suggestion
result entry's `text` is always the un-analyzed value of the suggestion's
`input` (same as not specifying `output` while indexing suggestions in pre-5.0
indices).
==== Completion mapping with multiple contexts
The `context` option in `completion` field mapping is now an array to support
multiple named contexts per completion field. Note that this is sugar for
indexing the same suggestions under different names with different contexts.
The `default` option for a named `context` has been removed. Now querying with
no `context` against a context-enabled completion field yields results from all
indexed suggestions. Note that performance for match-all-context query
degrades with the number of unique context values for a given `completion` field.
==== Completion suggestion with multiple context filtering
Previously `context` option in a suggest request was used for filtering suggestions
by `context` value. Now, the option has been renamed to `contexts` to specify
multiple named context filters. Note that this is not supported by pre-5.0 indices.
Following is the `contexts` snippet for a suggest query filtered by both 'color'
and 'location' contexts:
[source,sh]
---------------
"contexts": {
"color": [ {...} ],
"location": [ {...} ]
}
---------------

View File

@ -0,0 +1,7 @@
[[breaking_60_search_changes]]
=== Search and Query DSL changes
==== Changes to queries
* The `collect_payloads` parameter of the `span_near` query has been removed. Payloads will be
loaded when needed.