From 36e1c7928c0b56a938b7ae05cc10ddca26d5882c Mon Sep 17 00:00:00 2001 From: Clinton Gormley Date: Thu, 31 Jul 2014 12:48:55 +0200 Subject: [PATCH] Rewrote post-filter.asciidoc Closes #5166 --- .../search/request/post-filter.asciidoc | 156 ++++++++++-------- 1 file changed, 90 insertions(+), 66 deletions(-) diff --git a/docs/reference/search/request/post-filter.asciidoc b/docs/reference/search/request/post-filter.asciidoc index d506acc9ca0..4af79c1db9d 100644 --- a/docs/reference/search/request/post-filter.asciidoc +++ b/docs/reference/search/request/post-filter.asciidoc @@ -1,90 +1,114 @@ [[search-request-post-filter]] === Post filter -The `post_filter` allows any filter that it holds to be executed as last filter, because -of this the `post_filter` only has affect on the search hits and not facets. +The `post_filter` is applied to the search `hits` at the very end of a search +request, after aggregations have already been calculated. Its purpose is +best explained by example: -There are several reasons why to specify filters as `post_filter`. One reason is to force -expensive filters to be executed as last filter, so that these filters only operate on the -docs that match with the rest of the query. An example of for what filter a post_filter -should be used for this reason is the `geo_distance` filter. The `geo_distance` filter is -in general an expensive filter to execute and to reduce the execution time for this filter, -one can choose to specify it as `post_filter`, so it runs on documents that are very likely -to end up as matching documents. +Imagine that you are selling shirts, and the user has specified two filters: +`color:red` and `brand:gucci`. You only want to show them red shirts made by +Gucci in the search results. 
Normally you would do this with a +<<query-dsl-filtered-query,`filtered` query>>: -Another important reason is when doing things like facet navigation, -sometimes only the hits are needed to be filtered by the chosen facet, -and all the facets should continue to be calculated based on the original query. -The `post_filter` element within the search request can be used to accomplish it. - -Note, this is different compared to creating a `filtered` query with the -filter, since this will cause the facets to only process the filtered -results. - -For example, let's create two tweets, with two different tags: - -[source,js] +[source,json] -------------------------------------------------- -curl -XPUT 'localhost:9200/twitter/tweet/1' -d ' +curl -XGET localhost:9200/shirts/_search -d ' { - "message" : "something blue", - "tag" : "blue" -} -' - -curl -XPUT 'localhost:9200/twitter/tweet/2' -d ' -{ - "message" : "something green", - "tag" : "green" -} -' - -curl -XPOST 'localhost:9200/_refresh' --------------------------------------------------- - -We can now search for something, and have a terms facet. - -[source,js] --------------------------------------------------- -curl -XPOST 'localhost:9200/twitter/_search?pretty=true' -d ' -{ - "query" : { - "term" : { "message" : "something" } - }, - "facets" : { - "tag" : { - "terms" : { "field" : "tag" } + "query": { + "filtered": { + "filter": { + "bool": { + "must": [ + { "term": { "color": "red" }}, + { "term": { "brand": "gucci" }} + ] } + } } + } } ' -------------------------------------------------- -We get two hits, and the relevant facets with a count of 1 for both -`green` and `blue`. Now, let's say the `green` facet is chosen, we can -simply add a filter for it: +However, you would also like to use _faceted navigation_ to display a list of +other options that the user could click on. Perhaps you have a `model` field +that would allow the user to limit their search results to red Gucci +`t-shirts` or `dress-shirts`. 
-[source,js] +This can be done with a +<<search-aggregations-bucket-terms-aggregation,`terms` aggregation>>: + +[source,json] -------------------------------------------------- -curl -XPOST 'localhost:9200/twitter/_search?pretty=true' -d ' +curl -XGET localhost:9200/shirts/_search -d ' { - "query" : { - "term" : { "message" : "something" } - }, - "post_filter" : { - "term" : { "tag" : "green" } - }, - "facets" : { - "tag" : { - "terms" : { "field" : "tag" } + "query": { + "filtered": { + "filter": { + "bool": { + "must": [ + { "term": { "color": "red" }}, + { "term": { "brand": "gucci" }} + ] } + } } + }, + "aggs": { + "models": { + "terms": { "field": "model" } <1> + } + } } ' -------------------------------------------------- +<1> Returns the most popular models of red shirts by Gucci. -And now, we get only 1 hit back, but the facets remain the same. +But perhaps you would also like to tell the user how many Gucci shirts are +available in *other colors*. If you just add a `terms` aggregation on the +`color` field, you will only get back the color `red`, because your query +returns only red shirts by Gucci. -Note, if additional filters are required on specific facets, they can be -added as a `facet_filter` to the relevant facets. +Instead, you want to include shirts of all colors during aggregation, then +apply the `color` filter only to the search results. This is the purpose of +the `post_filter`: + +[source,json] +-------------------------------------------------- +curl -XGET localhost:9200/shirts/_search -d ' +{ + "query": { + "filtered": { + "filter": { + "term": { "brand": "gucci" } <1> + } + } + }, + "aggs": { + "colors": { + "terms": { "field": "color" } <2> + }, + "color_red": { + "filter": { + "term": { "color": "red" } <3> + }, + "aggs": { + "models": { + "terms": { "field": "model" } <3> + } + } + } + }, + "post_filter": { <4> + "term": { "color": "red" } + } +} +' +-------------------------------------------------- +<1> The main query now finds all shirts by Gucci, regardless of color. 
+<2> The `colors` agg returns popular colors for shirts by Gucci. +<3> The `color_red` agg limits the `models` sub-aggregation + to *red* Gucci shirts. +<4> Finally, the `post_filter` removes colors other than red + from the search `hits`.