Convert more search docs to CONSOLE

`profile.asciidoc` now runs all of its commands but it doesn't validate
all of the results. Writing the validation is time-consuming so I only
did some of it.
This commit is contained in:
Nik Everett 2016-09-15 11:57:05 -04:00
parent 37489c3274
commit e4c80c94e9
2 changed files with 129 additions and 63 deletions

View File

@ -17,13 +17,17 @@ Any `_search` request can be profiled by adding a top-level `profile` parameter:
[source,js]
--------------------------------------------------
curl -XGET 'localhost:9200/_search' -d '{
GET /_search
{
"profile": true,<1>
"query" : {
"match" : { "message" : "search test" }
"match" : { "message" : "message number" }
}
}'
}
--------------------------------------------------
// CONSOLE
// TEST[setup:twitter]
<1> Setting the top-level `profile` parameter to `true` will enable profiling
for the search
@ -40,70 +44,70 @@ This will yield the following result:
"failed": 0
},
"hits": {
"total": 3,
"max_score": 1.078072,
"hits": [ ... ] <1>
"total": 4,
"max_score": 0.5093388,
"hits": [...] <1>
},
"profile": {
"shards": [
{
"id": "[2aE02wS1R8q_QFnYu6vDVQ][test][1]",
"id": "[2aE02wS1R8q_QFnYu6vDVQ][twitter][1]",
"searches": [
{
"query": [
{
"type": "BooleanQuery",
"description": "message:search message:test",
"time": "15.52889800ms",
"description": "message:message message:number",
"time": "1.873811000ms",
"breakdown": {
"score": 6352,
"score_count": 1,
"build_scorer": 1800776,
"score": 51306,
"score_count": 4,
"build_scorer": 2935582,
"build_scorer_count": 1,
"match": 0,
"match_count": 0,
"create_weight": 667400,
"create_weight": 919297,
"create_weight_count": 1,
"next_doc": 10563,
"next_doc_count": 2,
"next_doc": 53876,
"next_doc_count": 5,
"advance": 0,
"advance_count": 0
},
"children": [
{
"type": "TermQuery",
"description": "message:search",
"time": "4.938855000ms",
"description": "message:message",
"time": "0.3919430000ms",
"breakdown": {
"score": 0,
"score_count": 0,
"build_scorer": 3230,
"score": 28776,
"score_count": 4,
"build_scorer": 784451,
"build_scorer_count": 1,
"match": 0,
"match_count": 0,
"create_weight": 415612,
"create_weight": 1669564,
"create_weight_count": 1,
"next_doc": 0,
"next_doc_count": 0,
"next_doc": 10111,
"next_doc_count": 5,
"advance": 0,
"advance_count": 0
}
},
{
"type": "TermQuery",
"description": "message:test",
"time": "0.5016660000ms",
"description": "message:number",
"time": "0.2106820000ms",
"breakdown": {
"score": 5014,
"score_count": 1,
"build_scorer": 1689333,
"score": 4552,
"score_count": 4,
"build_scorer": 42602,
"build_scorer_count": 1,
"match": 0,
"match_count": 0,
"create_weight": 166587,
"create_weight": 89323,
"create_weight_count": 1,
"next_doc": 5542,
"next_doc_count": 2,
"next_doc": 2852,
"next_doc_count": 5,
"advance": 0,
"advance_count": 0
}
@ -111,21 +115,44 @@ This will yield the following result:
]
}
],
"rewrite_time": 870954,
"rewrite_time": 51443,
"collector": [
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time": "0.009783000000ms"
"time": "0.06989100000ms"
}
]
}
]
],
"aggregations": []
}
]
}
}
--------------------------------------------------
// TESTRESPONSE[s/"took": 25/"took": $body.took/]
// TESTRESPONSE[s/"hits": \[...\]/"hits": $body.hits.hits/]
// TESTRESPONSE[s/"id": "\[2aE02wS1R8q_QFnYu6vDVQ\]\[twitter\]\[1\]"/"id": $body.profile.shards.0.id/]
// TESTRESPONSE[s/"rewrite_time": 51443/"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time/]
// TESTRESPONSE[s/"score": 51306/"score": $body.profile.shards.0.searches.0.query.0.breakdown.score/]
// TESTRESPONSE[s/"time": "1.873811000ms"/"time": $body.profile.shards.0.searches.0.query.0.time/]
// TESTRESPONSE[s/"build_scorer": 2935582/"build_scorer": $body.profile.shards.0.searches.0.query.0.breakdown.build_scorer/]
// TESTRESPONSE[s/"create_weight": 919297/"create_weight": $body.profile.shards.0.searches.0.query.0.breakdown.create_weight/]
// TESTRESPONSE[s/"next_doc": 53876/"next_doc": $body.profile.shards.0.searches.0.query.0.breakdown.next_doc/]
// TESTRESPONSE[s/"time": "0.3919430000ms"/"time": $body.profile.shards.0.searches.0.query.0.children.0.time/]
// TESTRESPONSE[s/"score": 28776/"score": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.score/]
// TESTRESPONSE[s/"build_scorer": 784451/"build_scorer": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.build_scorer/]
// TESTRESPONSE[s/"create_weight": 1669564/"create_weight": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.create_weight/]
// TESTRESPONSE[s/"next_doc": 10111/"next_doc": $body.profile.shards.0.searches.0.query.0.children.0.breakdown.next_doc/]
// TESTRESPONSE[s/"time": "0.2106820000ms"/"time": $body.profile.shards.0.searches.0.query.0.children.1.time/]
// TESTRESPONSE[s/"score": 4552/"score": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.score/]
// TESTRESPONSE[s/"build_scorer": 42602/"build_scorer": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.build_scorer/]
// TESTRESPONSE[s/"create_weight": 89323/"create_weight": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.create_weight/]
// TESTRESPONSE[s/"next_doc": 2852/"next_doc": $body.profile.shards.0.searches.0.query.0.children.1.breakdown.next_doc/]
// TESTRESPONSE[s/"time": "0.06989100000ms"/"time": $body.profile.shards.0.searches.0.collector.0.time/]
// Sorry for this mess....
<1> Search results are returned, but were omitted here for brevity
Even for a simple query, the response is relatively complicated. Let's break it down piece-by-piece before moving
@ -139,11 +166,11 @@ First, the overall structure of the profile response is as follows:
"profile": {
"shards": [
{
"id": "[2aE02wS1R8q_QFnYu6vDVQ][test][1]", <1>
"id": "[2aE02wS1R8q_QFnYu6vDVQ][twitter][1]", <1>
"searches": [
{
"query": [...], <2>
"rewrite_time": 870954, <3>
"rewrite_time": 51443, <3>
"collector": [...] <4>
}
],
@ -153,6 +180,12 @@ First, the overall structure of the profile response is as follows:
}
}
--------------------------------------------------
// TESTRESPONSE[s/"profile": /"took": $body.took, "timed_out": $body.timed_out, "_shards": $body._shards, "hits": $body.hits, "profile": /]
// TESTRESPONSE[s/"id": "\[2aE02wS1R8q_QFnYu6vDVQ\]\[twitter\]\[1\]"/"id": $body.profile.shards.0.id/]
// TESTRESPONSE[s/"query": \[...\]/"query": $body.profile.shards.0.searches.0.query/]
// TESTRESPONSE[s/"rewrite_time": 51443/"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time/]
// TESTRESPONSE[s/"collector": \[...\]/"collector": $body.profile.shards.0.searches.0.collector/]
// TESTRESPONSE[s/"aggregations": \[...\]/"aggregations": []/]
<1> A profile is returned for each shard that participated in the response, and is identified
by a unique ID
<2> Each profile contains a section which holds details about the query execution
@ -195,33 +228,38 @@ the `advance` phase of that query is the cause, for example.
The `query` section contains detailed timing of the query tree executed by Lucene on a particular shard.
The overall structure of this query tree will resemble your original Elasticsearch query, but may be slightly
(or sometimes very) different. It will also use similar but not always identical naming. Using our previous
`term` query example, let's analyze the `query` section:
`match` query example, let's analyze the `query` section:
[source,js]
--------------------------------------------------
"query": [
{
"type": "BooleanQuery",
"description": "message:search message:test",
"time": "15.52889800ms",
"description": "message:message message:number",
"time": "1.873811000ms",
"breakdown": {...}, <1>
"children": [
{
"type": "TermQuery",
"description": "message:search",
"time": "4.938855000ms",
"description": "message:message",
"time": "0.3919430000ms",
"breakdown": {...}
},
{
"type": "TermQuery",
"description": "message:test",
"time": "0.5016660000ms",
"description": "message:number",
"time": "0.2106820000ms",
"breakdown": {...}
}
]
}
]
--------------------------------------------------
// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.profile.shards.0.id",\n"searches": [{\n/]
// TESTRESPONSE[s/]$/],"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time, "collector": $body.profile.shards.0.searches.0.collector}], "aggregations": []}]}}/]
// TESTRESPONSE[s/"time": "1.873811000ms",\n.+"breakdown": \{...\}/"time": $body.profile.shards.0.searches.0.query.0.time, "breakdown": $body.profile.shards.0.searches.0.query.0.breakdown/]
// TESTRESPONSE[s/"time": "0.3919430000ms",\n.+"breakdown": \{...\}/"time": $body.profile.shards.0.searches.0.query.0.children.0.time, "breakdown": $body.profile.shards.0.searches.0.query.0.children.0.breakdown/]
// TESTRESPONSE[s/"time": "0.2106820000ms",\n.+"breakdown": \{...\}/"time": $body.profile.shards.0.searches.0.query.0.children.1.time, "breakdown": $body.profile.shards.0.searches.0.query.0.children.1.breakdown/]
<1> The breakdown timings are omitted for simplicity
Based on the profile structure, we can see that our `match` query was rewritten by Lucene into a BooleanQuery with two
@ -245,20 +283,27 @@ The `"breakdown"` component lists detailed timing statistics about low-level Luc
[source,js]
--------------------------------------------------
"breakdown": {
"score": 5014,
"score_count": 1,
"build_scorer": 1689333,
"build_scorer_count": 1,
"match": 0,
"match_count": 0,
"create_weight": 166587,
"create_weight_count": 1,
"next_doc": 5542,
"next_doc_count": 2,
"advance": 0,
"advance_count": 0
"score": 51306,
"score_count": 4,
"build_scorer": 2935582,
"build_scorer_count": 1,
"match": 0,
"match_count": 0,
"create_weight": 919297,
"create_weight_count": 1,
"next_doc": 53876,
"next_doc_count": 5,
"advance": 0,
"advance_count": 0
}
--------------------------------------------------
// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.profile.shards.0.id",\n"searches": [{\n"query": [{\n"type": "BooleanQuery",\n"description": "message:message message:number",\n"time": $body.profile.shards.0.searches.0.query.0.time,/]
// TESTRESPONSE[s/}$/},\n"children": $body.profile.shards.0.searches.0.query.0.children}],\n"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time, "collector": $body.profile.shards.0.searches.0.collector}], "aggregations": []}]}}/]
// TESTRESPONSE[s/"score": 51306/"score": $body.profile.shards.0.searches.0.query.0.breakdown.score/]
// TESTRESPONSE[s/"time": "1.873811000ms"/"time": $body.profile.shards.0.searches.0.query.0.time/]
// TESTRESPONSE[s/"build_scorer": 2935582/"build_scorer": $body.profile.shards.0.searches.0.query.0.breakdown.build_scorer/]
// TESTRESPONSE[s/"create_weight": 919297/"create_weight": $body.profile.shards.0.searches.0.query.0.breakdown.create_weight/]
// TESTRESPONSE[s/"next_doc": 53876/"next_doc": $body.profile.shards.0.searches.0.query.0.breakdown.next_doc/]
Timings are listed in wall-clock nanoseconds and are not normalized at all. All caveats about the overall
`"time"` apply here. The intention of the breakdown is to give you a feel for A) what machinery in Lucene is
@ -348,10 +393,13 @@ Looking at the previous example:
{
"name": "SimpleTopScoreDocCollector",
"reason": "search_top_hits",
"time": "2.206529000ms"
"time": "0.06989100000ms"
}
]
--------------------------------------------------
// TESTRESPONSE[s/^/{\n"took": $body.took,\n"timed_out": $body.timed_out,\n"_shards": $body._shards,\n"hits": $body.hits,\n"profile": {\n"shards": [ {\n"id": "$body.profile.shards.0.id",\n"searches": [{\n"query": $body.profile.shards.0.searches.0.query,\n"rewrite_time": $body.profile.shards.0.searches.0.rewrite_time,/]
// TESTRESPONSE[s/]$/]}], "aggregations": []}]}}/]
// TESTRESPONSE[s/"time": "0.06989100000ms"/"time": $body.profile.shards.0.searches.0.collector.0.time/]
We see a single collector named `SimpleTopScoreDocCollector`. This is the default "scoring and sorting" Collector
used by Elasticsearch. The `"reason"` field attempts to give a plain English description of the class name. The
@ -473,6 +521,8 @@ GET /test/_search
}
}
--------------------------------------------------
// CONSOLE
// TEST[s/^/PUT test\n/]
This example has:
@ -509,7 +559,7 @@ And the response:
"create_weight_count": 1,
"build_scorer": 377872,
"build_scorer_count": 1,
"advance": 0
"advance": 0,
"advance_count": 0
}
},
@ -528,7 +578,7 @@ And the response:
"create_weight_count": 1,
"build_scorer": 112551,
"build_scorer_count": 1,
"advance": 0
"advance": 0,
"advance_count": 0
}
}
@ -578,7 +628,7 @@ And the response:
"create_weight_count": 1,
"build_scorer": 38310,
"build_scorer_count": 1,
"advance": 0
"advance": 0,
"advance_count": 0
}
}
@ -640,7 +690,7 @@ the following example aggregations request:
[source,js]
--------------------------------------------------
curl -XGET "http://localhost:9200/house-prices/_search" -d'
GET /house-prices/_search
{
"profile": true,
"size": 0,
@ -658,8 +708,10 @@ curl -XGET "http://localhost:9200/house-prices/_search" -d'
}
}
}
}'
}
--------------------------------------------------
// CONSOLE
// TEST[s/^/PUT house-prices\n/]
Which yields the following aggregation profile output

View File

@ -351,9 +351,23 @@ Multiple geo points can be passed as an array containing any `geo_point` format,
[source,js]
--------------------------------------------------
"pin.location" : [[-70, 40], [-71, 42]]
"pin.location" : [{"lat": 40, "lon": -70}, {"lat": 42, "lon": -71}]
GET /_search
{
"sort" : [
{
"_geo_distance" : {
"pin.location" : [[-70, 40], [-71, 42]],
"order" : "asc",
"unit" : "km"
}
}
],
"query" : {
"term" : { "user" : "kimchy" }
}
}
--------------------------------------------------
// CONSOLE
and so forth.