Consolify term and phrase suggester docs

This includes a working example of reverse filters to support
correcting prefix errors.
This commit is contained in:
Nik Everett 2016-07-22 18:51:36 -04:00
parent e1415d6519
commit 3c0288ee98
5 changed files with 209 additions and 289 deletions

View File

@ -63,6 +63,13 @@ buildRestTests.docs = fileTree(projectDir) {
Closure setupTwitter = { String name, int count ->
buildRestTests.setups[name] = '''
- do:
indices.create:
index: twitter
body:
settings:
number_of_shards: 1
number_of_replicas: 1
- do:
bulk:
index: twitter

View File

@ -153,18 +153,18 @@ sync-flushed:
--------------------------------------------------
{
"_shards": {
"total": 10,
"successful": 10,
"total": 2,
"successful": 2,
"failed": 0
},
"twitter": {
"total": 10,
"successful": 10,
"total": 2,
"successful": 2,
"failed": 0
}
}
--------------------------------------------------
// TESTRESPONSE[s/"successful": 10/"successful": 5/]
// TESTRESPONSE[s/"successful": 2/"successful": 1/]
Here is what it looks like when one shard group failed due to pending operations:

View File

@ -10,15 +10,25 @@ The suggest request part is either defined alongside the query part in a
[source,js]
--------------------------------------------------
curl -s -XPOST 'localhost:9200/_search' -d '{
POST twitter/_search
{
"query" : {
...
"match": {
"message": "tring out Elasticsearch"
}
},
"suggest" : {
...
"my-suggestion" : {
"text" : "tring out Elasticsearch",
"term" : {
"field" : "message"
}
}
}
}'
}
--------------------------------------------------
// CONSOLE
// TEST[setup:twitter]
Suggest requests executed against the `_suggest` endpoint should omit
the surrounding `suggest` element which is only used if the suggest
@ -26,15 +36,18 @@ request is part of a search.
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/_suggest' -d '{
POST _suggest
{
"my-suggestion" : {
"text" : "the amsterdma meetpu",
"text" : "tring out Elasticsearch",
"term" : {
"field" : "body"
"field" : "message"
}
}
}'
}
--------------------------------------------------
// CONSOLE
// TEST[setup:twitter]
Several suggestions can be specified per request. Each suggestion is
identified with an arbitrary name. In the example below two suggestions
@ -43,21 +56,24 @@ the `term` suggester, but have a different `text`.
[source,js]
--------------------------------------------------
"suggest" : {
POST _suggest
{
"my-suggest-1" : {
"text" : "the amsterdma meetpu",
"text" : "tring out Elasticsearch",
"term" : {
"field" : "body"
"field" : "message"
}
},
"my-suggest-2" : {
"text" : "the rottredam meetpu",
"text" : "kmichy",
"term" : {
"field" : "title"
"field" : "user"
}
}
}
--------------------------------------------------
// CONSOLE
// TEST[setup:twitter]
The below suggest response example includes the suggestion response for
`my-suggest-1` and `my-suggest-2`. Each suggestion part contains
@ -68,44 +84,35 @@ in the suggest text and if found an arbitrary number of options.
[source,js]
--------------------------------------------------
{
...
"suggest": {
"my-suggest-1": [
{
"text" : "amsterdma",
"offset": 4,
"length": 9,
"options": [
...
]
},
...
],
"my-suggest-2" : [
...
]
}
...
"_shards": ...
"my-suggest-1": [ {
"text": "tring",
"offset": 0,
"length": 5,
"options": [ {"text": "trying", "score": 0.8, "freq": 1 } ]
}, {
"text": "out",
"offset": 6,
"length": 3,
"options": []
}, {
"text": "elasticsearch",
"offset": 10,
"length": 13,
"options": []
} ],
"my-suggest-2": ...
}
--------------------------------------------------
// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
// TESTRESPONSE[s/"my-suggest-2": \.\.\./"my-suggest-2": "$body.my-suggest-2"/]
Each options array contains an option object that includes the
suggested text, its document frequency and score compared to the suggest
entry text. The meaning of the score depends on the used suggester. The
term suggester's score is based on the edit distance.
[source,js]
--------------------------------------------------
"options": [
{
"text": "amsterdam",
"freq": 77,
"score": 0.8888889
},
...
]
--------------------------------------------------
[float]
[[global-suggest]]
=== Global suggest text
@ -116,157 +123,27 @@ and applies to the `my-suggest-1` and `my-suggest-2` suggestions.
[source,js]
--------------------------------------------------
"suggest" : {
"text" : "the amsterdma meetpu",
POST _suggest
{
"text" : "tring out Elasticsearch",
"my-suggest-1" : {
"term" : {
"field" : "title"
"field" : "message"
}
},
"my-suggest-2" : {
"term" : {
"field" : "body"
"field" : "user"
}
}
}
--------------------------------------------------
// CONSOLE
The suggest text can in the above example also be specified as a
suggestion-specific option. The suggest text specified at the suggestion
level overrides the suggest text at the global level.
[float]
=== Other suggest example
In the below example we request suggestions for the following suggest
text: `devloping distibutd saerch engies` on the `title` field with a
maximum of 3 suggestions per term inside the suggest text. Note that in
this example we set `size` to `0`. This isn't required, but a
nice optimization. The suggestions are gathered in the `query` phase and
in the case that we only care about suggestions (so no hits) we don't
need to execute the `fetch` phase.
[source,js]
--------------------------------------------------
curl -s -XPOST 'localhost:9200/_search' -d '{
"size": 0,
"suggest" : {
"my-title-suggestions-1" : {
"text" : "devloping distibutd saerch engies",
"term" : {
"size" : 3,
"field" : "title"
}
}
}
}'
--------------------------------------------------
The above request could yield the response as stated in the code example
below. As you can see if we take the first suggested options of each
suggestion entry we get `developing distributed search engines` as
result.
[source,js]
--------------------------------------------------
{
...
"suggest": {
"my-title-suggestions-1": [
{
"text": "devloping",
"offset": 0,
"length": 9,
"options": [
{
"text": "developing",
"freq": 77,
"score": 0.8888889
},
{
"text": "deloping",
"freq": 1,
"score": 0.875
},
{
"text": "deploying",
"freq": 2,
"score": 0.7777778
}
]
},
{
"text": "distibutd",
"offset": 10,
"length": 9,
"options": [
{
"text": "distributed",
"freq": 217,
"score": 0.7777778
},
{
"text": "disributed",
"freq": 1,
"score": 0.7777778
},
{
"text": "distribute",
"freq": 1,
"score": 0.7777778
}
]
},
{
"text": "saerch",
"offset": 20,
"length": 6,
"options": [
{
"text": "search",
"freq": 1038,
"score": 0.8333333
},
{
"text": "smerch",
"freq": 3,
"score": 0.8333333
},
{
"text": "serch",
"freq": 2,
"score": 0.8
}
]
},
{
"text": "engies",
"offset": 27,
"length": 6,
"options": [
{
"text": "engines",
"freq": 568,
"score": 0.8333333
},
{
"text": "engles",
"freq": 3,
"score": 0.8333333
},
{
"text": "eggies",
"freq": 1,
"score": 0.8333333
}
]
}
]
}
...
}
--------------------------------------------------
include::suggesters/term-suggest.asciidoc[]
include::suggesters/phrase-suggest.asciidoc[]
@ -274,5 +151,3 @@ include::suggesters/phrase-suggest.asciidoc[]
include::suggesters/completion-suggest.asciidoc[]
include::suggesters/context-suggest.asciidoc[]

View File

@ -17,36 +17,94 @@ co-occurrence and frequencies.
==== API Example
The `phrase` request is defined along side the query part in the json
request:
In general the `phrase` suggester requires special mapping up front to work.
The `phrase` suggester examples on this page need the following mapping to
work. The `reverse` analyzer is used only in the last example.
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/_search' -d '{
"suggest" : {
"text" : "Xor the Got-Jewel",
"simple_phrase" : {
"phrase" : {
"analyzer" : "body",
"field" : "bigram",
"size" : 1,
"real_word_error_likelihood" : 0.95,
"max_errors" : 0.5,
"gram_size" : 2,
"direct_generator" : [ {
"field" : "body",
"suggest_mode" : "always",
"min_word_length" : 1
} ],
"highlight": {
"pre_tag": "<em>",
"post_tag": "</em>"
POST test
{
"settings": {
"index": {
"number_of_shards": 1,
"analysis": {
"analyzer": {
"trigram": {
"type": "custom",
"tokenizer": "standard",
"filter": ["standard", "shingle"]
},
"reverse": {
"type": "custom",
"tokenizer": "standard",
"filter": ["standard", "reverse"]
}
},
"filter": {
"shingle": {
"type": "shingle",
"min_shingle_size": 2,
"max_shingle_size": 3
}
}
}
}
},
"mappings": {
"test": {
"properties": {
"title": {
"type": "text",
"fields": {
"trigram": {
"type": "text",
"analyzer": "trigram"
},
"reverse": {
"type": "text",
"analyzer": "reverse"
}
}
}
}
}
}
}'
}
POST test/test
{"title": "noble warriors"}
POST test/test
{"title": "nobel prize"}
POST _refresh
--------------------------------------------------
// TESTSETUP
Once you have the analyzers and mappings set up you can use the `phrase`
suggester in the same spot you'd use the `term` suggester:
[source,js]
--------------------------------------------------
POST _suggest?pretty
{
"text": "noble prize",
"simple_phrase": {
"phrase": {
"field": "title.trigram",
"size": 1,
"gram_size": 3,
"direct_generator": [ {
"field": "title.trigram",
"suggest_mode": "always"
} ],
"highlight": {
"pre_tag": "<em>",
"post_tag": "</em>"
}
}
}
}
--------------------------------------------------
// CONSOLE
The response contains suggestions scored by the most likely spell
correction first. In this case we received the expected correction
@ -57,37 +115,23 @@ can contain misspellings (See parameter descriptions below).
[source,js]
--------------------------------------------------
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 5,
"successful" : 5,
"failed" : 0
},
"hits" : {
"total" : 2938,
"max_score" : 0.0,
"hits" : [ ]
},
"suggest" : {
"simple_phrase" : [ {
"text" : "Xor the Got-Jewel",
{
"_shards": ...
"simple_phrase" : [
{
"text" : "noble prize",
"offset" : 0,
"length" : 17,
"length" : 11,
"options" : [ {
"text" : "xorr the god jewel",
"highlighted": "<em>xorr</em> the <em>god</em> jewel",
"score" : 0.17877324
}, {
"text" : "xor the god jewel",
"highlighted": "xor the <em>god</em> jewel",
"score" : 0.14231323
} ]
} ]
}
"text" : "nobel prize",
"highlighted": "<em>nobel</em> prize",
"score" : 0.40765354
}]
}
]
}
--------------------------------------------------
// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
==== Basic Phrase suggest API parameters
@ -178,34 +222,34 @@ can contain misspellings (See parameter descriptions below).
[source,js]
--------------------------------------------------
curl -XPOST 'localhost:9200/_search' -d {
"suggest" : {
"text" : "Xor the Got-Jewel",
"simple_phrase" : {
"phrase" : {
"field" : "bigram",
"size" : 1,
"direct_generator" : [ {
"field" : "body",
"suggest_mode" : "always",
"min_word_length" : 1
} ],
"collate": {
"query": { <1>
"inline" : {
"match": {
"{{field_name}}" : "{{suggestion}}" <2>
}
}
},
"params": {"field_name" : "title"}, <3>
"prune": true <4>
}
}
}
}
}
POST _suggest
{
"text" : "noble prize",
"simple_phrase" : {
"phrase" : {
"field" : "title.trigram",
"size" : 1,
"direct_generator" : [ {
"field" : "title.trigram",
"suggest_mode" : "always",
"min_word_length" : 1
} ],
"collate": {
"query": { <1>
"inline" : {
"match": {
"{{field_name}}" : "{{suggestion}}" <2>
}
}
},
"params": {"field_name" : "title"}, <3>
"prune": true <4>
}
}
}
}
--------------------------------------------------
// CONSOLE
<1> This query will be run once for every suggestion.
<2> The `{{suggestion}}` variable will be replaced by the text
of each suggestion.
@ -342,33 +386,27 @@ accept ordinary analyzer names.
[source,js]
--------------------------------------------------
curl -s -XPOST 'localhost:9200/_search' -d {
"suggest" : {
"text" : "Xor the Got-Jewel",
"simple_phrase" : {
"phrase" : {
"analyzer" : "body",
"field" : "bigram",
"size" : 4,
"real_word_error_likelihood" : 0.95,
"confidence" : 2.0,
"gram_size" : 2,
"direct_generator" : [ {
"field" : "body",
"suggest_mode" : "always",
"min_word_length" : 1
}, {
"field" : "reverse",
"suggest_mode" : "always",
"min_word_length" : 1,
"pre_filter" : "reverse",
"post_filter" : "reverse"
} ]
}
POST _suggest
{
"text" : "obel prize",
"simple_phrase" : {
"phrase" : {
"field" : "title.trigram",
"size" : 1,
"direct_generator" : [ {
"field" : "title.trigram",
"suggest_mode" : "always"
}, {
"field" : "title.reverse",
"suggest_mode" : "always",
"pre_filter" : "reverse",
"post_filter" : "reverse"
} ]
}
}
}
--------------------------------------------------
// CONSOLE
`pre_filter` and `post_filter` can also be used to inject synonyms after
candidates are generated. For instance for the query `captain usq` we

View File

@ -21,19 +21,19 @@ And here is a sample response:
"timed_out": false,
"took": 62,
"_shards":{
"total" : 5,
"successful" : 5,
"total" : 1,
"successful" : 1,
"failed" : 0
},
"hits":{
"total" : 1,
"max_score": 0.2876821,
"max_score": 1.3862944,
"hits" : [
{
"_index" : "twitter",
"_type" : "tweet",
"_id" : "0",
"_score": 0.2876821,
"_score": 1.3862944,
"_source" : {
"user" : "kimchy",
"date" : "2009-11-15T14:12:12",