mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-22 12:56:53 +00:00
Consolify term and phrase suggester docs
This includes a working example of reverse filters to support correcting prefix errors.
This commit is contained in:
parent
e1415d6519
commit
3c0288ee98
@ -63,6 +63,13 @@ buildRestTests.docs = fileTree(projectDir) {
|
||||
|
||||
Closure setupTwitter = { String name, int count ->
|
||||
buildRestTests.setups[name] = '''
|
||||
- do:
|
||||
indices.create:
|
||||
index: twitter
|
||||
body:
|
||||
settings:
|
||||
number_of_shards: 1
|
||||
number_of_replicas: 1
|
||||
- do:
|
||||
bulk:
|
||||
index: twitter
|
||||
|
@ -153,18 +153,18 @@ sync-flushed:
|
||||
--------------------------------------------------
|
||||
{
|
||||
"_shards": {
|
||||
"total": 10,
|
||||
"successful": 10,
|
||||
"total": 2,
|
||||
"successful": 2,
|
||||
"failed": 0
|
||||
},
|
||||
"twitter": {
|
||||
"total": 10,
|
||||
"successful": 10,
|
||||
"total": 2,
|
||||
"successful": 2,
|
||||
"failed": 0
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[s/"successful": 10/"successful": 5/]
|
||||
// TESTRESPONSE[s/"successful": 2/"successful": 1/]
|
||||
|
||||
Here is what it looks like when one shard group failed due to pending operations:
|
||||
|
||||
|
@ -10,15 +10,25 @@ The suggest request part is either defined alongside the query part in a
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -s -XPOST 'localhost:9200/_search' -d '{
|
||||
POST twitter/_search
|
||||
{
|
||||
"query" : {
|
||||
...
|
||||
"match": {
|
||||
"message": "tring out Elasticsearch"
|
||||
}
|
||||
},
|
||||
"suggest" : {
|
||||
...
|
||||
"my-suggestion" : {
|
||||
"text" : "tring out Elasticsearch",
|
||||
"term" : {
|
||||
"field" : "message"
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:twitter]
|
||||
|
||||
Suggest requests executed against the `_suggest` endpoint should omit
|
||||
the surrounding `suggest` element which is only used if the suggest
|
||||
@ -26,15 +36,18 @@ request is part of a search.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/_suggest' -d '{
|
||||
POST _suggest
|
||||
{
|
||||
"my-suggestion" : {
|
||||
"text" : "the amsterdma meetpu",
|
||||
"text" : "tring out Elasticsearch",
|
||||
"term" : {
|
||||
"field" : "body"
|
||||
"field" : "message"
|
||||
}
|
||||
}
|
||||
}'
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:twitter]
|
||||
|
||||
Several suggestions can be specified per request. Each suggestion is
|
||||
identified with an arbitrary name. In the example below two suggestions
|
||||
@ -43,21 +56,24 @@ the `term` suggester, but have a different `text`.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
"suggest" : {
|
||||
POST _suggest
|
||||
{
|
||||
"my-suggest-1" : {
|
||||
"text" : "the amsterdma meetpu",
|
||||
"text" : "tring out Elasticsearch",
|
||||
"term" : {
|
||||
"field" : "body"
|
||||
"field" : "message"
|
||||
}
|
||||
},
|
||||
"my-suggest-2" : {
|
||||
"text" : "the rottredam meetpu",
|
||||
"text" : "kmichy",
|
||||
"term" : {
|
||||
"field" : "title"
|
||||
"field" : "user"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TEST[setup:twitter]
|
||||
|
||||
The below suggest response example includes the suggestion response for
|
||||
`my-suggest-1` and `my-suggest-2`. Each suggestion part contains
|
||||
@ -68,44 +84,35 @@ in the suggest text and if found an arbitrary number of options.
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...
|
||||
"suggest": {
|
||||
"my-suggest-1": [
|
||||
{
|
||||
"text" : "amsterdma",
|
||||
"offset": 4,
|
||||
"length": 9,
|
||||
"options": [
|
||||
...
|
||||
]
|
||||
},
|
||||
...
|
||||
],
|
||||
"my-suggest-2" : [
|
||||
...
|
||||
]
|
||||
}
|
||||
...
|
||||
"_shards": ...
|
||||
"my-suggest-1": [ {
|
||||
"text": "tring",
|
||||
"offset": 0,
|
||||
"length": 5,
|
||||
"options": [ {"text": "trying", "score": 0.8, "freq": 1 } ]
|
||||
}, {
|
||||
"text": "out",
|
||||
"offset": 6,
|
||||
"length": 3,
|
||||
"options": []
|
||||
}, {
|
||||
"text": "elasticsearch",
|
||||
"offset": 10,
|
||||
"length": 13,
|
||||
"options": []
|
||||
} ],
|
||||
"my-suggest-2": ...
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
|
||||
// TESTRESPONSE[s/"my-suggest-2": \.\.\./"my-suggest-2": "$body.my-suggest-2"/]
|
||||
|
||||
|
||||
Each options array contains an option object that includes the
|
||||
suggested text, its document frequency and score compared to the suggest
|
||||
entry text. The meaning of the score depends on the suggester used. The
|
||||
term suggester's score is based on the edit distance.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
"options": [
|
||||
{
|
||||
"text": "amsterdam",
|
||||
"freq": 77,
|
||||
"score": 0.8888889
|
||||
},
|
||||
...
|
||||
]
|
||||
--------------------------------------------------
|
||||
|
||||
[float]
|
||||
[[global-suggest]]
|
||||
=== Global suggest text
|
||||
@ -116,157 +123,27 @@ and applies to the `my-suggest-1` and `my-suggest-2` suggestions.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
"suggest" : {
|
||||
"text" : "the amsterdma meetpu",
|
||||
POST _suggest
|
||||
{
|
||||
"text" : "tring out Elasticsearch",
|
||||
"my-suggest-1" : {
|
||||
"term" : {
|
||||
"field" : "title"
|
||||
"field" : "message"
|
||||
}
|
||||
},
|
||||
"my-suggest-2" : {
|
||||
"term" : {
|
||||
"field" : "body"
|
||||
"field" : "user"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
In the above example, the suggest text can also be specified as
|
||||
a suggestion-specific option. The suggest text specified on suggestion
|
||||
level overrides the suggest text on the global level.
|
||||
|
||||
[float]
|
||||
=== Other suggest example
|
||||
|
||||
In the below example we request suggestions for the following suggest
|
||||
text: `devloping distibutd saerch engies` on the `title` field with a
|
||||
maximum of 3 suggestions per term inside the suggest text. Note that in
|
||||
this example we set `size` to `0`. This isn't required, but a
|
||||
nice optimization. The suggestions are gathered in the `query` phase and
|
||||
in the case that we only care about suggestions (so no hits) we don't
|
||||
need to execute the `fetch` phase.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -s -XPOST 'localhost:9200/_search' -d '{
|
||||
"size": 0,
|
||||
"suggest" : {
|
||||
"my-title-suggestions-1" : {
|
||||
"text" : "devloping distibutd saerch engies",
|
||||
"term" : {
|
||||
"size" : 3,
|
||||
"field" : "title"
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
--------------------------------------------------
|
||||
|
||||
The above request could yield the response as stated in the code example
|
||||
below. As you can see if we take the first suggested options of each
|
||||
suggestion entry we get `developing distributed search engines` as
|
||||
result.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
...
|
||||
"suggest": {
|
||||
"my-title-suggestions-1": [
|
||||
{
|
||||
"text": "devloping",
|
||||
"offset": 0,
|
||||
"length": 9,
|
||||
"options": [
|
||||
{
|
||||
"text": "developing",
|
||||
"freq": 77,
|
||||
"score": 0.8888889
|
||||
},
|
||||
{
|
||||
"text": "deloping",
|
||||
"freq": 1,
|
||||
"score": 0.875
|
||||
},
|
||||
{
|
||||
"text": "deploying",
|
||||
"freq": 2,
|
||||
"score": 0.7777778
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"text": "distibutd",
|
||||
"offset": 10,
|
||||
"length": 9,
|
||||
"options": [
|
||||
{
|
||||
"text": "distributed",
|
||||
"freq": 217,
|
||||
"score": 0.7777778
|
||||
},
|
||||
{
|
||||
"text": "disributed",
|
||||
"freq": 1,
|
||||
"score": 0.7777778
|
||||
},
|
||||
{
|
||||
"text": "distribute",
|
||||
"freq": 1,
|
||||
"score": 0.7777778
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"text": "saerch",
|
||||
"offset": 20,
|
||||
"length": 6,
|
||||
"options": [
|
||||
{
|
||||
"text": "search",
|
||||
"freq": 1038,
|
||||
"score": 0.8333333
|
||||
},
|
||||
{
|
||||
"text": "smerch",
|
||||
"freq": 3,
|
||||
"score": 0.8333333
|
||||
},
|
||||
{
|
||||
"text": "serch",
|
||||
"freq": 2,
|
||||
"score": 0.8
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"text": "engies",
|
||||
"offset": 27,
|
||||
"length": 6,
|
||||
"options": [
|
||||
{
|
||||
"text": "engines",
|
||||
"freq": 568,
|
||||
"score": 0.8333333
|
||||
},
|
||||
{
|
||||
"text": "engles",
|
||||
"freq": 3,
|
||||
"score": 0.8333333
|
||||
},
|
||||
{
|
||||
"text": "eggies",
|
||||
"freq": 1,
|
||||
"score": 0.8333333
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
...
|
||||
}
|
||||
--------------------------------------------------
|
||||
|
||||
include::suggesters/term-suggest.asciidoc[]
|
||||
|
||||
include::suggesters/phrase-suggest.asciidoc[]
|
||||
@ -274,5 +151,3 @@ include::suggesters/phrase-suggest.asciidoc[]
|
||||
include::suggesters/completion-suggest.asciidoc[]
|
||||
|
||||
include::suggesters/context-suggest.asciidoc[]
|
||||
|
||||
|
||||
|
@ -17,36 +17,94 @@ co-occurrence and frequencies.
|
||||
|
||||
==== API Example
|
||||
|
||||
The `phrase` request is defined along side the query part in the json
|
||||
request:
|
||||
In general, the `phrase` suggester requires special mapping up front to work.
|
||||
The `phrase` suggester examples on this page need the following mapping to
|
||||
work. The `reverse` analyzer is used only in the last example.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/_search' -d '{
|
||||
"suggest" : {
|
||||
"text" : "Xor the Got-Jewel",
|
||||
"simple_phrase" : {
|
||||
"phrase" : {
|
||||
"analyzer" : "body",
|
||||
"field" : "bigram",
|
||||
"size" : 1,
|
||||
"real_word_error_likelihood" : 0.95,
|
||||
"max_errors" : 0.5,
|
||||
"gram_size" : 2,
|
||||
"direct_generator" : [ {
|
||||
"field" : "body",
|
||||
"suggest_mode" : "always",
|
||||
"min_word_length" : 1
|
||||
} ],
|
||||
"highlight": {
|
||||
"pre_tag": "<em>",
|
||||
"post_tag": "</em>"
|
||||
POST test
|
||||
{
|
||||
"settings": {
|
||||
"index": {
|
||||
"number_of_shards": 1,
|
||||
"analysis": {
|
||||
"analyzer": {
|
||||
"trigram": {
|
||||
"type": "custom",
|
||||
"tokenizer": "standard",
|
||||
"filter": ["standard", "shingle"]
|
||||
},
|
||||
"reverse": {
|
||||
"type": "custom",
|
||||
"tokenizer": "standard",
|
||||
"filter": ["standard", "reverse"]
|
||||
}
|
||||
},
|
||||
"filter": {
|
||||
"shingle": {
|
||||
"type": "shingle",
|
||||
"min_shingle_size": 2,
|
||||
"max_shingle_size": 3
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"test": {
|
||||
"properties": {
|
||||
"title": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"trigram": {
|
||||
"type": "text",
|
||||
"analyzer": "trigram"
|
||||
},
|
||||
"reverse": {
|
||||
"type": "text",
|
||||
"analyzer": "reverse"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}'
|
||||
}
|
||||
POST test/test
|
||||
{"title": "noble warriors"}
|
||||
POST test/test
|
||||
{"title": "nobel prize"}
|
||||
POST _refresh
|
||||
--------------------------------------------------
|
||||
// TESTSETUP
|
||||
|
||||
Once you have the analyzers and mappings set up you can use the `phrase`
|
||||
suggester in the same spot you'd use the `term` suggester:
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
POST _suggest?pretty
|
||||
{
|
||||
"text": "noble prize",
|
||||
"simple_phrase": {
|
||||
"phrase": {
|
||||
"field": "title.trigram",
|
||||
"size": 1,
|
||||
"gram_size": 3,
|
||||
"direct_generator": [ {
|
||||
"field": "title.trigram",
|
||||
"suggest_mode": "always"
|
||||
} ],
|
||||
"highlight": {
|
||||
"pre_tag": "<em>",
|
||||
"post_tag": "</em>"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
The response contains suggestions scored by the most likely spell
|
||||
correction first. In this case we received the expected correction
|
||||
@ -57,37 +115,23 @@ can contain misspellings (See parameter descriptions below).
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
{
|
||||
"took" : 5,
|
||||
"timed_out" : false,
|
||||
"_shards" : {
|
||||
"total" : 5,
|
||||
"successful" : 5,
|
||||
"failed" : 0
|
||||
},
|
||||
"hits" : {
|
||||
"total" : 2938,
|
||||
"max_score" : 0.0,
|
||||
"hits" : [ ]
|
||||
},
|
||||
"suggest" : {
|
||||
"simple_phrase" : [ {
|
||||
"text" : "Xor the Got-Jewel",
|
||||
{
|
||||
"_shards": ...
|
||||
"simple_phrase" : [
|
||||
{
|
||||
"text" : "noble prize",
|
||||
"offset" : 0,
|
||||
"length" : 17,
|
||||
"length" : 11,
|
||||
"options" : [ {
|
||||
"text" : "xorr the god jewel",
|
||||
"highlighted": "<em>xorr</em> the <em>god</em> jewel",
|
||||
"score" : 0.17877324
|
||||
}, {
|
||||
"text" : "xor the god jewel",
|
||||
"highlighted": "xor the <em>god</em> jewel",
|
||||
"score" : 0.14231323
|
||||
} ]
|
||||
} ]
|
||||
}
|
||||
"text" : "nobel prize",
|
||||
"highlighted": "<em>nobel</em> prize",
|
||||
"score" : 0.40765354
|
||||
}]
|
||||
}
|
||||
]
|
||||
}
|
||||
--------------------------------------------------
|
||||
// TESTRESPONSE[s/"_shards": \.\.\./"_shards": "$body._shards",/]
|
||||
|
||||
==== Basic Phrase suggest API parameters
|
||||
|
||||
@ -178,34 +222,34 @@ can contain misspellings (See parameter descriptions below).
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -XPOST 'localhost:9200/_search' -d {
|
||||
"suggest" : {
|
||||
"text" : "Xor the Got-Jewel",
|
||||
"simple_phrase" : {
|
||||
"phrase" : {
|
||||
"field" : "bigram",
|
||||
"size" : 1,
|
||||
"direct_generator" : [ {
|
||||
"field" : "body",
|
||||
"suggest_mode" : "always",
|
||||
"min_word_length" : 1
|
||||
} ],
|
||||
"collate": {
|
||||
"query": { <1>
|
||||
"inline" : {
|
||||
"match": {
|
||||
"{{field_name}}" : "{{suggestion}}" <2>
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {"field_name" : "title"}, <3>
|
||||
"prune": true <4>
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
POST _suggest
|
||||
{
|
||||
"text" : "noble prize",
|
||||
"simple_phrase" : {
|
||||
"phrase" : {
|
||||
"field" : "title.trigram",
|
||||
"size" : 1,
|
||||
"direct_generator" : [ {
|
||||
"field" : "title.trigram",
|
||||
"suggest_mode" : "always",
|
||||
"min_word_length" : 1
|
||||
} ],
|
||||
"collate": {
|
||||
"query": { <1>
|
||||
"inline" : {
|
||||
"match": {
|
||||
"{{field_name}}" : "{{suggestion}}" <2>
|
||||
}
|
||||
}
|
||||
},
|
||||
"params": {"field_name" : "title"}, <3>
|
||||
"prune": true <4>
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
<1> This query will be run once for every suggestion.
|
||||
<2> The `{{suggestion}}` variable will be replaced by the text
|
||||
of each suggestion.
|
||||
@ -342,33 +386,27 @@ accept ordinary analyzer names.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
curl -s -XPOST 'localhost:9200/_search' -d {
|
||||
"suggest" : {
|
||||
"text" : "Xor the Got-Jewel",
|
||||
"simple_phrase" : {
|
||||
"phrase" : {
|
||||
"analyzer" : "body",
|
||||
"field" : "bigram",
|
||||
"size" : 4,
|
||||
"real_word_error_likelihood" : 0.95,
|
||||
"confidence" : 2.0,
|
||||
"gram_size" : 2,
|
||||
"direct_generator" : [ {
|
||||
"field" : "body",
|
||||
"suggest_mode" : "always",
|
||||
"min_word_length" : 1
|
||||
}, {
|
||||
"field" : "reverse",
|
||||
"suggest_mode" : "always",
|
||||
"min_word_length" : 1,
|
||||
"pre_filter" : "reverse",
|
||||
"post_filter" : "reverse"
|
||||
} ]
|
||||
}
|
||||
POST _suggest
|
||||
{
|
||||
"text" : "obel prize",
|
||||
"simple_phrase" : {
|
||||
"phrase" : {
|
||||
"field" : "title.trigram",
|
||||
"size" : 1,
|
||||
"direct_generator" : [ {
|
||||
"field" : "title.trigram",
|
||||
"suggest_mode" : "always"
|
||||
}, {
|
||||
"field" : "title.reverse",
|
||||
"suggest_mode" : "always",
|
||||
"pre_filter" : "reverse",
|
||||
"post_filter" : "reverse"
|
||||
} ]
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
`pre_filter` and `post_filter` can also be used to inject synonyms after
|
||||
candidates are generated. For instance for the query `captain usq` we
|
||||
|
@ -21,19 +21,19 @@ And here is a sample response:
|
||||
"timed_out": false,
|
||||
"took": 62,
|
||||
"_shards":{
|
||||
"total" : 5,
|
||||
"successful" : 5,
|
||||
"total" : 1,
|
||||
"successful" : 1,
|
||||
"failed" : 0
|
||||
},
|
||||
"hits":{
|
||||
"total" : 1,
|
||||
"max_score": 0.2876821,
|
||||
"max_score": 1.3862944,
|
||||
"hits" : [
|
||||
{
|
||||
"_index" : "twitter",
|
||||
"_type" : "tweet",
|
||||
"_id" : "0",
|
||||
"_score": 0.2876821,
|
||||
"_score": 1.3862944,
|
||||
"_source" : {
|
||||
"user" : "kimchy",
|
||||
"date" : "2009-11-15T14:12:12",
|
||||
|
Loading…
x
Reference in New Issue
Block a user