Docs: Test examples that recreate lang analyzers (#29535)

We have a pile of documentation describing how to rebuild the built-in
language analyzers. Previously, our documentation testing framework
made sure that the examples successfully built *an* analyzer, but it
didn't assert that the analyzer built by the documentation matches the
built-in analyzer. Unsurprisingly, some of the examples aren't quite
right.

This adds a mechanism that tests that the analyzers built by the docs
match the built-in analyzers. The mechanism is fairly simple and brutal
but it seems to be working: build a hundred random unicode sequences
and send them through the `_analyze` API with the rebuilt analyzer and
then again through the built-in analyzer, then make sure both APIs
return the same results. Each of these calls to `_analyze` takes about
20ms on my laptop, which seems fine.
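
In REST terms, each comparison boils down to a pair of `_analyze` calls like
the ones below (a sketch only: the sample text here is invented, while the
real test generates a hundred random unicode strings):

```
GET /thai_example/_analyze
{ "analyzer": "thai", "text": ["สวัสดี ชาวโลก"] }

GET /thai_example/_analyze
{ "analyzer": "rebuilt_thai", "text": ["สวัสดี ชาวโลก"] }
```

If the two token streams differ, the test fails at the first differing token.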
Nik Everett 2018-05-09 09:23:10 -04:00 committed by GitHub
parent 2228e6e663
commit f9dc86836d
9 changed files with 344 additions and 58 deletions


@@ -141,9 +141,11 @@ public class RestTestsFromSnippetsTask extends SnippetsTask {
     private static final String SYNTAX = {
         String method = /(?<method>GET|PUT|POST|HEAD|OPTIONS|DELETE)/
         String pathAndQuery = /(?<pathAndQuery>[^\n]+)/
-        String badBody = /GET|PUT|POST|HEAD|OPTIONS|DELETE|#/
+        String badBody = /GET|PUT|POST|HEAD|OPTIONS|DELETE|startyaml|#/
         String body = /(?<body>(?:\n(?!$badBody)[^\n]+)+)/
-        String nonComment = /$method\s+$pathAndQuery$body?/
+        String rawRequest = /(?:$method\s+$pathAndQuery$body?)/
+        String yamlRequest = /(?:startyaml(?s)(?<yaml>.+?)(?-s)endyaml)/
+        String nonComment = /(?:$rawRequest|$yamlRequest)/
         String comment = /(?<comment>#.+)/
         /(?:$comment|$nonComment)\n+/
     }()
@@ -333,6 +335,11 @@ public class RestTestsFromSnippetsTask extends SnippetsTask {
             // Comment
             return
         }
+        String yamlRequest = matcher.group("yaml");
+        if (yamlRequest != null) {
+            current.println(yamlRequest)
+            return
+        }
         String method = matcher.group("method")
         String pathAndQuery = matcher.group("pathAndQuery")
         String body = matcher.group("body")


@@ -68,6 +68,23 @@ for its modifiers:
   but rather than the setup defined in `docs/build.gradle` the setup is defined
   right in the documentation file.

+In addition to the standard CONSOLE syntax these snippets can contain blocks
+of yaml surrounded by markers like this:
+
+```
+startyaml
+  - compare_analyzers: {index: thai_example, first: thai, second: rebuilt_thai}
+endyaml
+```
+
+This allows slightly more expressive testing of the snippets. Since that syntax
+is not supported by CONSOLE the usual way to incorporate it is with a
+`// TEST[s//]` marker like this:
+
+```
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: thai_example, first: thai, second: rebuilt_thai}\nendyaml\n/]
+```
+
 Any place you can use json you can use elements like `$body.path.to.thing`
 which is replaced on the fly with the contents of the thing at `path.to.thing`
 in the last response.
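
For example, a hypothetical pair of snippets (the `test` index and `user`
field are only for illustration) could index a document and then fetch it by
the `_id` from the indexing response:

```
POST /test/_doc
{ "user": "kimchy" }

GET /test/_doc/$body._id
```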


@@ -60,6 +60,8 @@ buildRestTests.docs = fileTree(projectDir) {
   exclude 'build.gradle'
   // That is where the snippets go, not where they come from!
   exclude 'build'
+  // Just syntax examples
+  exclude 'README.asciidoc'
 }

 Closure setupTwitter = { String name, int count ->


@@ -97,10 +97,11 @@ PUT /arabic_example
         }
       },
       "analyzer": {
-        "arabic": {
+        "rebuilt_arabic": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
+            "decimal_digit",
             "arabic_stop",
             "arabic_normalization",
             "arabic_keywords",
@@ -113,6 +114,8 @@ PUT /arabic_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"arabic_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: arabic_example, first: arabic, second: rebuilt_arabic}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -144,7 +147,7 @@ PUT /armenian_example
         }
       },
       "analyzer": {
-        "armenian": {
+        "rebuilt_armenian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -159,6 +162,8 @@ PUT /armenian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"armenian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: armenian_example, first: armenian, second: rebuilt_armenian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -190,7 +195,7 @@ PUT /basque_example
         }
       },
       "analyzer": {
-        "basque": {
+        "rebuilt_basque": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -205,6 +210,8 @@ PUT /basque_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"basque_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: basque_example, first: basque, second: rebuilt_basque}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -236,14 +243,15 @@ PUT /bengali_example
         }
       },
       "analyzer": {
-        "bengali": {
+        "rebuilt_bengali": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
+            "decimal_digit",
+            "bengali_keywords",
             "indic_normalization",
             "bengali_normalization",
             "bengali_stop",
-            "bengali_keywords",
             "bengali_stemmer"
           ]
         }
@@ -253,6 +261,8 @@ PUT /bengali_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"bengali_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: bengali_example, first: bengali, second: rebuilt_bengali}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -284,7 +294,7 @@ PUT /brazilian_example
         }
       },
       "analyzer": {
-        "brazilian": {
+        "rebuilt_brazilian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -299,6 +309,8 @@ PUT /brazilian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"brazilian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: brazilian_example, first: brazilian, second: rebuilt_brazilian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -330,7 +342,7 @@ PUT /bulgarian_example
         }
       },
       "analyzer": {
-        "bulgarian": {
+        "rebuilt_bulgarian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -345,6 +357,8 @@ PUT /bulgarian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"bulgarian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: bulgarian_example, first: bulgarian, second: rebuilt_bulgarian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -380,7 +394,7 @@ PUT /catalan_example
         }
       },
       "analyzer": {
-        "catalan": {
+        "rebuilt_catalan": {
           "tokenizer": "standard",
           "filter": [
             "catalan_elision",
@@ -396,6 +410,8 @@ PUT /catalan_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"catalan_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: catalan_example, first: catalan, second: rebuilt_catalan}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -415,11 +431,17 @@ PUT /cjk_example
       "filter": {
         "english_stop": {
           "type": "stop",
-          "stopwords": "_english_" <1>
+          "stopwords": [ <1>
+            "a", "and", "are", "as", "at", "be", "but", "by", "for",
+            "if", "in", "into", "is", "it", "no", "not", "of", "on",
+            "or", "s", "such", "t", "that", "the", "their", "then",
+            "there", "these", "they", "this", "to", "was", "will",
+            "with", "www"
+          ]
         }
       },
       "analyzer": {
-        "cjk": {
+        "rebuilt_cjk": {
           "tokenizer": "standard",
           "filter": [
             "cjk_width",
@@ -434,8 +456,12 @@ PUT /cjk_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"cjk_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: cjk_example, first: cjk, second: rebuilt_cjk}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
-    or `stopwords_path` parameters.
+    or `stopwords_path` parameters. The default stop words are
+    *almost* the same as the `_english_` set, but not exactly
+    the same.

 [[czech-analyzer]]
 ===== `czech` analyzer
@@ -463,7 +489,7 @@ PUT /czech_example
         }
       },
       "analyzer": {
-        "czech": {
+        "rebuilt_czech": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -478,6 +504,8 @@ PUT /czech_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"czech_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: czech_example, first: czech, second: rebuilt_czech}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -509,7 +537,7 @@ PUT /danish_example
         }
       },
       "analyzer": {
-        "danish": {
+        "rebuilt_danish": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -524,6 +552,8 @@ PUT /danish_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"danish_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: danish_example, first: danish, second: rebuilt_danish}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -564,7 +594,7 @@ PUT /dutch_example
         }
       },
       "analyzer": {
-        "dutch": {
+        "rebuilt_dutch": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -580,6 +610,8 @@ PUT /dutch_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"dutch_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: dutch_example, first: dutch, second: rebuilt_dutch}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -615,7 +647,7 @@ PUT /english_example
         }
       },
       "analyzer": {
-        "english": {
+        "rebuilt_english": {
           "tokenizer": "standard",
           "filter": [
             "english_possessive_stemmer",
@@ -631,6 +663,8 @@ PUT /english_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"english_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: english_example, first: english, second: rebuilt_english}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -662,7 +696,7 @@ PUT /finnish_example
         }
       },
       "analyzer": {
-        "finnish": {
+        "rebuilt_finnish": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -677,6 +711,8 @@ PUT /finnish_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"finnish_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: finnish_example, first: finnish, second: rebuilt_finnish}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -717,7 +753,7 @@ PUT /french_example
         }
       },
       "analyzer": {
-        "french": {
+        "rebuilt_french": {
           "tokenizer": "standard",
           "filter": [
             "french_elision",
@@ -733,6 +769,8 @@ PUT /french_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"french_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: french_example, first: french, second: rebuilt_french}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -764,7 +802,7 @@ PUT /galician_example
         }
       },
       "analyzer": {
-        "galician": {
+        "rebuilt_galician": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -779,6 +817,8 @@ PUT /galician_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"galician_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: galician_example, first: galician, second: rebuilt_galician}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -810,7 +850,7 @@ PUT /german_example
         }
      },
       "analyzer": {
-        "german": {
+        "rebuilt_german": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -826,6 +866,8 @@ PUT /german_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"german_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: german_example, first: german, second: rebuilt_german}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -861,7 +903,7 @@ PUT /greek_example
         }
       },
       "analyzer": {
-        "greek": {
+        "rebuilt_greek": {
           "tokenizer": "standard",
           "filter": [
             "greek_lowercase",
@@ -876,6 +918,8 @@ PUT /greek_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"greek_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: greek_example, first: greek, second: rebuilt_greek}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -907,14 +951,15 @@ PUT /hindi_example
         }
       },
       "analyzer": {
-        "hindi": {
+        "rebuilt_hindi": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
+            "decimal_digit",
+            "hindi_keywords",
             "indic_normalization",
             "hindi_normalization",
             "hindi_stop",
-            "hindi_keywords",
             "hindi_stemmer"
           ]
         }
@@ -924,6 +969,8 @@ PUT /hindi_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"hindi_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: hindi_example, first: hindi, second: rebuilt_hindi}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -955,7 +1002,7 @@ PUT /hungarian_example
         }
       },
       "analyzer": {
-        "hungarian": {
+        "rebuilt_hungarian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -970,6 +1017,8 @@ PUT /hungarian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"hungarian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: hungarian_example, first: hungarian, second: rebuilt_hungarian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1002,7 +1051,7 @@ PUT /indonesian_example
         }
       },
       "analyzer": {
-        "indonesian": {
+        "rebuilt_indonesian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1017,6 +1066,8 @@ PUT /indonesian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"indonesian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: indonesian_example, first: indonesian, second: rebuilt_indonesian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1034,9 +1085,15 @@ PUT /irish_example
   "settings": {
     "analysis": {
       "filter": {
+        "irish_hyphenation": {
+          "type": "stop",
+          "stopwords": [ "h", "n", "t" ],
+          "ignore_case": true
+        },
         "irish_elision": {
           "type": "elision",
-          "articles": [ "h", "n", "t" ]
+          "articles": [ "d", "m", "b" ],
+          "articles_case": true
         },
         "irish_stop": {
           "type": "stop",
@@ -1056,12 +1113,13 @@ PUT /irish_example
         }
       },
       "analyzer": {
-        "irish": {
+        "rebuilt_irish": {
           "tokenizer": "standard",
           "filter": [
-            "irish_stop",
+            "irish_hyphenation",
             "irish_elision",
             "irish_lowercase",
+            "irish_stop",
             "irish_keywords",
             "irish_stemmer"
           ]
@@ -1072,6 +1130,8 @@ PUT /irish_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"irish_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: irish_example, first: irish, second: rebuilt_irish}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1112,7 +1172,7 @@ PUT /italian_example
         }
       },
       "analyzer": {
-        "italian": {
+        "rebuilt_italian": {
           "tokenizer": "standard",
           "filter": [
             "italian_elision",
@@ -1128,6 +1188,8 @@ PUT /italian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"italian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: italian_example, first: italian, second: rebuilt_italian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1159,7 +1221,7 @@ PUT /latvian_example
         }
       },
       "analyzer": {
-        "latvian": {
+        "rebuilt_latvian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1174,6 +1236,8 @@ PUT /latvian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"latvian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: latvian_example, first: latvian, second: rebuilt_latvian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1205,7 +1269,7 @@ PUT /lithuanian_example
         }
       },
       "analyzer": {
-        "lithuanian": {
+        "rebuilt_lithuanian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1220,6 +1284,8 @@ PUT /lithuanian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"lithuanian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: lithuanian_example, first: lithuanian, second: rebuilt_lithuanian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1251,7 +1317,7 @@ PUT /norwegian_example
         }
       },
       "analyzer": {
-        "norwegian": {
+        "rebuilt_norwegian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1266,6 +1332,8 @@ PUT /norwegian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"norwegian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: norwegian_example, first: norwegian, second: rebuilt_norwegian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1295,11 +1363,12 @@ PUT /persian_example
         }
       },
       "analyzer": {
-        "persian": {
+        "rebuilt_persian": {
           "tokenizer": "standard",
           "char_filter": [ "zero_width_spaces" ],
           "filter": [
             "lowercase",
+            "decimal_digit",
             "arabic_normalization",
             "persian_normalization",
             "persian_stop"
@@ -1311,6 +1380,7 @@ PUT /persian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: persian_example, first: persian, second: rebuilt_persian}\nendyaml\n/]
 <1> Replaces zero-width non-joiners with an ASCII space.
 <2> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
@@ -1341,7 +1411,7 @@ PUT /portuguese_example
         }
       },
       "analyzer": {
-        "portuguese": {
+        "rebuilt_portuguese": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1356,6 +1426,8 @@ PUT /portuguese_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"portuguese_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: portuguese_example, first: portuguese, second: rebuilt_portuguese}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1387,7 +1459,7 @@ PUT /romanian_example
         }
       },
       "analyzer": {
-        "romanian": {
+        "rebuilt_romanian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1402,6 +1474,8 @@ PUT /romanian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"romanian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: romanian_example, first: romanian, second: rebuilt_romanian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1434,7 +1508,7 @@ PUT /russian_example
         }
       },
       "analyzer": {
-        "russian": {
+        "rebuilt_russian": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1449,6 +1523,8 @@ PUT /russian_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"russian_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: russian_example, first: russian, second: rebuilt_russian}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1480,11 +1556,12 @@ PUT /sorani_example
         }
       },
       "analyzer": {
-        "sorani": {
+        "rebuilt_sorani": {
           "tokenizer": "standard",
           "filter": [
             "sorani_normalization",
             "lowercase",
+            "decimal_digit",
             "sorani_stop",
             "sorani_keywords",
             "sorani_stemmer"
@@ -1496,6 +1573,8 @@ PUT /sorani_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"sorani_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: sorani_example, first: sorani, second: rebuilt_sorani}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1527,7 +1606,7 @@ PUT /spanish_example
         }
       },
       "analyzer": {
-        "spanish": {
+        "rebuilt_spanish": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1542,6 +1621,8 @@ PUT /spanish_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"spanish_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: spanish_example, first: spanish, second: rebuilt_spanish}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1573,7 +1654,7 @@ PUT /swedish_example
         }
       },
       "analyzer": {
-        "swedish": {
+        "rebuilt_swedish": {
           "tokenizer": "standard",
           "filter": [
             "lowercase",
@@ -1588,6 +1669,8 @@ PUT /swedish_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"swedish_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: swedish_example, first: swedish, second: rebuilt_swedish}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1623,7 +1706,7 @@ PUT /turkish_example
         }
       },
       "analyzer": {
-        "turkish": {
+        "rebuilt_turkish": {
           "tokenizer": "standard",
           "filter": [
             "apostrophe",
@@ -1639,6 +1722,8 @@ PUT /turkish_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"turkish_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: turkish_example, first: turkish, second: rebuilt_turkish}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.
 <2> This filter should be removed unless there are words which should
@@ -1662,10 +1747,11 @@ PUT /thai_example
         }
       },
       "analyzer": {
-        "thai": {
+        "rebuilt_thai": {
           "tokenizer": "thai",
           "filter": [
             "lowercase",
+            "decimal_digit",
             "thai_stop"
           ]
         }
@@ -1675,5 +1761,7 @@ PUT /thai_example
 }
 ----------------------------------------------------
 // CONSOLE
+// TEST[s/"thai_keywords",//]
+// TEST[s/\n$/\nstartyaml\n - compare_analyzers: {index: thai_example, first: thai, second: rebuilt_thai}\nendyaml\n/]
 <1> The default stopwords can be overridden with the `stopwords`
     or `stopwords_path` parameters.


@@ -20,18 +20,39 @@
 package org.elasticsearch.smoketest;

 import org.apache.http.HttpHost;
+import org.apache.lucene.util.BytesRef;

 import com.carrotsearch.randomizedtesting.annotations.Name;
 import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

 import org.elasticsearch.Version;
 import org.elasticsearch.client.RestClient;
+import org.elasticsearch.common.ParseField;
+import org.elasticsearch.common.xcontent.ConstructingObjectParser;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
+import org.elasticsearch.common.xcontent.XContentLocation;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.common.xcontent.XContentParser.Token;
 import org.elasticsearch.test.rest.yaml.ClientYamlDocsTestClient;
 import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate;
 import org.elasticsearch.test.rest.yaml.ClientYamlTestClient;
+import org.elasticsearch.test.rest.yaml.ClientYamlTestExecutionContext;
+import org.elasticsearch.test.rest.yaml.ClientYamlTestResponse;
 import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase;
 import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestSpec;
+import org.elasticsearch.test.rest.yaml.section.ExecutableSection;

 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Map;
+
+import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg;
+import static java.util.Collections.emptyMap;
+import static java.util.Collections.singletonList;
+import static java.util.Collections.singletonMap;

 public class DocsClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
@@ -41,7 +62,12 @@ public class DocsClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {

     @ParametersFactory
     public static Iterable<Object[]> parameters() throws Exception {
-        return ESClientYamlSuiteTestCase.createParameters();
+        List<NamedXContentRegistry.Entry> entries = new ArrayList<>(ExecutableSection.DEFAULT_EXECUTABLE_CONTEXTS.size() + 1);
+        entries.addAll(ExecutableSection.DEFAULT_EXECUTABLE_CONTEXTS);
+        entries.add(new NamedXContentRegistry.Entry(ExecutableSection.class,
+                new ParseField("compare_analyzers"), CompareAnalyzers::parse));
+        NamedXContentRegistry executeableSectionRegistry = new NamedXContentRegistry(entries);
+        return ESClientYamlSuiteTestCase.createParameters(executeableSectionRegistry);
     }

     @Override
@@ -64,5 +90,117 @@ public class DocsClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase {
             List<HttpHost> hosts, Version esVersion) throws IOException {
         return new ClientYamlDocsTestClient(restSpec, restClient, hosts, esVersion);
     }
+
+    /**
+     * Compares the results of running two analyzers against many random
+     * strings. The goal is to figure out if two analyzers are "the same" by
+     * comparing their results. This is far from perfect but should be fairly
+     * accurate, especially for gross things like missing {@code decimal_digit}
+     * token filters, and should be fairly fast because it compares a fairly
+     * small number of tokens.
+     */
+    private static class CompareAnalyzers implements ExecutableSection {
+        private static ConstructingObjectParser<CompareAnalyzers, XContentLocation> PARSER =
+            new ConstructingObjectParser<>("test_analyzer", false, (a, location) -> {
+                String index = (String) a[0];
+                String first = (String) a[1];
+                String second = (String) a[2];
+                return new CompareAnalyzers(location, index, first, second);
+            });
+        static {
+            PARSER.declareString(constructorArg(), new ParseField("index"));
+            PARSER.declareString(constructorArg(), new ParseField("first"));
+            PARSER.declareString(constructorArg(), new ParseField("second"));
+        }
+        private static CompareAnalyzers parse(XContentParser parser) throws IOException {
+            XContentLocation location = parser.getTokenLocation();
+            CompareAnalyzers section = PARSER.parse(parser, location);
+            assert parser.currentToken() == Token.END_OBJECT : "End of object required";
+            parser.nextToken(); // throw out the END_OBJECT to conform with other ExecutableSections
+            return section;
+        }
+
+        private final XContentLocation location;
+        private final String index;
+        private final String first;
+        private final String second;
+
+        private CompareAnalyzers(XContentLocation location, String index, String first, String second) {
+            this.location = location;
+            this.index = index;
+            this.first = first;
+            this.second = second;
+        }
+
+        @Override
+        public XContentLocation getLocation() {
+            return location;
+        }
+
+        @Override
+        public void execute(ClientYamlTestExecutionContext executionContext) throws IOException {
+            int size = 100;
+            int maxLength = 15;
+            List<String> testText = new ArrayList<>(size);
+            for (int i = 0; i < size; i++) {
+                /*
+                 * Build a string with a few unicode sequences separated by
+                 * spaces. The unicode sequences aren't going to be of the same
+                 * code page which is a shame because it makes the entire
+                 * string less realistic. But this still provides a fairly
+                 * nice string to compare.
+                 */
+                int spaces = between(0, 5);
+                StringBuilder b = new StringBuilder((spaces + 1) * maxLength);
+                b.append(randomRealisticUnicodeOfCodepointLengthBetween(1, maxLength));
+                for (int t = 0; t < spaces; t++) {
+                    b.append(' ');
+                    b.append(randomRealisticUnicodeOfCodepointLengthBetween(1, maxLength));
+                }
+                testText.add(b.toString()
+                    // Don't look up stashed values
+                    .replace("$", "\\$"));
+            }
+            Map<String, Object> body = new HashMap<>(2);
+            body.put("analyzer", first);
+            body.put("text", testText);
+            ClientYamlTestResponse response = executionContext.callApi("indices.analyze", singletonMap("index", index),
+                singletonList(body), emptyMap());
+            Iterator<?> firstTokens = ((List<?>) response.evaluate("tokens")).iterator();
+            body.put("analyzer", second);
+            response = executionContext.callApi("indices.analyze", singletonMap("index", index),
+                singletonList(body), emptyMap());
+            Iterator<?> secondTokens = ((List<?>) response.evaluate("tokens")).iterator();
+
+            Object previousFirst = null;
+            Object previousSecond = null;
+            while (firstTokens.hasNext()) {
+                if (false == secondTokens.hasNext()) {
+                    fail(second + " has fewer tokens than " + first + ". "
+                        + first + " has [" + firstTokens.next() + "] but " + second + " is out of tokens. "
+                        + first + "'s last token was [" + previousFirst + "] and "
+                        + second + "'s last token was [" + previousSecond + "]");
+                }
+                Map<?, ?> firstToken = (Map<?, ?>) firstTokens.next();
+                Map<?, ?> secondToken = (Map<?, ?>) secondTokens.next();
+                String firstText = (String) firstToken.get("token");
+                String secondText = (String) secondToken.get("token");
+                // Check the text and produce an error message with the utf8 sequence if they don't match.
+                if (false == secondText.equals(firstText)) {
+                    fail("text differs: " + first + " was [" + firstText + "] but " + second + " was [" + secondText
+                        + "]. In utf8 those are\n" + new BytesRef(firstText) + " and\n" + new BytesRef(secondText));
+                }
+                // Now check the whole map just in case the text matches but something else differs
+                assertEquals(firstToken, secondToken);
+                previousFirst = firstToken;
+                previousSecond = secondToken;
+            }
+            if (secondTokens.hasNext()) {
+                fail(second + " has more tokens than " + first + ". "
+                    + second + " has [" + secondTokens.next() + "] but " + first + " is out of tokens. "
+                    + first + "'s last token was [" + previousFirst + "] and "
+                    + second + "'s last token was [" + previousSecond + "]");
+            }
+        }
+    }
 }


@@ -121,7 +121,7 @@ public class ClientYamlTestClient {
         }
         String contentType = entity.getContentType().getValue();
         //randomly test the GET with source param instead of GET/POST with body
-        if (sendBodyAsSourceParam(supportedMethods, contentType)) {
+        if (sendBodyAsSourceParam(supportedMethods, contentType, entity.getContentLength())) {
             logger.debug("sending the request body as source param with GET method");
             queryStringParams.put("source", EntityUtils.toString(entity));
             queryStringParams.put("source_content_type", contentType);
@@ -177,14 +177,25 @@ public class ClientYamlTestClient {
         }
     }

-    private static boolean sendBodyAsSourceParam(List<String> supportedMethods, String contentType) {
-        if (supportedMethods.contains(HttpGet.METHOD_NAME)) {
-            if (contentType.startsWith(ContentType.APPLICATION_JSON.getMimeType()) ||
-                    contentType.startsWith(YAML_CONTENT_TYPE.getMimeType())) {
-                return RandomizedTest.rarely();
-            }
-        }
-        return false;
+    private static boolean sendBodyAsSourceParam(List<String> supportedMethods, String contentType, long contentLength) {
+        if (false == supportedMethods.contains(HttpGet.METHOD_NAME)) {
+            // The API doesn't claim to support GET anyway
+            return false;
+        }
+        if (contentLength < 0) {
+            // Negative length means "unknown" or "huge" in this case. Either way we can't send it as a parameter
+            return false;
+        }
+        if (contentLength > 2000) {
+            // Long bodies won't fit in the parameter and will cause a too_long_frame_exception
+            return false;
+        }
+        if (false == contentType.startsWith(ContentType.APPLICATION_JSON.getMimeType())
+                && false == contentType.startsWith(YAML_CONTENT_TYPE.getMimeType())) {
+            // We can only encode JSON or YAML this way.
+            return false;
+        }
+        return RandomizedTest.rarely();
     }

     private ClientYamlSuiteRestApi restApi(String apiName) {


@@ -28,6 +28,7 @@ import org.elasticsearch.client.RestClient;
 import org.elasticsearch.common.Strings;
 import org.elasticsearch.common.collect.Tuple;
 import org.elasticsearch.common.io.PathUtils;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.test.rest.ESRestTestCase;
 import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestApi;
 import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestSpec;
@@ -143,7 +144,19 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
         return new ClientYamlTestClient(restSpec, restClient, hosts, esVersion);
     }

+    /**
+     * Create parameters for this parameterized test. Uses the
+     * {@link ExecutableSection#XCONTENT_REGISTRY list} of executable sections
+     * defined in {@link ExecutableSection}.
+     */
     public static Iterable<Object[]> createParameters() throws Exception {
+        return createParameters(ExecutableSection.XCONTENT_REGISTRY);
+    }
+
+    /**
+     * Create parameters for this parameterized test.
+     */
+    public static Iterable<Object[]> createParameters(NamedXContentRegistry executeableSectionRegistry) throws Exception {
         String[] paths = resolvePathsProperty(REST_TESTS_SUITE, ""); // default to all tests under the test root
         List<Object[]> tests = new ArrayList<>();
         Map<String, Set<Path>> yamlSuites = loadSuites(paths);
@@ -151,7 +164,7 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
         for (String api : yamlSuites.keySet()) {
             List<Path> yamlFiles = new ArrayList<>(yamlSuites.get(api));
             for (Path yamlFile : yamlFiles) {
-                ClientYamlTestSuite restTestSuite = ClientYamlTestSuite.parse(api, yamlFile);
+                ClientYamlTestSuite restTestSuite = ClientYamlTestSuite.parse(executeableSectionRegistry, api, yamlFile);
                 for (ClientYamlTestSection testSection : restTestSuite.getTestSections()) {
                     tests.add(new Object[]{ new ClientYamlTestCandidate(restTestSuite, testSection) });
                 }


@@ -21,6 +21,7 @@ package org.elasticsearch.test.rest.yaml.section;

 import org.elasticsearch.common.ParsingException;
 import org.elasticsearch.common.xcontent.DeprecationHandler;
 import org.elasticsearch.common.xcontent.LoggingDeprecationHandler;
+import org.elasticsearch.common.xcontent.NamedXContentRegistry;
 import org.elasticsearch.common.xcontent.XContentParser;
 import org.elasticsearch.common.xcontent.yaml.YamlXContent;
@@ -40,7 +41,7 @@ import java.util.TreeSet;
  * Supports a setup section and multiple test sections.
  */
 public class ClientYamlTestSuite {
-    public static ClientYamlTestSuite parse(String api, Path file) throws IOException {
+    public static ClientYamlTestSuite parse(NamedXContentRegistry executeableSectionRegistry, String api, Path file) throws IOException {
         if (!Files.isRegularFile(file)) {
             throw new IllegalArgumentException(file.toAbsolutePath() + " is not a file");
         }
@@ -64,7 +65,7 @@ public class ClientYamlTestSuite {
             }
         }

-        try (XContentParser parser = YamlXContent.yamlXContent.createParser(ExecutableSection.XCONTENT_REGISTRY,
+        try (XContentParser parser = YamlXContent.yamlXContent.createParser(executeableSectionRegistry,
                 LoggingDeprecationHandler.INSTANCE, Files.newInputStream(file))) {
             return parse(api, filename, parser);
         } catch(Exception e) {


@@ -26,15 +26,18 @@ import org.elasticsearch.test.rest.yaml.ClientYamlTestExecutionContext;

 import java.io.IOException;
 import java.util.Arrays;
+import java.util.List;
+
+import static java.util.Collections.unmodifiableList;

 /**
  * Represents a test fragment that can be executed (e.g. api call, assertion)
  */
 public interface ExecutableSection {
     /**
-     * {@link NamedXContentRegistry} needed in the {@link XContentParser} before calling {@link ExecutableSection#parse(XContentParser)}.
+     * Default list of {@link ExecutableSection}s available for tests.
      */
-    NamedXContentRegistry XCONTENT_REGISTRY = new NamedXContentRegistry(Arrays.asList(
+    List<NamedXContentRegistry.Entry> DEFAULT_EXECUTABLE_CONTEXTS = unmodifiableList(Arrays.asList(
         new NamedXContentRegistry.Entry(ExecutableSection.class, new ParseField("do"), DoSection::parse),
         new NamedXContentRegistry.Entry(ExecutableSection.class, new ParseField("set"), SetSection::parse),
         new NamedXContentRegistry.Entry(ExecutableSection.class, new ParseField("match"), MatchAssertion::parse),
@@ -46,6 +49,12 @@ public interface ExecutableSection {
         new NamedXContentRegistry.Entry(ExecutableSection.class, new ParseField("lte"), LessThanOrEqualToAssertion::parse),
         new NamedXContentRegistry.Entry(ExecutableSection.class, new ParseField("length"), LengthAssertion::parse)));

+    /**
+     * {@link NamedXContentRegistry} that parses the default list of
+     * {@link ExecutableSection}s available for tests.
+     */
+    NamedXContentRegistry XCONTENT_REGISTRY = new NamedXContentRegistry(DEFAULT_EXECUTABLE_CONTEXTS);
+
     static ExecutableSection parse(XContentParser parser) throws IOException {
         ParserUtils.advanceToFieldName(parser);
         String section = parser.currentName();
@@ -60,7 +69,7 @@ public interface ExecutableSection {
     }

     /**
      * Get the location in the test that this was defined.
      */
     XContentLocation getLocation();