document and test custom analyzer position offset gap

Matt Weber 2015-05-01 21:36:27 -07:00
parent df1914cb21
commit 63c4a214db
4 changed files with 17 additions and 2 deletions


@@ -18,6 +18,9 @@ filters.
 |`char_filter` |An optional list of logical / registered name of char
 filters.
+|`position_offset_gap` |An optional number of positions to increment
+between each field value of a field using this analyzer.
 |=======================================================================
 Here is an example:
@@ -32,6 +35,7 @@ index :
             tokenizer : myTokenizer1
             filter : [myTokenFilter1, myTokenFilter2]
             char_filter : [my_html]
+            position_offset_gap: 256
         tokenizer :
             myTokenizer1 :
                 type : standard


@@ -152,6 +152,12 @@ public class AnalysisModuleTests extends ElasticsearchTestCase {
 //        html = (HtmlStripCharFilterFactory) custom2.charFilters()[1];
 //        assertThat(html.readAheadLimit(), equalTo(1024));
+        // verify position offset gap
+        analyzer = analysisService.analyzer("custom6").analyzer();
+        assertThat(analyzer, instanceOf(CustomAnalyzer.class));
+        CustomAnalyzer custom6 = (CustomAnalyzer) analyzer;
+        assertThat(custom6.getPositionIncrementGap("any_string"), equalTo(256));
+
         // verify characters mapping
         analyzer = analysisService.analyzer("custom5").analyzer();
         assertThat(analyzer, instanceOf(CustomAnalyzer.class));
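The assertion above reads the configured gap straight off the analyzer; the search-time effect is that a phrase query can no longer match across the boundary between two values of the same field. A hedged end-to-end sketch against plain Lucene 5.x, reusing the hypothetical `GappedAnalyzer` from above (the field name, sample values, and the class `PhraseAcrossValues` are likewise invented for illustration):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.RAMDirectory;

public class PhraseAcrossValues {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new GappedAnalyzer()));

        // one document, one multi-valued field with two values
        Document doc = new Document();
        doc.add(new TextField("name", "john smith", Field.Store.NO));
        doc.add(new TextField("name", "alice jones", Field.Store.NO));
        writer.addDocument(doc);
        writer.close();

        // phrase spanning the boundary between the two values
        PhraseQuery phrase = new PhraseQuery();
        phrase.add(new Term("name", "smith"));
        phrase.add(new Term("name", "alice"));
        phrase.setSlop(10); // generous slop, still far below the 256 gap

        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        // with getPositionIncrementGap == 0 this phrase would match;
        // with the 256-position gap it prints 0
        System.out.println(searcher.search(phrase, 10).totalHits);
    }
}

With a gap of 0 the terms `smith` and `alice` sit at adjacent positions and the phrase matches; with the 256-position gap a match would need a slop of at least 256, so the search finds nothing.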


@@ -66,6 +66,10 @@
                     "tokenizer":"standard",
                     "char_filter":["my_mapping"]
                 },
+                "custom6":{
+                    "tokenizer":"standard",
+                    "position_offset_gap": 256
+                },
                 "czechAnalyzerWithStemmer":{
                     "tokenizer":"standard",
                     "filter":["standard", "lowercase", "stop", "czech_stem"]


@@ -49,7 +49,8 @@ index :
             tokenizer : standard
             char_filter : [my_mapping]
         custom6 :
-            type : standard
+            tokenizer : standard
+            position_offset_gap: 256
         custom7 :
             type : standard
             version: 3.6
@@ -58,4 +59,4 @@ index :
             filter : [standard, lowercase, stop, czech_stem]
         decompoundingAnalyzer :
             tokenizer : standard
             filter : [dict_dec]