Rename simple pattern tokenizers (#25300)
Changed the names to snake case for consistency. Related to #25159; original issue #23363.
parent 0d6c47fe14
commit 4c5bd57619
@@ -99,14 +99,14 @@ terms.
 
 <<analysis-simplepattern-tokenizer,Simple Pattern Tokenizer>>::
 
-The `simplepattern` tokenizer uses a regular expression to capture matching
+The `simple_pattern` tokenizer uses a regular expression to capture matching
 text as terms. It uses a restricted subset of regular expression features
 and is generally faster than the `pattern` tokenizer.
 
 <<analysis-simplepatternsplit-tokenizer,Simple Pattern Split Tokenizer>>::
 
-The `simplepatternsplit` tokenizer uses the same restricted regular expression
-subset as the `simplepattern` tokenizer, but splits the input at matches rather
+The `simple_pattern_split` tokenizer uses the same restricted regular expression
+subset as the `simple_pattern` tokenizer, but splits the input at matches rather
 than returning the matches as terms.
 
 <<analysis-pathhierarchy-tokenizer,Path Tokenizer>>::
@@ -3,7 +3,7 @@
 
 experimental[]
 
-The `simplepattern` tokenizer uses a regular expression to capture matching
+The `simple_pattern` tokenizer uses a regular expression to capture matching
 text as terms. The set of regular expression features it supports is more
 limited than the <<analysis-pattern-tokenizer,`pattern`>> tokenizer, but the
 tokenization is generally faster.
@@ -11,7 +11,7 @@ tokenization is generally faster.
 This tokenizer does not support splitting the input on a pattern match, unlike
 the <<analysis-pattern-tokenizer,`pattern`>> tokenizer. To split on pattern
 matches using the same restricted regular expression subset, see the
-<<analysis-simplepatternsplit-tokenizer,`simplepatternsplit`>> tokenizer.
+<<analysis-simplepatternsplit-tokenizer,`simple_pattern_split`>> tokenizer.
 
 This tokenizer uses {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expressions].
 For an explanation of the supported features and syntax, see <<regexp-syntax,Regular Expression Syntax>>.
@@ -22,7 +22,7 @@ tokenizer should always be configured with a non-default pattern.
 [float]
 === Configuration
 
-The `simplepattern` tokenizer accepts the following parameters:
+The `simple_pattern` tokenizer accepts the following parameters:
 
 [horizontal]
 `pattern`::
@@ -31,7 +31,7 @@ The `simplepattern` tokenizer accepts the following parameters:
 [float]
 === Example configuration
 
-This example configures the `simplepattern` tokenizer to produce terms that are
+This example configures the `simple_pattern` tokenizer to produce terms that are
 three-digit numbers
 
 [source,js]
@@ -47,7 +47,7 @@ PUT my_index
         },
         "tokenizer": {
           "my_tokenizer": {
-            "type": "simplepattern",
+            "type": "simple_pattern",
             "pattern": "[0123456789]{3}"
           }
         }
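For context, the renamed tokenizer can also be exercised inline through the `_analyze` API without creating an index, just as the updated REST tests further down do. A minimal sketch using the same three-digit pattern (the sample text is illustrative):

[source,js]
--------------------------------------------------
POST _analyze
{
  "tokenizer": {
    "type": "simple_pattern",
    "pattern": "[0123456789]{3}"
  },
  "text": "fd-786-335-514-x"
}
--------------------------------------------------

With this pattern, each three-digit run (786, 335, 514) should come back as a separate term.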
@@ -3,14 +3,14 @@
 
 experimental[]
 
-The `simplepatternsplit` tokenizer uses a regular expression to split the
+The `simple_pattern_split` tokenizer uses a regular expression to split the
 input into terms at pattern matches. The set of regular expression features it
 supports is more limited than the <<analysis-pattern-tokenizer,`pattern`>>
 tokenizer, but the tokenization is generally faster.
 
 This tokenizer does not produce terms from the matches themselves. To produce
 terms from matches using patterns in the same restricted regular expression
-subset, see the <<analysis-simplepattern-tokenizer,`simplepattern`>>
+subset, see the <<analysis-simplepattern-tokenizer,`simple_pattern`>>
 tokenizer.
 
 This tokenizer uses {lucene-core-javadoc}/org/apache/lucene/util/automaton/RegExp.html[Lucene regular expressions].
@@ -23,7 +23,7 @@ pattern.
 [float]
 === Configuration
 
-The `simplepatternsplit` tokenizer accepts the following parameters:
+The `simple_pattern_split` tokenizer accepts the following parameters:
 
 [horizontal]
 `pattern`::
@@ -32,7 +32,7 @@ The `simplepatternsplit` tokenizer accepts the following parameters:
 [float]
 === Example configuration
 
-This example configures the `simplepatternsplit` tokenizer to split the input
+This example configures the `simple_pattern_split` tokenizer to split the input
 text on underscores.
 
 [source,js]
@@ -48,7 +48,7 @@ PUT my_index
         },
         "tokenizer": {
           "my_tokenizer": {
-            "type": "simplepatternsplit",
+            "type": "simple_pattern_split",
             "pattern": "_"
          }
         }
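The split variant can be checked the same way; a minimal inline sketch (sample text assumed):

[source,js]
--------------------------------------------------
POST _analyze
{
  "tokenizer": {
    "type": "simple_pattern_split",
    "pattern": "_"
  },
  "text": "an_underscored_phrase"
}
--------------------------------------------------

Here the underscores are consumed as delimiters, so the expected terms are `an`, `underscored`, and `phrase`.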
@@ -122,8 +122,8 @@ public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin {
     @Override
     public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
         Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new TreeMap<>();
-        tokenizers.put("simplepattern", SimplePatternTokenizerFactory::new);
-        tokenizers.put("simplepatternsplit", SimplePatternSplitTokenizerFactory::new);
+        tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
+        tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
         return tokenizers;
     }
 
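The keys in this map are exactly what the `type` field in index settings resolves against, so after this change indices must reference the new names. A sketch of a full settings body of the shape the docs fragments above come from (index and analyzer names are illustrative):

[source,js]
--------------------------------------------------
PUT my_index
{
  "settings": {
    "analysis": {
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "my_tokenizer"
        }
      },
      "tokenizer": {
        "my_tokenizer": {
          "type": "simple_pattern",
          "pattern": "[0123456789]{3}"
        }
      }
    }
  }
}
--------------------------------------------------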
@@ -27,14 +27,14 @@
     - match: { detail.tokenizer.tokens.2.token: od }
 
 ---
-"simplepattern":
+"simple_pattern":
     - do:
         indices.analyze:
           body:
             text: "a6bf fooo ff61"
             explain: true
             tokenizer:
-              type: simplepattern
+              type: simple_pattern
               pattern: "[abcdef0123456789]{4}"
     - length: { detail.tokenizer.tokens: 2 }
     - match: { detail.tokenizer.name: _anonymous_tokenizer }
@@ -42,14 +42,14 @@
     - match: { detail.tokenizer.tokens.1.token: ff61 }
 
 ---
-"simplepatternsplit":
+"simple_pattern_split":
     - do:
         indices.analyze:
           body:
             text: "foo==bar"
             explain: true
             tokenizer:
-              type: simplepatternsplit
+              type: simple_pattern_split
               pattern: ==
     - length: { detail.tokenizer.tokens: 2 }
     - match: { detail.tokenizer.name: _anonymous_tokenizer }
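For reference, the second test corresponds to this console request (a sketch; the assertions above cover the two expected tokens and the `_anonymous_tokenizer` name that `explain` reports for inline tokenizers):

[source,js]
--------------------------------------------------
POST _analyze
{
  "explain": true,
  "tokenizer": {
    "type": "simple_pattern_split",
    "pattern": "=="
  },
  "text": "foo==bar"
}
--------------------------------------------------

Splitting on `==` should yield exactly the two terms `foo` and `bar`, matching the length and match assertions in the test.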