parent
47d50c6774
commit
b84b5525e1
|
@ -155,3 +155,7 @@ include::tokenizers/simplepattern-tokenizer.asciidoc[]
|
|||
include::tokenizers/simplepatternsplit-tokenizer.asciidoc[]
|
||||
|
||||
include::tokenizers/pathhierarchy-tokenizer.asciidoc[]
|
||||
|
||||
include::tokenizers/pathhierarchy-tokenizer-examples.asciidoc[]
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,193 @@
|
|||
[[analysis-pathhierarchy-tokenizer-examples]]
|
||||
=== Path Hierarchy Tokenizer Examples
|
||||
|
||||
A common use-case for the `path_hierarchy` tokenizer is filtering results by
|
||||
file paths. When a file path is indexed along with the data, analyzing it with the
|
||||
`path_hierarchy` tokenizer makes it possible to filter the results
|
||||
by different parts of the file path string.
|
||||
|
||||
|
||||
This example configures an index to have two custom analyzers and applies
|
||||
those analyzers to multi-fields of the `file_path` text field that will
|
||||
store filenames. One of the two analyzers uses reverse tokenization.
|
||||
Some sample documents are then indexed to represent some file paths
|
||||
for photos inside photo folders of two different users.
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
PUT file-path-test
|
||||
{
|
||||
"settings": {
|
||||
"analysis": {
|
||||
"analyzer": {
|
||||
"custom_path_tree": {
|
||||
"tokenizer": "custom_hierarchy"
|
||||
},
|
||||
"custom_path_tree_reversed": {
|
||||
"tokenizer": "custom_hierarchy_reversed"
|
||||
}
|
||||
},
|
||||
"tokenizer": {
|
||||
"custom_hierarchy": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "/"
|
||||
},
|
||||
"custom_hierarchy_reversed": {
|
||||
"type": "path_hierarchy",
|
||||
"delimiter": "/",
|
||||
"reverse": "true"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"mappings": {
|
||||
"_doc": {
|
||||
"properties": {
|
||||
"file_path": {
|
||||
"type": "text",
|
||||
"fields": {
|
||||
"tree": {
|
||||
"type": "text",
|
||||
"analyzer": "custom_path_tree"
|
||||
},
|
||||
"tree_reversed": {
|
||||
"type": "text",
|
||||
"analyzer": "custom_path_tree_reversed"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
POST file-path-test/_doc/1
|
||||
{
|
||||
"file_path": "/User/alice/photos/2017/05/16/my_photo1.jpg"
|
||||
}
|
||||
|
||||
POST file-path-test/_doc/2
|
||||
{
|
||||
"file_path": "/User/alice/photos/2017/05/16/my_photo2.jpg"
|
||||
}
|
||||
|
||||
POST file-path-test/_doc/3
|
||||
{
|
||||
"file_path": "/User/alice/photos/2017/05/16/my_photo3.jpg"
|
||||
}
|
||||
|
||||
POST file-path-test/_doc/4
|
||||
{
|
||||
"file_path": "/User/alice/photos/2017/05/15/my_photo1.jpg"
|
||||
}
|
||||
|
||||
POST file-path-test/_doc/5
|
||||
{
|
||||
"file_path": "/User/bob/photos/2017/05/16/my_photo1.jpg"
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
// TESTSETUP
|
||||
|
||||
|
||||
A search for a particular file path string against the text field matches all
|
||||
the example documents, with Bob's documents ranking highest because `bob` is also
|
||||
one of the terms created by the standard analyzer, which boosts the relevance of
|
||||
Bob's documents.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
GET file-path-test/_search
|
||||
{
|
||||
"query": {
|
||||
"match": {
|
||||
"file_path": "/User/bob/photos/2017/05"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
|
||||
It's simple to match or filter documents with file paths that exist within a
|
||||
particular directory using the `file_path.tree` field.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
GET file-path-test/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"file_path.tree": "/User/alice/photos/2017/05/16"
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
With the reverse parameter for this tokenizer, it's also possible to match
|
||||
from the other end of the file path, such as individual file names or a deep
|
||||
level subdirectory. The following example shows a search for all files named
|
||||
`my_photo1.jpg` within any directory via the `file_path.tree_reversed` field
|
||||
configured to use the reverse parameter in the mapping.
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
GET file-path-test/_search
|
||||
{
|
||||
"query": {
|
||||
"term": {
|
||||
"file_path.tree_reversed": {
|
||||
"value": "my_photo1.jpg"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
|
||||
Viewing the tokens generated with both forward and reverse is instructive
|
||||
in showing the tokens created for the same file path value.
|
||||
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
POST file-path-test/_analyze
|
||||
{
|
||||
"analyzer": "custom_path_tree",
|
||||
"text": "/User/alice/photos/2017/05/16/my_photo1.jpg"
|
||||
}
|
||||
|
||||
POST file-path-test/_analyze
|
||||
{
|
||||
"analyzer": "custom_path_tree_reversed",
|
||||
"text": "/User/alice/photos/2017/05/16/my_photo1.jpg"
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
||||
|
||||
|
||||
It's also useful to be able to filter with file paths when combined with other
|
||||
types of searches, such as this example looking for any file paths with `16`
|
||||
that also must be in Alice's photo directory.
|
||||
|
||||
[source,js]
|
||||
--------------------------------------------------
|
||||
GET file-path-test/_search
|
||||
{
|
||||
"query": {
|
||||
"bool" : {
|
||||
"must" : {
|
||||
"match" : { "file_path" : "16" }
|
||||
},
|
||||
"filter": {
|
||||
"term" : { "file_path.tree" : "/User/alice" }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
--------------------------------------------------
|
||||
// CONSOLE
|
|
@ -170,3 +170,7 @@ If we were to set `reverse` to `true`, it would produce the following:
|
|||
---------------------------
|
||||
[ one/two/three/, two/three/, three/ ]
|
||||
---------------------------
|
||||
|
||||
[float]
|
||||
=== Detailed Examples
|
||||
See <<analysis-pathhierarchy-tokenizer-examples, detailed examples here>>.
|
||||
|
|
Loading…
Reference in New Issue