From 56dbb6650356ba1292bd38aa5ad669fddd8ee50b Mon Sep 17 00:00:00 2001 From: Bruno Roustant Date: Wed, 2 Sep 2020 11:45:36 +0200 Subject: [PATCH] SOLR-14782: Document how to unescape for the QueryElevationComponent. --- .../TestPatternReplaceCharFilterFactory.java | 23 +++++++++++++++++++ .../src/the-query-elevation-component.adoc | 15 +++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java index 74733d626c5..04f926927fd 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/pattern/TestPatternReplaceCharFilterFactory.java @@ -77,4 +77,27 @@ public class TestPatternReplaceCharFilterFactory extends BaseTokenStreamFactoryT }); assertTrue(expected.getMessage().contains("Unknown parameters")); } + + /** Tests backslash unescaping with the PatternReplace char filter: first a pattern that unescapes any backslash-escaped character, then a pattern that unescapes only escaped characters that are neither alphabetic nor digits (so the backslashes before b, 0 and é are kept). The expected start/end offsets are positions in the original, still-escaped input. */ + public void testUnescape() throws Exception { + Reader reader = new StringReader("aaa\\ bbb\\-ccc"); + reader = charFilterFactory("PatternReplace", + "pattern", "\\\\(.)", + "replacement", "$1").create(reader); + TokenStream ts = whitespaceMockTokenizer(reader); + assertTokenStreamContents(ts, + new String[] { "aaa", "bbb-ccc" }, + new int[] { 0, 5 }, + new int[] { 3, 13 }); + + reader = new StringReader("a\\b\\0\\-c\\é\\ d"); + reader = charFilterFactory("PatternReplace", + "pattern", "\\\\([^\\p{IsAlphabetic}\\p{Digit}])", + "replacement", "$1").create(reader); + ts = whitespaceMockTokenizer(reader); + assertTokenStreamContents(ts, + new String[] { "a\\b\\0-c\\é", "d" }, + new int[] { 0, 12 }, + new int[] { 10, 13 }); + } } diff --git a/solr/solr-ref-guide/src/the-query-elevation-component.adoc b/solr/solr-ref-guide/src/the-query-elevation-component.adoc index 
f381de3b7d0..2c0662862a2 100644 --- a/solr/solr-ref-guide/src/the-query-elevation-component.adoc +++ b/solr/solr-ref-guide/src/the-query-elevation-component.adoc @@ -61,7 +61,20 @@ Optionally, in the Query Elevation Component configuration you can also specify The Query Elevation Search Component takes the following parameters: `queryFieldType`:: -Specifies which fieldType should be used to analyze the incoming text. For example, it may be appropriate to use a fieldType with a LowerCaseFilter. +Specifies which fieldType should be used to analyze the incoming text. For example, it may be appropriate to use a fieldType with a LowerCaseFilter. As another example, if you need to unescape backslash-escaped queries, then you can define the fieldType to preprocess with a PatternReplaceCharFilter. Here is a corresponding example fieldType definition (traditionally in `schema.xml`): + +[source,xml] +---- +<fieldType name="unescapelowercase" class="solr.TextField"> + <analyzer> + <charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\\(.)" replacement="$1"/> + <tokenizer class="solr.WhitespaceTokenizerFactory"/> + <filter class="solr.LowerCaseFilterFactory"/> + </analyzer> +</fieldType> +---- + +For example, to unescape only non-alphanumeric characters, the pattern could be `\\([^\p{IsAlphabetic}\p{Digit}])`. `config-file`:: Path to the file that defines query elevation. This file must exist in `<instanceDir>/conf/<config-file>` or `<dataDir>/<config-file>`. If the file exists in the `conf/` directory it will be loaded once at startup. If it exists in the `data/` directory, it will be reloaded for each IndexReader.