mirror of https://github.com/apache/lucene.git
SOLR-14782: Document how to unescape for the QueryElevationComponent.
This commit is contained in:
parent
20af6dbd3d
commit
56dbb66503
|
@ -77,4 +77,27 @@ public class TestPatternReplaceCharFilterFactory extends BaseTokenStreamFactoryT
|
|||
});
|
||||
assertTrue(expected.getMessage().contains("Unknown parameters"));
|
||||
}
|
||||
|
||||
/** Test with backslash unescape */
|
||||
public void testUnescape() throws Exception {
|
||||
Reader reader = new StringReader("aaa\\ bbb\\-ccc");
|
||||
reader = charFilterFactory("PatternReplace",
|
||||
"pattern", "\\\\(.)",
|
||||
"replacement", "$1").create(reader);
|
||||
TokenStream ts = whitespaceMockTokenizer(reader);
|
||||
assertTokenStreamContents(ts,
|
||||
new String[] { "aaa", "bbb-ccc" },
|
||||
new int[] { 0, 5 },
|
||||
new int[] { 3, 13 });
|
||||
|
||||
reader = new StringReader("a\\b\\0\\-c\\é\\ d");
|
||||
reader = charFilterFactory("PatternReplace",
|
||||
"pattern", "\\\\([^\\p{IsAlphabetic}\\p{Digit}])",
|
||||
"replacement", "$1").create(reader);
|
||||
ts = whitespaceMockTokenizer(reader);
|
||||
assertTokenStreamContents(ts,
|
||||
new String[] { "a\\b\\0-c\\é", "d" },
|
||||
new int[] { 0, 12 },
|
||||
new int[] { 10, 13 });
|
||||
}
|
||||
}
|
||||
|
|
|
@ -61,7 +61,20 @@ Optionally, in the Query Elevation Component configuration you can also specify
|
|||
The Query Elevation Search Component takes the following parameters:
|
||||
|
||||
`queryFieldType`::
|
||||
Specifies which fieldType should be used to analyze the incoming text. For example, it may be appropriate to use a fieldType with a LowerCaseFilter.
|
||||
Specifies which fieldType should be used to analyze the incoming text. For example, it may be appropriate to use a fieldType with a LowerCaseFilter. As another example, if you need to unescape backslash-escaped queries, you can define a fieldType that preprocesses the text with a PatternReplaceCharFilter. Here is the corresponding fieldType definition (traditionally in `schema.xml`):
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<fieldType name="unescapelowercase" class="solr.TextField">
|
||||
<analyzer>
|
||||
<charFilter class="solr.PatternReplaceCharFilterFactory" pattern="\\(.)" replacement="$1"/>
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
----
|
||||
|
||||
For example, to unescape only non-alphanumeric characters, the pattern could be `\\([^\p{IsAlphabetic}\p{Digit}])`.
|
||||
|
||||
`config-file`::
|
||||
Path to the file that defines query elevation. This file must exist in `<instanceDir>/conf/<config-file>` or `<dataDir>/<config-file>`. If the file exists in the `conf/` directory it will be loaded once at startup. If it exists in the `data/` directory, it will be reloaded for each IndexReader.
|
||||
|
|
Loading…
Reference in New Issue