mirror of https://github.com/apache/lucene.git
SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713)
This commit is contained in:
parent
4267734e80
commit
44c4e6ef31
|
@ -184,6 +184,8 @@ Bug Fixes
|
|||
|
||||
* SOLR-14677: Improve DIH termination logic to close all DataSources, EntityProcessors (Jason Gerlowski)
|
||||
|
||||
* SOLR-14703: Fix edismax replacement of all whitespace characters with spaces (Yuriy Koval via Jason Gerlowski)
|
||||
|
||||
Other Changes
|
||||
---------------------
|
||||
|
||||
|
|
|
@ -799,6 +799,12 @@ public class ExtendedDismaxQParser extends QParser {
|
|||
}
|
||||
|
||||
if (inString == 0) {
|
||||
if (!ignoreQuote && ch == '"') {
|
||||
// end of the token if we aren't in a string, backing
|
||||
// up the position.
|
||||
pos--;
|
||||
break;
|
||||
}
|
||||
switch (ch) {
|
||||
case '!':
|
||||
case '(':
|
||||
|
|
|
@ -18,6 +18,7 @@ package org.apache.solr.search;
|
|||
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
|
@ -39,6 +40,7 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.common.util.Utils;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
import org.junit.Assert;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -964,6 +966,62 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testWhitespaceCharacters() throws Exception {
|
||||
assertU(adoc("id", "whitespaceChars",
|
||||
"cat_s", "foo\nfoo"));
|
||||
assertU(commit());
|
||||
|
||||
assertQ(req("q", "(\"foo\nfoo\")",
|
||||
"qf", "cat_s",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]",
|
||||
"qf", "name",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]",
|
||||
"qf", "name",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "{!edismax qf=cat_s v='[\"foo\nfoo\" TO \"foo\nfoo\"]'}")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "{!edismax qf=cat_s v='[ \"foo\nfoo\" TO \"foo\nfoo\"]'}")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDoubleQuoteCharacters() throws Exception {
|
||||
assertU(adoc("id", "doubleQuote",
|
||||
"cat_s", "foo\"foo"));
|
||||
assertU(commit());
|
||||
|
||||
assertQ(req("q", "cat_s:[\"foo\\\"foo\" TO \"foo\\\"foo\"]",
|
||||
"qf", "name",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "cat_s:\"foo\\\"foo\"",
|
||||
"qf", "name",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "cat_s:foo\\\"foo",
|
||||
"qf", "name",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
|
||||
assertQ(req("q", "cat_s:foo\"foo",
|
||||
"qf", "name",
|
||||
"defType", "edismax")
|
||||
, "*[count(//doc)=1]");
|
||||
}
|
||||
|
||||
/**
|
||||
* verify that all reserved characters are properly escaped when being set in
|
||||
* {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}.
|
||||
|
@ -1011,8 +1069,92 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
|
|||
"*[count(//doc)=3]");
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
|
||||
/**
|
||||
* Repeating some of test cases as direct calls to splitIntoClauses
|
||||
*/
|
||||
@Test
|
||||
public void testSplitIntoClauses() throws Exception {
|
||||
String query = "(\"foo\nfoo\")";
|
||||
SolrQueryRequest request = req("q", query,
|
||||
"qf", "cat_s",
|
||||
"defType", "edismax");
|
||||
ExtendedDismaxQParser parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
|
||||
List<ExtendedDismaxQParser.Clause> clauses = parser.splitIntoClauses(query, false);
|
||||
Assert.assertEquals(3, clauses.size());
|
||||
assertClause(clauses.get(0), "\\(", false, true);
|
||||
assertClause(clauses.get(1), "foo\nfoo", true, false);
|
||||
assertClause(clauses.get(2), "\\)", false, true);
|
||||
|
||||
query = "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]";
|
||||
request = req("q", query,
|
||||
"qf", "cat_s",
|
||||
"defType", "edismax");
|
||||
parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
|
||||
clauses = parser.splitIntoClauses(query, false);
|
||||
Assert.assertEquals(5, clauses.size());
|
||||
assertClause(clauses.get(0), "\\[", false, true, "cat_s");
|
||||
assertClause(clauses.get(1), "foo\nfoo", true, false);
|
||||
assertClause(clauses.get(2), "TO", true, false);
|
||||
assertClause(clauses.get(3), "foo\nfoo", true, false);
|
||||
assertClause(clauses.get(4), "\\]", false, true);
|
||||
|
||||
query = "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]";
|
||||
request = req("q", query,
|
||||
"qf", "cat_s",
|
||||
"defType", "edismax");
|
||||
parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
|
||||
clauses = parser.splitIntoClauses(query, false);
|
||||
Assert.assertEquals(5, clauses.size());
|
||||
assertClause(clauses.get(0), "\\[", true, true, "cat_s");
|
||||
assertClause(clauses.get(1), "foo\nfoo", true, false);
|
||||
assertClause(clauses.get(2), "TO", true, false);
|
||||
assertClause(clauses.get(3), "foo\nfoo", true, false);
|
||||
assertClause(clauses.get(4), "\\]", false, true);
|
||||
|
||||
String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/";
|
||||
// the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent
|
||||
// character)
|
||||
query = allReservedCharacters.replace("\\", "\\\\");
|
||||
|
||||
request = req("q", query,
|
||||
"qf", "name",
|
||||
"mm", "100%",
|
||||
"defType", "edismax");
|
||||
|
||||
parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
|
||||
clauses = parser.splitIntoClauses(query, false);
|
||||
Assert.assertEquals(1, clauses.size());
|
||||
assertClause(clauses.get(0), "\\!\\(\\)\\:\\^\\[\\]\\{\\}\\~\\*\\?\\\"\\+\\-\\\\\\|\\&\\/", false, true);
|
||||
|
||||
query = "foo/";
|
||||
request = req("q", query,
|
||||
"qf", "name",
|
||||
"mm", "100%",
|
||||
"defType", "edismax");
|
||||
|
||||
parser = new ExtendedDismaxQParser(query, null, request.getParams(), request);
|
||||
clauses = parser.splitIntoClauses(query, false);
|
||||
Assert.assertEquals(1, clauses.size());
|
||||
assertClause(clauses.get(0), "foo\\/", false, true);
|
||||
}
|
||||
|
||||
private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace,
|
||||
boolean hasSpecialSyntax, String field) {
|
||||
Assert.assertEquals(value, clause.val);
|
||||
Assert.assertEquals(hasWhitespace, clause.hasWhitespace);
|
||||
Assert.assertEquals(hasSpecialSyntax, clause.hasSpecialSyntax);
|
||||
Assert.assertEquals(field, clause.field);
|
||||
}
|
||||
|
||||
private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace,
|
||||
boolean hasSpecialSyntax) {
|
||||
assertClause(clause, value, hasWhitespace, hasSpecialSyntax, null);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token
|
||||
*/
|
||||
public void testCJK() throws Exception {
|
||||
|
|
Loading…
Reference in New Issue