From 44c4e6ef311ea0dffe72d33413ede260de0fb6e9 Mon Sep 17 00:00:00 2001 From: Yuriy Koval Date: Fri, 14 Aug 2020 11:12:53 -0700 Subject: [PATCH] SOLR-14703 Edismax parser replaces whitespace characters with spaces (#1713) --- solr/CHANGES.txt | 2 + .../solr/search/ExtendedDismaxQParser.java | 6 + .../solr/search/TestExtendedDismaxParser.java | 146 +++++++++++++++++- 3 files changed, 152 insertions(+), 2 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 9945ecb0229..5b27dcdafdc 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -184,6 +184,8 @@ Bug Fixes * SOLR-14677: Improve DIH termination logic to close all DataSources, EntityProcessors (Jason Gerlowski) +* SOLR-14703: Fix edismax replacement of all whitespace characters with spaces (Yuriy Koval via Jason Gerlowski) + Other Changes --------------------- diff --git a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java index fce9416ad59..8f234f0bc35 100644 --- a/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java +++ b/solr/core/src/java/org/apache/solr/search/ExtendedDismaxQParser.java @@ -799,6 +799,12 @@ public class ExtendedDismaxQParser extends QParser { } if (inString == 0) { + if (!ignoreQuote && ch == '"') { + // end of the token if we aren't in a string, backing + // up the position. + pos--; + break; + } switch (ch) { case '!': case '(': diff --git a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java index 8b93ae79a83..f94f421c0c5 100644 --- a/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java +++ b/solr/core/src/test/org/apache/solr/search/TestExtendedDismaxParser.java @@ -18,6 +18,7 @@ package org.apache.solr.search; import java.util.Arrays; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; @@ -39,6 +40,7 @@ import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.Utils; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.util.SolrPluginUtils; +import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; @@ -964,6 +966,62 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { ); } + @Test + public void testWhitespaceCharacters() throws Exception { + assertU(adoc("id", "whitespaceChars", + "cat_s", "foo\nfoo")); + assertU(commit()); + + assertQ(req("q", "(\"foo\nfoo\")", + "qf", "cat_s", + "defType", "edismax") + , "*[count(//doc)=1]"); + + assertQ(req("q", "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]", + "qf", "name", + "defType", "edismax") + , "*[count(//doc)=1]"); + + assertQ(req("q", "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]", + "qf", "name", + "defType", "edismax") + , "*[count(//doc)=1]"); + + assertQ(req("q", "{!edismax qf=cat_s v='[\"foo\nfoo\" TO \"foo\nfoo\"]'}") + , "*[count(//doc)=1]"); + + assertQ(req("q", "{!edismax qf=cat_s v='[ \"foo\nfoo\" TO \"foo\nfoo\"]'}") + , "*[count(//doc)=1]"); + + } + + @Test + public void testDoubleQuoteCharacters() throws Exception { + assertU(adoc("id", "doubleQuote", + "cat_s", "foo\"foo")); + assertU(commit()); + + assertQ(req("q", "cat_s:[\"foo\\\"foo\" TO \"foo\\\"foo\"]", + "qf", "name", + "defType", "edismax") + , "*[count(//doc)=1]"); + + assertQ(req("q", "cat_s:\"foo\\\"foo\"", + "qf", "name", + "defType", "edismax") + , "*[count(//doc)=1]"); + + assertQ(req("q", "cat_s:foo\\\"foo", + "qf", "name", + "defType", "edismax") + , "*[count(//doc)=1]"); + + assertQ(req("q", "cat_s:foo\"foo", + "qf", "name", + "defType", "edismax") + , "*[count(//doc)=1]"); + } + /** * verify that all reserved characters are properly escaped when being set in * {@link org.apache.solr.search.ExtendedDismaxQParser.Clause#val}. @@ -1011,8 +1069,92 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 { "*[count(//doc)=3]"); } - - /** + + + /** + * Repeating some of test cases as direct calls to splitIntoClauses + */ + @Test + public void testSplitIntoClauses() throws Exception { + String query = "(\"foo\nfoo\")"; + SolrQueryRequest request = req("q", query, + "qf", "cat_s", + "defType", "edismax"); + ExtendedDismaxQParser parser = new ExtendedDismaxQParser(query, null, request.getParams(), request); + List clauses = parser.splitIntoClauses(query, false); + Assert.assertEquals(3, clauses.size()); + assertClause(clauses.get(0), "\\(", false, true); + assertClause(clauses.get(1), "foo\nfoo", true, false); + assertClause(clauses.get(2), "\\)", false, true); + + query = "cat_s:[\"foo\nfoo\" TO \"foo\nfoo\"]"; + request = req("q", query, + "qf", "cat_s", + "defType", "edismax"); + parser = new ExtendedDismaxQParser(query, null, request.getParams(), request); + clauses = parser.splitIntoClauses(query, false); + Assert.assertEquals(5, clauses.size()); + assertClause(clauses.get(0), "\\[", false, true, "cat_s"); + assertClause(clauses.get(1), "foo\nfoo", true, false); + assertClause(clauses.get(2), "TO", true, false); + assertClause(clauses.get(3), "foo\nfoo", true, false); + assertClause(clauses.get(4), "\\]", false, true); + + query = "cat_s:[ \"foo\nfoo\" TO \"foo\nfoo\"]"; + request = req("q", query, + "qf", "cat_s", + "defType", "edismax"); + parser = new ExtendedDismaxQParser(query, null, request.getParams(), request); + clauses = parser.splitIntoClauses(query, false); + Assert.assertEquals(5, clauses.size()); + assertClause(clauses.get(0), "\\[", true, true, "cat_s"); + assertClause(clauses.get(1), "foo\nfoo", true, false); + assertClause(clauses.get(2), "TO", true, false); + assertClause(clauses.get(3), "foo\nfoo", true, false); + assertClause(clauses.get(4), "\\]", false, true); + + String allReservedCharacters = "!():^[]{}~*?\"+-\\|&/"; + // the backslash needs to be manually escaped (the query parser sees the raw backslash as an escape the subsequent + // character) + query = allReservedCharacters.replace("\\", "\\\\"); + + request = req("q", query, + "qf", "name", + "mm", "100%", + "defType", "edismax"); + + parser = new ExtendedDismaxQParser(query, null, request.getParams(), request); + clauses = parser.splitIntoClauses(query, false); + Assert.assertEquals(1, clauses.size()); + assertClause(clauses.get(0), "\\!\\(\\)\\:\\^\\[\\]\\{\\}\\~\\*\\?\\\"\\+\\-\\\\\\|\\&\\/", false, true); + + query = "foo/"; + request = req("q", query, + "qf", "name", + "mm", "100%", + "defType", "edismax"); + + parser = new ExtendedDismaxQParser(query, null, request.getParams(), request); + clauses = parser.splitIntoClauses(query, false); + Assert.assertEquals(1, clauses.size()); + assertClause(clauses.get(0), "foo\\/", false, true); + } + + private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace, + boolean hasSpecialSyntax, String field) { + Assert.assertEquals(value, clause.val); + Assert.assertEquals(hasWhitespace, clause.hasWhitespace); + Assert.assertEquals(hasSpecialSyntax, clause.hasSpecialSyntax); + Assert.assertEquals(field, clause.field); + } + + private static void assertClause(ExtendedDismaxQParser.Clause clause, String value, boolean hasWhitespace, + boolean hasSpecialSyntax) { + assertClause(clause, value, hasWhitespace, hasSpecialSyntax, null); + + } + + /** * SOLR-3589: Edismax parser does not honor mm parameter if analyzer splits a token */ public void testCJK() throws Exception {