SOLR-10357: Enable edismax and standard query parsers to handle the option combination sow=false / autoGeneratePhraseQueries=true by setting QueryBuilder.autoGenerateMultiTermSynonymsPhraseQuery

This commit is contained in:
Steve Rowe 2017-03-28 18:39:28 -04:00
parent 6b0217b7cb
commit 0a689f4d95
8 changed files with 148 additions and 99 deletions

View File

@ -164,6 +164,9 @@ Other Changes
* SOLR-10365: Handle a SolrCoreInitializationException while publishing core state during SolrCore creation
(Ishan Chattopadhyaya)
* SOLR-10357: Enable edismax and standard query parsers to handle the option combination
sow=false / autoGeneratePhraseQueries="true" by setting QueryBuilder.autoGenerateMultiTermSynonymsPhraseQuery.
================== 6.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

View File

@ -13,7 +13,6 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParserConfigurationException;
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
@ -54,11 +53,11 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
/**
 * Builds the query for one field by delegating to the superclass newFieldQuery.
 *
 * NOTE(review): this hunk is shown without +/- markers. The guard that throws
 * QueryParserConfigurationException together with the first return looks like the
 * pre-commit body, and everything from setAutoGenerateMultiTermSynonymsPhraseQuery(...)
 * onward looks like its replacement -- confirm against the raw diff.
 *
 * @param analyzer forwarded unchanged to the superclass
 * @param field name of the field being queried; also used in the guard's error message
 * @param queryText forwarded unchanged to the superclass
 * @param quoted whether the caller flagged the text as quoted
 * @param fieldAutoGenPhraseQueries per-field autoGeneratePhraseQueries flag
 * @return whatever the superclass newFieldQuery returns
 * @throws SyntaxError declared here; presumably propagated from the superclass call
 */
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
// Guard: reject autoGeneratePhraseQueries (parser-wide or per-field) whenever
// splitOnWhitespace is false.
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
// Pass the combined per-field / parser-wide autoGeneratePhraseQueries flag to the
// multi-term-synonyms phrase-query setter before delegating.
setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries());
// Don't auto-quote graph-aware field queries
// When getSplitOnWhitespace() is true, either autoGeneratePhraseQueries flag also
// marks the text as quoted; otherwise only the caller's explicit 'quoted' counts.
boolean treatAsQuoted = getSplitOnWhitespace()
? (quoted || fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()) : quoted;
// The superclass always receives false for fieldAutoGenPhraseQueries here; the flag has
// already been applied through the setter call and treatAsQuoted above.
return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false);
}
// * Query ::= ( Clause )*

View File

@ -37,7 +37,6 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryParserConfigurationException;
public class QueryParser extends SolrQueryParserBase {
@ -78,11 +77,11 @@ public class QueryParser extends SolrQueryParserBase {
/**
 * Builds the query for one field by delegating to the superclass newFieldQuery.
 *
 * NOTE(review): this hunk is shown without +/- markers. The guard that throws
 * QueryParserConfigurationException together with the first return looks like the
 * pre-commit body, and everything from setAutoGenerateMultiTermSynonymsPhraseQuery(...)
 * onward looks like its replacement -- confirm against the raw diff.
 *
 * @param analyzer forwarded unchanged to the superclass
 * @param field name of the field being queried; also used in the guard's error message
 * @param queryText forwarded unchanged to the superclass
 * @param quoted whether the caller flagged the text as quoted
 * @param fieldAutoGenPhraseQueries per-field autoGeneratePhraseQueries flag
 * @return whatever the superclass newFieldQuery returns
 * @throws SyntaxError declared here; presumably propagated from the superclass call
 */
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
// Guard: reject autoGeneratePhraseQueries (parser-wide or per-field) whenever
// splitOnWhitespace is false.
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && splitOnWhitespace == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
return super.newFieldQuery(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries);
// Pass the combined per-field / parser-wide autoGeneratePhraseQueries flag to the
// multi-term-synonyms phrase-query setter before delegating.
setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries());
// Don't auto-quote graph-aware field queries
// When getSplitOnWhitespace() is true, either autoGeneratePhraseQueries flag also
// marks the text as quoted; otherwise only the caller's explicit 'quoted' counts.
boolean treatAsQuoted = getSplitOnWhitespace()
? (quoted || fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()) : quoted;
// The superclass always receives false for fieldAutoGenPhraseQueries here; the flag has
// already been applied through the setter call and treatAsQuoted above.
return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false);
}
}

View File

@ -310,8 +310,6 @@ public class ExtendedDismaxQParser extends QParser {
up.setRemoveStopFilter(true);
query = up.parse(mainUserQuery);
}
} catch (QueryParserConfigurationException e) {
throw e; // Don't ignore configuration exceptions
} catch (Exception e) {
// ignore failure and reparse later after escaping reserved chars
up.exceptions = false;
@ -1083,10 +1081,6 @@ public class ExtendedDismaxQParser extends QParser {
@Override
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
boolean quoted, boolean fieldAutoGenPhraseQueries) throws SyntaxError {
if ((getAutoGeneratePhraseQueries() || fieldAutoGenPhraseQueries) && getSplitOnWhitespace() == false) {
throw new QueryParserConfigurationException
("Field '" + field + "': autoGeneratePhraseQueries == true is disallowed when sow/splitOnWhitespace == false");
}
Analyzer actualAnalyzer;
if (removeStopFilter) {
if (nonStopFilterAnalyzerPerField == null) {
@ -1403,8 +1397,6 @@ public class ExtendedDismaxQParser extends QParser {
}
return null;
} catch (QueryParserConfigurationException e) {
throw e; // Don't ignore configuration exceptions
} catch (Exception e) {
// an exception here is due to the field query not being compatible with the input text
// for example, passing a string to a numeric field.

View File

@ -1,24 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/**
 * Unchecked exception carrying a query parser configuration error message.
 * It is an {@link IllegalArgumentException}, so handlers written for that type also
 * catch it.
 */
public class QueryParserConfigurationException extends IllegalArgumentException {

  /**
   * Creates the exception with the given detail message.
   *
   * @param message detail message handed to the {@link IllegalArgumentException} constructor
   */
  public QueryParserConfigurationException(final String message) {
    super(message);
  }
}

View File

@ -31,3 +31,4 @@ pixima => pixma
# multiword synonyms
wi fi => wifi
crow blackbird, grackle

View File

@ -21,6 +21,7 @@ import java.util.HashSet;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@ -34,7 +35,6 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.TextField;
import org.apache.solr.util.SolrPluginUtils;
import org.junit.BeforeClass;
import org.junit.Test;
@ -66,7 +66,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"foo_i", "8"
));
assertU(adoc("id", "47", "trait_ss", "Pig",
"text_sw", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
"text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
assertU(adoc("id", "50", "text_sw", "start new big city end"));
@ -121,22 +121,22 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
// really just test that exceptions aren't thrown by
// single + -
assertJQ(req("defType", "edismax", "q", "-", "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", "-", "sow", sow)
, "/response==");
assertJQ(req("defType", "edismax", "q", "+", "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", "+", "sow", sow)
, "/response==");
assertJQ(req("defType", "edismax", "q", "+ - +", "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", "+ - +", "sow", sow)
, "/response==");
assertJQ(req("defType", "edismax", "q", "- + -", "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", "- + -", "sow", sow)
, "/response==");
assertJQ(req("defType", "edismax", "q", "id:47 +", "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", "id:47 +", "sow", sow)
, "/response/numFound==1");
assertJQ(req("defType", "edismax", "q", "id:47 -", "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", "id:47 -", "sow", sow)
, "/response/numFound==1");
Random r = random();
@ -152,7 +152,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
}
String q = sb.toString();
assertJQ(req("defType", "edismax", "q", q, "df", "text_sw", "sow", sow)
assertJQ(req("defType", "edismax", "q", q, "sow", sow)
, "/response==");
}
}
@ -264,7 +264,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, twor
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","op"), twor
);
assertQ(req("defType", "edismax",
@ -277,29 +277,29 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"q.op", "OR",
"q","Order op"), twor
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","Order AND op"), oner
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","Order and op"), oner
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","+Order op"), oner
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","Order OR op"), twor
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","Order or op"), twor
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","*:*"), allr
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","star OR (-star)"), allr
);
assertQ(req("defType", "edismax", "qf", "name title subject text_sw",
assertQ(req("defType", "edismax", "qf", "name title subject text",
"q","id:42 OR (-id:42)"), allr
);
@ -551,7 +551,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"//str[@name='parsedquery_toString'][.='+(id:42)^5.0']");
assertQ(req("defType","edismax", "uf","-*", "q","cannons", "qf","text_sw"),
assertQ(req("defType","edismax", "uf","-*", "q","cannons", "qf","text"),
oner);
assertQ(req("defType","edismax", "uf","* -id", "q","42", "qf", "id"), oner);
@ -885,7 +885,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
"*[count(//doc)=3]");
assertQ(
"Might be double-escaping a client-escaped colon",
req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)", "defType", "edismax", "qf", "text_sw"),
req("q", "text_sw:(theos OR thistokenhasa\\:preescapedcolon OR theou)", "defType", "edismax", "qf", "text"),
"*[count(//doc)=3]");
}
@ -1047,56 +1047,56 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
// "line up and fly directly at the enemy death cannons, clogging them with wreckage!"
assertQ("test default operator with mm (AND + 0% => 0 hits)",
req("q", "(line notfound) OR notfound",
"qf", "text_sw",
"qf", "text",
"q.op", "AND",
"mm", "0%",
"defType", "edismax")
, "*[count(//doc)=0]");
assertQ("test default operator with mm (OR + 0% => 1 hit)",
req("q", "line notfound OR notfound",
"qf", "text_sw",
"qf", "text",
"q.op", "OR",
"mm", "0%",
"defType", "edismax")
, "*[count(//doc)=1]");
assertQ("test default operator with mm (OR + 100% => 0 hits)",
req("q", "line notfound OR notfound",
"qf", "text_sw",
"qf", "text",
"q.op", "OR",
"mm", "100%",
"defType", "edismax")
, "*[count(//doc)=0]");
assertQ("test default operator with mm (OR + 35% => 1 hit)",
req("q", "line notfound notfound2 OR notfound",
"qf", "text_sw",
"qf", "text",
"q.op", "OR",
"mm", "35%",
"defType", "edismax")
, "*[count(//doc)=1]");
assertQ("test default operator with mm (OR + 75% => 0 hits)",
req("q", "line notfound notfound2 OR notfound3",
"qf", "text_sw",
"qf", "text",
"q.op", "OR",
"mm", "75%",
"defType", "edismax")
, "*[count(//doc)=0]");
assertQ("test default operator with mm (AND + 0% => 1 hit)",
req("q", "(line enemy) OR notfound",
"qf", "text_sw",
"qf", "text",
"q.op", "AND",
"mm", "0%",
"defType", "edismax")
, "*[count(//doc)=1]");
assertQ("test default operator with mm (AND + 50% => 1 hit)",
req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)",
"qf", "text_sw",
"qf", "text",
"q.op", "AND",
"mm", "50%",
"defType", "edismax")
, "*[count(//doc)=1]");
assertQ("test default operator with mm (AND + 75% => 0 hits)",
req("q", "(line enemy) OR (line notfound) OR (death cannons) OR (death notfound)",
"qf", "text_sw",
"qf", "text",
"q.op", "AND",
"mm", "75%",
"defType", "edismax")
@ -1394,16 +1394,6 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
}
// LUCENE-7533
/**
 * Asserts that the "text" field type has autoGeneratePhraseQueries enabled, then expects
 * an IllegalArgumentException when an edismax query with sow=false is turned into a Query.
 *
 * NOTE(review): the local param below is spelled "fq" and the query text ends with an
 * unbalanced ")" -- the standard-parser variant of this test uses "qf=text" and no
 * parenthesis; confirm which was intended. The string is left untouched here.
 */
public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception {
  final TextField textType = (TextField) h.getCore().getLatestSchema().getField("text").getType();
  assertTrue(textType.getAutoGeneratePhraseQueries());

  try (SolrQueryRequest request = req()) {
    final QParser parser = QParser.getParser("{!edismax sow=false fq=text}blah blah)", request);
    // The exception is expected from getQuery(), not from getParser().
    expectThrows(IllegalArgumentException.class, parser::getQuery);
  }
}
@Test
public void testSplitOnWhitespace_Basic() throws Exception {
// The "text_sw" field has synonyms loaded from synonyms.txt
@ -1550,7 +1540,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, "/response/numFound==0"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi* fi", "sow","false")
, "/response/numFound==2" // matches because wi* matches "wifi" in one doc and "with" in another
, "/response/numFound==1" // matches because wi* matches "wifi"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","w? fi", "sow","false")
, "/response/numFound==0"
@ -1720,7 +1710,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT* wi fi", "sow","false")
, "/response/numFound==2"
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","AT? wi fi", "sow","false")
, "/response/numFound==1"
@ -1750,7 +1740,7 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT*", "sow","false")
, "/response/numFound==2"
, "/response/numFound==1"
);
assertJQ(req("qf","text_sw title", "defType","edismax", "q","wi fi AT?", "sow","false")
, "/response/numFound==1"
@ -1765,11 +1755,74 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
, "/response/numFound==1"
);
}
/**
 * Checks how edismax parses "grackle" (and "grackle wi fi") against the "text" and
 * "text_sw" fields when "sow" is absent, "true" or "false", by comparing the parsed
 * query's toString() with the expected form.
 */
public void testAutoGeneratePhraseQueries() throws Exception {
  final ModifiableSolrParams sowUnset = new ModifiableSolrParams();
  final ModifiableSolrParams sowOff = new ModifiableSolrParams();
  sowOff.add("sow", "false");
  final ModifiableSolrParams sowOn = new ModifiableSolrParams();
  sowOn.add("sow", "true");

  // From synonyms.txt:
  //
  //   crow blackbird, grackle

  // Explicit field "text" (autoGeneratePhraseQueries="true") with sow=false
  try (SolrQueryRequest request = req(sowOff)) {
    final String parsed = QParser.getParser("text:grackle", "edismax", request).getQuery().toString();
    assertEquals("+(text:\"crow blackbird\" text:grackl)", parsed);
  }

  // Same field with sow unset or sow=true
  for (SolrParams splitting : Arrays.asList(sowUnset, sowOn)) {
    try (SolrQueryRequest request = req(splitting)) {
      final String parsed = QParser.getParser("text:grackle", "edismax", request).getQuery().toString();
      assertEquals("+spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])", parsed);
    }
  }

  // "text_sw" doesn't specify autoGeneratePhraseQueries => default false; same result for every sow value
  for (SolrParams splitting : Arrays.asList(sowUnset, sowOn, sowOff)) {
    try (SolrQueryRequest request = req(splitting)) {
      final String parsed = QParser.getParser("text_sw:grackle", "edismax", request).getQuery().toString();
      assertEquals("+((+text_sw:crow +text_sw:blackbird) text_sw:grackl)", parsed);
    }
  }

  // From here on every parameter set queries both fields through qf.
  Stream.of(sowUnset, sowOn, sowOff).forEach(params -> params.add("qf", "text text_sw"));

  try (SolrQueryRequest request = req(sowOff)) {
    final String single = QParser.getParser("grackle", "edismax", request).getQuery().toString();
    assertEquals("+((text:\"crow blackbird\" text:grackl)"
            + " | ((+text_sw:crow +text_sw:blackbird) text_sw:grackl))",
        single);

    final String multi = QParser.getParser("grackle wi fi", "edismax", request).getQuery().toString();
    assertEquals("+(((text:\"crow blackbird\" text:grackl) text:wifi)"
            + " | (((+text_sw:crow +text_sw:blackbird) text_sw:grackl) text_sw:wifi))",
        multi);
  }

  for (SolrParams splitting : Arrays.asList(sowUnset, sowOn)) {
    try (SolrQueryRequest request = req(splitting)) {
      final String single = QParser.getParser("grackle", "edismax", request).getQuery().toString();
      assertEquals("+(spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])"
              + " | ((+text_sw:crow +text_sw:blackbird) text_sw:grackl))",
          single);

      final String multi = QParser.getParser("grackle wi fi", "edismax", request).getQuery().toString();
      assertEquals("+((spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])"
              + " | ((+text_sw:crow +text_sw:blackbird) text_sw:grackl)) (text:wi | text_sw:wi) (text:fi | text_sw:fi))",
          multi);
    }
  }
}
private boolean containsClause(Query query, String field, String value,
int boost, boolean fuzzy) {
int boost, boolean fuzzy) {
float queryBoost = 1f;
if (query instanceof BoostQuery) {

View File

@ -34,11 +34,12 @@ import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.query.FilterQuery;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.TextField;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;
@ -573,16 +574,6 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
req.close();
}
// LUCENE-7533
/**
 * Asserts that the "text" field type has autoGeneratePhraseQueries enabled, then expects
 * a QueryParserConfigurationException when a lucene-parser query with sow=false is
 * turned into a Query.
 */
public void testSplitOnWhitespace_with_autoGeneratePhraseQueries() throws Exception {
  final TextField textType = (TextField) h.getCore().getLatestSchema().getField("text").getType();
  assertTrue(textType.getAutoGeneratePhraseQueries());

  try (SolrQueryRequest request = req()) {
    final QParser parser = QParser.getParser("{!lucene sow=false qf=text}blah blah", request);
    // The exception is expected from getQuery(), not from getParser().
    expectThrows(QueryParserConfigurationException.class, parser::getQuery);
  }
}
@Test
public void testSplitOnWhitespace_Basic() throws Exception {
// The "syn" field has synonyms loaded from synonyms.txt
@ -969,4 +960,39 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
, "/response/numFound==1"
);
}
/**
 * Checks how the default query parser parses "grackle" against the "text" and "text_sw"
 * fields when "sow" is absent, "true" or "false", by comparing the parsed query's
 * toString() with the expected form.
 */
@Test
public void testAutoGeneratePhraseQueries() throws Exception {
  final ModifiableSolrParams sowUnset = new ModifiableSolrParams();
  final ModifiableSolrParams sowOff = new ModifiableSolrParams();
  sowOff.add("sow", "false");
  final ModifiableSolrParams sowOn = new ModifiableSolrParams();
  sowOn.add("sow", "true");

  // From synonyms.txt:
  //
  //   crow blackbird, grackle
  //
  try (SolrQueryRequest request = req()) {
    // "text" has autoGeneratePhraseQueries="true"
    final QParser sowOffParser = QParser.getParser("text:grackle", request);
    sowOffParser.setParams(sowOff);
    assertEquals("text:\"crow blackbird\" text:grackl", sowOffParser.getQuery().toString());

    for (SolrParams splitting : Arrays.asList(sowUnset, sowOn)) {
      final QParser parser = QParser.getParser("text:grackle", request);
      parser.setParams(splitting);
      assertEquals("spanOr([spanNear([text:crow, text:blackbird], 0, true), text:grackl])",
          parser.getQuery().toString());
    }

    // "text_sw" doesn't specify autoGeneratePhraseQueries => default false
    for (SolrParams splitting : Arrays.asList(sowUnset, sowOn, sowOff)) {
      final QParser parser = QParser.getParser("text_sw:grackle", request);
      parser.setParams(splitting);
      assertEquals("(+text_sw:crow +text_sw:blackbird) text_sw:grackl",
          parser.getQuery().toString());
    }
  }
}
}