diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index acdce1d7aa4..0c18696409a 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -128,6 +128,8 @@ New Features * SOLR-3177: Enable tagging and excluding filters in StatsComponent via the localParams syntax. (Mathias H., Nikolai Luthman, Vitaliy Zhovtyuk, shalin) + +* SOLR-1604: Wildcards, ORs etc inside Phrase Queries. (Ahmet Arslan via Erick Erickson) Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java new file mode 100644 index 00000000000..0cf9bc6b771 --- /dev/null +++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java @@ -0,0 +1,117 @@ +package org.apache.solr.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser; +import org.apache.lucene.search.Query; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.parser.QueryParser; +import org.apache.solr.request.SolrQueryRequest; + +/** + * Parse Solr's variant on the Lucene {@link org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser} syntax. + *

+ * Modified from {@link org.apache.solr.search.LuceneQParserPlugin} and {@link org.apache.solr.search.SurroundQParserPlugin} + */ +public class ComplexPhraseQParserPlugin extends QParserPlugin { + + public static final String NAME = "complexphrase"; + + private boolean inOrder = true; + + @Override + public void init(NamedList args) { + if (args != null) { + Object val = args.get("inOrder"); + if (val != null) { + inOrder = StrUtils.parseBool(val.toString()); + } + } + } + + @Override + public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + ComplexPhraseQParser qParser = new ComplexPhraseQParser(qstr, localParams, params, req); + qParser.setInOrder(inOrder); + return qParser; + } +} + +/** + * Modified from {@link org.apache.solr.search.LuceneQParser} and {@link org.apache.solr.search.SurroundQParser} + */ +class ComplexPhraseQParser extends QParser { + + ComplexPhraseQueryParser lparser; + + boolean inOrder = true; + + /** + * When inOrder is true, the search terms must + * exists in the documents as the same order as in query. + * + * @param inOrder parameter to choose between ordered or un-ordered proximity search + */ + public void setInOrder(final boolean inOrder) { + this.inOrder = inOrder; + } + + public ComplexPhraseQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) { + super(qstr, localParams, params, req); + } + + @Override + public Query parse() throws SyntaxError { + String qstr = getString(); + + String defaultField = getParam(CommonParams.DF); + if (defaultField == null) { + defaultField = getReq().getSchema().getDefaultSearchFieldName(); + } + + lparser = new ComplexPhraseQueryParser(getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, getReq().getSchema().getQueryAnalyzer()); + + if (localParams != null) + inOrder = localParams.getBool("inOrder", inOrder); + + lparser.setInOrder(inOrder); + + QueryParser.Operator defaultOperator = QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP)); + + if (QueryParser.Operator.AND.equals(defaultOperator)) + lparser.setDefaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.AND); + else + lparser.setDefaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.OR); + + try { + return lparser.parse(qstr); + } catch (ParseException pe) { + throw new SyntaxError(pe); + } + } + + @Override + public String[] getDefaultHighlightFields() { + return lparser == null ? new String[]{} : new String[]{lparser.getField()}; + } +} + diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java index 3cb6f4dbfcd..41e64c030d2 100644 --- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java @@ -59,7 +59,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI BlockJoinParentQParserPlugin.NAME, BlockJoinParentQParserPlugin.class, BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class, CollapsingQParserPlugin.NAME, CollapsingQParserPlugin.class, - SimpleQParserPlugin.NAME, SimpleQParserPlugin.class + SimpleQParserPlugin.NAME, SimpleQParserPlugin.class, + ComplexPhraseQParserPlugin.NAME, ComplexPhraseQParserPlugin.class }; /** return a {@link QParser} */ diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-complex-phrase.xml b/solr/core/src/test-files/solr/collection1/conf/schema-complex-phrase.xml new file mode 100644 index 00000000000..e2c02ddd875 --- /dev/null +++ b/solr/core/src/test-files/solr/collection1/conf/schema-complex-phrase.xml @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + text + + id + + + diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml index e98de695c3b..89ba3ca3bd1 100644 --- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml +++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml @@ -28,4 +28,9 @@ + + + false + + diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java index a8d42317c52..ddb7aaf5089 100644 --- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java +++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java @@ -344,6 +344,13 @@ public class QueryEqualityTest extends SolrTestCaseJ4 { "and(apache,solr)", "apache AND solr"); } + public void testQueryComplexPhrase() throws Exception { + assertQueryEquals("complexphrase", "{!complexphrase df=text}\"jo* smith\"", + "text:\"jo* smith\""); + assertQueryEquals("complexphrase", "{!complexphrase df=title}\"jo* smith\"", + "title:\"jo* smith\""); + } + public void testFuncTestfunc() throws Exception { assertFuncEquals("testfunc(foo_i)","testfunc(field(foo_i))"); assertFuncEquals("testfunc(23)"); diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java new file mode 100644 index 00000000000..2b0066b92be --- /dev/null +++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java @@ -0,0 +1,383 @@ +package org.apache.solr.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.HighlightParams; +import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.solr.util.TestHarness; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.util.HashMap; + +public class TestComplexPhraseQParserPlugin extends AbstractSolrTestCase { + + @BeforeClass + public static void beforeClass() throws Exception { + initCore("solrconfig-query-parser-init.xml","schema-complex-phrase.xml"); + } + + @Override + public void setUp() throws Exception { + super.setUp(); + } + + @Override + public void tearDown() throws Exception { + super.tearDown(); + } + + @Test + public void testDefaultField() { + + assertU(adoc("text", "john smith", "id", "1")); + assertU(adoc("text", "johathon smith", "id", "2")); + assertU(adoc("text", "john percival smith", "id", "3")); + assertU(commit()); + assertU(optimize()); + + assertQ(req("q", "{!complexphrase} \"john smith\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='1']" + ); + + assertQ(req("q", "{!complexphrase} \"j* smyth~\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + assertQ(req("q", "{!complexphrase} \"(jo* -john) smith\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='2']" + ); + + assertQ(req("q", "{!complexphrase} \"jo* smith\"~2") + , "//result[@numFound='3']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + , "//doc[./int[@name='id']='3']" + ); + + assertQ(req("q", "{!complexphrase} \"jo* [sma TO smz]\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + assertQ(req("q", "{!complexphrase} \"john\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='3']" + ); + + assertQ(req("q", "{!complexphrase} \"(john johathon) smith\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + } + + @Test + public void test() { + HashMap args = new HashMap(); + + args.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME); + args.put(CommonParams.FL, "id"); + + TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + + assertU(adoc("name", "john smith", "id", "1")); + assertU(adoc("name", "johathon smith", "id", "2")); + assertU(adoc("name", "john percival smith", "id", "3")); + assertU(commit()); + assertU(optimize()); + + assertQ("Simple multi-term still works", + sumLRF.makeRequest("name:\"john smith\""), + "//doc[./int[@name='id']='1']", + "//result[@numFound='1']" + ); + + assertQ(req("q", "{!complexphrase} name:\"john smith\""), + "//doc[./int[@name='id']='1']", + "//result[@numFound='1']" + ); + + + assertQ("wildcards and fuzzies are OK in phrases", + sumLRF.makeRequest("name:\"j* smyth~\""), + "//doc[./int[@name='id']='1']", + "//doc[./int[@name='id']='2']", + "//result[@numFound='2']" + ); + + assertQ("boolean logic works", + sumLRF.makeRequest("name:\"(jo* -john) smith\""), + "//doc[./int[@name='id']='2']", + "//result[@numFound='1']" + ); + + assertQ("position logic works", + sumLRF.makeRequest("name:\"jo* smith\"~2"), + "//doc[./int[@name='id']='1']", + "//doc[./int[@name='id']='2']", + "//doc[./int[@name='id']='3']", + "//result[@numFound='3']" + ); + + assertQ("range queries supported", + sumLRF.makeRequest("name:\"jo* [sma TO smz]\""), + "//doc[./int[@name='id']='1']", + "//doc[./int[@name='id']='2']", + "//result[@numFound='2']" + ); + + assertQ("Simple single-term still works", + sumLRF.makeRequest("name:\"john\""), + "//doc[./int[@name='id']='1']", + "//doc[./int[@name='id']='3']", + "//result[@numFound='2']" + ); + + assertQ("OR inside phrase works", + sumLRF.makeRequest("name:\"(john johathon) smith\""), + "//doc[./int[@name='id']='1']", + "//doc[./int[@name='id']='2']", + "//result[@numFound='2']" + ); + + } + + @Test + public void testPhraseHighlighter() { + HashMap args = new HashMap(); + + args.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME); + args.put(CommonParams.FL, "id"); + args.put(HighlightParams.HIGHLIGHT, Boolean.TRUE.toString()); + args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString()); + args.put(HighlightParams.FIELD_MATCH, Boolean.FALSE.toString()); + + args.put(HighlightParams.FRAGSIZE, String.valueOf(0)); + args.put(HighlightParams.FIELDS, "name"); + + + TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory( + "standard", 0, 200, args); + + assertU(adoc("name", "john smith smith john", "id", "1")); + assertU(adoc("name", "johathon smith smith johathon", "id", "2")); + assertU(adoc("name", "john percival smith", "id", "3")); + assertU(commit()); + assertU(optimize()); + + assertQ("range queries supported", + sumLRF.makeRequest("name:[sma TO smz]"), + "//doc[./int[@name='id']='1']", + "//doc[./int[@name='id']='2']", + "//doc[./int[@name='id']='3']", + "//result[@numFound='3']" + ); + + + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("PhraseHighlighter=true Test", + sumLRF.makeRequest("name:\"(john johathon) smith\""), + "//lst[@name='highlighting']/lst[@name='1']", + "//lst[@name='1']/arr[@name='name']/str[.='john smith smith john']", + "//lst[@name='highlighting']/lst[@name='2']", + "//lst[@name='2']/arr[@name='name']/str[.='johathon smith smith johathon']" + ); + + + args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.FALSE.toString()); + sumLRF = h.getRequestFactory("standard", 0, 200, args); + assertQ("PhraseHighlighter=false Test", + sumLRF.makeRequest("name:\"(john johathon) smith\""), + "//lst[@name='highlighting']/lst[@name='1']", + "//lst[@name='1']/arr[@name='name']/str[.='john smith smith john']", + "//lst[@name='highlighting']/lst[@name='2']", + "//lst[@name='2']/arr[@name='name']/str[.='johathon smith smith johathon']" + ); + + /* + assertQ("Highlight Plain Prefix Query Test", + sumLRF.makeRequest("name:jo*"), + "//lst[@name='highlighting']/lst[@name='1']", + "//lst[@name='1']/arr[@name='name']/str[.='john smith smith john']", + "//lst[@name='highlighting']/lst[@name='2']", + "//lst[@name='2']/arr[@name='name']/str[.='johathon smith smith johathon']", + "//lst[@name='highlighting']/lst[@name='3']", + "//lst[@name='3']/arr[@name='name']/str[.='john percival smith']" + ); + */ + } + + @Test + public void testMultipleFields() { + + assertU(adoc("text", "protein digest", "name", "dna rules", "id", "1")); + assertU(adoc("text", "digest protein", "name", "rna is the workhorse", "id", "2")); + + assertU(adoc("text", "dna rules", "name", "protein digest", "id", "3")); + assertU(adoc("text", "dna really rules", "name", "digest protein", "id", "4")); + + assertU(commit()); + assertU(optimize()); + + assertQ(req("q", "{!complexphrase} name:\"protein digest\" AND text:\"dna rules\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='3']" + ); + + assertQ(req("q", "{!complexphrase} name:\"prot* dige*\" AND text:\"d* r*\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='3']" + ); + + assertQ(req("q", "{!complexphrase inOrder=\"false\"} name:\"dna* rule*\" AND text:\"prot* diges*\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='1']" + ); + + assertQ(req("q", "{!unorderedcomplexphrase} name:\"protein digest\" AND text:\"dna rules\"~2") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='3']" + , "//doc[./int[@name='id']='4']" + ); + + + assertQ(req("q", "{!unorderedcomplexphrase inOrder=\"true\"} name:\"protein digest\" AND text:\"dna rules\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='3']" + ); + + } + + @Test + public void testUnorderedPhraseQuery() { + + assertU(adoc("text", "protein digest", "id", "1")); + assertU(adoc("text", "digest protein", "id", "2")); + + assertU(adoc("name", "protein digest", "id", "3")); + assertU(adoc("name", "digest protein", "id", "4")); + + assertU(commit()); + assertU(optimize()); + + /** + * ordered phrase query return only fist document + */ + assertQ(req("q", "{!complexphrase} \"protein digest\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='1']" + ); + + assertQ(req("q", "{!complexphrase} \"pro* di*\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='1']" + ); + + assertQ(req("q", "{!complexphrase} name:\"protein digest\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='3']" + ); + + assertQ(req("q", "{!complexphrase} name:\"pro* di*\"") + , "//result[@numFound='1']" + , "//doc[./int[@name='id']='3']" + ); + + /** + * unordered phrase query returns two documents. + */ + assertQ(req("q", "{!unorderedcomplexphrase} \"digest protein\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + assertQ(req("q", "{!unorderedcomplexphrase} \"di* pro*\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + assertQ(req("q", "{!unorderedcomplexphrase} name:\"digest protein\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='3']" + , "//doc[./int[@name='id']='4']" + ); + + assertQ(req("q", "{!unorderedcomplexphrase} name:\"di* pro*\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='3']" + , "//doc[./int[@name='id']='4']" + ); + + /** + * inOrder parameter can be defined with local params syntax. + */ + assertQ(req("q", "{!complexphrase inOrder=false} \"di* pro*\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + /** + * inOrder and df parameters can be defined with local params syntax. + */ + assertQ(req("q", "{!complexphrase inOrder=false df=name} \"di* pro*\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='3']" + , "//doc[./int[@name='id']='4']" + ); + } + /** + * the query "sulfur-reducing bacteria" was crashing due to the dash inside the phrase. + */ + @Test public void testHyphenInPhrase() { + + assertU(adoc("text", "sulfur-reducing bacteria", "id", "1")); + assertU(adoc("text", "sulfur reducing bacteria", "id", "2")); + + assertU(adoc("name", "sulfur-reducing bacteria", "id", "3")); + assertU(adoc("name", "sulfur reducing bacteria", "id", "4")); + + assertU(commit()); + assertU(optimize()); + + assertQ(req("q", "{!complexphrase} \"sulfur-reducing bacteria\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='1']" + , "//doc[./int[@name='id']='2']" + ); + + assertQ(req("q", "{!complexphrase} name:\"sulfur-reducing bacteria\"") + , "//result[@numFound='2']" + , "//doc[./int[@name='id']='3']" + , "//doc[./int[@name='id']='4']" + ); + } +} +