diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index acdce1d7aa4..0c18696409a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -128,6 +128,8 @@ New Features
* SOLR-3177: Enable tagging and excluding filters in StatsComponent via the
localParams syntax. (Mathias H., Nikolai Luthman, Vitaliy Zhovtyuk, shalin)
+
+* SOLR-1604: Wildcards, ORs, etc. inside phrase queries, via the new "complexphrase" query parser. (Ahmet Arslan via Erick Erickson)
Bug Fixes
----------------------
diff --git a/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
new file mode 100644
index 00000000000..0cf9bc6b771
--- /dev/null
+++ b/solr/core/src/java/org/apache/solr/search/ComplexPhraseQParserPlugin.java
@@ -0,0 +1,117 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.queryparser.classic.ParseException;
+import org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser;
+import org.apache.lucene.search.Query;
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.StrUtils;
+import org.apache.solr.parser.QueryParser;
+import org.apache.solr.request.SolrQueryRequest;
+
+/**
+ * {@link QParserPlugin} that makes the Lucene
+ * {@link org.apache.lucene.queryparser.complexPhrase.ComplexPhraseQueryParser} syntax available in Solr
+ * under the parser name {@link #NAME}.
+ *
+ * The structure follows {@link org.apache.solr.search.LuceneQParserPlugin} and
+ * {@link org.apache.solr.search.SurroundQParserPlugin}.
+ */
+public class ComplexPhraseQParserPlugin extends QParserPlugin {
+
+  /** Parser name, as used in <code>{!complexphrase}</code>. */
+  public static final String NAME = "complexphrase";
+
+  /** Plugin-wide <code>inOrder</code> default handed to every parser this plugin creates. */
+  private boolean inOrder = true;
+
+  /**
+   * Reads the optional <code>inOrder</code> init argument. When the argument list or the
+   * argument itself is missing, the current default is left untouched.
+   *
+   * @param args plugin init arguments, may be <code>null</code>
+   */
+  @Override
+  public void init(NamedList args) {
+    final Object configured = (args == null) ? null : args.get("inOrder");
+    if (configured == null) {
+      return;
+    }
+    inOrder = StrUtils.parseBool(configured.toString());
+  }
+
+  /**
+   * Creates a {@link ComplexPhraseQParser} for the given request and seeds it with this
+   * plugin's <code>inOrder</code> setting.
+   */
+  @Override
+  public QParser createParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+    final ComplexPhraseQParser parser = new ComplexPhraseQParser(qstr, localParams, params, req);
+    parser.setInOrder(inOrder);
+    return parser;
+  }
+}
+
+/**
+ * {@link QParser} that hands the query string to Lucene's {@link ComplexPhraseQueryParser}.
+ *
+ * Modified from {@link org.apache.solr.search.LuceneQParser} and {@link org.apache.solr.search.SurroundQParser}
+ */
+class ComplexPhraseQParser extends QParser {
+
+  /** Lucene parser created by the most recent {@link #parse()} call; <code>null</code> before the first call. */
+  ComplexPhraseQueryParser lparser;
+
+  /** Ordered (true) or un-ordered (false) proximity matching; an <code>inOrder</code> local param overrides it in {@link #parse()}. */
+  boolean inOrder = true;
+
+  /**
+   * When <code>inOrder</code> is true, the search terms must
+   * exist in the documents in the same order as in the query.
+   *
+   * @param inOrder parameter to choose between ordered or un-ordered proximity search
+   */
+  public void setInOrder(final boolean inOrder) {
+    this.inOrder = inOrder;
+  }
+
+  /**
+   * Passes all arguments through to the {@link QParser} constructor.
+   */
+  public ComplexPhraseQParser(String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
+    super(qstr, localParams, params, req);
+  }
+
+  /**
+   * Builds a new {@link ComplexPhraseQueryParser} and uses it to parse the query string.
+   * <p>
+   * The default field is the {@link CommonParams#DF} parameter, falling back to the schema's
+   * default search field name. An <code>inOrder</code> local param, when present, replaces the
+   * value set through {@link #setInOrder(boolean)}. The default operator is resolved from the
+   * schema and the {@link QueryParsing#OP} parameter.
+   *
+   * @return the query produced by the Lucene parser
+   * @throws SyntaxError if the Lucene parser throws a {@link ParseException}
+   */
+  @Override
+  public Query parse() throws SyntaxError {
+    String qstr = getString();
+
+    String defaultField = getParam(CommonParams.DF);
+    if (defaultField == null) {
+      defaultField = getReq().getSchema().getDefaultSearchFieldName();
+    }
+
+    lparser = new ComplexPhraseQueryParser(getReq().getCore().getSolrConfig().luceneMatchVersion, defaultField, getReq().getSchema().getQueryAnalyzer());
+
+    // A per-request local param takes precedence over the value supplied by the plugin.
+    if (localParams != null) {
+      inOrder = localParams.getBool("inOrder", inOrder);
+    }
+
+    lparser.setInOrder(inOrder);
+
+    QueryParser.Operator defaultOperator = QueryParsing.getQueryParserDefaultOperator(getReq().getSchema(), getParam(QueryParsing.OP));
+
+    // Translate Solr's operator enum into the Lucene classic parser's enum; anything but AND maps to OR.
+    lparser.setDefaultOperator(QueryParser.Operator.AND.equals(defaultOperator)
+        ? org.apache.lucene.queryparser.classic.QueryParser.Operator.AND
+        : org.apache.lucene.queryparser.classic.QueryParser.Operator.OR);
+
+    try {
+      return lparser.parse(qstr);
+    } catch (ParseException pe) {
+      throw new SyntaxError(pe);
+    }
+  }
+
+  /**
+   * @return the field of the Lucene parser created by {@link #parse()}, or an empty array
+   *         when {@link #parse()} has not been called yet
+   */
+  @Override
+  public String[] getDefaultHighlightFields() {
+    return lparser == null ? new String[]{} : new String[]{lparser.getField()};
+  }
+}
+
diff --git a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
index 3cb6f4dbfcd..41e64c030d2 100644
--- a/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
+++ b/solr/core/src/java/org/apache/solr/search/QParserPlugin.java
@@ -59,7 +59,8 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin, SolrI
BlockJoinParentQParserPlugin.NAME, BlockJoinParentQParserPlugin.class,
BlockJoinChildQParserPlugin.NAME, BlockJoinChildQParserPlugin.class,
CollapsingQParserPlugin.NAME, CollapsingQParserPlugin.class,
- SimpleQParserPlugin.NAME, SimpleQParserPlugin.class
+ SimpleQParserPlugin.NAME, SimpleQParserPlugin.class,
+ ComplexPhraseQParserPlugin.NAME, ComplexPhraseQParserPlugin.class
};
/** return a {@link QParser} */
diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-complex-phrase.xml b/solr/core/src/test-files/solr/collection1/conf/schema-complex-phrase.xml
new file mode 100644
index 00000000000..e2c02ddd875
--- /dev/null
+++ b/solr/core/src/test-files/solr/collection1/conf/schema-complex-phrase.xml
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ text
+
+ id
+
+
+
diff --git a/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml b/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml
index e98de695c3b..89ba3ca3bd1 100644
--- a/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml
+++ b/solr/core/src/test-files/solr/collection1/conf/solrconfig-query-parser-init.xml
@@ -28,4 +28,9 @@
+
+
+ false
+
+
diff --git a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
index a8d42317c52..ddb7aaf5089 100644
--- a/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
+++ b/solr/core/src/test/org/apache/solr/search/QueryEqualityTest.java
@@ -344,6 +344,13 @@ public class QueryEqualityTest extends SolrTestCaseJ4 {
"and(apache,solr)", "apache AND solr");
}
+  /**
+   * Checks that complexphrase queries given with an explicit <code>df</code> local param are
+   * equal to the same phrase written with an explicit field prefix.
+   */
+  public void testQueryComplexPhrase() throws Exception {
+    final String parserName = "complexphrase";
+    assertQueryEquals(parserName, "{!complexphrase df=text}\"jo* smith\"", "text:\"jo* smith\"");
+    assertQueryEquals(parserName, "{!complexphrase df=title}\"jo* smith\"", "title:\"jo* smith\"");
+  }
+
public void testFuncTestfunc() throws Exception {
assertFuncEquals("testfunc(foo_i)","testfunc(field(foo_i))");
assertFuncEquals("testfunc(23)");
diff --git a/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java
new file mode 100644
index 00000000000..2b0066b92be
--- /dev/null
+++ b/solr/core/src/test/org/apache/solr/search/TestComplexPhraseQParserPlugin.java
@@ -0,0 +1,383 @@
+package org.apache.solr.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.HighlightParams;
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.apache.solr.util.TestHarness;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.util.HashMap;
+
+/**
+ * Tests for the <code>complexphrase</code> query parser: wildcards, fuzzy terms, ranges and
+ * boolean logic inside phrases, ordered versus un-ordered matching, and highlighting.
+ *
+ * NOTE(review): the <code>unorderedcomplexphrase</code> parser used below is presumably registered in
+ * solrconfig-query-parser-init.xml with <code>inOrder=false</code> -- confirm against that file.
+ */
+public class TestComplexPhraseQParserPlugin extends AbstractSolrTestCase {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig-query-parser-init.xml","schema-complex-phrase.xml");
+  }
+
+  /**
+   * Starts every test from an empty index. All tests reuse the ids 1-4, so without this the
+   * documents left behind by one test would be visible to the next one and the results would
+   * depend on execution order.
+   */
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+    clearIndex();
+    assertU(commit());
+  }
+
+  /** Phrases without a field prefix are run against the default search field. */
+  @Test
+  public void testDefaultField() {
+
+    assertU(adoc("text", "john smith", "id", "1"));
+    assertU(adoc("text", "johathon smith", "id", "2"));
+    assertU(adoc("text", "john percival smith", "id", "3"));
+    assertU(commit());
+    assertU(optimize());
+
+    assertQ(req("q", "{!complexphrase} \"john smith\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='1']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"j* smyth~\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"(jo* -john) smith\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"jo* smith\"~2")
+        , "//result[@numFound='3']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"jo* [sma TO smz]\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"john\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"(john johathon) smith\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+  }
+
+  /** Same query shapes as {@link #testDefaultField()}, with an explicit field and the parser selected via defType. */
+  @Test
+  public void test() {
+    HashMap<String,String> args = new HashMap<String,String>();
+
+    args.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME);
+    args.put(CommonParams.FL, "id");
+
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+        "standard", 0, 200, args);
+
+    assertU(adoc("name", "john smith", "id", "1"));
+    assertU(adoc("name", "johathon smith", "id", "2"));
+    assertU(adoc("name", "john percival smith", "id", "3"));
+    assertU(commit());
+    assertU(optimize());
+
+    assertQ("Simple multi-term still works",
+        sumLRF.makeRequest("name:\"john smith\""),
+        "//doc[./int[@name='id']='1']",
+        "//result[@numFound='1']"
+    );
+
+    assertQ(req("q", "{!complexphrase} name:\"john smith\""),
+        "//doc[./int[@name='id']='1']",
+        "//result[@numFound='1']"
+    );
+
+
+    assertQ("wildcards and fuzzies are OK in phrases",
+        sumLRF.makeRequest("name:\"j* smyth~\""),
+        "//doc[./int[@name='id']='1']",
+        "//doc[./int[@name='id']='2']",
+        "//result[@numFound='2']"
+    );
+
+    assertQ("boolean logic works",
+        sumLRF.makeRequest("name:\"(jo* -john) smith\""),
+        "//doc[./int[@name='id']='2']",
+        "//result[@numFound='1']"
+    );
+
+    assertQ("position logic works",
+        sumLRF.makeRequest("name:\"jo* smith\"~2"),
+        "//doc[./int[@name='id']='1']",
+        "//doc[./int[@name='id']='2']",
+        "//doc[./int[@name='id']='3']",
+        "//result[@numFound='3']"
+    );
+
+    assertQ("range queries supported",
+        sumLRF.makeRequest("name:\"jo* [sma TO smz]\""),
+        "//doc[./int[@name='id']='1']",
+        "//doc[./int[@name='id']='2']",
+        "//result[@numFound='2']"
+    );
+
+    assertQ("Simple single-term still works",
+        sumLRF.makeRequest("name:\"john\""),
+        "//doc[./int[@name='id']='1']",
+        "//doc[./int[@name='id']='3']",
+        "//result[@numFound='2']"
+    );
+
+    assertQ("OR inside phrase works",
+        sumLRF.makeRequest("name:\"(john johathon) smith\""),
+        "//doc[./int[@name='id']='1']",
+        "//doc[./int[@name='id']='2']",
+        "//result[@numFound='2']"
+    );
+
+  }
+
+  /** Highlighting of complex phrase queries, with the phrase highlighter switched on and off. */
+  @Test
+  public void testPhraseHighlighter() {
+    HashMap<String,String> args = new HashMap<String,String>();
+
+    args.put(QueryParsing.DEFTYPE, ComplexPhraseQParserPlugin.NAME);
+    args.put(CommonParams.FL, "id");
+    args.put(HighlightParams.HIGHLIGHT, Boolean.TRUE.toString());
+    args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.TRUE.toString());
+    args.put(HighlightParams.FIELD_MATCH, Boolean.FALSE.toString());
+
+    args.put(HighlightParams.FRAGSIZE, String.valueOf(0));
+    args.put(HighlightParams.FIELDS, "name");
+
+
+    TestHarness.LocalRequestFactory sumLRF = h.getRequestFactory(
+        "standard", 0, 200, args);
+
+    assertU(adoc("name", "john smith smith john", "id", "1"));
+    assertU(adoc("name", "johathon smith smith johathon", "id", "2"));
+    assertU(adoc("name", "john percival smith", "id", "3"));
+    assertU(commit());
+    assertU(optimize());
+
+    assertQ("range queries supported",
+        sumLRF.makeRequest("name:[sma TO smz]"),
+        "//doc[./int[@name='id']='1']",
+        "//doc[./int[@name='id']='2']",
+        "//doc[./int[@name='id']='3']",
+        "//result[@numFound='3']"
+    );
+
+
+    // args are unchanged since sumLRF was created, so the same factory is reused here.
+    assertQ("PhraseHighlighter=true Test",
+        sumLRF.makeRequest("name:\"(john johathon) smith\""),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='name']/str[.='john smith smith john']",
+        "//lst[@name='highlighting']/lst[@name='2']",
+        "//lst[@name='2']/arr[@name='name']/str[.='johathon smith smith johathon']"
+    );
+
+
+    // The args changed, so a new request factory is created to pick them up.
+    args.put(HighlightParams.USE_PHRASE_HIGHLIGHTER, Boolean.FALSE.toString());
+    sumLRF = h.getRequestFactory("standard", 0, 200, args);
+    assertQ("PhraseHighlighter=false Test",
+        sumLRF.makeRequest("name:\"(john johathon) smith\""),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='name']/str[.='john smith smith john']",
+        "//lst[@name='highlighting']/lst[@name='2']",
+        "//lst[@name='2']/arr[@name='name']/str[.='johathon smith smith johathon']"
+    );
+
+    // NOTE(review): the assertion below was already disabled in the original change; the reason is not recorded.
+    /*
+    assertQ("Highlight Plain Prefix Query Test",
+        sumLRF.makeRequest("name:jo*"),
+        "//lst[@name='highlighting']/lst[@name='1']",
+        "//lst[@name='1']/arr[@name='name']/str[.='john smith smith john']",
+        "//lst[@name='highlighting']/lst[@name='2']",
+        "//lst[@name='2']/arr[@name='name']/str[.='johathon smith smith johathon']",
+        "//lst[@name='highlighting']/lst[@name='3']",
+        "//lst[@name='3']/arr[@name='name']/str[.='john percival smith']"
+    );
+    */
+  }
+
+  /** Complex phrases on two different fields combined with AND in a single query. */
+  @Test
+  public void testMultipleFields() {
+
+    assertU(adoc("text", "protein digest", "name", "dna rules", "id", "1"));
+    assertU(adoc("text", "digest protein", "name", "rna is the workhorse", "id", "2"));
+
+    assertU(adoc("text", "dna rules", "name", "protein digest", "id", "3"));
+    assertU(adoc("text", "dna really rules", "name", "digest protein", "id", "4"));
+
+    assertU(commit());
+    assertU(optimize());
+
+    assertQ(req("q", "{!complexphrase} name:\"protein digest\" AND text:\"dna rules\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+    assertQ(req("q", "{!complexphrase} name:\"prot* dige*\" AND text:\"d* r*\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+    assertQ(req("q", "{!complexphrase inOrder=\"false\"} name:\"dna* rule*\" AND text:\"prot* diges*\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='1']"
+    );
+
+    assertQ(req("q", "{!unorderedcomplexphrase} name:\"protein digest\" AND text:\"dna rules\"~2")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='3']"
+        , "//doc[./int[@name='id']='4']"
+    );
+
+
+    assertQ(req("q", "{!unorderedcomplexphrase inOrder=\"true\"} name:\"protein digest\" AND text:\"dna rules\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+  }
+
+  /** Ordered versus un-ordered phrase matching, selected by parser name or by the inOrder local param. */
+  @Test
+  public void testUnorderedPhraseQuery() {
+
+    assertU(adoc("text", "protein digest", "id", "1"));
+    assertU(adoc("text", "digest protein", "id", "2"));
+
+    assertU(adoc("name", "protein digest", "id", "3"));
+    assertU(adoc("name", "digest protein", "id", "4"));
+
+    assertU(commit());
+    assertU(optimize());
+
+    // An ordered phrase query returns only the first document of each pair.
+    assertQ(req("q", "{!complexphrase} \"protein digest\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='1']"
+    );
+
+    assertQ(req("q", "{!complexphrase} \"pro* di*\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='1']"
+    );
+
+    assertQ(req("q", "{!complexphrase} name:\"protein digest\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+    assertQ(req("q", "{!complexphrase} name:\"pro* di*\"")
+        , "//result[@numFound='1']"
+        , "//doc[./int[@name='id']='3']"
+    );
+
+    // An un-ordered phrase query returns both documents of each pair.
+    assertQ(req("q", "{!unorderedcomplexphrase} \"digest protein\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    assertQ(req("q", "{!unorderedcomplexphrase} \"di* pro*\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    assertQ(req("q", "{!unorderedcomplexphrase} name:\"digest protein\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='3']"
+        , "//doc[./int[@name='id']='4']"
+    );
+
+    assertQ(req("q", "{!unorderedcomplexphrase} name:\"di* pro*\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='3']"
+        , "//doc[./int[@name='id']='4']"
+    );
+
+    // The inOrder parameter can be defined with local params syntax.
+    assertQ(req("q", "{!complexphrase inOrder=false} \"di* pro*\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    // The inOrder and df parameters can both be defined with local params syntax.
+    assertQ(req("q", "{!complexphrase inOrder=false df=name} \"di* pro*\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='3']"
+        , "//doc[./int[@name='id']='4']"
+    );
+  }
+
+  /**
+   * The query "sulfur-reducing bacteria" was crashing due to the dash inside the phrase.
+   */
+  @Test
+  public void testHyphenInPhrase() {
+
+    assertU(adoc("text", "sulfur-reducing bacteria", "id", "1"));
+    assertU(adoc("text", "sulfur reducing bacteria", "id", "2"));
+
+    assertU(adoc("name", "sulfur-reducing bacteria", "id", "3"));
+    assertU(adoc("name", "sulfur reducing bacteria", "id", "4"));
+
+    assertU(commit());
+    assertU(optimize());
+
+    assertQ(req("q", "{!complexphrase} \"sulfur-reducing bacteria\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='1']"
+        , "//doc[./int[@name='id']='2']"
+    );
+
+    assertQ(req("q", "{!complexphrase} name:\"sulfur-reducing bacteria\"")
+        , "//result[@numFound='2']"
+        , "//doc[./int[@name='id']='3']"
+        , "//doc[./int[@name='id']='4']"
+    );
+  }
+}
+