mirror of https://github.com/apache/lucene.git
SOLR-1553: extended dismax parser
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@881546 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2886626aea
commit
1cfb5a2fe3
|
@ -34,9 +34,14 @@ Detailed Change List
|
||||||
New Features
|
New Features
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
1. SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
|
* SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
|
||||||
Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
|
Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)
|
||||||
|
|
||||||
|
* SOLR-1553: New dismax parser implementation (accessible as "edismax")
|
||||||
|
that supports full lucene syntax, improved reserved char escaping,
|
||||||
|
fielded queries, improved proximity boosting, and improved stopword
|
||||||
|
handling. (yonik)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -32,6 +32,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin {
|
||||||
PrefixQParserPlugin.NAME, PrefixQParserPlugin.class,
|
PrefixQParserPlugin.NAME, PrefixQParserPlugin.class,
|
||||||
BoostQParserPlugin.NAME, BoostQParserPlugin.class,
|
BoostQParserPlugin.NAME, BoostQParserPlugin.class,
|
||||||
DisMaxQParserPlugin.NAME, DisMaxQParserPlugin.class,
|
DisMaxQParserPlugin.NAME, DisMaxQParserPlugin.class,
|
||||||
|
ExtendedDismaxQParserPlugin.NAME, ExtendedDismaxQParserPlugin.class,
|
||||||
FieldQParserPlugin.NAME, FieldQParserPlugin.class,
|
FieldQParserPlugin.NAME, FieldQParserPlugin.class,
|
||||||
RawQParserPlugin.NAME, RawQParserPlugin.class,
|
RawQParserPlugin.NAME, RawQParserPlugin.class,
|
||||||
NestedQParserPlugin.NAME, NestedQParserPlugin.class,
|
NestedQParserPlugin.NAME, NestedQParserPlugin.class,
|
||||||
|
|
|
@ -0,0 +1,170 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.solr.search;
|
||||||
|
|
||||||
|
import org.apache.solr.util.AbstractSolrTestCase;
|
||||||
|
|
||||||
|
public class TestExtendedDismaxParser extends AbstractSolrTestCase {
|
||||||
|
public String getSchemaFile() { return "schema12.xml"; }
|
||||||
|
public String getSolrConfigFile() { return "solrconfig.xml"; }
|
||||||
|
// public String getCoreName() { return "collection1"; }
|
||||||
|
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
// if you override setUp or tearDown, you better call
|
||||||
|
// the superclass's version
|
||||||
|
super.setUp();
|
||||||
|
}
|
||||||
|
public void tearDown() throws Exception {
|
||||||
|
// if you override setUp or tearDown, you better call
|
||||||
|
// the superclass's version
|
||||||
|
super.tearDown();
|
||||||
|
}
|
||||||
|
|
||||||
|
// test the edismax query parser based on the dismax parser
|
||||||
|
public void testFocusQueryParser() {
|
||||||
|
assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious",
|
||||||
|
"name", "Zapp Brannigan"));
|
||||||
|
assertU(adoc("id", "43" ,
|
||||||
|
"title", "Democratic Order op Planets"));
|
||||||
|
assertU(adoc("id", "44", "trait_ss", "Tool",
|
||||||
|
"name", "The Zapper"));
|
||||||
|
assertU(adoc("id", "45", "trait_ss", "Chauvinist",
|
||||||
|
"title", "25 star General"));
|
||||||
|
assertU(adoc("id", "46", "trait_ss", "Obnoxious",
|
||||||
|
"subject", "Defeated the pacifists op the Gandhi nebula"));
|
||||||
|
assertU(adoc("id", "47", "trait_ss", "Pig",
|
||||||
|
"text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
|
||||||
|
assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
|
||||||
|
assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
|
||||||
|
assertU(adoc("id", "50", "text_sw", "start new big city end"));
|
||||||
|
|
||||||
|
assertU(commit());
|
||||||
|
String allq = "id:[42 TO 50]";
|
||||||
|
String allr = "*[count(//doc)=9]";
|
||||||
|
String oner = "*[count(//doc)=1]";
|
||||||
|
String twor = "*[count(//doc)=2]";
|
||||||
|
String nor = "*[count(//doc)=0]";
|
||||||
|
|
||||||
|
|
||||||
|
assertQ("standard request handler returns all matches",
|
||||||
|
req(allq),
|
||||||
|
allr
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ("edismax query parser returns all matches",
|
||||||
|
req("q", allq,
|
||||||
|
"defType", "edismax"
|
||||||
|
),
|
||||||
|
allr
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ(req("defType", "edismax", "qf", "trait_ss",
|
||||||
|
"q","Tool"), twor
|
||||||
|
);
|
||||||
|
|
||||||
|
// test that field types that aren't applicable don't cause an exception to be thrown
|
||||||
|
assertQ(req("defType", "edismax", "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b",
|
||||||
|
"q","Tool"), twor
|
||||||
|
);
|
||||||
|
|
||||||
|
// test that numeric field types can be queried
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_sw",
|
||||||
|
"q","foo_i:100"), oner
|
||||||
|
);
|
||||||
|
|
||||||
|
// test that negative numeric values can be queried
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_sw",
|
||||||
|
"q","foo_i:-100"), oner
|
||||||
|
);
|
||||||
|
|
||||||
|
// test that numeric field types can be queried via qf
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_sw foo_i",
|
||||||
|
"q","100"), oner
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","op"), twor
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","Order op"), oner
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","Order AND op"), oner
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","Order and op"), oner
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","+Order op"), oner
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","Order OR op"), twor
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","Order or op"), twor
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","*:*"), allr
|
||||||
|
);
|
||||||
|
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","star OR (-star)"), allr
|
||||||
|
);
|
||||||
|
assertQ(req("defType", "edismax", "qf", "name title subject text",
|
||||||
|
"q","id:42 OR (-id:42)"), allr
|
||||||
|
);
|
||||||
|
|
||||||
|
// test that basic synonyms work
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_sw",
|
||||||
|
"q","GB"), oner
|
||||||
|
);
|
||||||
|
|
||||||
|
// test for stopword removal in main query part
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_sw",
|
||||||
|
"q","the big"), twor
|
||||||
|
);
|
||||||
|
|
||||||
|
// test for stopwords not removed
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_sw", "stopwords","false",
|
||||||
|
"q","the big"), oner
|
||||||
|
);
|
||||||
|
|
||||||
|
/** stopword removal in conjunction with multi-word synonyms at query time
|
||||||
|
* breaks this test.
|
||||||
|
// multi-word synonyms
|
||||||
|
// remove id:50 which contains the false match
|
||||||
|
assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true",
|
||||||
|
"q","-id:50 nyc"), oner
|
||||||
|
);
|
||||||
|
**/
|
||||||
|
|
||||||
|
/*** these fail because multi-word synonyms are being used at query time
|
||||||
|
// this will incorrectly match "new big city"
|
||||||
|
assertQ(req("defType", "edismax", "qf", "id title",
|
||||||
|
"q","nyc"), oner
|
||||||
|
);
|
||||||
|
|
||||||
|
// this will incorrectly match "new big city"
|
||||||
|
assertQ(req("defType", "edismax", "qf", "title",
|
||||||
|
"q","the big apple"), nor
|
||||||
|
);
|
||||||
|
***/
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -339,7 +339,7 @@
|
||||||
<fieldtype name="syn" class="solr.TextField">
|
<fieldtype name="syn" class="solr.TextField">
|
||||||
<analyzer>
|
<analyzer>
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
|
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
||||||
|
@ -350,7 +350,7 @@
|
||||||
<analyzer>
|
<analyzer>
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<filter class="solr.SynonymFilterFactory"
|
<filter class="solr.SynonymFilterFactory"
|
||||||
synonyms="synonyms.txt" expand="true" />
|
synonyms="old_synonyms.txt" expand="true" />
|
||||||
<filter class="solr.EnglishPorterFilterFactory"/>
|
<filter class="solr.EnglishPorterFilterFactory"/>
|
||||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory" />
|
||||||
</analyzer>
|
</analyzer>
|
||||||
|
|
|
@ -325,6 +325,32 @@
|
||||||
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
|
<filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
||||||
|
<!-- a text field with the stop filter only on the query analyzer
|
||||||
|
-->
|
||||||
|
<fieldType name="text_sw" class="solr.TextField" positionIncrementGap="100">
|
||||||
|
<analyzer type="index">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<!-- in this example, we will only use synonyms at query time
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
|
-->
|
||||||
|
<!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||||
|
catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.EnglishPorterFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
<analyzer type="query">
|
||||||
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
|
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||||
|
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
||||||
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||||
|
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
|
||||||
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
|
<filter class="solr.EnglishPorterFilterFactory"/>
|
||||||
|
</analyzer>
|
||||||
|
</fieldType>
|
||||||
|
|
||||||
|
|
||||||
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
|
<!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
|
||||||
synonyms "better"
|
synonyms "better"
|
||||||
|
@ -461,9 +487,11 @@
|
||||||
|
|
||||||
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
|
<dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />
|
||||||
|
|
||||||
|
<dynamicField name="*_sw" type="text_sw" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
|
||||||
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
<dynamicField name="*_i" type="int" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
|
<dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
|
<dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
|
||||||
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
<dynamicField name="*_l" type="long" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
|
||||||
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
|
<dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
|
||||||
|
|
|
@ -1,16 +1,58 @@
|
||||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
# contributor license agreements. See the NOTICE file distributed with
|
# contributor license agreements. See the NOTICE file distributed with
|
||||||
# this work for additional information regarding copyright ownership.
|
# this work for additional information regarding copyright ownership.
|
||||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
# (the "License"); you may not use this file except in compliance with
|
# (the "License"); you may not use this file except in compliance with
|
||||||
# the License. You may obtain a copy of the License at
|
# the License. You may obtain a copy of the License at
|
||||||
#
|
#
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
#
|
#
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
stopworda
|
|
||||||
stopwordb
|
#-----------------------------------------------------------------------
|
||||||
|
# a couple of test stopwords to test that the words are really being
|
||||||
|
# configured from this file:
|
||||||
|
stopworda
|
||||||
|
stopwordb
|
||||||
|
|
||||||
|
#Standard english stop words taken from Lucene's StopAnalyzer
|
||||||
|
a
|
||||||
|
an
|
||||||
|
and
|
||||||
|
are
|
||||||
|
as
|
||||||
|
at
|
||||||
|
be
|
||||||
|
but
|
||||||
|
by
|
||||||
|
for
|
||||||
|
if
|
||||||
|
in
|
||||||
|
into
|
||||||
|
is
|
||||||
|
it
|
||||||
|
no
|
||||||
|
not
|
||||||
|
of
|
||||||
|
on
|
||||||
|
or
|
||||||
|
s
|
||||||
|
such
|
||||||
|
t
|
||||||
|
that
|
||||||
|
the
|
||||||
|
their
|
||||||
|
then
|
||||||
|
there
|
||||||
|
these
|
||||||
|
they
|
||||||
|
this
|
||||||
|
to
|
||||||
|
was
|
||||||
|
will
|
||||||
|
with
|
||||||
|
|
||||||
|
|
|
@ -1,22 +1,31 @@
|
||||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
# contributor license agreements. See the NOTICE file distributed with
|
# (the "License"); you may not use this file except in compliance with
|
||||||
# this work for additional information regarding copyright ownership.
|
# the License. You may obtain a copy of the License at
|
||||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
#
|
||||||
# (the "License"); you may not use this file except in compliance with
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
# the License. You may obtain a copy of the License at
|
#
|
||||||
#
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
#
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
# See the License for the specific language governing permissions and
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
# limitations under the License.
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
#-----------------------------------------------------------------------
|
||||||
# limitations under the License.
|
#some test synonym mappings unlikely to appear in real input text
|
||||||
a => aa
|
aaa => aaaa
|
||||||
b => b1 b2
|
bbb => bbbb1 bbbb2
|
||||||
c => c1,c2
|
ccc => cccc1,cccc2
|
||||||
a\=>a => b\=>b
|
a\=>a => b\=>b
|
||||||
a\,a => b\,b
|
a\,a => b\,b
|
||||||
foo,bar,baz
|
fooaaa,baraaa,bazaaa
|
||||||
|
|
||||||
Television,TV,Televisions
|
# Some synonym groups specific to this example
|
||||||
|
GB,gib,gigabyte,gigabytes
|
||||||
|
MB,mib,megabyte,megabytes
|
||||||
|
Television, Televisions, TV, TVs
|
||||||
|
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
|
||||||
|
#after us won't split it into two words.
|
||||||
|
|
||||||
|
# Synonym mappings can be used for spelling correction too
|
||||||
|
pixima => pixma
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue