SOLR-1553: extended dismax parser
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@881546 13f79535-47bb-0310-9956-ffa450edef68
parent 2886626aea
commit 1cfb5a2fe3
@@ -34,9 +34,14 @@ Detailed Change List
New Features
----------------------

1. SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
 * SOLR-1302: Added several new distance based functions, including Great Circle (haversine), Manhattan and Euclidean.
   Also added geohash(), deg() and rad() convenience functions. See http://wiki.apache.org/solr/FunctionQuery. (gsingers)

 * SOLR-1553: New dismax parser implementation (accessible as "edismax")
   that supports full lucene syntax, improved reserved char escaping,
   fielded queries, improved proximity boosting, and improved stopword
   handling. (yonik)

Optimizations
----------------------

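The new parser is driven by ordinary request parameters: defType=edismax selects it, qf lists the fields to query, and q carries the user query (the same parameters the test added below exercises). As a rough illustration only, not part of this commit, the following standalone Java sketch assembles such a request URL; the host, port, and /solr/select path are assumed defaults.

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

// Illustrative sketch only, not part of this commit: builds an example /select
// request that exercises the new "edismax" parser. The host, port and
// "/solr/select" path are assumed defaults; defType, qf and q are the
// parameters used by the tests in this change.
public class EdismaxRequestSketch {
  public static void main(String[] args) throws UnsupportedEncodingException {
    String base = "http://localhost:8983/solr/select";        // assumption: default example URL
    String qf   = URLEncoder.encode("name title subject text", "UTF-8");
    String q    = URLEncoder.encode("Order AND op", "UTF-8");  // full boolean syntax is accepted
    System.out.println(base + "?defType=edismax&qf=" + qf + "&q=" + q);
  }
}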
File diff suppressed because it is too large
@@ -32,6 +32,7 @@ public abstract class QParserPlugin implements NamedListInitializedPlugin {
    PrefixQParserPlugin.NAME, PrefixQParserPlugin.class,
    BoostQParserPlugin.NAME, BoostQParserPlugin.class,
    DisMaxQParserPlugin.NAME, DisMaxQParserPlugin.class,
    ExtendedDismaxQParserPlugin.NAME, ExtendedDismaxQParserPlugin.class,
    FieldQParserPlugin.NAME, FieldQParserPlugin.class,
    RawQParserPlugin.NAME, RawQParserPlugin.class,
    NestedQParserPlugin.NAME, NestedQParserPlugin.class,
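The hunk above adds the new plugin to the table of standard query parsers, which pairs each parser name with its implementing class so that a request's defType value can be resolved to a parser. The standalone sketch below, which is not Solr source, models that registration table as a plain map; only the "dismax" and "edismax" names are taken from this change, and the class names are written as strings so the example compiles without Solr.

import java.util.LinkedHashMap;
import java.util.Map;

// Rough sketch, not Solr source: models the (name, class) registration table
// above as a simple map. With this change, defType=edismax resolves to the new
// ExtendedDismaxQParserPlugin alongside the existing "dismax" entry. Class names
// are plain strings so the sketch compiles without Solr on the classpath.
public class ParserRegistrySketch {
  public static void main(String[] args) {
    Map<String, String> standardPlugins = new LinkedHashMap<String, String>();
    standardPlugins.put("dismax",  "org.apache.solr.search.DisMaxQParserPlugin");
    standardPlugins.put("edismax", "org.apache.solr.search.ExtendedDismaxQParserPlugin");
    // ... the remaining plugins from the table above are registered the same way

    String defType = "edismax"; // value a request would pass as defType
    System.out.println(defType + " -> " + standardPlugins.get(defType));
  }
}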
@@ -0,0 +1,170 @@
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.solr.util.AbstractSolrTestCase;

public class TestExtendedDismaxParser extends AbstractSolrTestCase {
  public String getSchemaFile() { return "schema12.xml"; }
  public String getSolrConfigFile() { return "solrconfig.xml"; }
  // public String getCoreName() { return "collection1"; }

  public void setUp() throws Exception {
    // if you override setUp or tearDown, you better call
    // the superclass's version
    super.setUp();
  }
  public void tearDown() throws Exception {
    // if you override setUp or tearDown, you better call
    // the superclass's version
    super.tearDown();
  }

  // test the edismax query parser based on the dismax parser
  public void testFocusQueryParser() {
    assertU(adoc("id", "42", "trait_ss", "Tool", "trait_ss", "Obnoxious",
                 "name", "Zapp Brannigan"));
    assertU(adoc("id", "43",
                 "title", "Democratic Order op Planets"));
    assertU(adoc("id", "44", "trait_ss", "Tool",
                 "name", "The Zapper"));
    assertU(adoc("id", "45", "trait_ss", "Chauvinist",
                 "title", "25 star General"));
    assertU(adoc("id", "46", "trait_ss", "Obnoxious",
                 "subject", "Defeated the pacifists op the Gandhi nebula"));
    assertU(adoc("id", "47", "trait_ss", "Pig",
                 "text", "line up and fly directly at the enemy death cannons, clogging them with wreckage!"));
    assertU(adoc("id", "48", "text_sw", "this has gigabyte potential", "foo_i","100"));
    assertU(adoc("id", "49", "text_sw", "start the big apple end", "foo_i","-100"));
    assertU(adoc("id", "50", "text_sw", "start new big city end"));

    assertU(commit());
    String allq = "id:[42 TO 50]";
    String allr = "*[count(//doc)=9]";
    String oner = "*[count(//doc)=1]";
    String twor = "*[count(//doc)=2]";
    String nor = "*[count(//doc)=0]";


    assertQ("standard request handler returns all matches",
            req(allq),
            allr
    );

    assertQ("edismax query parser returns all matches",
            req("q", allq,
                "defType", "edismax"
            ),
            allr
    );

    assertQ(req("defType", "edismax", "qf", "trait_ss",
                "q","Tool"), twor
    );

    // test that field types that aren't applicable don't cause an exception to be thrown
    assertQ(req("defType", "edismax", "qf", "trait_ss foo_i foo_f foo_dt foo_l foo_d foo_b",
                "q","Tool"), twor
    );

    // test that numeric field types can be queried
    assertQ(req("defType", "edismax", "qf", "text_sw",
                "q","foo_i:100"), oner
    );

    // test that numeric field types can be queried
    assertQ(req("defType", "edismax", "qf", "text_sw",
                "q","foo_i:-100"), oner
    );

    // test that numeric field types can be queried via qf
    assertQ(req("defType", "edismax", "qf", "text_sw foo_i",
                "q","100"), oner
    );

    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","op"), twor
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","Order op"), oner
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","Order AND op"), oner
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","Order and op"), oner
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","+Order op"), oner
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","Order OR op"), twor
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","Order or op"), twor
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","*:*"), allr
    );

    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","star OR (-star)"), allr
    );
    assertQ(req("defType", "edismax", "qf", "name title subject text",
                "q","id:42 OR (-id:42)"), allr
    );

    // test that basic synonyms work
    assertQ(req("defType", "edismax", "qf", "text_sw",
                "q","GB"), oner
    );

    // test for stopword removal in main query part
    assertQ(req("defType", "edismax", "qf", "text_sw",
                "q","the big"), twor
    );

    // test for stopwords not removed
    assertQ(req("defType", "edismax", "qf", "text_sw", "stopwords","false",
                "q","the big"), oner
    );

    /** stopword removal in conjunction with multi-word synonyms at query time
     * breaks this test.
    // multi-word synonyms
    // remove id:50 which contains the false match
    assertQ(req("defType", "edismax", "qf", "text_t", "indent","true", "debugQuery","true",
                "q","-id:50 nyc"), oner
    );
    **/

    /*** these fail because multi-word synonyms are being used at query time
    // this will incorrectly match "new big city"
    assertQ(req("defType", "edismax", "qf", "id title",
                "q","nyc"), oner
    );

    // this will incorrectly match "new big city"
    assertQ(req("defType", "edismax", "qf", "title",
                "q","the big apple"), nor
    );
    ***/

  }

}
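Two groups of assertions above depend on configuration changed elsewhere in this commit: "the big" matches two documents because the text_sw query analyzer removes the stopword "the", but only one document once stopwords=false keeps it, and "GB" matches the "gigabyte potential" document because the query-time synonym file defines the GB,gib,gigabyte,gigabytes group. The toy program below, plain Java with no Solr dependency and only an illustration of that reasoning rather than the real analyzers, reproduces the expected match counts.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

// Toy illustration only (not Solr code) of the stopword and synonym expectations
// above for the text_sw field, whose query analyzer applies a StopFilter and a
// SynonymFilter while its index analyzer does not.
public class EdismaxTestReasoning {
  public static void main(String[] args) {
    Set<String> stopwords = new HashSet<String>(Arrays.asList("the", "a", "an")); // subset of stopwords.txt
    List<List<String>> docs = Arrays.asList(
        Arrays.asList("this", "has", "gigabyte", "potential"), // id 48
        Arrays.asList("start", "the", "big", "apple", "end"),  // id 49
        Arrays.asList("start", "new", "big", "city", "end"));  // id 50

    // q="the big" with query-time stopword removal: only "big" remains -> 2 matches (ids 49, 50)
    System.out.println(count(docs, removeStopwords(Arrays.asList("the", "big"), stopwords)));

    // q="the big" with stopwords=false: both terms required -> 1 match (id 49)
    System.out.println(count(docs, Arrays.asList("the", "big")));

    // q="GB" expanded through the GB,gib,gigabyte,gigabytes group -> 1 match (id 48)
    List<String> gbGroup = Arrays.asList("gb", "gib", "gigabyte", "gigabytes");
    int hits = 0;
    for (List<String> doc : docs) {
      if (!Collections.disjoint(doc, gbGroup)) hits++;
    }
    System.out.println(hits);
  }

  // drops stopwords, roughly what the query-time StopFilter does
  static List<String> removeStopwords(List<String> terms, Set<String> stopwords) {
    List<String> kept = new ArrayList<String>();
    for (String t : terms) {
      if (!stopwords.contains(t)) kept.add(t);
    }
    return kept;
  }

  // counts documents containing every required term
  static int count(List<List<String>> docs, List<String> required) {
    int hits = 0;
    for (List<String> doc : docs) {
      if (doc.containsAll(required)) hits++;
    }
    return hits;
  }
}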
@@ -339,7 +339,7 @@
    <fieldtype name="syn" class="solr.TextField">
      <analyzer>
          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
          <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
          <filter name="syn" class="solr.SynonymFilterFactory" synonyms="old_synonyms.txt"/>
      </analyzer>
    </fieldtype>

@@ -350,7 +350,7 @@
      <analyzer>
          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
          <filter class="solr.SynonymFilterFactory"
                  synonyms="synonyms.txt" expand="true" />
                  synonyms="old_synonyms.txt" expand="true" />
          <filter class="solr.EnglishPorterFilterFactory"/>
          <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
      </analyzer>
@@ -326,6 +326,32 @@
    </analyzer>
  </fieldtype>

  <!-- a text field with the stop filter only on the query analyzer
    -->
  <fieldType name="text_sw" class="solr.TextField" positionIncrementGap="100">
    <analyzer type="index">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <!-- in this example, we will only use synonyms at query time
      <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
      -->
      <!--<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>-->
      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
              catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPorterFilterFactory"/>
    </analyzer>
    <analyzer type="query">
      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
      <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
      <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
      <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
              catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
      <filter class="solr.LowerCaseFilterFactory"/>
      <filter class="solr.EnglishPorterFilterFactory"/>
    </analyzer>
  </fieldType>


  <!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
       synonyms "better"
    -->
@@ -461,9 +487,11 @@

   <dynamicField name="*_mfacet" type="string" indexed="true" stored="false" multiValued="true" />

   <dynamicField name="*_sw" type="text_sw" indexed="true" stored="true" multiValued="true"/>

   <dynamicField name="*_i" type="int" indexed="true" stored="true"/>
   <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
   <dynamicField name="*_ss" type="string" indexed="true" stored="true" multiValued="true"/>
   <dynamicField name="*_l" type="long" indexed="true" stored="true"/>
   <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
   <dynamicField name="*_tt" type="text" indexed="true" stored="true"/>
@@ -12,5 +12,47 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#-----------------------------------------------------------------------
# a couple of test stopwords to test that the words are really being
# configured from this file:
stopworda
stopwordb

#Standard english stop words taken from Lucene's StopAnalyzer
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
s
such
t
that
the
their
then
there
these
they
this
to
was
will
with

@@ -1,6 +1,3 @@
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
@@ -12,11 +9,23 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
a => aa
b => b1 b2
c => c1,c2

#-----------------------------------------------------------------------
#some test synonym mappings unlikely to appear in real input text
aaa => aaaa
bbb => bbbb1 bbbb2
ccc => cccc1,cccc2
a\=>a => b\=>b
a\,a => b\,b
foo,bar,baz
fooaaa,baraaa,bazaaa

# Some synonym groups specific to this example
GB,gib,gigabyte,gigabytes
MB,mib,megabyte,megabytes
Television, Televisions, TV, TVs
#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
#after us won't split it into two words.

# Synonym mappings can be used for spelling correction too
pixima => pixma

Television,TV,Televisions
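The file above mixes the two rule forms accepted by SynonymFilterFactory: explicit mappings written with => and comma-separated groups of equivalent terms. The toy parser below, an illustration rather than Solr's actual parsing code, distinguishes the two forms for a couple of the lines above; it deliberately ignores the escaped \=> and \, cases.

import java.util.Arrays;

// Toy parser (not Solr's SynonymFilterFactory code) for the two rule forms in the
// file above: "pixima => pixma" style explicit mappings and
// "GB,gib,gigabyte,gigabytes" style equivalence groups. The escaped "\=>" and "\,"
// cases shown in the file are deliberately ignored here.
public class SynonymRuleSketch {
  public static void main(String[] args) {
    String[] lines = { "pixima => pixma", "GB,gib,gigabyte,gigabytes" };
    for (String line : lines) {
      if (line.contains("=>")) {
        String[] sides = line.split("=>", 2);
        System.out.println("map \"" + sides[0].trim() + "\" to \"" + sides[1].trim() + "\"");
      } else {
        System.out.println("equivalent terms: " + Arrays.asList(line.split(",")));
      }
    }
  }
}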