mirror of
https://github.com/apache/lucene.git
synced 2025-02-24 11:16:35 +00:00
SOLR-11662: synonymQueryStyle option for FieldType used by query parser
This commit is contained in:
parent
929ce7ca30
commit
83753d0a2a
@ -62,7 +62,7 @@ public class QueryBuilder {
|
||||
protected boolean enablePositionIncrements = true;
|
||||
protected boolean enableGraphQueries = true;
|
||||
protected boolean autoGenerateMultiTermSynonymsPhraseQuery = false;
|
||||
|
||||
|
||||
/** Creates a new QueryBuilder using the given analyzer. */
|
||||
public QueryBuilder(Analyzer analyzer) {
|
||||
this.analyzer = analyzer;
|
||||
|
@ -102,6 +102,9 @@ New Features
|
||||
* SOLR-11250: A new DefaultWrapperModel class for loading of large and/or externally stored
|
||||
LTRScoringModel definitions. (Yuki Yano, shalin, Christine Poerschke)
|
||||
|
||||
* SOLR-11662: New synonymQueryStyle option to configure whether SynonymQuery, a DisjunctionMaxQuery, or BooleanQuery
|
||||
occurs over query terms that overlap their position. (Doug Turnbull, David Smiley)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
@ -52,13 +52,14 @@ public class QueryParser extends SolrQueryParserBase implements QueryParserConst
|
||||
|
||||
@Override
|
||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries)
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries,
|
||||
SynonymQueryStyle synonymQueryStyle)
|
||||
throws SyntaxError {
|
||||
setAutoGenerateMultiTermSynonymsPhraseQuery(fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries());
|
||||
// Don't auto-quote graph-aware field queries
|
||||
boolean treatAsQuoted = getSplitOnWhitespace()
|
||||
? (quoted || fieldAutoGenPhraseQueries || getAutoGeneratePhraseQueries()) : quoted;
|
||||
return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries);
|
||||
return super.newFieldQuery(analyzer, field, queryText, treatAsQuoted, false, fieldEnableGraphQueries, synonymQueryStyle);
|
||||
}
|
||||
|
||||
// * Query ::= ( Clause )*
|
||||
|
@ -34,6 +34,7 @@ import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.BoostQuery;
|
||||
import org.apache.lucene.search.ConstantScoreQuery;
|
||||
import org.apache.lucene.search.DisjunctionMaxQuery;
|
||||
import org.apache.lucene.search.FuzzyQuery;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.MultiPhraseQuery;
|
||||
@ -61,6 +62,8 @@ import org.apache.solr.search.QueryUtils;
|
||||
import org.apache.solr.search.SolrConstantScoreQuery;
|
||||
import org.apache.solr.search.SyntaxError;
|
||||
|
||||
import static org.apache.solr.parser.SolrQueryParserBase.SynonymQueryStyle.*;
|
||||
|
||||
/** This class is overridden by QueryParser in QueryParser.jj
|
||||
* and acts to separate the majority of the Java code from the .jj grammar file.
|
||||
*/
|
||||
@ -78,6 +81,39 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
static final int MOD_NOT = 10;
|
||||
static final int MOD_REQ = 11;
|
||||
|
||||
protected SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM;
|
||||
|
||||
/**
|
||||
* Query strategy when analyzed query terms overlap the same position (ie synonyms)
|
||||
* consider if pants and khakis are query time synonyms
|
||||
*
|
||||
* {@link #AS_SAME_TERM}
|
||||
* {@link #PICK_BEST}
|
||||
* {@link #AS_DISTINCT_TERMS}
|
||||
*/
|
||||
public static enum SynonymQueryStyle {
|
||||
/** (default) synonym terms share doc freq
|
||||
* so if "pants" has df 500, and "khakis" a df of 50, uses 500 df when scoring both terms
|
||||
* appropriate for exact synonyms
|
||||
* see {@link org.apache.lucene.search.SynonymQuery}
|
||||
* */
|
||||
AS_SAME_TERM,
|
||||
|
||||
/** highest scoring term match chosen (ie dismax)
|
||||
* so if "pants" has df 500, and "khakis" a df of 50, khakis matches are scored higher
|
||||
* appropriate when more specific synonyms should score higher
|
||||
* */
|
||||
PICK_BEST,
|
||||
|
||||
/** each synonym scored independently, then added together (ie boolean query)
|
||||
* so if "pants" has df 500, and "khakis" a df of 50, khakis matches are scored higher but
|
||||
* summed with any "pants" matches
|
||||
* appropriate when more specific synonyms should score higher, but we don't want to ignore
|
||||
* less specific synonyms
|
||||
* */
|
||||
AS_DISTINCT_TERMS
|
||||
}
|
||||
|
||||
// make it possible to call setDefaultOperator() without accessing
|
||||
// the nested class:
|
||||
/** Alternative form of QueryParser.Operator.AND */
|
||||
@ -330,6 +366,19 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
this.allowSubQueryParsing = allowSubQueryParsing;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set how overlapping query terms (ie synonyms) should be scored, as if they're the same term,
|
||||
* picking highest scoring term, or OR'ing them together
|
||||
* @param synonymQueryStyle how to score terms that overlap; see {@link SynonymQueryStyle}
|
||||
*/
|
||||
public void setSynonymQueryStyle(SynonymQueryStyle synonymQueryStyle) {this.synonymQueryStyle = synonymQueryStyle;}
|
||||
|
||||
/**
|
||||
* Gets how overlapping query terms should be scored
|
||||
*/
|
||||
public SynonymQueryStyle getSynonymQueryStyle() {return this.synonymQueryStyle;}
|
||||
|
||||
|
||||
/**
|
||||
* Set to <code>true</code> to allow leading wildcard characters.
|
||||
* <p>
|
||||
@ -460,13 +509,16 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
}
|
||||
|
||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries)
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries,
|
||||
SynonymQueryStyle synonymQueryStyle)
|
||||
throws SyntaxError {
|
||||
BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD;
|
||||
setEnableGraphQueries(fieldEnableGraphQueries);
|
||||
setSynonymQueryStyle(synonymQueryStyle);
|
||||
Query query = createFieldQuery(analyzer, occur, field, queryText,
|
||||
quoted || fieldAutoGenPhraseQueries || autoGeneratePhraseQueries, phraseSlop);
|
||||
setEnableGraphQueries(true); // reset back to default
|
||||
setSynonymQueryStyle(AS_SAME_TERM);
|
||||
return query;
|
||||
}
|
||||
|
||||
@ -539,6 +591,29 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
return query;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected Query newSynonymQuery(Term terms[]) {
|
||||
switch (synonymQueryStyle) {
|
||||
case PICK_BEST:
|
||||
List<Query> currPosnClauses = new ArrayList<Query>(terms.length);
|
||||
for (Term term : terms) {
|
||||
currPosnClauses.add(newTermQuery(term));
|
||||
}
|
||||
DisjunctionMaxQuery dm = new DisjunctionMaxQuery(currPosnClauses, 0.0f);
|
||||
return dm;
|
||||
case AS_DISTINCT_TERMS:
|
||||
BooleanQuery.Builder builder = new BooleanQuery.Builder();
|
||||
for (Term term : terms) {
|
||||
builder.add(newTermQuery(term), BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
return builder.build();
|
||||
case AS_SAME_TERM:
|
||||
return super.newSynonymQuery(terms);
|
||||
default:
|
||||
throw new AssertionError("unrecognized synonymQueryStyle passed when creating newSynonymQuery");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a new FuzzyQuery instance
|
||||
* @param term Term
|
||||
@ -661,8 +736,13 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
|
||||
boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries();
|
||||
|
||||
SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM;
|
||||
if (ft instanceof TextField) {
|
||||
synonymQueryStyle = ((TextField)(ft)).getSynonymQueryStyle();
|
||||
}
|
||||
|
||||
subq = newFieldQuery(getAnalyzer(), sfield.getName(), rawq.getJoinedExternalVal(),
|
||||
false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
|
||||
false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle);
|
||||
booleanBuilder.add(subq, BooleanClause.Occur.SHOULD);
|
||||
} else {
|
||||
for (String externalVal : rawq.getExternalVals()) {
|
||||
@ -979,7 +1059,11 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
if (ft.isTokenized() && sf.indexed()) {
|
||||
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
|
||||
boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries();
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
|
||||
SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM;
|
||||
if (ft instanceof TextField) {
|
||||
synonymQueryStyle = ((TextField)(ft)).getSynonymQueryStyle();
|
||||
}
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle);
|
||||
} else {
|
||||
if (raw) {
|
||||
return new RawQuery(sf, queryText);
|
||||
@ -990,7 +1074,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
}
|
||||
|
||||
// default to a normal field query
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true);
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, quoted, false, true, AS_SAME_TERM);
|
||||
}
|
||||
|
||||
// Assumption: quoted is always false
|
||||
@ -1024,8 +1108,12 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
|
||||
boolean fieldAutoGenPhraseQueries = ft instanceof TextField && ((TextField)ft).getAutoGeneratePhraseQueries();
|
||||
boolean fieldEnableGraphQueries = ft instanceof TextField && ((TextField)ft).getEnableGraphQueries();
|
||||
SynonymQueryStyle synonymQueryStyle = AS_SAME_TERM;
|
||||
if (ft instanceof TextField) {
|
||||
synonymQueryStyle = ((TextField)(ft)).getSynonymQueryStyle();
|
||||
}
|
||||
return newFieldQuery
|
||||
(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
|
||||
(getAnalyzer(), field, queryText, false, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle);
|
||||
} else {
|
||||
if (raw) {
|
||||
return new RawQuery(sf, queryTerms);
|
||||
@ -1057,7 +1145,7 @@ public abstract class SolrQueryParserBase extends QueryBuilder {
|
||||
|
||||
// default to a normal field query
|
||||
String queryText = queryTerms.size() == 1 ? queryTerms.get(0) : String.join(" ", queryTerms);
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, false, false, true);
|
||||
return newFieldQuery(getAnalyzer(), field, queryText, false, false, true, AS_SAME_TERM);
|
||||
}
|
||||
|
||||
protected boolean isRangeShouldBeProtectedFromReverse(String field, String part1){
|
||||
|
@ -905,6 +905,7 @@ public abstract class FieldType extends FieldProperties {
|
||||
protected static final String ENABLE_GRAPH_QUERIES = "enableGraphQueries";
|
||||
private static final String ARGS = "args";
|
||||
private static final String POSITION_INCREMENT_GAP = "positionIncrementGap";
|
||||
protected static final String SYNONYM_QUERY_STYLE = "synonymQueryStyle";
|
||||
|
||||
/**
|
||||
* Get a map of property name -> value for this field type.
|
||||
@ -926,6 +927,7 @@ public abstract class FieldType extends FieldProperties {
|
||||
if (this instanceof TextField) {
|
||||
namedPropertyValues.add(AUTO_GENERATE_PHRASE_QUERIES, ((TextField) this).getAutoGeneratePhraseQueries());
|
||||
namedPropertyValues.add(ENABLE_GRAPH_QUERIES, ((TextField) this).getEnableGraphQueries());
|
||||
namedPropertyValues.add(SYNONYM_QUERY_STYLE, ((TextField) this).getSynonymQueryStyle());
|
||||
}
|
||||
namedPropertyValues.add(getPropertyName(INDEXED), hasProperty(INDEXED));
|
||||
namedPropertyValues.add(getPropertyName(STORED), hasProperty(STORED));
|
||||
|
@ -17,6 +17,7 @@
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
@ -29,6 +30,7 @@ import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.QueryBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.parser.SolrQueryParserBase;
|
||||
import org.apache.solr.query.SolrRangeQuery;
|
||||
import org.apache.solr.response.TextResponseWriter;
|
||||
import org.apache.solr.search.QParser;
|
||||
@ -41,6 +43,7 @@ import org.apache.solr.uninverting.UninvertingReader.Type;
|
||||
public class TextField extends FieldType {
|
||||
protected boolean autoGeneratePhraseQueries;
|
||||
protected boolean enableGraphQueries;
|
||||
protected SolrQueryParserBase.SynonymQueryStyle synonymQueryStyle;
|
||||
|
||||
/**
|
||||
* Analyzer set by schema for text types to use when searching fields
|
||||
@ -72,6 +75,12 @@ public class TextField extends FieldType {
|
||||
String autoGeneratePhraseQueriesStr = args.remove(AUTO_GENERATE_PHRASE_QUERIES);
|
||||
if (autoGeneratePhraseQueriesStr != null)
|
||||
autoGeneratePhraseQueries = Boolean.parseBoolean(autoGeneratePhraseQueriesStr);
|
||||
|
||||
synonymQueryStyle = SolrQueryParserBase.SynonymQueryStyle.AS_SAME_TERM;
|
||||
String synonymQueryStyle = args.remove(SYNONYM_QUERY_STYLE);
|
||||
if (synonymQueryStyle != null) {
|
||||
this.synonymQueryStyle = SolrQueryParserBase.SynonymQueryStyle.valueOf(synonymQueryStyle.toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
||||
enableGraphQueries = true;
|
||||
String enableGraphQueriesStr = args.remove(ENABLE_GRAPH_QUERIES);
|
||||
@ -104,6 +113,8 @@ public class TextField extends FieldType {
|
||||
return enableGraphQueries;
|
||||
}
|
||||
|
||||
public SolrQueryParserBase.SynonymQueryStyle getSynonymQueryStyle() {return synonymQueryStyle;}
|
||||
|
||||
@Override
|
||||
public SortField getSortField(SchemaField field, boolean reverse) {
|
||||
/* :TODO: maybe warn if isTokenized(), but doesn't use LimitTokenCountFilter in its chain? */
|
||||
|
@ -1003,7 +1003,8 @@ public class ExtendedDismaxQParser extends QParser {
|
||||
|
||||
@Override
|
||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries)
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean enableGraphQueries,
|
||||
SynonymQueryStyle synonymQueryStyle)
|
||||
throws SyntaxError {
|
||||
Analyzer actualAnalyzer;
|
||||
if (removeStopFilter) {
|
||||
@ -1017,7 +1018,7 @@ public class ExtendedDismaxQParser extends QParser {
|
||||
} else {
|
||||
actualAnalyzer = parser.getReq().getSchema().getFieldType(field).getQueryAnalyzer();
|
||||
}
|
||||
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries);
|
||||
return super.newFieldQuery(actualAnalyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, enableGraphQueries, synonymQueryStyle);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -167,6 +167,67 @@
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="text_pick_best" class="solr.TextField" positionIncrementGap="100" synonymQueryStyle="pick_best" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="text_as_distinct" class="solr.TextField" positionIncrementGap="100" synonymQueryStyle="as_distinct_terms" autoGeneratePhraseQueries="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1"
|
||||
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
<filter class="solr.FlattenGraphFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.MockTokenizerFactory"/>
|
||||
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
||||
<filter class="solr.StopFilterFactory"
|
||||
ignoreCase="true"
|
||||
words="stopwords.txt"
|
||||
/>
|
||||
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0"
|
||||
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
|
||||
<filter class="solr.PorterStemFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<fieldType name="nametext" class="solr.TextField">
|
||||
<analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
|
||||
</fieldType>
|
||||
@ -590,6 +651,10 @@
|
||||
|
||||
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
|
||||
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
|
||||
<dynamicField name="t_pick_best_*" type="text_pick_best" indexed="true" stored="true"/>
|
||||
<dynamicField name="t_as_distinct_*" type="text_as_distinct" indexed="true" stored="true"/>
|
||||
|
||||
|
||||
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
|
||||
<dynamicField name="tv_*" type="text" indexed="true" stored="true"
|
||||
termVectors="true" termPositions="true" termOffsets="true"/>
|
||||
|
@ -31,4 +31,10 @@ pixima => pixma
|
||||
|
||||
# multiword synonyms
|
||||
wi fi => wifi
|
||||
crow blackbird, grackle
|
||||
crow blackbird, grackle
|
||||
|
||||
# Synonyms used in semantic expansion
|
||||
tabby => tabby, cat, feline, animal
|
||||
persian => persian, cat, feline, animal
|
||||
|
||||
jeans, denim pants
|
@ -96,6 +96,9 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
|
||||
assertU(adoc("id", "71", "text_sw", "ties"));
|
||||
assertU(adoc("id", "72", "text_sw", "wifi ATM"));
|
||||
assertU(adoc("id", "73", "shingle23", "A B X D E"));
|
||||
// assertU(adoc("id", "74", "text_pick_best", "tabby"));
|
||||
// assertU(adoc("id", "74", "text_as_distinct", "persian"));
|
||||
|
||||
assertU(commit());
|
||||
}
|
||||
|
||||
@ -2018,10 +2021,11 @@ public class TestExtendedDismaxParser extends SolrTestCaseJ4 {
|
||||
**/
|
||||
@Override
|
||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText,
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries, boolean fieldEnableGraphQueries)
|
||||
boolean quoted, boolean fieldAutoGenPhraseQueries,
|
||||
boolean fieldEnableGraphQueries, SynonymQueryStyle synonymQueryStyle)
|
||||
throws SyntaxError {
|
||||
Query q = super.newFieldQuery
|
||||
(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries);
|
||||
(analyzer, field, queryText, quoted, fieldAutoGenPhraseQueries, fieldEnableGraphQueries, synonymQueryStyle);
|
||||
if (q instanceof BooleanQuery) {
|
||||
boolean rewrittenSubQ = false; // dirty flag: rebuild the repacked query?
|
||||
BooleanQuery.Builder builder = newBooleanQuery();
|
||||
|
@ -1057,7 +1057,25 @@ public class TestSolrQueryParser extends SolrTestCaseJ4 {
|
||||
, "/response/numFound==1"
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
|
||||
public void testSynonymQueryStyle() throws Exception {
|
||||
|
||||
Query q = QParser.getParser("tabby", req(params("df", "t_pick_best_foo"))).getQuery();
|
||||
assertEquals("(t_pick_best_foo:tabbi | t_pick_best_foo:cat | t_pick_best_foo:felin | t_pick_best_foo:anim)", q.toString());
|
||||
|
||||
q = QParser.getParser("tabby", req(params("df", "t_as_distinct_foo"))).getQuery();
|
||||
assertEquals("t_as_distinct_foo:tabbi t_as_distinct_foo:cat t_as_distinct_foo:felin t_as_distinct_foo:anim", q.toString());
|
||||
|
||||
/*confirm autoGeneratePhraseQueries always builds OR queries*/
|
||||
q = QParser.getParser("jeans", req(params("df", "t_as_distinct_foo", "sow", "false"))).getQuery();
|
||||
assertEquals("(t_as_distinct_foo:\"denim pant\" t_as_distinct_foo:jean)", q.toString());
|
||||
|
||||
q = QParser.getParser("jeans", req(params("df", "t_pick_best_foo", "sow", "false"))).getQuery();
|
||||
assertEquals("(t_pick_best_foo:\"denim pant\" t_pick_best_foo:jean)", q.toString());
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBadRequestInSetQuery() throws SyntaxError {
|
||||
SolrQueryRequest req = req();
|
||||
|
@ -87,6 +87,13 @@ For multivalued fields, specifies a distance between multiple values, which prev
|
||||
|
||||
`autoGeneratePhraseQueries`:: For text fields. If `true`, Solr automatically generates phrase queries for adjacent terms. If `false`, terms must be enclosed in double-quotes to be treated as phrases.
|
||||
|
||||
`synonymQueryStyle`::
|
||||
Query used to combine scores of overlapping query terms (i.e. synonyms). Consider a search for "blue tee" with query-time synonyms `tshirt,tee`.
|
||||
+
|
||||
Use `as_same_term` (default) to blend terms, i.e. `SynonymQuery(tshirt,tee)` where each term will be treated as equally important. Use `pick_best` to select the most significant synonym when scoring, i.e. `Dismax(tee,tshirt)`. Use `as_distinct_terms` to score each synonym independently and sum the scores, i.e. `(tshirt OR tee)`, which biases scoring towards the most significant synonym.
|
||||
+
|
||||
`as_same_term` is appropriate when terms are true synonyms (television, tv). Use `pick_best` or `as_distinct_terms` when synonyms expand to hyponyms `(q=jeans w/ jeans\=>jeans,pants)` and you want exact matches to come before parent and sibling concepts. See this http://opensourceconnections.com/blog/2017/11/21/solr-synonyms-mea-culpa/[blog article].
|
||||
|
||||
`enableGraphQueries`::
|
||||
For text fields, applicable when querying with <<the-standard-query-parser.adoc#standard-query-parser-parameters,`sow=false`>> (which is the default for the `sow` parameter). Use `true`, the default, for field types with query analyzers including graph-aware filters, e.g., <<filter-descriptions.adoc#synonym-graph-filter,Synonym Graph Filter>> and <<filter-descriptions.adoc#word-delimiter-graph-filter,Word Delimiter Graph Filter>>.
|
||||
+
|
||||
@ -138,4 +145,4 @@ The default values for each property depend on the underlying `FieldType` class,
|
||||
|
||||
A field type may optionally specify a `<similarity/>` that will be used when scoring documents that refer to fields with this type, as long as the "global" similarity for the collection allows it.
|
||||
|
||||
By default, any field type which does not define a similarity, uses `BM25Similarity`. For more details, and examples of configuring both global & per-type Similarities, please see <<other-schema-elements.adoc#similarity,Other Schema Elements>>.
|
||||
By default, any field type which does not define a similarity, uses `BM25Similarity`. For more details, and examples of configuring both global & per-type Similarities, please see <<other-schema-elements.adoc#similarity,Other Schema Elements>>.
|
Loading…
x
Reference in New Issue
Block a user