mirror of https://github.com/apache/lucene.git
SOLR-2058: Add optional "phrase slop" syntax to edismax pf/pf2/pf3
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1342681 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
fc48a07bfb
commit
50baab7c4b
|
@ -310,6 +310,11 @@ New Features
|
|||
that the suggestions need not consist entirely of terms with a greater document
|
||||
frequency than the queried terms. (James Dyer)
|
||||
|
||||
* SOLR-2058: Edismax query parser to allow "phrase slop" to be specified per-field
|
||||
on the pf/pf2/pf3 parameters using optional "FieldName~slop^boost" syntax. The
|
||||
prior "FieldName^boost" syntax is still accepted. In such cases the value on the
|
||||
"ps" parameter serves as the default slop. (Ron Mayer via James Dyer)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
|
|
@ -139,14 +139,15 @@ class ExtendedDismaxQParser extends QParser {
|
|||
}
|
||||
|
||||
// Boosted phrase of the full query string
|
||||
Map<String,Float> phraseFields =
|
||||
SolrPluginUtils.parseFieldBoosts(solrParams.getParams(DisMaxParams.PF));
|
||||
List<FieldParams> phraseFields =
|
||||
U.parseFieldBoostsAndSlop(solrParams.getParams(DMP.PF),0);
|
||||
// Boosted Bi-Term Shingles from the query string
|
||||
Map<String,Float> phraseFields2 =
|
||||
SolrPluginUtils.parseFieldBoosts(solrParams.getParams("pf2"));
|
||||
List<FieldParams> phraseFields2 =
|
||||
U.parseFieldBoostsAndSlop(solrParams.getParams("pf2"),2);
|
||||
// Boosted Tri-Term Shingles from the query string
|
||||
Map<String,Float> phraseFields3 =
|
||||
SolrPluginUtils.parseFieldBoosts(solrParams.getParams("pf3"));
|
||||
List<FieldParams> phraseFields3 =
|
||||
U.parseFieldBoostsAndSlop(solrParams.getParams("pf3"),3);
|
||||
|
||||
|
||||
float tiebreaker = solrParams.getFloat(DisMaxParams.TIE, 0.0f);
|
||||
|
||||
|
@ -313,10 +314,12 @@ class ExtendedDismaxQParser extends QParser {
|
|||
query.add(parsedUserQuery, BooleanClause.Occur.MUST);
|
||||
|
||||
// sloppy phrase queries for proximity
|
||||
if (phraseFields.size() > 0 ||
|
||||
phraseFields2.size() > 0 ||
|
||||
phraseFields3.size() > 0) {
|
||||
List<FieldParams> allPhraseFields = new ArrayList<FieldParams>();
|
||||
allPhraseFields.addAll(phraseFields);
|
||||
allPhraseFields.addAll(phraseFields2);
|
||||
allPhraseFields.addAll(phraseFields3);
|
||||
|
||||
if (allPhraseFields.size() > 0) {
|
||||
// find non-field clauses
|
||||
List<Clause> normalClauses = new ArrayList<Clause>(clauses.size());
|
||||
for (Clause clause : clauses) {
|
||||
|
@ -330,14 +333,14 @@ class ExtendedDismaxQParser extends QParser {
|
|||
normalClauses.add(clause);
|
||||
}
|
||||
|
||||
// full phrase...
|
||||
addShingledPhraseQueries(query, normalClauses, phraseFields, 0,
|
||||
tiebreaker, pslop);
|
||||
// shingles...
|
||||
addShingledPhraseQueries(query, normalClauses, phraseFields2, 2,
|
||||
tiebreaker, pslop);
|
||||
addShingledPhraseQueries(query, normalClauses, phraseFields3, 3,
|
||||
tiebreaker, pslop);
|
||||
// full phrase and shingles
|
||||
for (FieldParams phraseField: allPhraseFields) {
|
||||
int slop = (phraseField.getSlop() == 0) ? pslop : phraseField.getSlop();
|
||||
Map<String,Float> pf = new HashMap<String,Float>(1);
|
||||
pf.put(phraseField.getField(),phraseField.getBoost());
|
||||
addShingledPhraseQueries(query, normalClauses, pf,
|
||||
phraseField.getWordGrams(),tiebreaker, slop);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
package org.apache.solr.search;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A class to hold "phrase slop" and "boost" parameters for pf, pf2, pf3 parameters
|
||||
**/
|
||||
public class FieldParams {
|
||||
private final int wordGrams; // make bigrams if 2, trigrams if 3, or all if 0
|
||||
private final int slop; // null defaults to ps parameter
|
||||
private final float boost;
|
||||
private final String field;
|
||||
public FieldParams(String field, int wordGrams, int slop, float boost) {
|
||||
this.wordGrams = wordGrams;
|
||||
this.slop = slop;
|
||||
this.boost = boost;
|
||||
this.field = field;
|
||||
}
|
||||
public int getWordGrams() {
|
||||
return wordGrams;
|
||||
}
|
||||
public int getSlop() {
|
||||
return slop;
|
||||
}
|
||||
public float getBoost() {
|
||||
return boost;
|
||||
}
|
||||
public String getField() {
|
||||
return field;
|
||||
}
|
||||
|
||||
}
|
|
@ -377,6 +377,9 @@ public class SolrPluginUtils {
|
|||
}
|
||||
|
||||
}
|
||||
private static final Pattern whitespacePattern = Pattern.compile("\\s+");
|
||||
private static final Pattern caratPattern = Pattern.compile("\\^");
|
||||
private static final Pattern tildePattern = Pattern.compile("[~]");
|
||||
|
||||
/**
|
||||
* Given a string containing fieldNames and boost info,
|
||||
|
@ -408,16 +411,58 @@ public class SolrPluginUtils {
|
|||
}
|
||||
Map<String, Float> out = new HashMap<String,Float>(7);
|
||||
for (String in : fieldLists) {
|
||||
if (null == in || "".equals(in.trim()))
|
||||
if (null == in) {
|
||||
continue;
|
||||
String[] bb = in.trim().split("\\s+");
|
||||
}
|
||||
in = in.trim();
|
||||
if(in.length()==0) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String[] bb = whitespacePattern.split(in);
|
||||
for (String s : bb) {
|
||||
String[] bbb = s.split("\\^");
|
||||
String[] bbb = caratPattern.split(s);
|
||||
out.put(bbb[0], 1 == bbb.length ? null : Float.valueOf(bbb[1]));
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
/**
|
||||
|
||||
/**
|
||||
* Like {@link #parseFieldBoosts}, but allows for an optional slop value prefixed by "~".
|
||||
*
|
||||
* @param fieldLists - an array of Strings eg. <code>{"fieldOne^2.3", "fieldTwo", fieldThree~5^-0.4}</code>
|
||||
* @param wordGrams - (0=all words, 2,3 = shingle size)
|
||||
* @return - FieldParams containing the fieldname,boost,slop,and shingle size
|
||||
*/
|
||||
public static List<FieldParams> parseFieldBoostsAndSlop(String[] fieldLists,int wordGrams) {
|
||||
if (null == fieldLists || 0 == fieldLists.length) {
|
||||
return new ArrayList<FieldParams>();
|
||||
}
|
||||
List<FieldParams> out = new ArrayList<FieldParams>();
|
||||
for (String in : fieldLists) {
|
||||
if (null == in) {
|
||||
continue;
|
||||
}
|
||||
in = in.trim();
|
||||
if(in.length()==0) {
|
||||
continue;
|
||||
}
|
||||
String[] fieldConfigs = whitespacePattern.split(in);
|
||||
for (String s : fieldConfigs) {
|
||||
String[] fieldAndSlopVsBoost = caratPattern.split(s);
|
||||
String[] fieldVsSlop = tildePattern.split(fieldAndSlopVsBoost[0]);
|
||||
String field = fieldVsSlop[0];
|
||||
int slop = (2 == fieldVsSlop.length) ? Integer.valueOf(fieldVsSlop[1]) : 0;
|
||||
Float boost = (1 == fieldAndSlopVsBoost.length) ? 1 : Float.valueOf(fieldAndSlopVsBoost[1]);
|
||||
FieldParams fp = new FieldParams(field,wordGrams,slop,boost);
|
||||
out.add(fp);
|
||||
}
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Checks the number of optional clauses in the query, and compares it
|
||||
|
|
|
@ -597,5 +597,29 @@ public class TestExtendedDismaxParser extends AbstractSolrTestCase {
|
|||
"debugQuery", "true",
|
||||
"defType", "edismax"),
|
||||
"//doc[1]/str[@name='id'][.='s1']");
|
||||
|
||||
assertQ("ps/ps2/ps3 with default slop overrides not working",
|
||||
req("q", "zzzz xxxx cccc vvvv",
|
||||
"qf", "phrase_sw",
|
||||
"pf", "phrase_sw~1^10 phrase_sw~2^20 phrase_sw^30",
|
||||
"pf2", "phrase_sw~2^22 phrase_sw^33",
|
||||
"pf3", "phrase_sw~2^222 phrase_sw^333",
|
||||
"ps", "3",
|
||||
"defType", "edismax",
|
||||
"debugQuery", "true"),
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~1^10.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~2^20.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc vvvv\"~3^30.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~2^22.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc\"~2^22.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"cccc vvvv\"~2^22.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx\"~3^33.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc\"~3^33.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"cccc vvvv\"~3^33.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc\"~2^222.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc vvvv\"~2^222.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"zzzz xxxx cccc\"~3^333.0')]",
|
||||
"//str[@name='parsedquery'][contains(.,'phrase_sw:\"xxxx cccc vvvv\"~3^333.0')]"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue