From 79df1c1618adc25bbf4897e3f7c7575b6304e567 Mon Sep 17 00:00:00 2001 From: "Chris M. Hostetter" Date: Wed, 20 Jan 2010 19:24:30 +0000 Subject: [PATCH] SOLR-1553: edismax improvements -- pf param is now backcompat with dismax; pf and pf3 logic was refactored to reduce code; added pf2 which does what pf did in the first iteration of edismax git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@901342 13f79535-47bb-0310-9956-ffa450edef68 --- .../search/ExtendedDismaxQParserPlugin.java | 187 ++++++++++-------- 1 file changed, 103 insertions(+), 84 deletions(-) diff --git a/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java b/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java index f80e9fe2ee5..b6486f29a75 100755 --- a/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java +++ b/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java @@ -104,8 +104,15 @@ class ExtendedDismaxQParser extends QParser { SolrParams solrParams = localParams == null ? 
params : new DefaultSolrParams(localParams, params); queryFields = U.parseFieldBoosts(solrParams.getParams(DMP.QF)); - Map phraseFields = U.parseFieldBoosts(solrParams.getParams(DMP.PF)); - Map phraseFields3 = U.parseFieldBoosts(solrParams.getParams("pf3")); + // Boosted phrase of the full query string + Map phraseFields = + U.parseFieldBoosts(solrParams.getParams(DMP.PF)); + // Boosted Bi-Term Shingles from the query string + Map phraseFields2 = + U.parseFieldBoosts(solrParams.getParams("pf2")); + // Boosted Tri-Term Shingles from the query string + Map phraseFields3 = + U.parseFieldBoosts(solrParams.getParams("pf3")); float tiebreaker = solrParams.getFloat(DMP.TIE, 0.0f); @@ -284,7 +291,10 @@ class ExtendedDismaxQParser extends QParser { query.add(parsedUserQuery, BooleanClause.Occur.MUST); // sloppy phrase queries for proximity - if (phraseFields.size() > 0 || phraseFields3.size() > 0) { + if (phraseFields.size() > 0 || + phraseFields2.size() > 0 || + phraseFields3.size() > 0) { + // find non-field clauses List normalClauses = new ArrayList(clauses.size()); for (Clause clause : clauses) { @@ -298,70 +308,15 @@ class ExtendedDismaxQParser extends QParser { normalClauses.add(clause); } - Map pf = phraseFields; - if (normalClauses.size() >= 2 && pf.size() > 0) { - StringBuilder sb = new StringBuilder(); - for (int i=0; i= 3 && pf.size() > 0) { - StringBuilder sb = new StringBuilder(); - for (int i=0; i clauses, + final Map fields, + int shingleSize, + final float tiebreaker, + final int slop) + throws ParseException { + + if (null == fields || fields.isEmpty() || + null == clauses || clauses.size() <= shingleSize ) + return; + + if (0 == shingleSize) shingleSize = clauses.size(); + + final int goat = shingleSize-1; // :TODO: better name for var? 
+ + StringBuilder userPhraseQuery = new StringBuilder(); + for (int i=0; i < clauses.size() - goat; i++) { + userPhraseQuery.append('"'); + for (int j=0; j <= goat; j++) { + userPhraseQuery.append(clauses.get(i + j).val); + userPhraseQuery.append(' '); + } + userPhraseQuery.append('"'); + userPhraseQuery.append(' '); + } + + /* for parsing sloppy phrases using DisjunctionMaxQueries */ + ExtendedSolrQueryParser pp = + new ExtendedSolrQueryParser(this, IMPOSSIBLE_FIELD_NAME); + + pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, fields); + pp.setPhraseSlop(slop); + pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords + + /* :TODO: reevaluate using makeDismax=true vs false... + * + * The DismaxQueryParser always used DisjunctionMaxQueries for the + * pf boost, for the same reasons it used them for the qf fields. + * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added + * the "makeDismax=false" property to use BooleanQueries instead, but + * when asked why his response was "I honestly don't recall" ... + * + * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813 + * + * so for now, we continue to use dismax style queries because it + * seems the most logical and is back compatible, but we should + * try to figure out what Yonik was thinking at the time (because he + * rarely does things for no reason) + */ + pp.makeDismax = true; + + + // minClauseSize is independent of the shingleSize because of stop words + // (if they are removed from the middle, so be it, but we need at least + // two or there shouldn't be a boost) + pp.minClauseSize = 2; + + // TODO: perhaps we shouldn't use synonyms either... + + Query phrase = pp.parse(userPhraseQuery.toString()); + if (phrase != null) { + mainQuery.add(phrase, BooleanClause.Occur.SHOULD); + } + } + + @Override public String[] getDefaultHighlightFields() { String[] highFields = queryFields.keySet().toArray(new String[0]);