From 3e20ad3616efa9cddb06d17e0525adc220a38208 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Tue, 30 Oct 2012 14:13:41 +0000 Subject: [PATCH] LUCENE-4514: fold abstract PhraseScorer into its only subclass: SloppyPhraseScorer git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/cleanup2878@1403709 13f79535-47bb-0310-9956-ffa450edef68 --- .../lucene/search/MultiPhraseQuery.java | 2 +- .../org/apache/lucene/search/PhraseQuery.java | 2 +- .../apache/lucene/search/PhraseScorer.java | 125 ------------------ .../lucene/search/SloppyPhraseScorer.java | 85 +++++++++++- .../lucene/search/JustCompileSearch.java | 19 --- 5 files changed, 82 insertions(+), 151 deletions(-) delete mode 100644 lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java diff --git a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java index 0e8b093b5e5..4faf7d2e886 100644 --- a/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/MultiPhraseQuery.java @@ -261,7 +261,7 @@ public class MultiPhraseQuery extends Query { if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { - float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).freq; + float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq(); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java index 2b13dcfe06a..223c5d723ef 100644 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java +++ b/lucene/core/src/java/org/apache/lucene/search/PhraseQuery.java @@ -303,7 +303,7 @@ public class PhraseQuery extends Query { if (scorer != null) { int newDoc = scorer.advance(doc); if (newDoc == doc) { - float freq = slop == 0 ? scorer.freq() : ((PhraseScorer)scorer).freq; + float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq(); SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); ComplexExplanation result = new ComplexExplanation(); result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); diff --git a/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java deleted file mode 100644 index c77feb1afda..00000000000 --- a/lucene/core/src/java/org/apache/lucene/search/PhraseScorer.java +++ /dev/null @@ -1,125 +0,0 @@ -package org.apache.lucene.search; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; - -import org.apache.lucene.search.similarities.Similarity; - -/** Expert: Scoring functionality for phrase queries. - *
A document is considered matching if it contains the phrase-query terms - * at "valid" positions. What "valid positions" are - * depends on the type of the phrase query: for an exact phrase query terms are required - * to appear in adjacent locations, while for a sloppy phrase query some distance between - * the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes - * is invoked for each document containing all the phrase query terms, in order to - * compute the frequency of the phrase query in that document. A non zero frequency - * means a match. - */ -abstract class PhraseScorer extends Scorer { - PhrasePositions min, max; - - protected float freq; //phrase frequency in current doc as computed by phraseFreq(). - - final Similarity.SloppySimScorer docScorer; - - PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SloppySimScorer docScorer) { - super(weight); - this.docScorer = docScorer; - - // convert tps to a list of phrase positions. - // note: phrase-position differs from term-position in that its position - // reflects the phrase offset: pp.pos = tp.pos - offset. - // this allows to easily identify a matching (exact) phrase - // when all PhrasePositions have exactly the same position. - if (postings.length > 0) { - min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms); - max = min; - max.doc = -1; - for (int i = 1; i < postings.length; i++) { - PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); - max.next = pp; - max = pp; - max.doc = -1; - } - max.next = min; // make it cyclic for easier manipulation - } - } - - @Override - public int docID() { - return max.doc; - } - - @Override - public int nextDoc() throws IOException { - return advance(max.doc); - } - - @Override - public float score() throws IOException { - return docScorer.score(max.doc, freq); - } - - private boolean advanceMin(int target) throws IOException { - if (!min.skipTo(target)) { - max.doc = NO_MORE_DOCS; // for further calls to docID() - return false; - } - min = min.next; // cyclic - max = max.next; // cyclic - return true; - } - - @Override - public int advance(int target) throws IOException { - freq = 0.0f; - if (!advanceMin(target)) { - return NO_MORE_DOCS; - } - boolean restart=false; - while (freq == 0.0f) { - while (min.doc < max.doc || restart) { - restart = false; - if (!advanceMin(max.doc)) { - return NO_MORE_DOCS; - } - } - // found a doc with all of the terms - freq = phraseFreq(); // check for phrase - restart = true; - } - - // found a match - return max.doc; - } - - /** - * For a document containing all the phrase query terms, compute the - * frequency of the phrase in that document. - * A non zero frequency means a match. - *
Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations. - * @return frequency of the phrase in current doc, 0 if not found. - */ - abstract float phraseFreq() throws IOException; - - @Override - public String toString() { return "scorer(" + weight + ")"; } - -} diff --git a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java index af46873a905..f810af22267 100644 --- a/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java +++ b/lucene/core/src/java/org/apache/lucene/search/SloppyPhraseScorer.java @@ -29,7 +29,12 @@ import org.apache.lucene.index.Term; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.util.OpenBitSet; -final class SloppyPhraseScorer extends PhraseScorer { +final class SloppyPhraseScorer extends Scorer { + private PhrasePositions min, max; + + private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). + + private final Similarity.SloppySimScorer docScorer; private final int slop; private final int numPostings; @@ -47,10 +52,28 @@ final class SloppyPhraseScorer extends PhraseScorer { SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, int slop, Similarity.SloppySimScorer docScorer) { - super(weight, postings, docScorer); + super(weight); + this.docScorer = docScorer; this.slop = slop; this.numPostings = postings==null ? 0 : postings.length; pq = new PhraseQueue(postings.length); + // convert tps to a list of phrase positions. + // note: phrase-position differs from term-position in that its position + // reflects the phrase offset: pp.pos = tp.pos - offset. + // this allows to easily identify a matching (exact) phrase + // when all PhrasePositions have exactly the same position. + if (postings.length > 0) { + min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms); + max = min; + max.doc = -1; + for (int i = 1; i < postings.length; i++) { + PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); + max.next = pp; + max = pp; + max.doc = -1; + } + max.next = min; // make it cyclic for easier manipulation + } } /** @@ -71,8 +94,7 @@ final class SloppyPhraseScorer extends PhraseScorer { * would get same score as "g f"~2, although "c b"~2 could be matched twice. * We may want to fix this in the future (currently not, for performance reasons). */ - @Override - protected float phraseFreq() throws IOException { + private float phraseFreq() throws IOException { if (!initPhrasePositions()) { return 0.0f; } @@ -489,10 +511,14 @@ final class SloppyPhraseScorer extends PhraseScorer { } @Override - public int freq() throws IOException { + public int freq() { return numMatches; } + float sloppyFreq() { + return sloppyFreq; + } + // private void printQueue(PrintStream ps, PhrasePositions ext, String title) { // //if (min.doc != ?) return; // ps.println(); @@ -514,5 +540,54 @@ final class SloppyPhraseScorer extends PhraseScorer { // } // } + private boolean advanceMin(int target) throws IOException { + if (!min.skipTo(target)) { + max.doc = NO_MORE_DOCS; // for further calls to docID() + return false; + } + min = min.next; // cyclic + max = max.next; // cyclic + return true; + } + @Override + public int docID() { + return max.doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(max.doc); + } + + @Override + public float score() { + return docScorer.score(max.doc, sloppyFreq); + } + + @Override + public int advance(int target) throws IOException { + sloppyFreq = 0.0f; + if (!advanceMin(target)) { + return NO_MORE_DOCS; + } + boolean restart=false; + while (sloppyFreq == 0.0f) { + while (min.doc < max.doc || restart) { + restart = false; + if (!advanceMin(max.doc)) { + return NO_MORE_DOCS; + } + } + // found a doc with all of the terms + sloppyFreq = phraseFreq(); // check for phrase + restart = true; + } + + // found a match + return max.doc; + } + + @Override + public String toString() { return "scorer(" + weight + ")"; } } diff --git a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java index 327ec0488d1..80d9c469d7e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java +++ b/lucene/core/src/test/org/apache/lucene/search/JustCompileSearch.java @@ -189,25 +189,6 @@ final class JustCompileSearch { } - static final class JustCompilePhraseScorer extends PhraseScorer { - - JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, - Similarity.SloppySimScorer docScorer) { - super(weight, postings, docScorer); - } - - @Override - protected float phraseFreq() { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - @Override - public int freq() throws IOException { - throw new UnsupportedOperationException(UNSUPPORTED_MSG); - } - - } - static final class JustCompileQuery extends Query { @Override