LUCENE-4514: fold abstract PhraseScorer into its only subclass: SloppyPhraseScorer

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/cleanup2878@1403709 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2012-10-30 14:13:41 +00:00
parent 38bf19cdcf
commit 3e20ad3616
5 changed files with 82 additions and 151 deletions

View File

@ -261,7 +261,7 @@ public class MultiPhraseQuery extends Query {
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).freq;
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -303,7 +303,7 @@ public class PhraseQuery extends Query {
if (scorer != null) {
int newDoc = scorer.advance(doc);
if (newDoc == doc) {
float freq = slop == 0 ? scorer.freq() : ((PhraseScorer)scorer).freq;
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
ComplexExplanation result = new ComplexExplanation();
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");

View File

@ -1,125 +0,0 @@
package org.apache.lucene.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.similarities.Similarity;
/** Expert: Scoring functionality for phrase queries.
* <br>A document is considered matching if it contains the phrase-query terms
* at "valid" positions. What "valid positions" are
* depends on the type of the phrase query: for an exact phrase query terms are required
* to appear in adjacent locations, while for a sloppy phrase query some distance between
* the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes
* is invoked for each document containing all the phrase query terms, in order to
* compute the frequency of the phrase query in that document. A non zero frequency
* means a match.
*/
abstract class PhraseScorer extends Scorer {
PhrasePositions min, max;
protected float freq; //phrase frequency in current doc as computed by phraseFreq().
final Similarity.SloppySimScorer docScorer;
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SloppySimScorer docScorer) {
super(weight);
this.docScorer = docScorer;
// convert tps to a list of phrase positions.
// note: phrase-position differs from term-position in that its position
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
if (postings.length > 0) {
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
max.next = pp;
max = pp;
max.doc = -1;
}
max.next = min; // make it cyclic for easier manipulation
}
}
@Override
public int docID() {
return max.doc;
}
@Override
public int nextDoc() throws IOException {
return advance(max.doc);
}
@Override
public float score() throws IOException {
return docScorer.score(max.doc, freq);
}
private boolean advanceMin(int target) throws IOException {
if (!min.skipTo(target)) {
max.doc = NO_MORE_DOCS; // for further calls to docID()
return false;
}
min = min.next; // cyclic
max = max.next; // cyclic
return true;
}
@Override
public int advance(int target) throws IOException {
freq = 0.0f;
if (!advanceMin(target)) {
return NO_MORE_DOCS;
}
boolean restart=false;
while (freq == 0.0f) {
while (min.doc < max.doc || restart) {
restart = false;
if (!advanceMin(max.doc)) {
return NO_MORE_DOCS;
}
}
// found a doc with all of the terms
freq = phraseFreq(); // check for phrase
restart = true;
}
// found a match
return max.doc;
}
/**
* For a document containing all the phrase query terms, compute the
* frequency of the phrase in that document.
* A non zero frequency means a match.
* <br>Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
* @return frequency of the phrase in current doc, 0 if not found.
*/
abstract float phraseFreq() throws IOException;
@Override
public String toString() { return "scorer(" + weight + ")"; }
}

View File

@ -29,7 +29,12 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.util.OpenBitSet;
final class SloppyPhraseScorer extends PhraseScorer {
final class SloppyPhraseScorer extends Scorer {
private PhrasePositions min, max;
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
private final Similarity.SloppySimScorer docScorer;
private final int slop;
private final int numPostings;
@ -47,10 +52,28 @@ final class SloppyPhraseScorer extends PhraseScorer {
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
int slop, Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer);
super(weight);
this.docScorer = docScorer;
this.slop = slop;
this.numPostings = postings==null ? 0 : postings.length;
pq = new PhraseQueue(postings.length);
// convert tps to a list of phrase positions.
// note: phrase-position differs from term-position in that its position
// reflects the phrase offset: pp.pos = tp.pos - offset.
// this allows to easily identify a matching (exact) phrase
// when all PhrasePositions have exactly the same position.
if (postings.length > 0) {
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
max = min;
max.doc = -1;
for (int i = 1; i < postings.length; i++) {
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
max.next = pp;
max = pp;
max.doc = -1;
}
max.next = min; // make it cyclic for easier manipulation
}
}
/**
@ -71,8 +94,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
* would get same score as "g f"~2, although "c b"~2 could be matched twice.
* We may want to fix this in the future (currently not, for performance reasons).
*/
@Override
protected float phraseFreq() throws IOException {
private float phraseFreq() throws IOException {
if (!initPhrasePositions()) {
return 0.0f;
}
@ -489,10 +511,14 @@ final class SloppyPhraseScorer extends PhraseScorer {
}
@Override
public int freq() throws IOException {
public int freq() {
return numMatches;
}
float sloppyFreq() {
return sloppyFreq;
}
// private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
// //if (min.doc != ?) return;
// ps.println();
@ -514,5 +540,54 @@ final class SloppyPhraseScorer extends PhraseScorer {
// }
// }
private boolean advanceMin(int target) throws IOException {
if (!min.skipTo(target)) {
max.doc = NO_MORE_DOCS; // for further calls to docID()
return false;
}
min = min.next; // cyclic
max = max.next; // cyclic
return true;
}
@Override
public int docID() {
return max.doc;
}
@Override
public int nextDoc() throws IOException {
return advance(max.doc);
}
@Override
public float score() {
return docScorer.score(max.doc, sloppyFreq);
}
@Override
public int advance(int target) throws IOException {
sloppyFreq = 0.0f;
if (!advanceMin(target)) {
return NO_MORE_DOCS;
}
boolean restart=false;
while (sloppyFreq == 0.0f) {
while (min.doc < max.doc || restart) {
restart = false;
if (!advanceMin(max.doc)) {
return NO_MORE_DOCS;
}
}
// found a doc with all of the terms
sloppyFreq = phraseFreq(); // check for phrase
restart = true;
}
// found a match
return max.doc;
}
@Override
public String toString() { return "scorer(" + weight + ")"; }
}

View File

@ -189,25 +189,6 @@ final class JustCompileSearch {
}
static final class JustCompilePhraseScorer extends PhraseScorer {
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
Similarity.SloppySimScorer docScorer) {
super(weight, postings, docScorer);
}
@Override
protected float phraseFreq() {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int freq() throws IOException {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileQuery extends Query {
@Override