mirror of https://github.com/apache/lucene.git
LUCENE-4514: fold abstract PhraseScorer into its only subclass: SloppyPhraseScorer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/cleanup2878@1403709 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
38bf19cdcf
commit
3e20ad3616
|
@ -261,7 +261,7 @@ public class MultiPhraseQuery extends Query {
|
|||
if (scorer != null) {
|
||||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).freq;
|
||||
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
|
|
|
@ -303,7 +303,7 @@ public class PhraseQuery extends Query {
|
|||
if (scorer != null) {
|
||||
int newDoc = scorer.advance(doc);
|
||||
if (newDoc == doc) {
|
||||
float freq = slop == 0 ? scorer.freq() : ((PhraseScorer)scorer).freq;
|
||||
float freq = slop == 0 ? scorer.freq() : ((SloppyPhraseScorer)scorer).sloppyFreq();
|
||||
SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context);
|
||||
ComplexExplanation result = new ComplexExplanation();
|
||||
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:");
|
||||
|
|
|
@ -1,125 +0,0 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
|
||||
/** Expert: Scoring functionality for phrase queries.
|
||||
* <br>A document is considered matching if it contains the phrase-query terms
|
||||
* at "valid" positions. What "valid positions" are
|
||||
* depends on the type of the phrase query: for an exact phrase query terms are required
|
||||
* to appear in adjacent locations, while for a sloppy phrase query some distance between
|
||||
* the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes
|
||||
* is invoked for each document containing all the phrase query terms, in order to
|
||||
* compute the frequency of the phrase query in that document. A non zero frequency
|
||||
* means a match.
|
||||
*/
|
||||
abstract class PhraseScorer extends Scorer {
|
||||
PhrasePositions min, max;
|
||||
|
||||
protected float freq; //phrase frequency in current doc as computed by phraseFreq().
|
||||
|
||||
final Similarity.SloppySimScorer docScorer;
|
||||
|
||||
PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
Similarity.SloppySimScorer docScorer) {
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
|
||||
// convert tps to a list of phrase positions.
|
||||
// note: phrase-position differs from term-position in that its position
|
||||
// reflects the phrase offset: pp.pos = tp.pos - offset.
|
||||
// this allows to easily identify a matching (exact) phrase
|
||||
// when all PhrasePositions have exactly the same position.
|
||||
if (postings.length > 0) {
|
||||
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
|
||||
max = min;
|
||||
max.doc = -1;
|
||||
for (int i = 1; i < postings.length; i++) {
|
||||
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
||||
max.next = pp;
|
||||
max = pp;
|
||||
max.doc = -1;
|
||||
}
|
||||
max.next = min; // make it cyclic for easier manipulation
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return max.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(max.doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() throws IOException {
|
||||
return docScorer.score(max.doc, freq);
|
||||
}
|
||||
|
||||
private boolean advanceMin(int target) throws IOException {
|
||||
if (!min.skipTo(target)) {
|
||||
max.doc = NO_MORE_DOCS; // for further calls to docID()
|
||||
return false;
|
||||
}
|
||||
min = min.next; // cyclic
|
||||
max = max.next; // cyclic
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
freq = 0.0f;
|
||||
if (!advanceMin(target)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
boolean restart=false;
|
||||
while (freq == 0.0f) {
|
||||
while (min.doc < max.doc || restart) {
|
||||
restart = false;
|
||||
if (!advanceMin(max.doc)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
// found a doc with all of the terms
|
||||
freq = phraseFreq(); // check for phrase
|
||||
restart = true;
|
||||
}
|
||||
|
||||
// found a match
|
||||
return max.doc;
|
||||
}
|
||||
|
||||
/**
|
||||
* For a document containing all the phrase query terms, compute the
|
||||
* frequency of the phrase in that document.
|
||||
* A non zero frequency means a match.
|
||||
* <br>Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations.
|
||||
* @return frequency of the phrase in current doc, 0 if not found.
|
||||
*/
|
||||
abstract float phraseFreq() throws IOException;
|
||||
|
||||
@Override
|
||||
public String toString() { return "scorer(" + weight + ")"; }
|
||||
|
||||
}
|
|
@ -29,7 +29,12 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.util.OpenBitSet;
|
||||
|
||||
final class SloppyPhraseScorer extends PhraseScorer {
|
||||
final class SloppyPhraseScorer extends Scorer {
|
||||
private PhrasePositions min, max;
|
||||
|
||||
private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq().
|
||||
|
||||
private final Similarity.SloppySimScorer docScorer;
|
||||
|
||||
private final int slop;
|
||||
private final int numPostings;
|
||||
|
@ -47,10 +52,28 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
|
||||
SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
int slop, Similarity.SloppySimScorer docScorer) {
|
||||
super(weight, postings, docScorer);
|
||||
super(weight);
|
||||
this.docScorer = docScorer;
|
||||
this.slop = slop;
|
||||
this.numPostings = postings==null ? 0 : postings.length;
|
||||
pq = new PhraseQueue(postings.length);
|
||||
// convert tps to a list of phrase positions.
|
||||
// note: phrase-position differs from term-position in that its position
|
||||
// reflects the phrase offset: pp.pos = tp.pos - offset.
|
||||
// this allows to easily identify a matching (exact) phrase
|
||||
// when all PhrasePositions have exactly the same position.
|
||||
if (postings.length > 0) {
|
||||
min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms);
|
||||
max = min;
|
||||
max.doc = -1;
|
||||
for (int i = 1; i < postings.length; i++) {
|
||||
PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms);
|
||||
max.next = pp;
|
||||
max = pp;
|
||||
max.doc = -1;
|
||||
}
|
||||
max.next = min; // make it cyclic for easier manipulation
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -71,8 +94,7 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
* would get same score as "g f"~2, although "c b"~2 could be matched twice.
|
||||
* We may want to fix this in the future (currently not, for performance reasons).
|
||||
*/
|
||||
@Override
|
||||
protected float phraseFreq() throws IOException {
|
||||
private float phraseFreq() throws IOException {
|
||||
if (!initPhrasePositions()) {
|
||||
return 0.0f;
|
||||
}
|
||||
|
@ -489,10 +511,14 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
public int freq() {
|
||||
return numMatches;
|
||||
}
|
||||
|
||||
float sloppyFreq() {
|
||||
return sloppyFreq;
|
||||
}
|
||||
|
||||
// private void printQueue(PrintStream ps, PhrasePositions ext, String title) {
|
||||
// //if (min.doc != ?) return;
|
||||
// ps.println();
|
||||
|
@ -514,5 +540,54 @@ final class SloppyPhraseScorer extends PhraseScorer {
|
|||
// }
|
||||
// }
|
||||
|
||||
private boolean advanceMin(int target) throws IOException {
|
||||
if (!min.skipTo(target)) {
|
||||
max.doc = NO_MORE_DOCS; // for further calls to docID()
|
||||
return false;
|
||||
}
|
||||
min = min.next; // cyclic
|
||||
max = max.next; // cyclic
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return max.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
return advance(max.doc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
return docScorer.score(max.doc, sloppyFreq);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
sloppyFreq = 0.0f;
|
||||
if (!advanceMin(target)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
boolean restart=false;
|
||||
while (sloppyFreq == 0.0f) {
|
||||
while (min.doc < max.doc || restart) {
|
||||
restart = false;
|
||||
if (!advanceMin(max.doc)) {
|
||||
return NO_MORE_DOCS;
|
||||
}
|
||||
}
|
||||
// found a doc with all of the terms
|
||||
sloppyFreq = phraseFreq(); // check for phrase
|
||||
restart = true;
|
||||
}
|
||||
|
||||
// found a match
|
||||
return max.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() { return "scorer(" + weight + ")"; }
|
||||
}
|
||||
|
|
|
@ -189,25 +189,6 @@ final class JustCompileSearch {
|
|||
|
||||
}
|
||||
|
||||
static final class JustCompilePhraseScorer extends PhraseScorer {
|
||||
|
||||
JustCompilePhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings,
|
||||
Similarity.SloppySimScorer docScorer) {
|
||||
super(weight, postings, docScorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected float phraseFreq() {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int freq() throws IOException {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static final class JustCompileQuery extends Query {
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue