resolve TODO: run the dfas backwards (try the largest edit-distance DFA first, then step down to find the exact distance)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1068957 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2011-02-09 16:10:00 +00:00
parent 70a9910b38
commit 762272e48a
1 changed files with 26 additions and 16 deletions

View File

@ -325,22 +325,26 @@ public final class FuzzyTermsEnum extends TermsEnum {
/** finds the smallest Lev(n) DFA that accepts the term. */ /** finds the smallest Lev(n) DFA that accepts the term. */
@Override @Override
protected AcceptStatus accept(BytesRef term) { protected AcceptStatus accept(BytesRef term) {
if (term.equals(termRef)) { // ed = 0 int ed = matchers.length - 1;
boostAtt.setBoost(1.0F);
return AcceptStatus.YES_AND_SEEK;
}
int codePointCount = -1; if (matches(term, ed)) { // we match the outer dfa
// now compute exact edit distance
// TODO: benchmark doing this backwards while (ed > 0) {
for (int i = 1; i < matchers.length; i++) if (matches(term, ed - 1)) {
if (matchers[i].run(term.bytes, term.offset, term.length)) { ed--;
// this sucks, we convert just to score based on length. } else {
if (codePointCount == -1) { break;
codePointCount = UnicodeUtil.codePointCount(term);
} }
final float similarity = 1.0f - ((float) i / (float) }
// scale to a boost and return (if similarity > minSimilarity)
if (ed == 0) { // exact match
boostAtt.setBoost(1.0F);
return AcceptStatus.YES_AND_SEEK;
} else {
final int codePointCount = UnicodeUtil.codePointCount(term);
final float similarity = 1.0f - ((float) ed / (float)
(Math.min(codePointCount, termLength))); (Math.min(codePointCount, termLength)));
if (similarity > minSimilarity) { if (similarity > minSimilarity) {
boostAtt.setBoost((similarity - minSimilarity) * scale_factor); boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
@ -349,8 +353,14 @@ public final class FuzzyTermsEnum extends TermsEnum {
return AcceptStatus.NO_AND_SEEK; return AcceptStatus.NO_AND_SEEK;
} }
} }
} else {
return AcceptStatus.NO_AND_SEEK; return AcceptStatus.NO_AND_SEEK;
}
}
/**
 * Returns true if term is within k edits of the query term.
 * <p>
 * For {@code k == 0} the answer is a direct equality check of {@code term}
 * against {@code termRef}; no automaton is consulted. For any other
 * {@code k}, the byte slice of {@code term} ({@code term.bytes}, starting at
 * {@code term.offset}, {@code term.length} bytes long) is run through
 * {@code matchers[k]}.
 * <p>
 * NOTE(review): {@code matchers[k]} is presumably the Lev(k) DFA built for the
 * query term -- confirm where {@code matchers} is populated.
 *
 * @param term candidate term to test
 * @param k    edit distance to test for; when non-zero it is used directly as
 *             an index into {@code matchers} (no bounds check is done here)
 * @return the result of the equality check when {@code k == 0}, otherwise the
 *         result of {@code matchers[k].run(...)} on the term's bytes
 */
final boolean matches(BytesRef term, int k) {
// k == 0: compare directly against termRef; otherwise delegate to matchers[k] on the term's byte slice
return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
} }
/** defers to superclass, except can start at an arbitrary location */ /** defers to superclass, except can start at an arbitrary location */