mirror of https://github.com/apache/lucene.git
resolve TODO: run the dfas backwards
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1068957 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
70a9910b38
commit
762272e48a
|
@ -326,21 +326,25 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
|||
/** finds the smallest Lev(n) DFA that accepts the term. */
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
if (term.equals(termRef)) { // ed = 0
|
||||
int ed = matchers.length - 1;
|
||||
|
||||
if (matches(term, ed)) { // we match the outer dfa
|
||||
// now compute exact edit distance
|
||||
while (ed > 0) {
|
||||
if (matches(term, ed - 1)) {
|
||||
ed--;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// scale to a boost and return (if similarity > minSimilarity)
|
||||
if (ed == 0) { // exact match
|
||||
boostAtt.setBoost(1.0F);
|
||||
return AcceptStatus.YES_AND_SEEK;
|
||||
}
|
||||
|
||||
int codePointCount = -1;
|
||||
|
||||
// TODO: benchmark doing this backwards
|
||||
for (int i = 1; i < matchers.length; i++)
|
||||
if (matchers[i].run(term.bytes, term.offset, term.length)) {
|
||||
// this sucks, we convert just to score based on length.
|
||||
if (codePointCount == -1) {
|
||||
codePointCount = UnicodeUtil.codePointCount(term);
|
||||
}
|
||||
final float similarity = 1.0f - ((float) i / (float)
|
||||
} else {
|
||||
final int codePointCount = UnicodeUtil.codePointCount(term);
|
||||
final float similarity = 1.0f - ((float) ed / (float)
|
||||
(Math.min(codePointCount, termLength)));
|
||||
if (similarity > minSimilarity) {
|
||||
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
||||
|
@ -349,9 +353,15 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
|||
return AcceptStatus.NO_AND_SEEK;
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
return AcceptStatus.NO_AND_SEEK;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Returns true if term is within k edits of the query term.
 * For k == 0 the term is compared directly against termRef; for any other k
 * the check is delegated to matchers[k], run over the slice of term.bytes
 * described by term.offset and term.length.
 * NOTE(review): k is used as an index into matchers without a range check here;
 * callers are presumably expected to pass 0 <= k < matchers.length -- confirm.
 */
|
||||
final boolean matches(BytesRef term, int k) {
|
||||
// k == 0 is answered by plain equality with termRef, so matchers[0] is never consulted here
return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
|
||||
}
|
||||
|
||||
/** defers to superclass, except can start at an arbitrary location */
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue