mirror of https://github.com/apache/lucene.git
resolve TODO: run the dfas backwards
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1068957 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
70a9910b38
commit
762272e48a
|
@ -326,21 +326,25 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
/** finds the smallest Lev(n) DFA that accepts the term. */
|
/** finds the smallest Lev(n) DFA that accepts the term. */
|
||||||
@Override
|
@Override
|
||||||
protected AcceptStatus accept(BytesRef term) {
|
protected AcceptStatus accept(BytesRef term) {
|
||||||
if (term.equals(termRef)) { // ed = 0
|
int ed = matchers.length - 1;
|
||||||
|
|
||||||
|
if (matches(term, ed)) { // we match the outer dfa
|
||||||
|
// now compute exact edit distance
|
||||||
|
while (ed > 0) {
|
||||||
|
if (matches(term, ed - 1)) {
|
||||||
|
ed--;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// scale to a boost and return (if similarity > minSimilarity)
|
||||||
|
if (ed == 0) { // exact match
|
||||||
boostAtt.setBoost(1.0F);
|
boostAtt.setBoost(1.0F);
|
||||||
return AcceptStatus.YES_AND_SEEK;
|
return AcceptStatus.YES_AND_SEEK;
|
||||||
}
|
} else {
|
||||||
|
final int codePointCount = UnicodeUtil.codePointCount(term);
|
||||||
int codePointCount = -1;
|
final float similarity = 1.0f - ((float) ed / (float)
|
||||||
|
|
||||||
// TODO: benchmark doing this backwards
|
|
||||||
for (int i = 1; i < matchers.length; i++)
|
|
||||||
if (matchers[i].run(term.bytes, term.offset, term.length)) {
|
|
||||||
// this sucks, we convert just to score based on length.
|
|
||||||
if (codePointCount == -1) {
|
|
||||||
codePointCount = UnicodeUtil.codePointCount(term);
|
|
||||||
}
|
|
||||||
final float similarity = 1.0f - ((float) i / (float)
|
|
||||||
(Math.min(codePointCount, termLength)));
|
(Math.min(codePointCount, termLength)));
|
||||||
if (similarity > minSimilarity) {
|
if (similarity > minSimilarity) {
|
||||||
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
boostAtt.setBoost((similarity - minSimilarity) * scale_factor);
|
||||||
|
@ -349,9 +353,15 @@ public final class FuzzyTermsEnum extends TermsEnum {
|
||||||
return AcceptStatus.NO_AND_SEEK;
|
return AcceptStatus.NO_AND_SEEK;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
return AcceptStatus.NO_AND_SEEK;
|
return AcceptStatus.NO_AND_SEEK;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** returns true if term is within k edits of the query term */
|
||||||
|
final boolean matches(BytesRef term, int k) {
|
||||||
|
return k == 0 ? term.equals(termRef) : matchers[k].run(term.bytes, term.offset, term.length);
|
||||||
|
}
|
||||||
|
|
||||||
/** defers to superclass, except can start at an arbitrary location */
|
/** defers to superclass, except can start at an arbitrary location */
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue