mirror of https://github.com/apache/lucene.git
LUCENE-1297 - Allow other string distance measures for the SpellChecker
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@669085 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
4adc3f7ba6
commit
ce19aa28c0
|
@ -186,6 +186,9 @@ New features
|
||||||
|
|
||||||
16. LUCENE-1298: MoreLikeThis can now accept a custom Similarity (Grant Ingersoll)
|
16. LUCENE-1298: MoreLikeThis can now accept a custom Similarity (Grant Ingersoll)
|
||||||
|
|
||||||
|
17. LUCENE-1297: Allow other string distance measures for the SpellChecker
|
||||||
|
(Thomas Morton via Otis Gospodnetic)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
1. LUCENE-705: When building a compound file, use
|
1. LUCENE-705: When building a compound file, use
|
||||||
|
|
|
@ -0,0 +1,112 @@
|
||||||
|
package org.apache.lucene.search.spell;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
public class JaroWinklerDistance implements StringDistance {
|
||||||
|
|
||||||
|
private float threshold = 0.7f;
|
||||||
|
|
||||||
|
private int[] matches(String s1, String s2) {
|
||||||
|
String max, min;
|
||||||
|
if (s1.length() > s2.length()) {
|
||||||
|
max = s1;
|
||||||
|
min = s2;
|
||||||
|
} else {
|
||||||
|
max = s2;
|
||||||
|
min = s1;
|
||||||
|
}
|
||||||
|
int range = Math.max(max.length() / 2 - 1, 0);
|
||||||
|
int[] matchIndexes = new int[min.length()];
|
||||||
|
Arrays.fill(matchIndexes, -1);
|
||||||
|
boolean[] matchFlags = new boolean[max.length()];
|
||||||
|
int matches = 0;
|
||||||
|
for (int mi = 0; mi < min.length(); mi++) {
|
||||||
|
char c1 = min.charAt(mi);
|
||||||
|
for (int xi = Math.max(mi - range, 0), xn = Math.min(mi + range + 1, max
|
||||||
|
.length()); xi < xn; xi++) {
|
||||||
|
if (!matchFlags[xi] && c1 == max.charAt(xi)) {
|
||||||
|
matchIndexes[mi] = xi;
|
||||||
|
matchFlags[xi] = true;
|
||||||
|
matches++;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
char[] ms1 = new char[matches];
|
||||||
|
char[] ms2 = new char[matches];
|
||||||
|
for (int i = 0, si = 0; i < min.length(); i++) {
|
||||||
|
if (matchIndexes[i] != -1) {
|
||||||
|
ms1[si] = min.charAt(i);
|
||||||
|
si++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0, si = 0; i < max.length(); i++) {
|
||||||
|
if (matchFlags[i]) {
|
||||||
|
ms2[si] = max.charAt(i);
|
||||||
|
si++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int transpositions = 0;
|
||||||
|
for (int mi = 0; mi < ms1.length; mi++) {
|
||||||
|
if (ms1[mi] != ms2[mi]) {
|
||||||
|
transpositions++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int prefix = 0;
|
||||||
|
for (int mi = 0; mi < min.length(); mi++) {
|
||||||
|
if (s1.charAt(mi) == s2.charAt(mi)) {
|
||||||
|
prefix++;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new int[] { matches, transpositions / 2, prefix, max.length() };
|
||||||
|
}
|
||||||
|
|
||||||
|
public float getDistance(String s1, String s2) {
|
||||||
|
int[] mtp = matches(s1, s2);
|
||||||
|
float m = (float) mtp[0];
|
||||||
|
if (m == 0) {
|
||||||
|
return 0f;
|
||||||
|
}
|
||||||
|
float j = ((m / s1.length() + m / s2.length() + (m - mtp[1]) / m)) / 3;
|
||||||
|
float jw = j < getThreshold() ? j : j + Math.min(0.1f, 1f / mtp[3]) * mtp[2]
|
||||||
|
* (1 - j);
|
||||||
|
return jw;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the threshold used to determine when Winkler bonus should be used.
|
||||||
|
* Set to a negative value to get the Jaro distance.
|
||||||
|
* @param threshold the new value of the threshold
|
||||||
|
*/
|
||||||
|
public void setThreshold(float threshold) {
|
||||||
|
this.threshold = threshold;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the current value of the threshold used for adding the Winkler bonus.
|
||||||
|
* The deafult value is 0.7.
|
||||||
|
* @return the current value of the threshold
|
||||||
|
*/
|
||||||
|
public float getThreshold() {
|
||||||
|
return threshold;
|
||||||
|
}
|
||||||
|
}
|
|
@ -76,6 +76,8 @@ public class SpellChecker {
|
||||||
|
|
||||||
// minimum score for hits generated by the spell checker query
|
// minimum score for hits generated by the spell checker query
|
||||||
private float minScore = 0.5f;
|
private float minScore = 0.5f;
|
||||||
|
|
||||||
|
private StringDistance sd;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use the given directory as a spell checker index. The directory
|
* Use the given directory as a spell checker index. The directory
|
||||||
|
@ -84,10 +86,15 @@ public class SpellChecker {
|
||||||
* @param spellIndex
|
* @param spellIndex
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public SpellChecker(Directory spellIndex) throws IOException {
|
public SpellChecker(Directory spellIndex,StringDistance sd) throws IOException {
|
||||||
this.setSpellIndex(spellIndex);
|
this.setSpellIndex(spellIndex);
|
||||||
|
this.setStringDistance(sd);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public SpellChecker(Directory spellIndex) throws IOException {
|
||||||
|
this(spellIndex,new TRStringDistance());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Use a different index as the spell checker index or re-open
|
* Use a different index as the spell checker index or re-open
|
||||||
* the existing index if <code>spellIndex</code> is the same value
|
* the existing index if <code>spellIndex</code> is the same value
|
||||||
|
@ -108,6 +115,11 @@ public class SpellChecker {
|
||||||
}
|
}
|
||||||
searcher = new IndexSearcher(this.spellIndex);
|
searcher = new IndexSearcher(this.spellIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setStringDistance(StringDistance sd) {
|
||||||
|
this.sd = sd;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the accuracy 0 < minScore < 1; default 0.5
|
* Sets the accuracy 0 < minScore < 1; default 0.5
|
||||||
|
@ -163,7 +175,6 @@ public class SpellChecker {
|
||||||
String field, boolean morePopular) throws IOException {
|
String field, boolean morePopular) throws IOException {
|
||||||
|
|
||||||
float min = this.minScore;
|
float min = this.minScore;
|
||||||
final TRStringDistance sd = new TRStringDistance(word);
|
|
||||||
final int lengthWord = word.length();
|
final int lengthWord = word.length();
|
||||||
|
|
||||||
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
final int freq = (ir != null && field != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||||
|
@ -217,9 +228,8 @@ public class SpellChecker {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// edit distance/normalize with the minScore word length
|
// edit distance
|
||||||
sugWord.score = 1.0f - ((float) sd.getDistance(sugWord.string) / Math
|
sugWord.score = sd.getDistance(word,sugWord.string);
|
||||||
.min(sugWord.string.length(), lengthWord));
|
|
||||||
if (sugWord.score < min) {
|
if (sugWord.score < min) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,35 @@
|
||||||
|
package org.apache.lucene.search.spell;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Interface for string distances.
|
||||||
|
*/
|
||||||
|
public interface StringDistance {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a float between 0 and 1 based on how similar the specified strings are to one another.
|
||||||
|
* Returning a value of 0 means the specified strings are identical and 1 means the
|
||||||
|
* string are maximally different.
|
||||||
|
* @param s1 The first string.
|
||||||
|
* @param s2 The second string.
|
||||||
|
* @return a float between 0 and 1 based on how similar the specified strings are to one another.
|
||||||
|
*/
|
||||||
|
public float getDistance(String s1,String s2);
|
||||||
|
|
||||||
|
}
|
|
@ -18,34 +18,28 @@ package org.apache.lucene.search.spell;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Edit distance class.
|
* Levenshtein edit distance class.
|
||||||
* Note: this class is not thread-safe.
|
|
||||||
*/
|
*/
|
||||||
final class TRStringDistance {
|
final class TRStringDistance implements StringDistance {
|
||||||
|
|
||||||
final char[] sa;
|
|
||||||
final int n;
|
|
||||||
int p[]; //'previous' cost array, horizontally
|
|
||||||
int d[]; // cost array, horizontally
|
|
||||||
int _d[]; //placeholder to assist in swapping p and d
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Optimized to run a bit faster than the static getDistance().
|
* Optimized to run a bit faster than the static getDistance().
|
||||||
* In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster.
|
* In one benchmark times were 5.3sec using ctr vs 8.5sec w/ static method, thus 37% faster.
|
||||||
*/
|
*/
|
||||||
public TRStringDistance (String target) {
|
public TRStringDistance () {
|
||||||
sa = target.toCharArray();
|
|
||||||
n = sa.length;
|
|
||||||
p = new int[n+1]; //'previous' cost array, horizontally
|
|
||||||
d = new int[n+1]; // cost array, horizontally
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
//*****************************
|
//*****************************
|
||||||
// Compute Levenshtein distance: see org.apache.commons.lang.StringUtils#getLevenshteinDistance(String, String)
|
// Compute Levenshtein distance: see org.apache.commons.lang.StringUtils#getLevenshteinDistance(String, String)
|
||||||
//*****************************
|
//*****************************
|
||||||
public final int getDistance (String other) {
|
public float getDistance (String target, String other) {
|
||||||
|
char[] sa;
|
||||||
|
int n;
|
||||||
|
int p[]; //'previous' cost array, horizontally
|
||||||
|
int d[]; // cost array, horizontally
|
||||||
|
int _d[]; //placeholder to assist in swapping p and d
|
||||||
|
|
||||||
/*
|
/*
|
||||||
The difference between this impl. and the previous is that, rather
|
The difference between this impl. and the previous is that, rather
|
||||||
than creating and retaining a matrix of size s.length()+1 by t.length()+1,
|
than creating and retaining a matrix of size s.length()+1 by t.length()+1,
|
||||||
|
@ -63,12 +57,17 @@ final class TRStringDistance {
|
||||||
cause an out of memory condition when calculating the LD over two very large strings.
|
cause an out of memory condition when calculating the LD over two very large strings.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
sa = target.toCharArray();
|
||||||
|
n = sa.length;
|
||||||
|
p = new int[n+1];
|
||||||
|
d = new int[n+1];
|
||||||
|
|
||||||
final int m = other.length();
|
final int m = other.length();
|
||||||
|
|
||||||
if (n == 0) {
|
if (n == 0) {
|
||||||
return m;
|
return 1;
|
||||||
} else if (m == 0) {
|
} else if (m == 0) {
|
||||||
return n;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// indexes into strings s and t
|
// indexes into strings s and t
|
||||||
|
@ -101,7 +100,7 @@ final class TRStringDistance {
|
||||||
|
|
||||||
// our last action in the above loop was to switch d and p, so p now
|
// our last action in the above loop was to switch d and p, so p now
|
||||||
// actually has the most recent cost counts
|
// actually has the most recent cost counts
|
||||||
return p[n];
|
return 1.0f - ((float) p[n] / Math.min(other.length(), sa.length));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
package org.apache.lucene.search.spell;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class TestJaroWinklerDistance extends TestCase {
|
||||||
|
|
||||||
|
private StringDistance sd = new JaroWinklerDistance();
|
||||||
|
|
||||||
|
public void testGetDistance() {
|
||||||
|
float d = sd.getDistance("al", "al");
|
||||||
|
assertTrue(d == 1.0f);
|
||||||
|
d = sd.getDistance("martha", "marhta");
|
||||||
|
assertTrue(d > 0.961 && d <0.962);
|
||||||
|
d = sd.getDistance("jones", "johnson");
|
||||||
|
assertTrue(d > 0.832 && d < 0.833);
|
||||||
|
d = sd.getDistance("abcvwxyz", "cabvwxyz");
|
||||||
|
assertTrue(d > 0.958 && d < 0.959);
|
||||||
|
d = sd.getDistance("dwayne", "duane");
|
||||||
|
assertTrue(d > 0.84 && d < 0.841);
|
||||||
|
d = sd.getDistance("dixon", "dicksonx");
|
||||||
|
assertTrue(d > 0.813 && d < 0.814);
|
||||||
|
d = sd.getDistance("fvie", "ten");
|
||||||
|
assertTrue(d == 0f);
|
||||||
|
float d1 = sd.getDistance("zac ephron", "zac efron");
|
||||||
|
float d2 = sd.getDistance("zac ephron", "kai ephron");
|
||||||
|
assertTrue(d1 > d2);
|
||||||
|
d1 = sd.getDistance("brittney spears", "britney spears");
|
||||||
|
d2 = sd.getDistance("brittney spears", "brittney startzman");
|
||||||
|
assertTrue(d1 > d2);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,49 @@
|
||||||
|
package org.apache.lucene.search.spell;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class TestLevenshteinDistance extends TestCase {
|
||||||
|
|
||||||
|
private StringDistance sd = new TRStringDistance();
|
||||||
|
|
||||||
|
public void testGetDistance() {
|
||||||
|
float d = sd.getDistance("al", "al");
|
||||||
|
assertTrue(d == 1.0f);
|
||||||
|
d = sd.getDistance("martha", "marhta");
|
||||||
|
assertTrue(d > 0.66 && d <0.67);
|
||||||
|
d = sd.getDistance("jones", "johnson");
|
||||||
|
assertTrue(d > 0.199 && d < 0.201);
|
||||||
|
d = sd.getDistance("abcvwxyz", "cabvwxyz");
|
||||||
|
assertTrue(d > 0.749 && d < 0.751);
|
||||||
|
d = sd.getDistance("dwayne", "duane");
|
||||||
|
assertTrue(d > 0.599 && d < 0.601);
|
||||||
|
d = sd.getDistance("dixon", "dicksonx");
|
||||||
|
assertTrue(d > 0.199 && d < 0.201);
|
||||||
|
d = sd.getDistance("six", "ten");
|
||||||
|
assertTrue(d == 0f);
|
||||||
|
float d1 = sd.getDistance("zac ephron", "zac efron");
|
||||||
|
float d2 = sd.getDistance("zac ephron", "kai ephron");
|
||||||
|
assertTrue(d1 < d2);
|
||||||
|
d1 = sd.getDistance("brittney spears", "britney spears");
|
||||||
|
d2 = sd.getDistance("brittney spears", "brittney startzman");
|
||||||
|
assertTrue(d1 > d2);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -43,7 +43,7 @@ public class TestSpellChecker extends TestCase {
|
||||||
|
|
||||||
protected void setUp() throws Exception {
|
protected void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
|
|
||||||
//create a user index
|
//create a user index
|
||||||
userindex = new RAMDirectory();
|
userindex = new RAMDirectory();
|
||||||
IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);
|
IndexWriter writer = new IndexWriter(userindex, new SimpleAnalyzer(), true);
|
||||||
|
@ -75,6 +75,46 @@ public class TestSpellChecker extends TestCase {
|
||||||
|
|
||||||
assertEquals(num_field2, num_field1 + 1);
|
assertEquals(num_field2, num_field1 + 1);
|
||||||
|
|
||||||
|
checkCommonSuggestions(r);
|
||||||
|
checkLevenshteinSuggestions(r);
|
||||||
|
|
||||||
|
spellChecker.setStringDistance(new JaroWinklerDistance());
|
||||||
|
spellChecker.setAccuracy(0.8f);
|
||||||
|
checkCommonSuggestions(r);
|
||||||
|
checkJaroWinklerSuggestions();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkCommonSuggestions(IndexReader r) throws IOException {
|
||||||
|
String[] similar = spellChecker.suggestSimilar("fvie", 2);
|
||||||
|
assertTrue(similar.length > 0);
|
||||||
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
|
similar = spellChecker.suggestSimilar("five", 2);
|
||||||
|
if (similar.length > 0) {
|
||||||
|
assertFalse(similar[0].equals("five")); // don't suggest a word for itself
|
||||||
|
}
|
||||||
|
|
||||||
|
similar = spellChecker.suggestSimilar("fiv", 2);
|
||||||
|
assertTrue(similar.length > 0);
|
||||||
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
|
similar = spellChecker.suggestSimilar("fives", 2);
|
||||||
|
assertTrue(similar.length > 0);
|
||||||
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
|
assertTrue(similar.length > 0);
|
||||||
|
similar = spellChecker.suggestSimilar("fie", 2);
|
||||||
|
assertEquals(similar[0], "five");
|
||||||
|
|
||||||
|
// test restraint to a field
|
||||||
|
similar = spellChecker.suggestSimilar("tousand", 10, r, "field1", false);
|
||||||
|
assertEquals(0, similar.length); // there isn't the term thousand in the field field1
|
||||||
|
|
||||||
|
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
|
||||||
|
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkLevenshteinSuggestions(IndexReader r) throws IOException {
|
||||||
// test small word
|
// test small word
|
||||||
String[] similar = spellChecker.suggestSimilar("fvie", 2);
|
String[] similar = spellChecker.suggestSimilar("fvie", 2);
|
||||||
assertEquals(1, similar.length);
|
assertEquals(1, similar.length);
|
||||||
|
@ -109,14 +149,21 @@ public class TestSpellChecker extends TestCase {
|
||||||
|
|
||||||
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
|
similar = spellChecker.suggestSimilar("tousand", 10, r, "field2", false);
|
||||||
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
assertEquals(1, similar.length); // there is the term thousand in the field field2
|
||||||
|
|
||||||
|
similar = spellChecker.suggestSimilar("onety", 2);
|
||||||
|
assertEquals(1, similar.length);
|
||||||
|
assertEquals(similar[0], "ninety");
|
||||||
try {
|
try {
|
||||||
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
|
similar = spellChecker.suggestSimilar("tousand", 10, r, null, false);
|
||||||
} catch (NullPointerException e) {
|
} catch (NullPointerException e) {
|
||||||
assertTrue("threw an NPE, and it shouldn't have", false);
|
assertTrue("threw an NPE, and it shouldn't have", false);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkJaroWinklerSuggestions() throws IOException {
|
||||||
|
String[] similar = spellChecker.suggestSimilar("onety", 2);
|
||||||
|
assertEquals(2, similar.length);
|
||||||
|
assertEquals(similar[0], "one");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void addwords(IndexReader r, String field) throws IOException {
|
private void addwords(IndexReader r, String field) throws IOException {
|
||||||
|
|
Loading…
Reference in New Issue