mirror of https://github.com/apache/lucene.git
- Moved IndexSearcher creation to the setSpellIndex(Directory) method, so that an IndexSearcher is not opened and closed for every suggestion
- Fixed typo in method name: setAccuraty -> setAccuracy
- Cosmetics, comments and other cleanups

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@424230 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9bc6ba0863
commit
8723cd5585
|
@ -69,28 +69,29 @@ public class SpellChecker {
|
||||||
* Boost value for start and end grams
|
* Boost value for start and end grams
|
||||||
*/
|
*/
|
||||||
private float bStart = 2.0f;
|
private float bStart = 2.0f;
|
||||||
|
|
||||||
private float bEnd = 1.0f;
|
private float bEnd = 1.0f;
|
||||||
|
|
||||||
private IndexReader reader;
|
private IndexReader reader;
|
||||||
|
private IndexSearcher searcher;
|
||||||
|
|
||||||
float min = 0.5f;
|
float min = 0.5f;
|
||||||
|
|
||||||
public void setSpellIndex(Directory spellindex) {
|
public SpellChecker(Directory spellIndex) throws IOException {
|
||||||
|
this.setSpellIndex(spellIndex);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setSpellIndex(Directory spellindex) throws IOException {
|
||||||
this.spellindex = spellindex;
|
this.spellindex = spellindex;
|
||||||
|
searcher = new IndexSearcher(this.spellindex);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the accuracy 0 < min < 1; default 0.5
|
* Sets the accuracy 0 < min < 1; default 0.5
|
||||||
*/
|
*/
|
||||||
public void setAccuraty(float min) {
|
public void setAccuracy(float min) {
|
||||||
this.min = min;
|
this.min = min;
|
||||||
}
|
}
|
||||||
|
|
||||||
public SpellChecker(Directory gramIndex) {
|
|
||||||
this.setSpellIndex(gramIndex);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Suggest similar words
|
* Suggest similar words
|
||||||
* @param word String the word you want a spell check done on
|
* @param word String the word you want a spell check done on
|
||||||
|
@ -123,10 +124,10 @@ public class SpellChecker {
|
||||||
final TRStringDistance sd = new TRStringDistance(word);
|
final TRStringDistance sd = new TRStringDistance(word);
|
||||||
final int lengthWord = word.length();
|
final int lengthWord = word.length();
|
||||||
|
|
||||||
final int goalFreq = (morePopular && ir != null) ? ir.docFreq(new Term(
|
final int goalFreq = (morePopular && ir != null) ? ir.docFreq(new Term(field, word)) : 0;
|
||||||
field, word)) : 0;
|
// if the word exists in the real index and we don't care for word frequency, return the word itself
|
||||||
if (!morePopular && goalFreq > 0) {
|
if (!morePopular && goalFreq > 0) {
|
||||||
return new String[] { word }; // return the word if it exist in the index and i don't want a more popular word
|
return new String[] { word };
|
||||||
}
|
}
|
||||||
|
|
||||||
BooleanQuery query = new BooleanQuery();
|
BooleanQuery query = new BooleanQuery();
|
||||||
|
@ -154,21 +155,22 @@ public class SpellChecker {
|
||||||
for (int i = 0; i < grams.length; i++) {
|
for (int i = 0; i < grams.length; i++) {
|
||||||
add(query, key, grams[i]);
|
add(query, key, grams[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
IndexSearcher searcher = new IndexSearcher(this.spellindex);
|
// System.out.println("Q: " + query);
|
||||||
Hits hits = searcher.search(query);
|
Hits hits = searcher.search(query);
|
||||||
SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
|
SuggestWordQueue sugqueue = new SuggestWordQueue(num_sug);
|
||||||
|
|
||||||
int stop = Math.min(hits.length(), 10 * num_sug); // go thru more than 'maxr' matches in case the distance filter triggers
|
// go thru more than 'maxr' matches in case the distance filter triggers
|
||||||
|
int stop = Math.min(hits.length(), 10 * num_sug);
|
||||||
SuggestWord sugword = new SuggestWord();
|
SuggestWord sugword = new SuggestWord();
|
||||||
for (int i = 0; i < stop; i++) {
|
for (int i = 0; i < stop; i++) {
|
||||||
|
|
||||||
sugword.string = hits.doc(i).get(F_WORD); // get orig word)
|
sugword.string = hits.doc(i).get(F_WORD); // get orig word
|
||||||
|
|
||||||
|
// don't suggest a word for itself, that would be silly
|
||||||
if (sugword.string.equals(word)) {
|
if (sugword.string.equals(word)) {
|
||||||
continue; // don't suggest a word for itself, that would be silly
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// edit distance/normalize with the min word length
|
// edit distance/normalize with the min word length
|
||||||
|
@ -180,7 +182,8 @@ public class SpellChecker {
|
||||||
|
|
||||||
if (ir != null) { // use the user index
|
if (ir != null) { // use the user index
|
||||||
sugword.freq = ir.docFreq(new Term(field, sugword.string)); // freq in the index
|
sugword.freq = ir.docFreq(new Term(field, sugword.string)); // freq in the index
|
||||||
if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1) { // don't suggest a word that is not present in the field
|
// don't suggest a word that is not present in the field
|
||||||
|
if ((morePopular && goalFreq > sugword.freq) || sugword.freq < 1) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -198,15 +201,14 @@ public class SpellChecker {
|
||||||
list[i] = ((SuggestWord) sugqueue.pop()).string;
|
list[i] = ((SuggestWord) sugqueue.pop()).string;
|
||||||
}
|
}
|
||||||
|
|
||||||
searcher.close();
|
|
||||||
return list;
|
return list;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a clause to a boolean query.
|
* Add a clause to a boolean query.
|
||||||
*/
|
*/
|
||||||
private static void add(BooleanQuery q, String k, String v, float boost) {
|
private static void add(BooleanQuery q, String name, String value, float boost) {
|
||||||
Query tq = new TermQuery(new Term(k, v));
|
Query tq = new TermQuery(new Term(name, value));
|
||||||
tq.setBoost(boost);
|
tq.setBoost(boost);
|
||||||
q.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
|
q.add(new BooleanClause(tq, BooleanClause.Occur.SHOULD));
|
||||||
}
|
}
|
||||||
|
@ -214,9 +216,8 @@ public class SpellChecker {
|
||||||
/**
|
/**
|
||||||
* Add a clause to a boolean query.
|
* Add a clause to a boolean query.
|
||||||
*/
|
*/
|
||||||
private static void add(BooleanQuery q, String k, String v) {
|
private static void add(BooleanQuery q, String name, String value) {
|
||||||
q.add(new BooleanClause(new TermQuery(new Term(k, v)),
|
q.add(new BooleanClause(new TermQuery(new Term(name, value)), BooleanClause.Occur.SHOULD));
|
||||||
BooleanClause.Occur.SHOULD));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -285,10 +286,6 @@ public class SpellChecker {
|
||||||
// close writer
|
// close writer
|
||||||
writer.optimize();
|
writer.optimize();
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
// close reader
|
|
||||||
// reader.close();
|
|
||||||
// reader=null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private int getMin(int l) {
|
private int getMin(int l) {
|
||||||
|
@ -325,17 +322,14 @@ public class SpellChecker {
|
||||||
String end = null;
|
String end = null;
|
||||||
for (int i = 0; i < len - ng + 1; i++) {
|
for (int i = 0; i < len - ng + 1; i++) {
|
||||||
String gram = text.substring(i, i + ng);
|
String gram = text.substring(i, i + ng);
|
||||||
doc
|
doc.add(new Field(key, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
.add(new Field(key, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
|
||||||
if (i == 0) {
|
if (i == 0) {
|
||||||
doc.add(new Field("start" + ng, gram, Field.Store.YES,
|
doc.add(new Field("start" + ng, gram, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
Field.Index.UN_TOKENIZED));
|
|
||||||
}
|
}
|
||||||
end = gram;
|
end = gram;
|
||||||
}
|
}
|
||||||
if (end != null) { // may not be present if len==ng1
|
if (end != null) { // may not be present if len==ng1
|
||||||
doc.add(new Field("end" + ng, end, Field.Store.YES,
|
doc.add(new Field("end" + ng, end, Field.Store.YES, Field.Index.UN_TOKENIZED));
|
||||||
Field.Index.UN_TOKENIZED));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue