mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-20 03:45:02 +00:00
Move analysis helper methods to DirectCandidateGenerator
This commit is contained in:
parent
d6e16b6e74
commit
cdc77648a1
@ -18,7 +18,6 @@
|
||||
*/
|
||||
package org.elasticsearch.search.suggest;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
@ -29,11 +28,7 @@ import org.apache.lucene.search.spell.SuggestWordFrequencyComparator;
|
||||
import org.apache.lucene.search.spell.SuggestWordQueue;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.io.FastCharArrayReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Comparator;
|
||||
@ -103,44 +98,6 @@ public final class SuggestUtils {
|
||||
public void end() {}
|
||||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException {
|
||||
spare.copyUTF8Bytes(toAnalyze);
|
||||
return analyze(analyzer, spare.get(), field, consumer);
|
||||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
|
||||
try (TokenStream ts = analyzer.tokenStream(
|
||||
field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) {
|
||||
return analyze(ts, consumer);
|
||||
}
|
||||
}
|
||||
|
||||
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
|
||||
* because really the caller who called {@link Analyzer#tokenStream} should close it,
|
||||
* but when trying that there are recursion issues when we try to use the same
|
||||
* TokenStream twice in the same recursion... */
|
||||
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
|
||||
int numTokens = 0;
|
||||
boolean success = false;
|
||||
try {
|
||||
stream.reset();
|
||||
consumer.reset(stream);
|
||||
while (stream.incrementToken()) {
|
||||
consumer.nextToken();
|
||||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
stream.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
public static class Fields {
|
||||
public static final ParseField STRING_DISTANCE = new ParseField("string_distance");
|
||||
public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode");
|
||||
|
@ -19,6 +19,7 @@
|
||||
package org.elasticsearch.search.suggest.phrase;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Term;
|
||||
@ -29,8 +30,12 @@ import org.apache.lucene.search.spell.SuggestMode;
|
||||
import org.apache.lucene.search.spell.SuggestWord;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.common.io.FastCharArrayReader;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.SuggestUtils.TokenConsumer;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@ -44,7 +49,7 @@ import static java.lang.Math.log10;
|
||||
import static java.lang.Math.max;
|
||||
import static java.lang.Math.round;
|
||||
|
||||
final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
public final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
|
||||
private final DirectSpellChecker spellchecker;
|
||||
private final String field;
|
||||
@ -140,7 +145,7 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
return term;
|
||||
}
|
||||
final BytesRefBuilder result = byteSpare;
|
||||
SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
|
||||
analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() {
|
||||
|
||||
@Override
|
||||
public void nextToken() throws IOException {
|
||||
@ -156,7 +161,7 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
candidates.add(candidate);
|
||||
} else {
|
||||
final BytesRefBuilder result = byteSpare;
|
||||
SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
|
||||
analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() {
|
||||
@Override
|
||||
public void nextToken() throws IOException {
|
||||
this.fillBytesRef(result);
|
||||
@ -283,4 +288,39 @@ final class DirectCandidateGenerator extends CandidateGenerator {
|
||||
return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput);
|
||||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException {
|
||||
spare.copyUTF8Bytes(toAnalyze);
|
||||
CharsRef charsRef = spare.get();
|
||||
try (TokenStream ts = analyzer.tokenStream(
|
||||
field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) {
|
||||
return analyze(ts, consumer);
|
||||
}
|
||||
}
|
||||
|
||||
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
|
||||
* because really the caller who called {@link Analyzer#tokenStream} should close it,
|
||||
* but when trying that there are recursion issues when we try to use the same
|
||||
* TokenStream twice in the same recursion... */
|
||||
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
|
||||
int numTokens = 0;
|
||||
boolean success = false;
|
||||
try {
|
||||
stream.reset();
|
||||
consumer.reset(stream);
|
||||
while (stream.incrementToken()) {
|
||||
consumer.nextToken();
|
||||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
success = true;
|
||||
} finally {
|
||||
if (success) {
|
||||
stream.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -63,7 +63,7 @@ public final class NoisyChannelSpellChecker {
|
||||
float maxErrors, int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException {
|
||||
|
||||
final List<CandidateSet> candidateSetsList = new ArrayList<>();
|
||||
SuggestUtils.analyze(stream, new SuggestUtils.TokenConsumer() {
|
||||
DirectCandidateGenerator.analyze(stream, new SuggestUtils.TokenConsumer() {
|
||||
CandidateSet currentSet = null;
|
||||
private TypeAttribute typeAttribute;
|
||||
private final BytesRefBuilder termsRef = new BytesRefBuilder();
|
||||
|
@ -34,6 +34,7 @@ import org.elasticsearch.search.suggest.SuggestUtils;
|
||||
import org.elasticsearch.search.suggest.Suggester;
|
||||
import org.elasticsearch.search.suggest.SuggestionBuilder;
|
||||
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
|
||||
import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
@ -73,7 +74,7 @@ public final class TermSuggester extends Suggester<TermSuggestionContext> {
|
||||
private List<Token> queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException {
|
||||
final List<Token> result = new ArrayList<>();
|
||||
final String field = suggestion.getField();
|
||||
SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() {
|
||||
DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() {
|
||||
@Override
|
||||
public void nextToken() {
|
||||
Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));
|
||||
|
Loading…
x
Reference in New Issue
Block a user