diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java index f3a034cda61..744552e7ea4 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java @@ -18,7 +18,6 @@ */ package org.elasticsearch.search.suggest; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; @@ -29,11 +28,7 @@ import org.apache.lucene.search.spell.SuggestWordFrequencyComparator; import org.apache.lucene.search.spell.SuggestWordQueue; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; -import org.apache.lucene.util.CharsRef; -import org.apache.lucene.util.CharsRefBuilder; -import org.apache.lucene.util.IOUtils; import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.io.FastCharArrayReader; import java.io.IOException; import java.util.Comparator; @@ -103,44 +98,6 @@ public final class SuggestUtils { public void end() {} } - public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException { - spare.copyUTF8Bytes(toAnalyze); - return analyze(analyzer, spare.get(), field, consumer); - } - - public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException { - try (TokenStream ts = analyzer.tokenStream( - field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) { - return analyze(ts, consumer); - } - } - - /** NOTE: this method closes the TokenStream, even on exception, which is awkward - * because really the caller who called {@link Analyzer#tokenStream} should close it, - * but when trying that there are recursion issues when we try to use the same - * TokenStream twice in the same recursion... */ - public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException { - int numTokens = 0; - boolean success = false; - try { - stream.reset(); - consumer.reset(stream); - while (stream.incrementToken()) { - consumer.nextToken(); - numTokens++; - } - consumer.end(); - success = true; - } finally { - if (success) { - stream.close(); - } else { - IOUtils.closeWhileHandlingException(stream); - } - } - return numTokens; - } - public static class Fields { public static final ParseField STRING_DISTANCE = new ParseField("string_distance"); public static final ParseField SUGGEST_MODE = new ParseField("suggest_mode"); diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java index 67fed51b622..82ae3a02d33 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGenerator.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.suggest.phrase; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.Term; @@ -29,8 +30,12 @@ import org.apache.lucene.search.spell.SuggestMode; import org.apache.lucene.search.spell.SuggestWord; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.IOUtils; +import org.elasticsearch.common.io.FastCharArrayReader; import org.elasticsearch.search.suggest.SuggestUtils; +import org.elasticsearch.search.suggest.SuggestUtils.TokenConsumer; import java.io.IOException; import java.util.ArrayList; @@ -44,7 +49,7 @@ import static java.lang.Math.log10; import static java.lang.Math.max; import static java.lang.Math.round; -final class DirectCandidateGenerator extends CandidateGenerator { +public final class DirectCandidateGenerator extends CandidateGenerator { private final DirectSpellChecker spellchecker; private final String field; @@ -140,7 +145,7 @@ final class DirectCandidateGenerator extends CandidateGenerator { return term; } final BytesRefBuilder result = byteSpare; - SuggestUtils.analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() { + analyze(preFilter, term, field, new SuggestUtils.TokenConsumer() { @Override public void nextToken() throws IOException { @@ -156,7 +161,7 @@ final class DirectCandidateGenerator extends CandidateGenerator { candidates.add(candidate); } else { final BytesRefBuilder result = byteSpare; - SuggestUtils.analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() { + analyze(postFilter, candidate.term, field, new SuggestUtils.TokenConsumer() { @Override public void nextToken() throws IOException { this.fillBytesRef(result); @@ -283,4 +288,39 @@ final class DirectCandidateGenerator extends CandidateGenerator { return new Candidate(term, frequency, channelScore, score(frequency, channelScore, dictSize), userInput); } + public static int analyze(Analyzer analyzer, BytesRef toAnalyze, String field, TokenConsumer consumer, CharsRefBuilder spare) throws IOException { + spare.copyUTF8Bytes(toAnalyze); + CharsRef charsRef = spare.get(); + try (TokenStream ts = analyzer.tokenStream( + field, new FastCharArrayReader(charsRef.chars, charsRef.offset, charsRef.length))) { + return analyze(ts, consumer); + } + } + + /** NOTE: this method closes the TokenStream, even on exception, which is awkward + * because really the caller who called {@link Analyzer#tokenStream} should close it, + * but when trying that there are recursion issues when we try to use the same + * TokenStream twice in the same recursion... */ + public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException { + int numTokens = 0; + boolean success = false; + try { + stream.reset(); + consumer.reset(stream); + while (stream.incrementToken()) { + consumer.nextToken(); + numTokens++; + } + consumer.end(); + success = true; + } finally { + if (success) { + stream.close(); + } else { + IOUtils.closeWhileHandlingException(stream); + } + } + return numTokens; + } + } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java index ec9ca6e1da2..223b169be6b 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/NoisyChannelSpellChecker.java @@ -51,19 +51,19 @@ public final class NoisyChannelSpellChecker { public NoisyChannelSpellChecker(double nonErrorLikelihood) { this(nonErrorLikelihood, true, DEFAULT_TOKEN_LIMIT); } - + public NoisyChannelSpellChecker(double nonErrorLikelihood, boolean requireUnigram, int tokenLimit) { this.realWordLikelihood = nonErrorLikelihood; this.requireUnigram = requireUnigram; this.tokenLimit = tokenLimit; - + } public Result getCorrections(TokenStream stream, final CandidateGenerator generator, float maxErrors, int numCorrections, WordScorer wordScorer, float confidence, int gramSize) throws IOException { - + final List candidateSetsList = new ArrayList<>(); - SuggestUtils.analyze(stream, new SuggestUtils.TokenConsumer() { + DirectCandidateGenerator.analyze(stream, new SuggestUtils.TokenConsumer() { CandidateSet currentSet = null; private TypeAttribute typeAttribute; private final BytesRefBuilder termsRef = new BytesRefBuilder(); @@ -74,7 +74,7 @@ public final class NoisyChannelSpellChecker { super.reset(stream); typeAttribute = stream.addAttribute(TypeAttribute.class); } - + @Override public void nextToken() throws IOException { anyTokens = true; @@ -96,7 +96,7 @@ public final class NoisyChannelSpellChecker { currentSet = new CandidateSet(Candidate.EMPTY, generator.createCandidate(BytesRef.deepCopyOf(term), true)); } } - + @Override public void end() { if (currentSet != null) { @@ -107,11 +107,11 @@ public final class NoisyChannelSpellChecker { } } }); - + if (candidateSetsList.isEmpty() || candidateSetsList.size() >= tokenLimit) { return Result.EMPTY; } - + for (CandidateSet candidateSet : candidateSetsList) { generator.drawCandidates(candidateSet); } @@ -127,13 +127,13 @@ public final class NoisyChannelSpellChecker { cutoffScore = inputPhraseScore * confidence; } Correction[] bestCandidates = scorer.findBestCandiates(candidateSets, maxErrors, cutoffScore); - + return new Result(bestCandidates, cutoffScore); } public Result getCorrections(Analyzer analyzer, BytesRef query, CandidateGenerator generator, float maxErrors, int numCorrections, IndexReader reader, String analysisField, WordScorer scorer, float confidence, int gramSize) throws IOException { - + return getCorrections(tokenStream(analyzer, query, new CharsRefBuilder(), analysisField), generator, maxErrors, numCorrections, scorer, confidence, gramSize); } diff --git a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java index ed71c997d45..6cca58286c5 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggester.java @@ -34,6 +34,7 @@ import org.elasticsearch.search.suggest.SuggestUtils; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionBuilder; import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext; +import org.elasticsearch.search.suggest.phrase.DirectCandidateGenerator; import java.io.IOException; import java.util.ArrayList; @@ -73,7 +74,7 @@ public final class TermSuggester extends Suggester { private List queryTerms(SuggestionContext suggestion, CharsRefBuilder spare) throws IOException { final List result = new ArrayList<>(); final String field = suggestion.getField(); - SuggestUtils.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() { + DirectCandidateGenerator.analyze(suggestion.getAnalyzer(), suggestion.getText(), field, new SuggestUtils.TokenConsumer() { @Override public void nextToken() { Term term = new Term(field, BytesRef.deepCopyOf(fillBytesRef(new BytesRefBuilder())));