Merge pull request #13870 from mikemccand/close_tokenstream
Close TokenStream in finally clause
This commit is contained in:
commit
5278cf0d5e
|
@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.search.*;
|
||||
import org.apache.lucene.util.automaton.RegExp;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.common.lucene.search.Queries;
|
||||
import org.elasticsearch.common.unit.Fuzziness;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
|
@ -484,15 +485,17 @@ public class MapperQueryParser extends QueryParser {
|
|||
if (!settings.analyzeWildcard()) {
|
||||
return super.getPrefixQuery(field, termStr);
|
||||
}
|
||||
List<String> tlist;
|
||||
// get Analyzer from superclass and tokenize the term
|
||||
TokenStream source;
|
||||
TokenStream source = null;
|
||||
try {
|
||||
try {
|
||||
source = getAnalyzer().tokenStream(field, termStr);
|
||||
source.reset();
|
||||
} catch (IOException e) {
|
||||
return super.getPrefixQuery(field, termStr);
|
||||
}
|
||||
List<String> tlist = new ArrayList<>();
|
||||
tlist = new ArrayList<>();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
|
||||
while (true) {
|
||||
|
@ -503,11 +506,10 @@ public class MapperQueryParser extends QueryParser {
|
|||
}
|
||||
tlist.add(termAtt.toString());
|
||||
}
|
||||
|
||||
try {
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
} finally {
|
||||
if (source != null) {
|
||||
IOUtils.closeWhileHandlingException(source);
|
||||
}
|
||||
}
|
||||
|
||||
if (tlist.size() == 1) {
|
||||
|
@ -617,8 +619,7 @@ public class MapperQueryParser extends QueryParser {
|
|||
char c = termStr.charAt(i);
|
||||
if (c == '?' || c == '*') {
|
||||
if (isWithinToken) {
|
||||
try {
|
||||
TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
|
||||
try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
|
||||
source.reset();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
if (source.incrementToken()) {
|
||||
|
@ -633,7 +634,6 @@ public class MapperQueryParser extends QueryParser {
|
|||
// no tokens, just use what we have now
|
||||
aggStr.append(tmp);
|
||||
}
|
||||
source.close();
|
||||
} catch (IOException e) {
|
||||
aggStr.append(tmp);
|
||||
}
|
||||
|
@ -648,7 +648,7 @@ public class MapperQueryParser extends QueryParser {
|
|||
}
|
||||
if (isWithinToken) {
|
||||
try {
|
||||
TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
|
||||
try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
|
||||
source.reset();
|
||||
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
|
||||
if (source.incrementToken()) {
|
||||
|
@ -663,7 +663,7 @@ public class MapperQueryParser extends QueryParser {
|
|||
// no tokens, just use what we have now
|
||||
aggStr.append(tmp);
|
||||
}
|
||||
source.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
aggStr.append(tmp);
|
||||
}
|
||||
|
|
|
@ -959,11 +959,9 @@ public long ramBytesUsed() {
|
|||
// TODO: is there a Reader from a CharSequence?
|
||||
// Turn tokenstream into automaton:
|
||||
Automaton automaton = null;
|
||||
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
|
||||
try {
|
||||
|
||||
try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
|
||||
automaton = getTokenStreamToAutomaton().toAutomaton(ts);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(ts);
|
||||
}
|
||||
|
||||
automaton = replaceSep(automaton);
|
||||
|
|
|
@ -217,12 +217,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
}
|
||||
|
||||
List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
|
||||
TokenStream stream = null;
|
||||
int lastPosition = -1;
|
||||
int lastOffset = 0;
|
||||
for (String text : request.text()) {
|
||||
try {
|
||||
stream = analyzer.tokenStream(field, text);
|
||||
try (TokenStream stream = analyzer.tokenStream(field, text)) {
|
||||
stream.reset();
|
||||
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
|
||||
PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
|
||||
|
@ -243,11 +241,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
|
|||
|
||||
lastPosition += analyzer.getPositionIncrementGap(field);
|
||||
lastOffset += analyzer.getOffsetGap(field);
|
||||
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchException("failed to analyze", e);
|
||||
} finally {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -314,7 +314,9 @@ public class Analysis {
|
|||
* @see #isCharacterTokenStream(TokenStream)
|
||||
*/
|
||||
public static boolean generatesCharacterTokenStream(Analyzer analyzer, String fieldName) throws IOException {
|
||||
return isCharacterTokenStream(analyzer.tokenStream(fieldName, ""));
|
||||
try (TokenStream ts = analyzer.tokenStream(fieldName, "")) {
|
||||
return isCharacterTokenStream(ts);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
package org.elasticsearch.index.mapper.core;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Field;
|
||||
|
@ -145,7 +146,7 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
|
|||
if (valueAndBoost.value() == null) {
|
||||
count = fieldType().nullValue();
|
||||
} else {
|
||||
count = countPositions(analyzer.analyzer().tokenStream(simpleName(), valueAndBoost.value()));
|
||||
count = countPositions(analyzer, simpleName(), valueAndBoost.value());
|
||||
}
|
||||
addIntegerFields(context, fields, count, valueAndBoost.boost());
|
||||
}
|
||||
|
@ -156,12 +157,14 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
|
|||
|
||||
/**
|
||||
* Count position increments in a token stream. Package private for testing.
|
||||
* @param tokenStream token stream to count
|
||||
* @param analyzer analyzer to create token stream
|
||||
* @param fieldName field name to pass to analyzer
|
||||
* @param fieldValue field value to pass to analyzer
|
||||
* @return number of position increments in a token stream
|
||||
* @throws IOException if tokenStream throws it
|
||||
*/
|
||||
static int countPositions(TokenStream tokenStream) throws IOException {
|
||||
try {
|
||||
static int countPositions(Analyzer analyzer, String fieldName, String fieldValue) throws IOException {
|
||||
try (TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldValue)) {
|
||||
int count = 0;
|
||||
PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
|
||||
tokenStream.reset();
|
||||
|
@ -171,8 +174,6 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
|
|||
tokenStream.end();
|
||||
count += position.getPositionIncrement();
|
||||
return count;
|
||||
} finally {
|
||||
tokenStream.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -88,10 +88,11 @@ class MultiDocumentPercolatorIndex implements PercolatorIndex {
|
|||
try {
|
||||
// TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
|
||||
// like the indexer does
|
||||
TokenStream tokenStream = field.tokenStream(analyzer, null);
|
||||
try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
|
||||
if (tokenStream != null) {
|
||||
memoryIndex.addField(field.name(), tokenStream, field.boost());
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new ElasticsearchException("Failed to create token stream", e);
|
||||
}
|
||||
|
|
|
@ -56,10 +56,11 @@ class SingleDocumentPercolatorIndex implements PercolatorIndex {
|
|||
Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
|
||||
// TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
|
||||
// like the indexer does
|
||||
TokenStream tokenStream = field.tokenStream(analyzer, null);
|
||||
try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
|
||||
if (tokenStream != null) {
|
||||
memoryIndex.addField(field.name(), tokenStream, field.boost());
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
|
||||
}
|
||||
|
|
|
@ -33,6 +33,7 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
|
|||
import org.apache.lucene.search.highlight.TextFragment;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.CollectionUtil;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.ExceptionsHelper;
|
||||
import org.elasticsearch.common.text.StringText;
|
||||
import org.elasticsearch.common.text.Text;
|
||||
|
@ -109,7 +110,7 @@ public class PlainHighlighter implements Highlighter {
|
|||
for (Object textToHighlight : textsToHighlight) {
|
||||
String text = textToHighlight.toString();
|
||||
|
||||
TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().names().indexName(), text);
|
||||
try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().names().indexName(), text)) {
|
||||
if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
|
||||
// can't perform highlighting if the stream has no terms (binary token stream) or no offsets
|
||||
continue;
|
||||
|
@ -121,6 +122,7 @@ public class PlainHighlighter implements Highlighter {
|
|||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
|
||||
// this can happen if for example a field is not_analyzed and ignore_above option is set.
|
||||
|
@ -165,7 +167,7 @@ public class PlainHighlighter implements Highlighter {
|
|||
String fieldContents = textsToHighlight.get(0).toString();
|
||||
int end;
|
||||
try {
|
||||
end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.fieldType().names().indexName(), fieldContents));
|
||||
end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().names().indexName(), fieldContents);
|
||||
} catch (Exception e) {
|
||||
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
|
||||
}
|
||||
|
@ -181,8 +183,8 @@ public class PlainHighlighter implements Highlighter {
|
|||
return true;
|
||||
}
|
||||
|
||||
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
|
||||
try {
|
||||
private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
|
||||
try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
|
||||
if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
|
||||
// Can't split on term boundaries without offsets
|
||||
return -1;
|
||||
|
@ -200,11 +202,9 @@ public class PlainHighlighter implements Highlighter {
|
|||
}
|
||||
end = attr.endOffset();
|
||||
}
|
||||
tokenStream.end();
|
||||
// We've exhausted the token stream so we should just highlight everything.
|
||||
return end;
|
||||
} finally {
|
||||
tokenStream.end();
|
||||
tokenStream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -28,6 +28,7 @@ import org.apache.lucene.util.BytesRef;
|
|||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.automaton.LevenshteinAutomata;
|
||||
import org.elasticsearch.common.ParseField;
|
||||
import org.elasticsearch.common.ParseFieldMatcher;
|
||||
|
@ -116,22 +117,34 @@ public final class SuggestUtils {
|
|||
}
|
||||
|
||||
public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
|
||||
TokenStream ts = analyzer.tokenStream(
|
||||
field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length)
|
||||
);
|
||||
try (TokenStream ts = analyzer.tokenStream(
|
||||
field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) {
|
||||
return analyze(ts, consumer);
|
||||
}
|
||||
}
|
||||
|
||||
/** NOTE: this method closes the TokenStream, even on exception, which is awkward
|
||||
* because really the caller who called {@link Analyzer#tokenStream} should close it,
|
||||
* but when trying that there are recursion issues when we try to use the same
|
||||
* TokenStream twice in the same recursion... */
|
||||
public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
|
||||
int numTokens = 0;
|
||||
boolean success = false;
|
||||
try {
|
||||
stream.reset();
|
||||
consumer.reset(stream);
|
||||
int numTokens = 0;
|
||||
while (stream.incrementToken()) {
|
||||
consumer.nextToken();
|
||||
numTokens++;
|
||||
}
|
||||
consumer.end();
|
||||
} finally {
|
||||
if (success) {
|
||||
stream.close();
|
||||
} else {
|
||||
IOUtils.closeWhileHandlingException(stream);
|
||||
}
|
||||
}
|
||||
return numTokens;
|
||||
}
|
||||
|
||||
|
|
|
@ -100,10 +100,8 @@ public final class CompletionTokenStream extends TokenStream {
|
|||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
if (posInc == -1) {
|
||||
input.close();
|
||||
}
|
||||
}
|
||||
|
||||
public static interface ToFiniteStrings {
|
||||
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException;
|
||||
|
|
|
@ -92,12 +92,13 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
|
|||
if (gens.size() > 0 && suggestTerms != null) {
|
||||
final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
|
||||
final BytesRef separator = suggestion.separator();
|
||||
TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
|
||||
|
||||
WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
|
||||
Result checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
||||
Result checkerResult;
|
||||
try (TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
|
||||
checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
|
||||
gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
|
||||
suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
|
||||
}
|
||||
|
||||
PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
|
||||
response.addTerm(resultEntry);
|
||||
|
|
|
@ -19,7 +19,9 @@
|
|||
|
||||
package org.elasticsearch.index.mapper.core;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.CannedTokenStream;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.analysis.Token;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
|
@ -87,7 +89,14 @@ public class TokenCountFieldMapperTests extends ESSingleNodeTestCase {
|
|||
int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
|
||||
Token[] tokens = new Token[] {t1, t2, t3};
|
||||
Collections.shuffle(Arrays.asList(tokens), getRandom());
|
||||
TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
|
||||
assertThat(TokenCountFieldMapper.countPositions(tokenStream), equalTo(7));
|
||||
final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
|
||||
// TODO: we have no CannedAnalyzer?
|
||||
Analyzer analyzer = new Analyzer() {
|
||||
@Override
|
||||
public TokenStreamComponents createComponents(String fieldName) {
|
||||
return new TokenStreamComponents(new MockTokenizer(), tokenStream);
|
||||
}
|
||||
};
|
||||
assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue