Merge pull request #13870 from mikemccand/close_tokenstream

Close TokenStream in finally clause
Michael McCandless 2015-10-02 14:43:26 +01:00
commit 5278cf0d5e
12 changed files with 113 additions and 94 deletions
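The same fix is applied across all twelve files: every place that obtains a TokenStream from an Analyzer (or from a Field) now guarantees the stream is closed on all paths, either in a finally clause or, where scoping allows, with try-with-resources. A minimal before/after sketch of the pattern in Java (illustrative only, not code from this commit; the helper names are made up):

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

class CloseTokenStreamPattern {

    // Before: if reset() or incrementToken() throws, the stream is never closed.
    static int countTokensLeaky(Analyzer analyzer, String field, String text) throws IOException {
        TokenStream ts = analyzer.tokenStream(field, text);
        ts.reset();
        int count = 0;
        while (ts.incrementToken()) {
            count++;
        }
        ts.end();
        ts.close(); // skipped when an exception is thrown above
        return count;
    }

    // After: try-with-resources closes the stream on every path, including exceptions.
    static int countTokens(Analyzer analyzer, String field, String text) throws IOException {
        try (TokenStream ts = analyzer.tokenStream(field, text)) {
            ts.reset();
            int count = 0;
            while (ts.incrementToken()) {
                count++;
            }
            ts.end();
            return count;
        }
    }
}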

View File

@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.automaton.RegExp;
+import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -484,15 +485,17 @@ public class MapperQueryParser extends QueryParser {
         if (!settings.analyzeWildcard()) {
             return super.getPrefixQuery(field, termStr);
         }
+        List<String> tlist;
         // get Analyzer from superclass and tokenize the term
-        TokenStream source;
+        TokenStream source = null;
+        try {
             try {
                 source = getAnalyzer().tokenStream(field, termStr);
                 source.reset();
             } catch (IOException e) {
                 return super.getPrefixQuery(field, termStr);
             }
-        List<String> tlist = new ArrayList<>();
+            tlist = new ArrayList<>();
             CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
             while (true) {
@@ -503,11 +506,10 @@ public class MapperQueryParser extends QueryParser {
                 }
                 tlist.add(termAtt.toString());
             }
-        try {
-            source.close();
-        } catch (IOException e) {
-            // ignore
+        } finally {
+            if (source != null) {
+                IOUtils.closeWhileHandlingException(source);
+            }
         }
         if (tlist.size() == 1) {
@@ -617,8 +619,7 @@ public class MapperQueryParser extends QueryParser {
             char c = termStr.charAt(i);
             if (c == '?' || c == '*') {
                 if (isWithinToken) {
-                    try {
-                        TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
+                    try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
                         source.reset();
                         CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
                         if (source.incrementToken()) {
@@ -633,7 +634,6 @@ public class MapperQueryParser extends QueryParser {
                             // no tokens, just use what we have now
                             aggStr.append(tmp);
                         }
-                        source.close();
                     } catch (IOException e) {
                         aggStr.append(tmp);
                     }
@@ -648,7 +648,7 @@ public class MapperQueryParser extends QueryParser {
         }
         if (isWithinToken) {
             try {
-                TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
+                try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
                     source.reset();
                     CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
                     if (source.incrementToken()) {
@@ -663,7 +663,7 @@ public class MapperQueryParser extends QueryParser {
                     // no tokens, just use what we have now
                     aggStr.append(tmp);
                 }
-                source.close();
+                }
             } catch (IOException e) {
                 aggStr.append(tmp);
             }

View File

@@ -959,11 +959,9 @@ public long ramBytesUsed() {
       // TODO: is there a Reader from a CharSequence?
       // Turn tokenstream into automaton:
       Automaton automaton = null;
-      TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
-      try {
+      try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
           automaton = getTokenStreamToAutomaton().toAutomaton(ts);
-      } finally {
-        IOUtils.closeWhileHandlingException(ts);
       }
       automaton = replaceSep(automaton);

View File

@@ -217,12 +217,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
         }
         List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
-        TokenStream stream = null;
         int lastPosition = -1;
         int lastOffset = 0;
         for (String text : request.text()) {
-            try {
-                stream = analyzer.tokenStream(field, text);
+            try (TokenStream stream = analyzer.tokenStream(field, text)) {
                 stream.reset();
                 CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                 PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
@@ -243,11 +241,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRe
                 lastPosition += analyzer.getPositionIncrementGap(field);
                 lastOffset += analyzer.getOffsetGap(field);
             } catch (IOException e) {
                 throw new ElasticsearchException("failed to analyze", e);
-            } finally {
-                IOUtils.closeWhileHandlingException(stream);
             }
         }

View File

@@ -314,7 +314,9 @@ public class Analysis {
      * @see #isCharacterTokenStream(TokenStream)
      */
     public static boolean generatesCharacterTokenStream(Analyzer analyzer, String fieldName) throws IOException {
-        return isCharacterTokenStream(analyzer.tokenStream(fieldName, ""));
+        try (TokenStream ts = analyzer.tokenStream(fieldName, "")) {
+            return isCharacterTokenStream(ts);
+        }
     }
 }

View File

@@ -19,6 +19,7 @@
 package org.elasticsearch.index.mapper.core;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Field;
@@ -145,7 +146,7 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
         if (valueAndBoost.value() == null) {
             count = fieldType().nullValue();
         } else {
-            count = countPositions(analyzer.analyzer().tokenStream(simpleName(), valueAndBoost.value()));
+            count = countPositions(analyzer, simpleName(), valueAndBoost.value());
         }
         addIntegerFields(context, fields, count, valueAndBoost.boost());
     }
@@ -156,12 +157,14 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
     /**
      * Count position increments in a token stream. Package private for testing.
-     * @param tokenStream token stream to count
+     * @param analyzer analyzer to create token stream
+     * @param fieldName field name to pass to analyzer
+     * @param fieldValue field value to pass to analyzer
      * @return number of position increments in a token stream
      * @throws IOException if tokenStream throws it
      */
-    static int countPositions(TokenStream tokenStream) throws IOException {
-        try {
+    static int countPositions(Analyzer analyzer, String fieldName, String fieldValue) throws IOException {
+        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, fieldValue)) {
            int count = 0;
            PositionIncrementAttribute position = tokenStream.addAttribute(PositionIncrementAttribute.class);
            tokenStream.reset();
@@ -171,8 +174,6 @@ public class TokenCountFieldMapper extends IntegerFieldMapper {
            tokenStream.end();
            count += position.getPositionIncrement();
            return count;
-        } finally {
-            tokenStream.close();
        }
    }

View File

@@ -88,10 +88,11 @@ class MultiDocumentPercolatorIndex implements PercolatorIndex {
             try {
                 // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
                 // like the indexer does
-                TokenStream tokenStream = field.tokenStream(analyzer, null);
+                try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                     if (tokenStream != null) {
                         memoryIndex.addField(field.name(), tokenStream, field.boost());
                     }
+                }
             } catch (IOException e) {
                 throw new ElasticsearchException("Failed to create token stream", e);
             }

View File

@@ -56,10 +56,11 @@ class SingleDocumentPercolatorIndex implements PercolatorIndex {
                 Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
                 // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
                 // like the indexer does
-                TokenStream tokenStream = field.tokenStream(analyzer, null);
+                try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
                     if (tokenStream != null) {
                         memoryIndex.addField(field.name(), tokenStream, field.boost());
                     }
+                }
             } catch (Exception e) {
                 throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
             }

View File

@@ -33,6 +33,7 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
 import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
@@ -109,7 +110,7 @@ public class PlainHighlighter implements Highlighter {
             for (Object textToHighlight : textsToHighlight) {
                 String text = textToHighlight.toString();
-                TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().names().indexName(), text);
+                try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().names().indexName(), text)) {
                     if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                         // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                         continue;
@@ -121,6 +122,7 @@ public class PlainHighlighter implements Highlighter {
                         }
                     }
                 }
+                }
             } catch (Exception e) {
                 if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
                     // this can happen if for example a field is not_analyzed and ignore_above option is set.
@@ -165,7 +167,7 @@ public class PlainHighlighter implements Highlighter {
             String fieldContents = textsToHighlight.get(0).toString();
             int end;
             try {
-                end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.fieldType().names().indexName(), fieldContents));
+                end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().names().indexName(), fieldContents);
             } catch (Exception e) {
                 throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
             }
@@ -181,8 +183,8 @@ public class PlainHighlighter implements Highlighter {
         return true;
     }
-    private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
-        try {
+    private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
+        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
             if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
                 // Can't split on term boundaries without offsets
                 return -1;
@@ -200,11 +202,9 @@ public class PlainHighlighter implements Highlighter {
             }
             end = attr.endOffset();
         }
+            tokenStream.end();
             // We've exhausted the token stream so we should just highlight everything.
             return end;
-        } finally {
-            tokenStream.end();
-            tokenStream.close();
         }
     }
 }

View File

@@ -28,6 +28,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcher;
@@ -116,22 +117,34 @@ public final class SuggestUtils {
     }
     public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
-        TokenStream ts = analyzer.tokenStream(
-                field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length)
-        );
+        try (TokenStream ts = analyzer.tokenStream(
+                field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) {
             return analyze(ts, consumer);
         }
+    }
+    /** NOTE: this method closes the TokenStream, even on exception, which is awkward
+     *  because really the caller who called {@link Analyzer#tokenStream} should close it,
+     *  but when trying that there are recursion issues when we try to use the same
+     *  TokenStream twice in the same recursion... */
     public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
+        int numTokens = 0;
+        boolean success = false;
+        try {
             stream.reset();
             consumer.reset(stream);
-        int numTokens = 0;
             while (stream.incrementToken()) {
                 consumer.nextToken();
                 numTokens++;
             }
             consumer.end();
+        } finally {
+            if (success) {
                 stream.close();
+            } else {
+                IOUtils.closeWhileHandlingException(stream);
+            }
+        }
         return numTokens;
     }
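The success flag in the new SuggestUtils.analyze is a common Lucene idiom: close the stream normally when the body succeeded (so a failure in close() is reported), but close it with IOUtils.closeWhileHandlingException on the error path so a secondary failure cannot mask the original exception. A standalone sketch of the idiom (a hypothetical helper, not the code from this commit):

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.IOUtils;

class SuccessFlagClose {

    // Counts tokens and closes the stream exception-safely.
    static int countTokens(TokenStream stream) throws IOException {
        int numTokens = 0;
        boolean success = false;
        try {
            stream.reset();
            while (stream.incrementToken()) {
                numTokens++;
            }
            stream.end();
            success = true;
        } finally {
            if (success) {
                stream.close();                              // a close failure surfaces normally
            } else {
                IOUtils.closeWhileHandlingException(stream); // don't mask the original exception
            }
        }
        return numTokens;
    }
}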

View File

@@ -100,10 +100,8 @@ public final class CompletionTokenStream extends TokenStream {
     @Override
     public void close() throws IOException {
-        if (posInc == -1) {
-            input.close();
-        }
+        input.close();
     }
     public static interface ToFiniteStrings {
         public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException;

View File

@@ -92,12 +92,13 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
         if (gens.size() > 0 && suggestTerms != null) {
             final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
             final BytesRef separator = suggestion.separator();
-            TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
             WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
-            Result checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
+            Result checkerResult;
+            try (TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
+                checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
                     gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
                     suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
+            }
             PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
             response.addTerm(resultEntry);

View File

@@ -19,7 +19,9 @@
 package org.elasticsearch.index.mapper.core;
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -87,7 +89,14 @@ public class TokenCountFieldMapperTests extends ESSingleNodeTestCase {
         int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
         Token[] tokens = new Token[] {t1, t2, t3};
         Collections.shuffle(Arrays.asList(tokens), getRandom());
-        TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
-        assertThat(TokenCountFieldMapper.countPositions(tokenStream), equalTo(7));
+        final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
+        // TODO: we have no CannedAnalyzer?
+        Analyzer analyzer = new Analyzer() {
+                @Override
+                public TokenStreamComponents createComponents(String fieldName) {
+                    return new TokenStreamComponents(new MockTokenizer(), tokenStream);
+                }
+            };
+        assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
     }
 }