diff --git a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
index 3a61daeca12..ca1524f1214 100644
--- a/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
+++ b/core/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.automaton.RegExp;
+import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.common.lucene.search.Queries;
 import org.elasticsearch.common.unit.Fuzziness;
 import org.elasticsearch.index.mapper.MappedFieldType;
@@ -484,30 +485,31 @@ public class MapperQueryParser extends QueryParser {
         if (!settings.analyzeWildcard()) {
             return super.getPrefixQuery(field, termStr);
         }
+        List<String> tlist;
         // get Analyzer from superclass and tokenize the term
-        TokenStream source;
+        TokenStream source = null;
         try {
-            source = getAnalyzer().tokenStream(field, termStr);
-            source.reset();
-        } catch (IOException e) {
-            return super.getPrefixQuery(field, termStr);
-        }
-        List<String> tlist = new ArrayList<>();
-        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-
-        while (true) {
             try {
-                if (!source.incrementToken()) break;
+                source = getAnalyzer().tokenStream(field, termStr);
+                source.reset();
             } catch (IOException e) {
-                break;
+                return super.getPrefixQuery(field, termStr);
            }
-            tlist.add(termAtt.toString());
-        }
+            tlist = new ArrayList<>();
+            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
 
-        try {
-            source.close();
-        } catch (IOException e) {
-            // ignore
+            while (true) {
+                try {
+                    if (!source.incrementToken()) break;
+                } catch (IOException e) {
+                    break;
+                }
+                tlist.add(termAtt.toString());
+            }
+        } finally {
+            if (source != null) {
+                IOUtils.closeWhileHandlingException(source);
+            }
         }
 
         if (tlist.size() == 1) {
@@ -617,8 +619,7 @@ public class MapperQueryParser extends QueryParser {
             char c = termStr.charAt(i);
             if (c == '?' || c == '*') {
                 if (isWithinToken) {
-                    try {
-                        TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
+                    try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
                         source.reset();
                         CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
                         if (source.incrementToken()) {
@@ -633,7 +634,6 @@ public class MapperQueryParser extends QueryParser {
                             // no tokens, just use what we have now
                             aggStr.append(tmp);
                         }
-                        source.close();
                     } catch (IOException e) {
                         aggStr.append(tmp);
                     }
@@ -648,22 +648,22 @@
         }
         if (isWithinToken) {
             try {
-                TokenStream source = getAnalyzer().tokenStream(field, tmp.toString());
-                source.reset();
-                CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
-                if (source.incrementToken()) {
-                    String term = termAtt.toString();
-                    if (term.length() == 0) {
+                try (TokenStream source = getAnalyzer().tokenStream(field, tmp.toString())) {
+                    source.reset();
+                    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
+                    if (source.incrementToken()) {
+                        String term = termAtt.toString();
+                        if (term.length() == 0) {
+                            // no tokens, just use what we have now
+                            aggStr.append(tmp);
+                        } else {
+                            aggStr.append(term);
+                        }
+                    } else {
                         // no tokens, just use what we have now
                         aggStr.append(tmp);
-                    } else {
-                        aggStr.append(term);
                     }
-                } else {
-                    // no tokens, just use what we have now
-                    aggStr.append(tmp);
                 }
-                source.close();
             } catch (IOException e) {
                 aggStr.append(tmp);
             }
diff --git a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
index 5db4f932c67..b2b23a29981 100644
--- a/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
+++ b/core/src/main/java/org/apache/lucene/search/suggest/analyzing/XAnalyzingSuggester.java
@@ -959,11 +959,9 @@ public long ramBytesUsed() {
       // TODO: is there a Reader from a CharSequence?
      // Turn tokenstream into automaton:
      Automaton automaton = null;
-      TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
-      try {
+
+      try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
        automaton = getTokenStreamToAutomaton().toAutomaton(ts);
-      } finally {
-        IOUtils.closeWhileHandlingException(ts);
      }
 
      automaton = replaceSep(automaton);
diff --git a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
index 42d05ea4637..4f7a605341e 100644
--- a/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
+++ b/core/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java
@@ -217,12 +217,10 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRequest, AnalyzeResponse> {
         List<AnalyzeResponse.AnalyzeToken> tokens = new ArrayList<>();
-        TokenStream stream = null;
         int lastPosition = -1;
         int lastOffset = 0;
         for (String text : request.text()) {
-            try {
-                stream = analyzer.tokenStream(field, text);
+            try (TokenStream stream = analyzer.tokenStream(field, text)) {
                 stream.reset();
                 CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                 PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
@@ -243,11 +241,8 @@ public class TransportAnalyzeAction extends TransportSingleShardAction<AnalyzeRequest, AnalyzeResponse> {
             } catch (IOException e) {
                 throw new ElasticsearchException("failed to analyze", e);
-            } finally {
-                if (stream != null) {
-                    stream.close();
-                }
             }
         }
diff --git a/core/src/main/java/org/elasticsearch/percolator/MultiDocumentPercolatorIndex.java b/core/src/main/java/org/elasticsearch/percolator/MultiDocumentPercolatorIndex.java
--- a/core/src/main/java/org/elasticsearch/percolator/MultiDocumentPercolatorIndex.java
+++ b/core/src/main/java/org/elasticsearch/percolator/MultiDocumentPercolatorIndex.java
@@ ... @@ class MultiDocumentPercolatorIndex implements PercolatorIndex {
                     // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
                     // like the indexer does
-                    TokenStream tokenStream = field.tokenStream(analyzer, null);
-                    if (tokenStream != null) {
-                        memoryIndex.addField(field.name(), tokenStream, field.boost());
-                    }
+                    try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
+                        if (tokenStream != null) {
+                            memoryIndex.addField(field.name(), tokenStream, field.boost());
+                        }
+                    }
                 } catch (IOException e) {
                     throw new ElasticsearchException("Failed to create token stream", e);
                 }
diff --git a/core/src/main/java/org/elasticsearch/percolator/SingleDocumentPercolatorIndex.java b/core/src/main/java/org/elasticsearch/percolator/SingleDocumentPercolatorIndex.java
index 3233cdcd756..1271872cab6 100644
--- a/core/src/main/java/org/elasticsearch/percolator/SingleDocumentPercolatorIndex.java
+++ b/core/src/main/java/org/elasticsearch/percolator/SingleDocumentPercolatorIndex.java
@@ -56,10 +56,11 @@ class SingleDocumentPercolatorIndex implements PercolatorIndex {
                 Analyzer analyzer = context.mapperService().documentMapper(parsedDocument.type()).mappers().indexAnalyzer();
                 // TODO: instead of passing null here, we can have a CTL<Map<String,TokenStream>> and pass previous,
                 // like the indexer does
-                TokenStream tokenStream = field.tokenStream(analyzer, null);
-                if (tokenStream != null) {
-                    memoryIndex.addField(field.name(), tokenStream, field.boost());
-                }
+                try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
+                    if (tokenStream != null) {
+                        memoryIndex.addField(field.name(), tokenStream, field.boost());
+                    }
+                }
             } catch (Exception e) {
                 throw new ElasticsearchException("Failed to create token stream for [" + field.name() + "]", e);
             }
diff --git a/core/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java
index d50c53a1380..041ed754d76 100644
--- a/core/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java
+++ b/core/src/main/java/org/elasticsearch/search/highlight/PlainHighlighter.java
@@ -33,6 +33,7 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
 import org.apache.lucene.search.highlight.TextFragment;
 import org.apache.lucene.util.BytesRefHash;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.IOUtils;
 import org.elasticsearch.ExceptionsHelper;
 import org.elasticsearch.common.text.StringText;
 import org.elasticsearch.common.text.Text;
@@ -109,15 +110,16 @@ public class PlainHighlighter implements Highlighter {
             for (Object textToHighlight : textsToHighlight) {
                 String text = textToHighlight.toString();
 
-                TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().names().indexName(), text);
-                if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
-                    // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
-                    continue;
-                }
-                TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
-                for (TextFragment bestTextFragment : bestTextFragments) {
-                    if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
-                        fragsList.add(bestTextFragment);
+                try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().names().indexName(), text)) {
+                    if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
+                        // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
+                        continue;
+                    }
+                    TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
+                    for (TextFragment bestTextFragment : bestTextFragments) {
+                        if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
+                            fragsList.add(bestTextFragment);
+                        }
                     }
                 }
             }
@@ -165,7 +167,7 @@ public class PlainHighlighter implements Highlighter {
             String fieldContents = textsToHighlight.get(0).toString();
             int end;
             try {
-                end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer.tokenStream(mapper.fieldType().names().indexName(), fieldContents));
+                end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().names().indexName(), fieldContents);
             } catch (Exception e) {
                 throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
             }
@@ -181,8 +183,8 @@ public class PlainHighlighter implements Highlighter {
         return true;
     }
 
-    private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, TokenStream tokenStream) throws IOException {
-        try {
+    private static int findGoodEndForNoHighlightExcerpt(int noMatchSize, Analyzer analyzer, String fieldName, String contents) throws IOException {
+        try (TokenStream tokenStream = analyzer.tokenStream(fieldName, contents)) {
             if (!tokenStream.hasAttribute(OffsetAttribute.class)) {
                 // Can't split on term boundaries without offsets
                 return -1;
@@ -200,11 +202,9 @@
                 }
                 end = attr.endOffset();
             }
+            tokenStream.end();
             // We've exhausted the token stream so we should just highlight everything.
             return end;
-        } finally {
-            tokenStream.end();
-            tokenStream.close();
         }
     }
 }
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
index bcf8cee64c2..8dd193f6c24 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/SuggestUtils.java
@@ -28,6 +28,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.automaton.LevenshteinAutomata;
 import org.elasticsearch.common.ParseField;
 import org.elasticsearch.common.ParseFieldMatcher;
@@ -116,22 +117,34 @@ public final class SuggestUtils {
     }
 
     public static int analyze(Analyzer analyzer, CharsRef toAnalyze, String field, TokenConsumer consumer) throws IOException {
-        TokenStream ts = analyzer.tokenStream(
-                field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length)
-        );
-        return analyze(ts, consumer);
+        try (TokenStream ts = analyzer.tokenStream(
+                 field, new FastCharArrayReader(toAnalyze.chars, toAnalyze.offset, toAnalyze.length))) {
+            return analyze(ts, consumer);
+        }
     }
 
+    /** NOTE: this method closes the TokenStream, even on exception, which is awkward
+     *  because really the caller who called {@link Analyzer#tokenStream} should close it,
+     *  but when trying that there are recursion issues when we try to use the same
+     *  TokenStream twice in the same recursion... */
     public static int analyze(TokenStream stream, TokenConsumer consumer) throws IOException {
-        stream.reset();
-        consumer.reset(stream);
         int numTokens = 0;
-        while (stream.incrementToken()) {
-            consumer.nextToken();
-            numTokens++;
+        boolean success = false;
+        try {
+            stream.reset();
+            consumer.reset(stream);
+            while (stream.incrementToken()) {
+                consumer.nextToken();
+                numTokens++;
+            }
+            consumer.end();
+        } finally {
+            if (success) {
+                stream.close();
+            } else {
+                IOUtils.closeWhileHandlingException(stream);
+            }
         }
-        consumer.end();
-        stream.close();
         return numTokens;
     }
 
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java
index ebcf0456f87..5edf848dda3 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/completion/CompletionTokenStream.java
@@ -100,9 +100,7 @@ public final class CompletionTokenStream extends TokenStream {
 
     @Override
     public void close() throws IOException {
-        if (posInc == -1) {
-            input.close();
-        }
+        input.close();
     }
 
     public static interface ToFiniteStrings {
diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java
index e7d0eb378c3..724e3d40e25 100644
--- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java
+++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/PhraseSuggester.java
@@ -92,12 +92,13 @@ public final class PhraseSuggester extends Suggester<PhraseSuggestionContext> {
         if (gens.size() > 0 && suggestTerms != null) {
             final NoisyChannelSpellChecker checker = new NoisyChannelSpellChecker(realWordErrorLikelihood, suggestion.getRequireUnigram(), suggestion.getTokenLimit());
             final BytesRef separator = suggestion.separator();
-            TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField());
-
             WordScorer wordScorer = suggestion.model().newScorer(indexReader, suggestTerms, suggestField, realWordErrorLikelihood, separator);
-            Result checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
-                    gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
-                    suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
+            Result checkerResult;
+            try (TokenStream stream = checker.tokenStream(suggestion.getAnalyzer(), suggestion.getText(), spare, suggestion.getField())) {
+                checkerResult = checker.getCorrections(stream, new MultiCandidateGeneratorWrapper(suggestion.getShardSize(),
+                        gens.toArray(new CandidateGenerator[gens.size()])), suggestion.maxErrors(),
+                        suggestion.getShardSize(), wordScorer, suggestion.confidence(), suggestion.gramSize());
+            }
 
             PhraseSuggestion.Entry resultEntry = buildResultEntry(suggestion, spare, checkerResult.cutoffScore);
             response.addTerm(resultEntry);
diff --git a/core/src/test/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapperTests.java b/core/src/test/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapperTests.java
index 818366647d1..5a644e56f48 100644
--- a/core/src/test/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapperTests.java
+++ b/core/src/test/java/org/elasticsearch/index/mapper/core/TokenCountFieldMapperTests.java
@@ -19,7 +19,9 @@
 
 package org.elasticsearch.index.mapper.core;
 
+import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -87,7 +89,14 @@ public class TokenCountFieldMapperTests extends ESSingleNodeTestCase {
         int finalTokenIncrement = 4; // Count the final token increment on the rare token streams that have them
         Token[] tokens = new Token[] {t1, t2, t3};
         Collections.shuffle(Arrays.asList(tokens), getRandom());
-        TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
-        assertThat(TokenCountFieldMapper.countPositions(tokenStream), equalTo(7));
+        final TokenStream tokenStream = new CannedTokenStream(finalTokenIncrement, 0, tokens);
+        // TODO: we have no CannedAnalyzer?
+        Analyzer analyzer = new Analyzer() {
+                @Override
+                public TokenStreamComponents createComponents(String fieldName) {
+                    return new TokenStreamComponents(new MockTokenizer(), tokenStream);
+                }
+            };
+        assertThat(TokenCountFieldMapper.countPositions(analyzer, "", ""), equalTo(7));
     }
 }
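
Note on the recurring idiom: every call site above replaces manual close()/finally handling with try-with-resources around Analyzer.tokenStream(), while still calling reset() before consuming tokens and end() afterwards. A minimal, self-contained sketch of that consumption pattern is below; it is an illustration, not part of the patch, and the analyzer, field name, and class name are stand-ins chosen for the example.

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    class TokenStreamConsumptionSketch {
        // Counts the tokens the analyzer produces; the TokenStream is closed even if
        // reset() or incrementToken() throws, which is what the diffs above rely on.
        static int countTokens(Analyzer analyzer, String field, String text) throws IOException {
            int count = 0;
            try (TokenStream stream = analyzer.tokenStream(field, text)) {
                CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                stream.reset();                 // required before the first incrementToken()
                while (stream.incrementToken()) {
                    count++;                    // term.toString() would yield the current token text
                }
                stream.end();                   // consume end-of-stream state (e.g. final offset)
            }
            return count;
        }

        public static void main(String[] args) throws IOException {
            System.out.println(countTokens(new WhitespaceAnalyzer(), "body", "close token streams promptly"));
        }
    }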