From 64a795b6e3dd82b9c579ee1652db64da7df56c6b Mon Sep 17 00:00:00 2001
From: Robert Muir
Date: Mon, 7 Oct 2013 06:24:25 +0000
Subject: [PATCH] LUCENE-5259: convert analysis consumers to try-with-resources

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1529770 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/analysis/synonym/SynonymMap.java        |  44 +-
 .../analysis/core/TestKeywordAnalyzer.java         |  15 +-
 .../analysis/core/TestStopAnalyzer.java            |  52 +-
 .../TestPerFieldAnalyzerWrapper.java               |  26 +-
 .../shingle/ShingleAnalyzerWrapperTest.java        |  36 +-
 .../analysis/util/TestCharTokenizers.java          |  40 +-
 .../icu/segmentation/TestICUTokenizer.java         |  20 +-
 .../lucene/analysis/ja/TestExtendedMode.java       |  14 +-
 .../analysis/ja/TestJapaneseTokenizer.java         | 174 ++++---
 .../morfologik/TestMorfologikAnalyzer.java         |  59 +--
 .../cn/smart/TestSmartChineseAnalyzer.java         |  16 +-
 .../BooleanPerceptronClassifier.java               |  21 +-
 .../SimpleNaiveBayesClassifier.java                |  14 +-
 .../lucene/index/DocInverterPerField.java          |  17 +-
 .../lucene/analysis/TestMockAnalyzer.java          |  24 +-
 .../apache/lucene/index/TestLongPostings.java      |  39 +-
 .../lucene/index/TestTermVectorsWriter.java        |  23 +-
 .../apache/lucene/search/TestPhraseQuery.java      |  18 +-
 .../vectorhighlight/AbstractTestCase.java          |  20 +-
 .../lucene/queries/mlt/MoreLikeThis.java           |  44 +-
 .../analyzing/AnalyzingQueryParser.java            |   6 +-
 .../queryparser/classic/QueryParserBase.java       | 106 ++--
 .../AnalyzerQueryNodeProcessor.java                |  62 +--
 .../xml/builders/LikeThisQueryBuilder.java         |   3 +-
 .../xml/builders/SpanOrTermsBuilder.java           |   7 +-
 .../xml/builders/TermsFilterBuilder.java           |   4 +-
 .../xml/builders/TermsQueryBuilder.java            |   4 +-
 .../sandbox/queries/FuzzyLikeThisQuery.java        |  98 ++--
 .../analyzing/AnalyzingInfixSuggester.java         |  64 ++-
 .../suggest/analyzing/AnalyzingSuggester.java      |  22 +-
 .../suggest/analyzing/FreeTextSuggester.java       | 478 +++++++++---------
 .../AnalyzingInfixSuggesterTest.java               |  68 +--
 .../lucene/analysis/CollationTestBase.java         |  42 +-
 .../apache/solr/schema/ICUCollationField.java      |  25 +-
 .../handler/AnalysisRequestHandlerBase.java        |  16 +-
 .../component/QueryElevationComponent.java         |  16 +-
 .../component/SpellCheckComponent.java             |  44 +-
 .../solr/parser/SolrQueryParserBase.java           |  79 ++-
 .../apache/solr/schema/CollationField.java         |  30 +-
 .../org/apache/solr/schema/TextField.java          | 104 ++--
 .../solr/spelling/SimpleQueryConverter.java        |  12 +-
 41 files changed, 958 insertions(+), 1048 deletions(-)

diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
index e5b05c3c2d7..8b455c5ba75 100644
--- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
@@ -307,30 +307,30 @@ public class SynonymMap {
    * separates by {@link SynonymMap#WORD_SEPARATOR}.
    * reuse and its chars must not be null.
*/ public CharsRef analyze(String text, CharsRef reuse) throws IOException { - TokenStream ts = analyzer.tokenStream("", text); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); - ts.reset(); - reuse.length = 0; - while (ts.incrementToken()) { - int length = termAtt.length(); - if (length == 0) { - throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token"); + try (TokenStream ts = analyzer.tokenStream("", text)) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); + ts.reset(); + reuse.length = 0; + while (ts.incrementToken()) { + int length = termAtt.length(); + if (length == 0) { + throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token"); + } + if (posIncAtt.getPositionIncrement() != 1) { + throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1"); + } + reuse.grow(reuse.length + length + 1); /* current + word + separator */ + int end = reuse.offset + reuse.length; + if (reuse.length > 0) { + reuse.chars[end++] = SynonymMap.WORD_SEPARATOR; + reuse.length++; + } + System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length); + reuse.length += length; } - if (posIncAtt.getPositionIncrement() != 1) { - throw new IllegalArgumentException("term: " + text + " analyzed to a token with posinc != 1"); - } - reuse.grow(reuse.length + length + 1); /* current + word + separator */ - int end = reuse.offset + reuse.length; - if (reuse.length > 0) { - reuse.chars[end++] = SynonymMap.WORD_SEPARATOR; - reuse.length++; - } - System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length); - reuse.length += length; + ts.end(); } - ts.end(); - ts.close(); if (reuse.length == 0) { throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer"); } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java index 1b07d20194a..a5ce14fc13e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestKeywordAnalyzer.java @@ -117,12 +117,15 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase { // LUCENE-1441 public void testOffsets() throws Exception { - TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd")); - OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); - stream.reset(); - assertTrue(stream.incrementToken()); - assertEquals(0, offsetAtt.startOffset()); - assertEquals(4, offsetAtt.endOffset()); + try (TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"))) { + OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + stream.reset(); + assertTrue(stream.incrementToken()); + assertEquals(0, offsetAtt.startOffset()); + assertEquals(4, offsetAtt.endOffset()); + assertFalse(stream.incrementToken()); + stream.end(); + } } /** blast some random strings through the analyzer */ diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java index 9f9c2714547..e27adbb5883 100644 
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/core/TestStopAnalyzer.java @@ -46,27 +46,31 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { public void testDefaults() throws IOException { assertTrue(stop != null); - TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer"); - assertTrue(stream != null); - CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); - stream.reset(); + try (TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer")) { + assertTrue(stream != null); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + stream.reset(); - while (stream.incrementToken()) { - assertFalse(inValidTokens.contains(termAtt.toString())); + while (stream.incrementToken()) { + assertFalse(inValidTokens.contains(termAtt.toString())); + } + stream.end(); } } public void testStopList() throws IOException { CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false); StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet); - TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer"); - assertNotNull(stream); - CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + try (TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer")) { + assertNotNull(stream); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); - stream.reset(); - while (stream.incrementToken()) { - String text = termAtt.toString(); - assertFalse(stopWordsSet.contains(text)); + stream.reset(); + while (stream.incrementToken()) { + String text = termAtt.toString(); + assertFalse(stopWordsSet.contains(text)); + } + stream.end(); } } @@ -75,17 +79,19 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase { StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet); String s = "This is a good test of the english stop analyzer with positions"; int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1}; - TokenStream stream = newStop.tokenStream("test", s); - assertNotNull(stream); - int i = 0; - CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); - PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); + try (TokenStream stream = newStop.tokenStream("test", s)) { + assertNotNull(stream); + int i = 0; + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class); - stream.reset(); - while (stream.incrementToken()) { - String text = termAtt.toString(); - assertFalse(stopWordsSet.contains(text)); - assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement()); + stream.reset(); + while (stream.incrementToken()) { + String text = termAtt.toString(); + assertFalse(stopWordsSet.contains(text)); + assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement()); + } + stream.end(); } } diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java index ce0cd745095..fe1b282e172 100644 --- 
a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPerFieldAnalyzerWrapper.java @@ -37,23 +37,29 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase { PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField); - TokenStream tokenStream = analyzer.tokenStream("field", text); - CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); - tokenStream.reset(); + try (TokenStream tokenStream = analyzer.tokenStream("field", text)) { + CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); + tokenStream.reset(); - assertTrue(tokenStream.incrementToken()); - assertEquals("WhitespaceAnalyzer does not lowercase", + assertTrue(tokenStream.incrementToken()); + assertEquals("WhitespaceAnalyzer does not lowercase", "Qwerty", termAtt.toString()); + assertFalse(tokenStream.incrementToken()); + tokenStream.end(); + } - tokenStream = analyzer.tokenStream("special", text); - termAtt = tokenStream.getAttribute(CharTermAttribute.class); - tokenStream.reset(); + try (TokenStream tokenStream = analyzer.tokenStream("special", text)) { + CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class); + tokenStream.reset(); - assertTrue(tokenStream.incrementToken()); - assertEquals("SimpleAnalyzer lowercases", + assertTrue(tokenStream.incrementToken()); + assertEquals("SimpleAnalyzer lowercases", "qwerty", termAtt.toString()); + assertFalse(tokenStream.incrementToken()); + tokenStream.end(); + } } public void testCharFilters() throws Exception { diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java index 855a14e3825..49b09873314 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java @@ -95,17 +95,19 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { public void testShingleAnalyzerWrapperPhraseQuery() throws Exception { PhraseQuery q = new PhraseQuery(); - TokenStream ts = analyzer.tokenStream("content", "this sentence"); - int j = -1; + try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) { + int j = -1; - PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - j += posIncrAtt.getPositionIncrement(); - String termText = termAtt.toString(); - q.add(new Term("content", termText), j); + ts.reset(); + while (ts.incrementToken()) { + j += posIncrAtt.getPositionIncrement(); + String termText = termAtt.toString(); + q.add(new Term("content", termText), j); + } + ts.end(); } ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; @@ -121,16 +123,16 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { public void testShingleAnalyzerWrapperBooleanQuery() throws Exception { BooleanQuery q = new BooleanQuery(); - TokenStream ts = 
analyzer.tokenStream("content", "test sentence"); + try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - - ts.reset(); - - while (ts.incrementToken()) { - String termText = termAtt.toString(); - q.add(new TermQuery(new Term("content", termText)), + ts.reset(); + while (ts.incrementToken()) { + String termText = termAtt.toString(); + q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD); + } + ts.end(); } ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs; diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java index e8880dfeaf8..b52540cec0e 100644 --- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java +++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/util/TestCharTokenizers.java @@ -123,18 +123,18 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { int num = 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { String s = _TestUtil.randomUnicodeString(random()); - TokenStream ts = analyzer.tokenStream("foo", s); - ts.reset(); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - while (ts.incrementToken()) { - String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset()); - for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) { - cp = highlightedText.codePointAt(j); - assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp)); + try (TokenStream ts = analyzer.tokenStream("foo", s)) { + ts.reset(); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + while (ts.incrementToken()) { + String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset()); + for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) { + cp = highlightedText.codePointAt(j); + assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp)); + } } + ts.end(); } - ts.end(); - ts.close(); } // just for fun checkRandomData(random(), analyzer, num); @@ -161,18 +161,18 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase { int num = 1000 * RANDOM_MULTIPLIER; for (int i = 0; i < num; i++) { String s = _TestUtil.randomUnicodeString(random()); - TokenStream ts = analyzer.tokenStream("foo", s); - ts.reset(); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - while (ts.incrementToken()) { - String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset()); - for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) { - cp = highlightedText.codePointAt(j); - assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp)); + try (TokenStream ts = analyzer.tokenStream("foo", s)) { + ts.reset(); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + while (ts.incrementToken()) { + String highlightedText = s.substring(offsetAtt.startOffset(), offsetAtt.endOffset()); + for (int j = 0, cp = 0; j < highlightedText.length(); j += Character.charCount(cp)) { + cp = highlightedText.codePointAt(j); + assertTrue("non-letter:" + Integer.toHexString(cp), Character.isLetter(cp)); + } } + ts.end(); } - ts.end(); - ts.close(); } // just for fun checkRandomData(random(), 
analyzer, num); diff --git a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java index f6649f3b872..a7c02688b54 100644 --- a/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java +++ b/lucene/analysis/icu/src/test/org/apache/lucene/analysis/icu/segmentation/TestICUTokenizer.java @@ -249,16 +249,16 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase { } public void testTokenAttributes() throws Exception { - TokenStream ts = a.tokenStream("dummy", "This is a test"); - ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - assertEquals(UScript.LATIN, scriptAtt.getCode()); - assertEquals(UScript.getName(UScript.LATIN), scriptAtt.getName()); - assertEquals(UScript.getShortName(UScript.LATIN), scriptAtt.getShortName()); - assertTrue(ts.reflectAsString(false).contains("script=Latin")); + try (TokenStream ts = a.tokenStream("dummy", "This is a test")) { + ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + assertEquals(UScript.LATIN, scriptAtt.getCode()); + assertEquals(UScript.getName(UScript.LATIN), scriptAtt.getName()); + assertEquals(UScript.getShortName(UScript.LATIN), scriptAtt.getShortName()); + assertTrue(ts.reflectAsString(false).contains("script=Latin")); + } + ts.end(); } - ts.end(); - ts.close(); } } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java index 250f26e2762..8ab644a01e2 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestExtendedMode.java @@ -53,14 +53,14 @@ public class TestExtendedMode extends BaseTokenStreamTestCase { int numIterations = atLeast(1000); for (int i = 0; i < numIterations; i++) { String s = _TestUtil.randomUnicodeString(random(), 100); - TokenStream ts = analyzer.tokenStream("foo", s); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - assertTrue(UnicodeUtil.validUTF16String(termAtt)); + try (TokenStream ts = analyzer.tokenStream("foo", s)) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + assertTrue(UnicodeUtil.validUTF16String(termAtt)); + } + ts.end(); } - ts.end(); - ts.close(); } } diff --git a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java index b31949a33ba..aa64765c1a5 100644 --- a/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java +++ b/lucene/analysis/kuromoji/src/test/org/apache/lucene/analysis/ja/TestJapaneseTokenizer.java @@ -141,13 +141,13 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { * ideally the test would actually fail instead of hanging... 
*/ public void testDecomposition5() throws Exception { - TokenStream ts = analyzer.tokenStream("bogus", "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ"); - ts.reset(); - while (ts.incrementToken()) { + try (TokenStream ts = analyzer.tokenStream("bogus", "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ")) { + ts.reset(); + while (ts.incrementToken()) { + } + ts.end(); } - ts.end(); - ts.close(); } /* @@ -213,12 +213,12 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { public void testLargeDocReliability() throws Exception { for (int i = 0; i < 100; i++) { String s = _TestUtil.randomUnicodeString(random(), 10000); - TokenStream ts = analyzer.tokenStream("foo", s); - ts.reset(); - while (ts.incrementToken()) { + try (TokenStream ts = analyzer.tokenStream("foo", s)) { + ts.reset(); + while (ts.incrementToken()) { + } + ts.end(); } - ts.end(); - ts.close(); } } @@ -236,29 +236,31 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { System.out.println("\nTEST: iter=" + i); } String s = _TestUtil.randomUnicodeString(random(), 100); - TokenStream ts = analyzer.tokenStream("foo", s); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - assertTrue(UnicodeUtil.validUTF16String(termAtt)); + try (TokenStream ts = analyzer.tokenStream("foo", s)) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + assertTrue(UnicodeUtil.validUTF16String(termAtt)); + } + ts.end(); } - ts.end(); - ts.close(); } } public void testOnlyPunctuation() throws IOException { - TokenStream ts = analyzerNoPunct.tokenStream("foo", "。、。。"); - ts.reset(); - assertFalse(ts.incrementToken()); - ts.end(); + try (TokenStream ts = analyzerNoPunct.tokenStream("foo", "。、。。")) { + ts.reset(); + assertFalse(ts.incrementToken()); + ts.end(); + } } public void testOnlyPunctuationExtended() throws IOException { - TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......"); - ts.reset(); - assertFalse(ts.incrementToken()); - ts.end(); + try (TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......")) { + ts.reset(); + assertFalse(ts.incrementToken()); + ts.end(); + } } // note: test is kinda silly since kuromoji emits punctuation tokens. @@ -369,75 +371,81 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { } private void assertReadings(String input, String... readings) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", input); - ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class); - ts.reset(); - for(String reading : readings) { - assertTrue(ts.incrementToken()); - assertEquals(reading, readingAtt.getReading()); + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class); + ts.reset(); + for(String reading : readings) { + assertTrue(ts.incrementToken()); + assertEquals(reading, readingAtt.getReading()); + } + assertFalse(ts.incrementToken()); + ts.end(); } - assertFalse(ts.incrementToken()); - ts.end(); } private void assertPronunciations(String input, String... 
pronunciations) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", input); - ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class); - ts.reset(); - for(String pronunciation : pronunciations) { - assertTrue(ts.incrementToken()); - assertEquals(pronunciation, readingAtt.getPronunciation()); + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class); + ts.reset(); + for(String pronunciation : pronunciations) { + assertTrue(ts.incrementToken()); + assertEquals(pronunciation, readingAtt.getPronunciation()); + } + assertFalse(ts.incrementToken()); + ts.end(); } - assertFalse(ts.incrementToken()); - ts.end(); } private void assertBaseForms(String input, String... baseForms) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", input); - BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class); - ts.reset(); - for(String baseForm : baseForms) { - assertTrue(ts.incrementToken()); - assertEquals(baseForm, baseFormAtt.getBaseForm()); + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class); + ts.reset(); + for(String baseForm : baseForms) { + assertTrue(ts.incrementToken()); + assertEquals(baseForm, baseFormAtt.getBaseForm()); + } + assertFalse(ts.incrementToken()); + ts.end(); } - assertFalse(ts.incrementToken()); - ts.end(); } private void assertInflectionTypes(String input, String... inflectionTypes) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", input); - InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class); - ts.reset(); - for(String inflectionType : inflectionTypes) { - assertTrue(ts.incrementToken()); - assertEquals(inflectionType, inflectionAtt.getInflectionType()); + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class); + ts.reset(); + for(String inflectionType : inflectionTypes) { + assertTrue(ts.incrementToken()); + assertEquals(inflectionType, inflectionAtt.getInflectionType()); + } + assertFalse(ts.incrementToken()); + ts.end(); } - assertFalse(ts.incrementToken()); - ts.end(); } private void assertInflectionForms(String input, String... inflectionForms) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", input); - InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class); - ts.reset(); - for(String inflectionForm : inflectionForms) { - assertTrue(ts.incrementToken()); - assertEquals(inflectionForm, inflectionAtt.getInflectionForm()); + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class); + ts.reset(); + for(String inflectionForm : inflectionForms) { + assertTrue(ts.incrementToken()); + assertEquals(inflectionForm, inflectionAtt.getInflectionForm()); + } + assertFalse(ts.incrementToken()); + ts.end(); } - assertFalse(ts.incrementToken()); - ts.end(); } private void assertPartsOfSpeech(String input, String... 
partsOfSpeech) throws IOException { - TokenStream ts = analyzer.tokenStream("ignored", input); - PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class); - ts.reset(); - for(String partOfSpeech : partsOfSpeech) { - assertTrue(ts.incrementToken()); - assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech()); + try (TokenStream ts = analyzer.tokenStream("ignored", input)) { + PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class); + ts.reset(); + for(String partOfSpeech : partsOfSpeech) { + assertTrue(ts.incrementToken()); + assertEquals(partOfSpeech, partOfSpeechAtt.getPartOfSpeech()); + } + assertFalse(ts.incrementToken()); + ts.end(); } - assertFalse(ts.incrementToken()); - ts.end(); } public void testReadings() throws Exception { @@ -631,11 +639,11 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { long totalStart = System.currentTimeMillis(); for (int i = 0; i < numIterations; i++) { - final TokenStream ts = analyzer.tokenStream("ignored", line); - ts.reset(); - while(ts.incrementToken()); - ts.end(); - ts.close(); + try (TokenStream ts = analyzer.tokenStream("ignored", line)) { + ts.reset(); + while(ts.incrementToken()); + ts.end(); + } } String[] sentences = line.split("、|。"); if (VERBOSE) { @@ -645,11 +653,11 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase { totalStart = System.currentTimeMillis(); for (int i = 0; i < numIterations; i++) { for (String sentence: sentences) { - final TokenStream ts = analyzer.tokenStream("ignored", sentence); - ts.reset(); - while(ts.incrementToken()); - ts.end(); - ts.close(); + try (TokenStream ts = analyzer.tokenStream("ignored", sentence)) { + ts.reset(); + while(ts.incrementToken()); + ts.end(); + } } } if (VERBOSE) { diff --git a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java index 366355b7d6e..fa79298eb92 100644 --- a/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java +++ b/lucene/analysis/morfologik/src/test/org/apache/lucene/analysis/morfologik/TestMorfologikAnalyzer.java @@ -72,34 +72,36 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { @SuppressWarnings("unused") private void dumpTokens(String input) throws IOException { - TokenStream ts = getTestAnalyzer().tokenStream("dummy", input); - ts.reset(); + try (TokenStream ts = getTestAnalyzer().tokenStream("dummy", input)) { + ts.reset(); - MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class); - CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class); - while (ts.incrementToken()) { - System.out.println(charTerm.toString() + " => " + attribute.getTags()); + MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class); + CharTermAttribute charTerm = ts.getAttribute(CharTermAttribute.class); + while (ts.incrementToken()) { + System.out.println(charTerm.toString() + " => " + attribute.getTags()); + } + ts.end(); } } /** Test reuse of MorfologikFilter with leftover stems. 
*/ public final void testLeftoverStems() throws IOException { Analyzer a = getTestAnalyzer(); - TokenStream ts_1 = a.tokenStream("dummy", "liście"); - CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class); - ts_1.reset(); - ts_1.incrementToken(); - assertEquals("first stream", "liście", termAtt_1.toString()); - ts_1.end(); - ts_1.close(); + try (TokenStream ts_1 = a.tokenStream("dummy", "liście")) { + CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class); + ts_1.reset(); + ts_1.incrementToken(); + assertEquals("first stream", "liście", termAtt_1.toString()); + ts_1.end(); + } - TokenStream ts_2 = a.tokenStream("dummy", "danych"); - CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class); - ts_2.reset(); - ts_2.incrementToken(); - assertEquals("second stream", "dany", termAtt_2.toString()); - ts_2.end(); - ts_2.close(); + try (TokenStream ts_2 = a.tokenStream("dummy", "danych")) { + CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class); + ts_2.reset(); + ts_2.incrementToken(); + assertEquals("second stream", "dany", termAtt_2.toString()); + ts_2.end(); + } } /** Test stemming of mixed-case tokens. */ @@ -140,28 +142,27 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase { /** Test morphosyntactic annotations. */ public final void testPOSAttribute() throws IOException { - TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście"); - - ts.reset(); - assertPOSToken(ts, "liście", + try (TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście")) { + ts.reset(); + assertPOSToken(ts, "liście", "subst:sg:acc:n2", "subst:sg:nom:n2", "subst:sg:voc:n2"); - assertPOSToken(ts, "liść", + assertPOSToken(ts, "liść", "subst:pl:acc:m3", "subst:pl:nom:m3", "subst:pl:voc:m3"); - assertPOSToken(ts, "list", + assertPOSToken(ts, "list", "subst:sg:loc:m3", "subst:sg:voc:m3"); - assertPOSToken(ts, "lista", + assertPOSToken(ts, "lista", "subst:sg:dat:f", "subst:sg:loc:f"); - ts.end(); - ts.close(); + ts.end(); + } } /** */ diff --git a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java index 1cb8e9b6034..633663e7898 100644 --- a/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java +++ b/lucene/analysis/smartcn/src/test/org/apache/lucene/analysis/cn/smart/TestSmartChineseAnalyzer.java @@ -184,9 +184,11 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { sb.append("我购买了道具和服装。"); } Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); - TokenStream stream = analyzer.tokenStream("", sb.toString()); - stream.reset(); - while (stream.incrementToken()) { + try (TokenStream stream = analyzer.tokenStream("", sb.toString())) { + stream.reset(); + while (stream.incrementToken()) { + } + stream.end(); } } @@ -197,9 +199,11 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase { sb.append("我购买了道具和服装"); } Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT); - TokenStream stream = analyzer.tokenStream("", sb.toString()); - stream.reset(); - while (stream.incrementToken()) { + try (TokenStream stream = analyzer.tokenStream("", sb.toString())) { + stream.reset(); + while (stream.incrementToken()) { + } + stream.end(); } } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java 
b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java index 267ac99949c..32e94881e3e 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/BooleanPerceptronClassifier.java @@ -91,20 +91,19 @@ public class BooleanPerceptronClassifier implements Classifier { throw new IOException("You must first call Classifier#train"); } Long output = 0l; - TokenStream tokenStream = analyzer.tokenStream(textFieldName, - new StringReader(text)); - CharTermAttribute charTermAttribute = tokenStream + try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) { + CharTermAttribute charTermAttribute = tokenStream .addAttribute(CharTermAttribute.class); - tokenStream.reset(); - while (tokenStream.incrementToken()) { - String s = charTermAttribute.toString(); - Long d = Util.get(fst, new BytesRef(s)); - if (d != null) { - output += d; + tokenStream.reset(); + while (tokenStream.incrementToken()) { + String s = charTermAttribute.toString(); + Long d = Util.get(fst, new BytesRef(s)); + if (d != null) { + output += d; + } } + tokenStream.end(); } - tokenStream.end(); - tokenStream.close(); return new ClassificationResult<>(output >= threshold, output.doubleValue()); } diff --git a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java index 74fc631ef28..652c599b867 100644 --- a/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java +++ b/lucene/classification/src/java/org/apache/lucene/classification/SimpleNaiveBayesClassifier.java @@ -85,14 +85,14 @@ public class SimpleNaiveBayesClassifier implements Classifier { private String[] tokenizeDoc(String doc) throws IOException { Collection result = new LinkedList(); - TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc); - CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); - tokenStream.reset(); - while (tokenStream.incrementToken()) { - result.add(charTermAttribute.toString()); + try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc)) { + CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); + tokenStream.reset(); + while (tokenStream.incrementToken()) { + result.add(charTermAttribute.toString()); + } + tokenStream.end(); } - tokenStream.end(); - tokenStream.close(); return result.toArray(new String[result.size()]); } diff --git a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java index 0e966698deb..6824b859b7b 100644 --- a/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/core/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -92,13 +92,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField { fieldState.position += analyzed ? 
docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0; } - final TokenStream stream = field.tokenStream(docState.analyzer); - // reset the TokenStream to the first token - stream.reset(); - - boolean success2 = false; - - try { + try (TokenStream stream = field.tokenStream(docState.analyzer)) { + // reset the TokenStream to the first token + stream.reset(); boolean hasMoreTokens = stream.incrementToken(); fieldState.attributeSource = stream; @@ -179,13 +175,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField { // when we come back around to the field... fieldState.position += posIncrAttribute.getPositionIncrement(); fieldState.offset += offsetAttribute.endOffset(); - success2 = true; - } finally { - if (!success2) { - IOUtils.closeWhileHandlingException(stream); - } else { - stream.close(); - } } fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0; diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java index f04f8496116..9438ea7e7be 100644 --- a/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java +++ b/lucene/core/src/test/org/apache/lucene/analysis/TestMockAnalyzer.java @@ -98,13 +98,13 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { String testString = "t"; Analyzer analyzer = new MockAnalyzer(random()); - TokenStream stream = analyzer.tokenStream("dummy", testString); - stream.reset(); - while (stream.incrementToken()) { - // consume + try (TokenStream stream = analyzer.tokenStream("dummy", testString)) { + stream.reset(); + while (stream.incrementToken()) { + // consume + } + stream.end(); } - stream.end(); - stream.close(); assertAnalyzesTo(analyzer, testString, new String[] { "t" }); } @@ -121,13 +121,13 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase { StringReader reader = new StringReader(s); MockCharFilter charfilter = new MockCharFilter(reader, 2); MockAnalyzer analyzer = new MockAnalyzer(random()); - TokenStream ts = analyzer.tokenStream("bogus", charfilter); - ts.reset(); - while (ts.incrementToken()) { - ; + try (TokenStream ts = analyzer.tokenStream("bogus", charfilter)) { + ts.reset(); + while (ts.incrementToken()) { + ; + } + ts.end(); } - ts.end(); - ts.close(); } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java b/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java index 38cef40276a..90fa89a5588 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestLongPostings.java @@ -47,30 +47,29 @@ public class TestLongPostings extends LuceneTestCase { if (other != null && s.equals(other)) { continue; } - final TokenStream ts = a.tokenStream("foo", s); - final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class); - final BytesRef termBytes = termAtt.getBytesRef(); - ts.reset(); + try (TokenStream ts = a.tokenStream("foo", s)) { + final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class); + final BytesRef termBytes = termAtt.getBytesRef(); + ts.reset(); - int count = 0; - boolean changed = false; + int count = 0; + boolean changed = false; - while(ts.incrementToken()) { - termAtt.fillBytesRef(); - if (count == 0 && !termBytes.utf8ToString().equals(s)) { - // The value was changed during analysis. Keep iterating so the - // tokenStream is exhausted. 
- changed = true; + while(ts.incrementToken()) { + termAtt.fillBytesRef(); + if (count == 0 && !termBytes.utf8ToString().equals(s)) { + // The value was changed during analysis. Keep iterating so the + // tokenStream is exhausted. + changed = true; + } + count++; } - count++; - } - ts.end(); - ts.close(); - - // Did we iterate just once and the value was unchanged? - if (!changed && count == 1) { - return s; + ts.end(); + // Did we iterate just once and the value was unchanged? + if (!changed && count == 1) { + return s; + } } } } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java index 37589097ac9..bf2906a0531 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestTermVectorsWriter.java @@ -174,17 +174,18 @@ public class TestTermVectorsWriter extends LuceneTestCase { Analyzer analyzer = new MockAnalyzer(random()); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); - TokenStream stream = analyzer.tokenStream("field", "abcd "); - stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct? - stream = new CachingTokenFilter(stream); - FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); - customType.setStoreTermVectors(true); - customType.setStoreTermVectorPositions(true); - customType.setStoreTermVectorOffsets(true); - Field f = new Field("field", stream, customType); - doc.add(f); - doc.add(f); - w.addDocument(doc); + try (TokenStream stream = analyzer.tokenStream("field", "abcd ")) { + stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct? 
+ TokenStream cachedStream = new CachingTokenFilter(stream); + FieldType customType = new FieldType(TextField.TYPE_NOT_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + Field f = new Field("field", cachedStream, customType); + doc.add(f); + doc.add(f); + w.addDocument(doc); + } w.close(); IndexReader r = DirectoryReader.open(dir); diff --git a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java index 44ab8cf276b..e78649a745e 100644 --- a/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java +++ b/lucene/core/src/test/org/apache/lucene/search/TestPhraseQuery.java @@ -617,16 +617,16 @@ public class TestPhraseQuery extends LuceneTestCase { break; } } - TokenStream ts = analyzer.tokenStream("ignore", term); - CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - while(ts.incrementToken()) { - String text = termAttr.toString(); - doc.add(text); - sb.append(text).append(' '); + try (TokenStream ts = analyzer.tokenStream("ignore", term)) { + CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + while(ts.incrementToken()) { + String text = termAttr.toString(); + doc.add(text); + sb.append(text).append(' '); + } + ts.end(); } - ts.end(); - ts.close(); } else { // pick existing sub-phrase List lastDoc = docs.get(r.nextInt(docs.size())); diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java index c50265776ad..33b76e128ba 100644 --- a/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java +++ b/lucene/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java @@ -170,20 +170,20 @@ public abstract class AbstractTestCase extends LuceneTestCase { protected List analyze(String text, String field, Analyzer analyzer) throws IOException { List bytesRefs = new ArrayList(); - TokenStream tokenStream = analyzer.tokenStream(field, text); - TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class); + try (TokenStream tokenStream = analyzer.tokenStream(field, text)) { + TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytesRef = termAttribute.getBytesRef(); + BytesRef bytesRef = termAttribute.getBytesRef(); - tokenStream.reset(); + tokenStream.reset(); - while (tokenStream.incrementToken()) { - termAttribute.fillBytesRef(); - bytesRefs.add(BytesRef.deepCopyOf(bytesRef)); - } + while (tokenStream.incrementToken()) { + termAttribute.fillBytesRef(); + bytesRefs.add(BytesRef.deepCopyOf(bytesRef)); + } - tokenStream.end(); - tokenStream.close(); + tokenStream.end(); + } return bytesRefs; } diff --git a/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java b/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java index 13636030123..9e2ceee0af6 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/mlt/MoreLikeThis.java @@ -777,31 +777,31 @@ public final class MoreLikeThis { throw new UnsupportedOperationException("To use MoreLikeThis without " + "term vectors, you must provide an Analyzer"); } - TokenStream ts = analyzer.tokenStream(fieldName, r); - int 
tokenCount = 0; - // for every token - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - String word = termAtt.toString(); - tokenCount++; - if (tokenCount > maxNumTokensParsed) { - break; - } - if (isNoiseWord(word)) { - continue; - } + try (TokenStream ts = analyzer.tokenStream(fieldName, r)) { + int tokenCount = 0; + // for every token + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + String word = termAtt.toString(); + tokenCount++; + if (tokenCount > maxNumTokensParsed) { + break; + } + if (isNoiseWord(word)) { + continue; + } - // increment frequency - Int cnt = termFreqMap.get(word); - if (cnt == null) { - termFreqMap.put(word, new Int()); - } else { - cnt.x++; + // increment frequency + Int cnt = termFreqMap.get(word); + if (cnt == null) { + termFreqMap.put(word, new Int()); + } else { + cnt.x++; + } } + ts.end(); } - ts.end(); - ts.close(); } diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java index 3ce2ae34723..e1ea6500333 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java @@ -162,9 +162,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic. */ protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{ String analyzed = null; - TokenStream stream = null; - try{ - stream = getAnalyzer().tokenStream(field, chunk); + try (TokenStream stream = getAnalyzer().tokenStream(field, chunk)) { stream.reset(); CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); // get first and hopefully only output token @@ -186,7 +184,6 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic. multipleOutputs.append('"'); } stream.end(); - stream.close(); if (null != multipleOutputs) { throw new ParseException( String.format(getLocale(), @@ -196,7 +193,6 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic. // nothing returned by analyzer. Was it a stop word and the user accidentally // used an analyzer with stop words? 
stream.end(); - stream.close(); throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk)); } } catch (IOException e){ diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java index 2bf91203e0a..b5cf904a04f 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/classic/QueryParserBase.java @@ -497,63 +497,51 @@ public abstract class QueryParserBase implements CommonQueryParserConfiguration protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - - TokenStream source; - try { - source = analyzer.tokenStream(field, queryText); - source.reset(); - } catch (IOException e) { - ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text"); - p.initCause(e); - throw p; - } - CachingTokenFilter buffer = new CachingTokenFilter(source); + CachingTokenFilter buffer = null; TermToBytesRefAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; - - buffer.reset(); - - if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { - termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); - } - if (buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); - } - int positionCount = 0; boolean severalTokensAtSamePosition = false; - - boolean hasMoreTokens = false; - if (termAtt != null) { - try { - hasMoreTokens = buffer.incrementToken(); - while (hasMoreTokens) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - } else { - severalTokensAtSamePosition = true; - } - hasMoreTokens = buffer.incrementToken(); - } - } catch (IOException e) { - // ignore - } - } - try { - // rewind the buffer stream + boolean hasMoreTokens = false; + + try (TokenStream source = analyzer.tokenStream(field, queryText)) { + source.reset(); + buffer = new CachingTokenFilter(source); buffer.reset(); - // close original stream - all tokens buffered - source.close(); - } - catch (IOException e) { - ParseException p = new ParseException("Cannot close TokenStream analyzing query text"); + if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { + termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); + } + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); + } + + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore + } + } + } catch (IOException e) { + ParseException p = new ParseException("Eror analyzing query text"); p.initCause(e); throw p; } + + // rewind the buffer stream + buffer.reset(); BytesRef bytes = termAtt == null ? 
null : termAtt.getBytesRef(); @@ -839,38 +827,24 @@ public abstract class QueryParserBase implements CommonQueryParserConfiguration } protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) { - TokenStream source; - if (analyzerIn == null) analyzerIn = analyzer; - try { - source = analyzerIn.tokenStream(field, part); + try (TokenStream source = analyzerIn.tokenStream(field, part)) { source.reset(); - } catch (IOException e) { - throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e); - } - TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); - try { if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part); termAtt.fillBytesRef(); if (source.incrementToken()) throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part); - } catch (IOException e) { - throw new RuntimeException("error analyzing range part: " + part, e); - } - - try { source.end(); - source.close(); + return BytesRef.deepCopyOf(bytes); } catch (IOException e) { - throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e); + throw new RuntimeException("Error analyzing multiTerm term: " + part, e); } - - return BytesRef.deepCopyOf(bytes); } /** diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java index 78610af393b..71d67091878 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java @@ -113,52 +113,44 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl { String text = fieldNode.getTextAsString(); String field = fieldNode.getFieldAsString(); - TokenStream source; - try { - source = this.analyzer.tokenStream(field, text); - source.reset(); - } catch (IOException e1) { - throw new RuntimeException(e1); - } - CachingTokenFilter buffer = new CachingTokenFilter(source); - + CachingTokenFilter buffer = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; int positionCount = 0; boolean severalTokensAtSamePosition = false; + + try (TokenStream source = this.analyzer.tokenStream(field, text)) { + source.reset(); + buffer = new CachingTokenFilter(source); - if (buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); - } - - try { - - while (buffer.incrementToken()) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt - .getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - - } else { - severalTokensAtSamePosition = true; - } - + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } - } catch (IOException e) { - // ignore - } + try { - try { - // rewind the buffer stream - buffer.reset(); + while (buffer.incrementToken()) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt + .getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; - // close original stream - all tokens buffered - source.close(); + } else { + severalTokensAtSamePosition = true; + } + + } + + } catch (IOException e) { + // ignore + } } catch (IOException e) { - // ignore + throw new RuntimeException(e); } + + // rewind the buffer stream + buffer.reset(); if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java index 56cc66ef73c..11565040589 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/LikeThisQueryBuilder.java @@ -73,8 +73,7 @@ public class LikeThisQueryBuilder implements QueryBuilder { if ((stopWords != null) && (fields != null)) { stopWordsSet = new HashSet(); for (String field : fields) { - try { - TokenStream ts = analyzer.tokenStream(field, stopWords); + try (TokenStream ts = analyzer.tokenStream(field, stopWords)) { CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); ts.reset(); while (ts.incrementToken()) { diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java index ecda31135a9..5e316f5c0f3 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/SpanOrTermsBuilder.java @@ -49,9 +49,9 @@ public class SpanOrTermsBuilder extends SpanBuilderBase { String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"); String value = DOMUtils.getNonBlankTextOrFail(e); - try { - List clausesList = new ArrayList(); - TokenStream ts = analyzer.tokenStream(fieldName, value); + List clausesList = new ArrayList(); + + try (TokenStream ts = analyzer.tokenStream(fieldName, value)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); ts.reset(); @@ -61,7 +61,6 @@ public class SpanOrTermsBuilder extends SpanBuilderBase { clausesList.add(stq); } ts.end(); - ts.close(); SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()])); soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f)); return soq; diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java index 65b13014f24..6b97f728341 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java +++ 
b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsFilterBuilder.java @@ -54,8 +54,7 @@ public class TermsFilterBuilder implements FilterBuilder { String text = DOMUtils.getNonBlankTextOrFail(e); String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"); - try { - TokenStream ts = analyzer.tokenStream(fieldName, text); + try (TokenStream ts = analyzer.tokenStream(fieldName, text)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); ts.reset(); @@ -64,7 +63,6 @@ public class TermsFilterBuilder implements FilterBuilder { terms.add(BytesRef.deepCopyOf(bytes)); } ts.end(); - ts.close(); } catch (IOException ioe) { throw new RuntimeException("Error constructing terms from index:" + ioe); diff --git a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java index ed06d091ba4..d85d02c645d 100644 --- a/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java +++ b/lucene/queryparser/src/java/org/apache/lucene/queryparser/xml/builders/TermsQueryBuilder.java @@ -51,8 +51,7 @@ public class TermsQueryBuilder implements QueryBuilder { BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false)); bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0)); - try { - TokenStream ts = analyzer.tokenStream(fieldName, text); + try (TokenStream ts = analyzer.tokenStream(fieldName, text)) { TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); Term term = null; BytesRef bytes = termAtt.getBytesRef(); @@ -63,7 +62,6 @@ public class TermsQueryBuilder implements QueryBuilder { bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD)); } ts.end(); - ts.close(); } catch (IOException ioe) { throw new RuntimeException("Error constructing terms from index:" + ioe); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java index 6d205c78a74..6d9f5a3f012 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/queries/FuzzyLikeThisQuery.java @@ -193,67 +193,67 @@ public class FuzzyLikeThisQuery extends Query private void addTerms(IndexReader reader, FieldVals f) throws IOException { if (f.queryString == null) return; - TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - - int corpusNumDocs = reader.numDocs(); - HashSet processedTerms = new HashSet(); - ts.reset(); final Terms terms = MultiFields.getTerms(reader, f.fieldName); if (terms == null) { return; } - while (ts.incrementToken()) { - String term = termAtt.toString(); - if (!processedTerms.contains(term)) { - processedTerms.add(term); - ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term - float minScore = 0; - Term startTerm = new Term(f.fieldName, term); - AttributeSource atts = new AttributeSource(); - MaxNonCompetitiveBoostAttribute maxBoostAtt = + try (TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString)) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + + int corpusNumDocs = 
reader.numDocs(); + HashSet processedTerms = new HashSet(); + ts.reset(); + while (ts.incrementToken()) { + String term = termAtt.toString(); + if (!processedTerms.contains(term)) { + processedTerms.add(term); + ScoreTermQueue variantsQ = new ScoreTermQueue(MAX_VARIANTS_PER_TERM); //maxNum variants considered for any one term + float minScore = 0; + Term startTerm = new Term(f.fieldName, term); + AttributeSource atts = new AttributeSource(); + MaxNonCompetitiveBoostAttribute maxBoostAtt = atts.addAttribute(MaxNonCompetitiveBoostAttribute.class); - SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength); - //store the df so all variants use same idf - int df = reader.docFreq(startTerm); - int numVariants = 0; - int totalVariantDocFreqs = 0; - BytesRef possibleMatch; - BoostAttribute boostAtt = + SlowFuzzyTermsEnum fe = new SlowFuzzyTermsEnum(terms, atts, startTerm, f.minSimilarity, f.prefixLength); + //store the df so all variants use same idf + int df = reader.docFreq(startTerm); + int numVariants = 0; + int totalVariantDocFreqs = 0; + BytesRef possibleMatch; + BoostAttribute boostAtt = fe.attributes().addAttribute(BoostAttribute.class); - while ((possibleMatch = fe.next()) != null) { - numVariants++; - totalVariantDocFreqs += fe.docFreq(); - float score = boostAtt.getBoost(); - if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore) { - ScoreTerm st = new ScoreTerm(new Term(startTerm.field(), BytesRef.deepCopyOf(possibleMatch)), score, startTerm); - variantsQ.insertWithOverflow(st); - minScore = variantsQ.top().score; // maintain minScore - } - maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? minScore : Float.NEGATIVE_INFINITY); - } - - if (numVariants > 0) { - int avgDf = totalVariantDocFreqs / numVariants; - if (df == 0)//no direct match we can use as df for all variants - { - df = avgDf; //use avg df of all variants + while ((possibleMatch = fe.next()) != null) { + numVariants++; + totalVariantDocFreqs += fe.docFreq(); + float score = boostAtt.getBoost(); + if (variantsQ.size() < MAX_VARIANTS_PER_TERM || score > minScore) { + ScoreTerm st = new ScoreTerm(new Term(startTerm.field(), BytesRef.deepCopyOf(possibleMatch)), score, startTerm); + variantsQ.insertWithOverflow(st); + minScore = variantsQ.top().score; // maintain minScore + } + maxBoostAtt.setMaxNonCompetitiveBoost(variantsQ.size() >= MAX_VARIANTS_PER_TERM ? 
minScore : Float.NEGATIVE_INFINITY); } - // take the top variants (scored by edit distance) and reset the score - // to include an IDF factor then add to the global queue for ranking - // overall top query terms - int size = variantsQ.size(); - for (int i = 0; i < size; i++) { - ScoreTerm st = variantsQ.pop(); - st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs); - q.insertWithOverflow(st); + if (numVariants > 0) { + int avgDf = totalVariantDocFreqs / numVariants; + if (df == 0)//no direct match we can use as df for all variants + { + df = avgDf; //use avg df of all variants + } + + // take the top variants (scored by edit distance) and reset the score + // to include an IDF factor then add to the global queue for ranking + // overall top query terms + int size = variantsQ.size(); + for (int i = 0; i < size; i++) { + ScoreTerm st = variantsQ.pop(); + st.score = (st.score * st.score) * sim.idf(df, corpusNumDocs); + q.insertWithOverflow(st); + } } } } + ts.end(); } - ts.end(); - ts.close(); } @Override diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java index ccd40847f50..632023d1a82 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggester.java @@ -352,9 +352,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { occur = BooleanClause.Occur.SHOULD; } - try { + try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) { //long t0 = System.currentTimeMillis(); - TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString())); ts.reset(); final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); @@ -464,40 +463,39 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable { * result is set on each {@link * LookupResult#highlightKey} member. */ protected Object highlight(String text, Set matchedTokens, String prefixToken) throws IOException { - TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text)); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - ts.reset(); - StringBuilder sb = new StringBuilder(); - int upto = 0; - while (ts.incrementToken()) { - String token = termAtt.toString(); - int startOffset = offsetAtt.startOffset(); + try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + ts.reset(); + StringBuilder sb = new StringBuilder(); + int upto = 0; + while (ts.incrementToken()) { + String token = termAtt.toString(); + int startOffset = offsetAtt.startOffset(); + int endOffset = offsetAtt.endOffset(); + if (upto < startOffset) { + addNonMatch(sb, text.substring(upto, startOffset)); + upto = startOffset; + } else if (upto > startOffset) { + continue; + } + + if (matchedTokens.contains(token)) { + // Token matches. 
+ addWholeMatch(sb, text.substring(startOffset, endOffset), token); + upto = endOffset; + } else if (prefixToken != null && token.startsWith(prefixToken)) { + addPrefixMatch(sb, text.substring(startOffset, endOffset), token, prefixToken); + upto = endOffset; + } + } + ts.end(); int endOffset = offsetAtt.endOffset(); - if (upto < startOffset) { - addNonMatch(sb, text.substring(upto, startOffset)); - upto = startOffset; - } else if (upto > startOffset) { - continue; - } - - if (matchedTokens.contains(token)) { - // Token matches. - addWholeMatch(sb, text.substring(startOffset, endOffset), token); - upto = endOffset; - } else if (prefixToken != null && token.startsWith(prefixToken)) { - addPrefixMatch(sb, text.substring(startOffset, endOffset), token, prefixToken); - upto = endOffset; + if (upto < endOffset) { + addNonMatch(sb, text.substring(upto)); } + return sb.toString(); } - ts.end(); - int endOffset = offsetAtt.endOffset(); - if (upto < endOffset) { - addNonMatch(sb, text.substring(upto)); - } - ts.close(); - - return sb.toString(); } /** Called while highlighting a single result, to append a diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java index ba64403d2c3..77f0f1c573f 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/AnalyzingSuggester.java @@ -827,14 +827,15 @@ public class AnalyzingSuggester extends Lookup { } final Set toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { - // Analyze surface form: - TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString()); + // Analyze surface form: + Automaton automaton = null; + try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) { - // Create corresponding automaton: labels are bytes - // from each analyzed token, with byte 0 used as - // separator between tokens: - Automaton automaton = ts2a.toAutomaton(ts); - ts.close(); + // Create corresponding automaton: labels are bytes + // from each analyzed token, with byte 0 used as + // separator between tokens: + automaton = ts2a.toAutomaton(ts); + } replaceSep(automaton); automaton = convertAutomaton(automaton); @@ -854,9 +855,10 @@ public class AnalyzingSuggester extends Lookup { final Automaton toLookupAutomaton(final CharSequence key) throws IOException { // TODO: is there a Reader from a CharSequence? 
// Turn tokenstream into automaton: - TokenStream ts = queryAnalyzer.tokenStream("", key.toString()); - Automaton automaton = (getTokenStreamToAutomaton()).toAutomaton(ts); - ts.close(); + Automaton automaton = null; + try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) { + automaton = (getTokenStreamToAutomaton()).toAutomaton(ts); + } // TODO: we could use the end offset to "guess" // whether the final token was a partial token; this diff --git a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java index bf10337741b..d2f652df10d 100644 --- a/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java +++ b/lucene/suggest/src/java/org/apache/lucene/search/suggest/analyzing/FreeTextSuggester.java @@ -449,252 +449,251 @@ public class FreeTextSuggester extends Lookup { /** Retrieve suggestions. */ public List lookup(final CharSequence key, int num) throws IOException { - TokenStream ts = queryAnalyzer.tokenStream("", key.toString()); - TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class); - PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); - ts.reset(); - - BytesRef[] lastTokens = new BytesRef[grams]; - //System.out.println("lookup: key='" + key + "'"); - - // Run full analysis, but save only the - // last 1gram, last 2gram, etc.: - BytesRef tokenBytes = termBytesAtt.getBytesRef(); - int maxEndOffset = -1; - boolean sawRealToken = false; - while(ts.incrementToken()) { - termBytesAtt.fillBytesRef(); - sawRealToken |= tokenBytes.length > 0; - // TODO: this is somewhat iffy; today, ShingleFilter - // sets posLen to the gram count; maybe we should make - // a separate dedicated att for this? - int gramCount = posLenAtt.getPositionLength(); - - assert gramCount <= grams; - - // Safety: make sure the recalculated count "agrees": - if (countGrams(tokenBytes) != gramCount) { - throw new IllegalArgumentException("tokens must not contain separator byte; got token=" + tokenBytes + " but gramCount=" + gramCount + " does not match recalculated count=" + countGrams(tokenBytes)); + try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) { + TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class); + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); + ts.reset(); + + BytesRef[] lastTokens = new BytesRef[grams]; + //System.out.println("lookup: key='" + key + "'"); + + // Run full analysis, but save only the + // last 1gram, last 2gram, etc.: + BytesRef tokenBytes = termBytesAtt.getBytesRef(); + int maxEndOffset = -1; + boolean sawRealToken = false; + while(ts.incrementToken()) { + termBytesAtt.fillBytesRef(); + sawRealToken |= tokenBytes.length > 0; + // TODO: this is somewhat iffy; today, ShingleFilter + // sets posLen to the gram count; maybe we should make + // a separate dedicated att for this? 
+ int gramCount = posLenAtt.getPositionLength(); + + assert gramCount <= grams; + + // Safety: make sure the recalculated count "agrees": + if (countGrams(tokenBytes) != gramCount) { + throw new IllegalArgumentException("tokens must not contain separator byte; got token=" + tokenBytes + " but gramCount=" + gramCount + " does not match recalculated count=" + countGrams(tokenBytes)); + } + maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset()); + lastTokens[gramCount-1] = BytesRef.deepCopyOf(tokenBytes); } - maxEndOffset = Math.max(maxEndOffset, offsetAtt.endOffset()); - lastTokens[gramCount-1] = BytesRef.deepCopyOf(tokenBytes); - } - ts.end(); - - if (!sawRealToken) { - throw new IllegalArgumentException("no tokens produced by analyzer, or the only tokens were empty strings"); - } - - // Carefully fill last tokens with _ tokens; - // ShingleFilter appraently won't emit "only hole" - // tokens: - int endPosInc = posIncAtt.getPositionIncrement(); - - // Note this will also be true if input is the empty - // string (in which case we saw no tokens and - // maxEndOffset is still -1), which in fact works out OK - // because we fill the unigram with an empty BytesRef - // below: - boolean lastTokenEnded = offsetAtt.endOffset() > maxEndOffset || endPosInc > 0; - ts.close(); - //System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.endOffset()); - - if (lastTokenEnded) { - //System.out.println(" lastTokenEnded"); - // If user hit space after the last token, then - // "upgrade" all tokens. This way "foo " will suggest - // all bigrams starting w/ foo, and not any unigrams - // starting with "foo": - for(int i=grams-1;i>0;i--) { - BytesRef token = lastTokens[i-1]; - if (token == null) { + ts.end(); + + if (!sawRealToken) { + throw new IllegalArgumentException("no tokens produced by analyzer, or the only tokens were empty strings"); + } + + // Carefully fill last tokens with _ tokens; + // ShingleFilter appraently won't emit "only hole" + // tokens: + int endPosInc = posIncAtt.getPositionIncrement(); + + // Note this will also be true if input is the empty + // string (in which case we saw no tokens and + // maxEndOffset is still -1), which in fact works out OK + // because we fill the unigram with an empty BytesRef + // below: + boolean lastTokenEnded = offsetAtt.endOffset() > maxEndOffset || endPosInc > 0; + //System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.endOffset()); + + if (lastTokenEnded) { + //System.out.println(" lastTokenEnded"); + // If user hit space after the last token, then + // "upgrade" all tokens. 
This way "foo " will suggest + // all bigrams starting w/ foo, and not any unigrams + // starting with "foo": + for(int i=grams-1;i>0;i--) { + BytesRef token = lastTokens[i-1]; + if (token == null) { + continue; + } + token.grow(token.length+1); + token.bytes[token.length] = separator; + token.length++; + lastTokens[i] = token; + } + lastTokens[0] = new BytesRef(); + } + + Arc arc = new Arc(); + + BytesReader bytesReader = fst.getBytesReader(); + + // Try highest order models first, and if they return + // results, return that; else, fallback: + double backoff = 1.0; + + List results = new ArrayList(num); + + // We only add a given suffix once, from the highest + // order model that saw it; for subsequent lower order + // models we skip it: + final Set seen = new HashSet(); + + for(int gram=grams-1;gram>=0;gram--) { + BytesRef token = lastTokens[gram]; + // Don't make unigram predictions from empty string: + if (token == null || (token.length == 0 && key.length() > 0)) { + // Input didn't have enough tokens: + //System.out.println(" gram=" + gram + ": skip: not enough input"); continue; } - token.grow(token.length+1); - token.bytes[token.length] = separator; - token.length++; - lastTokens[i] = token; - } - lastTokens[0] = new BytesRef(); - } - - Arc arc = new Arc(); - - BytesReader bytesReader = fst.getBytesReader(); - - // Try highest order models first, and if they return - // results, return that; else, fallback: - double backoff = 1.0; - - List results = new ArrayList(num); - - // We only add a given suffix once, from the highest - // order model that saw it; for subsequent lower order - // models we skip it: - final Set seen = new HashSet(); - - for(int gram=grams-1;gram>=0;gram--) { - BytesRef token = lastTokens[gram]; - // Don't make unigram predictions from empty string: - if (token == null || (token.length == 0 && key.length() > 0)) { - // Input didn't have enough tokens: - //System.out.println(" gram=" + gram + ": skip: not enough input"); - continue; - } - - if (endPosInc > 0 && gram <= endPosInc) { - // Skip hole-only predictions; in theory we - // shouldn't have to do this, but we'd need to fix - // ShingleFilter to produce only-hole tokens: - //System.out.println(" break: only holes now"); - break; - } - - //System.out.println("try " + (gram+1) + " gram token=" + token.utf8ToString()); - - // TODO: we could add fuzziness here - // match the prefix portion exactly - //Pair prefixOutput = null; - Long prefixOutput = null; - try { - prefixOutput = lookupPrefix(fst, bytesReader, token, arc); - } catch (IOException bogus) { - throw new RuntimeException(bogus); - } - //System.out.println(" prefixOutput=" + prefixOutput); - - if (prefixOutput == null) { - // This model never saw this prefix, e.g. the - // trigram model never saw context "purple mushroom" - backoff *= ALPHA; - continue; - } - - // TODO: we could do this division at build time, and - // bake it into the FST? 
- - // Denominator for computing scores from current - // model's predictions: - long contextCount = totTokens; - - BytesRef lastTokenFragment = null; - - for(int i=token.length-1;i>=0;i--) { - if (token.bytes[token.offset+i] == separator) { - BytesRef context = new BytesRef(token.bytes, token.offset, i); - Long output = Util.get(fst, Util.toIntsRef(context, new IntsRef())); - assert output != null; - contextCount = decodeWeight(output); - lastTokenFragment = new BytesRef(token.bytes, token.offset + i + 1, token.length - i - 1); + + if (endPosInc > 0 && gram <= endPosInc) { + // Skip hole-only predictions; in theory we + // shouldn't have to do this, but we'd need to fix + // ShingleFilter to produce only-hole tokens: + //System.out.println(" break: only holes now"); break; } - } - - final BytesRef finalLastToken; - - if (lastTokenFragment == null) { - finalLastToken = BytesRef.deepCopyOf(token); - } else { - finalLastToken = BytesRef.deepCopyOf(lastTokenFragment); - } - assert finalLastToken.offset == 0; - - CharsRef spare = new CharsRef(); - - // complete top-N - MinResult completions[] = null; - try { - - // Because we store multiple models in one FST - // (1gram, 2gram, 3gram), we must restrict the - // search so that it only considers the current - // model. For highest order model, this is not - // necessary since all completions in the FST - // must be from this model, but for lower order - // models we have to filter out the higher order - // ones: - - // Must do num+seen.size() for queue depth because we may - // reject up to seen.size() paths in acceptResult(): - Util.TopNSearcher searcher = new Util.TopNSearcher(fst, num, num+seen.size(), weightComparator) { - - BytesRef scratchBytes = new BytesRef(); - - @Override - protected void addIfCompetitive(Util.FSTPath path) { - if (path.arc.label != separator) { - //System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc); - super.addIfCompetitive(path); - } else { - //System.out.println(" prevent path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc); - } - } - - @Override - protected boolean acceptResult(IntsRef input, Long output) { - Util.toBytesRef(input, scratchBytes); - finalLastToken.grow(finalLastToken.length + scratchBytes.length); - int lenSav = finalLastToken.length; - finalLastToken.append(scratchBytes); - //System.out.println(" accept? 
input='" + scratchBytes.utf8ToString() + "'; lastToken='" + finalLastToken.utf8ToString() + "'; return " + (seen.contains(finalLastToken) == false)); - boolean ret = seen.contains(finalLastToken) == false; - - finalLastToken.length = lenSav; - return ret; - } - }; - - // since this search is initialized with a single start node - // it is okay to start with an empty input path here - searcher.addStartPaths(arc, prefixOutput, true, new IntsRef()); - - completions = searcher.search(); - } catch (IOException bogus) { - throw new RuntimeException(bogus); - } - - int prefixLength = token.length; - - BytesRef suffix = new BytesRef(8); - //System.out.println(" " + completions.length + " completions"); - - nextCompletion: - for (MinResult completion : completions) { - token.length = prefixLength; - // append suffix - Util.toBytesRef(completion.input, suffix); - token.append(suffix); - - //System.out.println(" completion " + token.utf8ToString()); - - // Skip this path if a higher-order model already - // saw/predicted its last token: - BytesRef lastToken = token; + + //System.out.println("try " + (gram+1) + " gram token=" + token.utf8ToString()); + + // TODO: we could add fuzziness here + // match the prefix portion exactly + //Pair prefixOutput = null; + Long prefixOutput = null; + try { + prefixOutput = lookupPrefix(fst, bytesReader, token, arc); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + //System.out.println(" prefixOutput=" + prefixOutput); + + if (prefixOutput == null) { + // This model never saw this prefix, e.g. the + // trigram model never saw context "purple mushroom" + backoff *= ALPHA; + continue; + } + + // TODO: we could do this division at build time, and + // bake it into the FST? + + // Denominator for computing scores from current + // model's predictions: + long contextCount = totTokens; + + BytesRef lastTokenFragment = null; + for(int i=token.length-1;i>=0;i--) { if (token.bytes[token.offset+i] == separator) { - assert token.length-i-1 > 0; - lastToken = new BytesRef(token.bytes, token.offset+i+1, token.length-i-1); + BytesRef context = new BytesRef(token.bytes, token.offset, i); + Long output = Util.get(fst, Util.toIntsRef(context, new IntsRef())); + assert output != null; + contextCount = decodeWeight(output); + lastTokenFragment = new BytesRef(token.bytes, token.offset + i + 1, token.length - i - 1); break; } } - if (seen.contains(lastToken)) { - //System.out.println(" skip dup " + lastToken.utf8ToString()); - continue nextCompletion; + + final BytesRef finalLastToken; + + if (lastTokenFragment == null) { + finalLastToken = BytesRef.deepCopyOf(token); + } else { + finalLastToken = BytesRef.deepCopyOf(lastTokenFragment); } - seen.add(BytesRef.deepCopyOf(lastToken)); - spare.grow(token.length); - UnicodeUtil.UTF8toUTF16(token, spare); - LookupResult result = new LookupResult(spare.toString(), (long) (Long.MAX_VALUE * backoff * ((double) decodeWeight(completion.output)) / contextCount)); - results.add(result); - assert results.size() == seen.size(); - //System.out.println(" add result=" + result); + assert finalLastToken.offset == 0; + + CharsRef spare = new CharsRef(); + + // complete top-N + MinResult completions[] = null; + try { + + // Because we store multiple models in one FST + // (1gram, 2gram, 3gram), we must restrict the + // search so that it only considers the current + // model. 
For highest order model, this is not + // necessary since all completions in the FST + // must be from this model, but for lower order + // models we have to filter out the higher order + // ones: + + // Must do num+seen.size() for queue depth because we may + // reject up to seen.size() paths in acceptResult(): + Util.TopNSearcher searcher = new Util.TopNSearcher(fst, num, num+seen.size(), weightComparator) { + + BytesRef scratchBytes = new BytesRef(); + + @Override + protected void addIfCompetitive(Util.FSTPath path) { + if (path.arc.label != separator) { + //System.out.println(" keep path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc); + super.addIfCompetitive(path); + } else { + //System.out.println(" prevent path: " + Util.toBytesRef(path.input, new BytesRef()).utf8ToString() + "; " + path + "; arc=" + path.arc); + } + } + + @Override + protected boolean acceptResult(IntsRef input, Long output) { + Util.toBytesRef(input, scratchBytes); + finalLastToken.grow(finalLastToken.length + scratchBytes.length); + int lenSav = finalLastToken.length; + finalLastToken.append(scratchBytes); + //System.out.println(" accept? input='" + scratchBytes.utf8ToString() + "'; lastToken='" + finalLastToken.utf8ToString() + "'; return " + (seen.contains(finalLastToken) == false)); + boolean ret = seen.contains(finalLastToken) == false; + + finalLastToken.length = lenSav; + return ret; + } + }; + + // since this search is initialized with a single start node + // it is okay to start with an empty input path here + searcher.addStartPaths(arc, prefixOutput, true, new IntsRef()); + + completions = searcher.search(); + } catch (IOException bogus) { + throw new RuntimeException(bogus); + } + + int prefixLength = token.length; + + BytesRef suffix = new BytesRef(8); + //System.out.println(" " + completions.length + " completions"); + + nextCompletion: + for (MinResult completion : completions) { + token.length = prefixLength; + // append suffix + Util.toBytesRef(completion.input, suffix); + token.append(suffix); + + //System.out.println(" completion " + token.utf8ToString()); + + // Skip this path if a higher-order model already + // saw/predicted its last token: + BytesRef lastToken = token; + for(int i=token.length-1;i>=0;i--) { + if (token.bytes[token.offset+i] == separator) { + assert token.length-i-1 > 0; + lastToken = new BytesRef(token.bytes, token.offset+i+1, token.length-i-1); + break; + } + } + if (seen.contains(lastToken)) { + //System.out.println(" skip dup " + lastToken.utf8ToString()); + continue nextCompletion; + } + seen.add(BytesRef.deepCopyOf(lastToken)); + spare.grow(token.length); + UnicodeUtil.UTF8toUTF16(token, spare); + LookupResult result = new LookupResult(spare.toString(), (long) (Long.MAX_VALUE * backoff * ((double) decodeWeight(completion.output)) / contextCount)); + results.add(result); + assert results.size() == seen.size(); + //System.out.println(" add result=" + result); + } + backoff *= ALPHA; } - backoff *= ALPHA; - } - - Collections.sort(results, new Comparator() { + + Collections.sort(results, new Comparator() { @Override public int compare(LookupResult a, LookupResult b) { if (a.value > b.value) { @@ -707,12 +706,13 @@ public class FreeTextSuggester extends Lookup { } } }); - - if (results.size() > num) { - results.subList(num, results.size()).clear(); + + if (results.size() > num) { + results.subList(num, results.size()).clear(); + } + + return results; } - - return results; } /** weight -> cost */ diff --git 
a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java index 2b5ce78de4a..84c6227127b 100644 --- a/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java +++ b/lucene/suggest/src/test/org/apache/lucene/search/suggest/analyzing/AnalyzingInfixSuggesterTest.java @@ -165,43 +165,43 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase { @Override protected Object highlight(String text, Set matchedTokens, String prefixToken) throws IOException { - TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text)); - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - ts.reset(); - List fragments = new ArrayList(); - int upto = 0; - while (ts.incrementToken()) { - String token = termAtt.toString(); - int startOffset = offsetAtt.startOffset(); - int endOffset = offsetAtt.endOffset(); - if (upto < startOffset) { - fragments.add(new LookupHighlightFragment(text.substring(upto, startOffset), false)); - upto = startOffset; - } else if (upto > startOffset) { - continue; - } - - if (matchedTokens.contains(token)) { - // Token matches. - fragments.add(new LookupHighlightFragment(text.substring(startOffset, endOffset), true)); - upto = endOffset; - } else if (prefixToken != null && token.startsWith(prefixToken)) { - fragments.add(new LookupHighlightFragment(text.substring(startOffset, startOffset+prefixToken.length()), true)); - if (prefixToken.length() < token.length()) { - fragments.add(new LookupHighlightFragment(text.substring(startOffset+prefixToken.length(), startOffset+token.length()), false)); + try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) { + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + ts.reset(); + List fragments = new ArrayList(); + int upto = 0; + while (ts.incrementToken()) { + String token = termAtt.toString(); + int startOffset = offsetAtt.startOffset(); + int endOffset = offsetAtt.endOffset(); + if (upto < startOffset) { + fragments.add(new LookupHighlightFragment(text.substring(upto, startOffset), false)); + upto = startOffset; + } else if (upto > startOffset) { + continue; + } + + if (matchedTokens.contains(token)) { + // Token matches. 
+ fragments.add(new LookupHighlightFragment(text.substring(startOffset, endOffset), true)); + upto = endOffset; + } else if (prefixToken != null && token.startsWith(prefixToken)) { + fragments.add(new LookupHighlightFragment(text.substring(startOffset, startOffset+prefixToken.length()), true)); + if (prefixToken.length() < token.length()) { + fragments.add(new LookupHighlightFragment(text.substring(startOffset+prefixToken.length(), startOffset+token.length()), false)); + } + upto = endOffset; } - upto = endOffset; } + ts.end(); + int endOffset = offsetAtt.endOffset(); + if (upto < endOffset) { + fragments.add(new LookupHighlightFragment(text.substring(upto), false)); + } + + return fragments; } - ts.end(); - int endOffset = offsetAtt.endOffset(); - if (upto < endOffset) { - fragments.add(new LookupHighlightFragment(text.substring(upto), false)); - } - ts.close(); - - return fragments; } }; suggester.build(new TermFreqPayloadArrayIterator(keys)); diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java index 27cf491683b..db4223e99b6 100644 --- a/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java +++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/CollationTestBase.java @@ -258,17 +258,17 @@ public abstract class CollationTestBase extends LuceneTestCase { for (int i = 0; i < numTestPoints; i++) { String term = _TestUtil.randomSimpleString(random()); - TokenStream ts = analyzer.tokenStream("fake", term); - TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); - ts.reset(); - assertTrue(ts.incrementToken()); - termAtt.fillBytesRef(); - // ensure we make a copy of the actual bytes too - map.put(term, BytesRef.deepCopyOf(bytes)); - assertFalse(ts.incrementToken()); - ts.end(); - ts.close(); + try (TokenStream ts = analyzer.tokenStream("fake", term)) { + TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); + ts.reset(); + assertTrue(ts.incrementToken()); + termAtt.fillBytesRef(); + // ensure we make a copy of the actual bytes too + map.put(term, BytesRef.deepCopyOf(bytes)); + assertFalse(ts.incrementToken()); + ts.end(); + } } Thread threads[] = new Thread[numThreads]; @@ -280,16 +280,16 @@ public abstract class CollationTestBase extends LuceneTestCase { for (Map.Entry mapping : map.entrySet()) { String term = mapping.getKey(); BytesRef expected = mapping.getValue(); - TokenStream ts = analyzer.tokenStream("fake", term); - TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); - ts.reset(); - assertTrue(ts.incrementToken()); - termAtt.fillBytesRef(); - assertEquals(expected, bytes); - assertFalse(ts.incrementToken()); - ts.end(); - ts.close(); + try (TokenStream ts = analyzer.tokenStream("fake", term)) { + TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); + ts.reset(); + assertTrue(ts.incrementToken()); + termAtt.fillBytesRef(); + assertEquals(expected, bytes); + assertFalse(ts.incrementToken()); + ts.end(); + } } } catch (IOException e) { throw new RuntimeException(e); diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java index 
2cacb63e793..e1489474281 100644 --- a/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java +++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/schema/ICUCollationField.java @@ -234,36 +234,23 @@ public class ICUCollationField extends FieldType { * simple (we already have a threadlocal clone in the reused TS) */ private BytesRef analyzeRangePart(String field, String part) { - TokenStream source; - - try { - source = analyzer.tokenStream(field, part); + try (TokenStream source = analyzer.tokenStream(field, part)) { source.reset(); - } catch (IOException e) { - throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e); - } - TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); - // we control the analyzer here: most errors are impossible - try { + // we control the analyzer here: most errors are impossible if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); termAtt.fillBytesRef(); assert !source.incrementToken(); - } catch (IOException e) { - throw new RuntimeException("error analyzing range part: " + part, e); - } - try { source.end(); - source.close(); + return BytesRef.deepCopyOf(bytes); } catch (IOException e) { - throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e); + throw new RuntimeException("Unable to analyze range part: " + part, e); } - - return BytesRef.deepCopyOf(bytes); } @Override diff --git a/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java index 2b30c2e2003..947229ead77 100644 --- a/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java +++ b/solr/core/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java @@ -85,15 +85,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { if (!TokenizerChain.class.isInstance(analyzer)) { - TokenStream tokenStream = null; - try { - tokenStream = analyzer.tokenStream(context.getFieldName(), value); + try (TokenStream tokenStream = analyzer.tokenStream(context.getFieldName(), value)) { + NamedList> namedList = new NamedList>(); + namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context)); + return namedList; } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e); } - NamedList> namedList = new NamedList>(); - namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context)); - return namedList; } TokenizerChain tokenizerChain = (TokenizerChain) analyzer; @@ -139,10 +137,8 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * @param analyzer The analyzer to use.
*/ protected Set getQueryTokenSet(String query, Analyzer analyzer) { - TokenStream tokenStream = null; - try { + try (TokenStream tokenStream = analyzer.tokenStream("", query)){ final Set tokens = new HashSet(); - tokenStream = analyzer.tokenStream("", query); final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); final BytesRef bytes = bytesAtt.getBytesRef(); @@ -157,8 +153,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { return tokens; } catch (IOException ioe) { throw new RuntimeException("Error occured while iterating over tokenstream", ioe); - } finally { - IOUtils.closeWhileHandlingException(tokenStream); } } diff --git a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index 87bd748110f..476e53d66b9 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -344,16 +344,16 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore return query; } StringBuilder norm = new StringBuilder(); - TokenStream tokens = analyzer.tokenStream("", query); - tokens.reset(); + try (TokenStream tokens = analyzer.tokenStream("", query)) { + tokens.reset(); - CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); - while (tokens.incrementToken()) { - norm.append(termAtt.buffer(), 0, termAtt.length()); + CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); + while (tokens.incrementToken()) { + norm.append(termAtt.buffer(), 0, termAtt.length()); + } + tokens.end(); + return norm.toString(); } - tokens.end(); - tokens.close(); - return norm.toString(); } //--------------------------------------------------------------------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 1deec69da6a..35a6c66592f 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -463,29 +463,29 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar private Collection getTokens(String q, Analyzer analyzer) throws IOException { Collection result = new ArrayList(); assert analyzer != null; - TokenStream ts = analyzer.tokenStream("", q); - ts.reset(); - // TODO: support custom attributes - CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); - TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); - FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); - PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); - - while (ts.incrementToken()){ - Token token = new Token(); - token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); - token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); - token.setType(typeAtt.type()); - token.setFlags(flagsAtt.getFlags()); - token.setPayload(payloadAtt.getPayload()); - token.setPositionIncrement(posIncAtt.getPositionIncrement()); - result.add(token); + try (TokenStream ts = analyzer.tokenStream("", q)) { + ts.reset(); + 
// TODO: support custom attributes + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); + FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); + PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); + + while (ts.incrementToken()){ + Token token = new Token(); + token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); + token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); + token.setType(typeAtt.type()); + token.setFlags(flagsAtt.getFlags()); + token.setPayload(payloadAtt.getPayload()); + token.setPositionIncrement(posIncAtt.getPositionIncrement()); + result.add(token); + } + ts.end(); + return result; } - ts.end(); - ts.close(); - return result; } protected SolrSpellChecker getSpellChecker(SolrParams params) { diff --git a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java index 863b03ee79c..3c85e0b13bb 100644 --- a/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java +++ b/solr/core/src/java/org/apache/solr/parser/SolrQueryParserBase.java @@ -403,58 +403,49 @@ public abstract class SolrQueryParserBase { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TokenStream source; - try { - source = analyzer.tokenStream(field, queryText); - source.reset(); - } catch (IOException e) { - throw new SyntaxError("Unable to initialize TokenStream to analyze query text", e); - } - CachingTokenFilter buffer = new CachingTokenFilter(source); + CachingTokenFilter buffer = null; TermToBytesRefAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; - - buffer.reset(); - - if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { - termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); - } - if (buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); - } - int positionCount = 0; boolean severalTokensAtSamePosition = false; - - boolean hasMoreTokens = false; - if (termAtt != null) { - try { - hasMoreTokens = buffer.incrementToken(); - while (hasMoreTokens) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - } else { - severalTokensAtSamePosition = true; - } - hasMoreTokens = buffer.incrementToken(); - } - } catch (IOException e) { - // ignore - } - } - try { - // rewind the buffer stream + + try (TokenStream source = analyzer.tokenStream(field, queryText)) { + source.reset(); + buffer = new CachingTokenFilter(source); buffer.reset(); - // close original stream - all tokens buffered - source.close(); - } - catch (IOException e) { - throw new SyntaxError("Cannot close TokenStream analyzing query text", e); + if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { + termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); + } + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); + } + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore + } + } + } catch (IOException e) { + throw new SyntaxError("Error analyzing query text", e); } + + // rewind the buffer stream + buffer.reset(); BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef(); diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index 4fc8b16edac..2c47097e596 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -209,37 +209,23 @@ public class CollationField extends FieldType { * its just that all methods are synced), this keeps things * simple (we already have a threadlocal clone in the reused TS) */ - private BytesRef analyzeRangePart(String field, String part) { - TokenStream source; - - try { - source = analyzer.tokenStream(field, part); - source.reset(); - } catch (IOException e) { - throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e); - } - - TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); + private BytesRef analyzeRangePart(String field, String part) { + try (TokenStream source = analyzer.tokenStream(field, part)) { + source.reset(); + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); - // we control the analyzer here: most errors are impossible - try { + // we control the analyzer here: most errors are impossible if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for range part: " + part); termAtt.fillBytesRef(); assert !source.incrementToken(); - } catch (IOException e) { - throw new RuntimeException("error analyzing range part: " + part, e); - } - try { source.end(); - source.close(); + return BytesRef.deepCopyOf(bytes); } catch (IOException e) { - throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e); + throw new RuntimeException("Unable to analyze range part: " + part, e); } - - return BytesRef.deepCopyOf(bytes); } @Override diff --git 
a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index c651259fb0f..964617615a4 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -138,35 +138,23 @@ public class TextField extends FieldType { public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) { if (part == null || analyzerIn == null) return null; - TokenStream source; - try { - source = analyzerIn.tokenStream(field, part); + try (TokenStream source = analyzerIn.tokenStream(field, part)){ source.reset(); - } catch (IOException e) { - throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to initialize TokenStream to analyze multiTerm term: " + part, e); - } - TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); - BytesRef bytes = termAtt.getBytesRef(); + TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); + BytesRef bytes = termAtt.getBytesRef(); - try { if (!source.incrementToken()) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part); termAtt.fillBytesRef(); if (source.incrementToken()) throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part); + + source.end(); + return BytesRef.deepCopyOf(bytes); } catch (IOException e) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e); } - - try { - source.end(); - source.close(); - } catch (IOException e) { - throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e); - } - - return BytesRef.deepCopyOf(bytes); } @@ -178,58 +166,50 @@ public class TextField extends FieldType { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count - TokenStream source; - try { - source = analyzer.tokenStream(field, queryText); - source.reset(); - } catch (IOException e) { - throw new RuntimeException("Unable to initialize TokenStream to analyze query text", e); - } - CachingTokenFilter buffer = new CachingTokenFilter(source); + CachingTokenFilter buffer = null; CharTermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; - - buffer.reset(); - - if (buffer.hasAttribute(CharTermAttribute.class)) { - termAtt = buffer.getAttribute(CharTermAttribute.class); - } - if (buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); - } - int positionCount = 0; boolean severalTokensAtSamePosition = false; - boolean hasMoreTokens = false; - if (termAtt != null) { - try { - hasMoreTokens = buffer.incrementToken(); - while (hasMoreTokens) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - } else { - severalTokensAtSamePosition = true; - } - hasMoreTokens = buffer.incrementToken(); - } - } catch (IOException e) { - // ignore - } - } - try { - // rewind the buffer stream + try (TokenStream source = analyzer.tokenStream(field, queryText)) { + source.reset(); + buffer = new CachingTokenFilter(source); + buffer.reset(); + + if (buffer.hasAttribute(CharTermAttribute.class)) { + termAtt = buffer.getAttribute(CharTermAttribute.class); + } + if (buffer.hasAttribute(PositionIncrementAttribute.class)) { + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); + } + + boolean hasMoreTokens = false; + if (termAtt != null) { + try { + hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } - // close original stream - all tokens buffered - source.close(); - } - catch (IOException e) { - // ignore - } + // rewind the buffer stream + buffer.reset(); if (numTokens == 0) return null; diff --git a/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java b/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java index a9ad3b9b627..15ec62f6a2f 100644 --- a/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java +++ b/solr/core/src/test/org/apache/solr/spelling/SimpleQueryConverter.java @@ -40,10 +40,10 @@ class SimpleQueryConverter extends SpellingQueryConverter { @Override public Collection convert(String origQuery) { - try { - Collection result = new HashSet(); - WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); - TokenStream ts = analyzer.tokenStream("", origQuery); + Collection result = new HashSet(); + WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); + + try (TokenStream ts = analyzer.tokenStream("", origQuery)) { // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); @@ -64,9 +64,7 @@ class SimpleQueryConverter extends SpellingQueryConverter { tok.setType(typeAtt.type()); result.add(tok); } - ts.end(); - ts.close(); - + ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e);