mirror of https://github.com/apache/lucene.git
LUCENE-5259: convert analysis consumers to try-with-resources
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1529770 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9c98f9d958
commit
64a795b6e3
|
@ -307,7 +307,7 @@ public class SynonymMap {
|
||||||
* separates by {@link SynonymMap#WORD_SEPARATOR}.
|
* separates by {@link SynonymMap#WORD_SEPARATOR}.
|
||||||
* reuse and its chars must not be null. */
|
* reuse and its chars must not be null. */
|
||||||
public CharsRef analyze(String text, CharsRef reuse) throws IOException {
|
public CharsRef analyze(String text, CharsRef reuse) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("", text);
|
try (TokenStream ts = analyzer.tokenStream("", text)) {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -330,7 +330,7 @@ public class SynonymMap {
|
||||||
reuse.length += length;
|
reuse.length += length;
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
if (reuse.length == 0) {
|
if (reuse.length == 0) {
|
||||||
throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
|
throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
|
||||||
}
|
}
|
||||||
|
|
|
@ -117,12 +117,15 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
// LUCENE-1441
|
// LUCENE-1441
|
||||||
public void testOffsets() throws Exception {
|
public void testOffsets() throws Exception {
|
||||||
TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
|
try (TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"))) {
|
||||||
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
|
||||||
stream.reset();
|
stream.reset();
|
||||||
assertTrue(stream.incrementToken());
|
assertTrue(stream.incrementToken());
|
||||||
assertEquals(0, offsetAtt.startOffset());
|
assertEquals(0, offsetAtt.startOffset());
|
||||||
assertEquals(4, offsetAtt.endOffset());
|
assertEquals(4, offsetAtt.endOffset());
|
||||||
|
assertFalse(stream.incrementToken());
|
||||||
|
stream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** blast some random strings through the analyzer */
|
/** blast some random strings through the analyzer */
|
||||||
|
|
|
@ -46,7 +46,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
public void testDefaults() throws IOException {
|
public void testDefaults() throws IOException {
|
||||||
assertTrue(stop != null);
|
assertTrue(stop != null);
|
||||||
TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer");
|
try (TokenStream stream = stop.tokenStream("test", "This is a test of the english stop analyzer")) {
|
||||||
assertTrue(stream != null);
|
assertTrue(stream != null);
|
||||||
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||||
stream.reset();
|
stream.reset();
|
||||||
|
@ -54,12 +54,14 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
|
||||||
while (stream.incrementToken()) {
|
while (stream.incrementToken()) {
|
||||||
assertFalse(inValidTokens.contains(termAtt.toString()));
|
assertFalse(inValidTokens.contains(termAtt.toString()));
|
||||||
}
|
}
|
||||||
|
stream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testStopList() throws IOException {
|
public void testStopList() throws IOException {
|
||||||
CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
|
CharArraySet stopWordsSet = new CharArraySet(TEST_VERSION_CURRENT, asSet("good", "test", "analyzer"), false);
|
||||||
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
|
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
|
||||||
TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer");
|
try (TokenStream stream = newStop.tokenStream("test", "This is a good test of the english stop analyzer")) {
|
||||||
assertNotNull(stream);
|
assertNotNull(stream);
|
||||||
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
|
@ -68,6 +70,8 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
|
||||||
String text = termAtt.toString();
|
String text = termAtt.toString();
|
||||||
assertFalse(stopWordsSet.contains(text));
|
assertFalse(stopWordsSet.contains(text));
|
||||||
}
|
}
|
||||||
|
stream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testStopListPositions() throws IOException {
|
public void testStopListPositions() throws IOException {
|
||||||
|
@ -75,7 +79,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
|
||||||
StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
|
StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
|
||||||
String s = "This is a good test of the english stop analyzer with positions";
|
String s = "This is a good test of the english stop analyzer with positions";
|
||||||
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
|
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
|
||||||
TokenStream stream = newStop.tokenStream("test", s);
|
try (TokenStream stream = newStop.tokenStream("test", s)) {
|
||||||
assertNotNull(stream);
|
assertNotNull(stream);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||||
|
@ -87,6 +91,8 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(stopWordsSet.contains(text));
|
assertFalse(stopWordsSet.contains(text));
|
||||||
assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
|
assertEquals(expectedIncr[i++],posIncrAtt.getPositionIncrement());
|
||||||
}
|
}
|
||||||
|
stream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
|
||||||
PerFieldAnalyzerWrapper analyzer =
|
PerFieldAnalyzerWrapper analyzer =
|
||||||
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
|
new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
|
||||||
|
|
||||||
TokenStream tokenStream = analyzer.tokenStream("field", text);
|
try (TokenStream tokenStream = analyzer.tokenStream("field", text)) {
|
||||||
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
|
||||||
tokenStream.reset();
|
tokenStream.reset();
|
||||||
|
|
||||||
|
@ -45,15 +45,21 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
|
||||||
assertEquals("WhitespaceAnalyzer does not lowercase",
|
assertEquals("WhitespaceAnalyzer does not lowercase",
|
||||||
"Qwerty",
|
"Qwerty",
|
||||||
termAtt.toString());
|
termAtt.toString());
|
||||||
|
assertFalse(tokenStream.incrementToken());
|
||||||
|
tokenStream.end();
|
||||||
|
}
|
||||||
|
|
||||||
tokenStream = analyzer.tokenStream("special", text);
|
try (TokenStream tokenStream = analyzer.tokenStream("special", text)) {
|
||||||
termAtt = tokenStream.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
|
||||||
tokenStream.reset();
|
tokenStream.reset();
|
||||||
|
|
||||||
assertTrue(tokenStream.incrementToken());
|
assertTrue(tokenStream.incrementToken());
|
||||||
assertEquals("SimpleAnalyzer lowercases",
|
assertEquals("SimpleAnalyzer lowercases",
|
||||||
"qwerty",
|
"qwerty",
|
||||||
termAtt.toString());
|
termAtt.toString());
|
||||||
|
assertFalse(tokenStream.incrementToken());
|
||||||
|
tokenStream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testCharFilters() throws Exception {
|
public void testCharFilters() throws Exception {
|
||||||
|
|
|
@ -95,7 +95,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
|
public void testShingleAnalyzerWrapperPhraseQuery() throws Exception {
|
||||||
PhraseQuery q = new PhraseQuery();
|
PhraseQuery q = new PhraseQuery();
|
||||||
|
|
||||||
TokenStream ts = analyzer.tokenStream("content", "this sentence");
|
try (TokenStream ts = analyzer.tokenStream("content", "this sentence")) {
|
||||||
int j = -1;
|
int j = -1;
|
||||||
|
|
||||||
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
|
||||||
|
@ -107,6 +107,8 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
String termText = termAtt.toString();
|
String termText = termAtt.toString();
|
||||||
q.add(new Term("content", termText), j);
|
q.add(new Term("content", termText), j);
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
}
|
||||||
|
|
||||||
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
|
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
|
||||||
int[] ranks = new int[] { 0 };
|
int[] ranks = new int[] { 0 };
|
||||||
|
@ -121,17 +123,17 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
|
||||||
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
|
public void testShingleAnalyzerWrapperBooleanQuery() throws Exception {
|
||||||
BooleanQuery q = new BooleanQuery();
|
BooleanQuery q = new BooleanQuery();
|
||||||
|
|
||||||
TokenStream ts = analyzer.tokenStream("content", "test sentence");
|
try (TokenStream ts = analyzer.tokenStream("content", "test sentence")) {
|
||||||
|
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
String termText = termAtt.toString();
|
String termText = termAtt.toString();
|
||||||
q.add(new TermQuery(new Term("content", termText)),
|
q.add(new TermQuery(new Term("content", termText)),
|
||||||
BooleanClause.Occur.SHOULD);
|
BooleanClause.Occur.SHOULD);
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
}
|
||||||
|
|
||||||
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
|
ScoreDoc[] hits = searcher.search(q, null, 1000).scoreDocs;
|
||||||
int[] ranks = new int[] { 1, 2, 0 };
|
int[] ranks = new int[] { 1, 2, 0 };
|
||||||
|
|
|
@ -123,7 +123,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
||||||
int num = 1000 * RANDOM_MULTIPLIER;
|
int num = 1000 * RANDOM_MULTIPLIER;
|
||||||
for (int i = 0; i < num; i++) {
|
for (int i = 0; i < num; i++) {
|
||||||
String s = _TestUtil.randomUnicodeString(random());
|
String s = _TestUtil.randomUnicodeString(random());
|
||||||
TokenStream ts = analyzer.tokenStream("foo", s);
|
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
@ -134,7 +134,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
// just for fun
|
// just for fun
|
||||||
checkRandomData(random(), analyzer, num);
|
checkRandomData(random(), analyzer, num);
|
||||||
|
@ -161,7 +161,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
||||||
int num = 1000 * RANDOM_MULTIPLIER;
|
int num = 1000 * RANDOM_MULTIPLIER;
|
||||||
for (int i = 0; i < num; i++) {
|
for (int i = 0; i < num; i++) {
|
||||||
String s = _TestUtil.randomUnicodeString(random());
|
String s = _TestUtil.randomUnicodeString(random());
|
||||||
TokenStream ts = analyzer.tokenStream("foo", s);
|
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
@ -172,7 +172,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
// just for fun
|
// just for fun
|
||||||
checkRandomData(random(), analyzer, num);
|
checkRandomData(random(), analyzer, num);
|
||||||
|
|
|
@ -249,7 +249,7 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testTokenAttributes() throws Exception {
|
public void testTokenAttributes() throws Exception {
|
||||||
TokenStream ts = a.tokenStream("dummy", "This is a test");
|
try (TokenStream ts = a.tokenStream("dummy", "This is a test")) {
|
||||||
ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class);
|
ScriptAttribute scriptAtt = ts.addAttribute(ScriptAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
@ -259,6 +259,6 @@ public class TestICUTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertTrue(ts.reflectAsString(false).contains("script=Latin"));
|
assertTrue(ts.reflectAsString(false).contains("script=Latin"));
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,14 +53,14 @@ public class TestExtendedMode extends BaseTokenStreamTestCase {
|
||||||
int numIterations = atLeast(1000);
|
int numIterations = atLeast(1000);
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
String s = _TestUtil.randomUnicodeString(random(), 100);
|
String s = _TestUtil.randomUnicodeString(random(), 100);
|
||||||
TokenStream ts = analyzer.tokenStream("foo", s);
|
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
assertTrue(UnicodeUtil.validUTF16String(termAtt));
|
assertTrue(UnicodeUtil.validUTF16String(termAtt));
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -141,13 +141,13 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
* ideally the test would actually fail instead of hanging...
|
* ideally the test would actually fail instead of hanging...
|
||||||
*/
|
*/
|
||||||
public void testDecomposition5() throws Exception {
|
public void testDecomposition5() throws Exception {
|
||||||
TokenStream ts = analyzer.tokenStream("bogus", "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ");
|
try (TokenStream ts = analyzer.tokenStream("bogus", "くよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよくよ")) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -213,12 +213,12 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
public void testLargeDocReliability() throws Exception {
|
public void testLargeDocReliability() throws Exception {
|
||||||
for (int i = 0; i < 100; i++) {
|
for (int i = 0; i < 100; i++) {
|
||||||
String s = _TestUtil.randomUnicodeString(random(), 10000);
|
String s = _TestUtil.randomUnicodeString(random(), 10000);
|
||||||
TokenStream ts = analyzer.tokenStream("foo", s);
|
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,30 +236,32 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
System.out.println("\nTEST: iter=" + i);
|
System.out.println("\nTEST: iter=" + i);
|
||||||
}
|
}
|
||||||
String s = _TestUtil.randomUnicodeString(random(), 100);
|
String s = _TestUtil.randomUnicodeString(random(), 100);
|
||||||
TokenStream ts = analyzer.tokenStream("foo", s);
|
try (TokenStream ts = analyzer.tokenStream("foo", s)) {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
assertTrue(UnicodeUtil.validUTF16String(termAtt));
|
assertTrue(UnicodeUtil.validUTF16String(termAtt));
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOnlyPunctuation() throws IOException {
|
public void testOnlyPunctuation() throws IOException {
|
||||||
TokenStream ts = analyzerNoPunct.tokenStream("foo", "。、。。");
|
try (TokenStream ts = analyzerNoPunct.tokenStream("foo", "。、。。")) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testOnlyPunctuationExtended() throws IOException {
|
public void testOnlyPunctuationExtended() throws IOException {
|
||||||
TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......");
|
try (TokenStream ts = extendedModeAnalyzerNoPunct.tokenStream("foo", "......")) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// note: test is kinda silly since kuromoji emits punctuation tokens.
|
// note: test is kinda silly since kuromoji emits punctuation tokens.
|
||||||
// but, when/if we filter these out it will be useful.
|
// but, when/if we filter these out it will be useful.
|
||||||
|
@ -369,7 +371,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void assertReadings(String input, String... readings) throws IOException {
|
private void assertReadings(String input, String... readings) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("ignored", input);
|
try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
|
||||||
ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
|
ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
for(String reading : readings) {
|
for(String reading : readings) {
|
||||||
|
@ -379,9 +381,10 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void assertPronunciations(String input, String... pronunciations) throws IOException {
|
private void assertPronunciations(String input, String... pronunciations) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("ignored", input);
|
try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
|
||||||
ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
|
ReadingAttribute readingAtt = ts.addAttribute(ReadingAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
for(String pronunciation : pronunciations) {
|
for(String pronunciation : pronunciations) {
|
||||||
|
@ -391,9 +394,10 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void assertBaseForms(String input, String... baseForms) throws IOException {
|
private void assertBaseForms(String input, String... baseForms) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("ignored", input);
|
try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
|
||||||
BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class);
|
BaseFormAttribute baseFormAtt = ts.addAttribute(BaseFormAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
for(String baseForm : baseForms) {
|
for(String baseForm : baseForms) {
|
||||||
|
@ -403,9 +407,10 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void assertInflectionTypes(String input, String... inflectionTypes) throws IOException {
|
private void assertInflectionTypes(String input, String... inflectionTypes) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("ignored", input);
|
try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
|
||||||
InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
|
InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
for(String inflectionType : inflectionTypes) {
|
for(String inflectionType : inflectionTypes) {
|
||||||
|
@ -415,9 +420,10 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void assertInflectionForms(String input, String... inflectionForms) throws IOException {
|
private void assertInflectionForms(String input, String... inflectionForms) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("ignored", input);
|
try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
|
||||||
InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
|
InflectionAttribute inflectionAtt = ts.addAttribute(InflectionAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
for(String inflectionForm : inflectionForms) {
|
for(String inflectionForm : inflectionForms) {
|
||||||
|
@ -427,9 +433,10 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException {
|
private void assertPartsOfSpeech(String input, String... partsOfSpeech) throws IOException {
|
||||||
TokenStream ts = analyzer.tokenStream("ignored", input);
|
try (TokenStream ts = analyzer.tokenStream("ignored", input)) {
|
||||||
PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
|
PartOfSpeechAttribute partOfSpeechAtt = ts.addAttribute(PartOfSpeechAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
for(String partOfSpeech : partsOfSpeech) {
|
for(String partOfSpeech : partsOfSpeech) {
|
||||||
|
@ -439,6 +446,7 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testReadings() throws Exception {
|
public void testReadings() throws Exception {
|
||||||
assertReadings("寿司が食べたいです。",
|
assertReadings("寿司が食べたいです。",
|
||||||
|
@ -631,11 +639,11 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
long totalStart = System.currentTimeMillis();
|
long totalStart = System.currentTimeMillis();
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
final TokenStream ts = analyzer.tokenStream("ignored", line);
|
try (TokenStream ts = analyzer.tokenStream("ignored", line)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while(ts.incrementToken());
|
while(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
String[] sentences = line.split("、|。");
|
String[] sentences = line.split("、|。");
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
|
@ -645,11 +653,11 @@ public class TestJapaneseTokenizer extends BaseTokenStreamTestCase {
|
||||||
totalStart = System.currentTimeMillis();
|
totalStart = System.currentTimeMillis();
|
||||||
for (int i = 0; i < numIterations; i++) {
|
for (int i = 0; i < numIterations; i++) {
|
||||||
for (String sentence: sentences) {
|
for (String sentence: sentences) {
|
||||||
final TokenStream ts = analyzer.tokenStream("ignored", sentence);
|
try (TokenStream ts = analyzer.tokenStream("ignored", sentence)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while(ts.incrementToken());
|
while(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
|
|
|
@ -72,7 +72,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
private void dumpTokens(String input) throws IOException {
|
private void dumpTokens(String input) throws IOException {
|
||||||
TokenStream ts = getTestAnalyzer().tokenStream("dummy", input);
|
try (TokenStream ts = getTestAnalyzer().tokenStream("dummy", input)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
|
||||||
MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
|
MorphosyntacticTagsAttribute attribute = ts.getAttribute(MorphosyntacticTagsAttribute.class);
|
||||||
|
@ -80,26 +80,28 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
System.out.println(charTerm.toString() + " => " + attribute.getTags());
|
System.out.println(charTerm.toString() + " => " + attribute.getTags());
|
||||||
}
|
}
|
||||||
|
ts.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Test reuse of MorfologikFilter with leftover stems. */
|
/** Test reuse of MorfologikFilter with leftover stems. */
|
||||||
public final void testLeftoverStems() throws IOException {
|
public final void testLeftoverStems() throws IOException {
|
||||||
Analyzer a = getTestAnalyzer();
|
Analyzer a = getTestAnalyzer();
|
||||||
TokenStream ts_1 = a.tokenStream("dummy", "liście");
|
try (TokenStream ts_1 = a.tokenStream("dummy", "liście")) {
|
||||||
CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
|
||||||
ts_1.reset();
|
ts_1.reset();
|
||||||
ts_1.incrementToken();
|
ts_1.incrementToken();
|
||||||
assertEquals("first stream", "liście", termAtt_1.toString());
|
assertEquals("first stream", "liście", termAtt_1.toString());
|
||||||
ts_1.end();
|
ts_1.end();
|
||||||
ts_1.close();
|
}
|
||||||
|
|
||||||
TokenStream ts_2 = a.tokenStream("dummy", "danych");
|
try (TokenStream ts_2 = a.tokenStream("dummy", "danych")) {
|
||||||
CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
|
||||||
ts_2.reset();
|
ts_2.reset();
|
||||||
ts_2.incrementToken();
|
ts_2.incrementToken();
|
||||||
assertEquals("second stream", "dany", termAtt_2.toString());
|
assertEquals("second stream", "dany", termAtt_2.toString());
|
||||||
ts_2.end();
|
ts_2.end();
|
||||||
ts_2.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Test stemming of mixed-case tokens. */
|
/** Test stemming of mixed-case tokens. */
|
||||||
|
@ -140,8 +142,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
||||||
|
|
||||||
/** Test morphosyntactic annotations. */
|
/** Test morphosyntactic annotations. */
|
||||||
public final void testPOSAttribute() throws IOException {
|
public final void testPOSAttribute() throws IOException {
|
||||||
TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście");
|
try (TokenStream ts = getTestAnalyzer().tokenStream("dummy", "liście")) {
|
||||||
|
|
||||||
ts.reset();
|
ts.reset();
|
||||||
assertPOSToken(ts, "liście",
|
assertPOSToken(ts, "liście",
|
||||||
"subst:sg:acc:n2",
|
"subst:sg:acc:n2",
|
||||||
|
@ -161,7 +162,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
|
||||||
"subst:sg:dat:f",
|
"subst:sg:dat:f",
|
||||||
"subst:sg:loc:f");
|
"subst:sg:loc:f");
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** */
|
/** */
|
||||||
|
|
|
@ -184,10 +184,12 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
|
||||||
sb.append("我购买了道具和服装。");
|
sb.append("我购买了道具和服装。");
|
||||||
}
|
}
|
||||||
Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
|
Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
|
||||||
TokenStream stream = analyzer.tokenStream("", sb.toString());
|
try (TokenStream stream = analyzer.tokenStream("", sb.toString())) {
|
||||||
stream.reset();
|
stream.reset();
|
||||||
while (stream.incrementToken()) {
|
while (stream.incrementToken()) {
|
||||||
}
|
}
|
||||||
|
stream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-3026
|
// LUCENE-3026
|
||||||
|
@ -197,10 +199,12 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
|
||||||
sb.append("我购买了道具和服装");
|
sb.append("我购买了道具和服装");
|
||||||
}
|
}
|
||||||
Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
|
Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
|
||||||
TokenStream stream = analyzer.tokenStream("", sb.toString());
|
try (TokenStream stream = analyzer.tokenStream("", sb.toString())) {
|
||||||
stream.reset();
|
stream.reset();
|
||||||
while (stream.incrementToken()) {
|
while (stream.incrementToken()) {
|
||||||
}
|
}
|
||||||
|
stream.end();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE-3642
|
// LUCENE-3642
|
||||||
|
|
|
@ -91,8 +91,7 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
|
||||||
throw new IOException("You must first call Classifier#train");
|
throw new IOException("You must first call Classifier#train");
|
||||||
}
|
}
|
||||||
Long output = 0l;
|
Long output = 0l;
|
||||||
TokenStream tokenStream = analyzer.tokenStream(textFieldName,
|
try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, text)) {
|
||||||
new StringReader(text));
|
|
||||||
CharTermAttribute charTermAttribute = tokenStream
|
CharTermAttribute charTermAttribute = tokenStream
|
||||||
.addAttribute(CharTermAttribute.class);
|
.addAttribute(CharTermAttribute.class);
|
||||||
tokenStream.reset();
|
tokenStream.reset();
|
||||||
|
@ -104,7 +103,7 @@ public class BooleanPerceptronClassifier implements Classifier<Boolean> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tokenStream.end();
|
tokenStream.end();
|
||||||
tokenStream.close();
|
}
|
||||||
|
|
||||||
return new ClassificationResult<>(output >= threshold, output.doubleValue());
|
return new ClassificationResult<>(output >= threshold, output.doubleValue());
|
||||||
}
|
}
|
||||||
|
|
|
@ -85,14 +85,14 @@ public class SimpleNaiveBayesClassifier implements Classifier<BytesRef> {
|
||||||
|
|
||||||
private String[] tokenizeDoc(String doc) throws IOException {
|
private String[] tokenizeDoc(String doc) throws IOException {
|
||||||
Collection<String> result = new LinkedList<String>();
|
Collection<String> result = new LinkedList<String>();
|
||||||
TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc);
|
try (TokenStream tokenStream = analyzer.tokenStream(textFieldName, doc)) {
|
||||||
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
|
CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);
|
||||||
tokenStream.reset();
|
tokenStream.reset();
|
||||||
while (tokenStream.incrementToken()) {
|
while (tokenStream.incrementToken()) {
|
||||||
result.add(charTermAttribute.toString());
|
result.add(charTermAttribute.toString());
|
||||||
}
|
}
|
||||||
tokenStream.end();
|
tokenStream.end();
|
||||||
tokenStream.close();
|
}
|
||||||
return result.toArray(new String[result.size()]);
|
return result.toArray(new String[result.size()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -92,13 +92,9 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
||||||
fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
|
fieldState.position += analyzed ? docState.analyzer.getPositionIncrementGap(fieldInfo.name) : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
final TokenStream stream = field.tokenStream(docState.analyzer);
|
try (TokenStream stream = field.tokenStream(docState.analyzer)) {
|
||||||
// reset the TokenStream to the first token
|
// reset the TokenStream to the first token
|
||||||
stream.reset();
|
stream.reset();
|
||||||
|
|
||||||
boolean success2 = false;
|
|
||||||
|
|
||||||
try {
|
|
||||||
boolean hasMoreTokens = stream.incrementToken();
|
boolean hasMoreTokens = stream.incrementToken();
|
||||||
|
|
||||||
fieldState.attributeSource = stream;
|
fieldState.attributeSource = stream;
|
||||||
|
@ -179,13 +175,6 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
|
||||||
// when we come back around to the field...
|
// when we come back around to the field...
|
||||||
fieldState.position += posIncrAttribute.getPositionIncrement();
|
fieldState.position += posIncrAttribute.getPositionIncrement();
|
||||||
fieldState.offset += offsetAttribute.endOffset();
|
fieldState.offset += offsetAttribute.endOffset();
|
||||||
success2 = true;
|
|
||||||
} finally {
|
|
||||||
if (!success2) {
|
|
||||||
IOUtils.closeWhileHandlingException(stream);
|
|
||||||
} else {
|
|
||||||
stream.close();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;
|
fieldState.offset += analyzed ? docState.analyzer.getOffsetGap(fieldInfo.name) : 0;
|
||||||
|
|
|
@ -98,13 +98,13 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
||||||
String testString = "t";
|
String testString = "t";
|
||||||
|
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
TokenStream stream = analyzer.tokenStream("dummy", testString);
|
try (TokenStream stream = analyzer.tokenStream("dummy", testString)) {
|
||||||
stream.reset();
|
stream.reset();
|
||||||
while (stream.incrementToken()) {
|
while (stream.incrementToken()) {
|
||||||
// consume
|
// consume
|
||||||
}
|
}
|
||||||
stream.end();
|
stream.end();
|
||||||
stream.close();
|
}
|
||||||
|
|
||||||
assertAnalyzesTo(analyzer, testString, new String[] { "t" });
|
assertAnalyzesTo(analyzer, testString, new String[] { "t" });
|
||||||
}
|
}
|
||||||
|
@ -121,13 +121,13 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
|
||||||
StringReader reader = new StringReader(s);
|
StringReader reader = new StringReader(s);
|
||||||
MockCharFilter charfilter = new MockCharFilter(reader, 2);
|
MockCharFilter charfilter = new MockCharFilter(reader, 2);
|
||||||
MockAnalyzer analyzer = new MockAnalyzer(random());
|
MockAnalyzer analyzer = new MockAnalyzer(random());
|
||||||
TokenStream ts = analyzer.tokenStream("bogus", charfilter);
|
try (TokenStream ts = analyzer.tokenStream("bogus", charfilter)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
;
|
;
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -47,7 +47,7 @@ public class TestLongPostings extends LuceneTestCase {
|
||||||
if (other != null && s.equals(other)) {
|
if (other != null && s.equals(other)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
final TokenStream ts = a.tokenStream("foo", s);
|
try (TokenStream ts = a.tokenStream("foo", s)) {
|
||||||
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
|
final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
|
||||||
final BytesRef termBytes = termAtt.getBytesRef();
|
final BytesRef termBytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -66,14 +66,13 @@ public class TestLongPostings extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
|
||||||
|
|
||||||
// Did we iterate just once and the value was unchanged?
|
// Did we iterate just once and the value was unchanged?
|
||||||
if (!changed && count == 1) {
|
if (!changed && count == 1) {
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public void testLongPostings() throws Exception {
|
public void testLongPostings() throws Exception {
|
||||||
// Don't use _TestUtil.getTempDir so that we own the
|
// Don't use _TestUtil.getTempDir so that we own the
|
||||||
|
|
|
@ -174,17 +174,18 @@ public class TestTermVectorsWriter extends LuceneTestCase {
|
||||||
Analyzer analyzer = new MockAnalyzer(random());
|
Analyzer analyzer = new MockAnalyzer(random());
|
||||||
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
|
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
TokenStream stream = analyzer.tokenStream("field", "abcd ");
|
try (TokenStream stream = analyzer.tokenStream("field", "abcd ")) {
|
||||||
stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
|
stream.reset(); // TODO: weird to reset before wrapping with CachingTokenFilter... correct?
|
||||||
stream = new CachingTokenFilter(stream);
|
TokenStream cachedStream = new CachingTokenFilter(stream);
|
||||||
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
|
FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
|
||||||
customType.setStoreTermVectors(true);
|
customType.setStoreTermVectors(true);
|
||||||
customType.setStoreTermVectorPositions(true);
|
customType.setStoreTermVectorPositions(true);
|
||||||
customType.setStoreTermVectorOffsets(true);
|
customType.setStoreTermVectorOffsets(true);
|
||||||
Field f = new Field("field", stream, customType);
|
Field f = new Field("field", cachedStream, customType);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
doc.add(f);
|
doc.add(f);
|
||||||
w.addDocument(doc);
|
w.addDocument(doc);
|
||||||
|
}
|
||||||
w.close();
|
w.close();
|
||||||
|
|
||||||
IndexReader r = DirectoryReader.open(dir);
|
IndexReader r = DirectoryReader.open(dir);
|
||||||
|
|
|
@ -617,7 +617,7 @@ public class TestPhraseQuery extends LuceneTestCase {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TokenStream ts = analyzer.tokenStream("ignore", term);
|
try (TokenStream ts = analyzer.tokenStream("ignore", term)) {
|
||||||
CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while(ts.incrementToken()) {
|
while(ts.incrementToken()) {
|
||||||
|
@ -626,7 +626,7 @@ public class TestPhraseQuery extends LuceneTestCase {
|
||||||
sb.append(text).append(' ');
|
sb.append(text).append(' ');
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
} else {
|
} else {
|
||||||
// pick existing sub-phrase
|
// pick existing sub-phrase
|
||||||
List<String> lastDoc = docs.get(r.nextInt(docs.size()));
|
List<String> lastDoc = docs.get(r.nextInt(docs.size()));
|
||||||
|
|
|
@ -170,7 +170,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
|
protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
|
||||||
List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
|
List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
|
||||||
|
|
||||||
TokenStream tokenStream = analyzer.tokenStream(field, text);
|
try (TokenStream tokenStream = analyzer.tokenStream(field, text)) {
|
||||||
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||||
|
|
||||||
BytesRef bytesRef = termAttribute.getBytesRef();
|
BytesRef bytesRef = termAttribute.getBytesRef();
|
||||||
|
@ -183,7 +183,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
tokenStream.end();
|
tokenStream.end();
|
||||||
tokenStream.close();
|
}
|
||||||
|
|
||||||
return bytesRefs;
|
return bytesRefs;
|
||||||
}
|
}
|
||||||
|
|
|
@ -777,7 +777,7 @@ public final class MoreLikeThis {
|
||||||
throw new UnsupportedOperationException("To use MoreLikeThis without " +
|
throw new UnsupportedOperationException("To use MoreLikeThis without " +
|
||||||
"term vectors, you must provide an Analyzer");
|
"term vectors, you must provide an Analyzer");
|
||||||
}
|
}
|
||||||
TokenStream ts = analyzer.tokenStream(fieldName, r);
|
try (TokenStream ts = analyzer.tokenStream(fieldName, r)) {
|
||||||
int tokenCount = 0;
|
int tokenCount = 0;
|
||||||
// for every token
|
// for every token
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
@ -801,7 +801,7 @@ public final class MoreLikeThis {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -162,9 +162,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
|
||||||
*/
|
*/
|
||||||
protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
|
protected String analyzeSingleChunk(String field, String termStr, String chunk) throws ParseException{
|
||||||
String analyzed = null;
|
String analyzed = null;
|
||||||
TokenStream stream = null;
|
try (TokenStream stream = getAnalyzer().tokenStream(field, chunk)) {
|
||||||
try{
|
|
||||||
stream = getAnalyzer().tokenStream(field, chunk);
|
|
||||||
stream.reset();
|
stream.reset();
|
||||||
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
|
||||||
// get first and hopefully only output token
|
// get first and hopefully only output token
|
||||||
|
@ -186,7 +184,6 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
|
||||||
multipleOutputs.append('"');
|
multipleOutputs.append('"');
|
||||||
}
|
}
|
||||||
stream.end();
|
stream.end();
|
||||||
stream.close();
|
|
||||||
if (null != multipleOutputs) {
|
if (null != multipleOutputs) {
|
||||||
throw new ParseException(
|
throw new ParseException(
|
||||||
String.format(getLocale(),
|
String.format(getLocale(),
|
||||||
|
@ -196,7 +193,6 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
|
||||||
// nothing returned by analyzer. Was it a stop word and the user accidentally
|
// nothing returned by analyzer. Was it a stop word and the user accidentally
|
||||||
// used an analyzer with stop words?
|
// used an analyzer with stop words?
|
||||||
stream.end();
|
stream.end();
|
||||||
stream.close();
|
|
||||||
throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
|
throw new ParseException(String.format(getLocale(), "Analyzer returned nothing for \"%s\"", chunk));
|
||||||
}
|
}
|
||||||
} catch (IOException e){
|
} catch (IOException e){
|
||||||
|
|
|
@ -497,21 +497,17 @@ public abstract class QueryParserBase implements CommonQueryParserConfiguration
|
||||||
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException {
|
protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException {
|
||||||
// Use the analyzer to get all the tokens, and then build a TermQuery,
|
// Use the analyzer to get all the tokens, and then build a TermQuery,
|
||||||
// PhraseQuery, or nothing based on the term count
|
// PhraseQuery, or nothing based on the term count
|
||||||
|
CachingTokenFilter buffer = null;
|
||||||
TokenStream source;
|
|
||||||
try {
|
|
||||||
source = analyzer.tokenStream(field, queryText);
|
|
||||||
source.reset();
|
|
||||||
} catch (IOException e) {
|
|
||||||
ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text");
|
|
||||||
p.initCause(e);
|
|
||||||
throw p;
|
|
||||||
}
|
|
||||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
|
||||||
TermToBytesRefAttribute termAtt = null;
|
TermToBytesRefAttribute termAtt = null;
|
||||||
PositionIncrementAttribute posIncrAtt = null;
|
PositionIncrementAttribute posIncrAtt = null;
|
||||||
int numTokens = 0;
|
int numTokens = 0;
|
||||||
|
int positionCount = 0;
|
||||||
|
boolean severalTokensAtSamePosition = false;
|
||||||
|
boolean hasMoreTokens = false;
|
||||||
|
|
||||||
|
try (TokenStream source = analyzer.tokenStream(field, queryText)) {
|
||||||
|
source.reset();
|
||||||
|
buffer = new CachingTokenFilter(source);
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
|
|
||||||
if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
|
if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||||
|
@ -521,10 +517,6 @@ public abstract class QueryParserBase implements CommonQueryParserConfiguration
|
||||||
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
int positionCount = 0;
|
|
||||||
boolean severalTokensAtSamePosition = false;
|
|
||||||
|
|
||||||
boolean hasMoreTokens = false;
|
|
||||||
if (termAtt != null) {
|
if (termAtt != null) {
|
||||||
try {
|
try {
|
||||||
hasMoreTokens = buffer.incrementToken();
|
hasMoreTokens = buffer.incrementToken();
|
||||||
|
@ -542,19 +534,15 @@ public abstract class QueryParserBase implements CommonQueryParserConfiguration
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
} catch (IOException e) {
|
||||||
// rewind the buffer stream
|
ParseException p = new ParseException("Eror analyzing query text");
|
||||||
buffer.reset();
|
|
||||||
|
|
||||||
// close original stream - all tokens buffered
|
|
||||||
source.close();
|
|
||||||
}
|
|
||||||
catch (IOException e) {
|
|
||||||
ParseException p = new ParseException("Cannot close TokenStream analyzing query text");
|
|
||||||
p.initCause(e);
|
p.initCause(e);
|
||||||
throw p;
|
throw p;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rewind the buffer stream
|
||||||
|
buffer.reset();
|
||||||
|
|
||||||
BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
|
BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
|
||||||
|
|
||||||
if (numTokens == 0)
|
if (numTokens == 0)
|
||||||
|
@ -839,38 +827,24 @@ public abstract class QueryParserBase implements CommonQueryParserConfiguration
|
||||||
}
|
}
|
||||||
|
|
||||||
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
|
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) {
|
||||||
TokenStream source;
|
|
||||||
|
|
||||||
if (analyzerIn == null) analyzerIn = analyzer;
|
if (analyzerIn == null) analyzerIn = analyzer;
|
||||||
|
|
||||||
try {
|
try (TokenStream source = analyzerIn.tokenStream(field, part)) {
|
||||||
source = analyzerIn.tokenStream(field, part);
|
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
|
||||||
try {
|
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
|
throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part);
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
if (source.incrementToken())
|
if (source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
|
throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part);
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("error analyzing range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
source.end();
|
source.end();
|
||||||
source.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BytesRef.deepCopyOf(bytes);
|
return BytesRef.deepCopyOf(bytes);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Error analyzing multiTerm term: " + part, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -113,20 +113,16 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
|
||||||
String text = fieldNode.getTextAsString();
|
String text = fieldNode.getTextAsString();
|
||||||
String field = fieldNode.getFieldAsString();
|
String field = fieldNode.getFieldAsString();
|
||||||
|
|
||||||
TokenStream source;
|
CachingTokenFilter buffer = null;
|
||||||
try {
|
|
||||||
source = this.analyzer.tokenStream(field, text);
|
|
||||||
source.reset();
|
|
||||||
} catch (IOException e1) {
|
|
||||||
throw new RuntimeException(e1);
|
|
||||||
}
|
|
||||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
|
||||||
|
|
||||||
PositionIncrementAttribute posIncrAtt = null;
|
PositionIncrementAttribute posIncrAtt = null;
|
||||||
int numTokens = 0;
|
int numTokens = 0;
|
||||||
int positionCount = 0;
|
int positionCount = 0;
|
||||||
boolean severalTokensAtSamePosition = false;
|
boolean severalTokensAtSamePosition = false;
|
||||||
|
|
||||||
|
try (TokenStream source = this.analyzer.tokenStream(field, text)) {
|
||||||
|
source.reset();
|
||||||
|
buffer = new CachingTokenFilter(source);
|
||||||
|
|
||||||
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
|
if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
|
||||||
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
||||||
}
|
}
|
||||||
|
@ -149,17 +145,13 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
// rewind the buffer stream
|
// rewind the buffer stream
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
|
|
||||||
// close original stream - all tokens buffered
|
|
||||||
source.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
// ignore
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!buffer.hasAttribute(CharTermAttribute.class)) {
|
if (!buffer.hasAttribute(CharTermAttribute.class)) {
|
||||||
return new NoTokenFoundQueryNode();
|
return new NoTokenFoundQueryNode();
|
||||||
}
|
}
|
||||||
|
|
|
@ -73,8 +73,7 @@ public class LikeThisQueryBuilder implements QueryBuilder {
|
||||||
if ((stopWords != null) && (fields != null)) {
|
if ((stopWords != null) && (fields != null)) {
|
||||||
stopWordsSet = new HashSet<String>();
|
stopWordsSet = new HashSet<String>();
|
||||||
for (String field : fields) {
|
for (String field : fields) {
|
||||||
try {
|
try (TokenStream ts = analyzer.tokenStream(field, stopWords)) {
|
||||||
TokenStream ts = analyzer.tokenStream(field, stopWords);
|
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
|
|
|
@ -49,9 +49,9 @@ public class SpanOrTermsBuilder extends SpanBuilderBase {
|
||||||
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
|
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
|
||||||
String value = DOMUtils.getNonBlankTextOrFail(e);
|
String value = DOMUtils.getNonBlankTextOrFail(e);
|
||||||
|
|
||||||
try {
|
|
||||||
List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
|
List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
|
||||||
TokenStream ts = analyzer.tokenStream(fieldName, value);
|
|
||||||
|
try (TokenStream ts = analyzer.tokenStream(fieldName, value)) {
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -61,7 +61,6 @@ public class SpanOrTermsBuilder extends SpanBuilderBase {
|
||||||
clausesList.add(stq);
|
clausesList.add(stq);
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
|
||||||
SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
|
SpanOrQuery soq = new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
|
||||||
soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
|
soq.setBoost(DOMUtils.getAttribute(e, "boost", 1.0f));
|
||||||
return soq;
|
return soq;
|
||||||
|
|
|
@ -54,8 +54,7 @@ public class TermsFilterBuilder implements FilterBuilder {
|
||||||
String text = DOMUtils.getNonBlankTextOrFail(e);
|
String text = DOMUtils.getNonBlankTextOrFail(e);
|
||||||
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
|
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
|
||||||
|
|
||||||
try {
|
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
|
||||||
TokenStream ts = analyzer.tokenStream(fieldName, text);
|
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -64,7 +63,6 @@ public class TermsFilterBuilder implements FilterBuilder {
|
||||||
terms.add(BytesRef.deepCopyOf(bytes));
|
terms.add(BytesRef.deepCopyOf(bytes));
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
|
||||||
}
|
}
|
||||||
catch (IOException ioe) {
|
catch (IOException ioe) {
|
||||||
throw new RuntimeException("Error constructing terms from index:" + ioe);
|
throw new RuntimeException("Error constructing terms from index:" + ioe);
|
||||||
|
|
|
@ -51,8 +51,7 @@ public class TermsQueryBuilder implements QueryBuilder {
|
||||||
|
|
||||||
BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false));
|
BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false));
|
||||||
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));
|
bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));
|
||||||
try {
|
try (TokenStream ts = analyzer.tokenStream(fieldName, text)) {
|
||||||
TokenStream ts = analyzer.tokenStream(fieldName, text);
|
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
Term term = null;
|
Term term = null;
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
@ -63,7 +62,6 @@ public class TermsQueryBuilder implements QueryBuilder {
|
||||||
bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
|
bq.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.SHOULD));
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
|
||||||
}
|
}
|
||||||
catch (IOException ioe) {
|
catch (IOException ioe) {
|
||||||
throw new RuntimeException("Error constructing terms from index:" + ioe);
|
throw new RuntimeException("Error constructing terms from index:" + ioe);
|
||||||
|
|
|
@ -193,16 +193,16 @@ public class FuzzyLikeThisQuery extends Query
|
||||||
|
|
||||||
private void addTerms(IndexReader reader, FieldVals f) throws IOException {
|
private void addTerms(IndexReader reader, FieldVals f) throws IOException {
|
||||||
if (f.queryString == null) return;
|
if (f.queryString == null) return;
|
||||||
TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString);
|
final Terms terms = MultiFields.getTerms(reader, f.fieldName);
|
||||||
|
if (terms == null) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
try (TokenStream ts = analyzer.tokenStream(f.fieldName, f.queryString)) {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
|
||||||
int corpusNumDocs = reader.numDocs();
|
int corpusNumDocs = reader.numDocs();
|
||||||
HashSet<String> processedTerms = new HashSet<String>();
|
HashSet<String> processedTerms = new HashSet<String>();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
final Terms terms = MultiFields.getTerms(reader, f.fieldName);
|
|
||||||
if (terms == null) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
while (ts.incrementToken()) {
|
while (ts.incrementToken()) {
|
||||||
String term = termAtt.toString();
|
String term = termAtt.toString();
|
||||||
if (!processedTerms.contains(term)) {
|
if (!processedTerms.contains(term)) {
|
||||||
|
@ -253,7 +253,7 @@ public class FuzzyLikeThisQuery extends Query
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -352,9 +352,8 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
occur = BooleanClause.Occur.SHOULD;
|
occur = BooleanClause.Occur.SHOULD;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try (TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()))) {
|
||||||
//long t0 = System.currentTimeMillis();
|
//long t0 = System.currentTimeMillis();
|
||||||
TokenStream ts = queryAnalyzer.tokenStream("", new StringReader(key.toString()));
|
|
||||||
ts.reset();
|
ts.reset();
|
||||||
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
final OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
|
@ -464,7 +463,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
* result is set on each {@link
|
* result is set on each {@link
|
||||||
* LookupResult#highlightKey} member. */
|
* LookupResult#highlightKey} member. */
|
||||||
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
|
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
|
||||||
TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text));
|
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -495,10 +494,9 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
|
||||||
if (upto < endOffset) {
|
if (upto < endOffset) {
|
||||||
addNonMatch(sb, text.substring(upto));
|
addNonMatch(sb, text.substring(upto));
|
||||||
}
|
}
|
||||||
ts.close();
|
|
||||||
|
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** Called while highlighting a single result, to append a
|
/** Called while highlighting a single result, to append a
|
||||||
* non-matching chunk of text from the suggestion to the
|
* non-matching chunk of text from the suggestion to the
|
||||||
|
|
|
@ -828,13 +828,14 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
|
|
||||||
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
|
final Set<IntsRef> toFiniteStrings(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException {
|
||||||
// Analyze surface form:
|
// Analyze surface form:
|
||||||
TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString());
|
Automaton automaton = null;
|
||||||
|
try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) {
|
||||||
|
|
||||||
// Create corresponding automaton: labels are bytes
|
// Create corresponding automaton: labels are bytes
|
||||||
// from each analyzed token, with byte 0 used as
|
// from each analyzed token, with byte 0 used as
|
||||||
// separator between tokens:
|
// separator between tokens:
|
||||||
Automaton automaton = ts2a.toAutomaton(ts);
|
automaton = ts2a.toAutomaton(ts);
|
||||||
ts.close();
|
}
|
||||||
|
|
||||||
replaceSep(automaton);
|
replaceSep(automaton);
|
||||||
automaton = convertAutomaton(automaton);
|
automaton = convertAutomaton(automaton);
|
||||||
|
@ -854,9 +855,10 @@ public class AnalyzingSuggester extends Lookup {
|
||||||
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
|
final Automaton toLookupAutomaton(final CharSequence key) throws IOException {
|
||||||
// TODO: is there a Reader from a CharSequence?
|
// TODO: is there a Reader from a CharSequence?
|
||||||
// Turn tokenstream into automaton:
|
// Turn tokenstream into automaton:
|
||||||
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
|
Automaton automaton = null;
|
||||||
Automaton automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
|
try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
|
||||||
ts.close();
|
automaton = (getTokenStreamToAutomaton()).toAutomaton(ts);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO: we could use the end offset to "guess"
|
// TODO: we could use the end offset to "guess"
|
||||||
// whether the final token was a partial token; this
|
// whether the final token was a partial token; this
|
||||||
|
|
|
@ -449,7 +449,7 @@ public class FreeTextSuggester extends Lookup {
|
||||||
|
|
||||||
/** Retrieve suggestions. */
|
/** Retrieve suggestions. */
|
||||||
public List<LookupResult> lookup(final CharSequence key, int num) throws IOException {
|
public List<LookupResult> lookup(final CharSequence key, int num) throws IOException {
|
||||||
TokenStream ts = queryAnalyzer.tokenStream("", key.toString());
|
try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) {
|
||||||
TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termBytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
|
PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class);
|
||||||
|
@ -498,7 +498,6 @@ public class FreeTextSuggester extends Lookup {
|
||||||
// because we fill the unigram with an empty BytesRef
|
// because we fill the unigram with an empty BytesRef
|
||||||
// below:
|
// below:
|
||||||
boolean lastTokenEnded = offsetAtt.endOffset() > maxEndOffset || endPosInc > 0;
|
boolean lastTokenEnded = offsetAtt.endOffset() > maxEndOffset || endPosInc > 0;
|
||||||
ts.close();
|
|
||||||
//System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.endOffset());
|
//System.out.println("maxEndOffset=" + maxEndOffset + " vs " + offsetAtt.endOffset());
|
||||||
|
|
||||||
if (lastTokenEnded) {
|
if (lastTokenEnded) {
|
||||||
|
@ -714,6 +713,7 @@ public class FreeTextSuggester extends Lookup {
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/** weight -> cost */
|
/** weight -> cost */
|
||||||
private long encodeWeight(long ngramCount) {
|
private long encodeWeight(long ngramCount) {
|
||||||
|
|
|
@ -165,7 +165,7 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
|
protected Object highlight(String text, Set<String> matchedTokens, String prefixToken) throws IOException {
|
||||||
TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text));
|
try (TokenStream ts = queryAnalyzer.tokenStream("text", new StringReader(text))) {
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -199,10 +199,10 @@ public class AnalyzingInfixSuggesterTest extends LuceneTestCase {
|
||||||
if (upto < endOffset) {
|
if (upto < endOffset) {
|
||||||
fragments.add(new LookupHighlightFragment(text.substring(upto), false));
|
fragments.add(new LookupHighlightFragment(text.substring(upto), false));
|
||||||
}
|
}
|
||||||
ts.close();
|
|
||||||
|
|
||||||
return fragments;
|
return fragments;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
suggester.build(new TermFreqPayloadArrayIterator(keys));
|
||||||
|
|
||||||
|
|
|
@ -258,7 +258,7 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
|
|
||||||
for (int i = 0; i < numTestPoints; i++) {
|
for (int i = 0; i < numTestPoints; i++) {
|
||||||
String term = _TestUtil.randomSimpleString(random());
|
String term = _TestUtil.randomSimpleString(random());
|
||||||
TokenStream ts = analyzer.tokenStream("fake", term);
|
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -268,7 +268,7 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
map.put(term, BytesRef.deepCopyOf(bytes));
|
map.put(term, BytesRef.deepCopyOf(bytes));
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Thread threads[] = new Thread[numThreads];
|
Thread threads[] = new Thread[numThreads];
|
||||||
|
@ -280,7 +280,7 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
|
for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
|
||||||
String term = mapping.getKey();
|
String term = mapping.getKey();
|
||||||
BytesRef expected = mapping.getValue();
|
BytesRef expected = mapping.getValue();
|
||||||
TokenStream ts = analyzer.tokenStream("fake", term);
|
try (TokenStream ts = analyzer.tokenStream("fake", term)) {
|
||||||
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
ts.reset();
|
ts.reset();
|
||||||
|
@ -289,7 +289,7 @@ public abstract class CollationTestBase extends LuceneTestCase {
|
||||||
assertEquals(expected, bytes);
|
assertEquals(expected, bytes);
|
||||||
assertFalse(ts.incrementToken());
|
assertFalse(ts.incrementToken());
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
|
@ -234,36 +234,23 @@ public class ICUCollationField extends FieldType {
|
||||||
* simple (we already have a threadlocal clone in the reused TS)
|
* simple (we already have a threadlocal clone in the reused TS)
|
||||||
*/
|
*/
|
||||||
private BytesRef analyzeRangePart(String field, String part) {
|
private BytesRef analyzeRangePart(String field, String part) {
|
||||||
TokenStream source;
|
try (TokenStream source = analyzer.tokenStream(field, part)) {
|
||||||
|
|
||||||
try {
|
|
||||||
source = analyzer.tokenStream(field, part);
|
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
|
||||||
// we control the analyzer here: most errors are impossible
|
// we control the analyzer here: most errors are impossible
|
||||||
try {
|
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
|
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
assert !source.incrementToken();
|
assert !source.incrementToken();
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("error analyzing range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
source.end();
|
source.end();
|
||||||
source.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BytesRef.deepCopyOf(bytes);
|
return BytesRef.deepCopyOf(bytes);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Unable analyze range part: " + part, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -85,15 +85,13 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
||||||
|
|
||||||
if (!TokenizerChain.class.isInstance(analyzer)) {
|
if (!TokenizerChain.class.isInstance(analyzer)) {
|
||||||
|
|
||||||
TokenStream tokenStream = null;
|
try (TokenStream tokenStream = analyzer.tokenStream(context.getFieldName(), value)) {
|
||||||
try {
|
|
||||||
tokenStream = analyzer.tokenStream(context.getFieldName(), value);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
|
|
||||||
}
|
|
||||||
NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
|
NamedList<List<NamedList>> namedList = new NamedList<List<NamedList>>();
|
||||||
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
|
namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(analyzeTokenStream(tokenStream), context));
|
||||||
return namedList;
|
return namedList;
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
|
TokenizerChain tokenizerChain = (TokenizerChain) analyzer;
|
||||||
|
@ -139,10 +137,8 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
||||||
* @param analyzer The analyzer to use.
|
* @param analyzer The analyzer to use.
|
||||||
*/
|
*/
|
||||||
protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
|
protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
|
||||||
TokenStream tokenStream = null;
|
try (TokenStream tokenStream = analyzer.tokenStream("", query)){
|
||||||
try {
|
|
||||||
final Set<BytesRef> tokens = new HashSet<BytesRef>();
|
final Set<BytesRef> tokens = new HashSet<BytesRef>();
|
||||||
tokenStream = analyzer.tokenStream("", query);
|
|
||||||
final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
|
||||||
final BytesRef bytes = bytesAtt.getBytesRef();
|
final BytesRef bytes = bytesAtt.getBytesRef();
|
||||||
|
|
||||||
|
@ -157,8 +153,6 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
|
||||||
return tokens;
|
return tokens;
|
||||||
} catch (IOException ioe) {
|
} catch (IOException ioe) {
|
||||||
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
|
throw new RuntimeException("Error occured while iterating over tokenstream", ioe);
|
||||||
} finally {
|
|
||||||
IOUtils.closeWhileHandlingException(tokenStream);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -344,7 +344,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
|
||||||
return query;
|
return query;
|
||||||
}
|
}
|
||||||
StringBuilder norm = new StringBuilder();
|
StringBuilder norm = new StringBuilder();
|
||||||
TokenStream tokens = analyzer.tokenStream("", query);
|
try (TokenStream tokens = analyzer.tokenStream("", query)) {
|
||||||
tokens.reset();
|
tokens.reset();
|
||||||
|
|
||||||
CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
|
||||||
|
@ -352,9 +352,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
|
||||||
norm.append(termAtt.buffer(), 0, termAtt.length());
|
norm.append(termAtt.buffer(), 0, termAtt.length());
|
||||||
}
|
}
|
||||||
tokens.end();
|
tokens.end();
|
||||||
tokens.close();
|
|
||||||
return norm.toString();
|
return norm.toString();
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------------
|
//---------------------------------------------------------------------------------
|
||||||
// SearchComponent
|
// SearchComponent
|
||||||
|
|
|
@ -463,7 +463,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
|
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
|
||||||
Collection<Token> result = new ArrayList<Token>();
|
Collection<Token> result = new ArrayList<Token>();
|
||||||
assert analyzer != null;
|
assert analyzer != null;
|
||||||
TokenStream ts = analyzer.tokenStream("", q);
|
try (TokenStream ts = analyzer.tokenStream("", q)) {
|
||||||
ts.reset();
|
ts.reset();
|
||||||
// TODO: support custom attributes
|
// TODO: support custom attributes
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
|
@ -484,9 +484,9 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
|
||||||
result.add(token);
|
result.add(token);
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
protected SolrSpellChecker getSpellChecker(SolrParams params) {
|
protected SolrSpellChecker getSpellChecker(SolrParams params) {
|
||||||
String[] dictName = getDictionaryNames(params);
|
String[] dictName = getDictionaryNames(params);
|
||||||
|
|
|
@ -403,18 +403,16 @@ public abstract class SolrQueryParserBase {
|
||||||
// Use the analyzer to get all the tokens, and then build a TermQuery,
|
// Use the analyzer to get all the tokens, and then build a TermQuery,
|
||||||
// PhraseQuery, or nothing based on the term count
|
// PhraseQuery, or nothing based on the term count
|
||||||
|
|
||||||
TokenStream source;
|
CachingTokenFilter buffer = null;
|
||||||
try {
|
|
||||||
source = analyzer.tokenStream(field, queryText);
|
|
||||||
source.reset();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new SyntaxError("Unable to initialize TokenStream to analyze query text", e);
|
|
||||||
}
|
|
||||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
|
||||||
TermToBytesRefAttribute termAtt = null;
|
TermToBytesRefAttribute termAtt = null;
|
||||||
PositionIncrementAttribute posIncrAtt = null;
|
PositionIncrementAttribute posIncrAtt = null;
|
||||||
int numTokens = 0;
|
int numTokens = 0;
|
||||||
|
int positionCount = 0;
|
||||||
|
boolean severalTokensAtSamePosition = false;
|
||||||
|
|
||||||
|
try (TokenStream source = analyzer.tokenStream(field, queryText)) {
|
||||||
|
source.reset();
|
||||||
|
buffer = new CachingTokenFilter(source);
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
|
|
||||||
if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
|
if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
|
||||||
|
@ -424,9 +422,6 @@ public abstract class SolrQueryParserBase {
|
||||||
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
int positionCount = 0;
|
|
||||||
boolean severalTokensAtSamePosition = false;
|
|
||||||
|
|
||||||
boolean hasMoreTokens = false;
|
boolean hasMoreTokens = false;
|
||||||
if (termAtt != null) {
|
if (termAtt != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -445,17 +440,13 @@ public abstract class SolrQueryParserBase {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
} catch (IOException e) {
|
||||||
|
throw new SyntaxError("Error analyzing query text", e);
|
||||||
|
}
|
||||||
|
|
||||||
// rewind the buffer stream
|
// rewind the buffer stream
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
|
|
||||||
// close original stream - all tokens buffered
|
|
||||||
source.close();
|
|
||||||
}
|
|
||||||
catch (IOException e) {
|
|
||||||
throw new SyntaxError("Cannot close TokenStream analyzing query text", e);
|
|
||||||
}
|
|
||||||
|
|
||||||
BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
|
BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
|
||||||
|
|
||||||
if (numTokens == 0)
|
if (numTokens == 0)
|
||||||
|
|
|
@ -210,36 +210,22 @@ public class CollationField extends FieldType {
|
||||||
* simple (we already have a threadlocal clone in the reused TS)
|
* simple (we already have a threadlocal clone in the reused TS)
|
||||||
*/
|
*/
|
||||||
private BytesRef analyzeRangePart(String field, String part) {
|
private BytesRef analyzeRangePart(String field, String part) {
|
||||||
TokenStream source;
|
try (TokenStream source = analyzer.tokenStream(field, part)) {
|
||||||
|
|
||||||
try {
|
|
||||||
source = analyzer.tokenStream(field, part);
|
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
|
||||||
// we control the analyzer here: most errors are impossible
|
// we control the analyzer here: most errors are impossible
|
||||||
try {
|
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken())
|
||||||
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
|
throw new IllegalArgumentException("analyzer returned no terms for range part: " + part);
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
assert !source.incrementToken();
|
assert !source.incrementToken();
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("error analyzing range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
source.end();
|
source.end();
|
||||||
source.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to end & close TokenStream after analyzing range part: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BytesRef.deepCopyOf(bytes);
|
return BytesRef.deepCopyOf(bytes);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Unable to analyze range part: " + part, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -138,35 +138,23 @@ public class TextField extends FieldType {
|
||||||
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
|
public static BytesRef analyzeMultiTerm(String field, String part, Analyzer analyzerIn) {
|
||||||
if (part == null || analyzerIn == null) return null;
|
if (part == null || analyzerIn == null) return null;
|
||||||
|
|
||||||
TokenStream source;
|
try (TokenStream source = analyzerIn.tokenStream(field, part)){
|
||||||
try {
|
|
||||||
source = analyzerIn.tokenStream(field, part);
|
|
||||||
source.reset();
|
source.reset();
|
||||||
} catch (IOException e) {
|
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unable to initialize TokenStream to analyze multiTerm term: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class);
|
||||||
BytesRef bytes = termAtt.getBytesRef();
|
BytesRef bytes = termAtt.getBytesRef();
|
||||||
|
|
||||||
try {
|
|
||||||
if (!source.incrementToken())
|
if (!source.incrementToken())
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned no terms for multiTerm term: " + part);
|
||||||
termAtt.fillBytesRef();
|
termAtt.fillBytesRef();
|
||||||
if (source.incrementToken())
|
if (source.incrementToken())
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"analyzer returned too many terms for multiTerm term: " + part);
|
||||||
|
|
||||||
|
source.end();
|
||||||
|
return BytesRef.deepCopyOf(bytes);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"error analyzing range part: " + part, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
|
||||||
source.end();
|
|
||||||
source.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to end & close TokenStream after analyzing multiTerm term: " + part, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
return BytesRef.deepCopyOf(bytes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -178,17 +166,16 @@ public class TextField extends FieldType {
|
||||||
// Use the analyzer to get all the tokens, and then build a TermQuery,
|
// Use the analyzer to get all the tokens, and then build a TermQuery,
|
||||||
// PhraseQuery, or nothing based on the term count
|
// PhraseQuery, or nothing based on the term count
|
||||||
|
|
||||||
TokenStream source;
|
CachingTokenFilter buffer = null;
|
||||||
try {
|
|
||||||
source = analyzer.tokenStream(field, queryText);
|
|
||||||
source.reset();
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new RuntimeException("Unable to initialize TokenStream to analyze query text", e);
|
|
||||||
}
|
|
||||||
CachingTokenFilter buffer = new CachingTokenFilter(source);
|
|
||||||
CharTermAttribute termAtt = null;
|
CharTermAttribute termAtt = null;
|
||||||
PositionIncrementAttribute posIncrAtt = null;
|
PositionIncrementAttribute posIncrAtt = null;
|
||||||
int numTokens = 0;
|
int numTokens = 0;
|
||||||
|
int positionCount = 0;
|
||||||
|
boolean severalTokensAtSamePosition = false;
|
||||||
|
|
||||||
|
try (TokenStream source = analyzer.tokenStream(field, queryText)) {
|
||||||
|
source.reset();
|
||||||
|
buffer = new CachingTokenFilter(source);
|
||||||
|
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
|
|
||||||
|
@ -199,9 +186,6 @@ public class TextField extends FieldType {
|
||||||
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
int positionCount = 0;
|
|
||||||
boolean severalTokensAtSamePosition = false;
|
|
||||||
|
|
||||||
boolean hasMoreTokens = false;
|
boolean hasMoreTokens = false;
|
||||||
if (termAtt != null) {
|
if (termAtt != null) {
|
||||||
try {
|
try {
|
||||||
|
@ -220,17 +204,13 @@ public class TextField extends FieldType {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try {
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
}
|
||||||
|
|
||||||
// rewind the buffer stream
|
// rewind the buffer stream
|
||||||
buffer.reset();
|
buffer.reset();
|
||||||
|
|
||||||
// close original stream - all tokens buffered
|
|
||||||
source.close();
|
|
||||||
}
|
|
||||||
catch (IOException e) {
|
|
||||||
// ignore
|
|
||||||
}
|
|
||||||
|
|
||||||
if (numTokens == 0)
|
if (numTokens == 0)
|
||||||
return null;
|
return null;
|
||||||
else if (numTokens == 1) {
|
else if (numTokens == 1) {
|
||||||
|
|
|
@ -40,10 +40,10 @@ class SimpleQueryConverter extends SpellingQueryConverter {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Collection<Token> convert(String origQuery) {
|
public Collection<Token> convert(String origQuery) {
|
||||||
try {
|
|
||||||
Collection<Token> result = new HashSet<Token>();
|
Collection<Token> result = new HashSet<Token>();
|
||||||
WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);
|
WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);
|
||||||
TokenStream ts = analyzer.tokenStream("", origQuery);
|
|
||||||
|
try (TokenStream ts = analyzer.tokenStream("", origQuery)) {
|
||||||
// TODO: support custom attributes
|
// TODO: support custom attributes
|
||||||
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
|
||||||
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
|
||||||
|
@ -65,8 +65,6 @@ class SimpleQueryConverter extends SpellingQueryConverter {
|
||||||
result.add(tok);
|
result.add(tok);
|
||||||
}
|
}
|
||||||
ts.end();
|
ts.end();
|
||||||
ts.close();
|
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new RuntimeException(e);
|
throw new RuntimeException(e);
|
||||||
|
|
Loading…
Reference in New Issue