mirror of https://github.com/apache/lucene.git
LUCENE-6884: Analyzer.tokenStream() and Tokenizer.setReader() don't need to throw IOException
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1712731 13f79535-47bb-0310-9956-ffa450edef68
parent 163fb8954e
commit 43cd1639f5
@@ -144,6 +144,9 @@ API Changes
 * LUCENE-6870: DisjunctionMaxQuery#add is now deprecated, clauses should all be
   provided at construction time. (Adrien Grand)
 
+* LUCENE-6884: Analyzer.tokenStream() and Tokenizer.setReader() are no longer
+  declared as throwing IOException. (Alan Woodward)
+
 Optimizations
 
 * LUCENE-6708: TopFieldCollector does not compute the score several times on the
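
For context (not part of the commit): the caller-side effect of the CHANGES entry above, sketched against the Lucene 5.x-era API. Obtaining a stream no longer forces a try/catch; only the consume calls (reset(), incrementToken(), end(), close()) still declare IOException.

    import java.io.IOException;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class TokenStreamDemo {
      public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // No try/catch needed around tokenStream() after this change.
        TokenStream ts = analyzer.tokenStream("body", "The Quick Brown Fox");
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        try {
          ts.reset();                     // consuming the stream may still do I/O
          while (ts.incrementToken()) {
            System.out.println(term);    // quick / brown / fox (stopwords removed)
          }
          ts.end();
        } finally {
          ts.close();
        }
      }
    }
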
@@ -17,6 +17,9 @@ package org.apache.lucene.analysis.standard;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 
-import java.io.IOException;
-import java.io.Reader;
-
 /**
  * Filters {@link ClassicTokenizer} with {@link ClassicFilter}, {@link
  * LowerCaseFilter} and {@link StopFilter}, using a list of
@@ -94,7 +94,7 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
     tok = new StopFilter(tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override
-      protected void setReader(final Reader reader) throws IOException {
+      protected void setReader(final Reader reader) {
         src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
         super.setReader(reader);
       }

@@ -17,6 +17,9 @@ package org.apache.lucene.analysis.standard;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -25,9 +28,6 @@ import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.util.WordlistLoader;
 
-import java.io.IOException;
-import java.io.Reader;
-
 /**
  * Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
  * LowerCaseFilter} and {@link StopFilter}, using a list of
@@ -89,7 +89,7 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
     tok = new StopFilter(tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override
-      protected void setReader(final Reader reader) throws IOException {
+      protected void setReader(final Reader reader) {
         src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
         super.setReader(reader);
       }

@@ -17,6 +17,9 @@ package org.apache.lucene.analysis.standard;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.LowerCaseFilter;
 import org.apache.lucene.analysis.core.StopAnalyzer;
@@ -24,9 +27,6 @@ import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 
-import java.io.IOException;
-import java.io.Reader;
-
 /**
  * Filters {@link org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer}
  * with {@link org.apache.lucene.analysis.standard.StandardFilter},
@@ -91,7 +91,7 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
     tok = new StopFilter(tok, stopwords);
     return new TokenStreamComponents(src, tok) {
       @Override
-      protected void setReader(final Reader reader) throws IOException {
+      protected void setReader(final Reader reader) {
         src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
         super.setReader(reader);
       }

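
All three analyzers above make the same one-line change: the overriding setReader() drops its throws clause to match the narrowed supertype method; the bodies (a field assignment plus the super call) never performed I/O. A reduced sketch of the shared pattern, with a hypothetical analyzer class and an illustrative length limit:

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    public class MaxLengthAnalyzer extends Analyzer {
      private int maxTokenLength = 255;  // illustrative default

      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        final StandardTokenizer src = new StandardTokenizer();
        TokenStream tok = new LowerCaseFilter(src);
        return new TokenStreamComponents(src, tok) {
          @Override
          protected void setReader(final Reader reader) {  // no throws clause anymore
            src.setMaxTokenLength(maxTokenLength);         // per-use configuration
            super.setReader(reader);
          }
        };
      }
    }
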
@@ -1,6 +1,5 @@
 package org.apache.lucene.analysis.miscellaneous;
 
-import java.io.IOException;
 import java.io.Reader;
 import java.util.Collections;
 import java.util.Map;
@@ -16,7 +15,6 @@ import org.apache.lucene.analysis.core.SimpleAnalyzer;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.IOUtils;
-import org.apache.lucene.util.Rethrow;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -105,11 +103,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
 
       @Override
       protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-        try {
-          assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
-        } catch (IOException e) {
-          Rethrow.rethrow(e);
-        }
+        assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
         TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
         return new TokenStreamComponents(components.getTokenizer(), filter);
       }

@@ -17,16 +17,15 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
-import org.apache.lucene.store.AlreadyClosedException;
-import org.apache.lucene.util.CloseableThreadLocal;
-import org.apache.lucene.util.Version;
-
 import java.io.Closeable;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.store.AlreadyClosedException;
+import org.apache.lucene.util.CloseableThreadLocal;
+import org.apache.lucene.util.Version;
+
 /**
  * An Analyzer builds TokenStreams, which analyze text. It thus represents a
  * policy for extracting index terms from text.
@@ -131,11 +130,10 @@ public abstract class Analyzer implements Closeable {
    * @param reader the reader the streams source reads from
    * @return TokenStream for iterating the analyzed content of <code>reader</code>
    * @throws AlreadyClosedException if the Analyzer is closed.
-   * @throws IOException if an i/o error occurs.
    * @see #tokenStream(String, String)
    */
   public final TokenStream tokenStream(final String fieldName,
-                                       final Reader reader) throws IOException {
+                                       final Reader reader) {
     TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName);
     final Reader r = initReader(fieldName, reader);
     if (components == null) {
@@ -165,10 +163,9 @@ public abstract class Analyzer implements Closeable {
    * @param text the String the streams source reads from
    * @return TokenStream for iterating the analyzed content of <code>reader</code>
    * @throws AlreadyClosedException if the Analyzer is closed.
-   * @throws IOException if an i/o error occurs (may rarely happen for strings).
    * @see #tokenStream(String, Reader)
    */
-  public final TokenStream tokenStream(final String fieldName, final String text) throws IOException {
+  public final TokenStream tokenStream(final String fieldName, final String text) {
     TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName);
     @SuppressWarnings("resource") final ReusableStringReader strReader =
       (components == null || components.reusableStringReader == null) ?
@@ -313,10 +310,8 @@ public abstract class Analyzer implements Closeable {
      *
      * @param reader
      *          a reader to reset the source component
-     * @throws IOException
-     *           if the component's reset method throws an {@link IOException}
      */
-    protected void setReader(final Reader reader) throws IOException {
+    protected void setReader(final Reader reader) {
       source.setReader(reader);
     }
 
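
Note how the javadoc now leaves AlreadyClosedException as the only documented failure of tokenStream(). A small sketch of what that means for callers (assuming the 5.x-era WhitespaceAnalyzer; not code from this commit):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.store.AlreadyClosedException;

    public class ClosedAnalyzerDemo {
      public static void main(String[] args) {
        Analyzer analyzer = new WhitespaceAnalyzer();
        analyzer.close();
        try {
          analyzer.tokenStream("f", "text");  // no checked exception to catch...
        } catch (AlreadyClosedException e) {
          // ...but the unchecked AlreadyClosedException still signals misuse
          System.out.println("analyzer was already closed");
        }
      }
    }
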
@@ -17,12 +17,12 @@ package org.apache.lucene.analysis;
  * limitations under the License.
  */
 
+import java.io.IOException;
+import java.io.Reader;
+
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeSource;
 
-import java.io.Reader;
-import java.io.IOException;
-
 /** A Tokenizer is a TokenStream whose input is a Reader.
   <p>
   This is an abstract class; subclasses must override {@link #incrementToken()}
@@ -83,7 +83,7 @@ public abstract class Tokenizer extends TokenStream {
   /** Expert: Set a new reader on the Tokenizer. Typically, an
    *  analyzer (in its tokenStream method) will use
    *  this to re-use a previously created tokenizer. */
-  public final void setReader(Reader input) throws IOException {
+  public final void setReader(Reader input) {
     if (input == null) {
       throw new NullPointerException("input must not be null");
     } else if (this.input != ILLEGAL_STATE_READER) {
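
The Tokenizer lifecycle is otherwise unchanged: setReader() is still only legal on a fresh or closed tokenizer, it just no longer declares IOException. A reuse sketch (not from the commit):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class ReuseTokenizerDemo {
      public static void main(String[] args) throws IOException {
        WhitespaceTokenizer tok = new WhitespaceTokenizer();
        CharTermAttribute term = tok.addAttribute(CharTermAttribute.class);
        for (String text : new String[] {"first input", "second input"}) {
          tok.setReader(new StringReader(text));  // plain call: no checked exception
          tok.reset();
          while (tok.incrementToken()) {
            System.out.println(term);
          }
          tok.end();
          tok.close();  // required before the next setReader() call
        }
      }
    }
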
@@ -17,7 +17,6 @@ package org.apache.lucene.document;
  * limitations under the License.
  */
 
-import java.io.IOException;
 import java.io.StringReader;
 import java.nio.charset.StandardCharsets;
 import java.util.List;
@@ -356,8 +355,6 @@ public class TestDocument extends LuceneTestCase {
       fail("did not hit expected exc");
     } catch (IllegalArgumentException iae) {
       // expected
-    } catch (IOException ioe) {
-      throw new RuntimeException(ioe);
     }
   }
 
@@ -246,11 +246,7 @@ public class TokenSources {
   @Deprecated // maintenance reasons LUCENE-6445
   public static TokenStream getTokenStream(String field, String contents,
       Analyzer analyzer) {
-    try {
-      return analyzer.tokenStream(field, contents);
-    } catch (IOException ex) {
-      throw new RuntimeException(ex);
-    }
+    return analyzer.tokenStream(field, contents);
   }
 
 }

@@ -284,13 +284,7 @@ public class MemoryIndex {
     if (analyzer == null)
       throw new IllegalArgumentException("analyzer must not be null");
 
-    TokenStream stream;
-    try {
-      stream = analyzer.tokenStream(fieldName, text);
-    } catch (IOException ex) {
-      throw new RuntimeException(ex);
-    }
-
+    TokenStream stream = analyzer.tokenStream(fieldName, text);
     addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
   }
 
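
A usage sketch (assuming the 5.x MemoryIndex API; not code from this commit) of the simplified path above: addField(String, String, Analyzer) now reaches tokenStream() with no checked exception anywhere in the chain.

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.search.TermQuery;

    public class MemoryIndexDemo {
      public static void main(String[] args) {
        Analyzer analyzer = new WhitespaceAnalyzer();
        MemoryIndex index = new MemoryIndex();
        // Funnels into the addField(String, TokenStream, ...) overload shown above.
        index.addField("content", "salmon run readings", analyzer);
        float score = index.search(new TermQuery(new Term("content", "salmon")));
        System.out.println(score > 0 ? "match" : "no match");
      }
    }
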
@@ -329,7 +329,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -397,7 +397,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -472,7 +472,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -998,7 +998,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -1068,7 +1068,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -1142,7 +1142,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -1205,7 +1205,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }

@@ -281,7 +281,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -360,7 +360,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }
@@ -431,7 +431,7 @@ public class FuzzySuggesterTest extends LuceneTestCase {
         }
 
         @Override
-        protected void setReader(final Reader reader) throws IOException {
+        protected void setReader(final Reader reader) {
         }
       };
     }

@@ -81,11 +81,8 @@ public final class TokenizeTextBuilder implements CommandBuilder {
       }
       this.analyzer = fieldType.getIndexAnalyzer();
       Preconditions.checkNotNull(analyzer);
-      try { // register CharTermAttribute for later (implicit) reuse
-        this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
-      } catch (IOException e) {
-        throw new MorphlineCompilationException("Cannot create token stream", config, e);
-      }
+      // register CharTermAttribute for later (implicit) reuse
+      this.token = analyzer.tokenStream("content", reader).addAttribute(CharTermAttribute.class);
       Preconditions.checkNotNull(token);
       validateArguments();
     }

@@ -17,36 +17,46 @@
 
 package org.apache.solr.handler;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.*;
-import org.apache.lucene.analysis.util.CharFilterFactory;
-import org.apache.lucene.analysis.util.TokenFilterFactory;
-import org.apache.lucene.analysis.util.TokenizerFactory;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.Attribute;
-import org.apache.lucene.util.AttributeImpl;
-import org.apache.lucene.util.AttributeSource;
-import org.apache.lucene.util.AttributeReflector;
-import org.apache.lucene.util.CharsRef;
-import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.CharsRefBuilder;
-import org.apache.lucene.util.IOUtils;
-import org.apache.solr.analysis.TokenizerChain;
-import org.apache.solr.common.util.NamedList;
-import org.apache.solr.common.util.SimpleOrderedMap;
-import org.apache.solr.common.SolrException;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.schema.FieldType;
-
 import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
 
 import org.apache.commons.lang.ArrayUtils;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.util.CharFilterFactory;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.Attribute;
+import org.apache.lucene.util.AttributeImpl;
+import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRefBuilder;
+import org.apache.lucene.util.IOUtils;
+import org.apache.solr.analysis.TokenizerChain;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.SimpleOrderedMap;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.FieldType;
 
 /**
  * A base class for all analysis request handlers.
@@ -114,11 +124,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     }
 
     TokenStream tokenStream = tfac.create();
-    try {
-      ((Tokenizer)tokenStream).setReader(tokenizerChain.initReader(null, new StringReader(value)));
-    } catch (IOException e) {
-      throw new RuntimeException(e);
-    }
+    ((Tokenizer)tokenStream).setReader(tokenizerChain.initReader(null, new StringReader(value)));
     List<AttributeSource> tokens = analyzeTokenStream(tokenStream);
 
     namedList.add(tokenStream.getClass().getName(), convertTokensToNamedLists(tokens, context));

@@ -85,9 +85,13 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
     return new TokenStreamComponents(new KeywordTokenizer()) {
       private Shape shape = null;
 
-      protected void setReader(final Reader reader) throws IOException {
+      protected void setReader(final Reader reader) {
         source.setReader(reader);
-        shape = parseShape(IOUtils.toString(reader));
+        try {
+          shape = parseShape(IOUtils.toString(reader));
+        } catch (IOException e) {
+          throw new RuntimeException(e);
+        }
       }
 
       public TokenStream getTokenStream() {
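
This last hunk shows the cost of the narrower signature: an override that genuinely performs I/O can no longer propagate the checked exception and has to wrap it unchecked. The same idea in reduced form; the class name, sideChannel field, and readAll() helper are hypothetical stand-ins, echoing how the spatial field type parses a shape from the reader and later rebuilds its token stream from the parsed value:

    import java.io.IOException;
    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.core.KeywordTokenizer;

    public class SideChannelAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        return new TokenStreamComponents(new KeywordTokenizer()) {
          private String sideChannel;

          @Override
          protected void setReader(final Reader reader) {  // no "throws IOException" allowed
            super.setReader(reader);
            try {
              sideChannel = readAll(reader);               // residual I/O handled locally
            } catch (IOException e) {
              throw new RuntimeException(e);               // wrapped unchecked, as above
            }
          }
        };
      }

      private static String readAll(Reader reader) throws IOException {
        StringBuilder sb = new StringBuilder();
        for (int c = reader.read(); c != -1; c = reader.read()) {
          sb.append((char) c);
        }
        return sb.toString();
      }
    }
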