mirror of https://github.com/apache/lucene.git
LUCENE-3455: Renamed Analyzer.reusableTokenStream to Analyzer.tokenStream
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1176728 13f79535-47bb-0310-9956-ffa450edef68
parent 810f331fa2
commit 67c13bd2fe
@@ -180,6 +180,10 @@ Changes in backwards compatibility policy
   overriding .tokenStream() and .reusableTokenStream() (which are now final).
   (Chris Male)
 
+* LUCENE-3346: Analyzer.reusableTokenStream() has been renamed to tokenStream()
+  with the old tokenStream() method removed. Consequently it is now mandatory
+  for all Analyzers to support reusability. (Chris Male)
+
 Changes in Runtime Behavior
 
 * LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
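Not part of the committed diff: a minimal caller-side sketch of what the LUCENE-3346 rename means in practice. The field name "body", the text variable, and the surrounding method (which must be able to handle the IOException now declared by tokenStream()) are assumptions for illustration only.

    // Before (removed by this commit):
    // TokenStream ts = analyzer.reusableTokenStream("body", new StringReader(text));

    // After: tokenStream() is the single, reuse-aware entry point and now declares IOException.
    TokenStream ts = analyzer.tokenStream("body", new StringReader(text));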
@@ -78,7 +78,7 @@ public class Highlighter
   public final String getBestFragment(Analyzer analyzer, String fieldName,String text)
     throws IOException, InvalidTokenOffsetsException
   {
-    TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
     return getBestFragment(tokenStream, text);
   }
 
@@ -130,7 +130,7 @@ public class Highlighter
       int maxNumFragments)
     throws IOException, InvalidTokenOffsetsException
   {
-    TokenStream tokenStream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(fieldName, new StringReader(text));
     return getBestFragments(tokenStream, text, maxNumFragments);
   }
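For orientation, a hedged usage sketch of the convenience method updated above; the query, analyzer, text, and the "contents" field name are placeholders and not part of the patch.

    QueryScorer scorer = new QueryScorer(query, "contents");
    Highlighter highlighter = new Highlighter(scorer);
    highlighter.setTextFragmenter(new SimpleFragmenter(40));
    // Internally this overload now calls analyzer.tokenStream("contents", new StringReader(text)):
    String fragment = highlighter.getBestFragment(analyzer, "contents", text);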
@@ -288,7 +288,7 @@ public class TokenSources {
   public static TokenStream getTokenStream(String field, String contents,
                                            Analyzer analyzer) {
     try {
-      return analyzer.reusableTokenStream(field, new StringReader(contents));
+      return analyzer.tokenStream(field, new StringReader(contents));
     } catch (IOException ex) {
       throw new RuntimeException(ex);
     }
@@ -156,7 +156,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   private static String highlightField(Query query, String fieldName, String text)
       throws IOException, InvalidTokenOffsetsException {
     TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true)
-      .reusableTokenStream(fieldName, new StringReader(text));
+      .tokenStream(fieldName, new StringReader(text));
     // Assuming "<B>", "</B>" used to highlight
     SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
     QueryScorer scorer = new QueryScorer(query, fieldName, FIELD_NAME);

@@ -177,7 +177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME,
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME,
           new StringReader(text));
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -256,7 +256,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -285,7 +285,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -314,7 +314,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -339,7 +339,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -363,7 +363,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -388,7 +388,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(NUMERIC_FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -416,7 +416,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");

@@ -438,7 +438,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
       QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
       Highlighter highlighter = new Highlighter(this, scorer);
 
@@ -468,7 +468,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 5));
 
@@ -491,7 +491,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleSpanFragmenter(scorer, 20));
 
@@ -522,7 +522,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME,new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));
 
@@ -593,7 +593,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     int maxNumFragmentsRequired = 2;
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       String result = highlighter.getBestFragments(tokenStream, text, maxNumFragmentsRequired,
           "...");

@@ -766,7 +766,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     int maxNumFragmentsRequired = 2;
     String fragmentSeparator = "...";
     QueryScorer scorer = new QueryScorer(query, HighlighterTest.FIELD_NAME);
-    TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
 
     Highlighter highlighter = new Highlighter(this, scorer);
 
@@ -790,7 +790,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     int maxNumFragmentsRequired = 2;
     String fragmentSeparator = "...";
     QueryScorer scorer = new QueryScorer(query, null);
-    TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
 
     Highlighter highlighter = new Highlighter(this, scorer);
 
@@ -814,7 +814,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     int maxNumFragmentsRequired = 2;
     String fragmentSeparator = "...";
     QueryScorer scorer = new QueryScorer(query, "random_field", HighlighterTest.FIELD_NAME);
-    TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
 
     Highlighter highlighter = new Highlighter(this, scorer);
 
@@ -985,7 +985,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     numHighlights = 0;
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       Highlighter highlighter = getHighlighter(query, FIELD_NAME,
           HighlighterTest.this);

@@ -1046,7 +1046,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       Highlighter highlighter = getHighlighter(wTerms, HighlighterTest.this);// new
       // Highlighter(new
       // QueryTermScorer(wTerms));
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
       highlighter.setTextFragmenter(new SimpleFragmenter(2));
 
       String result = highlighter.getBestFragment(tokenStream, texts[0]).trim();

@@ -1055,7 +1055,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
       // readjust weights
       wTerms[1].setWeight(50f);
-      tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
+      tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
       highlighter = getHighlighter(wTerms, HighlighterTest.this);
       highlighter.setTextFragmenter(new SimpleFragmenter(2));
 
@@ -1091,7 +1091,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     Highlighter highlighter = getHighlighter(query, null, HighlighterTest.this);
 
     // Get 3 best fragments and seperate with a "..."
-    TokenStream tokenStream = analyzer.reusableTokenStream(null, new StringReader(s));
+    TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
 
     String result = highlighter.getBestFragments(tokenStream, s, 3, "...");
     String expectedResult = "<B>football</B>-<B>soccer</B> in the euro 2004 <B>footie</B> competition";

@@ -1116,7 +1116,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
       Highlighter highlighter = getHighlighter(query, FIELD_NAME,
           HighlighterTest.this);
       String result = highlighter.getBestFragment(tokenStream, text);

@@ -1139,7 +1139,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
 
       Highlighter highlighter = getHighlighter(query, FIELD_NAME,
           HighlighterTest.this);// new Highlighter(this, new

@@ -1147,7 +1147,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       highlighter.setTextFragmenter(new SimpleFragmenter(20));
       String stringResults[] = highlighter.getBestFragments(tokenStream, text, 10);
 
-      tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
       TextFragment fragmentResults[] = highlighter.getBestTextFragments(tokenStream, text,
           true, 10);
 
@@ -1177,7 +1177,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       public void run() throws Exception {
         numHighlights = 0;
         doSearching(new TermQuery(new Term(FIELD_NAME, "meat")));
-        TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(texts[0]));
+        TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(texts[0]));
         Highlighter highlighter = getHighlighter(query, FIELD_NAME,
             HighlighterTest.this);// new Highlighter(this, new
         // QueryTermScorer(query));

@@ -1251,7 +1251,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     String text = "this is a text with searchterm in it";
     SimpleHTMLFormatter fm = new SimpleHTMLFormatter();
     TokenStream tokenStream = new MockAnalyzer(random, MockTokenizer.SIMPLE, true, stopWords, true)
-      .reusableTokenStream("text", new StringReader(text));
+      .tokenStream("text", new StringReader(text));
     Highlighter hg = getHighlighter(query, "text", fm);
     hg.setTextFragmenter(new NullFragmenter());
     hg.setMaxDocCharsToAnalyze(36);

@@ -1294,7 +1294,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
       Highlighter highlighter = getHighlighter(query, FIELD_NAME, HighlighterTest.this, false);
 
       highlighter.setTextFragmenter(new SimpleFragmenter(40));

@@ -1323,7 +1323,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     doSearching(new TermQuery(new Term(FIELD_NAME, "aninvalidquerywhichshouldyieldnoresults")));
 
     for (String text : texts) {
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
       Highlighter highlighter = getHighlighter(query, FIELD_NAME,
           HighlighterTest.this);
       String result = highlighter.getBestFragment(tokenStream, text);

@@ -1363,7 +1363,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       }
     });
     highlighter.setTextFragmenter(new SimpleFragmenter(2000));
-    TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(rawDocContent));
+    TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(rawDocContent));
 
     String encodedSnippet = highlighter.getBestFragments(tokenStream, rawDocContent, 1, "");
     // An ugly bit of XML creation:

@@ -1714,7 +1714,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       final int expectedHighlights) throws Exception {
     for (int i = 0; i < hits.totalHits; i++) {
       String text = searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME);
-      TokenStream tokenStream = analyzer.reusableTokenStream(FIELD_NAME, new StringReader(text));
+      TokenStream tokenStream = analyzer.tokenStream(FIELD_NAME, new StringReader(text));
       QueryScorer scorer = new QueryScorer(query, FIELD_NAME);
       Highlighter highlighter = new Highlighter(this, scorer);
 
@@ -1961,7 +1961,7 @@ final class SynonymTokenizer extends TokenStream {
     int maxNumFragmentsRequired = 2;
     String fragmentSeparator = "...";
     Scorer scorer = null;
-    TokenStream tokenStream = analyzer.reusableTokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(HighlighterTest.FIELD_NAME, new StringReader(text));
     if (mode == QUERY) {
       scorer = new QueryScorer(query);
     } else if (mode == QUERY_TERM) {
@@ -170,7 +170,7 @@ public abstract class AbstractTestCase extends LuceneTestCase {
   protected List<BytesRef> analyze(String text, String field, Analyzer analyzer) throws IOException {
     List<BytesRef> bytesRefs = new ArrayList<BytesRef>();
 
-    TokenStream tokenStream = analyzer.reusableTokenStream(field, new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream(field, new StringReader(text));
     TermToBytesRefAttribute termAttribute = tokenStream.getAttribute(TermToBytesRefAttribute.class);
 
     BytesRef bytesRef = termAttribute.getBytesRef();
@@ -41,7 +41,6 @@ import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.Fields;
 import org.apache.lucene.index.FieldsEnum;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.OrdTermState;
 import org.apache.lucene.index.StoredFieldVisitor;

@@ -261,7 +260,7 @@ public class MemoryIndex {
 
     TokenStream stream;
     try {
-      stream = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+      stream = analyzer.tokenStream(fieldName, new StringReader(text));
     } catch (IOException ex) {
       throw new RuntimeException(ex);
     }
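A small usage sketch (not from the patch) of the MemoryIndex path changed above; the analyzer instance and the field/text/query values are assumed for illustration.

    MemoryIndex index = new MemoryIndex();
    // addField() now obtains its stream via analyzer.tokenStream() and still wraps
    // any IOException in a RuntimeException, as the hunk above shows.
    index.addField("content", "readings about salmons and other fish", analyzer);
    float score = index.search(new TermQuery(new Term("content", "fish")));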
@@ -193,7 +193,7 @@ public class FuzzyLikeThisQuery extends Query
   private void addTerms(IndexReader reader,FieldVals f) throws IOException
   {
     if(f.queryString==null) return;
-    TokenStream ts=analyzer.reusableTokenStream(f.fieldName,new StringReader(f.queryString));
+    TokenStream ts=analyzer.tokenStream(f.fieldName, new StringReader(f.queryString));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
     int corpusNumDocs=reader.numDocs();
@@ -30,14 +30,9 @@ import java.util.Map;
  * An Analyzer builds TokenStreams, which analyze text. It thus represents a
  * policy for extracting index terms from text.
  * <p>
- * To prevent consistency problems, this class does not allow subclasses to
- * extend {@link #reusableTokenStream(String, Reader)} or
- * {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
- * implement {@link #createComponents(String, Reader)}.
- * </p>
- * <p>The {@code Analyzer}-API in Lucene is based on the decorator pattern.
- * Therefore all non-abstract subclasses must be final! This is checked
- * when Java assertions are enabled.
+ * In order to define what analysis is done, subclasses must define their
+ * {@link TokenStreamComponents} in {@link #createComponents(String, Reader)}.
+ * The components are then reused in each call to {@link #tokenStream(String, Reader)}.
  */
 public abstract class Analyzer {
 
@@ -80,8 +75,8 @@ public abstract class Analyzer {
    * @param fieldName the name of the field the created TokenStream is used for
    * @param reader the reader the streams source reads from
    */
-  public final TokenStream reusableTokenStream(final String fieldName,
-                                               final Reader reader) throws IOException {
+  public final TokenStream tokenStream(final String fieldName,
+                                       final Reader reader) throws IOException {
     TokenStreamComponents components = reuseStrategy.getReusableComponents(fieldName);
     final Reader r = initReader(reader);
     if (components == null) {

@@ -92,25 +87,6 @@ public abstract class Analyzer {
     }
     return components.getTokenStream();
   }
 
-  /**
-   * Creates a TokenStream which tokenizes all the text in the provided
-   * Reader.
-   * <p>
-   * This method uses {@link #createComponents(String, Reader)} to obtain an
-   * instance of {@link TokenStreamComponents} and returns the sink of the
-   * components. Each calls to this method will create a new instance of
-   * {@link TokenStreamComponents}. Created {@link TokenStream} instances are
-   * never reused.
-   * </p>
-   *
-   * @param fieldName the name of the field the created TokenStream is used for
-   * @param reader the reader the streams source reads from
-   */
-  public final TokenStream tokenStream(final String fieldName,
-                                       final Reader reader) {
-    return createComponents(fieldName, initReader(reader)).getTokenStream();
-  }
-
   /**
    * Override this if you want to add a CharFilter chain.

@@ -166,7 +142,7 @@ public abstract class Analyzer {
    * instance of {@link TokenFilter} which also serves as the
    * {@link TokenStream} returned by
-   * {@link Analyzer#tokenStream(String, Reader)} and
-   * {@link Analyzer#reusableTokenStream(String, Reader)}.
+   * {@link Analyzer#tokenStream(String, Reader)}.
    */
   public static class TokenStreamComponents {
     protected final Tokenizer source;
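To illustrate the new contract (this example is not part of the commit): subclasses no longer override tokenStream() or reusableTokenStream(); they implement createComponents(), and the final tokenStream() method reuses those components per field. The class name and component choices below are made-up placeholders, assuming the analyzers-common WhitespaceTokenizer and LowerCaseFilter are available.

    // Hypothetical Analyzer written against the new API
    public final class LowercaseWhitespaceAnalyzer extends Analyzer {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_40, reader);
        TokenStream result = new LowerCaseFilter(Version.LUCENE_40, source);
        return new TokenStreamComponents(source, result);
      }
    }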
@@ -167,11 +167,12 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
   }
 
   /**
-   * Resets this stream to the beginning. This is an optional operation, so
-   * subclasses may or may not implement this method. {@link #reset()} is not needed for
-   * the standard indexing process. However, if the tokens of a
-   * <code>TokenStream</code> are intended to be consumed more than once, it is
-   * necessary to implement {@link #reset()}. Note that if your TokenStream
+   * This method is called by a consumer before it begins consumption using
+   * {@link #incrementToken()}.
+   * <p/>
+   * Resets this stream to the beginning. As all TokenStreams must be reusable,
+   * any implementations which have state that needs to be reset between usages
+   * of the TokenStream, must implement this method. Note that if your TokenStream
    * caches tokens and feeds them back again after a reset, it is imperative
    * that you clone the tokens when you store them away (on the first pass) as
    * well as when you return them (on future passes after {@link #reset()}).
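The revised reset() javadoc above implies the following consumer workflow. This is a sketch, not code from the commit; it assumes an Analyzer, a field name, and a text string are in scope and that the caller handles the IOException these calls may throw.

    TokenStream stream = analyzer.tokenStream("field", new StringReader(text));
    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
    stream.reset();                 // required before the first incrementToken()
    while (stream.incrementToken()) {
      System.out.println(termAtt.toString());
    }
    stream.end();                   // records the final offset state
    stream.close();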
@@ -86,7 +86,7 @@ public abstract class Tokenizer extends TokenStream {
   }
 
   /** Expert: Reset the tokenizer to a new reader. Typically, an
-   *  analyzer (in its reusableTokenStream method) will use
+   *  analyzer (in its tokenStream method) will use
    *  this to re-use a previously created tokenizer. */
   public void reset(Reader input) throws IOException {
     this.input = input;
@@ -350,9 +350,9 @@ public class Field implements IndexableField {
     if (tokenStream != null) {
       return tokenStream;
     } else if (readerValue() != null) {
-      return analyzer.reusableTokenStream(name(), readerValue());
+      return analyzer.tokenStream(name(), readerValue());
     } else if (stringValue() != null) {
-      return analyzer.reusableTokenStream(name(), new StringReader(stringValue()));
+      return analyzer.tokenStream(name(), new StringReader(stringValue()));
     }
 
     throw new IllegalArgumentException("Field must have either TokenStream, String or Reader value");
@@ -57,7 +57,7 @@ public class QueryTermVector implements TermFreqVector {
   {
     TokenStream stream;
     try {
-      stream = analyzer.reusableTokenStream("", new StringReader(queryString));
+      stream = analyzer.tokenStream("", new StringReader(queryString));
     } catch (IOException e1) {
       stream = null;
     }
@@ -183,7 +183,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   }
 
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
-    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
+    assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
   }
 
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {

@@ -208,7 +208,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
 
 
   public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[]) throws IOException {
-    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
+    assertTokenStreamContents(a.tokenStream("dummy", new StringReader(input)), output, startOffsets, endOffsets, types, posIncrements, input.length());
   }
 
   public static void assertAnalyzesToReuse(Analyzer a, String input, String[] output) throws IOException {

@@ -265,7 +265,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       System.out.println("NOTE: BaseTokenStreamTestCase: get first token stream now text=" + text);
     }
 
-    TokenStream ts = a.reusableTokenStream("dummy", new StringReader(text));
+    TokenStream ts = a.tokenStream("dummy", new StringReader(text));
     assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
     CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = ts.hasAttribute(OffsetAttribute.class) ? ts.getAttribute(OffsetAttribute.class) : null;
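As a concrete illustration (not part of the commit), a typical test assertion that now exercises Analyzer.tokenStream() under the hood; the analyzer configuration and token values are arbitrary examples.

    Analyzer a = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
    // assertAnalyzesTo() builds its stream via a.tokenStream("dummy", ...) as shown above.
    assertAnalyzesTo(a, "foo bar", new String[] { "foo", "bar" });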
@@ -207,13 +207,13 @@ public abstract class CollationTestBase extends LuceneTestCase {
       doc.add(new Field("tracer", customType, sortData[i][0]));
       doc.add(new TextField("contents", sortData[i][1]));
       if (sortData[i][2] != null)
-        doc.add(new TextField("US", usAnalyzer.reusableTokenStream("US", new StringReader(sortData[i][2]))));
+        doc.add(new TextField("US", usAnalyzer.tokenStream("US", new StringReader(sortData[i][2]))));
       if (sortData[i][3] != null)
-        doc.add(new TextField("France", franceAnalyzer.reusableTokenStream("France", new StringReader(sortData[i][3]))));
+        doc.add(new TextField("France", franceAnalyzer.tokenStream("France", new StringReader(sortData[i][3]))));
       if (sortData[i][4] != null)
-        doc.add(new TextField("Sweden", swedenAnalyzer.reusableTokenStream("Sweden", new StringReader(sortData[i][4]))));
+        doc.add(new TextField("Sweden", swedenAnalyzer.tokenStream("Sweden", new StringReader(sortData[i][4]))));
       if (sortData[i][5] != null)
-        doc.add(new TextField("Denmark", denmarkAnalyzer.reusableTokenStream("Denmark", new StringReader(sortData[i][5]))));
+        doc.add(new TextField("Denmark", denmarkAnalyzer.tokenStream("Denmark", new StringReader(sortData[i][5]))));
       writer.addDocument(doc);
     }
     writer.optimize();

@@ -265,7 +265,7 @@ public abstract class CollationTestBase extends LuceneTestCase {
 
     for (int i = 0; i < numTestPoints; i++) {
       String term = _TestUtil.randomSimpleString(random);
-      TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
+      TokenStream ts = analyzer.tokenStream("fake", new StringReader(term));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();

@@ -284,7 +284,7 @@ public abstract class CollationTestBase extends LuceneTestCase {
     for (Map.Entry<String,BytesRef> mapping : map.entrySet()) {
       String term = mapping.getKey();
       BytesRef expected = mapping.getValue();
-      TokenStream ts = analyzer.reusableTokenStream("fake", new StringReader(term));
+      TokenStream ts = analyzer.tokenStream("fake", new StringReader(term));
       TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
       BytesRef bytes = termAtt.getBytesRef();
       ts.reset();
@@ -101,7 +101,7 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     String testString = "t";
 
     Analyzer analyzer = new MockAnalyzer(random);
-    TokenStream stream = analyzer.reusableTokenStream("dummy", new StringReader(testString));
+    TokenStream stream = analyzer.tokenStream("dummy", new StringReader(testString));
     stream.reset();
     while (stream.incrementToken()) {
       // consume
@@ -171,8 +171,8 @@ public class TestIndexableField extends LuceneTestCase {
       if (numeric()) {
         return new NumericField(name()).setIntValue(counter).tokenStream(analyzer);
       }
-      return readerValue() != null ? analyzer.reusableTokenStream(name(), readerValue()) :
-        analyzer.reusableTokenStream(name(), new StringReader(stringValue()));
+      return readerValue() != null ? analyzer.tokenStream(name(), readerValue()) :
+        analyzer.tokenStream(name(), new StringReader(stringValue()));
     }
   }
 
@@ -47,7 +47,7 @@ public class TestLongPostings extends LuceneTestCase {
       if (other != null && s.equals(other)) {
         continue;
       }
-      final TokenStream ts = a.reusableTokenStream("foo", new StringReader(s));
+      final TokenStream ts = a.tokenStream("foo", new StringReader(s));
       final TermToBytesRefAttribute termAtt = ts.getAttribute(TermToBytesRefAttribute.class);
       final BytesRef termBytes = termAtt.getBytesRef();
       ts.reset();
@@ -136,7 +136,7 @@ public class TestTermVectorsWriter extends LuceneTestCase {
     Analyzer analyzer = new MockAnalyzer(random);
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer));
     Document doc = new Document();
-    TokenStream stream = analyzer.reusableTokenStream("field", new StringReader("abcd "));
+    TokenStream stream = analyzer.tokenStream("field", new StringReader("abcd "));
     stream.reset(); // TODO: wierd to reset before wrapping with CachingTokenFilter... correct?
     stream = new CachingTokenFilter(stream);
     FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
@@ -636,7 +636,7 @@ public class TestPhraseQuery extends LuceneTestCase {
           break;
         }
       }
-      TokenStream ts = analyzer.reusableTokenStream("ignore", new StringReader(term));
+      TokenStream ts = analyzer.tokenStream("ignore", new StringReader(term));
       CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
       ts.reset();
       while(ts.incrementToken()) {
@@ -101,7 +101,7 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
    * Set maximum allowed token length. If a token is seen
    * that exceeds this length then it is discarded. This
    * setting only takes effect the next time tokenStream or
-   * reusableTokenStream is called.
+   * tokenStream is called.
    */
   public void setMaxTokenLength(int length) {
     maxTokenLength = length;
@@ -102,7 +102,7 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
    * Set maximum allowed token length. If a token is seen
    * that exceeds this length then it is discarded. This
    * setting only takes effect the next time tokenStream or
-   * reusableTokenStream is called.
+   * tokenStream is called.
    */
   public void setMaxTokenLength(int length) {
     maxTokenLength = length;
@@ -115,7 +115,7 @@ public class SynonymMap {
    *  separates by {@link SynonymMap#WORD_SEPARATOR}.
    *  reuse and its chars must not be null. */
   public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException {
-    TokenStream ts = analyzer.reusableTokenStream("", new StringReader(text));
+    TokenStream ts = analyzer.tokenStream("", new StringReader(text));
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
     ts.reset();
@@ -98,7 +98,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
 
   // LUCENE-1441
   public void testOffsets() throws Exception {
-    TokenStream stream = new KeywordAnalyzer().reusableTokenStream("field", new StringReader("abcd"));
+    TokenStream stream = new KeywordAnalyzer().tokenStream("field", new StringReader("abcd"));
     OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
     stream.reset();
     assertTrue(stream.incrementToken());
@@ -19,7 +19,6 @@ package org.apache.lucene.analysis.core;
 
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.Version;

@@ -48,7 +47,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
   public void testDefaults() throws IOException {
     assertTrue(stop != null);
     StringReader reader = new StringReader("This is a test of the english stop analyzer");
-    TokenStream stream = stop.reusableTokenStream("test", reader);
+    TokenStream stream = stop.tokenStream("test", reader);
     assertTrue(stream != null);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
     stream.reset();

@@ -65,7 +64,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     stopWordsSet.add("analyzer");
     StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_40, stopWordsSet);
     StringReader reader = new StringReader("This is a good test of the english stop analyzer");
-    TokenStream stream = newStop.reusableTokenStream("test", reader);
+    TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
 
@@ -83,7 +82,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     StopAnalyzer newStop = new StopAnalyzer(TEST_VERSION_CURRENT, stopWordsSet);
     StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
     int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
-    TokenStream stream = newStop.reusableTokenStream("test", reader);
+    TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
     int i = 0;
     CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
@@ -203,7 +203,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
   }
 
   /**
-   * Basic test ensuring that reusableTokenStream works correctly.
+   * Basic test ensuring that tokenStream works correctly.
    */
   public void testReusableTokenStream() throws Exception {
     Analyzer a = new PersianAnalyzer(TEST_VERSION_CURRENT);
@@ -124,12 +124,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
     assertAnalyzesTo(analyzer, document, expected);
 
     // analysis with a "FastStringReader"
-    TokenStream ts = analyzer.reusableTokenStream("dummy",
+    TokenStream ts = analyzer.tokenStream("dummy",
         new PatternAnalyzer.FastStringReader(document));
     assertTokenStreamContents(ts, expected);
 
     // analysis of a String, uses PatternAnalyzer.tokenStream(String, String)
-    TokenStream ts2 = analyzer.reusableTokenStream("dummy", new StringReader(document));
+    TokenStream ts2 = analyzer.tokenStream("dummy", new StringReader(document));
     assertTokenStreamContents(ts2, expected);
   }
 }
@@ -23,7 +23,6 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;

@@ -39,12 +38,12 @@ public class TestLimitTokenCountAnalyzer extends BaseTokenStreamTestCase {
   public void testLimitTokenCountAnalyzer() throws IOException {
     Analyzer a = new LimitTokenCountAnalyzer(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), 2);
     // dont use assertAnalyzesTo here, as the end offset is not the end of the string!
-    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1  2     3  4  5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4);
-    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
+    assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1  2     3  4  5")), new String[] { "1", "2" }, new int[] { 0, 3 }, new int[] { 1, 4 }, 4);
+    assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
 
     a = new LimitTokenCountAnalyzer(new StandardAnalyzer(TEST_VERSION_CURRENT), 2);
     // dont use assertAnalyzesTo here, as the end offset is not the end of the string!
-    assertTokenStreamContents(a.reusableTokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
+    assertTokenStreamContents(a.tokenStream("dummy", new StringReader("1 2 3 4 5")), new String[] { "1", "2" }, new int[] { 0, 2 }, new int[] { 1, 3 }, 3);
   }
 
   public void testLimitTokenCountIndexWriter() throws IOException {
@@ -36,8 +36,8 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
     PerFieldAnalyzerWrapper analyzer =
       new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(TEST_VERSION_CURRENT), analyzerPerField);
 
-    TokenStream tokenStream = analyzer.reusableTokenStream("field",
-        new StringReader(text));
+    TokenStream tokenStream = analyzer.tokenStream("field",
+        new StringReader(text));
     CharTermAttribute termAtt = tokenStream.getAttribute(CharTermAttribute.class);
     tokenStream.reset();
 
@@ -46,8 +46,8 @@ public class TestPerFieldAnalzyerWrapper extends BaseTokenStreamTestCase {
                  "Qwerty",
                  termAtt.toString());
 
-    tokenStream = analyzer.reusableTokenStream("special",
-        new StringReader(text));
+    tokenStream = analyzer.tokenStream("special",
+        new StringReader(text));
     termAtt = tokenStream.getAttribute(CharTermAttribute.class);
     tokenStream.reset();
 
@@ -65,44 +65,44 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   public void testNoStopwords() throws Exception {
     // Note: an empty list of fields passed in
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.EMPTY_LIST, 1);
-    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("quick"));
+    TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("variedField", new StringReader("quick"));
     assertTokenStreamContents(protectedTokenStream, new String[]{"quick"});
 
-    protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+    protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring"));
     assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
   }
 
   public void testDefaultStopwordsAllFields() throws Exception {
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader);
-    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+    TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring"));
     assertTokenStreamContents(protectedTokenStream, new String[0]); // Default stop word filtering will remove boring
   }
 
   public void testStopwordsAllFieldsMaxPercentDocs() throws Exception {
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 2f);
 
-    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+    TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring"));
     // A filter on terms in > one half of docs remove boring
     assertTokenStreamContents(protectedTokenStream, new String[0]);
 
-    protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+    protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("vaguelyboring"));
     // A filter on terms in > half of docs should not remove vaguelyBoring
     assertTokenStreamContents(protectedTokenStream, new String[]{"vaguelyboring"});
 
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, 1f / 4f);
-    protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("vaguelyboring"));
+    protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("vaguelyboring"));
     // A filter on terms in > quarter of docs should remove vaguelyBoring
     assertTokenStreamContents(protectedTokenStream, new String[0]);
   }
 
   public void testStopwordsPerFieldMaxPercentDocs() throws Exception {
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField"), 1f / 2f);
-    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+    TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring"));
     // A filter on one Field should not affect queries on another
     assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
 
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("variedField", "repetitiveField"), 1f / 2f);
-    protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+    protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring"));
     // A filter on the right Field should affect queries on it
     assertTokenStreamContents(protectedTokenStream, new String[0]);
   }

@@ -120,11 +120,11 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   public void testNoFieldNamePollution() throws Exception {
     protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Arrays.asList("repetitiveField"), 10);
 
-    TokenStream protectedTokenStream = protectedAnalyzer.reusableTokenStream("repetitiveField", new StringReader("boring"));
+    TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("repetitiveField", new StringReader("boring"));
     // Check filter set up OK
     assertTokenStreamContents(protectedTokenStream, new String[0]);
 
-    protectedTokenStream = protectedAnalyzer.reusableTokenStream("variedField", new StringReader("boring"));
+    protectedTokenStream = protectedAnalyzer.tokenStream("variedField", new StringReader("boring"));
     // Filter should not prevent stopwords in one field being used in another
     assertTokenStreamContents(protectedTokenStream, new String[]{"boring"});
   }

@@ -133,7 +133,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
     QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(
         TEST_VERSION_CURRENT,
         new MockAnalyzer(random, MockTokenizer.WHITESPACE, false), reader, 10);
-    TokenStream ts = a.reusableTokenStream("repetitiveField", new StringReader("this boring"));
+    TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
     assertTokenStreamContents(ts, new String[] { "this" });
   }
 }
@@ -88,7 +88,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
 
     PhraseQuery q = new PhraseQuery();
 
-    TokenStream ts = analyzer.reusableTokenStream("content", new StringReader("this sentence"));
+    TokenStream ts = analyzer.tokenStream("content", new StringReader("this sentence"));
     int j = -1;
 
     PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);

@@ -117,7 +117,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
 
     BooleanQuery q = new BooleanQuery();
 
-    TokenStream ts = analyzer.reusableTokenStream("content", new StringReader("test sentence"));
+    TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence"));
 
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
@@ -87,7 +87,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
     Analyzer analyzer = new MockAnalyzer(random, MockTokenizer.WHITESPACE, false);
     IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
     Document doc = new Document();
-    TokenStream tokenStream = analyzer.reusableTokenStream("field", new StringReader("abcd "));
+    TokenStream tokenStream = analyzer.tokenStream("field", new StringReader("abcd "));
     TeeSinkTokenFilter tee = new TeeSinkTokenFilter(tokenStream);
     TokenStream sink = tee.newSinkTokenStream();
     FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
@@ -156,10 +156,10 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
     assumeTrue("JRE does not support Thai dictionary-based BreakIterator", ThaiWordFilter.DBBI_AVAILABLE);
     ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_30);
     // just consume
-    TokenStream ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    TokenStream ts = analyzer.tokenStream("dummy", new StringReader("ภาษาไทย"));
     assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
     // this consumer adds flagsAtt, which this analyzer does not use.
-    ts = analyzer.reusableTokenStream("dummy", new StringReader("ภาษาไทย"));
+    ts = analyzer.tokenStream("dummy", new StringReader("ภาษาไทย"));
     ts.addAttribute(FlagsAttribute.class);
     assertTokenStreamContents(ts, new String[] { "ภาษา", "ไทย" });
   }
@@ -59,13 +59,13 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
   /** Test reuse of MorfologikFilter with leftover stems. */
   public final void testLeftoverStems() throws IOException {
     Analyzer a = getTestAnalyzer();
-    TokenStream ts_1 = a.reusableTokenStream("dummy", new StringReader("liście"));
+    TokenStream ts_1 = a.tokenStream("dummy", new StringReader("liście"));
     CharTermAttribute termAtt_1 = ts_1.getAttribute(CharTermAttribute.class);
     ts_1.reset();
     ts_1.incrementToken();
     assertEquals("first stream", "liść", termAtt_1.toString());
 
-    TokenStream ts_2 = a.reusableTokenStream("dummy", new StringReader("danych"));
+    TokenStream ts_2 = a.tokenStream("dummy", new StringReader("danych"));
     CharTermAttribute termAtt_2 = ts_2.getAttribute(CharTermAttribute.class);
     ts_2.reset();
     ts_2.incrementToken();

@@ -96,7 +96,7 @@ public class TestMorfologikAnalyzer extends BaseTokenStreamTestCase {
 
   /** Test morphosyntactic annotations. */
   public final void testPOSAttribute() throws IOException {
-    TokenStream ts = getTestAnalyzer().reusableTokenStream("dummy", new StringReader("liście"));
+    TokenStream ts = getTestAnalyzer().tokenStream("dummy", new StringReader("liście"));
 
     assertPOSToken(ts, "liść", "subst:pl:acc.nom.voc:m3");
     assertPOSToken(ts, "list", "subst:sg:loc.voc:m3");
@@ -177,7 +177,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
       sb.append("我购买了道具和服装。");
     }
     Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
-    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    TokenStream stream = analyzer.tokenStream("", new StringReader(sb.toString()));
     stream.reset();
     while (stream.incrementToken()) {
     }

@@ -190,7 +190,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
       sb.append("我购买了道具和服装");
     }
     Analyzer analyzer = new SmartChineseAnalyzer(TEST_VERSION_CURRENT);
-    TokenStream stream = analyzer.reusableTokenStream("", new StringReader(sb.toString()));
+    TokenStream stream = analyzer.tokenStream("", new StringReader(sb.toString()));
     stream.reset();
     while (stream.incrementToken()) {
     }
@@ -958,8 +958,8 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
 
   private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
       throws Exception {
-    TokenStream ts1 = a1.reusableTokenStream("bogus", new StringReader(text));
-    TokenStream ts2 = a2.reusableTokenStream("bogus", new StringReader(text));
+    TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
+    TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
     ts1.reset();
     ts2.reset();
     TermToBytesRefAttribute termAtt1 = ts1.addAttribute(TermToBytesRefAttribute.class);

@@ -1007,8 +1007,8 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
 
     // Default analyzer, maxShingleSize, and outputUnigrams
     Benchmark benchmark = execBenchmark(getShingleConfig(""));
-    benchmark.getRunData().getAnalyzer().reusableTokenStream
-      ("bogus", new StringReader(text)).close();
+    benchmark.getRunData().getAnalyzer().tokenStream
+      ("bogus", new StringReader(text)).close();
     assertEqualShingle(benchmark.getRunData().getAnalyzer(), text,
                        new String[] {"one", "one two", "two", "two three",
                                      "three", "three four", "four", "four five",
@@ -763,7 +763,7 @@ public final class MoreLikeThis {
       throw new UnsupportedOperationException("To use MoreLikeThis without " +
           "term vectors, you must provide an Analyzer");
     }
-    TokenStream ts = analyzer.reusableTokenStream(fieldName, r);
+    TokenStream ts = analyzer.tokenStream(fieldName, r);
     int tokenCount = 0;
     // for every token
     CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
@@ -13,6 +13,10 @@ Changes in runtime behavior
   instead of RangeQueryNode; the same applies for numeric nodes;
   (Vinicius Barros via Uwe Schindler)
 
+* LUCENE-3455: QueryParserBase.newFieldQuery() will throw a ParseException if
+  any of the calls to the Analyzer throw an IOException. QueryParseBase.analyzeRangePart()
+  will throw a RuntimException if an IOException is thrown by the Analyzer.
+
 API Changes
 
 * LUCENE-1768: Deprecated Parametric(Range)QueryNode, RangeQueryNode(Builder),
@ -109,7 +109,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
|
|||
|
||||
int countTokens = 0;
|
||||
try {
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
|
@ -197,7 +197,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
|
|||
TokenStream source;
|
||||
List<String> tlist = new ArrayList<String>();
|
||||
try {
|
||||
source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
|
||||
source = getAnalyzer().tokenStream(field, new StringReader(termStr));
|
||||
source.reset();
|
||||
} catch (IOException e1) {
|
||||
throw new RuntimeException(e1);
|
||||
|
@@ -253,7 +253,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
    boolean multipleTokens = false;

    try {
-     source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
+     source = getAnalyzer().tokenStream(field, new StringReader(termStr));
      CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
      source.reset();
      if (source.incrementToken()) {
@@ -294,7 +294,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
    if (part1 != null) {
      // part1
      try {
-       source = getAnalyzer().reusableTokenStream(field, new StringReader(part1));
+       source = getAnalyzer().tokenStream(field, new StringReader(part1));
        termAtt = source.addAttribute(CharTermAttribute.class);
        source.reset();
        multipleTokens = false;
@@ -322,7 +322,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryparser.classic.
    if (part2 != null) {
      try {
        // part2
-       source = getAnalyzer().reusableTokenStream(field, new StringReader(part2));
+       source = getAnalyzer().tokenStream(field, new StringReader(part2));
        termAtt = source.addAttribute(CharTermAttribute.class);
        source.reset();
        if (source.incrementToken()) {
@@ -19,7 +19,6 @@ package org.apache.lucene.queryparser.classic;

import java.io.IOException;
import java.io.StringReader;
import java.text.Collator;
import java.text.DateFormat;
import java.util.*;
@@ -474,7 +473,7 @@ public abstract class QueryParserBase {

    TokenStream source;
    try {
-     source = analyzer.reusableTokenStream(field, new StringReader(queryText));
+     source = analyzer.tokenStream(field, new StringReader(queryText));
      source.reset();
    } catch (IOException e) {
      throw new ParseException("Unable to initialize TokenStream to analyze query text", e);
@@ -783,7 +782,7 @@ public abstract class QueryParserBase {
    TokenStream source;

    try {
-     source = analyzer.reusableTokenStream(field, new StringReader(part));
+     source = analyzer.tokenStream(field, new StringReader(part));
      source.reset();
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
@@ -114,7 +114,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {

    TokenStream source;
    try {
-     source = this.analyzer.reusableTokenStream(field, new StringReader(text));
+     source = this.analyzer.tokenStream(field, new StringReader(text));
      source.reset();
    } catch (IOException e1) {
      throw new RuntimeException(e1);
@@ -74,7 +74,7 @@ public class LikeThisQueryBuilder implements QueryBuilder {
    stopWordsSet = new HashSet<String>();
    for (String field : fields) {
      try {
-       TokenStream ts = analyzer.reusableTokenStream(field, new StringReader(stopWords));
+       TokenStream ts = analyzer.tokenStream(field, new StringReader(stopWords));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
@@ -53,7 +53,7 @@ public class SpanOrTermsBuilder extends SpanBuilderBase {

    try {
      List<SpanQuery> clausesList = new ArrayList<SpanQuery>();
-     TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(value));
+     TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(value));
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      BytesRef bytes = termAtt.getBytesRef();
      ts.reset();
@@ -57,7 +57,7 @@ public class TermsFilterBuilder implements FilterBuilder {
    String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");

    try {
-     TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+     TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      Term term = null;
      BytesRef bytes = termAtt.getBytesRef();
@@ -52,7 +52,7 @@ public class TermsQueryBuilder implements QueryBuilder {
    BooleanQuery bq = new BooleanQuery(DOMUtils.getAttribute(e, "disableCoord", false));
    bq.setMinimumNumberShouldMatch(DOMUtils.getAttribute(e, "minimumNumberShouldMatch", 0));
    try {
-     TokenStream ts = analyzer.reusableTokenStream(fieldName, new StringReader(text));
+     TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
      TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
      Term term = null;
      BytesRef bytes = termAtt.getBytesRef();
@@ -192,7 +192,7 @@ public class ICUCollationField extends FieldType {
    TokenStream source;

    try {
-     source = analyzer.reusableTokenStream(field, new StringReader(part));
+     source = analyzer.tokenStream(field, new StringReader(part));
      source.reset();
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
@@ -88,7 +88,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {

    TokenStream tokenStream = null;
    try {
-     tokenStream = analyzer.reusableTokenStream(context.getFieldName(), new StringReader(value));
+     tokenStream = analyzer.tokenStream(context.getFieldName(), new StringReader(value));
    } catch (IOException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
@@ -142,7 +142,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
  protected Set<BytesRef> getQueryTokenSet(String query, Analyzer analyzer) {
    try {
      final Set<BytesRef> tokens = new HashSet<BytesRef>();
-     final TokenStream tokenStream = analyzer.reusableTokenStream("", new StringReader(query));
+     final TokenStream tokenStream = analyzer.tokenStream("", new StringReader(query));
      final TermToBytesRefAttribute bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class);
      final BytesRef bytes = bytesAtt.getBytesRef();
@@ -309,7 +309,7 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
      return query;
    }
    StringBuilder norm = new StringBuilder();
-   TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) );
+   TokenStream tokens = analyzer.tokenStream("", new StringReader(query));
    tokens.reset();

    CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class);
@@ -474,7 +474,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
  private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<Token>();
    assert analyzer != null;
-   TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
+   TokenStream ts = analyzer.tokenStream("", new StringReader(q));
    ts.reset();
    // TODO: support custom attributes
    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
@@ -599,7 +599,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf
  private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {

    TokenStream tstream;
-   TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docText));
+   TokenStream ts = schema.getAnalyzer().tokenStream(fieldName, new StringReader(docText));
    ts.reset();
    tstream = new TokenOrderingFilter(ts, 10);
    return tstream;
@@ -214,7 +214,7 @@ public class CollationField extends FieldType {
    TokenStream source;

    try {
-     source = analyzer.reusableTokenStream(field, new StringReader(part));
+     source = analyzer.tokenStream(field, new StringReader(part));
      source.reset();
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize TokenStream to analyze range part: " + part, e);
@@ -109,7 +109,7 @@ public class TextField extends FieldType {

    TokenStream source;
    try {
-     source = analyzer.reusableTokenStream(field, new StringReader(queryText));
+     source = analyzer.tokenStream(field, new StringReader(queryText));
      source.reset();
    } catch (IOException e) {
      throw new RuntimeException("Unable to initialize TokenStream to analyze query text", e);
@@ -104,7 +104,7 @@ public class SpellingQueryConverter extends QueryConverter {
      String word = matcher.group(0);
      if (word.equals("AND") == false && word.equals("OR") == false) {
        try {
-         stream = analyzer.reusableTokenStream("", new StringReader(word));
+         stream = analyzer.tokenStream("", new StringReader(word));
          // TODO: support custom attributes
          CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
          FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class);
@@ -86,7 +86,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
    String text = "one two three si\uD834\uDD1Ex";

    // field one
-   TokenStream input = a.reusableTokenStream("one", new StringReader(text));
+   TokenStream input = a.tokenStream("one", new StringReader(text));
    assertTokenStreamContents(input,
        new String[] { "\u0001eno", "one", "\u0001owt", "two",
            "\u0001eerht", "three", "\u0001x\uD834\uDD1Eis", "si\uD834\uDD1Ex" },
@@ -95,7 +95,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
        new int[] { 1, 0, 1, 0, 1, 0, 1, 0 }
    );
    // field two
-   input = a.reusableTokenStream("two", new StringReader(text));
+   input = a.tokenStream("two", new StringReader(text));
    assertTokenStreamContents(input,
        new String[] { "\u0001eno", "\u0001owt",
            "\u0001eerht", "\u0001x\uD834\uDD1Eis" },
@@ -104,7 +104,7 @@ public class TestReversedWildcardFilterFactory extends SolrTestCaseJ4 {
        new int[] { 1, 1, 1, 1 }
    );
    // field three
-   input = a.reusableTokenStream("three", new StringReader(text));
+   input = a.tokenStream("three", new StringReader(text));
    assertTokenStreamContents(input,
        new String[] { "one", "two", "three", "si\uD834\uDD1Ex" },
        new int[] { 0, 4, 8, 14 },
@@ -155,7 +155,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
  public void testTermOffsetsTokenStream() throws Exception {
    String[] multivalued = { "a b c d", "e f g", "h", "i j k l m n" };
    Analyzer a1 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
-   TokenStream tokenStream = a1.reusableTokenStream("", new StringReader("a b c d e f g h i j k l m n"));
+   TokenStream tokenStream = a1.tokenStream("", new StringReader("a b c d e f g h i j k l m n"));
    tokenStream.reset();

    TermOffsetsTokenStream tots = new TermOffsetsTokenStream(
@@ -163,7 +163,7 @@ public class HighlighterTest extends SolrTestCaseJ4 {
    for( String v : multivalued ){
      TokenStream ts1 = tots.getMultiValuedTokenStream( v.length() );
      Analyzer a2 = new WhitespaceAnalyzer(TEST_VERSION_CURRENT);
-     TokenStream ts2 = a2.reusableTokenStream( "", new StringReader( v ) );
+     TokenStream ts2 = a2.tokenStream("", new StringReader(v));
      ts2.reset();

      while (ts1.incrementToken()) {
@@ -44,7 +44,7 @@ class SimpleQueryConverter extends SpellingQueryConverter {
    try {
      Collection<Token> result = new HashSet<Token>();
      WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);
-     TokenStream ts = analyzer.reusableTokenStream("", new StringReader(origQuery));
+     TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery));
      // TODO: support custom attributes
      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
      OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
@@ -152,7 +152,7 @@
    if (qval!="" && highlight) {
      Reader reader = new StringReader(qval);
      Analyzer analyzer = field.getType().getQueryAnalyzer();
-     TokenStream tstream = analyzer.reusableTokenStream(field.getName(),reader);
+     TokenStream tstream = analyzer.tokenStream(field.getName(), reader);
      TermToBytesRefAttribute bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
      tstream.reset();
      matches = new HashSet<BytesRef>();
@@ -241,7 +241,7 @@
    }

    } else {
-     TokenStream tstream = analyzer.reusableTokenStream(field.getName(),new StringReader(val));
+     TokenStream tstream = analyzer.tokenStream(field.getName(), new StringReader(val));
      tstream.reset();
      List<AttributeSource> tokens = getTokens(tstream);
      if (verbose) {