LUCENE-8352: Make TokenStreamComponents final

Alan Woodward 2018-09-18 11:00:20 +01:00
parent 10060a6237
commit c696cafc0d
23 changed files with 317 additions and 446 deletions

View File

@ -78,6 +78,9 @@ API Changes
* LUCENE-8483: Scorers may no longer take null as a Weight (Alan Woodward)
* LUCENE-8352: TokenStreamComponents is now final, and can take a Consumer<Reader>
in its constructor (Mark Harwood, Alan Woodward, Adrien Grand)
Changes in Runtime Behavior
* LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of

View File

@ -123,3 +123,9 @@ be replaced with a Scorable instead.
instead of long at suggest time ##
Most code should just require recompilation, though possibly requiring some added casts.
## TokenStreamComponents is now final ##
Instead of overriding TokenStreamComponents#setReader() to customise analyzer
initialisation, you should now pass a Consumer<Reader> instance to the
TokenStreamComponents constructor.
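For example, the StandardAnalyzer change in this commit follows exactly this pattern; a condensed before/after sketch (src is the analyzer's Tokenizer, tok the final TokenStream):
// Before: override setReader() on an anonymous subclass (no longer possible, the class is final)
return new TokenStreamComponents(src, tok) {
  @Override
  protected void setReader(Reader reader) {
    src.setMaxTokenLength(maxTokenLength);
    super.setReader(reader);
  }
};
// After: pass a Consumer<Reader> that configures the Tokenizer and then sets its reader
return new TokenStreamComponents(r -> {
  src.setMaxTokenLength(maxTokenLength);
  src.setReader(r);
}, tok);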

View File

@ -59,7 +59,7 @@ public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return new TokenStreamComponents(components.getTokenizer(),
return new TokenStreamComponents(components.getSource(),
new LimitTokenCountFilter(components.getTokenStream(), maxTokenCount, consumeAllTokens));
}

View File

@ -169,7 +169,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
}
StopFilter stopFilter = new StopFilter(components.getTokenStream(),
new CharArraySet(stopWords, false));
return new TokenStreamComponents(components.getTokenizer(), stopFilter);
return new TokenStreamComponents(components.getSource(), stopFilter);
}
/**

View File

@ -159,6 +159,6 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
filter.setOutputUnigrams(outputUnigrams);
filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
filter.setFillerToken(fillerToken);
return new TokenStreamComponents(components.getTokenizer(), filter);
return new TokenStreamComponents(components.getSource(), filter);
}
}

View File

@ -92,13 +92,10 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
TokenStream tok = new ClassicFilter(src);
tok = new LowerCaseFilter(tok);
tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
super.setReader(reader);
}
};
return new TokenStreamComponents(r -> {
src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
src.setReader(r);
}, tok);
}
@Override

View File

@ -88,15 +88,10 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
src.setMaxTokenLength(maxTokenLength);
TokenStream tok = new LowerCaseFilter(src);
tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) {
// So that if maxTokenLength was changed, the change takes
// effect next time tokenStream is called:
src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
super.setReader(reader);
}
};
return new TokenStreamComponents(r -> {
src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
src.setReader(r);
}, tok);
}
@Override

View File

@ -107,7 +107,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
return new TokenStreamComponents(components.getTokenizer(), filter);
return new TokenStreamComponents(components.getSource(), filter);
}
};
ts3 = wrapper2.tokenStream("special", text);

View File

@ -23,6 +23,7 @@ import java.io.Reader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Consumer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@ -355,16 +356,16 @@ public abstract class Analyzer implements Closeable {
/**
* This class encapsulates the outer components of a token stream. It provides
* access to the source ({@link Tokenizer}) and the outer end (sink), an
* access to the source (a {@link Reader} {@link Consumer}) and the outer end (sink), an
* instance of {@link TokenFilter} which also serves as the
* {@link TokenStream} returned by
* {@link Analyzer#tokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
public static final class TokenStreamComponents {
/**
* Original source of the tokens.
*/
protected final Tokenizer source;
protected final Consumer<Reader> source;
/**
* Sink tokenstream, such as the outer tokenfilter decorating
* the chain. This can be the source if there are no filters.
@ -378,25 +379,30 @@ public abstract class Analyzer implements Closeable {
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
* the source to set the reader on
* @param result
* the analyzer's resulting token stream
*/
public TokenStreamComponents(final Tokenizer source,
public TokenStreamComponents(final Consumer<Reader> source,
final TokenStream result) {
this.source = source;
this.sink = result;
}
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
* Creates a new {@link TokenStreamComponents} instance
* @param tokenizer the analyzer's Tokenizer
* @param result the analyzer's resulting token stream
*/
public TokenStreamComponents(final Tokenizer source) {
this.source = source;
this.sink = source;
public TokenStreamComponents(final Tokenizer tokenizer, final TokenStream result) {
this(tokenizer::setReader, result);
}
/**
* Creates a new {@link TokenStreamComponents} from a Tokenizer
*/
public TokenStreamComponents(final Tokenizer tokenizer) {
this(tokenizer::setReader, tokenizer);
}
/**
@ -406,8 +412,8 @@ public abstract class Analyzer implements Closeable {
* @param reader
* a reader to reset the source component
*/
protected void setReader(final Reader reader) {
source.setReader(reader);
private void setReader(final Reader reader) {
source.accept(reader);
}
/**
@ -420,11 +426,9 @@ public abstract class Analyzer implements Closeable {
}
/**
* Returns the component's {@link Tokenizer}
*
* @return Component's {@link Tokenizer}
* Returns the component's source
*/
public Tokenizer getTokenizer() {
public Consumer<Reader> getSource() {
return source;
}
}
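The existing Tokenizer-based constructors keep working because they delegate to the new Consumer<Reader> form via tokenizer::setReader. A minimal usage sketch for an analyzer that needs no per-reader customisation (WhitespaceTokenizer and LowerCaseFilter chosen purely for illustration):
Analyzer analyzer = new Analyzer() {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();
    TokenStream sink = new LowerCaseFilter(source);
    // Equivalent to: new TokenStreamComponents(source::setReader, sink)
    return new TokenStreamComponents(source, sink);
  }
};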

View File

@ -81,15 +81,10 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
src.setMaxTokenLength(maxTokenLength);
TokenStream tok = new LowerCaseFilter(src);
tok = new StopFilter(tok, stopwords);
return new TokenStreamComponents(src, tok) {
@Override
protected void setReader(final Reader reader) {
// So that if maxTokenLength was changed, the change takes
// effect next time tokenStream is called:
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
super.setReader(reader);
}
};
return new TokenStreamComponents(r -> {
src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
src.setReader(r);
}, tok);
}
@Override

View File

@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.util.LuceneTestCase;
public class TestAnalyzerWrapper extends LuceneTestCase {
public void testSourceDelegation() throws IOException {
AtomicBoolean sourceCalled = new AtomicBoolean(false);
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(r -> {
sourceCalled.set(true);
}, new CannedTokenStream());
}
};
Analyzer wrapped = new AnalyzerWrapper(analyzer.getReuseStrategy()) {
@Override
protected Analyzer getWrappedAnalyzer(String fieldName) {
return analyzer;
}
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return new TokenStreamComponents(components.getSource(), new LowerCaseFilter(components.getTokenStream()));
}
};
try (TokenStream ts = wrapped.tokenStream("", "text")) {
assertTrue(sourceCalled.get());
}
}
}

View File

@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRefIterator;
*
* @lucene.internal
*/
class BytesRefIteratorTokenStream extends TokenStream {
public class BytesRefIteratorTokenStream extends TokenStream {
public BytesRefIterator getBytesRefIterator() {
return bytesIter;

View File

@ -157,6 +157,23 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
return new Field[]{field};
}
public class ShapeTokenStream extends BytesRefIteratorTokenStream {
public void setShape(Shape shape) {
double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
int detailLevel = grid.getLevelForDistance(distErr);
Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null);
CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator();
cellToBytesRefIterator.reset(cells);
setBytesRefIterator(cellToBytesRefIterator);
}
}
public ShapeTokenStream tokenStream() {
return new ShapeTokenStream();
}
protected CellToBytesRefIterator newCellToBytesRefIterator() {
//subclasses could return one that never emits leaves, or does both, or who knows.
return new CellToBytesRefIterator();

View File

@ -360,7 +360,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
if (fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars > 0) {
// TODO: should use an EdgeNGramTokenFilterFactory here
TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars, false);
return new TokenStreamComponents(components.getTokenizer(), filter);
return new TokenStreamComponents(components.getSource(), filter);
} else {
return components;
}

View File

@ -236,7 +236,7 @@ public class FreeTextSuggester extends Lookup implements Accountable {
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
ShingleFilter shingles = new ShingleFilter(components.getTokenStream(), 2, grams);
shingles.setTokenSeparator(Character.toString((char) separator));
return new TokenStreamComponents(components.getTokenizer(), shingles);
return new TokenStreamComponents(components.getSource(), shingles);
}
};
}

View File

@ -132,6 +132,6 @@ public final class CompletionAnalyzer extends AnalyzerWrapper {
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
CompletionTokenStream tokenStream = new CompletionTokenStream(components.getTokenStream(),
preserveSep, preservePositionIncrements, maxGraphExpansions);
return new TokenStreamComponents(components.getTokenizer(), tokenStream);
return new TokenStreamComponents(components.getSource(), tokenStream);
}
}

View File

@ -44,6 +44,7 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
@ -303,52 +304,82 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
IOUtils.close(a, tempDir);
}
static final class MultiCannedTokenizer extends Tokenizer {
int counter = -1;
final TokenStream[] tokenStreams;
MultiCannedTokenizer(TokenStream... tokenStreams) {
super(tokenStreams[0].getAttributeFactory());
this.tokenStreams = tokenStreams;
}
@Override
public boolean incrementToken() throws IOException {
if (tokenStreams[counter].incrementToken() == false) {
return false;
}
this.restoreState(tokenStreams[counter].captureState());
return true;
}
@Override
public void reset() throws IOException {
tokenStreams[counter].reset();
}
}
static final class MultiCannedAnalyzer extends Analyzer {
final MultiCannedTokenizer tokenizer;
MultiCannedAnalyzer(TokenStream... tokenStreams) {
this(false, tokenStreams);
}
MultiCannedAnalyzer(boolean addBytesAtt, TokenStream... tokenStreams) {
this.tokenizer = new MultiCannedTokenizer(tokenStreams);
if (addBytesAtt) {
this.tokenizer.addAttribute(BytesTermAttribute.class);
}
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
tokenizer.counter = 0;
return new TokenStreamComponents(tokenizer);
}
@Override
protected Reader initReader(String fieldName, Reader reader) {
tokenizer.counter++;
if (tokenizer.counter >= tokenizer.tokenStreams.length) {
tokenizer.counter = tokenizer.tokenStreams.length - 1;
}
return super.initReader(fieldName, reader);
}
}
public void testGraphDups() throws Exception {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int tokenStreamCounter = 0;
final TokenStream[] tokenStreams = new TokenStream[] {
new CannedTokenStream(new Token[] {
token("wifi",1,1),
token("hotspot",0,2),
token("network",1,1),
token("is",1,1),
token("slow",1,1)
}),
new CannedTokenStream(new Token[] {
token("wi",1,1),
token("hotspot",0,3),
token("fi",1,1),
token("network",1,1),
token("is",1,1),
token("fast",1,1)
}),
new CannedTokenStream(new Token[] {
token("wifi",1,1),
token("hotspot",0,2),
token("network",1,1)
}),
};
@Override
public TokenStream getTokenStream() {
TokenStream result = tokenStreams[tokenStreamCounter];
tokenStreamCounter++;
return result;
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
final Analyzer analyzer = new MultiCannedAnalyzer(
new CannedTokenStream(
token("wifi",1,1),
token("hotspot",0,2),
token("network",1,1),
token("is",1,1),
token("slow",1,1)),
new CannedTokenStream(
token("wi",1,1),
token("hotspot",0,3),
token("fi",1,1),
token("network",1,1),
token("is",1,1),
token("fast",1,1)),
new CannedTokenStream(
token("wifi",1,1),
token("hotspot",0,2),
token("network",1,1)));
Input keys[] = new Input[] {
new Input("wifi network is slow", 50),
@ -378,45 +409,20 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
// The Analyzer below mimics the functionality of the SynonymAnalyzer
// using the above map, so that the suggest module does not need a dependency on the
// synonym module
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int tokenStreamCounter = 0;
final TokenStream[] tokenStreams = new TokenStream[] {
new CannedTokenStream(new Token[] {
token("ab",1,1),
token("ba",0,1),
token("xc",1,1)
}),
new CannedTokenStream(new Token[] {
token("ba",1,1),
token("xd",1,1)
}),
new CannedTokenStream(new Token[] {
token("ab",1,1),
token("ba",0,1),
token("x",1,1)
})
};
@Override
public TokenStream getTokenStream() {
TokenStream result = tokenStreams[tokenStreamCounter];
tokenStreamCounter++;
return result;
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
final Analyzer analyzer = new MultiCannedAnalyzer(
new CannedTokenStream(
token("ab", 1, 1),
token("ba", 0, 1),
token("xc", 1, 1)),
new CannedTokenStream(
token("ba", 1, 1),
token("xd", 1, 1)),
new CannedTokenStream(
token("ab",1,1),
token("ba",0,1),
token("x",1,1)));
Input keys[] = new Input[] {
new Input("ab xc", 50),
@ -426,7 +432,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
suggester.build(new InputArrayIterator(keys));
List<LookupResult> results = suggester.lookup("ab x", false, 1);
assertTrue(results.size() == 1);
assertEquals(1, results.size());
IOUtils.close(analyzer, tempDir);
}
@ -459,39 +465,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
*/
private final Analyzer getUnusualAnalyzer() {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int count;
@Override
public TokenStream getTokenStream() {
// 4th time we are called, return tokens a b,
// else just a:
if (count++ != 3) {
return new CannedTokenStream(new Token[] {
token("a", 1, 1),
});
} else {
// After that "a b":
return new CannedTokenStream(new Token[] {
token("a", 1, 1),
token("b", 1, 1),
});
}
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
private Analyzer getUnusualAnalyzer() {
// First three calls just return "a", the fourth returns ["a","b"], then "a" again
return new MultiCannedAnalyzer(
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1), token("b", 1, 1)),
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1)));
}
public void testExactFirst() throws Exception {
@ -999,23 +981,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
@Override
public TokenStream getTokenStream() {
return new CannedTokenStream(new Token[] {
token("hairy", 1, 1),
token("smelly", 0, 1),
token("dog", 1, 1),
});
}
@Override
protected void setReader(final Reader reader) {
}
};
return new TokenStreamComponents(r -> {}, new CannedTokenStream(
token("hairy", 1, 1),
token("smelly", 0, 1),
token("dog", 1, 1)));
}
};
@ -1056,38 +1025,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
public void testDupSurfaceFormsMissingResults2() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int count;
@Override
public TokenStream getTokenStream() {
if (count == 0) {
count++;
return new CannedTokenStream(new Token[] {
token("p", 1, 1),
token("q", 1, 1),
token("r", 0, 1),
token("s", 0, 1),
});
} else {
return new CannedTokenStream(new Token[] {
token("p", 1, 1),
});
}
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
Analyzer a = new MultiCannedAnalyzer(
new CannedTokenStream(
token("p", 1, 1),
token("q", 1, 1),
token("r", 0, 1),
token("s", 0, 1)),
new CannedTokenStream(token("p", 1, 1)),
new CannedTokenStream(token("p", 1, 1)),
new CannedTokenStream(token("p", 1, 1)));
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
@ -1131,24 +1077,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
* and checks that they come back in surface-form order.
*/
public void testTieBreakOnSurfaceForm() throws Exception {
Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
@Override
public TokenStream getTokenStream() {
return new CannedTokenStream(new Token[] {
token("dog", 1, 1)
});
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
Analyzer a = new MultiCannedAnalyzer(new CannedTokenStream(token("dog", 1, 1)));
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
@ -1187,41 +1116,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
}
public void test0ByteKeys() throws Exception {
final Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int tokenStreamCounter = 0;
final TokenStream[] tokenStreams = new TokenStream[] {
new CannedBinaryTokenStream(new BinaryToken[] {
token(new BytesRef(new byte[] {0x0, 0x0, 0x0})),
}),
new CannedBinaryTokenStream(new BinaryToken[] {
token(new BytesRef(new byte[] {0x0, 0x0})),
}),
new CannedBinaryTokenStream(new BinaryToken[] {
token(new BytesRef(new byte[] {0x0, 0x0, 0x0})),
}),
new CannedBinaryTokenStream(new BinaryToken[] {
token(new BytesRef(new byte[] {0x0, 0x0})),
}),
};
@Override
public TokenStream getTokenStream() {
TokenStream result = tokenStreams[tokenStreamCounter];
tokenStreamCounter++;
return result;
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
final Analyzer a = new MultiCannedAnalyzer(true,
new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0, 0x0}))),
new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0}))),
new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0, 0x0}))),
new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0})))
);
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
@ -1265,26 +1165,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
public void testTooManyExpansions() throws Exception {
final Analyzer a = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
@Override
public TokenStream getTokenStream() {
Token a = new Token("a", 0, 1);
a.setPositionIncrement(1);
Token b = new Token("b", 0, 1);
b.setPositionIncrement(0);
return new CannedTokenStream(new Token[] {a, b});
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
@Override
protected TokenStreamComponents createComponents(String fieldName) {
return new TokenStreamComponents(r -> {}, new CannedTokenStream(
new Token("a", 0, 1),
new Token("b", 0, 0, 1)));
}
};
Directory tempDir = getDirectory();
AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, 1, true);

View File

@ -17,7 +17,6 @@
package org.apache.lucene.search.suggest.analyzing;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
@ -36,7 +35,6 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.suggest.Input;
@ -240,50 +238,24 @@ public class FuzzySuggesterTest extends LuceneTestCase {
public void testGraphDups() throws Exception {
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int tokenStreamCounter = 0;
final TokenStream[] tokenStreams = new TokenStream[] {
new CannedTokenStream(new Token[] {
token("wifi",1,1),
token("hotspot",0,2),
token("network",1,1),
token("is",1,1),
token("slow",1,1)
}),
new CannedTokenStream(new Token[] {
token("wi",1,1),
token("hotspot",0,3),
token("fi",1,1),
token("network",1,1),
token("is",1,1),
token("fast",1,1)
}),
new CannedTokenStream(new Token[] {
token("wifi",1,1),
token("hotspot",0,2),
token("network",1,1)
}),
};
@Override
public TokenStream getTokenStream() {
TokenStream result = tokenStreams[tokenStreamCounter];
tokenStreamCounter++;
return result;
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
final Analyzer analyzer = new AnalyzingSuggesterTest.MultiCannedAnalyzer(
new CannedTokenStream(
token("wifi", 1, 1),
token("hotspot", 0, 2),
token("network", 1, 1),
token("is", 1, 1),
token("slow", 1, 1)),
new CannedTokenStream(
token("wi", 1, 1),
token("hotspot", 0, 3),
token("fi", 1, 1),
token("network", 1, 1),
token("is", 1, 1),
token("fast", 1, 1)),
new CannedTokenStream(
token("wifi", 1, 1),
token("hotspot",0,2),
token("network",1,1)));
Input keys[] = new Input[] {
new Input("wifi network is slow", 50),
@ -326,43 +298,18 @@ public class FuzzySuggesterTest extends LuceneTestCase {
// using the above map, so that the suggest module does not need a dependency on the
// synonym module
final Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int tokenStreamCounter = 0;
final TokenStream[] tokenStreams = new TokenStream[] {
new CannedTokenStream(new Token[] {
token("ab",1,1),
token("ba",0,1),
token("xc",1,1)
}),
new CannedTokenStream(new Token[] {
token("ba",1,1),
token("xd",1,1)
}),
new CannedTokenStream(new Token[] {
token("ab",1,1),
token("ba",0,1),
token("x",1,1)
})
};
@Override
public TokenStream getTokenStream() {
TokenStream result = tokenStreams[tokenStreamCounter];
tokenStreamCounter++;
return result;
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
final Analyzer analyzer = new AnalyzingSuggesterTest.MultiCannedAnalyzer(
new CannedTokenStream(
token("ab", 1, 1),
token("ba", 0, 1),
token("xc", 1, 1)),
new CannedTokenStream(
token("ba", 1, 1),
token("xd", 1, 1)),
new CannedTokenStream(
token("ab", 1, 1),
token("ba", 0, 1),
token("x", 1, 1)));
Input keys[] = new Input[] {
new Input("ab xc", 50),
@ -399,41 +346,17 @@ public class FuzzySuggesterTest extends LuceneTestCase {
ts.end();
ts.close();
}
*/
private final Analyzer getUnusualAnalyzer() {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
return new TokenStreamComponents(tokenizer) {
int count;
@Override
public TokenStream getTokenStream() {
// 4th time we are called, return tokens a b,
// else just a:
if (count++ != 3) {
return new CannedTokenStream(new Token[] {
token("a", 1, 1),
});
} else {
// After that "a b":
return new CannedTokenStream(new Token[] {
token("a", 1, 1),
token("b", 1, 1),
});
}
}
@Override
protected void setReader(final Reader reader) {
}
};
}
};
private Analyzer getUnusualAnalyzer() {
// First three calls just return "a", the fourth returns ["a","b"], then "a" again
return new AnalyzingSuggesterTest.MultiCannedAnalyzer(
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1), token("b", 1, 1)),
new CannedTokenStream(token("a", 1, 1)),
new CannedTokenStream(token("a", 1, 1)));
}
public void testExactFirst() throws Exception {

View File

@ -57,7 +57,7 @@ public final class CannedBinaryTokenStream extends TokenStream {
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public CannedBinaryTokenStream(BinaryToken... tokens) {
super();
super(Token.TOKEN_ATTRIBUTE_FACTORY);
this.tokens = tokens;
}

View File

@ -52,7 +52,13 @@ public final class CannedTokenStream extends TokenStream {
posIncrAtt.setPositionIncrement(finalPosInc);
offsetAtt.setOffset(finalOffset, finalOffset);
}
@Override
public void reset() throws IOException {
upto = 0;
super.reset();
}
@Override
public boolean incrementToken() {
if (upto < tokens.length) {

View File

@ -17,21 +17,16 @@
package org.apache.solr.schema;
import java.io.IOException;
import java.io.Reader;
import java.lang.invoke.MethodHandles;
import java.util.Map;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
import org.apache.lucene.spatial.query.SpatialArgsParser;
import org.apache.solr.util.MapListener;
import org.locationtech.spatial4j.shape.Shape;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@ -80,29 +75,19 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
* so that the analysis UI will show reasonable tokens.
*/
@Override
public Analyzer getIndexAnalyzer()
{
public Analyzer getIndexAnalyzer() {
return new Analyzer() {
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
return new TokenStreamComponents(new KeywordTokenizer()) {
private Shape shape = null;
protected void setReader(final Reader reader) {
source.setReader(reader);
try {
shape = parseShape(IOUtils.toString(reader));
} catch (IOException e) {
throw new RuntimeException(e);
}
protected TokenStreamComponents createComponents(String fieldName) {
PrefixTreeStrategy s = newSpatialStrategy(fieldName == null ? getTypeName() : fieldName);
PrefixTreeStrategy.ShapeTokenStream ts = s.tokenStream();
return new TokenStreamComponents(r -> {
try {
ts.setShape(parseShape(IOUtils.toString(r)));
} catch (IOException e) {
throw new RuntimeException(e);
}
public TokenStream getTokenStream() {
PrefixTreeStrategy s = newSpatialStrategy(fieldName==null ? getTypeName() : fieldName);
return s.createIndexableFields(shape)[0].tokenStreamValue();
}
};
}, ts);
}
};
}

View File

@ -374,18 +374,13 @@ public class PreAnalyzedField extends TextField implements HasImplicitIndexAnaly
@Override
protected TokenStreamComponents createComponents(String fieldName) {
final PreAnalyzedTokenizer tokenizer = new PreAnalyzedTokenizer(parser);
return new TokenStreamComponents(tokenizer) {
@Override
protected void setReader(final Reader reader) {
super.setReader(reader);
try {
tokenizer.decodeInput(reader);
} catch (IOException e) {
// save this exception for reporting when reset() is called
tokenizer.setReaderConsumptionException(e);
}
return new TokenStreamComponents(r -> {
try {
tokenizer.decodeInput(r);
} catch (IOException e) {
tokenizer.setReaderConsumptionException(e);
}
};
}, tokenizer);
}
}
}

View File

@ -432,7 +432,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
NamedList<NamedList> result = handler.handleAnalysisRequest(request, h.getCore().getLatestSchema());
NamedList<List<NamedList>> tokens = (NamedList<List<NamedList>>)
((NamedList)result.get("field_types").get("location_rpt")).get("index");
List<NamedList> tokenList = tokens.get("org.apache.lucene.spatial.prefix.BytesRefIteratorTokenStream");
List<NamedList> tokenList = tokens.get("org.apache.lucene.spatial.prefix.PrefixTreeStrategy$ShapeTokenStream");
List<String> vals = new ArrayList<>(tokenList.size());