mirror of https://github.com/apache/lucene.git, synced 2025-02-10 20:15:18 +00:00
LUCENE-8352: Make TokenStreamComponents final
This commit is contained in:
parent 10060a6237
commit c696cafc0d
@@ -78,6 +78,9 @@ API Changes
 
 * LUCENE-8483: Scorers may no longer take null as a Weight (Alan Woodward)
 
+* LUCENE-8352: TokenStreamComponents is now final, and can take a Consumer<Reader>
+  in its constructor (Mark Harwood, Alan Woodward, Adrien Grand)
+
 Changes in Runtime Behavior
 
 * LUCENE-8333: Switch MoreLikeThis.setMaxDocFreqPct to use maxDoc instead of
@@ -123,3 +123,9 @@ be replaced with a Scorable instead.
 instead of long at suggest time ##
 
 Most code should just require recompilation, though possibly requiring some added casts.
+
+## TokenStreamComponents is now final ##
+
+Instead of overriding TokenStreamComponents#setReader() to customise analyzer
+initialisation, you should now pass a Consumer<Reader> instance to the
+TokenStreamComponents constructor.
@@ -59,7 +59,7 @@ public final class LimitTokenCountAnalyzer extends AnalyzerWrapper {
 
   @Override
   protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
-    return new TokenStreamComponents(components.getTokenizer(),
+    return new TokenStreamComponents(components.getSource(),
         new LimitTokenCountFilter(components.getTokenStream(), maxTokenCount, consumeAllTokens));
   }
 
@@ -169,7 +169,7 @@ public final class QueryAutoStopWordAnalyzer extends AnalyzerWrapper {
     }
     StopFilter stopFilter = new StopFilter(components.getTokenStream(),
         new CharArraySet(stopWords, false));
-    return new TokenStreamComponents(components.getTokenizer(), stopFilter);
+    return new TokenStreamComponents(components.getSource(), stopFilter);
   }
 
   /**
@@ -159,6 +159,6 @@ public final class ShingleAnalyzerWrapper extends AnalyzerWrapper {
     filter.setOutputUnigrams(outputUnigrams);
     filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
     filter.setFillerToken(fillerToken);
-    return new TokenStreamComponents(components.getTokenizer(), filter);
+    return new TokenStreamComponents(components.getSource(), filter);
   }
 }
@@ -92,13 +92,10 @@ public final class ClassicAnalyzer extends StopwordAnalyzerBase {
     TokenStream tok = new ClassicFilter(src);
     tok = new LowerCaseFilter(tok);
     tok = new StopFilter(tok, stopwords);
-    return new TokenStreamComponents(src, tok) {
-      @Override
-      protected void setReader(final Reader reader) {
-        src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
-        super.setReader(reader);
-      }
-    };
+    return new TokenStreamComponents(r -> {
+      src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
+      src.setReader(r);
+    }, tok);
   }
 
   @Override
@@ -88,15 +88,10 @@ public final class UAX29URLEmailAnalyzer extends StopwordAnalyzerBase {
     src.setMaxTokenLength(maxTokenLength);
     TokenStream tok = new LowerCaseFilter(src);
     tok = new StopFilter(tok, stopwords);
-    return new TokenStreamComponents(src, tok) {
-      @Override
-      protected void setReader(final Reader reader) {
-        // So that if maxTokenLength was changed, the change takes
-        // effect next time tokenStream is called:
-        src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
-        super.setReader(reader);
-      }
-    };
+    return new TokenStreamComponents(r -> {
+      src.setMaxTokenLength(UAX29URLEmailAnalyzer.this.maxTokenLength);
+      src.setReader(r);
+    }, tok);
   }
 
   @Override
@@ -107,7 +107,7 @@ public class TestPerFieldAnalyzerWrapper extends BaseTokenStreamTestCase {
       protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
         assertNotSame(specialAnalyzer.tokenStream("special", text), components.getTokenStream());
         TokenFilter filter = new ASCIIFoldingFilter(components.getTokenStream());
-        return new TokenStreamComponents(components.getTokenizer(), filter);
+        return new TokenStreamComponents(components.getSource(), filter);
       }
     };
     ts3 = wrapper2.tokenStream("special", text);
@@ -23,6 +23,7 @@ import java.io.Reader;
 import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.function.Consumer;
 
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -355,16 +356,16 @@ public abstract class Analyzer implements Closeable {
 
   /**
    * This class encapsulates the outer components of a token stream. It provides
-   * access to the source ({@link Tokenizer}) and the outer end (sink), an
+   * access to the source (a {@link Reader} {@link Consumer} and the outer end (sink), an
    * instance of {@link TokenFilter} which also serves as the
    * {@link TokenStream} returned by
    * {@link Analyzer#tokenStream(String, Reader)}.
    */
-  public static class TokenStreamComponents {
+  public static final class TokenStreamComponents {
     /**
     * Original source of the tokens.
     */
-    protected final Tokenizer source;
+    protected final Consumer<Reader> source;
     /**
     * Sink tokenstream, such as the outer tokenfilter decorating
     * the chain. This can be the source if there are no filters.
@@ -378,25 +379,30 @@ public abstract class Analyzer implements Closeable {
      * Creates a new {@link TokenStreamComponents} instance.
      *
      * @param source
-     *          the analyzer's tokenizer
+     *          the source to set the reader on
      * @param result
      *          the analyzer's resulting token stream
      */
-    public TokenStreamComponents(final Tokenizer source,
+    public TokenStreamComponents(final Consumer<Reader> source,
         final TokenStream result) {
       this.source = source;
       this.sink = result;
     }
 
     /**
-     * Creates a new {@link TokenStreamComponents} instance.
-     *
-     * @param source
-     *          the analyzer's tokenizer
+     * Creates a new {@link TokenStreamComponents} instance
+     * @param tokenizer the analyzer's Tokenizer
+     * @param result the analyzer's resulting token stream
      */
-    public TokenStreamComponents(final Tokenizer source) {
-      this.source = source;
-      this.sink = source;
+    public TokenStreamComponents(final Tokenizer tokenizer, final TokenStream result) {
+      this(tokenizer::setReader, result);
+    }
+
+    /**
+     * Creates a new {@link TokenStreamComponents} from a Tokenizer
+     */
+    public TokenStreamComponents(final Tokenizer tokenizer) {
+      this(tokenizer::setReader, tokenizer);
     }
 
     /**
@@ -406,8 +412,8 @@ public abstract class Analyzer implements Closeable {
      * @param reader
      *          a reader to reset the source component
      */
-    protected void setReader(final Reader reader) {
-      source.setReader(reader);
+    private void setReader(final Reader reader) {
+      source.accept(reader);
     }
 
     /**
@@ -420,11 +426,9 @@ public abstract class Analyzer implements Closeable {
     }
 
     /**
-     * Returns the component's {@link Tokenizer}
-     *
-     * @return Component's {@link Tokenizer}
+     * Returns the component's source
      */
-    public Tokenizer getTokenizer() {
+    public Consumer<Reader> getSource() {
       return source;
     }
   }
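With the Analyzer.java changes above, the source of a TokenStreamComponents is any Consumer<Reader>, and the Tokenizer-based convenience constructors delegate to it via tokenizer::setReader. A minimal sketch of that equivalence follows; the TokenStreamComponentsSketch class and the KeywordTokenizer choice are illustrative assumptions, not part of this commit.

import org.apache.lucene.analysis.Analyzer.TokenStreamComponents;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;

public class TokenStreamComponentsSketch {
  public static void main(String[] args) {
    Tokenizer tokenizer = new KeywordTokenizer();

    // Explicit Consumer<Reader> source plus sink: what the new primary constructor takes.
    TokenStreamComponents viaConsumer = new TokenStreamComponents(tokenizer::setReader, tokenizer);

    // Convenience constructors: both delegate to the Consumer<Reader> form internally.
    TokenStreamComponents viaTokenizer = new TokenStreamComponents(tokenizer, tokenizer);
    TokenStreamComponents sourceOnly = new TokenStreamComponents(tokenizer);

    // All three expose the same sink Tokenizer through getTokenStream().
    System.out.println(viaConsumer.getTokenStream() == viaTokenizer.getTokenStream()
        && viaTokenizer.getTokenStream() == sourceOnly.getTokenStream());
  }
}

The in-tree analyzers changed below (StandardAnalyzer and friends) use the Consumer<Reader> form to reset per-use state such as maxTokenLength, and wrapping analyzers forward components.getSource() instead of components.getTokenizer().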
@@ -81,15 +81,10 @@ public final class StandardAnalyzer extends StopwordAnalyzerBase {
     src.setMaxTokenLength(maxTokenLength);
     TokenStream tok = new LowerCaseFilter(src);
     tok = new StopFilter(tok, stopwords);
-    return new TokenStreamComponents(src, tok) {
-      @Override
-      protected void setReader(final Reader reader) {
-        // So that if maxTokenLength was changed, the change takes
-        // effect next time tokenStream is called:
-        src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
-        super.setReader(reader);
-      }
-    };
+    return new TokenStreamComponents(r -> {
+      src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
+      src.setReader(r);
+    }, tok);
   }
 
   @Override
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis;
+
+import java.io.IOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestAnalyzerWrapper extends LuceneTestCase {
+
+  public void testSourceDelegation() throws IOException {
+
+    AtomicBoolean sourceCalled = new AtomicBoolean(false);
+
+    Analyzer analyzer = new Analyzer() {
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(r -> {
+          sourceCalled.set(true);
+        }, new CannedTokenStream());
+      }
+    };
+
+    Analyzer wrapped = new AnalyzerWrapper(analyzer.getReuseStrategy()) {
+      @Override
+      protected Analyzer getWrappedAnalyzer(String fieldName) {
+        return analyzer;
+      }
+
+      @Override
+      protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+        return new TokenStreamComponents(components.getSource(), new LowerCaseFilter(components.getTokenStream()));
+      }
+    };
+
+    try (TokenStream ts = wrapped.tokenStream("", "text")) {
+      assertTrue(sourceCalled.get());
+    }
+
+  }
+
+}
@@ -28,7 +28,7 @@ import org.apache.lucene.util.BytesRefIterator;
  *
  * @lucene.internal
  */
-class BytesRefIteratorTokenStream extends TokenStream {
+public class BytesRefIteratorTokenStream extends TokenStream {
 
   public BytesRefIterator getBytesRefIterator() {
     return bytesIter;
@@ -157,6 +157,23 @@ public abstract class PrefixTreeStrategy extends SpatialStrategy {
     return new Field[]{field};
   }
 
+  public class ShapeTokenStream extends BytesRefIteratorTokenStream {
+
+    public void setShape(Shape shape) {
+      double distErr = SpatialArgs.calcDistanceFromErrPct(shape, distErrPct, ctx);
+      int detailLevel = grid.getLevelForDistance(distErr);
+      Iterator<Cell> cells = createCellIteratorToIndex(shape, detailLevel, null);
+      CellToBytesRefIterator cellToBytesRefIterator = newCellToBytesRefIterator();
+      cellToBytesRefIterator.reset(cells);
+      setBytesRefIterator(cellToBytesRefIterator);
+    }
+
+  }
+
+  public ShapeTokenStream tokenStream() {
+    return new ShapeTokenStream();
+  }
+
   protected CellToBytesRefIterator newCellToBytesRefIterator() {
     //subclasses could return one that never emits leaves, or does both, or who knows.
     return new CellToBytesRefIterator();
@@ -360,7 +360,7 @@ public class AnalyzingInfixSuggester extends Lookup implements Closeable {
         if (fieldName.equals(TEXTGRAMS_FIELD_NAME) && minPrefixChars > 0) {
           // TODO: should use an EdgeNGramTokenFilterFactory here
           TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), 1, minPrefixChars, false);
-          return new TokenStreamComponents(components.getTokenizer(), filter);
+          return new TokenStreamComponents(components.getSource(), filter);
         } else {
           return components;
         }
@@ -236,7 +236,7 @@ public class FreeTextSuggester extends Lookup implements Accountable {
       protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
         ShingleFilter shingles = new ShingleFilter(components.getTokenStream(), 2, grams);
         shingles.setTokenSeparator(Character.toString((char) separator));
-        return new TokenStreamComponents(components.getTokenizer(), shingles);
+        return new TokenStreamComponents(components.getSource(), shingles);
       }
     };
   }
@@ -132,6 +132,6 @@ public final class CompletionAnalyzer extends AnalyzerWrapper {
   protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
     CompletionTokenStream tokenStream = new CompletionTokenStream(components.getTokenStream(),
         preserveSep, preservePositionIncrements, maxGraphExpansions);
-    return new TokenStreamComponents(components.getTokenizer(), tokenStream);
+    return new TokenStreamComponents(components.getSource(), tokenStream);
   }
 }
@@ -44,6 +44,7 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
@@ -303,52 +304,82 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
     IOUtils.close(a, tempDir);
   }
 
+  static final class MultiCannedTokenizer extends Tokenizer {
+
+    int counter = -1;
+    final TokenStream[] tokenStreams;
+
+    MultiCannedTokenizer(TokenStream... tokenStreams) {
+      super(tokenStreams[0].getAttributeFactory());
+      this.tokenStreams = tokenStreams;
+    }
+
+    @Override
+    public boolean incrementToken() throws IOException {
+      if (tokenStreams[counter].incrementToken() == false) {
+        return false;
+      }
+      this.restoreState(tokenStreams[counter].captureState());
+      return true;
+    }
+
+    @Override
+    public void reset() throws IOException {
+      tokenStreams[counter].reset();
+    }
+  }
+
+  static final class MultiCannedAnalyzer extends Analyzer {
+
+    final MultiCannedTokenizer tokenizer;
+
+    MultiCannedAnalyzer(TokenStream... tokenStreams) {
+      this(false, tokenStreams);
+    }
+
+    MultiCannedAnalyzer(boolean addBytesAtt, TokenStream... tokenStreams) {
+      this.tokenizer = new MultiCannedTokenizer(tokenStreams);
+      if (addBytesAtt) {
+        this.tokenizer.addAttribute(BytesTermAttribute.class);
+      }
+    }
+
+    @Override
+    protected TokenStreamComponents createComponents(String fieldName) {
+      tokenizer.counter = 0;
+      return new TokenStreamComponents(tokenizer);
+    }
+
+    @Override
+    protected Reader initReader(String fieldName, Reader reader) {
+      tokenizer.counter++;
+      if (tokenizer.counter >= tokenizer.tokenStreams.length) {
+        tokenizer.counter = tokenizer.tokenStreams.length - 1;
+      }
+      return super.initReader(fieldName, reader);
+    }
+  }
+
   public void testGraphDups() throws Exception {
 
-    final Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          int tokenStreamCounter = 0;
-          final TokenStream[] tokenStreams = new TokenStream[] {
-            new CannedTokenStream(new Token[] {
-              token("wifi",1,1),
-              token("hotspot",0,2),
-              token("network",1,1),
-              token("is",1,1),
-              token("slow",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("wi",1,1),
-              token("hotspot",0,3),
-              token("fi",1,1),
-              token("network",1,1),
-              token("is",1,1),
-              token("fast",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("wifi",1,1),
-              token("hotspot",0,2),
-              token("network",1,1)
-            }),
-          };
-
-          @Override
-          public TokenStream getTokenStream() {
-            TokenStream result = tokenStreams[tokenStreamCounter];
-            tokenStreamCounter++;
-            return result;
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    final Analyzer analyzer = new MultiCannedAnalyzer(
+        new CannedTokenStream(
+            token("wifi",1,1),
+            token("hotspot",0,2),
+            token("network",1,1),
+            token("is",1,1),
+            token("slow",1,1)),
+        new CannedTokenStream(
+            token("wi",1,1),
+            token("hotspot",0,3),
+            token("fi",1,1),
+            token("network",1,1),
+            token("is",1,1),
+            token("fast",1,1)),
+        new CannedTokenStream(
+            token("wifi",1,1),
+            token("hotspot",0,2),
+            token("network",1,1)));
 
     Input keys[] = new Input[] {
         new Input("wifi network is slow", 50),
@@ -378,45 +409,20 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
 
     // The Analyzer below mimics the functionality of the SynonymAnalyzer
     // using the above map, so that the suggest module does not need a dependency on the
-    // synonym module 
+    // synonym module
 
-    final Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          int tokenStreamCounter = 0;
-          final TokenStream[] tokenStreams = new TokenStream[] {
-            new CannedTokenStream(new Token[] {
-              token("ab",1,1),
-              token("ba",0,1),
-              token("xc",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("ba",1,1),
-              token("xd",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("ab",1,1),
-              token("ba",0,1),
-              token("x",1,1)
-            })
-          };
-
-          @Override
-          public TokenStream getTokenStream() {
-            TokenStream result = tokenStreams[tokenStreamCounter];
-            tokenStreamCounter++;
-            return result;
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    final Analyzer analyzer = new MultiCannedAnalyzer(
+        new CannedTokenStream(
+            token("ab", 1, 1),
+            token("ba", 0, 1),
+            token("xc", 1, 1)),
+        new CannedTokenStream(
+            token("ba", 1, 1),
+            token("xd", 1, 1)),
+        new CannedTokenStream(
+            token("ab",1,1),
+            token("ba",0,1),
+            token("x",1,1)));
 
     Input keys[] = new Input[] {
         new Input("ab xc", 50),
@@ -426,7 +432,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
     AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", analyzer);
     suggester.build(new InputArrayIterator(keys));
     List<LookupResult> results = suggester.lookup("ab x", false, 1);
-    assertTrue(results.size() == 1);
+    assertEquals(1, results.size());
     IOUtils.close(analyzer, tempDir);
   }
 
@@ -459,39 +465,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
   }
   */
 
-  private final Analyzer getUnusualAnalyzer() {
-    return new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-
-          int count;
-
-          @Override
-          public TokenStream getTokenStream() {
-            // 4th time we are called, return tokens a b,
-            // else just a:
-            if (count++ != 3) {
-              return new CannedTokenStream(new Token[] {
-                token("a", 1, 1),
-              });
-            } else {
-              // After that "a b":
-              return new CannedTokenStream(new Token[] {
-                token("a", 1, 1),
-                token("b", 1, 1),
-              });
-            }
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+  private Analyzer getUnusualAnalyzer() {
+    // First three calls just returns "a", then returns ["a","b"], then "a" again
+    return new MultiCannedAnalyzer(
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1), token("b", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)));
   }
 
   public void testExactFirst() throws Exception {
@@ -999,23 +981,10 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
     Analyzer a = new Analyzer() {
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-
-          @Override
-          public TokenStream getTokenStream() {
-            return new CannedTokenStream(new Token[] {
-              token("hairy", 1, 1),
-              token("smelly", 0, 1),
-              token("dog", 1, 1),
-            });
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
+        return new TokenStreamComponents(r -> {}, new CannedTokenStream(
+            token("hairy", 1, 1),
+            token("smelly", 0, 1),
+            token("dog", 1, 1)));
       }
     };
 
@@ -1056,38 +1025,15 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
   }
 
   public void testDupSurfaceFormsMissingResults2() throws Exception {
-    Analyzer a = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-
-          int count;
-
-          @Override
-          public TokenStream getTokenStream() {
-            if (count == 0) {
-              count++;
-              return new CannedTokenStream(new Token[] {
-                token("p", 1, 1),
-                token("q", 1, 1),
-                token("r", 0, 1),
-                token("s", 0, 1),
-              });
-            } else {
-              return new CannedTokenStream(new Token[] {
-                token("p", 1, 1),
-              });
-            }
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    Analyzer a = new MultiCannedAnalyzer(
+        new CannedTokenStream(
+            token("p", 1, 1),
+            token("q", 1, 1),
+            token("r", 0, 1),
+            token("s", 0, 1)),
+        new CannedTokenStream(token("p", 1, 1)),
+        new CannedTokenStream(token("p", 1, 1)),
+        new CannedTokenStream(token("p", 1, 1)));
 
     Directory tempDir = getDirectory();
     AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
@@ -1131,24 +1077,7 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
    * and checks that they come back in surface-form order.
    */
   public void testTieBreakOnSurfaceForm() throws Exception {
-    Analyzer a = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          @Override
-          public TokenStream getTokenStream() {
-            return new CannedTokenStream(new Token[] {
-              token("dog", 1, 1)
-            });
-          }
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    Analyzer a = new MultiCannedAnalyzer(new CannedTokenStream(token("dog", 1, 1)));
 
     Directory tempDir = getDirectory();
     AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
@@ -1187,41 +1116,12 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
   }
 
   public void test0ByteKeys() throws Exception {
-    final Analyzer a = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          int tokenStreamCounter = 0;
-          final TokenStream[] tokenStreams = new TokenStream[] {
-            new CannedBinaryTokenStream(new BinaryToken[] {
-              token(new BytesRef(new byte[] {0x0, 0x0, 0x0})),
-            }),
-            new CannedBinaryTokenStream(new BinaryToken[] {
-              token(new BytesRef(new byte[] {0x0, 0x0})),
-            }),
-            new CannedBinaryTokenStream(new BinaryToken[] {
-              token(new BytesRef(new byte[] {0x0, 0x0, 0x0})),
-            }),
-            new CannedBinaryTokenStream(new BinaryToken[] {
-              token(new BytesRef(new byte[] {0x0, 0x0})),
-            }),
-          };
-
-          @Override
-          public TokenStream getTokenStream() {
-            TokenStream result = tokenStreams[tokenStreamCounter];
-            tokenStreamCounter++;
-            return result;
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    final Analyzer a = new MultiCannedAnalyzer(true,
+        new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0, 0x0}))),
+        new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0}))),
+        new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0, 0x0}))),
+        new CannedBinaryTokenStream(token(new BytesRef(new byte[] {0x0, 0x0})))
+    );
 
     Directory tempDir = getDirectory();
     AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, -1, true);
@@ -1265,26 +1165,13 @@ public class AnalyzingSuggesterTest extends LuceneTestCase {
   public void testTooManyExpansions() throws Exception {
 
     final Analyzer a = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          @Override
-          public TokenStream getTokenStream() {
-            Token a = new Token("a", 0, 1);
-            a.setPositionIncrement(1);
-            Token b = new Token("b", 0, 1);
-            b.setPositionIncrement(0);
-            return new CannedTokenStream(new Token[] {a, b});
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+      @Override
+      protected TokenStreamComponents createComponents(String fieldName) {
+        return new TokenStreamComponents(r -> {}, new CannedTokenStream(
+            new Token("a", 0, 1),
+            new Token("b", 0, 0, 1)));
+      }
+    };
 
     Directory tempDir = getDirectory();
     AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, 0, 256, 1, true);
@@ -17,7 +17,6 @@
 package org.apache.lucene.search.suggest.analyzing;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
@@ -36,7 +35,6 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.TokenStreamToAutomaton;
-import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.search.suggest.Input;
@@ -240,50 +238,24 @@ public class FuzzySuggesterTest extends LuceneTestCase {
 
   public void testGraphDups() throws Exception {
 
-    final Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          int tokenStreamCounter = 0;
-          final TokenStream[] tokenStreams = new TokenStream[] {
-            new CannedTokenStream(new Token[] {
-              token("wifi",1,1),
-              token("hotspot",0,2),
-              token("network",1,1),
-              token("is",1,1),
-              token("slow",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("wi",1,1),
-              token("hotspot",0,3),
-              token("fi",1,1),
-              token("network",1,1),
-              token("is",1,1),
-              token("fast",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("wifi",1,1),
-              token("hotspot",0,2),
-              token("network",1,1)
-            }),
-          };
-
-          @Override
-          public TokenStream getTokenStream() {
-            TokenStream result = tokenStreams[tokenStreamCounter];
-            tokenStreamCounter++;
-            return result;
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    final Analyzer analyzer = new AnalyzingSuggesterTest.MultiCannedAnalyzer(
+        new CannedTokenStream(
+            token("wifi", 1, 1),
+            token("hotspot", 0, 2),
+            token("network", 1, 1),
+            token("is", 1, 1),
+            token("slow", 1, 1)),
+        new CannedTokenStream(
+            token("wi", 1, 1),
+            token("hotspot", 0, 3),
+            token("fi", 1, 1),
+            token("network", 1, 1),
+            token("is", 1, 1),
+            token("fast", 1, 1)),
+        new CannedTokenStream(
+            token("wifi", 1, 1),
+            token("hotspot",0,2),
+            token("network",1,1)));
 
     Input keys[] = new Input[] {
         new Input("wifi network is slow", 50),
@@ -326,43 +298,18 @@ public class FuzzySuggesterTest extends LuceneTestCase {
     // using the above map, so that the suggest module does not need a dependency on the
     // synonym module
 
-    final Analyzer analyzer = new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-          int tokenStreamCounter = 0;
-          final TokenStream[] tokenStreams = new TokenStream[] {
-            new CannedTokenStream(new Token[] {
-              token("ab",1,1),
-              token("ba",0,1),
-              token("xc",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("ba",1,1),
-              token("xd",1,1)
-            }),
-            new CannedTokenStream(new Token[] {
-              token("ab",1,1),
-              token("ba",0,1),
-              token("x",1,1)
-            })
-          };
-
-          @Override
-          public TokenStream getTokenStream() {
-            TokenStream result = tokenStreams[tokenStreamCounter];
-            tokenStreamCounter++;
-            return result;
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+    final Analyzer analyzer = new AnalyzingSuggesterTest.MultiCannedAnalyzer(
+        new CannedTokenStream(
+            token("ab", 1, 1),
+            token("ba", 0, 1),
+            token("xc", 1, 1)),
+        new CannedTokenStream(
+            token("ba", 1, 1),
+            token("xd", 1, 1)),
+        new CannedTokenStream(
+            token("ab", 1, 1),
+            token("ba", 0, 1),
+            token("x", 1, 1)));
 
     Input keys[] = new Input[] {
         new Input("ab xc", 50),
@@ -399,41 +346,17 @@ public class FuzzySuggesterTest extends LuceneTestCase {
     ts.end();
     ts.close();
   }
-  */ 
+  */
 
-  private final Analyzer getUnusualAnalyzer() {
-    return new Analyzer() {
-      @Override
-      protected TokenStreamComponents createComponents(String fieldName) {
-        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
-
-        return new TokenStreamComponents(tokenizer) {
-
-          int count;
-
-          @Override
-          public TokenStream getTokenStream() {
-            // 4th time we are called, return tokens a b,
-            // else just a:
-            if (count++ != 3) {
-              return new CannedTokenStream(new Token[] {
-                token("a", 1, 1),
-              });
-            } else {
-              // After that "a b":
-              return new CannedTokenStream(new Token[] {
-                token("a", 1, 1),
-                token("b", 1, 1),
-              });
-            }
-          }
-
-          @Override
-          protected void setReader(final Reader reader) {
-          }
-        };
-      }
-    };
+  private Analyzer getUnusualAnalyzer() {
+    // First three calls just returns "a", then returns ["a","b"], then "a" again
+    return new AnalyzingSuggesterTest.MultiCannedAnalyzer(
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1), token("b", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)),
+        new CannedTokenStream(token("a", 1, 1)));
   }
 
   public void testExactFirst() throws Exception {
@@ -57,7 +57,7 @@ public final class CannedBinaryTokenStream extends TokenStream {
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
   public CannedBinaryTokenStream(BinaryToken... tokens) {
-    super();
+    super(Token.TOKEN_ATTRIBUTE_FACTORY);
    this.tokens = tokens;
   }
 
@@ -52,7 +52,13 @@ public final class CannedTokenStream extends TokenStream {
     posIncrAtt.setPositionIncrement(finalPosInc);
     offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
+  @Override
+  public void reset() throws IOException {
+    upto = 0;
+    super.reset();
+  }
+
   @Override
   public boolean incrementToken() {
     if (upto < tokens.length) {
@@ -17,21 +17,16 @@
 package org.apache.solr.schema;
 
 import java.io.IOException;
-import java.io.Reader;
 import java.lang.invoke.MethodHandles;
 import java.util.Map;
 
 import org.apache.commons.io.IOUtils;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
 import org.apache.lucene.spatial.prefix.PrefixTreeStrategy;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
 import org.apache.lucene.spatial.query.SpatialArgsParser;
 import org.apache.solr.util.MapListener;
 
 import org.locationtech.spatial4j.shape.Shape;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -80,29 +75,19 @@ public abstract class AbstractSpatialPrefixTreeFieldType<T extends PrefixTreeStr
    * so that the analysis UI will show reasonable tokens.
    */
   @Override
-  public Analyzer getIndexAnalyzer()
-  {
+  public Analyzer getIndexAnalyzer() {
     return new Analyzer() {
 
       @Override
-      protected TokenStreamComponents createComponents(final String fieldName) {
-        return new TokenStreamComponents(new KeywordTokenizer()) {
-          private Shape shape = null;
-
-          protected void setReader(final Reader reader) {
-            source.setReader(reader);
-            try {
-              shape = parseShape(IOUtils.toString(reader));
-            } catch (IOException e) {
-              throw new RuntimeException(e);
-            }
+      protected TokenStreamComponents createComponents(String fieldName) {
+        PrefixTreeStrategy s = newSpatialStrategy(fieldName == null ? getTypeName() : fieldName);
+        PrefixTreeStrategy.ShapeTokenStream ts = s.tokenStream();
+        return new TokenStreamComponents(r -> {
+          try {
+            ts.setShape(parseShape(IOUtils.toString(r)));
+          } catch (IOException e) {
+            throw new RuntimeException(e);
           }
-
-          public TokenStream getTokenStream() {
-            PrefixTreeStrategy s = newSpatialStrategy(fieldName==null ? getTypeName() : fieldName);
-            return s.createIndexableFields(shape)[0].tokenStreamValue();
-          }
-        };
+        }, ts);
       }
     };
   }
@@ -374,18 +374,13 @@ public class PreAnalyzedField extends TextField implements HasImplicitIndexAnaly
       @Override
       protected TokenStreamComponents createComponents(String fieldName) {
        final PreAnalyzedTokenizer tokenizer = new PreAnalyzedTokenizer(parser);
-        return new TokenStreamComponents(tokenizer) {
-          @Override
-          protected void setReader(final Reader reader) {
-            super.setReader(reader);
-            try {
-              tokenizer.decodeInput(reader);
-            } catch (IOException e) {
-              // save this exception for reporting when reset() is called
-              tokenizer.setReaderConsumptionException(e);
-            }
-          }
-        };
+        return new TokenStreamComponents(r -> {
+          try {
+            tokenizer.decodeInput(r);
+          } catch (IOException e) {
+            tokenizer.setReaderConsumptionException(e);
+          }
+        }, tokenizer);
       }
     };
   }
@@ -432,7 +432,7 @@ public class FieldAnalysisRequestHandlerTest extends AnalysisRequestHandlerTestB
     NamedList<NamedList> result = handler.handleAnalysisRequest(request, h.getCore().getLatestSchema());
     NamedList<List<NamedList>> tokens = (NamedList<List<NamedList>>)
         ((NamedList)result.get("field_types").get("location_rpt")).get("index");
-    List<NamedList> tokenList = tokens.get("org.apache.lucene.spatial.prefix.BytesRefIteratorTokenStream");
+    List<NamedList> tokenList = tokens.get("org.apache.lucene.spatial.prefix.PrefixTreeStrategy$ShapeTokenStream");
 
 
     List<String> vals = new ArrayList<>(tokenList.size());