mirror of https://github.com/apache/lucene.git
LUCENE-3766: Remove Tokenizer's default ctor
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1242890 13f79535-47bb-0310-9956-ffa450edef68
parent 18a1d40283
commit 590741dcfe
@@ -784,6 +784,11 @@ API Changes
   Deprecated StandardAnalyzer and ClassicAnalyzer ctors that take File,
   please use the Reader ctor instead.  (Robert Muir)

+* LUCENE-3766: Deprecate no-arg ctors of Tokenizer. Tokenizers are
+  TokenStreams with Readers: tokenizers with null Readers will not be
+  supported in Lucene 4.0, just use a TokenStream.
+  (Mike McCandless, Robert Muir)
+
 New Features

 * LUCENE-3593: Added a FieldValueFilter that accepts all documents that either

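The entry above amounts to a one-way migration: a token source with no character input should extend TokenStream directly rather than construct a Tokenizer around a null Reader. A minimal sketch of that migration, assuming 4.0-era analysis APIs (SingleTokenStream is an illustrative name, not something this commit adds); it is exactly the shape of the OverlappingTokenStream and BugReproTokenStream rewrites further down:

import java.io.IOException;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Instead of "new Tokenizer() { ... }" with a null Reader, extend TokenStream.
final class SingleTokenStream extends TokenStream {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private boolean done = false;

  @Override
  public boolean incrementToken() throws IOException {
    if (done) {
      return false;
    }
    done = true;
    clearAttributes();
    termAtt.append("token"); // emit a single fixed token
    return true;
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    done = false;
  }
}
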
@@ -18,7 +18,6 @@ package org.apache.lucene.search.highlight;
  */

 import java.io.IOException;
-import java.io.Reader;

 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -47,15 +46,7 @@ import org.apache.lucene.util.LuceneTestCase;
 public class TokenSourcesTest extends LuceneTestCase {
   private static final String FIELD = "text";

-  private static final class OverlapAnalyzer extends Analyzer {
-
-    @Override
-    public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new TokenStreamOverlap());
-    }
-  }
-
-  private static final class TokenStreamOverlap extends Tokenizer {
+  private static final class OverlappingTokenStream extends TokenStream {
     private Token[] tokens;

     private int i = -1;
@@ -64,10 +55,6 @@ public class TokenSourcesTest extends LuceneTestCase {
     private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
     private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);

-    public TokenStreamOverlap() {
-      reset();
-    }
-
     @Override
     public boolean incrementToken() throws IOException {
       this.i++;
@@ -102,13 +89,13 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -146,14 +133,14 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
       customType.setStoreTermVectorPositions(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -191,13 +178,13 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -236,13 +223,13 @@ public class TokenSourcesTest extends LuceneTestCase {
     final String TEXT = "the fox did not jump";
     final Directory directory = newDirectory();
     final IndexWriter indexWriter = new IndexWriter(directory,
-        newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
+        newIndexWriterConfig(TEST_VERSION_CURRENT, null));
     try {
       final Document document = new Document();
       FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
       customType.setStoreTermVectors(true);
       customType.setStoreTermVectorOffsets(true);
-      document.add(new Field(FIELD, new TokenStreamOverlap(), customType));
+      document.add(new Field(FIELD, new OverlappingTokenStream(), customType));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();

@@ -298,7 +298,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {

       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY) {
+        Tokenizer ts = new Tokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, reader) {
           final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
           int p = 0;

@@ -34,34 +34,24 @@ public abstract class Tokenizer extends TokenStream {
   /** The text source for this Tokenizer. */
   protected Reader input;

-  /** Construct a tokenizer with null input. */
-  protected Tokenizer() {}
-
   /** Construct a token stream processing the given input. */
   protected Tokenizer(Reader input) {
-    this.input = CharReader.get(input);
+    assert input != null: "input must not be null";
+    this.input = input;
   }

-  /** Construct a tokenizer with null input using the given AttributeFactory. */
-  protected Tokenizer(AttributeFactory factory) {
-    super(factory);
-  }
-
   /** Construct a token stream processing the given input using the given AttributeFactory. */
   protected Tokenizer(AttributeFactory factory, Reader input) {
     super(factory);
-    this.input = CharReader.get(input);
+    assert input != null: "input must not be null";
+    this.input = input;
   }

-  /** Construct a token stream processing the given input using the given AttributeSource. */
-  protected Tokenizer(AttributeSource source) {
-    super(source);
-  }
-
   /** Construct a token stream processing the given input using the given AttributeSource. */
   protected Tokenizer(AttributeSource source, Reader input) {
     super(source);
-    this.input = CharReader.get(input);
+    assert input != null: "input must not be null";
+    this.input = input;
   }

   /** By default, closes the input Reader. */
@@ -82,6 +72,7 @@ public abstract class Tokenizer extends TokenStream {
    * @see CharStream#correctOffset
    */
   protected final int correctOffset(int currentOff) {
+    assert input != null: "this tokenizer is closed";
     return (input instanceof CharStream) ? ((CharStream) input).correctOffset(currentOff) : currentOff;
   }

@@ -89,6 +80,7 @@ public abstract class Tokenizer extends TokenStream {
    * analyzer (in its tokenStream method) will use
    * this to re-use a previously created tokenizer. */
   public void reset(Reader input) throws IOException {
+    assert input != null: "input must not be null";
     this.input = input;
   }
 }

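With the Reader-less ctors gone, a concrete Tokenizer always hands its Reader to super(), and the protected input field is non-null from construction onward. A minimal subclass sketch under the new contract (SingleCharTokenizer is an illustrative name, not part of this commit; a real tokenizer would also override end() and reset()):

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Emits one token per character read from the input Reader.
final class SingleCharTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private int offset = 0;

  SingleCharTokenizer(Reader input) {
    super(input); // the only way in: the ctor asserts input != null
  }

  @Override
  public boolean incrementToken() throws IOException {
    final int c = input.read(); // 'input' is the protected field set by super()
    if (c == -1) {
      return false;
    }
    clearAttributes();
    termAtt.append((char) c);
    offsetAtt.setOffset(correctOffset(offset), correctOffset(offset + 1));
    offset++;
    return true;
  }
}
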
@@ -119,7 +119,7 @@ public class AttributeSource {
   private final Map<Class<? extends AttributeImpl>, AttributeImpl> attributeImpls;
   private final State[] currentState;

-  private AttributeFactory factory;
+  private final AttributeFactory factory;

   /**
    * An AttributeSource using the default attribute factory {@link AttributeSource.AttributeFactory#DEFAULT_ATTRIBUTE_FACTORY}.

@@ -19,6 +19,7 @@ package org.apache.lucene.document;

 import java.io.StringReader;

+import org.apache.lucene.analysis.EmptyTokenizer;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.Fields;
@@ -285,11 +286,7 @@ public class TestDocument extends LuceneTestCase {
   // LUCENE-3616
   public void testInvalidFields() {
     try {
-      new Field("foo", new Tokenizer() {
-        @Override
-        public boolean incrementToken() {
-          return false;
-        }}, StringField.TYPE_STORED);
+      new Field("foo", new EmptyTokenizer(new StringReader("")), StringField.TYPE_STORED);
       fail("did not hit expected exc");
     } catch (IllegalArgumentException iae) {
       // expected

@@ -1569,6 +1569,7 @@ public class TestIndexWriter extends LuceneTestCase {
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

     public StringSplitTokenizer(Reader r) {
+      super(r);
       try {
         reset(r);
       } catch (IOException e) {

@@ -23,7 +23,7 @@ import java.util.List;
 import java.util.Map;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CannedAnalyzer;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockPayloadAnalyzer;
 import org.apache.lucene.analysis.Token;
@@ -72,9 +72,9 @@ public class TestPostingsOffsets extends LuceneTestCase {
       makeToken("a", 1, 9, 17),
       makeToken("c", 1, 19, 50),
     };
-    doc.add(new Field("content", new CannedAnalyzer.CannedTokenizer(tokens), ft));
+    doc.add(new Field("content", new CannedTokenStream(tokens), ft));

-    w.addDocument(doc, new CannedAnalyzer(tokens));
+    w.addDocument(doc);
     IndexReader r = w.getReader();
     w.close();

@@ -281,7 +281,7 @@ public class TestPostingsOffsets extends LuceneTestCase {
         offset += offIncr + tokenOffset;
         //System.out.println("  " + token + " posIncr=" + token.getPositionIncrement() + " pos=" + pos + " off=" + token.startOffset() + "/" + token.endOffset() + " (freq=" + postingsByDoc.get(docCount).size() + ")");
       }
-      doc.add(new Field("content", new CannedAnalyzer.CannedTokenizer(tokens.toArray(new Token[tokens.size()])), ft));
+      doc.add(new Field("content", new CannedTokenStream(tokens.toArray(new Token[tokens.size()])), ft));
       w.addDocument(doc);
     }
     final DirectoryReader r = w.getReader();

@@ -18,15 +18,12 @@ package org.apache.lucene.index;
  */

 import java.io.IOException;
-import java.io.Reader;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -39,9 +36,9 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   */
  public void test() throws Exception {
    Directory dir = newDirectory();
-    RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer()));
+    RandomIndexWriter riw = new RandomIndexWriter(random, dir);
    Document doc = new Document();
-    doc.add(new Field("eng", "Six drunken", TextField.TYPE_STORED /*This shouldn't matter. */));
+    doc.add(new TextField("eng", new BugReproTokenStream()));
    riw.addDocument(doc);
    riw.close();
    dir.close();
@@ -52,10 +49,10 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   */
  public void testMoreDocs() throws Exception {
    Directory dir = newDirectory();
-    RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer()));
-    Document doc = new Document();
-    doc.add(new Field("eng", "Six drunken", TextField.TYPE_STORED /*This shouldn't matter. */));
+    RandomIndexWriter riw = new RandomIndexWriter(random, dir);
    for (int i = 0; i < 100; i++) {
+      Document doc = new Document();
+      doc.add(new TextField("eng", new BugReproTokenStream()));
      riw.addDocument(doc);
    }
    riw.close();
@@ -63,14 +60,7 @@ public class TestSameTokenSamePosition extends LuceneTestCase {
   }
 }

-final class BugReproAnalyzer extends Analyzer {
-  @Override
-  public TokenStreamComponents createComponents(String arg0, Reader arg1) {
-    return new TokenStreamComponents(new BugReproAnalyzerTokenizer());
-  }
-}
-
-final class BugReproAnalyzerTokenizer extends Tokenizer {
+final class BugReproTokenStream extends TokenStream {
   private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
   private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
   private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

@@ -137,14 +137,15 @@ public class TestTermVectorsReader extends LuceneTestCase {
     super.tearDown();
   }

-  private class MyTokenStream extends Tokenizer {
+  private class MyTokenizer extends Tokenizer {
     private int tokenUpto;

     private final CharTermAttribute termAtt;
     private final PositionIncrementAttribute posIncrAtt;
     private final OffsetAttribute offsetAtt;

-    public MyTokenStream() {
+    public MyTokenizer(Reader reader) {
+      super(reader);
       termAtt = addAttribute(CharTermAttribute.class);
       posIncrAtt = addAttribute(PositionIncrementAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
@@ -178,7 +179,7 @@ public class TestTermVectorsReader extends LuceneTestCase {
   private class MyAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new MyTokenStream());
+      return new TokenStreamComponents(new MyTokenizer(reader));
     }
   }

@@ -32,7 +32,7 @@ import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;

-class RepeatingTokenStream extends Tokenizer {
+class RepeatingTokenizer extends Tokenizer {

   private final Random random;
   private final float percentDocs;
@@ -41,7 +41,8 @@ class RepeatingTokenStream extends Tokenizer {
   CharTermAttribute termAtt;
   String value;

-  public RepeatingTokenStream(String val, Random random, float percentDocs, int maxTF) {
+  public RepeatingTokenizer(Reader reader, String val, Random random, float percentDocs, int maxTF) {
+    super(reader);
     this.value = val;
     this.random = random;
     this.percentDocs = percentDocs;
@@ -75,12 +76,11 @@ class RepeatingTokenStream extends Tokenizer {
 public class TestTermdocPerf extends LuceneTestCase {

   void addDocs(final Random random, Directory dir, final int ndocs, String field, final String val, final int maxTF, final float percentDocs) throws IOException {
-    final RepeatingTokenStream ts = new RepeatingTokenStream(val, random, percentDocs, maxTF);

     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        return new TokenStreamComponents(ts);
+        return new TokenStreamComponents(new RepeatingTokenizer(reader, val, random, percentDocs, maxTF));
       }
     };

@@ -18,19 +18,11 @@ package org.apache.lucene.search;
  */

 import java.io.IOException;
-import java.io.Reader;
 import java.util.Collection;
 import java.util.LinkedList;

-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.CannedAnalyzer;
+import org.apache.lucene.analysis.CannedTokenStream;
 import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
@@ -46,7 +38,6 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util.TermContext;

 /**
  * This class tests the MultiPhraseQuery class.
@@ -336,10 +327,12 @@ public class TestMultiPhraseQuery extends LuceneTestCase {
     tokens[2].append("c");
     tokens[2].setPositionIncrement(0);

-    RandomIndexWriter writer = new RandomIndexWriter(random, dir, new CannedAnalyzer(tokens));
+    RandomIndexWriter writer = new RandomIndexWriter(random, dir);
     Document doc = new Document();
-    doc.add(new TextField("field", ""));
+    doc.add(new TextField("field", new CannedTokenStream(tokens)));
     writer.addDocument(doc);
+    doc = new Document();
+    doc.add(new TextField("field", new CannedTokenStream(tokens)));
     writer.addDocument(doc);
     IndexReader r = writer.getReader();
     writer.close();
@@ -434,10 +427,10 @@ public class TestMultiPhraseQuery extends LuceneTestCase {

   private void doTestZeroPosIncrSloppy(Query q, int nExpected) throws IOException {
     Directory dir = newDirectory(); // random dir
-    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS));
+    IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, null);
     IndexWriter writer = new IndexWriter(dir, cfg);
     Document doc = new Document();
-    doc.add(new TextField("field", ""));
+    doc.add(new TextField("field", new CannedTokenStream(INCR_0_DOC_TOKENS)));
     writer.addDocument(doc);
     IndexReader r = IndexReader.open(writer,false);
     writer.close();

@@ -58,7 +58,7 @@ public class TestPositionIncrement extends LuceneTestCase {
     Analyzer analyzer = new Analyzer() {
       @Override
       public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-        return new TokenStreamComponents(new Tokenizer() {
+        return new TokenStreamComponents(new Tokenizer(reader) {
           private final String[] TOKENS = {"1", "2", "3", "4", "5"};
           private final int[] INCREMENTS = {0, 2, 1, 0, 1};
           private int i = 0;

@@ -1,73 +0,0 @@
-package org.apache.lucene.analysis;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.Reader;
-import java.io.IOException;
-
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-
-public class CannedAnalyzer extends Analyzer {
-  private final Token[] tokens;
-
-  public CannedAnalyzer(Token[] tokens) {
-    this.tokens = tokens;
-  }
-
-  @Override
-  public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-    return new TokenStreamComponents(new CannedTokenizer(tokens));
-  }
-
-  public static class CannedTokenizer extends Tokenizer {
-    private final Token[] tokens;
-    private int upto = 0;
-    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
-
-    public CannedTokenizer(Token[] tokens) {
-      this.tokens = tokens;
-    }
-
-    @Override
-    public final boolean incrementToken() throws IOException {
-      if (upto < tokens.length) {
-        final Token token = tokens[upto++];
-        // TODO: can we just capture/restoreState so
-        // we get all attrs...?
-        clearAttributes();
-        termAtt.setEmpty();
-        termAtt.append(token.toString());
-        posIncrAtt.setPositionIncrement(token.getPositionIncrement());
-        offsetAtt.setOffset(token.startOffset(), token.endOffset());
-        return true;
-      } else {
-        return false;
-      }
-    }
-
-    @Override
-    public void reset() throws IOException {
-      super.reset();
-      this.upto = 0;
-    }
-  }
-}

@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+
+/**
+ * emits a canned set of {@link Token}
+ */
+public final class CannedTokenStream extends TokenStream {
+  private final Token[] tokens;
+  private int upto = 0;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  public CannedTokenStream(Token[] tokens) {
+    this.tokens = tokens;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (upto < tokens.length) {
+      final Token token = tokens[upto++];
+      // TODO: can we just capture/restoreState so
+      // we get all attrs...?
+      clearAttributes();
+      termAtt.setEmpty();
+      termAtt.append(token.toString());
+      posIncrAtt.setPositionIncrement(token.getPositionIncrement());
+      offsetAtt.setOffset(token.startOffset(), token.endOffset());
+      return true;
+    } else {
+      return false;
+    }
+  }
+}

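Since CannedTokenStream needs no Analyzer wrapper, tests hand it straight to a field, as the TestPostingsOffsets and TestMultiPhraseQuery hunks above do. A hypothetical fragment, assuming the 4.0-era Token(String, int, int) and TextField(String, TokenStream) ctors (addCannedDoc is an illustrative helper, not part of the commit):

import java.io.IOException;

import org.apache.lucene.analysis.CannedTokenStream;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.RandomIndexWriter;

class CannedTokenStreamExample {
  // Index a predetermined token sequence without any Analyzer shim.
  static void addCannedDoc(RandomIndexWriter writer) throws IOException {
    Token fox = new Token("fox", 0, 3);     // term text, start offset, end offset
    Token jumps = new Token("jumps", 4, 9);
    Document doc = new Document();
    doc.add(new TextField("body", new CannedTokenStream(new Token[] { fox, jumps })));
    writer.addDocument(doc);
  }
}
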
@@ -0,0 +1,36 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Emits no tokens
+ */
+public final class EmptyTokenizer extends Tokenizer {
+
+  public EmptyTokenizer(Reader input) {
+    super(input);
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    return false;
+  }
+}

@@ -99,29 +99,28 @@ public final class ClassicTokenizer extends Tokenizer {
    * See http://issues.apache.org/jira/browse/LUCENE-1068
    */
   public ClassicTokenizer(Version matchVersion, Reader input) {
-    super();
-    init(input, matchVersion);
+    super(input);
+    init(matchVersion);
   }

   /**
    * Creates a new ClassicTokenizer with a given {@link AttributeSource}.
    */
   public ClassicTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source);
-    init(input, matchVersion);
+    super(source, input);
+    init(matchVersion);
   }

   /**
    * Creates a new ClassicTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
    */
   public ClassicTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
-    super(factory);
-    init(input, matchVersion);
+    super(factory, input);
+    init(matchVersion);
   }

-  private void init(Reader input, Version matchVersion) {
+  private void init(Version matchVersion) {
     this.scanner = new ClassicTokenizerImpl(input);
-    this.input = input;
   }

   // this tokenizer generates three attributes:

@@ -125,27 +125,27 @@ public final class StandardTokenizer extends Tokenizer {
    * See http://issues.apache.org/jira/browse/LUCENE-1068
    */
   public StandardTokenizer(Version matchVersion, Reader input) {
-    super();
-    init(input, matchVersion);
+    super(input);
+    init(matchVersion);
   }

   /**
    * Creates a new StandardTokenizer with a given {@link AttributeSource}.
    */
   public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
-    super(source);
-    init(input, matchVersion);
+    super(source, input);
+    init(matchVersion);
   }

   /**
    * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
    */
   public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
-    super(factory);
-    init(input, matchVersion);
+    super(factory, input);
+    init(matchVersion);
   }

-  private final void init(Reader input, Version matchVersion) {
+  private final void init(Version matchVersion) {
     if (matchVersion.onOrAfter(Version.LUCENE_34)) {
       this.scanner = new StandardTokenizerImpl(input);
     } else if (matchVersion.onOrAfter(Version.LUCENE_31)) {
@@ -153,7 +153,6 @@ public final class StandardTokenizer extends Tokenizer {
     } else {
       this.scanner = new ClassicTokenizerImpl(input);
     }
-    this.input = input;
   }

   // this tokenizer generates three attributes:

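Caller-side construction of these tokenizers is unchanged; the Reader simply reaches the Tokenizer superclass before init(matchVersion) builds the scanner. A hypothetical call, assuming a 4.0-era Version constant (the helper class and method names are illustrative):

import java.io.StringReader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;

class TokenizerFactoryExample {
  // The Reader now flows through super(input) instead of a post-hoc init(input, ...).
  static Tokenizer newStandardTokenizer(String text) {
    return new StandardTokenizer(Version.LUCENE_40, new StringReader(text));
  }
}
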
@@ -17,7 +17,6 @@ package org.apache.lucene.queryparser.classic;
  * limitations under the License.
  */

-import java.io.IOException;
 import java.io.Reader;
 import java.util.HashMap;
 import java.util.Map;
@@ -27,11 +26,11 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

@@ -312,18 +311,11 @@ public class TestMultiFieldQueryParser extends LuceneTestCase {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenStream());
+        return new TokenStreamComponents(new EmptyTokenizer(reader));
       } else {
         return stdAnalyzer.createComponents(fieldName, reader);
       }
     }
-
-    private static class EmptyTokenStream extends Tokenizer {
-      @Override
-      public boolean incrementToken() throws IOException {
-        return false;
-      }
-    }
   }

 }

@@ -50,7 +50,7 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {

     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
-      return new TokenStreamComponents(new CannedTokenizer(tokens));
+      return new TokenStreamComponents(new CannedTokenizer(reader, tokens));
     }
   }

@@ -61,7 +61,8 @@ public class TestMultiPhraseQueryParsing extends LuceneTestCase {
     private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);

-    public CannedTokenizer(TokenAndPos[] tokens) {
+    public CannedTokenizer(Reader reader, TokenAndPos[] tokens) {
+      super(reader);
       this.tokens = tokens;
     }

@@ -17,7 +17,9 @@ package org.apache.lucene.queryparser.flexible.standard;
  * limitations under the License.
  */

+import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 import java.util.HashMap;
 import java.util.Map;

@@ -28,11 +30,11 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
 import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;

@@ -350,18 +352,11 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
     @Override
     public TokenStreamComponents createComponents(String fieldName, Reader reader) {
       if ("f1".equals(fieldName)) {
-        return new TokenStreamComponents(new EmptyTokenStream());
+        return new TokenStreamComponents(new EmptyTokenizer(reader));
       } else {
         return stdAnalyzer.createComponents(fieldName, reader);
       }
     }
-
-    private static class EmptyTokenStream extends Tokenizer {
-      @Override
-      public boolean incrementToken() {
-        return false;
-      }
-    }
   }

 }

@@ -19,6 +19,7 @@ package org.apache.lucene.queryparser.flexible.standard;

 import java.io.IOException;
+import java.io.Reader;
 import java.io.StringReader;
 import java.text.DateFormat;
 import java.util.Calendar;
 import java.util.Date;
@@ -35,18 +36,19 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
 import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
 import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
 import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
 import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
 import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl;
 import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorPipeline;
+import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
 import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
 import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
+import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FuzzyQuery;
@@ -62,7 +64,6 @@ import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.WildcardQuery;
-import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.BasicAutomata;
@@ -1215,11 +1216,15 @@ public class TestQPHelper extends LuceneTestCase {
     super.tearDown();
   }

-  private class CannedTokenStream extends Tokenizer {
+  private class CannedTokenizer extends Tokenizer {
     private int upto = 0;
     private final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
     private final CharTermAttribute term = addAttribute(CharTermAttribute.class);

+    public CannedTokenizer(Reader reader) {
+      super(reader);
+    }
+
     @Override
     public boolean incrementToken() {
       clearAttributes();
@@ -1253,7 +1258,7 @@ public class TestQPHelper extends LuceneTestCase {
   private class CannedAnalyzer extends Analyzer {
     @Override
     public TokenStreamComponents createComponents(String ignored, Reader alsoIgnored) {
-      return new TokenStreamComponents(new CannedTokenStream());
+      return new TokenStreamComponents(new CannedTokenizer(alsoIgnored));
     }
   }

@@ -67,7 +67,7 @@ final class TrieTokenizer extends Tokenizer {

   public TrieTokenizer(Reader input, TrieTypes type, int precisionStep, NumericTokenStream ts) {
     // must share the attribute source with the NumericTokenStream we delegate to
-    super(ts);
+    super(ts, input);
     this.type = type;
     this.precisionStep = precisionStep;
     this.ts = ts;