lucene 4: implement createComponents in Analyzers

Igor Motov 2012-10-26 00:07:20 -04:00 committed by Shay Banon
parent 6fad75df82
commit 1cc5ee7ad9
4 changed files with 24 additions and 78 deletions
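
Background for the change: Lucene 4 drops the old Analyzer entry points tokenStream() and reusableTokenStream() in favor of a single protected hook, createComponents(), which returns a TokenStreamComponents pair (the source Tokenizer plus the end of the filter chain). The base Analyzer caches these components and reuses them across calls, so per-analyzer reuse code becomes unnecessary. A minimal sketch of the new contract against the Lucene 4.0 API (illustrative only; SketchAnalyzer is a made-up name, not code from this commit):

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.LowerCaseFilter;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.util.Version;

    // Sketch of the contract this commit adopts: describe the chain once in
    // createComponents(); the base Analyzer caches and reuses the result.
    public class SketchAnalyzer extends Analyzer {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new WhitespaceTokenizer(Version.LUCENE_40, reader);
            TokenStream sink = new LowerCaseFilter(Version.LUCENE_40, source);
            return new TokenStreamComponents(source, sink);
        }
    }

This is why each analyzer below can delete its hand-rolled reuse logic, such as the Holder classes and the getPreviousTokenStream()/setPreviousTokenStream() calls.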

org/elasticsearch/index/analysis/CustomAnalyzer.java

@@ -19,10 +19,10 @@
 package org.elasticsearch.index.analysis;

-import org.apache.lucene.analysis.*;
-import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
-import java.io.IOException;
 import java.io.Reader;

 /**
@@ -71,7 +71,7 @@ public final class CustomAnalyzer extends Analyzer {
     }

     @Override
-    public int getOffsetGap(Fieldable field) {
+    public int getOffsetGap(String field) {
         if (offsetGap < 0) {
             return super.getOffsetGap(field);
         }
@@ -79,29 +79,13 @@ public final class CustomAnalyzer extends Analyzer {
     }

     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-        return buildHolder(reader).tokenStream;
-    }
-
-    @Override
-    public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-        Holder holder = (Holder) getPreviousTokenStream();
-        if (holder == null) {
-            holder = buildHolder(charFilterIfNeeded(reader));
-            setPreviousTokenStream(holder);
-        } else {
-            holder.tokenizer.reset(charFilterIfNeeded(reader));
-        }
-        return holder.tokenStream;
-    }
-
-    private Holder buildHolder(Reader input) {
-        Tokenizer tokenizer = tokenizerFactory.create(input);
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+        Tokenizer tokenizer = tokenizerFactory.create(charFilterIfNeeded(reader));
         TokenStream tokenStream = tokenizer;
         for (TokenFilterFactory tokenFilter : tokenFilters) {
             tokenStream = tokenFilter.create(tokenStream);
         }
-        return new Holder(tokenizer, tokenStream);
+        return new TokenStreamComponents(tokenizer, tokenStream);
     }

     private Reader charFilterIfNeeded(Reader reader) {
@@ -113,13 +97,4 @@ public final class CustomAnalyzer extends Analyzer {
         return reader;
     }
-
-    static class Holder {
-        final Tokenizer tokenizer;
-        final TokenStream tokenStream;
-
-        private Holder(Tokenizer tokenizer, TokenStream tokenStream) {
-            this.tokenizer = tokenizer;
-            this.tokenStream = tokenStream;
-        }
-    }
 }
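
The Holder and getPreviousTokenStream() machinery deleted above is now supplied by the base class: Analyzer.tokenStream() builds the components once per thread via createComponents(), then resets and reuses the same Tokenizer on later calls. A small usage sketch against the Lucene 4.0 API (WhitespaceAnalyzer stands in for a configured CustomAnalyzer; not code from this commit):

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class ReuseDemo {
        public static void main(String[] args) throws IOException {
            Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40);
            for (String text : new String[]{"first text", "second text"}) {
                // tokenStream() calls createComponents() on the first use only;
                // afterwards the cached Tokenizer is reset with the new reader.
                TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
                CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    System.out.println(term.toString());
                }
                ts.end();
                ts.close(); // release the stream so the components can be reused
            }
        }
    }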

org/elasticsearch/index/analysis/NamedAnalyzer.java

@@ -20,17 +20,15 @@
 package org.elasticsearch.index.analysis;

 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.analysis.AnalyzerWrapper;
-import java.io.IOException;
 import java.io.Reader;

 /**
  * Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer} that is associated
  * with a name ({@link #name()}.
  */
-public class NamedAnalyzer extends Analyzer {
+public class NamedAnalyzer extends AnalyzerWrapper {

     private final String name;
@@ -70,28 +68,13 @@ public class NamedAnalyzer extends Analyzer {
     }

     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
-        return analyzer.tokenStream(fieldName, reader);
+    protected Analyzer getWrappedAnalyzer(String fieldName) {
+        return this.analyzer;
     }

     @Override
-    public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-        return analyzer.reusableTokenStream(fieldName, reader);
-    }
-
-    @Override
-    public int getPositionIncrementGap(String fieldName) {
-        return analyzer.getPositionIncrementGap(fieldName);
-    }
-
-    @Override
-    public int getOffsetGap(Fieldable field) {
-        return analyzer.getOffsetGap(field);
-    }
-
-    @Override
-    public void close() {
-        analyzer.close();
+    protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
+        return components;
     }

     @Override
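
AnalyzerWrapper, new in Lucene 4, already forwards getPositionIncrementGap(), getOffsetGap(), and stream construction to whatever getWrappedAnalyzer() returns, which is why those overrides could simply be deleted here. A minimal sketch of the two hooks a wrapper implements, in the same spirit as NamedAnalyzer (the class name DelegatingAnalyzer is hypothetical):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.AnalyzerWrapper;

    // Hypothetical wrapper: delegate everything to one fixed analyzer.
    public class DelegatingAnalyzer extends AnalyzerWrapper {

        private final Analyzer delegate;

        public DelegatingAnalyzer(Analyzer delegate) {
            this.delegate = delegate;
        }

        @Override
        protected Analyzer getWrappedAnalyzer(String fieldName) {
            // Called per field; a wrapper could route to different analyzers here.
            return delegate;
        }

        @Override
        protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
            // Hook for wrapping the delegate's chain; pass it through unchanged.
            return components;
        }
    }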

org/elasticsearch/index/analysis/NumericAnalyzer.java

@@ -31,27 +31,16 @@ import java.io.Reader;
 public abstract class NumericAnalyzer<T extends NumericTokenizer> extends Analyzer {

     @Override
-    public final TokenStream tokenStream(String fieldName, Reader reader) {
+    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
         try {
-            return createNumericTokenizer(reader, new char[32]);
+            // LUCENE 4 UPGRADE: in reusableTokenStream the buffer size was char[120]
+            // Not sure if this is intentional or not
+            return new TokenStreamComponents(createNumericTokenizer(reader, new char[32]));
         } catch (IOException e) {
             throw new RuntimeException("Failed to create numeric tokenizer", e);
         }
     }

-    @Override
-    public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
-        Holder holder = (Holder) getPreviousTokenStream();
-        if (holder == null) {
-            char[] buffer = new char[120];
-            holder = new Holder(createNumericTokenizer(reader, buffer), buffer);
-            setPreviousTokenStream(holder);
-        } else {
-            holder.tokenizer.reset(reader, holder.buffer);
-        }
-        return holder.tokenizer;
-    }
-
     protected abstract T createNumericTokenizer(Reader reader, char[] buffer) throws IOException;

-    private static final class Holder {
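
The single-argument TokenStreamComponents constructor used above covers the case where the Tokenizer is the entire chain, i.e. source and sink are the same object. A trivial sketch under that assumption (KeywordTokenizer stands in for a numeric tokenizer; not code from this commit):

    import java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.KeywordTokenizer;

    public class SingleTokenizerAnalyzer extends Analyzer {
        @Override
        protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
            Tokenizer source = new KeywordTokenizer(reader);
            // One-arg constructor: the tokenizer is both source and sink.
            return new TokenStreamComponents(source);
        }
    }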

org/elasticsearch/index/analysis/NumericTokenizer.java

@@ -35,27 +35,26 @@ public abstract class NumericTokenizer extends Tokenizer {
     protected final Object extra;

     protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, Object extra) throws IOException {
-        super(numericTokenStream);
+        super(numericTokenStream, reader);
         this.numericTokenStream = numericTokenStream;
         this.extra = extra;
-        reset(reader);
+        reset();
     }

     protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException {
-        super(numericTokenStream);
+        super(numericTokenStream, reader);
         this.numericTokenStream = numericTokenStream;
         this.extra = extra;
-        reset(reader, buffer);
+        reset(buffer);
     }

     @Override
-    public void reset(Reader input) throws IOException {
+    public void reset() throws IOException {
         char[] buffer = new char[32];
-        reset(input, buffer);
+        reset(buffer);
     }

-    public void reset(Reader input, char[] buffer) throws IOException {
-        super.reset(input);
+    public void reset(char[] buffer) throws IOException {
         int len = input.read(buffer);
         String value = new String(buffer, 0, len);
         setValue(numericTokenStream, value);
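
The pattern behind these signature changes: a Lucene 4 Tokenizer receives its Reader through the constructor and keeps it in the inherited protected field input, so reset() no longer takes a Reader and subclasses read from input directly. A toy tokenizer illustrating the same pattern against the Lucene 4.0 API (WholeInputTokenizer is a made-up example, not part of the commit):

    import java.io.IOException;
    import java.io.Reader;

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    // Toy example: emits the first chunk of input as a single token, reading
    // from the inherited "input" field rather than a Reader argument, just as
    // NumericTokenizer now does.
    public class WholeInputTokenizer extends Tokenizer {

        private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
        private boolean done = false;

        public WholeInputTokenizer(Reader reader) {
            super(reader); // the base class stores the reader in this.input
        }

        @Override
        public boolean incrementToken() throws IOException {
            if (done) {
                return false;
            }
            done = true;
            clearAttributes();
            char[] buffer = new char[64];
            int len = input.read(buffer);
            if (len <= 0) {
                return false;
            }
            termAtt.copyBuffer(buffer, 0, len);
            return true;
        }

        @Override
        public void reset() throws IOException {
            super.reset();
            done = false;
        }
    }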