lucene 4: implement createComponents in Analyzers

This commit is contained in:
Igor Motov 2012-10-26 00:07:20 -04:00 committed by Shay Banon
parent 6fad75df82
commit 1cc5ee7ad9
4 changed files with 24 additions and 78 deletions

View File

@ -19,10 +19,10 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import java.io.IOException;
import java.io.Reader; import java.io.Reader;
/** /**
@ -71,7 +71,7 @@ public final class CustomAnalyzer extends Analyzer {
} }
@Override @Override
public int getOffsetGap(Fieldable field) { public int getOffsetGap(String field) {
if (offsetGap < 0) { if (offsetGap < 0) {
return super.getOffsetGap(field); return super.getOffsetGap(field);
} }
@ -79,29 +79,13 @@ public final class CustomAnalyzer extends Analyzer {
} }
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
return buildHolder(reader).tokenStream; Tokenizer tokenizer = tokenizerFactory.create(charFilterIfNeeded(reader));
}
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
Holder holder = (Holder) getPreviousTokenStream();
if (holder == null) {
holder = buildHolder(charFilterIfNeeded(reader));
setPreviousTokenStream(holder);
} else {
holder.tokenizer.reset(charFilterIfNeeded(reader));
}
return holder.tokenStream;
}
private Holder buildHolder(Reader input) {
Tokenizer tokenizer = tokenizerFactory.create(input);
TokenStream tokenStream = tokenizer; TokenStream tokenStream = tokenizer;
for (TokenFilterFactory tokenFilter : tokenFilters) { for (TokenFilterFactory tokenFilter : tokenFilters) {
tokenStream = tokenFilter.create(tokenStream); tokenStream = tokenFilter.create(tokenStream);
} }
return new Holder(tokenizer, tokenStream); return new TokenStreamComponents(tokenizer, tokenStream);
} }
private Reader charFilterIfNeeded(Reader reader) { private Reader charFilterIfNeeded(Reader reader) {
@ -113,13 +97,4 @@ public final class CustomAnalyzer extends Analyzer {
return reader; return reader;
} }
static class Holder {
final Tokenizer tokenizer;
final TokenStream tokenStream;
private Holder(Tokenizer tokenizer, TokenStream tokenStream) {
this.tokenizer = tokenizer;
this.tokenStream = tokenStream;
}
}
} }

View File

@ -20,17 +20,15 @@
package org.elasticsearch.index.analysis; package org.elasticsearch.index.analysis;
import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.document.Fieldable;
import java.io.IOException;
import java.io.Reader; import java.io.Reader;
/** /**
* Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer}) that is associated * Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer}) that is associated
* with a name ({@link #name()}). * with a name ({@link #name()}).
*/ */
public class NamedAnalyzer extends Analyzer { public class NamedAnalyzer extends AnalyzerWrapper {
private final String name; private final String name;
@ -70,28 +68,13 @@ public class NamedAnalyzer extends Analyzer {
} }
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { protected Analyzer getWrappedAnalyzer(String fieldName) {
return analyzer.tokenStream(fieldName, reader); return this.analyzer;
} }
@Override @Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
return analyzer.reusableTokenStream(fieldName, reader); return components;
}
@Override
public int getPositionIncrementGap(String fieldName) {
return analyzer.getPositionIncrementGap(fieldName);
}
@Override
public int getOffsetGap(Fieldable field) {
return analyzer.getOffsetGap(field);
}
@Override
public void close() {
analyzer.close();
} }
@Override @Override

View File

@ -31,27 +31,16 @@ import java.io.Reader;
public abstract class NumericAnalyzer<T extends NumericTokenizer> extends Analyzer { public abstract class NumericAnalyzer<T extends NumericTokenizer> extends Analyzer {
@Override @Override
public final TokenStream tokenStream(String fieldName, Reader reader) { protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
try { try {
return createNumericTokenizer(reader, new char[32]); // LUCENE 4 UPGRADE: in reusableTokenStream the buffer size was char[120]
// Not sure if this is intentional or not
return new TokenStreamComponents(createNumericTokenizer(reader, new char[32]));
} catch (IOException e) { } catch (IOException e) {
throw new RuntimeException("Failed to create numeric tokenizer", e); throw new RuntimeException("Failed to create numeric tokenizer", e);
} }
} }
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
Holder holder = (Holder) getPreviousTokenStream();
if (holder == null) {
char[] buffer = new char[120];
holder = new Holder(createNumericTokenizer(reader, buffer), buffer);
setPreviousTokenStream(holder);
} else {
holder.tokenizer.reset(reader, holder.buffer);
}
return holder.tokenizer;
}
protected abstract T createNumericTokenizer(Reader reader, char[] buffer) throws IOException; protected abstract T createNumericTokenizer(Reader reader, char[] buffer) throws IOException;
private static final class Holder { private static final class Holder {

View File

@ -35,27 +35,26 @@ public abstract class NumericTokenizer extends Tokenizer {
protected final Object extra; protected final Object extra;
protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, Object extra) throws IOException { protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, Object extra) throws IOException {
super(numericTokenStream); super(numericTokenStream, reader);
this.numericTokenStream = numericTokenStream; this.numericTokenStream = numericTokenStream;
this.extra = extra; this.extra = extra;
reset(reader); reset();
} }
protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException { protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException {
super(numericTokenStream); super(numericTokenStream, reader);
this.numericTokenStream = numericTokenStream; this.numericTokenStream = numericTokenStream;
this.extra = extra; this.extra = extra;
reset(reader, buffer); reset(buffer);
} }
@Override @Override
public void reset(Reader input) throws IOException { public void reset() throws IOException {
char[] buffer = new char[32]; char[] buffer = new char[32];
reset(input, buffer); reset(buffer);
} }
public void reset(Reader input, char[] buffer) throws IOException { public void reset(char[] buffer) throws IOException {
super.reset(input);
int len = input.read(buffer); int len = input.read(buffer);
String value = new String(buffer, 0, len); String value = new String(buffer, 0, len);
setValue(numericTokenStream, value); setValue(numericTokenStream, value);