From 1cc5ee7ad9be4412392b0f7e4de53c1c8b5fdd10 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 00:07:20 -0400 Subject: [PATCH] lucene 4: implement createComponents in Analyzers --- .../index/analysis/CustomAnalyzer.java | 39 ++++--------------- .../index/analysis/NamedAnalyzer.java | 29 +++----------- .../index/analysis/NumericAnalyzer.java | 19 ++------- .../index/analysis/NumericTokenizer.java | 15 ++++--- 4 files changed, 24 insertions(+), 78 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java index 15d6a706006..a43b2185974 100644 --- a/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java @@ -19,10 +19,10 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; -import java.io.IOException; import java.io.Reader; /** @@ -71,7 +71,7 @@ public final class CustomAnalyzer extends Analyzer { } @Override - public int getOffsetGap(Fieldable field) { + public int getOffsetGap(String field) { if (offsetGap < 0) { return super.getOffsetGap(field); } @@ -79,29 +79,13 @@ public final class CustomAnalyzer extends Analyzer { } @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return buildHolder(reader).tokenStream; - } - - @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - Holder holder = (Holder) getPreviousTokenStream(); - if (holder == null) { - holder = buildHolder(charFilterIfNeeded(reader)); - setPreviousTokenStream(holder); - } else { - holder.tokenizer.reset(charFilterIfNeeded(reader)); - } - return holder.tokenStream; - } - - private Holder buildHolder(Reader input) { - Tokenizer tokenizer = tokenizerFactory.create(input); + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = tokenizerFactory.create(charFilterIfNeeded(reader)); TokenStream tokenStream = tokenizer; for (TokenFilterFactory tokenFilter : tokenFilters) { tokenStream = tokenFilter.create(tokenStream); } - return new Holder(tokenizer, tokenStream); + return new TokenStreamComponents(tokenizer, tokenStream); } private Reader charFilterIfNeeded(Reader reader) { @@ -113,13 +97,4 @@ public final class CustomAnalyzer extends Analyzer { return reader; } - static class Holder { - final Tokenizer tokenizer; - final TokenStream tokenStream; - - private Holder(Tokenizer tokenizer, TokenStream tokenStream) { - this.tokenizer = tokenizer; - this.tokenStream = tokenStream; - } - } } diff --git a/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java index 823286c244b..70cff9e27a0 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java @@ -20,17 +20,15 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.analysis.AnalyzerWrapper; -import java.io.IOException; import java.io.Reader; /** * Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer} that is associated * with a name ({@link #name()}. */ -public class NamedAnalyzer extends Analyzer { +public class NamedAnalyzer extends AnalyzerWrapper { private final String name; @@ -70,28 +68,13 @@ public class NamedAnalyzer extends Analyzer { } @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return analyzer.tokenStream(fieldName, reader); + protected Analyzer getWrappedAnalyzer(String fieldName) { + return this.analyzer; } @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - return analyzer.reusableTokenStream(fieldName, reader); - } - - @Override - public int getPositionIncrementGap(String fieldName) { - return analyzer.getPositionIncrementGap(fieldName); - } - - @Override - public int getOffsetGap(Fieldable field) { - return analyzer.getOffsetGap(field); - } - - @Override - public void close() { - analyzer.close(); + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + return components; } @Override diff --git a/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java index 24db00cfe3f..1b20e95a504 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java @@ -31,27 +31,16 @@ import java.io.Reader; public abstract class NumericAnalyzer extends Analyzer { @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { try { - return createNumericTokenizer(reader, new char[32]); + // LUCENE 4 UPGRADE: in reusableTokenStream the buffer size was char[120] + // Not sure if this is intentional or not + return new TokenStreamComponents(createNumericTokenizer(reader, new char[32])); } catch (IOException e) { throw new RuntimeException("Failed to create numeric tokenizer", e); } } - @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - Holder holder = (Holder) getPreviousTokenStream(); - if (holder == null) { - char[] buffer = new char[120]; - holder = new Holder(createNumericTokenizer(reader, buffer), buffer); - setPreviousTokenStream(holder); - } else { - holder.tokenizer.reset(reader, holder.buffer); - } - return holder.tokenizer; - } - protected abstract T createNumericTokenizer(Reader reader, char[] buffer) throws IOException; private static final class Holder { diff --git a/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java b/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java index a3f11037eef..46a1379a47c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java @@ -35,27 +35,26 @@ public abstract class NumericTokenizer extends Tokenizer { protected final Object extra; protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, Object extra) throws IOException { - super(numericTokenStream); + super(numericTokenStream, reader); this.numericTokenStream = numericTokenStream; this.extra = extra; - reset(reader); + reset(); } protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException { - super(numericTokenStream); + super(numericTokenStream, reader); this.numericTokenStream = numericTokenStream; this.extra = extra; - reset(reader, buffer); + reset(buffer); } @Override - public void reset(Reader input) throws IOException { + public void reset() throws IOException { char[] buffer = new char[32]; - reset(input, buffer); + reset(buffer); } - public void reset(Reader input, char[] buffer) throws IOException { - super.reset(input); + public void reset(char[] buffer) throws IOException { int len = input.read(buffer); String value = new String(buffer, 0, len); setValue(numericTokenStream, value);