From 9a43d0ee41981f847431f5109d2141d4b91f2689 Mon Sep 17 00:00:00 2001 From: Robert Muir Date: Sun, 11 Apr 2010 10:31:34 +0000 Subject: [PATCH] SOLR-1876: convert all Solr tokenstreams to CharTermAttribute, make all non-final TokenStreams/Analyzers final git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@932862 13f79535-47bb-0310-9956-ffa450edef68 --- solr/CHANGES.txt | 3 +- solr/build.xml | 4 +++ solr/contrib/clustering/build.xml | 4 +++ solr/contrib/dataimporthandler/build.xml | 4 +++ solr/contrib/extraction/build.xml | 4 +++ solr/contrib/velocity/build.xml | 4 +++ .../solr/analysis/BufferedTokenStream.java | 8 +++--- .../analysis/CapitalizationFilterFactory.java | 13 ++++----- .../solr/analysis/CommonGramsFilter.java | 16 +++++------ .../solr/analysis/DoubleMetaphoneFilter.java | 18 ++++++------ .../solr/analysis/HyphenatedWordsFilter.java | 16 +++++------ .../apache/solr/analysis/KeepWordFilter.java | 7 ++--- .../solr/analysis/PatternReplaceFilter.java | 3 +- .../solr/analysis/PatternTokenizer.java | 10 +++---- .../apache/solr/analysis/PhoneticFilter.java | 20 ++++++------- .../analysis/RemoveDuplicatesTokenFilter.java | 8 +++--- .../solr/analysis/ReversedWildcardFilter.java | 14 +++++----- .../apache/solr/analysis/SynonymFilter.java | 18 ++++++------ .../solr/analysis/SynonymFilterFactory.java | 9 +++--- .../apache/solr/analysis/TokenizerChain.java | 2 +- .../solr/analysis/TrieTokenizerFactory.java | 2 +- .../org/apache/solr/analysis/TrimFilter.java | 17 +++++------ .../solr/analysis/WordDelimiterFilter.java | 26 ++++++++--------- .../solr/handler/AnalysisRequestHandler.java | 9 +++--- .../handler/AnalysisRequestHandlerBase.java | 16 +++++------ .../component/QueryElevationComponent.java | 6 ++-- .../component/SpellCheckComponent.java | 16 +++++------ .../highlight/DefaultSolrHighlighter.java | 4 +-- .../org/apache/solr/schema/BoolField.java | 8 ++---- .../org/apache/solr/schema/FieldType.java | 10 +++---- .../org/apache/solr/schema/TextField.java | 18 ++++++------ .../search/ExtendedDismaxQParserPlugin.java | 2 +- .../solr/spelling/SpellingQueryConverter.java | 14 +++++----- .../solr/analysis/CommonGramsFilterTest.java | 22 +++++++-------- .../DoubleMetaphoneFilterFactoryTest.java | 8 +++--- .../analysis/TestBufferedTokenStream.java | 12 ++++---- .../TestCollationKeyFilterFactory.java | 12 ++++---- .../analysis/TestPatternTokenizerFactory.java | 10 +++---- .../TestRemoveDuplicatesTokenFilter.java | 6 ++-- .../solr/analysis/TestSynonymFilter.java | 6 ++-- .../apache/solr/analysis/TestTrimFilter.java | 6 ++-- .../analysis/TestWordDelimiterFilter.java | 10 +++---- .../solr/spelling/SimpleQueryConverter.java | 16 +++++------ solr/src/webapp/web/admin/analysis.jsp | 28 +++++++++---------- 44 files changed, 237 insertions(+), 232 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 2c5ee85009f..221a4cdcb9c 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -59,7 +59,8 @@ Upgrading from Solr 1.4 "terms" container is a map instead of a named list. This affects response formats like JSON, but not XML. (yonik) - +* SOLR-1876: All Analyzers and TokenStreams are now final to enforce + the decorator pattern. (rmuir, uschindler) Detailed Change List ---------------------- diff --git a/solr/build.xml b/solr/build.xml index 15f5d6f6f56..95aee103c09 100644 --- a/solr/build.xml +++ b/solr/build.xml @@ -455,6 +455,10 @@ + + + + diff --git a/solr/contrib/clustering/build.xml b/solr/contrib/clustering/build.xml index 8c15c11a09e..b9cb9c12880 100644 --- a/solr/contrib/clustering/build.xml +++ b/solr/contrib/clustering/build.xml @@ -139,6 +139,10 @@ > + + + + diff --git a/solr/contrib/dataimporthandler/build.xml b/solr/contrib/dataimporthandler/build.xml index 5875f9150fb..92fa79f9e87 100644 --- a/solr/contrib/dataimporthandler/build.xml +++ b/solr/contrib/dataimporthandler/build.xml @@ -162,6 +162,10 @@ > + + + + diff --git a/solr/contrib/extraction/build.xml b/solr/contrib/extraction/build.xml index 467c124cfe2..e883926fc32 100644 --- a/solr/contrib/extraction/build.xml +++ b/solr/contrib/extraction/build.xml @@ -88,6 +88,10 @@ > + + + + diff --git a/solr/contrib/velocity/build.xml b/solr/contrib/velocity/build.xml index be94b06e4d3..97dbf5cf08d 100644 --- a/solr/contrib/velocity/build.xml +++ b/solr/contrib/velocity/build.xml @@ -87,6 +87,10 @@ + + + + diff --git a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java index 3fec7833a03..0e563f389a1 100644 --- a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java +++ b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java @@ -20,11 +20,11 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; // javadoc @link @@ -73,7 +73,7 @@ public abstract class BufferedTokenStream extends TokenFilter { private final LinkedList inQueue = new LinkedList(); private final LinkedList outQueue = new LinkedList(); - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); @@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter { return null; } else { Token token = new Token(); - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.setTermBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); @@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter { /** old api emulation for back compat */ private boolean writeToken(Token token) throws IOException { clearAttributes(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.termBuffer(), 0, token.termLength()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); typeAtt.setType(token.type()); flagsAtt.setFlags(token.getFlags()); diff --git a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java index 025dd4fbf20..6da0c54a876 100644 --- a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java @@ -18,7 +18,7 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.*; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; import java.util.ArrayList; @@ -188,22 +188,21 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory { *

* This is package protected since it is not useful without the Factory */ -class CapitalizationFilter extends TokenFilter { +final class CapitalizationFilter extends TokenFilter { private final CapitalizationFilterFactory factory; - private final TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) { super(in); this.factory = factory; - this.termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (!input.incrementToken()) return false; - char[] termBuffer = termAtt.termBuffer(); - int termBufferLength = termAtt.termLength(); + char[] termBuffer = termAtt.buffer(); + int termBufferLength = termAtt.length(); char[] backup = null; if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) { //make a backup in case we exceed the word count @@ -232,7 +231,7 @@ class CapitalizationFilter extends TokenFilter { } if (wordCount > factory.maxWordCount) { - termAtt.setTermBuffer(backup, 0, termBufferLength); + termAtt.copyBuffer(backup, 0, termBufferLength); } } diff --git a/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java b/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java index 0cad27ac8dd..fb34a1bd4a2 100644 --- a/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java +++ b/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java @@ -18,7 +18,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.Version; @@ -52,7 +52,7 @@ public final class CommonGramsFilter extends TokenFilter { private final StringBuilder buffer = new StringBuilder(); - private final TermAttribute termAttribute = addAttribute(TermAttribute.class); + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); @@ -231,7 +231,7 @@ public final class CommonGramsFilter extends TokenFilter { * @return {@code true} if the current token is a common term, {@code false} otherwise */ private boolean isCommon() { - return commonWords != null && commonWords.contains(termAttribute.termBuffer(), 0, termAttribute.termLength()); + return commonWords != null && commonWords.contains(termAttribute.buffer(), 0, termAttribute.length()); } /** @@ -239,7 +239,7 @@ public final class CommonGramsFilter extends TokenFilter { */ private void saveTermBuffer() { buffer.setLength(0); - buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength()); + buffer.append(termAttribute.buffer(), 0, termAttribute.length()); buffer.append(SEPARATOR); lastStartOffset = offsetAttribute.startOffset(); lastWasCommon = isCommon(); @@ -249,19 +249,19 @@ public final class CommonGramsFilter extends TokenFilter { * Constructs a compound token. */ private void gramToken() { - buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength()); + buffer.append(termAttribute.buffer(), 0, termAttribute.length()); int endOffset = offsetAttribute.endOffset(); clearAttributes(); int length = buffer.length(); - char termText[] = termAttribute.termBuffer(); + char termText[] = termAttribute.buffer(); if (length > termText.length) { - termText = termAttribute.resizeTermBuffer(length); + termText = termAttribute.resizeBuffer(length); } buffer.getChars(0, length, termText, 0); - termAttribute.setTermLength(length); + termAttribute.setLength(length); posIncAttribute.setPositionIncrement(0); offsetAttribute.setOffset(lastStartOffset, endOffset); typeAttribute.setType(GRAM_TYPE); diff --git a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java index 3b8ed14cfe1..d384d2c1ece 100644 --- a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java +++ b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java @@ -22,25 +22,23 @@ import java.util.LinkedList; import org.apache.commons.codec.language.DoubleMetaphone; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -public class DoubleMetaphoneFilter extends TokenFilter { +public final class DoubleMetaphoneFilter extends TokenFilter { private static final String TOKEN_TYPE = "DoubleMetaphone"; private final LinkedList remainingTokens = new LinkedList(); private final DoubleMetaphone encoder = new DoubleMetaphone(); private final boolean inject; - private final TermAttribute termAtt; - private final PositionIncrementAttribute posAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) { super(input); this.encoder.setMaxCodeLen(maxCodeLength); this.inject = inject; - this.termAtt = addAttribute(TermAttribute.class); - this.posAtt = addAttribute(PositionIncrementAttribute.class); } @Override @@ -55,12 +53,12 @@ public class DoubleMetaphoneFilter extends TokenFilter { if (!input.incrementToken()) return false; - int len = termAtt.termLength(); + int len = termAtt.length(); if (len==0) return true; // pass through zero length terms int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement(); - String v = new String(termAtt.termBuffer(), 0, len); + String v = termAtt.toString(); String primaryPhoneticValue = encoder.doubleMetaphone(v); String alternatePhoneticValue = encoder.doubleMetaphone(v, true); @@ -74,7 +72,7 @@ public class DoubleMetaphoneFilter extends TokenFilter { } posAtt.setPositionIncrement( firstAlternativeIncrement ); firstAlternativeIncrement = 0; - termAtt.setTermBuffer(primaryPhoneticValue); + termAtt.setEmpty().append(primaryPhoneticValue); saveState = true; } @@ -86,7 +84,7 @@ public class DoubleMetaphoneFilter extends TokenFilter { saveState = false; } posAtt.setPositionIncrement( firstAlternativeIncrement ); - termAtt.setTermBuffer(alternatePhoneticValue); + termAtt.setEmpty().append(alternatePhoneticValue); saveState = true; } diff --git a/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java b/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java index aadfc682d80..ec9d77b8678 100755 --- a/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java +++ b/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * When the plain text is extracted from documents, we will often have many words hyphenated and broken into @@ -54,7 +54,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; */ public final class HyphenatedWordsFilter extends TokenFilter { - private final TermAttribute termAttribute = addAttribute(TermAttribute.class); + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); private final StringBuilder hyphenated = new StringBuilder(); @@ -75,8 +75,8 @@ public final class HyphenatedWordsFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { while (input.incrementToken()) { - char[] term = termAttribute.termBuffer(); - int termLength = termAttribute.termLength(); + char[] term = termAttribute.buffer(); + int termLength = termAttribute.length(); if (termLength > 0 && term[termLength - 1] == '-') { // a hyphenated word @@ -128,14 +128,14 @@ public final class HyphenatedWordsFilter extends TokenFilter { restoreState(savedState); savedState = null; - char term[] = termAttribute.termBuffer(); + char term[] = termAttribute.buffer(); int length = hyphenated.length(); - if (length > termAttribute.termLength()) { - term = termAttribute.resizeTermBuffer(length); + if (length > termAttribute.length()) { + term = termAttribute.resizeBuffer(length); } hyphenated.getChars(0, length, term, 0); - termAttribute.setTermLength(length); + termAttribute.setLength(length); offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset); hyphenated.setLength(0); } diff --git a/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java b/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java index ca26532a67d..4a78a94fea8 100644 --- a/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java +++ b/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java @@ -20,7 +20,7 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.CharArraySet; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; import java.util.Set; @@ -34,7 +34,7 @@ import java.util.Set; */ public final class KeepWordFilter extends TokenFilter { private final CharArraySet words; - private final TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** @deprecated Use {@link #KeepWordFilter(TokenStream, Set, boolean)} instead */ @Deprecated @@ -47,13 +47,12 @@ public final class KeepWordFilter extends TokenFilter { public KeepWordFilter(TokenStream in, CharArraySet words) { super(in); this.words = words; - this.termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { while (input.incrementToken()) { - if (words.contains(termAtt.termBuffer(), 0, termAtt.termLength())) return true; + if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true; } return false; } diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java index 908294b211a..b9831fc6901 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java +++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java @@ -42,7 +42,7 @@ public final class PatternReplaceFilter extends TokenFilter { private final Pattern p; private final String replacement; private final boolean all; - private final CharTermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final Matcher m; /** @@ -64,7 +64,6 @@ public final class PatternReplaceFilter extends TokenFilter { this.p=p; this.replacement = (null == replacement) ? "" : replacement; this.all=all; - this.termAtt = addAttribute(CharTermAttribute.class); this.m = p.matcher(termAtt); } diff --git a/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java b/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java index 9253e936f02..b387767bbc6 100644 --- a/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java +++ b/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java @@ -22,7 +22,7 @@ import java.io.Reader; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.commons.io.IOUtils; @@ -56,7 +56,7 @@ import org.apache.commons.io.IOUtils; */ public final class PatternTokenizer extends Tokenizer { - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private String str; @@ -86,7 +86,7 @@ public final class PatternTokenizer extends Tokenizer { while (matcher.find()) { final String match = matcher.group(group); if (match.length() == 0) continue; - termAtt.setTermBuffer(match); + termAtt.setEmpty().append(match); index = matcher.start(group); offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.end(group))); return true; @@ -101,7 +101,7 @@ public final class PatternTokenizer extends Tokenizer { while (matcher.find()) { if (matcher.start() - index > 0) { // found a non-zero-length token - termAtt.setTermBuffer(str, index, matcher.start() - index); + termAtt.setEmpty().append(str, index, matcher.start()); offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start())); index = matcher.end(); return true; @@ -115,7 +115,7 @@ public final class PatternTokenizer extends Tokenizer { return false; } - termAtt.setTermBuffer(str, index, str.length() - index); + termAtt.setEmpty().append(str, index, str.length()); offsetAtt.setOffset(correctOffset(index), correctOffset(str.length())); index = Integer.MAX_VALUE; // mark exhausted return true; diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java b/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java index dcf6d8d63cc..a6d0a3bbe21 100644 --- a/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java +++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java @@ -20,7 +20,7 @@ package org.apache.solr.analysis; import org.apache.commons.codec.Encoder; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import java.io.IOException; @@ -31,23 +31,21 @@ import java.io.IOException; * * @version $Id$ */ -public class PhoneticFilter extends TokenFilter +public final class PhoneticFilter extends TokenFilter { protected boolean inject = true; protected Encoder encoder = null; protected String name = null; protected State save = null; - private final TermAttribute termAtt; - private final PositionIncrementAttribute posAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class); public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) { super(in); this.encoder = encoder; this.name = name; - this.inject = inject; - this.termAtt = addAttribute(TermAttribute.class); - this.posAtt = addAttribute(PositionIncrementAttribute.class); + this.inject = inject; } @Override @@ -62,9 +60,9 @@ public class PhoneticFilter extends TokenFilter if (!input.incrementToken()) return false; // pass through zero-length terms - if (termAtt.termLength()==0) return true; + if (termAtt.length() == 0) return true; - String value = termAtt.term(); + String value = termAtt.toString(); String phonetic = null; try { String v = encoder.encode(value).toString(); @@ -75,7 +73,7 @@ public class PhoneticFilter extends TokenFilter if (!inject) { // just modify this token - termAtt.setTermBuffer(phonetic); + termAtt.setEmpty().append(phonetic); return true; } @@ -88,7 +86,7 @@ public class PhoneticFilter extends TokenFilter save = captureState(); posAtt.setPositionIncrement(origOffset); - termAtt.setTermBuffer(phonetic); + termAtt.setEmpty().append(phonetic); return true; } diff --git a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java index 563356c70af..2978115867d 100644 --- a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java +++ b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java @@ -20,8 +20,8 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.Version; import java.io.IOException; @@ -31,7 +31,7 @@ import java.io.IOException; */ public final class RemoveDuplicatesTokenFilter extends TokenFilter { - private final TermAttribute termAttribute = addAttribute(TermAttribute.class); + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); // use a fixed version, as we don't care about case sensitivity. @@ -52,8 +52,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { while (input.incrementToken()) { - final char term[] = termAttribute.termBuffer(); - final int length = termAttribute.termLength(); + final char term[] = termAttribute.buffer(); + final int length = termAttribute.length(); final int posIncrement = posIncAttribute.getPositionIncrement(); if (posIncrement > 0) { diff --git a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java index d820ec4040d..4a595e0b5dc 100644 --- a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java +++ b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class produces a special form of reversed tokens, suitable for @@ -35,17 +35,17 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; * withOriginal == true, which proportionally increases the size * of postings and term dictionary in the index. */ -public class ReversedWildcardFilter extends TokenFilter { +public final class ReversedWildcardFilter extends TokenFilter { private boolean withOriginal; private char markerChar; private State save; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posAtt; protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) { super(input); - this.termAtt = addAttribute(TermAttribute.class); + this.termAtt = addAttribute(CharTermAttribute.class); this.posAtt = addAttribute(PositionIncrementAttribute.class); this.withOriginal = withOriginal; this.markerChar = markerChar; @@ -63,19 +63,19 @@ public class ReversedWildcardFilter extends TokenFilter { if (!input.incrementToken()) return false; // pass through zero-length terms - int oldLen = termAtt.termLength(); + int oldLen = termAtt.length(); if (oldLen ==0) return true; int origOffset = posAtt.getPositionIncrement(); if (withOriginal == true){ posAtt.setPositionIncrement(0); save = captureState(); } - char [] buffer = termAtt.resizeTermBuffer(oldLen + 1); + char [] buffer = termAtt.resizeBuffer(oldLen + 1); buffer[oldLen] = markerChar; reverse(buffer, 0, oldLen + 1); posAtt.setPositionIncrement(origOffset); - termAtt.setTermBuffer(buffer, 0, oldLen +1); + termAtt.copyBuffer(buffer, 0, oldLen +1); return true; } diff --git a/solr/src/java/org/apache/solr/analysis/SynonymFilter.java b/solr/src/java/org/apache/solr/analysis/SynonymFilter.java index 4a7db36db95..376fad329d9 100644 --- a/solr/src/java/org/apache/solr/analysis/SynonymFilter.java +++ b/solr/src/java/org/apache/solr/analysis/SynonymFilter.java @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -41,7 +41,7 @@ import java.util.LinkedList; * * @version $Id$ */ -public class SynonymFilter extends TokenFilter { +public final class SynonymFilter extends TokenFilter { private final SynonymMap map; // Map private Iterator replacement; // iterator over generated tokens @@ -50,7 +50,7 @@ public class SynonymFilter extends TokenFilter { super(in); this.map = map; // just ensuring these attributes exist... - addAttribute(TermAttribute.class); + addAttribute(CharTermAttribute.class); addAttribute(PositionIncrementAttribute.class); addAttribute(OffsetAttribute.class); addAttribute(TypeAttribute.class); @@ -87,8 +87,8 @@ public class SynonymFilter extends TokenFilter { // common case fast-path of first token not matching anything AttributeSource firstTok = nextTok(); if (firstTok == null) return false; - TermAttribute termAtt = firstTok.addAttribute(TermAttribute.class); - SynonymMap result = map.submap!=null ? map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()) : null; + CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class); + SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null; if (result == null) { copy(this, firstTok); return true; @@ -128,14 +128,14 @@ public class SynonymFilter extends TokenFilter { for (int i=0; i1, should not match, if==0, check multiple at this level? - TermAttribute termAtt = tok.getAttribute(TermAttribute.class); - SynonymMap subMap = map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()); + CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class); + SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length()); if (subMap != null) { // recurse diff --git a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java index 049143172a4..c6ae83271bc 100644 --- a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java +++ b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java @@ -18,7 +18,7 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.solr.common.ResourceLoader; import org.apache.solr.common.util.StrUtils; import org.apache.solr.util.plugin.ResourceLoaderAware; @@ -135,11 +135,10 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso TokenStream ts = loadTokenizer(tokFactory, reader); List tokList = new ArrayList(); try { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()){ - String text = new String(termAtt.termBuffer(), 0, termAtt.termLength()); - if( text.length() > 0 ) - tokList.add( text ); + if( termAtt.length() > 0 ) + tokList.add( termAtt.toString() ); } } catch (IOException e) { throw new RuntimeException(e); diff --git a/solr/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/src/java/org/apache/solr/analysis/TokenizerChain.java index 3f92f256d80..e882fefaa90 100644 --- a/solr/src/java/org/apache/solr/analysis/TokenizerChain.java +++ b/solr/src/java/org/apache/solr/analysis/TokenizerChain.java @@ -32,7 +32,7 @@ import java.io.Reader; // An analyzer that uses a tokenizer and a list of token filters to // create a TokenStream. // -public class TokenizerChain extends SolrAnalyzer { +public final class TokenizerChain extends SolrAnalyzer { final private CharFilterFactory[] charFilters; final private TokenizerFactory tokenizer; final private TokenFilterFactory[] filters; diff --git a/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java index aec963c8bf2..075d8853176 100644 --- a/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java +++ b/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java @@ -51,7 +51,7 @@ public class TrieTokenizerFactory extends BaseTokenizerFactory { } } -class TrieTokenizer extends Tokenizer { +final class TrieTokenizer extends Tokenizer { protected static final DateField dateField = new DateField(); protected final int precisionStep; protected final TrieTypes type; diff --git a/solr/src/java/org/apache/solr/analysis/TrimFilter.java b/solr/src/java/org/apache/solr/analysis/TrimFilter.java index 821fc27f7b2..b0cc7c3aa49 100644 --- a/solr/src/java/org/apache/solr/analysis/TrimFilter.java +++ b/solr/src/java/org/apache/solr/analysis/TrimFilter.java @@ -19,7 +19,7 @@ package org.apache.solr.analysis; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import java.io.IOException; @@ -32,24 +32,21 @@ import java.io.IOException; public final class TrimFilter extends TokenFilter { final boolean updateOffsets; - private final TermAttribute termAtt; - private final OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public TrimFilter(TokenStream in, boolean updateOffsets) { super(in); this.updateOffsets = updateOffsets; - - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); } @Override public boolean incrementToken() throws IOException { if (!input.incrementToken()) return false; - char[] termBuffer = termAtt.termBuffer(); - int len = termAtt.termLength(); + char[] termBuffer = termAtt.buffer(); + int len = termAtt.length(); //TODO: Is this the right behavior or should we return false? Currently, " ", returns true, so I think this should //also return true if (len == 0){ @@ -69,9 +66,9 @@ public final class TrimFilter extends TokenFilter { } if (start > 0 || end < len) { if (start < end) { - termAtt.setTermBuffer(termBuffer, start, (end - start)); + termAtt.copyBuffer(termBuffer, start, (end - start)); } else { - termAtt.setTermLength(0); + termAtt.setEmpty(); } if (updateOffsets) { int newStart = offsetAtt.startOffset()+start; diff --git a/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java b/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java index f5963f873bc..cf65608a5a6 100644 --- a/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java +++ b/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -120,7 +120,7 @@ final class WordDelimiterFilter extends TokenFilter { */ final CharArraySet protWords; - private final TermAttribute termAttribute = addAttribute(TermAttribute.class); + private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class); @@ -338,8 +338,8 @@ final class WordDelimiterFilter extends TokenFilter { return false; } - int termLength = termAttribute.termLength(); - char[] termBuffer = termAttribute.termBuffer(); + int termLength = termAttribute.length(); + char[] termBuffer = termAttribute.buffer(); accumPosInc += posIncAttribute.getPositionIncrement(); @@ -462,14 +462,14 @@ final class WordDelimiterFilter extends TokenFilter { savedStartOffset = offsetAttribute.startOffset(); savedEndOffset = offsetAttribute.endOffset(); // if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets. - hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.termLength()); + hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length()); savedType = typeAttribute.type(); - if (savedBuffer.length < termAttribute.termLength()) { - savedBuffer = new char[ArrayUtil.oversize(termAttribute.termLength(), RamUsageEstimator.NUM_BYTES_CHAR)]; + if (savedBuffer.length < termAttribute.length()) { + savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)]; } - System.arraycopy(termAttribute.termBuffer(), 0, savedBuffer, 0, termAttribute.termLength()); + System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length()); iterator.text = savedBuffer; hasSavedState = true; @@ -531,7 +531,7 @@ final class WordDelimiterFilter extends TokenFilter { */ private void generatePart(boolean isSingleWord) { clearAttributes(); - termAttribute.setTermBuffer(savedBuffer, iterator.current, iterator.end - iterator.current); + termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current); int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset; int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end; @@ -636,13 +636,13 @@ final class WordDelimiterFilter extends TokenFilter { */ void write() { clearAttributes(); - if (termAttribute.termLength() < buffer.length()) { - termAttribute.resizeTermBuffer(buffer.length()); + if (termAttribute.length() < buffer.length()) { + termAttribute.resizeBuffer(buffer.length()); } - char termbuffer[] = termAttribute.termBuffer(); + char termbuffer[] = termAttribute.buffer(); buffer.getChars(0, buffer.length(), termbuffer, 0); - termAttribute.setTermLength(buffer.length()); + termAttribute.setLength(buffer.length()); if (hasIllegalOffsets) { offsetAttribute.setOffset(savedStartOffset, savedEndOffset); diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java index beb8bab5e70..8e580764465 100644 --- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java +++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java @@ -18,7 +18,6 @@ package org.apache.solr.handler; import org.apache.commons.io.IOUtils; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.util.BytesRef; @@ -135,10 +134,10 @@ public class AnalysisRequestHandler extends RequestHandlerBase { // outer is namedList since order of tokens is important NamedList> tokens = new NamedList>(); // TODO: support custom attributes - TermAttribute termAtt = null; + CharTermAttribute termAtt = null; TermToBytesRefAttribute bytesAtt = null; - if (tstream.hasAttribute(TermAttribute.class)) { - termAtt = tstream.getAttribute(TermAttribute.class); + if (tstream.hasAttribute(CharTermAttribute.class)) { + termAtt = tstream.getAttribute(CharTermAttribute.class); } else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) { bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class); } @@ -151,7 +150,7 @@ public class AnalysisRequestHandler extends RequestHandlerBase { NamedList token = new SimpleOrderedMap(); tokens.add("token", token); if (termAtt != null) { - token.add("value", termAtt.term()); + token.add("value", termAtt.toString()); } if (bytesAtt != null) { bytesAtt.toBytesRef(bytes); diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java index 1b2e8b14dcb..188d522cd80 100644 --- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java +++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java @@ -145,10 +145,10 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { List tokens = new ArrayList(); // TODO change this API to support custom attributes - TermAttribute termAtt = null; + CharTermAttribute termAtt = null; TermToBytesRefAttribute bytesAtt = null; - if (tokenStream.hasAttribute(TermAttribute.class)) { - termAtt = tokenStream.getAttribute(TermAttribute.class); + if (tokenStream.hasAttribute(CharTermAttribute.class)) { + termAtt = tokenStream.getAttribute(CharTermAttribute.class); } else if (tokenStream.hasAttribute(TermToBytesRefAttribute.class)) { bytesAtt = tokenStream.getAttribute(TermToBytesRefAttribute.class); } @@ -163,7 +163,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { while (tokenStream.incrementToken()) { Token token = new Token(); if (termAtt != null) { - token.setTermBuffer(termAtt.term()); + token.setTermBuffer(termAtt.toString()); } if (bytesAtt != null) { bytesAtt.toBytesRef(bytes); @@ -259,12 +259,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { * TokenStream that iterates over a list of pre-existing Tokens */ // TODO refactor to support custom attributes - protected static class ListBasedTokenStream extends TokenStream { + protected final static class ListBasedTokenStream extends TokenStream { private final List tokens; private Iterator tokenIterator; - private final TermAttribute termAtt = (TermAttribute) - addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = (CharTermAttribute) + addAttribute(CharTermAttribute.class); private final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); private final TypeAttribute typeAtt = (TypeAttribute) @@ -292,7 +292,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase { public boolean incrementToken() throws IOException { if (tokenIterator.hasNext()) { Token next = tokenIterator.next(); - termAtt.setTermBuffer(next.termBuffer(), 0, next.termLength()); + termAtt.copyBuffer(next.termBuffer(), 0, next.termLength()); typeAtt.setType(next.type()); offsetAtt.setOffset(next.startOffset(), next.endOffset()); flagsAtt.setFlags(next.getFlags()); diff --git a/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java b/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java index a188e20fd38..f961cdd7dcd 100644 --- a/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/QueryElevationComponent.java @@ -39,7 +39,7 @@ import javax.xml.xpath.XPathFactory; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; @@ -298,9 +298,9 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore TokenStream tokens = analyzer.reusableTokenStream( "", new StringReader( query ) ); tokens.reset(); - TermAttribute termAtt = (TermAttribute) tokens.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = tokens.addAttribute(CharTermAttribute.class); while( tokens.incrementToken() ) { - norm.append( termAtt.termBuffer(), 0, termAtt.termLength() ); + norm.append( termAtt.buffer(), 0, termAtt.length() ); } return norm.toString(); } diff --git a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java index 32b0276c309..07da525c584 100644 --- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java +++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java @@ -37,7 +37,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.index.IndexReader; import org.apache.solr.common.SolrException; @@ -373,16 +373,16 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q)); ts.reset(); // TODO: support custom attributes - TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class); - TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class); - FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class); - PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); + FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); + PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); while (ts.incrementToken()){ Token token = new Token(); - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.setTermBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); diff --git a/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index 31bc278301e..cc69fd0253c 100644 --- a/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -512,7 +512,7 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf * This is meant to work around fickleness in the highlighter only. It * can mess up token positions and should not be used for indexing or querying. */ -class TokenOrderingFilter extends TokenFilter { +final class TokenOrderingFilter extends TokenFilter { private final int windowSize; private final LinkedList queue = new LinkedList(); private boolean done=false; @@ -586,7 +586,7 @@ class TermOffsetsTokenStream { return new MultiValuedStream(length); } - class MultiValuedStream extends TokenStream { + final class MultiValuedStream extends TokenStream { private final int length; OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); diff --git a/solr/src/java/org/apache/solr/schema/BoolField.java b/solr/src/java/org/apache/solr/schema/BoolField.java index 354c4431e2e..0e939b9f745 100644 --- a/solr/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/src/java/org/apache/solr/schema/BoolField.java @@ -20,11 +20,9 @@ package org.apache.solr.schema; import org.apache.lucene.search.SortField; import org.apache.solr.search.function.ValueSource; import org.apache.solr.search.function.OrdFieldSource; -import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Fieldable; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.response.XMLWriter; @@ -59,7 +57,7 @@ public class BoolField extends FieldType { protected final static Analyzer boolAnalyzer = new SolrAnalyzer() { public TokenStreamInfo getStream(String fieldName, Reader reader) { Tokenizer tokenizer = new Tokenizer(reader) { - final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class); + final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); boolean done = false; @Override @@ -75,7 +73,7 @@ public class BoolField extends FieldType { done = true; int ch = input.read(); if (ch==-1) return false; - termAtt.setTermBuffer( + termAtt.copyBuffer( ((ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN) ,0,1); return true; diff --git a/solr/src/java/org/apache/solr/schema/FieldType.java b/solr/src/java/org/apache/solr/schema/FieldType.java index b41fb499fc1..ac2af992f76 100644 --- a/solr/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/src/java/org/apache/solr/schema/FieldType.java @@ -21,7 +21,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.Fieldable; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.search.SortField; import org.apache.lucene.search.Query; @@ -369,7 +369,7 @@ public abstract class FieldType extends FieldProperties { * Default analyzer for types that only produce 1 verbatim token... * A maximum size of chars to be read must be specified */ - protected class DefaultAnalyzer extends SolrAnalyzer { + protected final class DefaultAnalyzer extends SolrAnalyzer { final int maxChars; DefaultAnalyzer(int maxChars) { @@ -379,15 +379,15 @@ public abstract class FieldType extends FieldProperties { public TokenStreamInfo getStream(String fieldName, Reader reader) { Tokenizer ts = new Tokenizer(reader) { final char[] cbuf = new char[maxChars]; - final TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class); - final OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute(OffsetAttribute.class); + final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); @Override public boolean incrementToken() throws IOException { clearAttributes(); int n = input.read(cbuf,0,maxChars); if (n<=0) return false; String s = toInternal(new String(cbuf,0,n)); - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); offsetAtt.setOffset(correctOffset(0),correctOffset(n)); return true; } diff --git a/solr/src/java/org/apache/solr/schema/TextField.java b/solr/src/java/org/apache/solr/schema/TextField.java index 722066bb726..1796195897e 100644 --- a/solr/src/java/org/apache/solr/schema/TextField.java +++ b/solr/src/java/org/apache/solr/schema/TextField.java @@ -27,7 +27,7 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.Term; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Analyzer; @@ -88,7 +88,7 @@ public class TextField extends FieldType { source = analyzer.tokenStream(field, new StringReader(queryText)); } CachingTokenFilter buffer = new CachingTokenFilter(source); - TermAttribute termAtt = null; + CharTermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; @@ -100,11 +100,11 @@ public class TextField extends FieldType { // success==false if we hit an exception } if (success) { - if (buffer.hasAttribute(TermAttribute.class)) { - termAtt = (TermAttribute) buffer.getAttribute(TermAttribute.class); + if (buffer.hasAttribute(CharTermAttribute.class)) { + termAtt = buffer.getAttribute(CharTermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { - posIncrAtt = (PositionIncrementAttribute) buffer.getAttribute(PositionIncrementAttribute.class); + posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } } @@ -147,7 +147,7 @@ public class TextField extends FieldType { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -164,7 +164,7 @@ public class TextField extends FieldType { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } @@ -188,7 +188,7 @@ public class TextField extends FieldType { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -229,7 +229,7 @@ public class TextField extends FieldType { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.term(); + term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } diff --git a/solr/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java b/solr/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java index a5bdc263668..3877ddd629c 100755 --- a/solr/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java +++ b/solr/src/java/org/apache/solr/search/ExtendedDismaxQParserPlugin.java @@ -1043,7 +1043,7 @@ class ExtendedDismaxQParser extends QParser { } -class ExtendedAnalyzer extends Analyzer { +final class ExtendedAnalyzer extends Analyzer { final Map map = new HashMap(); final QParser parser; final Analyzer queryAnalyzer; diff --git a/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java b/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java index 4e820913d24..8060e1bb998 100644 --- a/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java +++ b/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; @@ -105,15 +105,15 @@ public class SpellingQueryConverter extends QueryConverter { try { stream = analyzer.reusableTokenStream("", new StringReader(word)); // TODO: support custom attributes - TermAttribute termAtt = (TermAttribute) stream.addAttribute(TermAttribute.class); - FlagsAttribute flagsAtt = (FlagsAttribute) stream.addAttribute(FlagsAttribute.class); - TypeAttribute typeAtt = (TypeAttribute) stream.addAttribute(TypeAttribute.class); - PayloadAttribute payloadAtt = (PayloadAttribute) stream.addAttribute(PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) stream.addAttribute(PositionIncrementAttribute.class); + CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); + FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class); + TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); + PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); stream.reset(); while (stream.incrementToken()) { Token token = new Token(); - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.setTermBuffer(termAtt.buffer(), 0, termAtt.length()); token.setStartOffset(matcher.start()); token.setEndOffset(matcher.end()); token.setFlags(flagsAtt.getFlags()); diff --git a/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java b/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java index 0b0f4067e14..4dfbcbe7030 100644 --- a/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java +++ b/solr/src/test/org/apache/solr/analysis/CommonGramsFilterTest.java @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Tests CommonGramsQueryFilter @@ -38,20 +38,20 @@ public class CommonGramsFilterTest extends BaseTokenTestCase { WhitespaceTokenizer wt = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); - TermAttribute term = cgf.addAttribute(TermAttribute.class); + CharTermAttribute term = cgf.addAttribute(CharTermAttribute.class); assertTrue(cgf.incrementToken()); - assertEquals("How", term.term()); + assertEquals("How", term.toString()); assertTrue(cgf.incrementToken()); - assertEquals("How_the", term.term()); + assertEquals("How_the", term.toString()); assertTrue(cgf.incrementToken()); - assertEquals("the", term.term()); + assertEquals("the", term.toString()); assertTrue(cgf.incrementToken()); - assertEquals("the_s", term.term()); + assertEquals("the_s", term.toString()); wt.reset(new StringReader(input)); cgf.reset(); assertTrue(cgf.incrementToken()); - assertEquals("How", term.term()); + assertEquals("How", term.toString()); } public void testQueryReset() throws Exception { @@ -60,16 +60,16 @@ public class CommonGramsFilterTest extends BaseTokenTestCase { CommonGramsFilter cgf = new CommonGramsFilter(wt, commonWords); CommonGramsQueryFilter nsf = new CommonGramsQueryFilter(cgf); - TermAttribute term = wt.addAttribute(TermAttribute.class); + CharTermAttribute term = wt.addAttribute(CharTermAttribute.class); assertTrue(nsf.incrementToken()); - assertEquals("How_the", term.term()); + assertEquals("How_the", term.toString()); assertTrue(nsf.incrementToken()); - assertEquals("the_s", term.term()); + assertEquals("the_s", term.toString()); wt.reset(new StringReader(input)); nsf.reset(); assertTrue(nsf.incrementToken()); - assertEquals("How_the", term.term()); + assertEquals("How_the", term.toString()); } /** diff --git a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java b/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java index 87d7098c2f0..45f8c1fd612 100644 --- a/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java +++ b/solr/src/test/org/apache/solr/analysis/DoubleMetaphoneFilterFactoryTest.java @@ -22,7 +22,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase { @@ -59,12 +59,12 @@ public class DoubleMetaphoneFilterFactoryTest extends BaseTokenTestCase { TokenStream inputStream = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader("international")); TokenStream filteredStream = factory.create(inputStream); - TermAttribute termAtt = filteredStream.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = filteredStream.addAttribute(CharTermAttribute.class); assertEquals(DoubleMetaphoneFilter.class, filteredStream.getClass()); assertTrue(filteredStream.incrementToken()); - assertEquals(13, termAtt.termLength()); - assertEquals("international", termAtt.term()); + assertEquals(13, termAtt.length()); + assertEquals("international", termAtt.toString()); filteredStream.reset(); // ensure there are no more tokens, such as ANTRNXNL diff --git a/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java b/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java index a69bb785643..aa0012fa802 100644 --- a/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java +++ b/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; import java.io.StringReader; @@ -75,18 +75,18 @@ public class TestBufferedTokenStream extends BaseTokenTestCase { final String input = "How now A B brown A cow B like A B thing?"; Tokenizer tokenizer = new WhitespaceTokenizer(DEFAULT_VERSION, new StringReader(input)); TokenStream ts = new AB_AAB_Stream(tokenizer); - TermAttribute term = ts.addAttribute(TermAttribute.class); + CharTermAttribute term = ts.addAttribute(CharTermAttribute.class); assertTrue(ts.incrementToken()); - assertEquals("How", term.term()); + assertEquals("How", term.toString()); assertTrue(ts.incrementToken()); - assertEquals("now", term.term()); + assertEquals("now", term.toString()); assertTrue(ts.incrementToken()); - assertEquals("A", term.term()); + assertEquals("A", term.toString()); // reset back to input, // if reset() does not work correctly then previous buffered tokens will remain tokenizer.reset(new StringReader(input)); ts.reset(); assertTrue(ts.incrementToken()); - assertEquals("How", term.term()); + assertEquals("How", term.toString()); } } diff --git a/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java index 69b7342a39b..c508a65a0a8 100644 --- a/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java +++ b/solr/src/test/org/apache/solr/analysis/TestCollationKeyFilterFactory.java @@ -30,7 +30,7 @@ import java.util.Map; import org.apache.lucene.analysis.KeywordTokenizer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.solr.common.ResourceLoader; public class TestCollationKeyFilterFactory extends BaseTokenTestCase { @@ -177,13 +177,13 @@ public class TestCollationKeyFilterFactory extends BaseTokenTestCase { private void assertCollatesToSame(TokenStream stream1, TokenStream stream2) throws IOException { - TermAttribute term1 = stream1 - .addAttribute(TermAttribute.class); - TermAttribute term2 = stream2 - .addAttribute(TermAttribute.class); + CharTermAttribute term1 = stream1 + .addAttribute(CharTermAttribute.class); + CharTermAttribute term2 = stream2 + .addAttribute(CharTermAttribute.class); assertTrue(stream1.incrementToken()); assertTrue(stream2.incrementToken()); - assertEquals(term1.term(), term2.term()); + assertEquals(term1.toString(), term2.toString()); assertFalse(stream1.incrementToken()); assertFalse(stream2.incrementToken()); } diff --git a/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java b/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java index 6faf2187d97..b5bc51fd9f9 100644 --- a/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java +++ b/solr/src/test/org/apache/solr/analysis/TestPatternTokenizerFactory.java @@ -29,7 +29,7 @@ import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.MappingCharFilter; import org.apache.lucene.analysis.NormalizeCharMap; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class TestPatternTokenizerFactory extends BaseTokenTestCase { @@ -117,17 +117,17 @@ public class TestPatternTokenizerFactory extends BaseTokenTestCase */ private static String tsToString(TokenStream in) throws IOException { StringBuilder out = new StringBuilder(); - TermAttribute termAtt = in.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = in.addAttribute(CharTermAttribute.class); // extra safety to enforce, that the state is not preserved and also // assign bogus values in.clearAttributes(); - termAtt.setTermBuffer("bogusTerm"); + termAtt.setEmpty().append("bogusTerm"); while (in.incrementToken()) { if (out.length() > 0) out.append(' '); - out.append(termAtt.term()); + out.append(termAtt.toString()); in.clearAttributes(); - termAtt.setTermBuffer("bogusTerm"); + termAtt.setEmpty().append("bogusTerm"); } in.close(); diff --git a/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java b/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java index ceeb2d1b604..5a51117346a 100644 --- a/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java +++ b/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilter.java @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.util.Iterator; import java.util.Arrays; @@ -44,14 +44,14 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenTestCase { RemoveDuplicatesTokenFilterFactory factory = new RemoveDuplicatesTokenFilterFactory(); final TokenStream ts = factory.create (new TokenStream() { - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); public boolean incrementToken() { if (toks.hasNext()) { clearAttributes(); Token tok = toks.next(); - termAtt.setTermBuffer(tok.term()); + termAtt.setEmpty().append(tok.term()); offsetAtt.setOffset(tok.startOffset(), tok.endOffset()); posIncAtt.setPositionIncrement(tok.getPositionIncrement()); return true; diff --git a/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java b/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java index bf233bde835..e724a837a53 100644 --- a/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java +++ b/solr/src/test/org/apache/solr/analysis/TestSynonymFilter.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.io.IOException; @@ -381,7 +381,7 @@ public class TestSynonymFilter extends BaseTokenTestCase { private static class IterTokenStream extends TokenStream { final Token tokens[]; int index = 0; - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); @@ -403,7 +403,7 @@ public class TestSynonymFilter extends BaseTokenTestCase { else { clearAttributes(); Token token = tokens[index++]; - termAtt.setTermBuffer(token.term()); + termAtt.setEmpty().append(token.term()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); posIncAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); diff --git a/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java b/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java index 32fba1c30f5..39616ce13bd 100644 --- a/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java +++ b/solr/src/test/org/apache/solr/analysis/TestTrimFilter.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; /** @@ -81,7 +81,7 @@ public class TestTrimFilter extends BaseTokenTestCase { private static class IterTokenStream extends TokenStream { final Token tokens[]; int index = 0; - TermAttribute termAtt = addAttribute(TermAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); @@ -103,7 +103,7 @@ public class TestTrimFilter extends BaseTokenTestCase { else { clearAttributes(); Token token = tokens[index++]; - termAtt.setTermBuffer(token.term()); + termAtt.setEmpty().append(token.term()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); posIncAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); diff --git a/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java b/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java index 11af40f5c6e..fa3ba01c714 100644 --- a/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java +++ b/solr/src/test/org/apache/solr/analysis/TestWordDelimiterFilter.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.WhitespaceTokenizer; import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.solr.SolrTestCaseJ4; import org.junit.BeforeClass; import org.junit.Test; @@ -347,19 +347,17 @@ public class TestWordDelimiterFilter extends SolrTestCaseJ4 { * Set a large position increment gap of 10 if the token is "largegap" or "/" */ private final class LargePosIncTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private PositionIncrementAttribute posIncAtt; + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); protected LargePosIncTokenFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); - posIncAtt = addAttribute(PositionIncrementAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - if (termAtt.term().equals("largegap") || termAtt.term().equals("/")) + if (termAtt.toString().equals("largegap") || termAtt.toString().equals("/")) posIncAtt.setPositionIncrement(10); return true; } else { diff --git a/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java b/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java index 53814562e7c..19bc9c66bd5 100644 --- a/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java +++ b/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java @@ -19,11 +19,11 @@ package org.apache.solr.spelling; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.util.Collection; @@ -43,18 +43,18 @@ class SimpleQueryConverter extends SpellingQueryConverter{ WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(); TokenStream ts = analyzer.tokenStream("", new StringReader(origQuery)); // TODO: support custom attributes - TermAttribute termAtt = (TermAttribute) ts.addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = (OffsetAttribute) ts.addAttribute(OffsetAttribute.class); - TypeAttribute typeAtt = (TypeAttribute) ts.addAttribute(TypeAttribute.class); - FlagsAttribute flagsAtt = (FlagsAttribute) ts.addAttribute(FlagsAttribute.class); - PayloadAttribute payloadAtt = (PayloadAttribute) ts.addAttribute(PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); + TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); + FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); + PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); try { ts.reset(); while (ts.incrementToken()){ Token tok = new Token(); - tok.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); diff --git a/solr/src/webapp/web/admin/analysis.jsp b/solr/src/webapp/web/admin/analysis.jsp index 7d25282b2f8..bd756df9369 100644 --- a/solr/src/webapp/web/admin/analysis.jsp +++ b/solr/src/webapp/web/admin/analysis.jsp @@ -213,17 +213,17 @@ final Iterator iter = tokens.iterator(); tstream = filtfac.create( new TokenStream() { - TermAttribute termAtt = (TermAttribute) addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = (OffsetAttribute) addAttribute (OffsetAttribute.class); - TypeAttribute typeAtt = (TypeAttribute) addAttribute (TypeAttribute.class); - FlagsAttribute flagsAtt = (FlagsAttribute) addAttribute (FlagsAttribute.class); - PayloadAttribute payloadAtt = (PayloadAttribute) addAttribute (PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) addAttribute (PositionIncrementAttribute.class); + CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = addAttribute (OffsetAttribute.class); + TypeAttribute typeAtt = addAttribute (TypeAttribute.class); + FlagsAttribute flagsAtt = addAttribute (FlagsAttribute.class); + PayloadAttribute payloadAtt = addAttribute (PayloadAttribute.class); + PositionIncrementAttribute posIncAtt = addAttribute (PositionIncrementAttribute.class); public boolean incrementToken() throws IOException { if (iter.hasNext()) { Token token = iter.next(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.termBuffer(), 0, token.termLength()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); typeAtt.setType(token.type()); flagsAtt.setFlags(token.getFlags()); @@ -255,19 +255,19 @@ static List getTokens(TokenStream tstream) throws IOException { List tokens = new ArrayList(); - TermAttribute termAtt = (TermAttribute) tstream.addAttribute(TermAttribute.class); - OffsetAttribute offsetAtt = (OffsetAttribute) tstream.addAttribute (OffsetAttribute.class); - TypeAttribute typeAtt = (TypeAttribute) tstream.addAttribute (TypeAttribute.class); - FlagsAttribute flagsAtt = (FlagsAttribute) tstream.addAttribute (FlagsAttribute.class); - PayloadAttribute payloadAtt = (PayloadAttribute) tstream.addAttribute (PayloadAttribute.class); - PositionIncrementAttribute posIncAtt = (PositionIncrementAttribute) tstream.addAttribute (PositionIncrementAttribute.class); + CharTermAttribute termAtt = tstream.addAttribute(CharTermAttribute.class); + OffsetAttribute offsetAtt = tstream.addAttribute (OffsetAttribute.class); + TypeAttribute typeAtt = tstream.addAttribute (TypeAttribute.class); + FlagsAttribute flagsAtt = tstream.addAttribute (FlagsAttribute.class); + PayloadAttribute payloadAtt = tstream.addAttribute (PayloadAttribute.class); + PositionIncrementAttribute posIncAtt = tstream.addAttribute (PositionIncrementAttribute.class); while (true) { if (!tstream.incrementToken()) break; else { Token token = new Token(); - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.setTermBuffer(termAtt.buffer(), 0, termAtt.length()); token.setType(typeAtt.type()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setPayload(payloadAtt.getPayload());