diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 2c5ee85009f..221a4cdcb9c 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -59,7 +59,8 @@ Upgrading from Solr 1.4
"terms" container is a map instead of a named list. This affects
response formats like JSON, but not XML. (yonik)
-
+* SOLR-1876: All Analyzers and TokenStreams are now final to enforce
+ the decorator pattern. (rmuir, uschindler)
Detailed Change List
----------------------
diff --git a/solr/build.xml b/solr/build.xml
index 15f5d6f6f56..95aee103c09 100644
--- a/solr/build.xml
+++ b/solr/build.xml
@@ -455,6 +455,10 @@
+
+
+
+
diff --git a/solr/contrib/clustering/build.xml b/solr/contrib/clustering/build.xml
index 8c15c11a09e..b9cb9c12880 100644
--- a/solr/contrib/clustering/build.xml
+++ b/solr/contrib/clustering/build.xml
@@ -139,6 +139,10 @@
>
+
+
+
+
diff --git a/solr/contrib/dataimporthandler/build.xml b/solr/contrib/dataimporthandler/build.xml
index 5875f9150fb..92fa79f9e87 100644
--- a/solr/contrib/dataimporthandler/build.xml
+++ b/solr/contrib/dataimporthandler/build.xml
@@ -162,6 +162,10 @@
>
+
+
+
+
diff --git a/solr/contrib/extraction/build.xml b/solr/contrib/extraction/build.xml
index 467c124cfe2..e883926fc32 100644
--- a/solr/contrib/extraction/build.xml
+++ b/solr/contrib/extraction/build.xml
@@ -88,6 +88,10 @@
>
+
+
+
+
diff --git a/solr/contrib/velocity/build.xml b/solr/contrib/velocity/build.xml
index be94b06e4d3..97dbf5cf08d 100644
--- a/solr/contrib/velocity/build.xml
+++ b/solr/contrib/velocity/build.xml
@@ -87,6 +87,10 @@
+
+
+
+
diff --git a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
index 3fec7833a03..0e563f389a1 100644
--- a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
+++ b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
@@ -20,11 +20,11 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource; // javadoc @link
@@ -73,7 +73,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
private final LinkedList inQueue = new LinkedList();
private final LinkedList outQueue = new LinkedList();
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
@@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
return null;
} else {
Token token = new Token();
- token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
@@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
/** old api emulation for back compat */
private boolean writeToken(Token token) throws IOException {
clearAttributes();
- termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());
diff --git a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
index 025dd4fbf20..6da0c54a876 100644
--- a/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/CapitalizationFilterFactory.java
@@ -18,7 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.util.ArrayList;
@@ -188,22 +188,21 @@ public class CapitalizationFilterFactory extends BaseTokenFilterFactory {
*
* This is package protected since it is not useful without the Factory
*/
-class CapitalizationFilter extends TokenFilter {
+final class CapitalizationFilter extends TokenFilter {
private final CapitalizationFilterFactory factory;
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public CapitalizationFilter(TokenStream in, final CapitalizationFilterFactory factory) {
super(in);
this.factory = factory;
- this.termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
- char[] termBuffer = termAtt.termBuffer();
- int termBufferLength = termAtt.termLength();
+ char[] termBuffer = termAtt.buffer();
+ int termBufferLength = termAtt.length();
char[] backup = null;
if (factory.maxWordCount < CapitalizationFilterFactory.DEFAULT_MAX_WORD_COUNT) {
//make a backup in case we exceed the word count
@@ -232,7 +231,7 @@ class CapitalizationFilter extends TokenFilter {
}
if (wordCount > factory.maxWordCount) {
- termAtt.setTermBuffer(backup, 0, termBufferLength);
+ termAtt.copyBuffer(backup, 0, termBufferLength);
}
}
diff --git a/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java b/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
index 0cad27ac8dd..fb34a1bd4a2 100644
--- a/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/CommonGramsFilter.java
@@ -18,7 +18,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
@@ -52,7 +52,7 @@ public final class CommonGramsFilter extends TokenFilter {
private final StringBuilder buffer = new StringBuilder();
- private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
@@ -231,7 +231,7 @@ public final class CommonGramsFilter extends TokenFilter {
* @return {@code true} if the current token is a common term, {@code false} otherwise
*/
private boolean isCommon() {
- return commonWords != null && commonWords.contains(termAttribute.termBuffer(), 0, termAttribute.termLength());
+ return commonWords != null && commonWords.contains(termAttribute.buffer(), 0, termAttribute.length());
}
/**
@@ -239,7 +239,7 @@ public final class CommonGramsFilter extends TokenFilter {
*/
private void saveTermBuffer() {
buffer.setLength(0);
- buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
+ buffer.append(termAttribute.buffer(), 0, termAttribute.length());
buffer.append(SEPARATOR);
lastStartOffset = offsetAttribute.startOffset();
lastWasCommon = isCommon();
@@ -249,19 +249,19 @@ public final class CommonGramsFilter extends TokenFilter {
* Constructs a compound token.
*/
private void gramToken() {
- buffer.append(termAttribute.termBuffer(), 0, termAttribute.termLength());
+ buffer.append(termAttribute.buffer(), 0, termAttribute.length());
int endOffset = offsetAttribute.endOffset();
clearAttributes();
int length = buffer.length();
- char termText[] = termAttribute.termBuffer();
+ char termText[] = termAttribute.buffer();
if (length > termText.length) {
- termText = termAttribute.resizeTermBuffer(length);
+ termText = termAttribute.resizeBuffer(length);
}
buffer.getChars(0, length, termText, 0);
- termAttribute.setTermLength(length);
+ termAttribute.setLength(length);
posIncAttribute.setPositionIncrement(0);
offsetAttribute.setOffset(lastStartOffset, endOffset);
typeAttribute.setType(GRAM_TYPE);
diff --git a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
index 3b8ed14cfe1..d384d2c1ece 100644
--- a/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/DoubleMetaphoneFilter.java
@@ -22,25 +22,23 @@ import java.util.LinkedList;
import org.apache.commons.codec.language.DoubleMetaphone;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-public class DoubleMetaphoneFilter extends TokenFilter {
+public final class DoubleMetaphoneFilter extends TokenFilter {
private static final String TOKEN_TYPE = "DoubleMetaphone";
private final LinkedList remainingTokens = new LinkedList();
private final DoubleMetaphone encoder = new DoubleMetaphone();
private final boolean inject;
- private final TermAttribute termAtt;
- private final PositionIncrementAttribute posAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
protected DoubleMetaphoneFilter(TokenStream input, int maxCodeLength, boolean inject) {
super(input);
this.encoder.setMaxCodeLen(maxCodeLength);
this.inject = inject;
- this.termAtt = addAttribute(TermAttribute.class);
- this.posAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
@@ -55,12 +53,12 @@ public class DoubleMetaphoneFilter extends TokenFilter {
if (!input.incrementToken()) return false;
- int len = termAtt.termLength();
+ int len = termAtt.length();
if (len==0) return true; // pass through zero length terms
int firstAlternativeIncrement = inject ? 0 : posAtt.getPositionIncrement();
- String v = new String(termAtt.termBuffer(), 0, len);
+ String v = termAtt.toString();
String primaryPhoneticValue = encoder.doubleMetaphone(v);
String alternatePhoneticValue = encoder.doubleMetaphone(v, true);
@@ -74,7 +72,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
}
posAtt.setPositionIncrement( firstAlternativeIncrement );
firstAlternativeIncrement = 0;
- termAtt.setTermBuffer(primaryPhoneticValue);
+ termAtt.setEmpty().append(primaryPhoneticValue);
saveState = true;
}
@@ -86,7 +84,7 @@ public class DoubleMetaphoneFilter extends TokenFilter {
saveState = false;
}
posAtt.setPositionIncrement( firstAlternativeIncrement );
- termAtt.setTermBuffer(alternatePhoneticValue);
+ termAtt.setEmpty().append(alternatePhoneticValue);
saveState = true;
}
diff --git a/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java b/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
index aadfc682d80..ec9d77b8678 100755
--- a/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/HyphenatedWordsFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* When the plain text is extracted from documents, we will often have many words hyphenated and broken into
@@ -54,7 +54,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
*/
public final class HyphenatedWordsFilter extends TokenFilter {
- private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final StringBuilder hyphenated = new StringBuilder();
@@ -75,8 +75,8 @@ public final class HyphenatedWordsFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
- char[] term = termAttribute.termBuffer();
- int termLength = termAttribute.termLength();
+ char[] term = termAttribute.buffer();
+ int termLength = termAttribute.length();
if (termLength > 0 && term[termLength - 1] == '-') {
// a hyphenated word
@@ -128,14 +128,14 @@ public final class HyphenatedWordsFilter extends TokenFilter {
restoreState(savedState);
savedState = null;
- char term[] = termAttribute.termBuffer();
+ char term[] = termAttribute.buffer();
int length = hyphenated.length();
- if (length > termAttribute.termLength()) {
- term = termAttribute.resizeTermBuffer(length);
+ if (length > termAttribute.length()) {
+ term = termAttribute.resizeBuffer(length);
}
hyphenated.getChars(0, length, term, 0);
- termAttribute.setTermLength(length);
+ termAttribute.setLength(length);
offsetAttribute.setOffset(offsetAttribute.startOffset(), endOffset);
hyphenated.setLength(0);
}
diff --git a/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java b/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
index ca26532a67d..4a78a94fea8 100644
--- a/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/KeepWordFilter.java
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.util.Set;
@@ -34,7 +34,7 @@ import java.util.Set;
*/
public final class KeepWordFilter extends TokenFilter {
private final CharArraySet words;
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/** @deprecated Use {@link #KeepWordFilter(TokenStream, Set, boolean)} instead */
@Deprecated
@@ -47,13 +47,12 @@ public final class KeepWordFilter extends TokenFilter {
public KeepWordFilter(TokenStream in, CharArraySet words) {
super(in);
this.words = words;
- this.termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
- if (words.contains(termAtt.termBuffer(), 0, termAtt.termLength())) return true;
+ if (words.contains(termAtt.buffer(), 0, termAtt.length())) return true;
}
return false;
}
diff --git a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
index 908294b211a..b9831fc6901 100644
--- a/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/PatternReplaceFilter.java
@@ -42,7 +42,7 @@ public final class PatternReplaceFilter extends TokenFilter {
private final Pattern p;
private final String replacement;
private final boolean all;
- private final CharTermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final Matcher m;
/**
@@ -64,7 +64,6 @@ public final class PatternReplaceFilter extends TokenFilter {
this.p=p;
this.replacement = (null == replacement) ? "" : replacement;
this.all=all;
- this.termAtt = addAttribute(CharTermAttribute.class);
this.m = p.matcher(termAtt);
}
diff --git a/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java b/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
index 9253e936f02..b387767bbc6 100644
--- a/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
+++ b/solr/src/java/org/apache/solr/analysis/PatternTokenizer.java
@@ -22,7 +22,7 @@ import java.io.Reader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.commons.io.IOUtils;
@@ -56,7 +56,7 @@ import org.apache.commons.io.IOUtils;
*/
public final class PatternTokenizer extends Tokenizer {
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private String str;
@@ -86,7 +86,7 @@ public final class PatternTokenizer extends Tokenizer {
while (matcher.find()) {
final String match = matcher.group(group);
if (match.length() == 0) continue;
- termAtt.setTermBuffer(match);
+ termAtt.setEmpty().append(match);
index = matcher.start(group);
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.end(group)));
return true;
@@ -101,7 +101,7 @@ public final class PatternTokenizer extends Tokenizer {
while (matcher.find()) {
if (matcher.start() - index > 0) {
// found a non-zero-length token
- termAtt.setTermBuffer(str, index, matcher.start() - index);
+ termAtt.setEmpty().append(str, index, matcher.start());
offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start()));
index = matcher.end();
return true;
@@ -115,7 +115,7 @@ public final class PatternTokenizer extends Tokenizer {
return false;
}
- termAtt.setTermBuffer(str, index, str.length() - index);
+ termAtt.setEmpty().append(str, index, str.length());
offsetAtt.setOffset(correctOffset(index), correctOffset(str.length()));
index = Integer.MAX_VALUE; // mark exhausted
return true;
diff --git a/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java b/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
index dcf6d8d63cc..a6d0a3bbe21 100644
--- a/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/PhoneticFilter.java
@@ -20,7 +20,7 @@ package org.apache.solr.analysis;
import org.apache.commons.codec.Encoder;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import java.io.IOException;
@@ -31,23 +31,21 @@ import java.io.IOException;
*
* @version $Id$
*/
-public class PhoneticFilter extends TokenFilter
+public final class PhoneticFilter extends TokenFilter
{
protected boolean inject = true;
protected Encoder encoder = null;
protected String name = null;
protected State save = null;
- private final TermAttribute termAtt;
- private final PositionIncrementAttribute posAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);
public PhoneticFilter(TokenStream in, Encoder encoder, String name, boolean inject) {
super(in);
this.encoder = encoder;
this.name = name;
- this.inject = inject;
- this.termAtt = addAttribute(TermAttribute.class);
- this.posAtt = addAttribute(PositionIncrementAttribute.class);
+ this.inject = inject;
}
@Override
@@ -62,9 +60,9 @@ public class PhoneticFilter extends TokenFilter
if (!input.incrementToken()) return false;
// pass through zero-length terms
- if (termAtt.termLength()==0) return true;
+ if (termAtt.length() == 0) return true;
- String value = termAtt.term();
+ String value = termAtt.toString();
String phonetic = null;
try {
String v = encoder.encode(value).toString();
@@ -75,7 +73,7 @@ public class PhoneticFilter extends TokenFilter
if (!inject) {
// just modify this token
- termAtt.setTermBuffer(phonetic);
+ termAtt.setEmpty().append(phonetic);
return true;
}
@@ -88,7 +86,7 @@ public class PhoneticFilter extends TokenFilter
save = captureState();
posAtt.setPositionIncrement(origOffset);
- termAtt.setTermBuffer(phonetic);
+ termAtt.setEmpty().append(phonetic);
return true;
}
diff --git a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
index 563356c70af..2978115867d 100644
--- a/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/RemoveDuplicatesTokenFilter.java
@@ -20,8 +20,8 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -31,7 +31,7 @@ import java.io.IOException;
*/
public final class RemoveDuplicatesTokenFilter extends TokenFilter {
- private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
// use a fixed version, as we don't care about case sensitivity.
@@ -52,8 +52,8 @@ public final class RemoveDuplicatesTokenFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
- final char term[] = termAttribute.termBuffer();
- final int length = termAttribute.termLength();
+ final char term[] = termAttribute.buffer();
+ final int length = termAttribute.length();
final int posIncrement = posIncAttribute.getPositionIncrement();
if (posIncrement > 0) {
diff --git a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
index d820ec4040d..4a595e0b5dc 100644
--- a/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/ReversedWildcardFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class produces a special form of reversed tokens, suitable for
@@ -35,17 +35,17 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
* <code>withOriginal == true</code>, which proportionally increases the size
* of postings and term dictionary in the index.
*/
-public class ReversedWildcardFilter extends TokenFilter {
+public final class ReversedWildcardFilter extends TokenFilter {
private boolean withOriginal;
private char markerChar;
private State save;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posAtt;
protected ReversedWildcardFilter(TokenStream input, boolean withOriginal, char markerChar) {
super(input);
- this.termAtt = addAttribute(TermAttribute.class);
+ this.termAtt = addAttribute(CharTermAttribute.class);
this.posAtt = addAttribute(PositionIncrementAttribute.class);
this.withOriginal = withOriginal;
this.markerChar = markerChar;
@@ -63,19 +63,19 @@ public class ReversedWildcardFilter extends TokenFilter {
if (!input.incrementToken()) return false;
// pass through zero-length terms
- int oldLen = termAtt.termLength();
+ int oldLen = termAtt.length();
if (oldLen ==0) return true;
int origOffset = posAtt.getPositionIncrement();
if (withOriginal == true){
posAtt.setPositionIncrement(0);
save = captureState();
}
- char [] buffer = termAtt.resizeTermBuffer(oldLen + 1);
+ char [] buffer = termAtt.resizeBuffer(oldLen + 1);
buffer[oldLen] = markerChar;
reverse(buffer, 0, oldLen + 1);
posAtt.setPositionIncrement(origOffset);
- termAtt.setTermBuffer(buffer, 0, oldLen +1);
+ termAtt.copyBuffer(buffer, 0, oldLen +1);
return true;
}
diff --git a/solr/src/java/org/apache/solr/analysis/SynonymFilter.java b/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
index 4a7db36db95..376fad329d9 100644
--- a/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/SynonymFilter.java
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -41,7 +41,7 @@ import java.util.LinkedList;
*
* @version $Id$
*/
-public class SynonymFilter extends TokenFilter {
+public final class SynonymFilter extends TokenFilter {
private final SynonymMap map; // Map<String, SynonymMap>
private Iterator replacement; // iterator over generated tokens
@@ -50,7 +50,7 @@ public class SynonymFilter extends TokenFilter {
super(in);
this.map = map;
// just ensuring these attributes exist...
- addAttribute(TermAttribute.class);
+ addAttribute(CharTermAttribute.class);
addAttribute(PositionIncrementAttribute.class);
addAttribute(OffsetAttribute.class);
addAttribute(TypeAttribute.class);
@@ -87,8 +87,8 @@ public class SynonymFilter extends TokenFilter {
// common case fast-path of first token not matching anything
AttributeSource firstTok = nextTok();
if (firstTok == null) return false;
- TermAttribute termAtt = firstTok.addAttribute(TermAttribute.class);
- SynonymMap result = map.submap!=null ? map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength()) : null;
+ CharTermAttribute termAtt = firstTok.addAttribute(CharTermAttribute.class);
+ SynonymMap result = map.submap!=null ? map.submap.get(termAtt.buffer(), 0, termAtt.length()) : null;
if (result == null) {
copy(this, firstTok);
return true;
@@ -128,14 +128,14 @@ public class SynonymFilter extends TokenFilter {
for (int i=0; i1, should not match, if==0, check multiple at this level?
- TermAttribute termAtt = tok.getAttribute(TermAttribute.class);
- SynonymMap subMap = map.submap.get(termAtt.termBuffer(), 0, termAtt.termLength());
+ CharTermAttribute termAtt = tok.getAttribute(CharTermAttribute.class);
+ SynonymMap subMap = map.submap.get(termAtt.buffer(), 0, termAtt.length());
if (subMap != null) {
// recurse
diff --git a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
index 049143172a4..c6ae83271bc 100644
--- a/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/SynonymFilterFactory.java
@@ -18,7 +18,7 @@
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.solr.common.ResourceLoader;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.util.plugin.ResourceLoaderAware;
@@ -135,11 +135,10 @@ public class SynonymFilterFactory extends BaseTokenFilterFactory implements Reso
TokenStream ts = loadTokenizer(tokFactory, reader);
List tokList = new ArrayList();
try {
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()){
- String text = new String(termAtt.termBuffer(), 0, termAtt.termLength());
- if( text.length() > 0 )
- tokList.add( text );
+ if( termAtt.length() > 0 )
+ tokList.add( termAtt.toString() );
}
} catch (IOException e) {
throw new RuntimeException(e);
diff --git a/solr/src/java/org/apache/solr/analysis/TokenizerChain.java b/solr/src/java/org/apache/solr/analysis/TokenizerChain.java
index 3f92f256d80..e882fefaa90 100644
--- a/solr/src/java/org/apache/solr/analysis/TokenizerChain.java
+++ b/solr/src/java/org/apache/solr/analysis/TokenizerChain.java
@@ -32,7 +32,7 @@ import java.io.Reader;
// An analyzer that uses a tokenizer and a list of token filters to
// create a TokenStream.
//
-public class TokenizerChain extends SolrAnalyzer {
+public final class TokenizerChain extends SolrAnalyzer {
final private CharFilterFactory[] charFilters;
final private TokenizerFactory tokenizer;
final private TokenFilterFactory[] filters;
diff --git a/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java b/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
index aec963c8bf2..075d8853176 100644
--- a/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
+++ b/solr/src/java/org/apache/solr/analysis/TrieTokenizerFactory.java
@@ -51,7 +51,7 @@ public class TrieTokenizerFactory extends BaseTokenizerFactory {
}
}
-class TrieTokenizer extends Tokenizer {
+final class TrieTokenizer extends Tokenizer {
protected static final DateField dateField = new DateField();
protected final int precisionStep;
protected final TrieTypes type;
diff --git a/solr/src/java/org/apache/solr/analysis/TrimFilter.java b/solr/src/java/org/apache/solr/analysis/TrimFilter.java
index 821fc27f7b2..b0cc7c3aa49 100644
--- a/solr/src/java/org/apache/solr/analysis/TrimFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/TrimFilter.java
@@ -19,7 +19,7 @@ package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import java.io.IOException;
@@ -32,24 +32,21 @@ import java.io.IOException;
public final class TrimFilter extends TokenFilter {
final boolean updateOffsets;
- private final TermAttribute termAtt;
- private final OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public TrimFilter(TokenStream in, boolean updateOffsets) {
super(in);
this.updateOffsets = updateOffsets;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
- char[] termBuffer = termAtt.termBuffer();
- int len = termAtt.termLength();
+ char[] termBuffer = termAtt.buffer();
+ int len = termAtt.length();
//TODO: Is this the right behavior or should we return false? Currently, " ", returns true, so I think this should
//also return true
if (len == 0){
@@ -69,9 +66,9 @@ public final class TrimFilter extends TokenFilter {
}
if (start > 0 || end < len) {
if (start < end) {
- termAtt.setTermBuffer(termBuffer, start, (end - start));
+ termAtt.copyBuffer(termBuffer, start, (end - start));
} else {
- termAtt.setTermLength(0);
+ termAtt.setEmpty();
}
if (updateOffsets) {
int newStart = offsetAtt.startOffset()+start;
diff --git a/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java b/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
index f5963f873bc..cf65608a5a6 100644
--- a/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
+++ b/solr/src/java/org/apache/solr/analysis/WordDelimiterFilter.java
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@@ -120,7 +120,7 @@ final class WordDelimiterFilter extends TokenFilter {
*/
final CharArraySet protWords;
- private final TermAttribute termAttribute = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class);
private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class);
@@ -338,8 +338,8 @@ final class WordDelimiterFilter extends TokenFilter {
return false;
}
- int termLength = termAttribute.termLength();
- char[] termBuffer = termAttribute.termBuffer();
+ int termLength = termAttribute.length();
+ char[] termBuffer = termAttribute.buffer();
accumPosInc += posIncAttribute.getPositionIncrement();
@@ -462,14 +462,14 @@ final class WordDelimiterFilter extends TokenFilter {
savedStartOffset = offsetAttribute.startOffset();
savedEndOffset = offsetAttribute.endOffset();
// if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets.
- hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.termLength());
+ hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length());
savedType = typeAttribute.type();
- if (savedBuffer.length < termAttribute.termLength()) {
- savedBuffer = new char[ArrayUtil.oversize(termAttribute.termLength(), RamUsageEstimator.NUM_BYTES_CHAR)];
+ if (savedBuffer.length < termAttribute.length()) {
+ savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)];
}
- System.arraycopy(termAttribute.termBuffer(), 0, savedBuffer, 0, termAttribute.termLength());
+ System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length());
iterator.text = savedBuffer;
hasSavedState = true;
@@ -531,7 +531,7 @@ final class WordDelimiterFilter extends TokenFilter {
*/
private void generatePart(boolean isSingleWord) {
clearAttributes();
- termAttribute.setTermBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
+ termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current);
int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset;
int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end;
@@ -636,13 +636,13 @@ final class WordDelimiterFilter extends TokenFilter {
*/
void write() {
clearAttributes();
- if (termAttribute.termLength() < buffer.length()) {
- termAttribute.resizeTermBuffer(buffer.length());
+ if (termAttribute.length() < buffer.length()) {
+ termAttribute.resizeBuffer(buffer.length());
}
- char termbuffer[] = termAttribute.termBuffer();
+ char termbuffer[] = termAttribute.buffer();
buffer.getChars(0, buffer.length(), termbuffer, 0);
- termAttribute.setTermLength(buffer.length());
+ termAttribute.setLength(buffer.length());
if (hasIllegalOffsets) {
offsetAttribute.setOffset(savedStartOffset, savedEndOffset);
diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
index beb8bab5e70..8e580764465 100644
--- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
+++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandler.java
@@ -18,7 +18,6 @@ package org.apache.solr.handler;
import org.apache.commons.io.IOUtils;
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.BytesRef;
@@ -135,10 +134,10 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
// outer is namedList since order of tokens is important
NamedList> tokens = new NamedList>();
// TODO: support custom attributes
- TermAttribute termAtt = null;
+ CharTermAttribute termAtt = null;
TermToBytesRefAttribute bytesAtt = null;
- if (tstream.hasAttribute(TermAttribute.class)) {
- termAtt = tstream.getAttribute(TermAttribute.class);
+ if (tstream.hasAttribute(CharTermAttribute.class)) {
+ termAtt = tstream.getAttribute(CharTermAttribute.class);
} else if (tstream.hasAttribute(TermToBytesRefAttribute.class)) {
bytesAtt = tstream.getAttribute(TermToBytesRefAttribute.class);
}
@@ -151,7 +150,7 @@ public class AnalysisRequestHandler extends RequestHandlerBase {
NamedList