diff --git a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
index 47a58dc8401..22d0c2fb8fa 100755
--- a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
+++ b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
@@ -26,9 +26,10 @@ import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
@@ -918,11 +919,11 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
ts1.reset();
ts2.reset();
- TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class);
- TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
+ CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
assertTrue(ts1.incrementToken());
assertTrue(ts2.incrementToken());
- assertEquals(termAtt1.term(), termAtt2.term());
+ assertEquals(termAtt1.toString(), termAtt2.toString());
assertFalse(ts1.incrementToken());
assertFalse(ts2.incrementToken());
ts1.close();
@@ -994,21 +995,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
private void assertEqualShingle
(Analyzer analyzer, String text, String[] expected) throws Exception {
- TokenStream stream = analyzer.tokenStream("bogus", new StringReader(text));
- stream.reset();
- TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
- int termNum = 0;
- while (stream.incrementToken()) {
- assertTrue("Extra output term(s), starting with '"
- + new String(termAtt.termBuffer(), 0, termAtt.termLength()) + "'",
- termNum < expected.length);
- assertEquals("Mismatch in output term # " + termNum + " - ",
- expected[termNum],
- new String(termAtt.termBuffer(), 0, termAtt.termLength()));
- ++termNum;
- }
- assertEquals("Too few output terms", expected.length, termNum);
- stream.close();
+ BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
}
private String[] getShingleConfig(String params) {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
index b5e4fac21bc..1a692d62c84 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
@@ -23,9 +23,9 @@ import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.PriorityQueue;
/**
@@ -191,7 +191,7 @@ public class Highlighter
ArrayList docFrags = new ArrayList();
StringBuilder newText=new StringBuilder();
- TermAttribute termAtt = tokenStream.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
tokenStream.addAttribute(PositionIncrementAttribute.class);
tokenStream.reset();
@@ -225,7 +225,7 @@ public class Highlighter
(offsetAtt.startOffset()>text.length())
)
{
- throw new InvalidTokenOffsetsException("Token "+ termAtt.term()
+ throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
+" exceeds length of provided text sized "+text.length());
}
if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
index 24dbb4644da..e0b76a4aebd 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
@@ -25,8 +25,8 @@ import java.util.Set;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
@@ -46,7 +46,7 @@ public class QueryScorer implements Scorer {
private float maxTermWeight;
private int position = -1;
private String defaultField;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncAtt;
private boolean expandMultiTermQuery = true;
private Query query;
@@ -145,7 +145,7 @@ public class QueryScorer implements Scorer {
*/
public float getTokenScore() {
position += posIncAtt.getPositionIncrement();
- String termText = termAtt.term();
+ String termText = termAtt.toString();
WeightedSpanTerm weightedSpanTerm;
@@ -175,7 +175,7 @@ public class QueryScorer implements Scorer {
*/
public TokenStream init(TokenStream tokenStream) throws IOException {
position = -1;
- termAtt = tokenStream.addAttribute(TermAttribute.class);
+ termAtt = tokenStream.addAttribute(CharTermAttribute.class);
posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
if(!skipInitExtractor) {
if(fieldWeightedSpanTerms != null) {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
index e44d3072063..167bf3dd6f7 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
@@ -21,7 +21,7 @@ import java.util.HashMap;
import java.util.HashSet;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
@@ -41,7 +41,7 @@ public class QueryTermScorer implements Scorer {
float maxTermWeight = 0;
private HashMap termsToFind;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
/**
*
@@ -95,7 +95,7 @@ public class QueryTermScorer implements Scorer {
* @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
*/
public TokenStream init(TokenStream tokenStream) {
- termAtt = tokenStream.addAttribute(TermAttribute.class);
+ termAtt = tokenStream.addAttribute(CharTermAttribute.class);
return null;
}
@@ -118,7 +118,7 @@ public class QueryTermScorer implements Scorer {
* @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
*/
public float getTokenScore() {
- String termText = termAtt.term();
+ String termText = termAtt.toString();
WeightedTerm queryTerm = termsToFind.get(termText);
if (queryTerm == null) {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
index e7cb034f83e..c468867850c 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
@@ -20,9 +20,9 @@ package org.apache.lucene.search.highlight;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.search.spans.Spans;
@@ -38,7 +38,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
private QueryScorer queryScorer;
private int waitForPos = -1;
private int textSize;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncAtt;
private OffsetAttribute offsetAtt;
@@ -70,7 +70,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
return false;
}
- WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
+ WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.toString());
if (wSpanTerm != null) {
List positionSpans = wSpanTerm.getPositionSpans();
@@ -101,7 +101,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
position = -1;
currentNumFrags = 1;
textSize = originalText.length();
- termAtt = tokenStream.addAttribute(TermAttribute.class);
+ termAtt = tokenStream.addAttribute(CharTermAttribute.class);
posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
}
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
index 57355e51427..e0f88f2daea 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
@@ -19,8 +19,8 @@ package org.apache.lucene.search.highlight;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* One, or several overlapping tokens, along with the score(s) and the scope of
@@ -38,11 +38,11 @@ public class TokenGroup {
int matchStartOffset, matchEndOffset;
private OffsetAttribute offsetAtt;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
public TokenGroup(TokenStream tokenStream) {
offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
- termAtt = tokenStream.addAttribute(TermAttribute.class);
+ termAtt = tokenStream.addAttribute(CharTermAttribute.class);
}
void addToken(float score) {
@@ -68,7 +68,7 @@ public class TokenGroup {
}
}
Token token = new Token(termStartOffset, termEndOffset);
- token.setTermBuffer(termAtt.term());
+ token.setEmpty().append(termAtt);
tokens[numTokens] = token;
scores[numTokens] = score;
numTokens++;
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
index 5129f238ac7..e5ecc8bd92a 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
@@ -29,8 +29,8 @@ import java.util.Comparator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermFreqVector;
@@ -153,13 +153,13 @@ public class TokenSources {
int currentToken = 0;
- TermAttribute termAtt;
+ CharTermAttribute termAtt;
OffsetAttribute offsetAtt;
StoredTokenStream(Token tokens[]) {
this.tokens = tokens;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
}
@@ -170,7 +170,7 @@ public class TokenSources {
}
Token token = tokens[currentToken++];
clearAttributes();
- termAtt.setTermBuffer(token.term());
+ termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
return true;
}
@@ -204,9 +204,8 @@ public class TokenSources {
unsortedTokens = new ArrayList();
}
for (int tp = 0; tp < offsets.length; tp++) {
- Token token = new Token(offsets[tp].getStartOffset(), offsets[tp]
+ Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp]
.getEndOffset());
- token.setTermBuffer(terms[t]);
unsortedTokens.add(token);
}
} else {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
index 8cb2f141b79..810441677c5 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
@@ -25,9 +25,9 @@ import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
@@ -37,7 +37,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
private Iterator tokensAtCurrentPosition;
- private TermAttribute termAttribute;
+ private CharTermAttribute termAttribute;
private PositionIncrementAttribute positionIncrementAttribute;
@@ -51,7 +51,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
*/
public TokenStreamFromTermPositionVector(
final TermPositionVector termPositionVector) {
- termAttribute = addAttribute(TermAttribute.class);
+ termAttribute = addAttribute(CharTermAttribute.class);
positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
offsetAttribute = addAttribute(OffsetAttribute.class);
final String[] terms = termPositionVector.getTerms();
@@ -65,7 +65,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
offsets[j].getStartOffset(), offsets[j].getEndOffset());
} else {
token = new Token();
- token.setTermBuffer(terms[i]);
+ token.setEmpty().append(terms[i]);
}
// Yes - this is the position, not the increment! This is for
// sorting. This value
@@ -100,7 +100,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
if (this.tokensAtCurrentPosition.hasNext()) {
final Token next = this.tokensAtCurrentPosition.next();
clearAttributes();
- termAttribute.setTermBuffer(next.term());
+ termAttribute.setEmpty().append(next);
positionIncrementAttribute.setPositionIncrement(next
.getPositionIncrement());
offsetAttribute.setOffset(next.startOffset(), next.endOffset());
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
index 9076a69d2e7..40166fcd531 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -296,16 +296,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
private int i = -1;
- private TermAttribute termAttribute;
-
- private OffsetAttribute offsetAttribute;
-
- private PositionIncrementAttribute positionIncrementAttribute;
+ private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
public TokenStreamSparse() {
- termAttribute = addAttribute(TermAttribute.class);
- offsetAttribute = addAttribute(OffsetAttribute.class);
- positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
reset();
}
@@ -316,8 +311,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
return false;
}
clearAttributes();
- termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i]
- .term().length());
+ termAttribute.setEmpty().append(this.tokens[i]);
offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
.endOffset());
positionIncrementAttribute.setPositionIncrement(this.tokens[i]
@@ -342,16 +336,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
private int i = -1;
- private TermAttribute termAttribute;
-
- private OffsetAttribute offsetAttribute;
-
- private PositionIncrementAttribute positionIncrementAttribute;
+ private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
public TokenStreamConcurrent() {
- termAttribute = addAttribute(TermAttribute.class);
- offsetAttribute = addAttribute(OffsetAttribute.class);
- positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
reset();
}
@@ -362,8 +351,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
return false;
}
clearAttributes();
- termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i]
- .term().length());
+ termAttribute.setEmpty().append(this.tokens[i]);
offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
.endOffset());
positionIncrementAttribute.setPositionIncrement(this.tokens[i]
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index ddb065387ce..1b786a3a41c 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -41,7 +41,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
@@ -1424,13 +1424,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
return new TokenStream() {
Iterator iter;
List lst;
- private TermAttribute termAtt;
- private PositionIncrementAttribute posIncrAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
{
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
lst = new ArrayList();
Token t;
t = createToken("hi", 0, 2);
@@ -1456,7 +1453,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
if(iter.hasNext()) {
Token token = iter.next();
clearAttributes();
- termAtt.setTermBuffer(token.term());
+ termAtt.setEmpty().append(token);
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
return true;
@@ -1473,13 +1470,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
return new TokenStream() {
Iterator iter;
List lst;
- private TermAttribute termAtt;
- private PositionIncrementAttribute posIncrAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
{
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
lst = new ArrayList();
Token t;
t = createToken("hispeed", 0, 8);
@@ -1505,7 +1499,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
if(iter.hasNext()) {
Token token = iter.next();
clearAttributes();
- termAtt.setTermBuffer(token.term());
+ termAtt.setEmpty().append(token);
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
return true;
@@ -1762,9 +1756,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
private static Token createToken(String term, int start, int offset)
{
- Token token = new Token(start, offset);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset);
}
}
@@ -1795,7 +1787,7 @@ final class SynonymAnalyzer extends Analyzer {
@Override
public TokenStream tokenStream(String arg0, Reader arg1) {
Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
- stream.addAttribute(TermAttribute.class);
+ stream.addAttribute(CharTermAttribute.class);
stream.addAttribute(PositionIncrementAttribute.class);
stream.addAttribute(OffsetAttribute.class);
return new SynonymTokenizer(stream, synonyms);
@@ -1811,21 +1803,21 @@ final class SynonymTokenizer extends TokenStream {
private Token currentRealToken = null;
private Map synonyms;
StringTokenizer st = null;
- private TermAttribute realTermAtt;
+ private CharTermAttribute realTermAtt;
private PositionIncrementAttribute realPosIncrAtt;
private OffsetAttribute realOffsetAtt;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private OffsetAttribute offsetAtt;
public SynonymTokenizer(TokenStream realStream, Map synonyms) {
this.realStream = realStream;
this.synonyms = synonyms;
- realTermAtt = realStream.addAttribute(TermAttribute.class);
+ realTermAtt = realStream.addAttribute(CharTermAttribute.class);
realPosIncrAtt = realStream.addAttribute(PositionIncrementAttribute.class);
realOffsetAtt = realStream.addAttribute(OffsetAttribute.class);
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
}
@@ -1840,25 +1832,25 @@ final class SynonymTokenizer extends TokenStream {
}
//Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
clearAttributes();
- termAtt.setTermBuffer(realTermAtt.term());
+ termAtt.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement());
- String expansions = synonyms.get(realTermAtt.term());
+ String expansions = synonyms.get(realTermAtt.toString());
if (expansions == null) {
return true;
}
st = new StringTokenizer(expansions, ",");
if (st.hasMoreTokens()) {
currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
- currentRealToken.setTermBuffer(realTermAtt.term());
+ currentRealToken.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
}
return true;
} else {
String tok = st.nextToken();
clearAttributes();
- termAtt.setTermBuffer(tok);
+ termAtt.setEmpty().append(tok);
offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset());
posIncrAtt.setPositionIncrement(0);
if (!st.hasMoreTokens()) {
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
index f32c7e08367..b0926087144 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
@@ -26,8 +26,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -221,14 +221,14 @@ public abstract class AbstractTestCase extends LuceneTestCase {
ch = 0;
}
- TermAttribute termAtt = addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
public boolean incrementToken() throws IOException {
if( !getNextPartialSnippet() )
return false;
clearAttributes();
- termAtt.setTermBuffer(snippet, startTerm, lenTerm);
+ termAtt.setEmpty().append(snippet, startTerm, startTerm + lenTerm);
offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm));
return true;
}
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
index 53a1602f625..f31a5bd0993 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
@@ -25,7 +25,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.util.AttributeImpl;
@@ -301,7 +301,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
- final AttributeImpl reusableToken = (AttributeImpl) addAttribute(TermAttribute.class);
+ final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
int p = 0;
@Override
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
index 5e336ebc296..69c05bf15d6 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
@@ -561,7 +561,7 @@ public class InstantiatedIndexWriter implements Closeable {
// untokenized
String fieldVal = field.stringValue();
Token token = new Token(0, fieldVal.length(), "untokenized");
- token.setTermBuffer(fieldVal);
+ token.setEmpty().append(fieldVal);
tokens.add(token);
fieldSetting.fieldLength++;
}
@@ -596,10 +596,10 @@ public class InstantiatedIndexWriter implements Closeable {
for (Token token : eField_Tokens.getValue()) {
- TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.term());
+ TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.toString());
if (termDocumentInformationFactory == null) {
termDocumentInformationFactory = new TermDocumentInformationFactory();
- termDocumentInformationFactoryByTermText.put(token.term(), termDocumentInformationFactory);
+ termDocumentInformationFactoryByTermText.put(token.toString(), termDocumentInformationFactory);
}
//termDocumentInformationFactory.termFrequency++;
diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
index dbfdcbb7de0..ebb66c8d5a4 100644
--- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
+++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
@@ -25,7 +25,7 @@ import java.util.List;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -278,7 +278,7 @@ public class TestIndicesEquals extends LuceneTestCase {
tokens.add(t);
tokens.add(createToken("fin", 7, 9));
TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
- final AttributeImpl reusableToken = (AttributeImpl) addAttribute(TermAttribute.class);
+ final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
Iterator it = tokens.iterator();
@Override
@@ -601,16 +601,12 @@ public class TestIndicesEquals extends LuceneTestCase {
private static Token createToken(String term, int start, int offset)
{
- Token token = new Token(start, offset);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset);
}
private static Token createToken(String term, int start, int offset, String type)
{
- Token token = new Token(start, offset, type);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset, type);
}
diff --git a/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java b/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
index 6bb65ca3931..5671f476848 100644
--- a/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
+++ b/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
@@ -36,8 +36,8 @@ import jline.ConsoleReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
@@ -303,14 +303,14 @@ class LuceneMethods {
int position = 0;
// Tokenize field and add to postingTable
TokenStream stream = analyzer.tokenStream(fieldName, reader);
- TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
try {
while (stream.incrementToken()) {
position += (posIncrAtt.getPositionIncrement() - 1);
position++;
- String name = termAtt.term();
+ String name = termAtt.toString();
Integer Count = tokenMap.get(name);
if (Count == null) { // not in there yet
tokenMap.put(name, Integer.valueOf(1)); //first one
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 35c5a58bfb6..311fb0580d0 100644
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -30,9 +30,10 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
@@ -51,6 +52,7 @@ import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory; // for javadocs
+import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants; // for javadocs
/**
@@ -276,8 +278,8 @@ public class MemoryIndex implements Serializable {
return new TokenStream() {
private Iterator iter = keywords.iterator();
private int start = 0;
- private TermAttribute termAtt = addAttribute(TermAttribute.class);
- private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
public boolean incrementToken() {
@@ -289,8 +291,8 @@ public class MemoryIndex implements Serializable {
String term = obj.toString();
clearAttributes();
- termAtt.setTermBuffer(term);
- offsetAtt.setOffset(start, start+termAtt.termLength());
+ termAtt.setEmpty().append(term);
+ offsetAtt.setOffset(start, start+termAtt.length());
start += term.length() + 1; // separate words by 1 (blank) character
return true;
}
@@ -340,13 +342,15 @@ public class MemoryIndex implements Serializable {
int numOverlapTokens = 0;
int pos = -1;
- TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+ TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
-
+ BytesRef ref = new BytesRef(10);
stream.reset();
while (stream.incrementToken()) {
- String term = termAtt.term();
+ termAtt.toBytesRef(ref);
+ // TODO: support non-UTF8 strings (like numerics) here
+ String term = ref.utf8ToString();
if (term.length() == 0) continue; // nothing to do
// if (DEBUG) System.err.println("token='" + term + "'");
numTokens++;
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
index 84c76bd7dd0..cfef2072376 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
@@ -26,7 +26,7 @@ import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
@@ -185,14 +185,14 @@ public class FuzzyLikeThisQuery extends Query
{
if(f.queryString==null) return;
TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
int corpusNumDocs=reader.numDocs();
Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
HashSet processedTerms=new HashSet();
while (ts.incrementToken())
{
- String term = termAtt.term();
+ String term = termAtt.toString();
if(!processedTerms.contains(term))
{
processedTerms.add(term);
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
index f7970f8d8ff..d54e237dd4b 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
@@ -32,7 +32,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -884,10 +884,10 @@ public final class MoreLikeThis {
TokenStream ts = analyzer.tokenStream(fieldName, r);
int tokenCount=0;
// for every token
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
- String word = termAtt.term();
+ String word = termAtt.toString();
tokenCount++;
if(tokenCount>maxNumTokensParsed)
{
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
index 6a780ad6256..5fd953bfc83 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
@@ -22,7 +22,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -86,12 +86,12 @@ public final class SimilarityQueries
throws IOException
{
TokenStream ts = a.tokenStream( field, new StringReader( body));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
BooleanQuery tmp = new BooleanQuery();
Set already = new HashSet(); // ignore dups
while (ts.incrementToken()) {
- String word = termAtt.term();
+ String word = termAtt.toString();
// ignore opt stop words
if ( stop != null &&
stop.contains( word)) continue;
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
index 1dac672ec36..7d9f2f3152d 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
@@ -24,7 +24,7 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
@@ -107,7 +107,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
int countTokens = 0;
while (true) {
@@ -116,7 +116,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
} catch (IOException e) {
break;
}
- String term = termAtt.term();
+ String term = termAtt.toString();
if (!"".equals(term)) {
try {
tlist.set(countTokens++, term);
@@ -190,7 +190,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
List tlist = new ArrayList();
- TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
while (true) {
try {
@@ -198,7 +198,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
} catch (IOException e) {
break;
}
- tlist.add(termAtt.term());
+ tlist.add(termAtt.toString());
}
try {
@@ -237,13 +237,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
throws ParseException {
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
- TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
String nextToken = null;
boolean multipleTokens = false;
try {
if (source.incrementToken()) {
- nextToken = termAtt.term();
+ nextToken = termAtt.toString();
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
@@ -273,13 +273,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
throws ParseException {
// get Analyzer from superclass and tokenize the terms
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1));
- TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
boolean multipleTokens = false;
// part1
try {
if (source.incrementToken()) {
- part1 = termAtt.term();
+ part1 = termAtt.toString();
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
@@ -297,11 +297,11 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
// part2
source = getAnalyzer().tokenStream(field, new StringReader(part2));
- termAtt = source.addAttribute(TermAttribute.class);
+ termAtt = source.addAttribute(CharTermAttribute.class);
try {
if (source.incrementToken()) {
- part2 = termAtt.term();
+ part2 = termAtt.toString();
}
multipleTokens = source.incrementToken();
} catch (IOException e) {
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
index b76ddf0d3c5..3ff9dfb3ae5 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
@@ -307,7 +307,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
List list = new ArrayList();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
- TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
@@ -328,7 +328,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
return null;
else if (list.size() == 1) {
source.restoreState(list.get(0));
- return new TermQuery(new Term(field, termAtt.term()));
+ return new TermQuery(new Term(field, termAtt.toString()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
@@ -337,7 +337,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
for (int i = 0; i < list.size(); i++) {
source.restoreState(list.get(i));
TermQuery currentQuery = new TermQuery(
- new Term(field, termAtt.term()));
+ new Term(field, termAtt.toString()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
@@ -352,7 +352,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
- multiTerms.add(new Term(field, termAtt.term()));
+ multiTerms.add(new Term(field, termAtt.toString()));
}
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
@@ -363,7 +363,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
source.restoreState(list.get(i));
- q.add(new Term(field, termAtt.term()));
+ q.add(new Term(field, termAtt.toString()));
}
return q;
}
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
index 9cd21242042..c8f740b4ea0 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
@@ -331,7 +331,7 @@ public class PrecedenceQueryParser {
List list = new ArrayList();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
- TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
@@ -352,7 +352,7 @@ public class PrecedenceQueryParser {
return null;
else if (list.size() == 1) {
source.restoreState(list.get(0));
- return new TermQuery(new Term(field, termAtt.term()));
+ return new TermQuery(new Term(field, termAtt.toString()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
@@ -361,7 +361,7 @@ public class PrecedenceQueryParser {
for (int i = 0; i < list.size(); i++) {
source.restoreState(list.get(i));
TermQuery currentQuery = new TermQuery(
- new Term(field, termAtt.term()));
+ new Term(field, termAtt.toString()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
@@ -376,7 +376,7 @@ public class PrecedenceQueryParser {
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
- multiTerms.add(new Term(field, termAtt.term()));
+ multiTerms.add(new Term(field, termAtt.toString()));
}
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
@@ -387,7 +387,7 @@ public class PrecedenceQueryParser {
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
source.restoreState(list.get(i));
- q.add(new Term(field, termAtt.term()));
+ q.add(new Term(field, termAtt.toString()));
}
return q;
}
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
index 7be5c9afafd..818b3f98c14 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
@@ -26,8 +26,8 @@ import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
@@ -162,11 +162,11 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
// ignore
}
- if (!buffer.hasAttribute(TermAttribute.class)) {
+ if (!buffer.hasAttribute(CharTermAttribute.class)) {
return new NoTokenFoundQueryNode();
}
- TermAttribute termAtt = buffer.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
if (numTokens == 0) {
return new NoTokenFoundQueryNode();
@@ -177,7 +177,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
boolean hasNext;
hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
@@ -197,7 +197,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
} catch (IOException e) {
// safe to ignore, because we know the number of tokens
@@ -224,7 +224,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
}
@@ -290,7 +290,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
try {
boolean hasNext = buffer.incrementToken();
assert hasNext == true;
- term = termAtt.term();
+ term = termAtt.toString();
if (posIncrAtt != null) {
positionIncrement = posIncrAtt.getPositionIncrement();
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
index b0907db3b4e..5f26ed078f0 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
@@ -23,8 +23,8 @@ import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
@@ -68,7 +68,7 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;
- TermAttribute termAtt = addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@Override
@@ -76,19 +76,19 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
clearAttributes();
if (inPhrase) {
inPhrase = false;
- termAtt.setTermBuffer("phrase2");
+ termAtt.setEmpty().append("phrase2");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
} else
while(input.incrementToken())
- if (termAtt.term().equals("phrase")) {
+ if (termAtt.toString().equals("phrase")) {
inPhrase = true;
savedStart = offsetAtt.startOffset();
savedEnd = offsetAtt.endOffset();
- termAtt.setTermBuffer("phrase1");
+ termAtt.setEmpty().append("phrase1");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
- } else if (!termAtt.term().equals("stop"))
+ } else if (!termAtt.toString().equals("stop"))
return true;
return false;
}
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
index e98cc6f80a3..ea5907a366c 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
@@ -23,9 +23,9 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
@@ -163,24 +163,19 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
private int prevStartOffset;
private int prevEndOffset;
- TermAttribute termAtt;
- PositionIncrementAttribute posIncrAtt;
- OffsetAttribute offsetAtt;
- TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public TestFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
-
}
@Override
public final boolean incrementToken() throws java.io.IOException {
if (multiToken > 0) {
- termAtt.setTermBuffer("multi" + (multiToken + 1));
+ termAtt.setEmpty().append("multi" + (multiToken + 1));
offsetAtt.setOffset(prevStartOffset, prevEndOffset);
typeAtt.setType(prevType);
posIncrAtt.setPositionIncrement(0);
@@ -194,7 +189,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
prevType = typeAtt.type();
prevStartOffset = offsetAtt.startOffset();
prevEndOffset = offsetAtt.endOffset();
- String text = termAtt.term();
+ String text = termAtt.toString();
if (text.equals("triplemulti")) {
multiToken = 2;
return true;
@@ -228,21 +223,19 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
private class TestPosIncrementFilter extends TokenFilter {
- TermAttribute termAtt;
- PositionIncrementAttribute posIncrAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
public TestPosIncrementFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
public final boolean incrementToken() throws java.io.IOException {
while (input.incrementToken()) {
- if (termAtt.term().equals("the")) {
+ if (termAtt.toString().equals("the")) {
// stopword, do nothing
- } else if (termAtt.term().equals("quick")) {
+ } else if (termAtt.toString().equals("quick")) {
posIncrAtt.setPositionIncrement(2);
return true;
} else {
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
index 8e56944a2ec..4f3b14a4e8b 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
@@ -23,9 +23,9 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.util.LuceneTestCase;
@@ -157,24 +157,19 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
private int prevStartOffset;
private int prevEndOffset;
- TermAttribute termAtt;
- PositionIncrementAttribute posIncrAtt;
- OffsetAttribute offsetAtt;
- TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public TestFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
-
}
@Override
public final boolean incrementToken() throws java.io.IOException {
if (multiToken > 0) {
- termAtt.setTermBuffer("multi" + (multiToken + 1));
+ termAtt.setEmpty().append("multi" + (multiToken + 1));
offsetAtt.setOffset(prevStartOffset, prevEndOffset);
typeAtt.setType(prevType);
posIncrAtt.setPositionIncrement(0);
@@ -188,7 +183,7 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
prevType = typeAtt.type();
prevStartOffset = offsetAtt.startOffset();
prevEndOffset = offsetAtt.endOffset();
- String text = termAtt.term();
+ String text = termAtt.toString();
if (text.equals("triplemulti")) {
multiToken = 2;
return true;
@@ -222,21 +217,19 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
private class TestPosIncrementFilter extends TokenFilter {
- TermAttribute termAtt;
- PositionIncrementAttribute posIncrAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
public TestPosIncrementFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
@Override
public final boolean incrementToken() throws java.io.IOException {
while (input.incrementToken()) {
- if (termAtt.term().equals("the")) {
+ if (termAtt.toString().equals("the")) {
// stopword, do nothing
- } else if (termAtt.term().equals("quick")) {
+ } else if (termAtt.toString().equals("quick")) {
posIncrAtt.setPositionIncrement(2);
return true;
} else {
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
index addbca26476..4d3e3840355 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
@@ -37,8 +37,8 @@ import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
@@ -96,8 +96,8 @@ public class TestQPHelper extends LocalizedTestCase {
public static Analyzer qpAnalyzer = new QPTestAnalyzer();
public static final class QPTestFilter extends TokenFilter {
- TermAttribute termAtt;
- OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Filter which discards the token 'stop' and which expands the token
@@ -105,8 +105,6 @@ public class TestQPHelper extends LocalizedTestCase {
*/
public QPTestFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
}
boolean inPhrase = false;
@@ -117,19 +115,19 @@ public class TestQPHelper extends LocalizedTestCase {
if (inPhrase) {
inPhrase = false;
clearAttributes();
- termAtt.setTermBuffer("phrase2");
+ termAtt.setEmpty().append("phrase2");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
} else
while (input.incrementToken()) {
- if (termAtt.term().equals("phrase")) {
+ if (termAtt.toString().equals("phrase")) {
inPhrase = true;
savedStart = offsetAtt.startOffset();
savedEnd = offsetAtt.endOffset();
- termAtt.setTermBuffer("phrase1");
+ termAtt.setEmpty().append("phrase1");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
- } else if (!termAtt.term().equals("stop"))
+ } else if (!termAtt.toString().equals("stop"))
return true;
}
return false;
@@ -1158,7 +1156,7 @@ public class TestQPHelper extends LocalizedTestCase {
private class CannedTokenStream extends TokenStream {
private int upto = 0;
final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
- final TermAttribute term = addAttribute(TermAttribute.class);
+ final CharTermAttribute term = addAttribute(CharTermAttribute.class);
@Override
public boolean incrementToken() {
clearAttributes();
@@ -1167,16 +1165,16 @@ public class TestQPHelper extends LocalizedTestCase {
}
if (upto == 0) {
posIncr.setPositionIncrement(1);
- term.setTermBuffer("a");
+ term.setEmpty().append("a");
} else if (upto == 1) {
posIncr.setPositionIncrement(1);
- term.setTermBuffer("b");
+ term.setEmpty().append("b");
} else if (upto == 2) {
posIncr.setPositionIncrement(0);
- term.setTermBuffer("c");
+ term.setEmpty().append("c");
} else {
posIncr.setPositionIncrement(0);
- term.setTermBuffer("d");
+ term.setEmpty().append("d");
}
upto++;
return true;
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
index b3a28dbe1b0..fc18e2ce98f 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
@@ -36,7 +36,7 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.DateField;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
@@ -93,8 +93,8 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
public static Analyzer qpAnalyzer = new QPTestAnalyzer();
public static final class QPTestFilter extends TokenFilter {
- TermAttribute termAtt;
- OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Filter which discards the token 'stop' and which expands the token
@@ -102,8 +102,6 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
*/
public QPTestFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
}
boolean inPhrase = false;
@@ -114,19 +112,19 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
if (inPhrase) {
inPhrase = false;
clearAttributes();
- termAtt.setTermBuffer("phrase2");
+ termAtt.setEmpty().append("phrase2");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
} else
while (input.incrementToken()) {
- if (termAtt.term().equals("phrase")) {
+ if (termAtt.toString().equals("phrase")) {
inPhrase = true;
savedStart = offsetAtt.startOffset();
savedEnd = offsetAtt.endOffset();
- termAtt.setTermBuffer("phrase1");
+ termAtt.setEmpty().append("phrase1");
offsetAtt.setOffset(savedStart, savedEnd);
return true;
- } else if (!termAtt.term().equals("stop"))
+ } else if (!termAtt.toString().equals("stop"))
return true;
}
return false;
diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
index d0b1f46f825..0e573e85555 100755
--- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
+++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java
@@ -29,7 +29,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -117,10 +117,10 @@ public final class SynExpand {
// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
- String word = termAtt.term();
+ String word = termAtt.toString();
if ( already.add( word))
top.add( word);
}
diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
index 07dd65b6116..894e7494908 100644
--- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
+++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java
@@ -28,7 +28,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
@@ -125,10 +125,10 @@ public class SynLookup {
// [1] Parse query into separate words so that when we expand we can avoid dups
TokenStream ts = a.tokenStream( field, new StringReader( query));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
- String word = termAtt.term();
+ String word = termAtt.toString();
if ( already.add( word))
top.add( word);
}
diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java
index 47af190bf5a..e4b45a0c691 100644
--- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java
+++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java
@@ -21,8 +21,8 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -45,9 +45,9 @@ public class SynonymTokenFilter extends TokenFilter {
private AttributeSource.State current = null;
private int todo = 0;
- private TermAttribute termAtt;
- private TypeAttribute typeAtt;
- private PositionIncrementAttribute posIncrAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Creates an instance for the given underlying stream and synonym table.
@@ -71,10 +71,6 @@ public class SynonymTokenFilter extends TokenFilter {
this.synonyms = synonyms;
this.maxSynonyms = maxSynonyms;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.typeAtt = addAttribute(TypeAttribute.class);
- this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
/** Returns the next token in the stream, or null at EOS. */
@@ -89,7 +85,7 @@ public class SynonymTokenFilter extends TokenFilter {
if (!input.incrementToken()) return false; // EOS; iterator exhausted
- stack = synonyms.getSynonyms(termAtt.term()); // push onto stack
+ stack = synonyms.getSynonyms(termAtt.toString()); // push onto stack
if (stack.length > maxSynonyms) randomize(stack);
index = 0;
current = captureState();
@@ -110,7 +106,7 @@ public class SynonymTokenFilter extends TokenFilter {
*/
protected boolean createToken(String synonym, AttributeSource.State current) {
restoreState(current);
- termAtt.setTermBuffer(synonym);
+ termAtt.setEmpty().append(synonym);
typeAtt.setType(SYNONYM_TOKEN_TYPE);
posIncrAtt.setPositionIncrement(0);
return true;
diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
index 9c256d43f81..b96cf7bab4b 100644
--- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
+++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java
@@ -10,7 +10,7 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.search.similar.MoreLikeThisQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.xmlparser.DOMUtils;
@@ -77,11 +77,11 @@ public class LikeThisQueryBuilder implements QueryBuilder {
for (int i = 0; i < fields.length; i++)
{
TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
try
{
while(ts.incrementToken()) {
- stopWordsSet.add(termAtt.term());
+ stopWordsSet.add(termAtt.toString());
}
}
catch(IOException ioe)
diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
index ae79d6d0475..957f655c989 100644
--- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
+++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java
@@ -6,7 +6,7 @@ import java.util.ArrayList;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
@@ -56,10 +56,10 @@ public class SpanOrTermsBuilder extends SpanBuilderBase
{
ArrayList clausesList=new ArrayList();
TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
- SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.term()));
+ SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString()));
clausesList.add(stq);
}
SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()]));
diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
index 7373a95b8c2..edc3daf98f2 100644
--- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
+++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java
@@ -5,7 +5,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TermsFilter;
@@ -57,7 +57,7 @@ public class TermsFilterBuilder implements FilterBuilder
String text = DOMUtils.getNonBlankTextOrFail(e);
String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
try
{
@@ -65,11 +65,11 @@ public class TermsFilterBuilder implements FilterBuilder
while (ts.incrementToken()) {
if (term == null)
{
- term = new Term(fieldName, termAtt.term());
+ term = new Term(fieldName, termAtt.toString());
} else
{
// create from previous to save fieldName.intern overhead
- term = term.createTerm(termAtt.term());
+ term = term.createTerm(termAtt.toString());
}
tf.addTerm(term);
}
diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java
index 83e6bb336f4..63fe23bbbb4 100644
--- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java
+++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java
@@ -5,7 +5,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
@@ -57,16 +57,16 @@ public class TermsQueryBuilder implements QueryBuilder {
TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text));
try
{
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
Term term = null;
while (ts.incrementToken()) {
if (term == null)
{
- term = new Term(fieldName, termAtt.term());
+ term = new Term(fieldName, termAtt.toString());
} else
{
// create from previous to save fieldName.intern overhead
- term = term.createTerm(termAtt.term());
+ term = term.createTerm(termAtt.toString());
}
bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD));
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
index 8a74505bd7d..0a74366810a 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography.
@@ -29,21 +29,18 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
*/
public final class ArabicNormalizationFilter extends TokenFilter {
-
- private final ArabicNormalizer normalizer;
- private final TermAttribute termAtt;
+ private final ArabicNormalizer normalizer = new ArabicNormalizer();
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public ArabicNormalizationFilter(TokenStream input) {
super(input);
- normalizer = new ArabicNormalizer();
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt.termLength());
- termAtt.setTermLength(newlen);
+ int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length());
+ termAtt.setLength(newlen);
return true;
}
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
index f45e98d65da..636481e123b 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words..
@@ -35,24 +35,20 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
* @see KeywordMarkerFilter */
public final class ArabicStemFilter extends TokenFilter {
-
- private final ArabicStemmer stemmer;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final ArabicStemmer stemmer = new ArabicStemmer();
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public ArabicStemFilter(TokenStream input) {
super(input);
- stemmer = new ArabicStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
- final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
- termAtt.setTermLength(newlen);
+ final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+ termAtt.setLength(newlen);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
index 30387f0a694..e4264201ec9 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link BulgarianStemmer} to stem Bulgarian
@@ -35,23 +35,20 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
*
*/
public final class BulgarianStemFilter extends TokenFilter {
- private final BulgarianStemmer stemmer;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final BulgarianStemmer stemmer = new BulgarianStemmer();
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public BulgarianStemFilter(final TokenStream input) {
super(input);
- stemmer = new BulgarianStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
- final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
- termAtt.setTermLength(newlen);
+ final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+ termAtt.setLength(newlen);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
index 6bbb2d83312..a6c42eb9943 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link BrazilianStemmer}.
@@ -41,10 +41,10 @@ public final class BrazilianStemFilter extends TokenFilter {
/**
* {@link BrazilianStemmer} in use by this filter.
*/
- private BrazilianStemmer stemmer = null;
+ private BrazilianStemmer stemmer = new BrazilianStemmer();
private Set<?> exclusions = null;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
/**
* Creates a new BrazilianStemFilter
@@ -53,9 +53,6 @@ public final class BrazilianStemFilter extends TokenFilter {
*/
public BrazilianStemFilter(TokenStream in) {
super(in);
- stemmer = new BrazilianStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
/**
@@ -74,13 +71,13 @@ public final class BrazilianStemFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final String term = termAtt.term();
+ final String term = termAtt.toString();
// Check the exclusion table.
if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
final String s = stemmer.stem(term);
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals(term))
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
index 1eb14d0af91..d907d51d499 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java
@@ -22,7 +22,7 @@ import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -98,9 +98,9 @@ public final class CJKTokenizer extends Tokenizer {
*/
private boolean preIsTokened = false;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
//~ Constructors -----------------------------------------------------------
@@ -111,23 +111,14 @@ public final class CJKTokenizer extends Tokenizer {
*/
public CJKTokenizer(Reader in) {
super(in);
- init();
}
public CJKTokenizer(AttributeSource source, Reader in) {
super(source, in);
- init();
}
public CJKTokenizer(AttributeFactory factory, Reader in) {
super(factory, in);
- init();
- }
-
- private void init() {
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
//~ Methods ----------------------------------------------------------------
@@ -287,7 +278,7 @@ public final class CJKTokenizer extends Tokenizer {
}
if (length > 0) {
- termAtt.setTermBuffer(buffer, 0, length);
+ termAtt.copyBuffer(buffer, 0, length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]);
return true;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
index af30be18af6..22cd6cda90c 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java
@@ -23,7 +23,7 @@ import java.util.Arrays;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@@ -61,21 +61,20 @@ public final class ChineseFilter extends TokenFilter {
private CharArraySet stopTable;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public ChineseFilter(TokenStream in) {
super(in);
stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false);
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
while (input.incrementToken()) {
- char text[] = termAtt.termBuffer();
- int termLength = termAtt.termLength();
+ char text[] = termAtt.buffer();
+ int termLength = termAtt.length();
// why not key off token type here assuming ChineseTokenizer comes first?
if (!stopTable.contains(text, 0, termLength)) {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
index 7af1d4da3a6..c3f50998da9 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java
@@ -23,8 +23,8 @@ import java.io.Reader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -62,24 +62,16 @@ public final class ChineseTokenizer extends Tokenizer {
public ChineseTokenizer(Reader in) {
super(in);
- init();
}
public ChineseTokenizer(AttributeSource source, Reader in) {
super(source, in);
- init();
}
public ChineseTokenizer(AttributeFactory factory, Reader in) {
super(factory, in);
- init();
}
-
- private void init() {
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- }
-
+
private int offset = 0, bufferIndex=0, dataLen=0;
private final static int MAX_WORD_LEN = 255;
private final static int IO_BUFFER_SIZE = 1024;
@@ -90,8 +82,8 @@ public final class ChineseTokenizer extends Tokenizer {
private int length;
private int start;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
private final void push(char c) {
@@ -105,7 +97,7 @@ public final class ChineseTokenizer extends Tokenizer {
if (length>0) {
//System.out.println(new String(buffer, 0,
//length));
- termAtt.setTermBuffer(buffer, 0, length);
+ termAtt.copyBuffer(buffer, 0, length);
offsetAtt.setOffset(correctOffset(start), correctOffset(start+length));
return true;
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
index 4038b72bb5e..0d82cbff197 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@@ -69,12 +69,12 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
protected final int maxSubwordSize;
protected final boolean onlyLongestMatch;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private FlagsAttribute flagsAtt;
- private PositionIncrementAttribute posIncAtt;
- private TypeAttribute typeAtt;
- private PayloadAttribute payloadAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+ private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
private final Token wrapper = new Token();
/**
@@ -160,13 +160,6 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
this.dictionary = new CharArraySet(matchVersion, dictionary.size(), false);
addAllLowerCase(this.dictionary, dictionary);
}
-
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- flagsAtt = addAttribute(FlagsAttribute.class);
- posIncAtt = addAttribute(PositionIncrementAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
- payloadAtt = addAttribute(PayloadAttribute.class);
}
/**
@@ -192,7 +185,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
private final void setToken(final Token token) throws IOException {
clearAttributes();
- termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
flagsAtt.setFlags(token.getFlags());
typeAtt.setType(token.type());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -210,7 +203,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
if (!input.incrementToken())
return false;
- wrapper.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ wrapper.copyBuffer(termAtt.buffer(), 0, termAtt.length());
wrapper.setStartOffset(offsetAtt.startOffset());
wrapper.setEndOffset(offsetAtt.endOffset());
wrapper.setFlags(flagsAtt.getFlags());
@@ -248,7 +241,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
protected final Token createToken(final int offset, final int length,
final Token prototype) {
int newStart = prototype.startOffset() + offset;
- Token t = prototype.clone(prototype.termBuffer(), offset, length, newStart, newStart+length);
+ Token t = prototype.clone(prototype.buffer(), offset, length, newStart, newStart+length);
t.setPositionIncrement(0);
return t;
}
@@ -258,7 +251,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
tokens.add((Token) token.clone());
// Only words longer than minWordSize get processed
- if (token.termLength() < this.minWordSize) {
+ if (token.length() < this.minWordSize) {
return;
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
index 6d8374a1530..ade9b314db8 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java
@@ -191,22 +191,22 @@ public class DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa
@Override
protected void decomposeInternal(final Token token) {
// Only words longer than minWordSize get processed
- if (token.termLength() < this.minWordSize) {
+ if (token.length() < this.minWordSize) {
return;
}
- char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.termBuffer());
+ char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.buffer());
- for (int i=0;itoken.termLength()) {
+ if(i+j>token.length()) {
break;
}
if(dictionary.contains(lowerCaseTermBuffer, i, j)) {
if (this.onlyLongestMatch) {
if (longestMatchToken!=null) {
- if (longestMatchToken.termLength() exit
if (hyphens == null) {
return;
}
final int[] hyp = hyphens.getHyphenationPoints();
- char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.termBuffer());
+ char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.buffer());
for (int i = 0; i < hyp.length; ++i) {
int remaining = hyp.length - i;
@@ -335,7 +335,7 @@ public class HyphenationCompoundWordTokenFilter extends
if (dictionary == null || dictionary.contains(lowerCaseTermBuffer, start, partLength)) {
if (this.onlyLongestMatch) {
if (longestMatchToken != null) {
- if (longestMatchToken.termLength() < partLength) {
+ if (longestMatchToken.length() < partLength) {
longestMatchToken = createToken(start, partLength, token);
}
} else {
@@ -352,7 +352,7 @@ public class HyphenationCompoundWordTokenFilter extends
// characters
if (this.onlyLongestMatch) {
if (longestMatchToken != null) {
- if (longestMatchToken.termLength() < partLength - 1) {
+ if (longestMatchToken.length() < partLength - 1) {
longestMatchToken = createToken(start, partLength - 1, token);
}
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
index 486d33c5c48..4f0e17fea39 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java
@@ -6,7 +6,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -37,23 +37,20 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
* @see KeywordMarkerFilter
*/
public final class CzechStemFilter extends TokenFilter {
- private final CzechStemmer stemmer;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CzechStemmer stemmer = new CzechStemmer();
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public CzechStemFilter(TokenStream input) {
super(input);
- stemmer = new CzechStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
- final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength());
- termAtt.setTermLength(newlen);
+ final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length());
+ termAtt.setLength(newlen);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
index dcba092e318..be408321c0b 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java
@@ -24,7 +24,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that stems German words.
@@ -45,11 +45,11 @@ public final class GermanStemFilter extends TokenFilter
/**
* The actual token in the input stream.
*/
- private GermanStemmer stemmer = null;
+ private GermanStemmer stemmer = new GermanStemmer();
private Set> exclusionSet = null;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
/**
* Creates a {@link GermanStemFilter} instance
@@ -58,9 +58,6 @@ public final class GermanStemFilter extends TokenFilter
public GermanStemFilter( TokenStream in )
{
super(in);
- stemmer = new GermanStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
/**
@@ -80,13 +77,13 @@ public final class GermanStemFilter extends TokenFilter
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- String term = termAtt.term();
+ String term = termAtt.toString();
// Check the exclusion table.
if (!keywordAttr.isKeyword() && (exclusionSet == null || !exclusionSet.contains(term))) {
String s = stemmer.stem(term);
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals(term))
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
index 3ea5bd67e97..20c1b1720ca 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the
@@ -30,22 +30,19 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
*/
public final class PersianNormalizationFilter extends TokenFilter {
-
- private final PersianNormalizer normalizer;
- private final TermAttribute termAtt;
+ private final PersianNormalizer normalizer = new PersianNormalizer();
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public PersianNormalizationFilter(TokenStream input) {
super(input);
- normalizer = new PersianNormalizer();
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt
- .termLength());
- termAtt.setTermLength(newlen);
+ final int newlen = normalizer.normalize(termAtt.buffer(),
+ termAtt.length());
+ termAtt.setLength(newlen);
return true;
}
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
index bf2c9876716..97b7922b29c 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java
@@ -23,7 +23,7 @@ import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@@ -37,7 +37,7 @@ import org.apache.lucene.util.Version;
*/
public final class ElisionFilter extends TokenFilter {
private CharArraySet articles = CharArraySet.EMPTY_SET;
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(
new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(
"l", "m", "t", "qu", "n", "s", "j"), true));
@@ -100,7 +100,6 @@ public final class ElisionFilter extends TokenFilter {
super(input);
this.articles = CharArraySet.unmodifiableSet(
new CharArraySet(matchVersion, articles, true));
- termAtt = addAttribute(TermAttribute.class);
}
/**
@@ -115,13 +114,13 @@ public final class ElisionFilter extends TokenFilter {
}
/**
- * Increments the {@link TokenStream} with a {@link TermAttribute} without elisioned start
+ * Increments the {@link TokenStream}, stripping a leading elided article (the prefix before an apostrophe) from the {@link CharTermAttribute}
*/
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- char[] termBuffer = termAtt.termBuffer();
- int termLength = termAtt.termLength();
+ char[] termBuffer = termAtt.buffer();
+ int termLength = termAtt.length();
int minPoz = Integer.MAX_VALUE;
for (int i = 0; i < apostrophes.length; i++) {
@@ -137,8 +136,8 @@ public final class ElisionFilter extends TokenFilter {
// An apostrophe has been found. If the prefix is an article strip it off.
if (minPoz != Integer.MAX_VALUE
- && articles.contains(termAtt.termBuffer(), 0, minPoz)) {
- termAtt.setTermBuffer(termAtt.termBuffer(), minPoz + 1, termAtt.termLength() - (minPoz + 1));
+ && articles.contains(termAtt.buffer(), 0, minPoz)) {
+ termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1));
}
return true;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
index fa61deae642..6e5d1ebd648 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
import java.util.HashSet;
@@ -51,17 +51,14 @@ public final class FrenchStemFilter extends TokenFilter {
/**
* The actual token in the input stream.
*/
- private FrenchStemmer stemmer = null;
+ private FrenchStemmer stemmer = new FrenchStemmer();
private Set> exclusions = null;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public FrenchStemFilter( TokenStream in ) {
- super(in);
- stemmer = new FrenchStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
+ super(in);
}
/**
@@ -82,14 +79,14 @@ public final class FrenchStemFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- String term = termAtt.term();
+ String term = termAtt.toString();
// Check the exclusion table
if ( !keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains( term )) ) {
String s = stemmer.stem( term );
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals( term ) )
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
index cad610b561d..b8da0723ea3 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link HindiNormalizer} to normalize the
@@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public final class HindiNormalizationFilter extends TokenFilter {
private final HindiNormalizer normalizer = new HindiNormalizer();
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
public HindiNormalizationFilter(TokenStream input) {
@@ -50,8 +50,8 @@ public final class HindiNormalizationFilter extends TokenFilter {
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword())
- termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(),
- termAtt.termLength()));
+ termAtt.setLength(normalizer.normalize(termAtt.buffer(),
+ termAtt.length()));
return true;
}
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
index cd470a407cc..946fe347825 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java
@@ -22,13 +22,13 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link HindiStemmer} to stem Hindi words.
*/
public final class HindiStemFilter extends TokenFilter {
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
private final HindiStemmer stemmer = new HindiStemmer();
@@ -40,7 +40,7 @@ public final class HindiStemFilter extends TokenFilter {
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword())
- termAtt.setTermLength(stemmer.stem(termAtt.termBuffer(), termAtt.termLength()));
+ termAtt.setLength(stemmer.stem(termAtt.buffer(), termAtt.length()));
return true;
} else {
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
index dd006349fad..de485b02c33 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java
@@ -21,14 +21,14 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that applies {@link IndicNormalizer} to normalize text
* in Indian Languages.
*/
public final class IndicNormalizationFilter extends TokenFilter {
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final IndicNormalizer normalizer = new IndicNormalizer();
public IndicNormalizationFilter(TokenStream input) {
@@ -38,7 +38,7 @@ public final class IndicNormalizationFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), termAtt.termLength()));
+ termAtt.setLength(normalizer.normalize(termAtt.buffer(), termAtt.length()));
return true;
} else {
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
index a4396da74e5..4d17af04eac 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java
@@ -30,8 +30,8 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@@ -332,8 +332,8 @@ public final class PatternAnalyzer extends Analyzer {
private Matcher matcher;
private int pos = 0;
private static final Locale locale = Locale.getDefault();
- private TermAttribute termAtt = addAttribute(TermAttribute.class);
- private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) {
this.str = str;
@@ -360,7 +360,7 @@ public final class PatternAnalyzer extends Analyzer {
if (start != end) { // non-empty match (header/trailer)
String text = str.substring(start, end);
if (toLowerCase) text = text.toLowerCase(locale);
- termAtt.setTermBuffer(text);
+ termAtt.setEmpty().append(text);
offsetAtt.setOffset(start, end);
return true;
}
@@ -392,8 +392,8 @@ public final class PatternAnalyzer extends Analyzer {
private final boolean toLowerCase;
private final Set> stopWords;
private static final Locale locale = Locale.getDefault();
- private TermAttribute termAtt = addAttribute(TermAttribute.class);
- private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set> stopWords) {
this.str = str;
@@ -446,7 +446,7 @@ public final class PatternAnalyzer extends Analyzer {
{
return false;
}
- termAtt.setTermBuffer(text);
+ termAtt.setEmpty().append(text);
offsetAtt.setOffset(start, i);
return true;
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
index c2ee148e3f9..5fb20f56f8f 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
@@ -44,14 +44,14 @@ public class PrefixAwareTokenFilter extends TokenStream {
private TokenStream prefix;
private TokenStream suffix;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private PayloadAttribute payloadAtt;
private OffsetAttribute offsetAtt;
private TypeAttribute typeAtt;
private FlagsAttribute flagsAtt;
- private TermAttribute p_termAtt;
+ private CharTermAttribute p_termAtt;
private PositionIncrementAttribute p_posIncrAtt;
private PayloadAttribute p_payloadAtt;
private OffsetAttribute p_offsetAtt;
@@ -64,14 +64,14 @@ public class PrefixAwareTokenFilter extends TokenStream {
this.prefix = prefix;
prefixExhausted = false;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
flagsAtt = addAttribute(FlagsAttribute.class);
- p_termAtt = prefix.addAttribute(TermAttribute.class);
+ p_termAtt = prefix.addAttribute(CharTermAttribute.class);
p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class);
p_payloadAtt = prefix.addAttribute(PayloadAttribute.class);
p_offsetAtt = prefix.addAttribute(OffsetAttribute.class);
@@ -115,7 +115,7 @@ public class PrefixAwareTokenFilter extends TokenStream {
private void setCurrentToken(Token token) {
if (token == null) return;
clearAttributes();
- termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -125,7 +125,7 @@ public class PrefixAwareTokenFilter extends TokenStream {
private Token getNextPrefixInputToken(Token token) throws IOException {
if (!prefix.incrementToken()) return null;
- token.setTermBuffer(p_termAtt.termBuffer(), 0, p_termAtt.termLength());
+ token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length());
token.setPositionIncrement(p_posIncrAtt.getPositionIncrement());
token.setFlags(p_flagsAtt.getFlags());
token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset());
@@ -136,7 +136,7 @@ public class PrefixAwareTokenFilter extends TokenStream {
private Token getNextSuffixInputToken(Token token) throws IOException {
if (!suffix.incrementToken()) return null;
- token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setPositionIncrement(posIncrAtt.getPositionIncrement());
token.setFlags(flagsAtt.getFlags());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
index 05de91a2063..8a401368ca6 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenStream} containing a single token.
@@ -41,7 +41,7 @@ public final class SingleTokenTokenStream extends TokenStream {
assert token != null;
this.singleToken = (Token) token.clone();
- tokenAtt = (AttributeImpl) addAttribute(TermAttribute.class);
+ tokenAtt = (AttributeImpl) addAttribute(CharTermAttribute.class);
assert (tokenAtt instanceof Token);
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
index 54cb798a0ce..65c0e160552 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java
@@ -23,7 +23,7 @@ import java.util.Map;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArrayMap;
import org.apache.lucene.util.Version;
@@ -34,7 +34,7 @@ import org.apache.lucene.util.Version;
public final class StemmerOverrideFilter extends TokenFilter {
private final CharArrayMap dictionary;
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
/**
@@ -56,9 +56,9 @@ public final class StemmerOverrideFilter extends TokenFilter {
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms
- String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength());
+ String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length());
if (stem != null) {
- termAtt.setTermBuffer(stem);
+ termAtt.setEmpty().append(stem);
keywordAtt.setKeyword(true);
}
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
index fa85dd8ec93..55fa29b777c 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java
@@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ngram;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.IOException;
@@ -72,8 +72,8 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
private int curGramSize;
private int tokStart;
- private final TermAttribute termAtt;
- private final OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range
@@ -101,8 +101,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
/**
@@ -124,8 +122,8 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
if (!input.incrementToken()) {
return false;
} else {
- curTermBuffer = termAtt.termBuffer().clone();
- curTermLength = termAtt.termLength();
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
curGramSize = minGram;
tokStart = offsetAtt.startOffset();
}
@@ -138,7 +136,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter {
int end = start + curGramSize;
clearAttributes();
offsetAtt.setOffset(tokStart + start, tokStart + end);
- termAtt.setTermBuffer(curTermBuffer, start, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, start, curGramSize);
curGramSize++;
return true;
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
index efb49a90519..b11814b0dc1 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram;
*/
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
@@ -37,8 +37,8 @@ public final class EdgeNGramTokenizer extends Tokenizer {
public static final int DEFAULT_MAX_GRAM_SIZE = 1;
public static final int DEFAULT_MIN_GRAM_SIZE = 1;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/** Specifies which side of the input the n-gram should be generated from */
public static enum Side {
@@ -173,10 +173,6 @@ public final class EdgeNGramTokenizer extends Tokenizer {
this.minGram = minGram;
this.maxGram = maxGram;
this.side = side;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
-
}
/** Returns the next token in the stream, or null at EOS. */
@@ -206,7 +202,7 @@ public final class EdgeNGramTokenizer extends Tokenizer {
// grab gramSize chars from front or back
int start = side == Side.FRONT ? 0 : inLen - gramSize;
int end = start + gramSize;
- termAtt.setTermBuffer(inStr, start, gramSize);
+ termAtt.setEmpty().append(inStr, start, end);
offsetAtt.setOffset(correctOffset(start), correctOffset(end));
gramSize++;
return true;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
index 41b956357ac..c73208bf36b 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Tokenizes the input into n-grams of the given size(s).
@@ -39,8 +39,8 @@ public final class NGramTokenFilter extends TokenFilter {
private int curPos;
private int tokStart;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates NGramTokenFilter with given min and max n-grams.
@@ -58,9 +58,6 @@ public final class NGramTokenFilter extends TokenFilter {
}
this.minGram = minGram;
this.maxGram = maxGram;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
/**
@@ -79,8 +76,8 @@ public final class NGramTokenFilter extends TokenFilter {
if (!input.incrementToken()) {
return false;
} else {
- curTermBuffer = termAtt.termBuffer().clone();
- curTermLength = termAtt.termLength();
+ curTermBuffer = termAtt.buffer().clone();
+ curTermLength = termAtt.length();
curGramSize = minGram;
curPos = 0;
tokStart = offsetAtt.startOffset();
@@ -89,7 +86,7 @@ public final class NGramTokenFilter extends TokenFilter {
while (curGramSize <= maxGram) {
while (curPos+curGramSize <= curTermLength) { // while there is input
clearAttributes();
- termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize);
+ termAtt.copyBuffer(curTermBuffer, curPos, curGramSize);
offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize);
curPos++;
return true;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
index 81804d937e4..e7137c9ecbe 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram;
*/
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
import java.io.IOException;
@@ -39,8 +39,8 @@ public final class NGramTokenizer extends Tokenizer {
private String inStr;
private boolean started = false;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
/**
* Creates NGramTokenizer with given min and max n-grams.
@@ -94,9 +94,6 @@ public final class NGramTokenizer extends Tokenizer {
}
this.minGram = minGram;
this.maxGram = maxGram;
-
- this.termAtt = addAttribute(TermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
}
/** Returns the next token in the stream, or null at EOS. */
@@ -123,7 +120,7 @@ public final class NGramTokenizer extends Tokenizer {
int oldPos = pos;
pos++;
- termAtt.setTermBuffer(inStr, oldPos, gramSize);
+ termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize);
offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize));
return true;
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
index 65ab9cbec32..1b9d0d0e4f4 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* A {@link TokenFilter} that stems Dutch words.
@@ -52,17 +52,14 @@ public final class DutchStemFilter extends TokenFilter {
/**
* The actual token in the input stream.
*/
- private DutchStemmer stemmer = null;
+ private DutchStemmer stemmer = new DutchStemmer();
private Set<?> exclusions = null;
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public DutchStemFilter(TokenStream _in) {
super(_in);
- stemmer = new DutchStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
/**
@@ -99,14 +96,14 @@ public final class DutchStemFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final String term = termAtt.term();
+ final String term = termAtt.toString();
// Check the exclusion table.
if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) {
final String s = stemmer.stem(term);
// If not stemmed, don't waste the time adjusting the token.
if ((s != null) && !s.equals(term))
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
index b032f76a8d8..1f86beb4eae 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
@@ -39,15 +39,13 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
public final class DelimitedPayloadTokenFilter extends TokenFilter {
public static final char DEFAULT_DELIMITER = '|';
private final char delimiter;
- private final TermAttribute termAtt;
- private final PayloadAttribute payAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
private final PayloadEncoder encoder;
public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) {
super(input);
- termAtt = addAttribute(TermAttribute.class);
- payAtt = addAttribute(PayloadAttribute.class);
this.delimiter = delimiter;
this.encoder = encoder;
}
@@ -55,12 +53,12 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter {
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ final int length = termAtt.length();
for (int i = 0; i < length; i++) {
if (buffer[i] == delimiter) {
payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1))));
- termAtt.setTermLength(i); // simply set a new length
+ termAtt.setLength(i); // simply set a new length
return true;
}
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
index 9b2af214c4a..8ec5f700f76 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java
@@ -35,16 +35,14 @@ public class NumericPayloadTokenFilter extends TokenFilter {
private String typeMatch;
private Payload thePayload;
- private PayloadAttribute payloadAtt;
- private TypeAttribute typeAtt;
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) {
super(input);
//Need to encode the payload
thePayload = new Payload(PayloadHelper.encodeFloat(payload));
this.typeMatch = typeMatch;
- payloadAtt = addAttribute(PayloadAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
index 28c590c1945..24c16db2c06 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java
@@ -33,13 +33,11 @@ import org.apache.lucene.index.Payload;
*
**/
public class TokenOffsetPayloadTokenFilter extends TokenFilter {
- protected OffsetAttribute offsetAtt;
- protected PayloadAttribute payAtt;
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class);
public TokenOffsetPayloadTokenFilter(TokenStream input) {
super(input);
- offsetAtt = addAttribute(OffsetAttribute.class);
- payAtt = addAttribute(PayloadAttribute.class);
}
@Override
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
index e7be3b38911..eaf7647adac 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java
@@ -33,13 +33,11 @@ import java.io.IOException;
*
**/
public class TypeAsPayloadTokenFilter extends TokenFilter {
- private PayloadAttribute payloadAtt;
- private TypeAttribute typeAtt;
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public TypeAsPayloadTokenFilter(TokenStream input) {
super(input);
- payloadAtt = addAttribute(PayloadAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
index 90321dbecad..d7fea12b6c3 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java
@@ -35,7 +35,7 @@ public final class PositionFilter extends TokenFilter {
/** The first token must have non-zero positionIncrement **/
private boolean firstTokenPositioned = false;
- private PositionIncrementAttribute posIncrAtt;
+ private PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
/**
* Constructs a PositionFilter that assigns a position increment of zero to
@@ -45,7 +45,6 @@ public final class PositionFilter extends TokenFilter {
*/
public PositionFilter(final TokenStream input) {
super(input);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}
/**
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
index ef9d2f996b2..7f67d0a7cee 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.reverse;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;
import java.io.IOException;
@@ -42,7 +42,7 @@ import java.io.IOException;
*/
public final class ReverseStringFilter extends TokenFilter {
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final char marker;
private final Version matchVersion;
private static final char NOMARKER = '\uFFFF';
@@ -131,20 +131,19 @@ public final class ReverseStringFilter extends TokenFilter {
super(in);
this.matchVersion = matchVersion;
this.marker = marker;
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- int len = termAtt.termLength();
+ int len = termAtt.length();
if (marker != NOMARKER) {
len++;
- termAtt.resizeTermBuffer(len);
- termAtt.termBuffer()[len - 1] = marker;
+ termAtt.resizeBuffer(len);
+ termAtt.buffer()[len - 1] = marker;
}
- reverse( matchVersion, termAtt.termBuffer(), 0, len );
- termAtt.setTermLength(len);
+ reverse( matchVersion, termAtt.buffer(), 0, len );
+ termAtt.setLength(len);
return true;
} else {
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
index 6b96e16220d..86f3e2b63df 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Normalizes token text to lower case.
@@ -32,20 +32,19 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
@Deprecated
public final class RussianLowerCaseFilter extends TokenFilter
{
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public RussianLowerCaseFilter(TokenStream in)
{
super(in);
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public final boolean incrementToken() throws IOException
{
if (input.incrementToken()) {
- char[] chArray = termAtt.termBuffer();
- int chLen = termAtt.termLength();
+ char[] chArray = termAtt.buffer();
+ int chLen = termAtt.length();
for (int i = 0; i < chLen; i++)
{
chArray[i] = Character.toLowerCase(chArray[i]);
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
index 11655a87a6b..7e62e02d997 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java
@@ -22,7 +22,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link
import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link
@@ -51,17 +51,14 @@ public final class RussianStemFilter extends TokenFilter
/**
* The actual token in the input stream.
*/
- private RussianStemmer stemmer = null;
+ private RussianStemmer stemmer = new RussianStemmer();
- private final TermAttribute termAtt;
- private final KeywordAttribute keywordAttr;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public RussianStemFilter(TokenStream in)
{
super(in);
- stemmer = new RussianStemmer();
- termAtt = addAttribute(TermAttribute.class);
- keywordAttr = addAttribute(KeywordAttribute.class);
}
/**
* Returns the next token in the stream, or null at EOS
@@ -71,10 +68,10 @@ public final class RussianStemFilter extends TokenFilter
{
if (input.incrementToken()) {
if(!keywordAttr.isKeyword()) {
- final String term = termAtt.term();
+ final String term = termAtt.toString();
final String s = stemmer.stem(term);
if (s != null && !s.equals(term))
- termAtt.setTermBuffer(s);
+ termAtt.setEmpty().append(s);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
index f0bf4871408..cccd8cd1c33 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java
@@ -137,10 +137,10 @@ public final class ShingleFilter extends TokenFilter {
*/
private boolean isOutputHere = false;
- private final CharTermAttribute termAtt;
- private final OffsetAttribute offsetAtt;
- private final PositionIncrementAttribute posIncrAtt;
- private final TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
/**
@@ -155,10 +155,6 @@ public final class ShingleFilter extends TokenFilter {
super(input);
setMaxShingleSize(maxShingleSize);
setMinShingleSize(minShingleSize);
- this.termAtt = addAttribute(CharTermAttribute.class);
- this.offsetAtt = addAttribute(OffsetAttribute.class);
- this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- this.typeAtt = addAttribute(TypeAttribute.class);
}
/**
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
index ec6eee07dd4..a21ff3711e2 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java
@@ -31,11 +31,11 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column.Row;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
@@ -193,14 +193,14 @@ public final class ShingleMatrixFilter extends TokenStream {
private TokenStream input;
- private TermAttribute termAtt;
+ private CharTermAttribute termAtt;
private PositionIncrementAttribute posIncrAtt;
private PayloadAttribute payloadAtt;
private OffsetAttribute offsetAtt;
private TypeAttribute typeAtt;
private FlagsAttribute flagsAtt;
- private TermAttribute in_termAtt;
+ private CharTermAttribute in_termAtt;
private PositionIncrementAttribute in_posIncrAtt;
private PayloadAttribute in_payloadAtt;
private OffsetAttribute in_offsetAtt;
@@ -229,7 +229,7 @@ public final class ShingleMatrixFilter extends TokenStream {
this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
this.settingsCodec = settingsCodec;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
@@ -239,7 +239,7 @@ public final class ShingleMatrixFilter extends TokenStream {
// set the input to be an empty token stream, we already have the data.
this.input = new EmptyTokenStream();
- in_termAtt = input.addAttribute(TermAttribute.class);
+ in_termAtt = input.addAttribute(CharTermAttribute.class);
in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
in_payloadAtt = input.addAttribute(PayloadAttribute.class);
in_offsetAtt = input.addAttribute(OffsetAttribute.class);
@@ -311,14 +311,14 @@ public final class ShingleMatrixFilter extends TokenStream {
this.spacerCharacter = spacerCharacter;
this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle;
this.settingsCodec = settingsCodec;
- termAtt = addAttribute(TermAttribute.class);
+ termAtt = addAttribute(CharTermAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
payloadAtt = addAttribute(PayloadAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
flagsAtt = addAttribute(FlagsAttribute.class);
- in_termAtt = input.addAttribute(TermAttribute.class);
+ in_termAtt = input.addAttribute(CharTermAttribute.class);
in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class);
in_payloadAtt = input.addAttribute(PayloadAttribute.class);
in_offsetAtt = input.addAttribute(OffsetAttribute.class);
@@ -377,7 +377,7 @@ public final class ShingleMatrixFilter extends TokenStream {
if (token == null) return false;
clearAttributes();
- termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
posIncrAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
@@ -388,7 +388,7 @@ public final class ShingleMatrixFilter extends TokenStream {
private Token getNextInputToken(Token token) throws IOException {
if (!input.incrementToken()) return null;
- token.setTermBuffer(in_termAtt.termBuffer(), 0, in_termAtt.termLength());
+ token.copyBuffer(in_termAtt.buffer(), 0, in_termAtt.length());
token.setPositionIncrement(in_posIncrAtt.getPositionIncrement());
token.setFlags(in_flagsAtt.getFlags());
token.setOffset(in_offsetAtt.startOffset(), in_offsetAtt.endOffset());
@@ -399,7 +399,7 @@ public final class ShingleMatrixFilter extends TokenStream {
private Token getNextToken(Token token) throws IOException {
if (!this.incrementToken()) return null;
- token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setPositionIncrement(posIncrAtt.getPositionIncrement());
token.setFlags(flagsAtt.getFlags());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
@@ -441,7 +441,7 @@ public final class ShingleMatrixFilter extends TokenStream {
for (int i = 0; i < currentShingleLength; i++) {
Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset);
- termLength += shingleToken.termLength();
+ termLength += shingleToken.length();
shingle.add(shingleToken);
}
if (spacerCharacter != null) {
@@ -459,9 +459,9 @@ public final class ShingleMatrixFilter extends TokenStream {
if (spacerCharacter != null && sb.length() > 0) {
sb.append(spacerCharacter);
}
- sb.append(shingleToken.termBuffer(), 0, shingleToken.termLength());
+ sb.append(shingleToken.buffer(), 0, shingleToken.length());
}
- reusableToken.setTermBuffer(sb.toString());
+ reusableToken.setEmpty().append(sb);
updateToken(reusableToken, shingle, currentPermutationTokensStartOffset, currentPermutationRows, currentPermuationTokens);
return reusableToken;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
index 9270e974ec8..63142468cec 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java
@@ -21,7 +21,7 @@ import java.text.DateFormat;
import java.text.ParseException;
import java.util.Date;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
/**
@@ -34,7 +34,7 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
public static final String DATE_TYPE = "date";
protected DateFormat dateFormat;
- protected TermAttribute termAtt;
+ protected CharTermAttribute termAtt;
/**
* Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
@@ -50,10 +50,10 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter {
@Override
public boolean accept(AttributeSource source) {
if (termAtt == null) {
- termAtt = source.addAttribute(TermAttribute.class);
+ termAtt = source.addAttribute(CharTermAttribute.class);
}
try {
- Date date = dateFormat.parse(termAtt.term());//We don't care about the date, just that we can parse it as a date
+ Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date
if (date != null) {
return true;
}
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
index feea896d959..c69d4707bb4 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link
import org.tartarus.snowball.SnowballProgram;
@@ -42,7 +42,7 @@ public final class SnowballFilter extends TokenFilter {
private final SnowballProgram stemmer;
- private final TermAttribute termAtt = addAttribute(TermAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class);
public SnowballFilter(TokenStream input, SnowballProgram stemmer) {
@@ -76,16 +76,16 @@ public final class SnowballFilter extends TokenFilter {
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
if (!keywordAttr.isKeyword()) {
- char termBuffer[] = termAtt.termBuffer();
- final int length = termAtt.termLength();
+ char termBuffer[] = termAtt.buffer();
+ final int length = termAtt.length();
stemmer.setCurrent(termBuffer, length);
stemmer.stem();
final char finalTerm[] = stemmer.getCurrentBuffer();
final int newLength = stemmer.getCurrentBufferLength();
if (finalTerm != termBuffer)
- termAtt.setTermBuffer(finalTerm, 0, newLength);
+ termAtt.copyBuffer(finalTerm, 0, newLength);
else
- termAtt.setTermLength(newLength);
+ termAtt.setLength(newLength);
}
return true;
} else {
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
index 4dcc21178b3..7b6a5ca45ca 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java
@@ -133,7 +133,7 @@ public final class SynonymFilter extends TokenFilter {
OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class);
newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset());
- newTermAtt.copyBuffer(repTok.termBuffer(), 0, repTok.termLength());
+ newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length());
repPos += repTok.getPositionIncrement();
if (i==0) repPos=origPos; // make position of first token equal to original
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
index 1959b6e021c..e3a60abe6e4 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java
@@ -103,8 +103,7 @@ public class SynonymMap {
List ret = new ArrayList(strings.size());
for (String str : strings) {
//Token newTok = new Token(str,0,0,"SYNONYM");
- Token newTok = new Token(0,0,"SYNONYM");
- newTok.setTermBuffer(str.toCharArray(), 0, str.length());
+ Token newTok = new Token(str, 0,0,"SYNONYM");
ret.add(newTok);
}
return ret;
@@ -137,7 +136,7 @@ public class SynonymMap {
while(tok1!=null || tok2!=null) {
while (tok1 != null && (pos1 <= pos2 || tok2==null)) {
Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type());
- tok.setTermBuffer(tok1.termBuffer(), 0, tok1.termLength());
+ tok.copyBuffer(tok1.buffer(), 0, tok1.length());
tok.setPositionIncrement(pos1-pos);
result.add(tok);
pos=pos1;
@@ -146,7 +145,7 @@ public class SynonymMap {
}
while (tok2 != null && (pos2 <= pos1 || tok1==null)) {
Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type());
- tok.setTermBuffer(tok2.termBuffer(), 0, tok2.termLength());
+ tok.copyBuffer(tok2.buffer(), 0, tok2.length());
tok.setPositionIncrement(pos2-pos);
result.add(tok);
pos=pos2;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
index 6b9cf374582..923c4fcbe8e 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java
@@ -21,7 +21,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* Normalizes Turkish token text to lower case.
@@ -37,7 +37,7 @@ public final class TurkishLowerCaseFilter extends TokenFilter {
private static final int LATIN_SMALL_LETTER_I = '\u0069';
private static final int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131';
private static final int COMBINING_DOT_ABOVE = '\u0307';
- private final TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
* Create a new TurkishLowerCaseFilter, that normalizes Turkish token text
@@ -47,7 +47,6 @@ public final class TurkishLowerCaseFilter extends TokenFilter {
*/
public TurkishLowerCaseFilter(TokenStream in) {
super(in);
- termAtt = addAttribute(TermAttribute.class);
}
@Override
@@ -55,8 +54,8 @@ public final class TurkishLowerCaseFilter extends TokenFilter {
boolean iOrAfter = false;
if (input.incrementToken()) {
- final char[] buffer = termAtt.termBuffer();
- int length = termAtt.termLength();
+ final char[] buffer = termAtt.buffer();
+ int length = termAtt.length();
for (int i = 0; i < length;) {
final int ch = Character.codePointAt(buffer, i);
@@ -88,7 +87,7 @@ public final class TurkishLowerCaseFilter extends TokenFilter {
i += Character.toChars(Character.toLowerCase(ch), buffer, i);
}
- termAtt.setTermLength(length);
+ termAtt.setLength(length);
return true;
} else
return false;
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
index 4ff201d5caa..0d4cae87d3f 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java
@@ -18,10 +18,10 @@
package org.apache.lucene.analysis.wikipedia;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -116,11 +116,11 @@ public final class WikipediaTokenizer extends Tokenizer {
private Set untokenizedTypes = Collections.emptySet();
private Iterator tokens = null;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
- private PositionIncrementAttribute posIncrAtt;
- private TermAttribute termAtt;
- private FlagsAttribute flagsAtt;
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
/**
* Creates a new instance of the {@link WikipediaTokenizer}. Attaches the
@@ -176,12 +176,7 @@ public final class WikipediaTokenizer extends Tokenizer {
private void init(int tokenOutput, Set untokenizedTypes) {
this.tokenOutput = tokenOutput;
- this.untokenizedTypes = untokenizedTypes;
- this.offsetAtt = addAttribute(OffsetAttribute.class);
- this.typeAtt = addAttribute(TypeAttribute.class);
- this.posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- this.termAtt = addAttribute(TermAttribute.class);
- this.flagsAtt = addAttribute(FlagsAttribute.class);
+ this.untokenizedTypes = untokenizedTypes;
}
/*
@@ -245,8 +240,9 @@ public final class WikipediaTokenizer extends Tokenizer {
lastPos = currPos + numAdded;
}
//trim the buffer
+ // TODO: this is inefficient: buffer.toString().trim() creates intermediate String(s) just to copy the text into termAtt
String s = buffer.toString().trim();
- termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
+ termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
@@ -283,8 +279,9 @@ public final class WikipediaTokenizer extends Tokenizer {
lastPos = currPos + numAdded;
}
//trim the buffer
+ // TODO: this is inefficient: buffer.toString().trim() creates intermediate String(s) just to copy the text into termAtt
String s = buffer.toString().trim();
- termAtt.setTermBuffer(s.toCharArray(), 0, s.length());
+ termAtt.setEmpty().append(s);
offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length()));
flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG);
//The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos
@@ -298,7 +295,7 @@ public final class WikipediaTokenizer extends Tokenizer {
private void setupToken() {
scanner.getText(termAtt);
final int start = scanner.yychar();
- offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.termLength()));
+ offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
}
/*
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
index 6b703a03a89..34735b05e08 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java
@@ -1,4 +1,4 @@
-/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:51 */
+/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/31/10 3:11 PM */
package org.apache.lucene.analysis.wikipedia;
@@ -19,14 +19,14 @@ package org.apache.lucene.analysis.wikipedia;
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
/**
* This class is a scanner generated by
* JFlex 1.5.0-SNAPSHOT
- * on 17.05.10 14:51 from the specification file
- * C:/Users/Uwe Schindler/Projects/lucene/newtrunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
+ * on 5/31/10 3:11 PM from the specification file
+ * C:/Users/rmuir/workspace/solrcene/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
*/
class WikipediaTokenizerImpl {
@@ -37,16 +37,16 @@ class WikipediaTokenizerImpl {
private static final int ZZ_BUFFERSIZE = 16384;
/** lexical states */
- public static final int CATEGORY_STATE = 2;
- public static final int DOUBLE_EQUALS_STATE = 14;
+ public static final int THREE_SINGLE_QUOTES_STATE = 10;
public static final int EXTERNAL_LINK_STATE = 6;
+ public static final int DOUBLE_EQUALS_STATE = 14;
public static final int INTERNAL_LINK_STATE = 4;
public static final int DOUBLE_BRACE_STATE = 16;
- public static final int FIVE_SINGLE_QUOTES_STATE = 12;
- public static final int STRING = 18;
- public static final int TWO_SINGLE_QUOTES_STATE = 8;
+ public static final int CATEGORY_STATE = 2;
public static final int YYINITIAL = 0;
- public static final int THREE_SINGLE_QUOTES_STATE = 10;
+ public static final int STRING = 18;
+ public static final int FIVE_SINGLE_QUOTES_STATE = 12;
+ public static final int TWO_SINGLE_QUOTES_STATE = 8;
/**
* ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l
@@ -487,8 +487,8 @@ public final int getPositionIncrement(){
/**
* Fills Lucene token with the current token text.
*/
-final void getText(TermAttribute t) {
- t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+final void getText(CharTermAttribute t) {
+ t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
final int setText(StringBuilder buffer){
@@ -803,184 +803,184 @@ final int setText(StringBuilder buffer){
zzMarkedPos = zzMarkedPosL;
switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) {
- case 25:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);
- }
- case 46: break;
- case 30:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/
- }
- case 47: break;
- case 41:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/
- }
- case 48: break;
- case 14:
- { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
- }
- case 49: break;
- case 23:
- { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);
- }
- case 50: break;
- case 34:
- { positionInc = 1; return NUM;
- }
- case 51: break;
- case 18:
- { /* ignore STRING */
- }
- case 52: break;
- case 12:
- { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
- }
- case 53: break;
- case 37:
- { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/
- }
- case 54: break;
- case 31:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE);
- }
- case 55: break;
- case 10:
- { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);
- }
- case 56: break;
- case 38:
- { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/
- }
- case 57: break;
- case 19:
- { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
- }
- case 58: break;
- case 11:
- { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);
- }
- case 59: break;
- case 1:
- { numWikiTokensSeen = 0; positionInc = 1;
- }
- case 60: break;
- case 33:
- { positionInc = 1; return HOST;
- }
- case 61: break;
- case 3:
- { positionInc = 1; return CJ;
- }
- case 62: break;
- case 17:
- { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
- }
- case 63: break;
- case 32:
- { positionInc = 1; return APOSTROPHE;
- }
- case 64: break;
- case 8:
- { /* ignore */
- }
- case 65: break;
- case 4:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
- }
- case 66: break;
- case 2:
- { positionInc = 1; return ALPHANUM;
- }
- case 67: break;
- case 26:
- { yybegin(YYINITIAL);
- }
- case 68: break;
- case 43:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);
- }
- case 69: break;
- case 36:
- { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE);
- }
- case 70: break;
- case 13:
- { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);
- }
- case 71: break;
- case 24:
- { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);
- }
- case 72: break;
- case 27:
- { numLinkToks = 0; yybegin(YYINITIAL);
- }
- case 73: break;
- case 15:
- { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING);
- }
- case 74: break;
- case 28:
- { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
- }
- case 75: break;
- case 39:
- { positionInc = 1; return ACRONYM;
- }
- case 76: break;
- case 29:
- { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
- }
- case 77: break;
- case 7:
- { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
- }
- case 78: break;
case 16:
{ currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;
}
- case 79: break;
+ case 46: break;
+ case 39:
+ { positionInc = 1; return ACRONYM;
+ }
+ case 47: break;
+ case 8:
+ { /* ignore */
+ }
+ case 48: break;
case 20:
{ numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE);
}
- case 80: break;
+ case 49: break;
case 35:
{ positionInc = 1; return COMPANY;
}
+ case 50: break;
+ case 4:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
+ }
+ case 51: break;
+ case 25:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);
+ }
+ case 52: break;
+ case 43:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);
+ }
+ case 53: break;
+ case 22:
+ { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}
+ }
+ case 54: break;
+ case 34:
+ { positionInc = 1; return NUM;
+ }
+ case 55: break;
+ case 32:
+ { positionInc = 1; return APOSTROPHE;
+ }
+ case 56: break;
+ case 23:
+ { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);
+ }
+ case 57: break;
+ case 21:
+ { yybegin(STRING); return currentTokType;/*pipe*/
+ }
+ case 58: break;
+ case 2:
+ { positionInc = 1; return ALPHANUM;
+ }
+ case 59: break;
+ case 29:
+ { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
+ }
+ case 60: break;
+ case 17:
+ { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;
+ }
+ case 61: break;
+ case 44:
+ { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);
+ }
+ case 62: break;
+ case 26:
+ { yybegin(YYINITIAL);
+ }
+ case 63: break;
+ case 3:
+ { positionInc = 1; return CJ;
+ }
+ case 64: break;
+ case 38:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/
+ }
+ case 65: break;
+ case 15:
+ { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING);
+ }
+ case 66: break;
+ case 30:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/
+ }
+ case 67: break;
+ case 6:
+ { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+ }
+ case 68: break;
+ case 5:
+ { positionInc = 1;
+ }
+ case 69: break;
+ case 19:
+ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/
+ }
+ case 70: break;
+ case 42:
+ { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+ }
+ case 71: break;
+ case 27:
+ { numLinkToks = 0; yybegin(YYINITIAL);
+ }
+ case 72: break;
+ case 11:
+ { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);
+ }
+ case 73: break;
+ case 13:
+ { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);
+ }
+ case 74: break;
+ case 14:
+ { yybegin(STRING); numWikiTokensSeen++; return currentTokType;
+ }
+ case 75: break;
+ case 45:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE);
+ }
+ case 76: break;
+ case 28:
+ { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);
+ }
+ case 77: break;
+ case 37:
+ { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/
+ }
+ case 78: break;
+ case 9:
+ { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+ }
+ case 79: break;
+ case 7:
+ { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;
+ }
+ case 80: break;
+ case 24:
+ { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);
+ }
case 81: break;
case 40:
{ positionInc = 1; return EMAIL;
}
case 82: break;
- case 42:
- { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
+ case 1:
+ { numWikiTokensSeen = 0; positionInc = 1;
}
case 83: break;
- case 6:
- { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;
+ case 18:
+ { /* ignore STRING */
}
case 84: break;
- case 44:
- { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);
+ case 36:
+ { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE);
}
case 85: break;
- case 5:
- { positionInc = 1;
+ case 33:
+ { positionInc = 1; return HOST;
}
case 86: break;
- case 9:
- { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
+ case 31:
+ { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE);
}
case 87: break;
- case 45:
- { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE);
+ case 41:
+ { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/
}
case 88: break;
- case 22:
- { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}
+ case 12:
+ { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/
}
case 89: break;
- case 21:
- { yybegin(STRING); return currentTokType;/*pipe*/
+ case 10:
+ { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);
}
case 90: break;
default:
diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
index d012a59e71a..477c55bd030 100644
--- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
+++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex
@@ -17,7 +17,7 @@ package org.apache.lucene.analysis.wikipedia;
* limitations under the License.
*/
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%%
@@ -81,8 +81,8 @@ public final int getPositionIncrement(){
/**
* Fills Lucene token with the current token text.
*/
-final void getText(TermAttribute t) {
- t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
+final void getText(CharTermAttribute t) {
+ t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
}
final int setText(StringBuilder buffer){
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
index 28bfbf69572..ebf5f541449 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java
@@ -17,8 +17,6 @@ package org.apache.lucene.analysis.compound;
* limitations under the License.
*/
-import java.io.File;
-import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
@@ -27,7 +25,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
public void testHyphenationCompoundWordsDA() throws Exception {
@@ -176,15 +174,15 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase {
CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE,
CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false);
- TermAttribute termAtt = tf.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class);
assertTrue(tf.incrementToken());
- assertEquals("Rindfleischüberwachungsgesetz", termAtt.term());
+ assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
assertTrue(tf.incrementToken());
- assertEquals("Rind", termAtt.term());
+ assertEquals("Rind", termAtt.toString());
wsTokenizer.reset(new StringReader("Rindfleischüberwachungsgesetz"));
tf.reset();
assertTrue(tf.incrementToken());
- assertEquals("Rindfleischüberwachungsgesetz", termAtt.term());
+ assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString());
}
private Reader getHyphenationReader() throws Exception {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
index d7b23c8069a..42514679e46 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
@@ -50,9 +51,9 @@ public class TestElision extends BaseTokenStreamTestCase {
private List filter(TokenFilter filter) throws IOException {
List tas = new ArrayList();
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
while (filter.incrementToken()) {
- tas.add(termAtt.term());
+ tas.add(termAtt.toString());
}
return tas;
}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
index a266fff3395..80da0991355 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java
@@ -41,8 +41,6 @@ public class TestPrefixAndSuffixAwareTokenFilter extends BaseTokenStreamTestCase
private static Token createToken(String term, int start, int offset)
{
- Token token = new Token(start, offset);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset);
}
}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
index c7c9ae5efba..e470e3e28f5 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java
@@ -52,8 +52,6 @@ public class TestPrefixAwareTokenFilter extends BaseTokenStreamTestCase {
private static Token createToken(String term, int start, int offset)
{
- Token token = new Token(start, offset);
- token.setTermBuffer(term);
- return token;
+ return new Token(term, start, offset);
}
}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
index 75b8b88cb69..946f9787c4c 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java
@@ -51,7 +51,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase {
if (toks.hasNext()) {
clearAttributes();
Token tok = toks.next();
- termAtt.setEmpty().append(tok.term());
+ termAtt.setEmpty().append(tok);
offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
posIncAtt.setPositionIncrement(tok.getPositionIncrement());
return true;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
index 1253e56057a..094378629da 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java
@@ -22,14 +22,14 @@ import java.io.IOException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.AttributeImpl;
import org.apache.lucene.analysis.Token;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class TestSingleTokenTokenFilter extends LuceneTestCase {
public void test() throws IOException {
Token token = new Token();
SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
- AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(TermAttribute.class);
+ AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(CharTermAttribute.class);
assertTrue(tokenAtt instanceof Token);
ts.reset();
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
index 6439d6bb76b..9b4d31d877f 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java
@@ -97,7 +97,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase {
else {
clearAttributes();
Token token = tokens[index++];
- termAtt.setEmpty().append(token.term());
+ termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
index dc1e53fb5de..3e0ed8b5f61 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java
@@ -18,8 +18,8 @@ package org.apache.lucene.analysis.payloads;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.LuceneTestCase;
@@ -32,7 +32,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter
(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)),
DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder());
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8"));
@@ -70,7 +70,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testFloatEncoding() throws Exception {
String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder());
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f));
@@ -88,7 +88,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
public void testIntEncoding() throws Exception {
String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83";
DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder());
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class);
assertTermEquals("The", filter, termAtt, payAtt, null);
assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1));
@@ -104,10 +104,10 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
}
void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception {
- TermAttribute termAtt = stream.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class);
PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class);
assertTrue(stream.incrementToken());
- assertEquals(expected, termAtt.term());
+ assertEquals(expected, termAtt.toString());
Payload payload = payloadAtt.getPayload();
if (payload != null) {
assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
@@ -121,9 +121,9 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase {
}
- void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
+ void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception {
assertTrue(stream.incrementToken());
- assertEquals(expected, termAtt.term());
+ assertEquals(expected, termAtt.toString());
Payload payload = payAtt.getPayload();
if (payload != null) {
assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length);
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
index 7cc9a4a56d5..aa6b2cd4606 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java
@@ -20,8 +20,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import java.io.IOException;
@@ -39,11 +39,11 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase {
NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D");
boolean seenDogs = false;
- TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
while (nptf.incrementToken()) {
- if (termAtt.term().equals("dogs")) {
+ if (termAtt.toString().equals("dogs")) {
seenDogs = true;
assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
@@ -60,19 +60,17 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase {
}
private final class WordTokenFilter extends TokenFilter {
- private TermAttribute termAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private WordTokenFilter(TokenStream input) {
super(input);
- termAtt = addAttribute(TermAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- if (termAtt.term().equals("dogs"))
+ if (termAtt.toString().equals("dogs"))
typeAtt.setType("D");
return true;
} else {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
index aacebe85894..35fa092e02e 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java
@@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import java.io.IOException;
@@ -39,12 +39,12 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))));
int count = 0;
- TermAttribute termAtt = nptf.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class);
while (nptf.incrementToken()) {
- assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))));
+ assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))));
assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null);
String type = new String(payloadAtt.getPayload().getData(), "UTF-8");
assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true);
@@ -55,19 +55,17 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase {
}
private final class WordTokenFilter extends TokenFilter {
- private TermAttribute termAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private WordTokenFilter(TokenStream input) {
super(input);
- termAtt = addAttribute(TermAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0])));
+ typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.buffer()[0])));
return true;
} else {
return false;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java
index ed12a7f607b..ea3938acdda 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java
@@ -22,7 +22,7 @@ import java.io.IOException;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
public class PositionFilterTest extends BaseTokenStreamTestCase {
@@ -30,19 +30,18 @@ public class PositionFilterTest extends BaseTokenStreamTestCase {
protected int index = 0;
protected String[] testToken;
- protected TermAttribute termAtt;
+ protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
public TestTokenStream(String[] testToken) {
super();
this.testToken = testToken;
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public final boolean incrementToken() throws IOException {
clearAttributes();
if (index < testToken.length) {
- termAtt.setTermBuffer(testToken[index++]);
+ termAtt.setEmpty().append(testToken[index++]);
return true;
} else {
return false;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
index 7ed432a9ddf..26512acb75d 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java
@@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@@ -176,9 +175,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT));
a.addStopWords(reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
- TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
- assertTrue(ts.incrementToken());
- assertEquals("this", termAtt.term());
- assertFalse(ts.incrementToken());
+ assertTokenStreamContents(ts, new String[] { "this" });
}
}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
index b55b7353a30..a6896e504a8 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java
@@ -21,46 +21,22 @@ import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.util.Version;
public class TestReverseStringFilter extends BaseTokenStreamTestCase {
public void testFilter() throws Exception {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
new StringReader("Do have a nice day")); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream);
- TermAttribute text = filter.getAttribute(TermAttribute.class);
- assertTrue(filter.incrementToken());
- assertEquals("oD", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("evah", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("a", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("ecin", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("yad", text.term());
- assertFalse(filter.incrementToken());
+ assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" });
}
public void testFilterWithMark() throws Exception {
TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(
"Do have a nice day")); // 1-4 length string
ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001');
- TermAttribute text = filter
- .getAttribute(TermAttribute.class);
- assertTrue(filter.incrementToken());
- assertEquals("\u0001oD", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("\u0001evah", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("\u0001a", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("\u0001ecin", text.term());
- assertTrue(filter.incrementToken());
- assertEquals("\u0001yad", text.term());
- assertFalse(filter.incrementToken());
+ assertTokenStreamContents(filter,
+ new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" });
}
public void testReverseString() throws Exception {
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
index f08b9fbdb17..45d97898278 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java
@@ -17,17 +17,13 @@ package org.apache.lucene.analysis.ru;
* limitations under the License.
*/
-import java.io.File;
-import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
-import java.io.Reader;
-import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
@@ -65,8 +61,8 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
new RussianLetterTokenizer(TEST_VERSION_CURRENT,
sampleUnicode);
- TermAttribute text = in.getAttribute(TermAttribute.class);
- TermAttribute sampleText = sample.getAttribute(TermAttribute.class);
+ CharTermAttribute text = in.getAttribute(CharTermAttribute.class);
+ CharTermAttribute sampleText = sample.getAttribute(CharTermAttribute.class);
for (;;)
{
@@ -76,34 +72,21 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
boolean nextSampleToken = sample.incrementToken();
assertEquals(
"Unicode",
- text.term(),
+ text.toString(),
nextSampleToken == false
? null
- : sampleText.term());
+ : sampleText.toString());
}
inWords.close();
sampleUnicode.close();
}
- public void testDigitsInRussianCharset()
+ /** Check that RussianAnalyzer doesn't discard any numbers */
+ public void testDigitsInRussianCharset() throws IOException
{
- Reader reader = new StringReader("text 1000");
- RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
- TokenStream stream = ra.tokenStream("", reader);
-
- TermAttribute termText = stream.getAttribute(TermAttribute.class);
- try {
- assertTrue(stream.incrementToken());
- assertEquals("text", termText.term());
- assertTrue(stream.incrementToken());
- assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term());
- assertFalse(stream.incrementToken());
- }
- catch (IOException e)
- {
- fail("unexpected IOException");
- }
+ RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT);
+ assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" });
}
/** @deprecated remove this test in Lucene 4.0: stopwords changed */
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
index ba7346c5889..29c3a0f7b74 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java
@@ -26,8 +26,8 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@@ -159,11 +159,11 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
int j = -1;
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
j += posIncrAtt.getPositionIncrement();
- String termText = termAtt.term();
+ String termText = termAtt.toString();
q.add(new Term("content", termText), j);
}
@@ -186,10 +186,10 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
TokenStream ts = analyzer.tokenStream("content",
new StringReader("test sentence"));
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
while (ts.incrementToken()) {
- String termText = termAtt.term();
+ String termText = termAtt.toString();
q.add(new TermQuery(new Term("content", termText)),
BooleanClause.Occur.SHOULD);
}
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
index 363b97dfc0e..a6f3e112aa8 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java
@@ -31,7 +31,12 @@ import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix;
import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column;
-import org.apache.lucene.analysis.tokenattributes.*;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
@@ -415,7 +420,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) {
Token token = new Token(startOffset, endOffset);
- token.setTermBuffer(text);
+ token.setEmpty().append(text);
token.setPositionIncrement(posIncr);
return token;
}
@@ -427,7 +432,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) {
Token token = new Token(startOffset, endOffset);
- token.setTermBuffer(text);
+ token.setEmpty().append(text);
token.setPositionIncrement(posIncr);
ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight);
return token;
@@ -435,7 +440,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, ShingleMatrixFilter.TokenPositioner positioner) {
Token token = new Token(startOffset, endOffset);
- token.setTermBuffer(text);
+ token.setEmpty().append(text);
token.setPositionIncrement(posIncr);
ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight);
ShingleMatrixFilter.defaultSettingsCodec.setTokenPositioner(token, positioner);
@@ -445,20 +450,20 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
// assert-methods start here
private void assertNext(TokenStream ts, String text) throws IOException {
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
assertTrue(ts.incrementToken());
- assertEquals(text, termAtt.term());
+ assertEquals(text, termAtt.toString());
}
private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException {
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
assertTrue(ts.incrementToken());
- assertEquals(text, termAtt.term());
+ assertEquals(text, termAtt.toString());
assertEquals(positionIncrement, posIncrAtt.getPositionIncrement());
assertEquals(boost, payloadAtt.getPayload() == null ? 1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0);
assertEquals(startOffset, offsetAtt.startOffset());
@@ -466,11 +471,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
}
private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException {
- TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class);
assertTrue(ts.incrementToken());
- assertEquals(text, termAtt.term());
+ assertEquals(text, termAtt.toString());
assertEquals(startOffset, offsetAtt.startOffset());
assertEquals(endOffset, offsetAtt.endOffset());
}
@@ -478,7 +483,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
private static Token createToken(String term, int start, int offset)
{
Token token = new Token(start, offset);
- token.setTermBuffer(term);
+ token.setEmpty().append(term);
return token;
}
@@ -486,21 +491,15 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
public final static class TokenListStream extends TokenStream {
private Collection tokens;
- TermAttribute termAtt;
- PositionIncrementAttribute posIncrAtt;
- PayloadAttribute payloadAtt;
- OffsetAttribute offsetAtt;
- TypeAttribute typeAtt;
- FlagsAttribute flagsAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
public TokenListStream(Collection tokens) {
this.tokens = tokens;
- termAtt = addAttribute(TermAttribute.class);
- posIncrAtt = addAttribute(PositionIncrementAttribute.class);
- payloadAtt = addAttribute(PayloadAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
- flagsAtt = addAttribute(FlagsAttribute.class);
}
private Iterator iterator;
@@ -515,7 +514,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase {
}
Token prototype = iterator.next();
clearAttributes();
- termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength());
+ termAtt.copyBuffer(prototype.buffer(), 0, prototype.length());
posIncrAtt.setPositionIncrement(prototype.getPositionIncrement());
flagsAtt.setFlags(prototype.getFlags());
offsetAtt.setOffset(prototype.startOffset(), prototype.endOffset());
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
index bb3fe3c546a..ab623452dd3 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java
@@ -23,7 +23,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
@@ -41,11 +41,11 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
boolean seenDogs = false;
- TermAttribute termAtt = ttf.addAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class);
TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class);
ttf.reset();
while (ttf.incrementToken()) {
- if (termAtt.term().equals("dogs")) {
+ if (termAtt.toString().equals("dogs")) {
seenDogs = true;
assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true);
} else {
@@ -64,20 +64,18 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase {
}
private class WordTokenFilter extends TokenFilter {
- private TermAttribute termAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
private WordTokenFilter(TokenStream input) {
super(input);
- termAtt = addAttribute(TermAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
public final boolean incrementToken() throws IOException {
if (!input.incrementToken()) return false;
- if (termAtt.term().equals("dogs")) {
+ if (termAtt.toString().equals("dogs")) {
typeAtt.setType("D");
}
return true;
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
index 4b456633e56..63a4e23e4b7 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java
@@ -22,11 +22,11 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
@@ -93,7 +93,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
public void testFilterTokens() throws Exception {
SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English");
- TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
+ CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class);
OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class);
TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class);
PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class);
@@ -102,7 +102,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
filter.incrementToken();
- assertEquals("accent", termAtt.term());
+ assertEquals("accent", termAtt.toString());
assertEquals(2, offsetAtt.startOffset());
assertEquals(7, offsetAtt.endOffset());
assertEquals("wrd", typeAtt.type());
@@ -112,27 +112,21 @@ public class TestSnowball extends BaseTokenStreamTestCase {
}
private final class TestTokenStream extends TokenStream {
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
- private PayloadAttribute payloadAtt;
- private PositionIncrementAttribute posIncAtt;
- private FlagsAttribute flagsAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+ private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class);
+ private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
+ private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
TestTokenStream() {
super();
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
- payloadAtt = addAttribute(PayloadAttribute.class);
- posIncAtt = addAttribute(PositionIncrementAttribute.class);
- flagsAtt = addAttribute(FlagsAttribute.class);
}
@Override
public boolean incrementToken() {
clearAttributes();
- termAtt.setTermBuffer("accents");
+ termAtt.setEmpty().append("accents");
offsetAtt.setOffset(2, 7);
typeAtt.setType("wrd");
posIncAtt.setPositionIncrement(3);
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
index a8cbff57a7d..7cb690ee517 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java
@@ -404,7 +404,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase {
else {
clearAttributes();
Token token = tokens[index++];
- termAtt.setEmpty().append(token.term());
+ termAtt.setEmpty().append(token);
offsetAtt.setOffset(token.startOffset(), token.endOffset());
posIncAtt.setPositionIncrement(token.getPositionIncrement());
flagsAtt.setFlags(token.getFlags());
diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
index 300595ba273..28d5634dc45 100644
--- a/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
+++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java
@@ -20,30 +20,20 @@ package org.apache.lucene.analysis.wikipedia;
import java.io.StringReader;
import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
-import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*;
/**
- *
- *
+ * Basic Tests for {@link WikipediaTokenizer}
**/
public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
protected static final String LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]";
- public WikipediaTokenizerTest(String s) {
- super(s);
- }
-
public void testSimple() throws Exception {
String text = "This is a [[Category:foo]]";
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text));
@@ -51,216 +41,85 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
new String[] { "This", "is", "a", "foo" },
new int[] { 0, 5, 8, 21 },
new int[] { 4, 7, 9, 24 },
- new String[] { "", "", "", WikipediaTokenizer.CATEGORY },
+ new String[] { "", "", "", CATEGORY },
new int[] { 1, 1, 1, 1, },
text.length());
}
public void testHandwritten() throws Exception {
- //make sure all tokens are in only one type
- String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " +
- "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] " +
- "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " +
- " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. " +
- "==heading== ===sub head=== followed by some text [[Category:blah| ]] " +
- "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed." +
- "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" +
- " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" +
- " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] [Citation] martian code";
- Map tcm = new HashMap();//map tokens to types
- tcm.put("link", WikipediaTokenizer.INTERNAL_LINK);
- tcm.put("display", WikipediaTokenizer.INTERNAL_LINK);
- tcm.put("info", WikipediaTokenizer.INTERNAL_LINK);
-
- tcm.put("http://lucene.apache.org", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("http://foo.boo.com/test/test/", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("http://foo.boo.com/test/test/test.html", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("http://foo.boo.com/test/test/test.html?g=b&c=d", WikipediaTokenizer.EXTERNAL_LINK_URL);
- tcm.put("Test", WikipediaTokenizer.EXTERNAL_LINK);
+ // make sure all tokens are in only one type
+ String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] "
+ + "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] "
+ + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' "
+ + " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. "
+ + "==heading== ===sub head=== followed by some text [[Category:blah| ]] "
+ + "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed."
+ + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this"
+ + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]"
+ + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] [Citation] martian code";
- //alphanums
- tcm.put("This", "");
- tcm.put("is", "");
- tcm.put("a", "");
- tcm.put("Category", "");
- tcm.put("linked", "");
- tcm.put("parens", "");
- tcm.put("external", "");
- tcm.put("URL", "");
- tcm.put("and", "");
- tcm.put("period", "");
- tcm.put("Here", "");
- tcm.put("Here's", "");
- tcm.put("here", "");
- tcm.put("Johnny", "");
- tcm.put("followed", "");
- tcm.put("by", "");
- tcm.put("text", "");
- tcm.put("that", "");
- tcm.put("but", "");
- tcm.put("never", "");
- tcm.put("closed", "");
- tcm.put("goes", "");
- tcm.put("for", "");
- tcm.put("this", "");
- tcm.put("an", "");
- tcm.put("some", "");
- tcm.put("martian", "");
- tcm.put("code", "");
-
- tcm.put("foo", WikipediaTokenizer.CATEGORY);
- tcm.put("bar", WikipediaTokenizer.CATEGORY);
- tcm.put("none", WikipediaTokenizer.CATEGORY);
- tcm.put("withstanding", WikipediaTokenizer.CATEGORY);
- tcm.put("blah", WikipediaTokenizer.CATEGORY);
- tcm.put("ital", WikipediaTokenizer.CATEGORY);
- tcm.put("cat", WikipediaTokenizer.CATEGORY);
-
- tcm.put("italics", WikipediaTokenizer.ITALICS);
- tcm.put("more", WikipediaTokenizer.ITALICS);
- tcm.put("bold", WikipediaTokenizer.BOLD);
- tcm.put("same", WikipediaTokenizer.BOLD);
- tcm.put("five", WikipediaTokenizer.BOLD_ITALICS);
- tcm.put("and2", WikipediaTokenizer.BOLD_ITALICS);
- tcm.put("quotes", WikipediaTokenizer.BOLD_ITALICS);
-
- tcm.put("heading", WikipediaTokenizer.HEADING);
- tcm.put("sub", WikipediaTokenizer.SUB_HEADING);
- tcm.put("head", WikipediaTokenizer.SUB_HEADING);
-
- tcm.put("Citation", WikipediaTokenizer.CITATION);
-
- tcm.put("3.25", "");
- tcm.put("3.50", "");
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
- int count = 0;
- int numItalics = 0;
- int numBoldItalics = 0;
- int numCategory = 0;
- int numCitation = 0;
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-
- while (tf.incrementToken()) {
- String tokText = termAtt.term();
- //System.out.println("Text: " + tokText + " Type: " + token.type());
- String expectedType = tcm.get(tokText);
- assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null);
- assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true);
- count++;
- if (typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true){
- numItalics++;
- } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS) == true){
- numBoldItalics++;
- } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true){
- numCategory++;
- }
- else if (typeAtt.type().equals(WikipediaTokenizer.CITATION) == true){
- numCitation++;
- }
- }
- assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size());
- assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4);
- assertTrue(numBoldItalics + " does not equal: " + 3 + " for numBoldItalics", numBoldItalics == 3);
- assertTrue(numCategory + " does not equal: " + 10 + " for numCategory", numCategory == 10);
- assertTrue(numCitation + " does not equal: " + 1 + " for numCitation", numCitation == 1);
+ assertTokenStreamContents(tf,
+ new String[] {"link", "This", "is", "a",
+ "foo", "Category", "This", "is", "a", "linked", "bar", "none",
+ "withstanding", "Category", "This", "is", "parens", "This", "is", "a",
+ "link", "This", "is", "an", "external", "URL",
+ "http://lucene.apache.org", "Here", "is", "italics", "and", "more",
+ "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link",
+ "display", "info", "This", "is", "a", "period", "Here", "is", "3.25",
+ "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub",
+ "head", "followed", "by", "some", "text", "blah", "ital", "cat",
+ "here", "is", "some", "that", "is", "italics", "foo", "but", "is",
+ "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo",
+ "and", "this", "http://foo.boo.com/test/test/", "Test", "Test",
+ "http://foo.boo.com/test/test/test.html", "Test", "Test",
+ "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test",
+ "Citation", "martian", "code"},
+ new String[] {INTERNAL_LINK,
+ "", "", "", CATEGORY, "",
+ "", "", "", "", CATEGORY,
+ CATEGORY, CATEGORY, "", "", "",
+ "", "", "", "", INTERNAL_LINK,
+ "", "", "", "", "",
+ EXTERNAL_LINK_URL, "", "", ITALICS, "",
+ ITALICS, ITALICS, BOLD, "", BOLD_ITALICS, BOLD_ITALICS,
+ "", "", "", INTERNAL_LINK, INTERNAL_LINK,
+ INTERNAL_LINK, "", "", "", "",
+ "", "", "", "", "",
+ "", "", "", "", HEADING,
+ SUB_HEADING, SUB_HEADING, "", "", "",
+ "", CATEGORY, CATEGORY, CATEGORY, "", "",
+ "", "", "", ITALICS, CATEGORY,
+ "", "", "", "", BOLD, CATEGORY,
+ "", "", "", BOLD_ITALICS, CATEGORY,
+ "", "", EXTERNAL_LINK_URL, EXTERNAL_LINK,
+ EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK,
+ EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION,
+ "", ""});
}
public void testLinkPhrases() throws Exception {
-
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES));
checkLinkPhrases(tf);
-
}
private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException {
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- //The link, and here should be at the same position for phrases to work
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "again",
- termAtt.term().equals("again") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "click",
- termAtt.term().equals("click") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org",
- termAtt.term().equals("http://lucene.apache.org") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "again",
- termAtt.term().equals("again") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a",
- termAtt.term().equals("a") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "b",
- termAtt.term().equals("b") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "c",
- termAtt.term().equals("c") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "d",
- termAtt.term().equals("d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
-
- assertFalse(tf.incrementToken());
+ assertTokenStreamContents(tf,
+ new String[] { "click", "link", "here", "again", "click",
+ "http://lucene.apache.org", "here", "again", "a", "b", "c", "d" },
+ new int[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1 });
}
public void testLinks() throws Exception {
String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]";
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test));
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news",
- termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
- tf.incrementToken();//skip here
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c",
- termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
- tf.incrementToken();//skip here
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c",
- termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true);
-
- assertTrue(tf.incrementToken());
- assertFalse(tf.incrementToken());
+ assertTokenStreamContents(tf,
+ new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here",
+ "http://lucene.apache.org/java/docs/index.html?b=c", "here",
+ "https://lucene.apache.org/java/docs/index.html?b=c", "here" },
+ new String[] { EXTERNAL_LINK_URL, EXTERNAL_LINK,
+ EXTERNAL_LINK_URL, EXTERNAL_LINK,
+ EXTERNAL_LINK_URL, EXTERNAL_LINK, });
}
public void testLucene1133() throws Exception {
@@ -272,73 +131,13 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
checkLinkPhrases(tf);
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks);
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
- OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a b c d",
- termAtt.term().equals("a b c d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "e f g",
- termAtt.term().equals("e f g") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "there",
- termAtt.term().equals("there") == true);
-
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics here",
- termAtt.term().equals("italics here") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "something",
- termAtt.term().equals("something") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "more italics",
- termAtt.term().equals("more italics") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "h i j",
- termAtt.term().equals("h i j") == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
- assertFalse(tf.incrementToken());
+ assertTokenStreamContents(tf,
+ new String[] { "a b c d", "e f g", "link", "here", "link",
+ "there", "italics here", "something", "more italics", "h i j" },
+ new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 },
+ new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 },
+ new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 }
+ );
}
public void testBoth() throws Exception {
@@ -348,211 +147,26 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase {
String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]";
//should output all the individual tokens plus the untokenized tokens as well. Untokenized tokens
WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
- TermAttribute termAtt = tf.addAttribute(TermAttribute.class);
- TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class);
- PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class);
- OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class);
+ assertTokenStreamContents(tf,
+ new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g",
+ "link", "here", "link", "there", "italics here", "italics", "here",
+ "something", "more italics", "more", "italics", "h i j", "h", "i", "j" },
+ new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 },
+ new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 },
+ new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 }
+ );
+
+ // Now check the flags. TODO: add a way to check flags from BaseTokenStreamTestCase?
+ tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks);
+ int expectedFlags[] = new int[] { UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, 0,
+ 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, UNTOKENIZED_TOKEN_FLAG, 0, 0, 0 };
FlagsAttribute flagsAtt = tf.addAttribute(FlagsAttribute.class);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a b c d",
- termAtt.term().equals("a b c d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "a",
- termAtt.term().equals("a") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " equals: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG + " and it shouldn't", flagsAtt.getFlags() != WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 12, offsetAtt.endOffset() == 12);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "b",
- termAtt.term().equals("b") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 13, offsetAtt.startOffset() == 13);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 14, offsetAtt.endOffset() == 14);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "c",
- termAtt.term().equals("c") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 15, offsetAtt.startOffset() == 15);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 16, offsetAtt.endOffset() == 16);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "d",
- termAtt.term().equals("d") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 17, offsetAtt.startOffset() == 17);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18);
-
-
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "e f g",
- termAtt.term().equals("e f g") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "e",
- termAtt.term().equals("e") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 33, offsetAtt.endOffset() == 33);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "f",
- termAtt.term().equals("f") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 34, offsetAtt.startOffset() == 34);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 35, offsetAtt.endOffset() == 35);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "g",
- termAtt.term().equals("g") == true);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 36, offsetAtt.startOffset() == 36);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "link",
- termAtt.term().equals("link") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "there",
- termAtt.term().equals("there") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.INTERNAL_LINK, typeAtt.type().equals(WikipediaTokenizer.INTERNAL_LINK) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics here",
- termAtt.term().equals("italics here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics",
- termAtt.term().equals("italics") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 78, offsetAtt.endOffset() == 78);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "here",
- termAtt.term().equals("here") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 79, offsetAtt.startOffset() == 79);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 83, offsetAtt.endOffset() == 83);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "something",
- termAtt.term().equals("something") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "more italics",
- termAtt.term().equals("more italics") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "more",
- termAtt.term().equals("more") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 102, offsetAtt.endOffset() == 102);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "italics",
- termAtt.term().equals("italics") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.ITALICS, typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true);
-
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 103, offsetAtt.startOffset() == 103);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "h i j",
- termAtt.term().equals("h i j") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(flagsAtt.getFlags() + " does not equal: " + WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG, flagsAtt.getFlags() == WikipediaTokenizer.UNTOKENIZED_TOKEN_FLAG);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "h",
- termAtt.term().equals("h") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 125, offsetAtt.endOffset() == 125);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "i",
- termAtt.term().equals("i") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 128, offsetAtt.startOffset() == 128);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 129, offsetAtt.endOffset() == 129);
-
- assertTrue(tf.incrementToken());
- assertTrue(termAtt.term() + " is not equal to " + "j",
- termAtt.term().equals("j") == true);
- assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1);
- assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.CATEGORY, typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true);
- assertTrue(offsetAtt.startOffset() + " does not equal: " + 132, offsetAtt.startOffset() == 132);
- assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133);
-
+ tf.reset();
+ for (int i = 0; i < expectedFlags.length; i++) {
+ assertTrue(tf.incrementToken());
+ assertEquals("flags " + i, expectedFlags[i], flagsAtt.getFlags());
+ }
assertFalse(tf.incrementToken());
+ tf.close();
}
}
diff --git a/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java b/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
index 6309b2e4163..c1e48fbbed8 100644
--- a/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
+++ b/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
@@ -23,7 +23,7 @@ import com.ibm.icu.text.RawCollationKey;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;
import java.io.IOException;
@@ -70,7 +70,7 @@ import java.io.IOException;
public final class ICUCollationKeyFilter extends TokenFilter {
private Collator collator = null;
private RawCollationKey reusableKey = new RawCollationKey();
- private TermAttribute termAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
/**
*
@@ -80,23 +80,22 @@ public final class ICUCollationKeyFilter extends TokenFilter {
public ICUCollationKeyFilter(TokenStream input, Collator collator) {
super(input);
this.collator = collator;
- termAtt = addAttribute(TermAttribute.class);
}
@Override
public boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- char[] termBuffer = termAtt.termBuffer();
- String termText = new String(termBuffer, 0, termAtt.termLength());
+ char[] termBuffer = termAtt.buffer();
+ String termText = new String(termBuffer, 0, termAtt.length());
collator.getRawCollationKey(termText, reusableKey);
int encodedLength = IndexableBinaryStringTools.getEncodedLength(
reusableKey.bytes, 0, reusableKey.size);
if (encodedLength > termBuffer.length) {
- termAtt.resizeTermBuffer(encodedLength);
+ termAtt.resizeBuffer(encodedLength);
}
- termAtt.setTermLength(encodedLength);
+ termAtt.setLength(encodedLength);
IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
- termAtt.termBuffer(), 0, encodedLength);
+ termAtt.buffer(), 0, encodedLength);
return true;
} else {
return false;
diff --git a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
index 3d79a3f3ed3..bdb71e22122 100644
--- a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
+++ b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
@@ -21,8 +21,8 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -44,29 +44,20 @@ public final class SentenceTokenizer extends Tokenizer {
private int tokenStart = 0, tokenEnd = 0;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
public SentenceTokenizer(Reader reader) {
super(reader);
- init();
}
public SentenceTokenizer(AttributeSource source, Reader reader) {
super(source, reader);
- init();
}
public SentenceTokenizer(AttributeFactory factory, Reader reader) {
super(factory, reader);
- init();
- }
-
- private void init() {
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
@@ -112,7 +103,7 @@ public final class SentenceTokenizer extends Tokenizer {
if (buffer.length() == 0)
return false;
else {
- termAtt.setTermBuffer(buffer.toString());
+ termAtt.setEmpty().append(buffer);
offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
typeAtt.setType("sentence");
return true;
diff --git a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
index 6999b0a2e60..6f0ecea5dd3 100644
--- a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
+++ b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
@@ -24,8 +24,8 @@ import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
/**
@@ -40,9 +40,9 @@ public final class WordTokenFilter extends TokenFilter {
private List tokenBuffer;
- private TermAttribute termAtt;
- private OffsetAttribute offsetAtt;
- private TypeAttribute typeAtt;
+ private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+ private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+ private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
/**
* Construct a new WordTokenizer.
@@ -52,9 +52,6 @@ public final class WordTokenFilter extends TokenFilter {
public WordTokenFilter(TokenStream in) {
super(in);
this.wordSegmenter = new WordSegmenter();
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- typeAtt = addAttribute(TypeAttribute.class);
}
@Override
@@ -63,7 +60,7 @@ public final class WordTokenFilter extends TokenFilter {
// there are no remaining tokens from the current sentence... are there more sentences?
if (input.incrementToken()) {
// a new sentence is available: process it.
- tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
+ tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
tokenIter = tokenBuffer.iterator();
/*
* it should not be possible to have a sentence with 0 words, check just in case.
@@ -79,7 +76,7 @@ public final class WordTokenFilter extends TokenFilter {
clearAttributes();
// There are remaining tokens from the current sentence, return the next one.
SegToken nextWord = tokenIter.next();
- termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
+ termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
typeAtt.setType("word");
return true;
diff --git a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
index 0e563f389a1..5ad44b3dad4 100644
--- a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
+++ b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
@@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
return null;
} else {
Token token = new Token();
- token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+ token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
token.setFlags(flagsAtt.getFlags());
@@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
/** old api emulation for back compat */
private boolean writeToken(Token token) throws IOException {
clearAttributes();
- termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+ termAtt.copyBuffer(token.buffer(), 0, token.length());
offsetAtt.setOffset(token.startOffset(), token.endOffset());
typeAtt.setType(token.type());
flagsAtt.setFlags(token.getFlags());
diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
index 188d522cd80..f086fadad6f 100644
--- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
+++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
@@ -163,12 +163,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
while (tokenStream.incrementToken()) {
Token token = new Token();
if (termAtt != null) {
- token.setTermBuffer(termAtt.toString());
+ token.setEmpty().append(termAtt);
}
if (bytesAtt != null) {
bytesAtt.toBytesRef(bytes);
// TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
- token.setTermBuffer(bytes.utf8ToString());
+ token.setEmpty().append(bytes.utf8ToString());
}
token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
token.setType(typeAtt.type());
@@ -208,10 +208,10 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
for (Token token : tokens) {
NamedList