diff --git a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
index 47a58dc8401..22d0c2fb8fa 100755
--- a/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
+++ b/lucene/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
@@ -26,9 +26,10 @@ import java.util.List;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.benchmark.BenchmarkTestCase;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
@@ -918,11 +919,11 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
     TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
     ts1.reset();
     ts2.reset();
-    TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class);
-    TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt1 = ts1.addAttribute(CharTermAttribute.class);
+    CharTermAttribute termAtt2 = ts2.addAttribute(CharTermAttribute.class);
     assertTrue(ts1.incrementToken());
     assertTrue(ts2.incrementToken());
-    assertEquals(termAtt1.term(), termAtt2.term());
+    assertEquals(termAtt1.toString(), termAtt2.toString());
     assertFalse(ts1.incrementToken());
     assertFalse(ts2.incrementToken());
     ts1.close();
@@ -994,21 +995,7 @@ public class TestPerfTasksLogic extends BenchmarkTestCase {
 
   private void assertEqualShingle
     (Analyzer analyzer, String text, String[] expected) throws Exception {
-    TokenStream stream = analyzer.tokenStream("bogus", new StringReader(text));
-    stream.reset();
-    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
-    int termNum = 0;
-    while (stream.incrementToken()) {
-      assertTrue("Extra output term(s), starting with '"
-          + new String(termAtt.termBuffer(), 0, termAtt.termLength()) + "'",
-          termNum < expected.length);
-      assertEquals("Mismatch in output term # " + termNum + " - ",
-          expected[termNum],
-          new String(termAtt.termBuffer(), 0, termAtt.termLength()));
-      ++termNum;
-    }
-    assertEquals("Too few output terms", expected.length, termNum);
-    stream.close();
+    BaseTokenStreamTestCase.assertAnalyzesTo(analyzer, text, expected);
   }
 
   private String[] getShingleConfig(String params) {
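The test change above shows the consumer-side migration this patch applies everywhere: TermAttribute.term() becomes CharTermAttribute.toString(). A minimal sketch of the new read loop, not part of the patch; the class, analyzer, and field name are placeholders, assuming the transitional 3.x/4.0 analysis API this patch targets:

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class DumpTerms {
      // Prints each term the analyzer produces for the given text.
      public static void dump(Analyzer analyzer, String text) throws Exception {
        TokenStream ts = analyzer.tokenStream("field", new StringReader(text));
        // CharTermAttribute replaces TermAttribute; toString() replaces term().
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
          System.out.println(termAtt.toString());
        }
        ts.end();
        ts.close();
      }
    }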
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
index b5e4fac21bc..1a692d62c84 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/Highlighter.java
@@ -23,9 +23,9 @@ import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.PriorityQueue;
 
 /**
@@ -191,7 +191,7 @@ public class Highlighter
     ArrayList docFrags = new ArrayList();
     StringBuilder newText=new StringBuilder();
 
-    TermAttribute termAtt = tokenStream.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
     tokenStream.addAttribute(PositionIncrementAttribute.class);
     tokenStream.reset();
@@ -225,7 +225,7 @@ public class Highlighter
           (offsetAtt.startOffset()>text.length())
           )
         {
-          throw new InvalidTokenOffsetsException("Token "+ termAtt.term()
+          throw new InvalidTokenOffsetsException("Token "+ termAtt.toString()
              +" exceeds length of provided text sized "+text.length());
         }
         if((tokenGroup.numTokens>0)&&(tokenGroup.isDistinct()))
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
index 24dbb4644da..e0b76a4aebd 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryScorer.java
@@ -25,8 +25,8 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.memory.MemoryIndex;
 import org.apache.lucene.search.Query;
@@ -46,7 +46,7 @@ public class QueryScorer implements Scorer {
   private float maxTermWeight;
   private int position = -1;
   private String defaultField;
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
   private PositionIncrementAttribute posIncAtt;
   private boolean expandMultiTermQuery = true;
   private Query query;
@@ -145,7 +145,7 @@ public class QueryScorer implements Scorer {
    */
   public float getTokenScore() {
     position += posIncAtt.getPositionIncrement();
-    String termText = termAtt.term();
+    String termText = termAtt.toString();
 
     WeightedSpanTerm weightedSpanTerm;
 
@@ -175,7 +175,7 @@ public class QueryScorer implements Scorer {
    */
   public TokenStream init(TokenStream tokenStream) throws IOException {
     position = -1;
-    termAtt = tokenStream.addAttribute(TermAttribute.class);
+    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
     if(!skipInitExtractor) {
       if(fieldWeightedSpanTerms != null) {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
index e44d3072063..167bf3dd6f7 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
@@ -21,7 +21,7 @@ import java.util.HashMap;
 import java.util.HashSet;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.Query;
 
@@ -41,7 +41,7 @@ public class QueryTermScorer implements Scorer {
   float maxTermWeight = 0;
   private HashMap termsToFind;
 
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
 
   /**
    *
@@ -95,7 +95,7 @@ public class QueryTermScorer implements Scorer {
    * @see org.apache.lucene.search.highlight.Scorer#init(org.apache.lucene.analysis.TokenStream)
    */
   public TokenStream init(TokenStream tokenStream) {
-    termAtt = tokenStream.addAttribute(TermAttribute.class);
+    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     return null;
   }
 
@@ -118,7 +118,7 @@ public class QueryTermScorer implements Scorer {
    * @see org.apache.lucene.search.highlight.Scorer#getTokenScore()
    */
   public float getTokenScore() {
-    String termText = termAtt.term();
+    String termText = termAtt.toString();
 
     WeightedTerm queryTerm = termsToFind.get(termText);
     if (queryTerm == null) {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
index e7cb034f83e..c468867850c 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/SimpleSpanFragmenter.java
@@ -20,9 +20,9 @@ package org.apache.lucene.search.highlight;
 import java.util.List;
 
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.search.spans.Spans;
 
 
@@ -38,7 +38,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
   private QueryScorer queryScorer;
   private int waitForPos = -1;
   private int textSize;
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
   private PositionIncrementAttribute posIncAtt;
   private OffsetAttribute offsetAtt;
 
@@ -70,7 +70,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
       return false;
     }
 
-    WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
+    WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.toString());
 
    if (wSpanTerm != null) {
      List positionSpans = wSpanTerm.getPositionSpans();
@@ -101,7 +101,7 @@ public class SimpleSpanFragmenter implements Fragmenter {
     position = -1;
     currentNumFrags = 1;
     textSize = originalText.length();
-    termAtt = tokenStream.addAttribute(TermAttribute.class);
+    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
     posIncAtt = tokenStream.addAttribute(PositionIncrementAttribute.class);
     offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
   }
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
index 57355e51427..e0f88f2daea 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenGroup.java
@@ -19,8 +19,8 @@ package org.apache.lucene.search.highlight;
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 /**
  * One, or several overlapping tokens, along with the score(s) and the scope of
@@ -38,11 +38,11 @@ public class TokenGroup {
   int matchStartOffset, matchEndOffset;
 
   private OffsetAttribute offsetAtt;
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
 
   public TokenGroup(TokenStream tokenStream) {
     offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
-    termAtt = tokenStream.addAttribute(TermAttribute.class);
+    termAtt = tokenStream.addAttribute(CharTermAttribute.class);
   }
 
   void addToken(float score) {
@@ -68,7 +68,7 @@ public class TokenGroup {
       }
     }
     Token token = new Token(termStartOffset, termEndOffset);
-    token.setTermBuffer(termAtt.term());
+    token.setEmpty().append(termAtt);
     tokens[numTokens] = token;
     scores[numTokens] = score;
     numTokens++;
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
index 5129f238ac7..e5ecc8bd92a 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
@@ -29,8 +29,8 @@ import java.util.Comparator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.TermFreqVector;
@@ -153,13 +153,13 @@ public class TokenSources {
 
     int currentToken = 0;
 
-    TermAttribute termAtt;
+    CharTermAttribute termAtt;
 
     OffsetAttribute offsetAtt;
 
     StoredTokenStream(Token tokens[]) {
       this.tokens = tokens;
-      termAtt = addAttribute(TermAttribute.class);
+      termAtt = addAttribute(CharTermAttribute.class);
       offsetAtt = addAttribute(OffsetAttribute.class);
     }
 
@@ -170,7 +170,7 @@ public class TokenSources {
       }
       Token token = tokens[currentToken++];
       clearAttributes();
-      termAtt.setTermBuffer(token.term());
+      termAtt.setEmpty().append(token);
       offsetAtt.setOffset(token.startOffset(), token.endOffset());
       return true;
     }
@@ -204,9 +204,8 @@ public class TokenSources {
             unsortedTokens = new ArrayList();
           }
           for (int tp = 0; tp < offsets.length; tp++) {
-            Token token = new Token(offsets[tp].getStartOffset(), offsets[tp]
+            Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp]
                .getEndOffset());
-            token.setTermBuffer(terms[t]);
            unsortedTokens.add(token);
          }
        } else {
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
index 8cb2f141b79..810441677c5 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenStreamFromTermPositionVector.java
@@ -25,9 +25,9 @@ import java.util.List;
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 
@@ -37,7 +37,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
 
   private Iterator tokensAtCurrentPosition;
 
-  private TermAttribute termAttribute;
+  private CharTermAttribute termAttribute;
 
   private PositionIncrementAttribute positionIncrementAttribute;
 
@@ -51,7 +51,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
    */
   public TokenStreamFromTermPositionVector(
       final TermPositionVector termPositionVector) {
-    termAttribute = addAttribute(TermAttribute.class);
+    termAttribute = addAttribute(CharTermAttribute.class);
     positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
     offsetAttribute = addAttribute(OffsetAttribute.class);
     final String[] terms = termPositionVector.getTerms();
@@ -65,7 +65,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
              offsets[j].getStartOffset(), offsets[j].getEndOffset());
         } else {
           token = new Token();
-          token.setTermBuffer(terms[i]);
+          token.setEmpty().append(terms[i]);
         }
         // Yes - this is the position, not the increment! This is for
         // sorting. This value
@@ -100,7 +100,7 @@ public final class TokenStreamFromTermPositionVector extends TokenStream {
     if (this.tokensAtCurrentPosition.hasNext()) {
       final Token next = this.tokensAtCurrentPosition.next();
       clearAttributes();
-      termAttribute.setTermBuffer(next.term());
+      termAttribute.setEmpty().append(next);
       positionIncrementAttribute.setPositionIncrement(next
          .getPositionIncrement());
       offsetAttribute.setOffset(next.startOffset(), next.endOffset());
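The producer side of the migration, which the hunks above and the test streams below both use: setTermBuffer(...) becomes setEmpty().append(...), and attributes become final fields initialized at declaration. A self-contained sketch under the same API assumptions; the class and its behavior are invented for illustration:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

    // Emits a fixed list of words, one token per word.
    final class WordListTokenStream extends TokenStream {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
      private final String[] words;
      private int i = 0, start = 0;

      WordListTokenStream(String... words) {
        this.words = words;
      }

      @Override
      public boolean incrementToken() {
        if (i == words.length) {
          return false;
        }
        clearAttributes();
        // setEmpty().append(...) replaces the removed setTermBuffer(...).
        termAtt.setEmpty().append(words[i]);
        offsetAtt.setOffset(start, start + words[i].length());
        start += words[i].length() + 1;
        i++;
        return true;
      }
    }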
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
index 9076a69d2e7..40166fcd531 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
@@ -296,16 +296,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
 
     private int i = -1;
 
-    private TermAttribute termAttribute;
-
-    private OffsetAttribute offsetAttribute;
-
-    private PositionIncrementAttribute positionIncrementAttribute;
+    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+    private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
 
     public TokenStreamSparse() {
-      termAttribute = addAttribute(TermAttribute.class);
-      offsetAttribute = addAttribute(OffsetAttribute.class);
-      positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
       reset();
     }
 
@@ -316,8 +311,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         return false;
       }
       clearAttributes();
-      termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i]
-          .term().length());
+      termAttribute.setEmpty().append(this.tokens[i]);
       offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
          .endOffset());
       positionIncrementAttribute.setPositionIncrement(this.tokens[i]
@@ -342,16 +336,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
 
     private int i = -1;
 
-    private TermAttribute termAttribute;
-
-    private OffsetAttribute offsetAttribute;
-
-    private PositionIncrementAttribute positionIncrementAttribute;
+    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);
+    private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class);
+    private final PositionIncrementAttribute positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
 
     public TokenStreamConcurrent() {
-      termAttribute = addAttribute(TermAttribute.class);
-      offsetAttribute = addAttribute(OffsetAttribute.class);
-      positionIncrementAttribute = addAttribute(PositionIncrementAttribute.class);
       reset();
     }
 
@@ -362,8 +351,7 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         return false;
       }
       clearAttributes();
-      termAttribute.setTermBuffer(this.tokens[i].term(), 0, this.tokens[i]
-          .term().length());
+      termAttribute.setEmpty().append(this.tokens[i]);
       offsetAttribute.setOffset(this.tokens[i].startOffset(), this.tokens[i]
          .endOffset());
       positionIncrementAttribute.setPositionIncrement(this.tokens[i]
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
index ddb065387ce..1b786a3a41c 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
@@ -41,7 +41,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
@@ -1424,13 +1424,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     return new TokenStream() {
       Iterator iter;
       List lst;
-      private TermAttribute termAtt;
-      private PositionIncrementAttribute posIncrAtt;
-      private OffsetAttribute offsetAtt;
+      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+      private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
       {
-        termAtt = addAttribute(TermAttribute.class);
-        posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-        offsetAtt = addAttribute(OffsetAttribute.class);
         lst = new ArrayList();
         Token t;
         t = createToken("hi", 0, 2);
@@ -1456,7 +1453,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
         if(iter.hasNext()) {
           Token token = iter.next();
           clearAttributes();
-          termAtt.setTermBuffer(token.term());
+          termAtt.setEmpty().append(token);
           posIncrAtt.setPositionIncrement(token.getPositionIncrement());
           offsetAtt.setOffset(token.startOffset(), token.endOffset());
           return true;
@@ -1473,13 +1470,10 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     return new TokenStream() {
       Iterator iter;
       List lst;
-      private TermAttribute termAtt;
-      private PositionIncrementAttribute posIncrAtt;
-      private OffsetAttribute offsetAtt;
+      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+      private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
       {
-        termAtt = addAttribute(TermAttribute.class);
-        posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-        offsetAtt = addAttribute(OffsetAttribute.class);
         lst = new ArrayList();
         Token t;
         t = createToken("hispeed", 0, 8);
@@ -1505,7 +1499,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
         if(iter.hasNext()) {
           Token token = iter.next();
           clearAttributes();
-          termAtt.setTermBuffer(token.term());
+          termAtt.setEmpty().append(token);
           posIncrAtt.setPositionIncrement(token.getPositionIncrement());
           offsetAtt.setOffset(token.startOffset(), token.endOffset());
           return true;
@@ -1762,9 +1756,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
   private static Token createToken(String term, int start, int offset)
   {
-    Token token = new Token(start, offset);
-    token.setTermBuffer(term);
-    return token;
+    return new Token(term, start, offset);
   }
 
 }
@@ -1795,7 +1787,7 @@ final class SynonymAnalyzer extends Analyzer {
   @Override
   public TokenStream tokenStream(String arg0, Reader arg1) {
     Tokenizer stream = new MockTokenizer(arg1, MockTokenizer.SIMPLE, true);
-    stream.addAttribute(TermAttribute.class);
+    stream.addAttribute(CharTermAttribute.class);
     stream.addAttribute(PositionIncrementAttribute.class);
     stream.addAttribute(OffsetAttribute.class);
     return new SynonymTokenizer(stream, synonyms);
@@ -1811,21 +1803,21 @@ final class SynonymTokenizer extends TokenStream {
   private Token currentRealToken = null;
   private Map synonyms;
   StringTokenizer st = null;
-  private TermAttribute realTermAtt;
+  private CharTermAttribute realTermAtt;
   private PositionIncrementAttribute realPosIncrAtt;
   private OffsetAttribute realOffsetAtt;
-  private TermAttribute termAtt;
+  private CharTermAttribute termAtt;
   private PositionIncrementAttribute posIncrAtt;
   private OffsetAttribute offsetAtt;
 
   public SynonymTokenizer(TokenStream realStream, Map synonyms) {
     this.realStream = realStream;
     this.synonyms = synonyms;
-    realTermAtt = realStream.addAttribute(TermAttribute.class);
+    realTermAtt = realStream.addAttribute(CharTermAttribute.class);
     realPosIncrAtt = realStream.addAttribute(PositionIncrementAttribute.class);
     realOffsetAtt = realStream.addAttribute(OffsetAttribute.class);
 
-    termAtt = addAttribute(TermAttribute.class);
+    termAtt = addAttribute(CharTermAttribute.class);
     posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     offsetAtt = addAttribute(OffsetAttribute.class);
   }
@@ -1840,25 +1832,25 @@ final class SynonymTokenizer extends TokenStream {
       }
       //Token nextRealToken = new Token(, offsetAtt.startOffset(), offsetAtt.endOffset());
       clearAttributes();
-      termAtt.setTermBuffer(realTermAtt.term());
+      termAtt.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
       offsetAtt.setOffset(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
       posIncrAtt.setPositionIncrement(realPosIncrAtt.getPositionIncrement());
 
-      String expansions = synonyms.get(realTermAtt.term());
+      String expansions = synonyms.get(realTermAtt.toString());
       if (expansions == null) {
         return true;
       }
       st = new StringTokenizer(expansions, ",");
       if (st.hasMoreTokens()) {
         currentRealToken = new Token(realOffsetAtt.startOffset(), realOffsetAtt.endOffset());
-        currentRealToken.setTermBuffer(realTermAtt.term());
+        currentRealToken.copyBuffer(realTermAtt.buffer(), 0, realTermAtt.length());
       }
 
       return true;
     } else {
       String tok = st.nextToken();
       clearAttributes();
-      termAtt.setTermBuffer(tok);
+      termAtt.setEmpty().append(tok);
       offsetAtt.setOffset(currentRealToken.startOffset(), currentRealToken.endOffset());
       posIncrAtt.setPositionIncrement(0);
       if (!st.hasMoreTokens()) {
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
index f32c7e08367..b0926087144 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
@@ -26,8 +26,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
@@ -221,14 +221,14 @@ public abstract class AbstractTestCase extends LuceneTestCase {
         ch = 0;
     }
 
-    TermAttribute termAtt = addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
     @Override
     public boolean incrementToken() throws IOException {
       if( !getNextPartialSnippet() )
         return false;
       clearAttributes();
-      termAtt.setTermBuffer(snippet, startTerm, lenTerm);
+      termAtt.setEmpty().append(snippet, startTerm, startTerm + lenTerm);
       offsetAtt.setOffset(correctOffset(startOffset), correctOffset(startOffset + lenTerm));
       return true;
     }
diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
index 53a1602f625..f31a5bd0993 100644
--- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
+++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/IndexTimeSynonymTest.java
@@ -25,7 +25,7 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.util.AttributeImpl;
@@ -301,7 +301,7 @@ public class IndexTimeSynonymTest extends AbstractTestCase {
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
     TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
-      final AttributeImpl reusableToken = (AttributeImpl) addAttribute(TermAttribute.class);
+      final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
       int p = 0;
 
       @Override
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
index 5e336ebc296..69c05bf15d6 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
@@ -561,7 +561,7 @@ public class InstantiatedIndexWriter implements Closeable {
             // untokenized
             String fieldVal = field.stringValue();
             Token token = new Token(0, fieldVal.length(), "untokenized");
-            token.setTermBuffer(fieldVal);
+            token.setEmpty().append(fieldVal);
             tokens.add(token);
             fieldSetting.fieldLength++;
           }
@@ -596,10 +596,10 @@ public class InstantiatedIndexWriter implements Closeable {
 
       for (Token token : eField_Tokens.getValue()) {
 
-        TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.term());
+        TermDocumentInformationFactory termDocumentInformationFactory = termDocumentInformationFactoryByTermText.get(token.toString());
         if (termDocumentInformationFactory == null) {
           termDocumentInformationFactory = new TermDocumentInformationFactory();
-          termDocumentInformationFactoryByTermText.put(token.term(), termDocumentInformationFactory);
+          termDocumentInformationFactoryByTermText.put(token.toString(), termDocumentInformationFactory);
         }
 
         //termDocumentInformationFactory.termFrequency++;
diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
index dbfdcbb7de0..ebb66c8d5a4 100644
--- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
+++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
@@ -25,7 +25,7 @@ import java.util.List;
 
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -278,7 +278,7 @@ public class TestIndicesEquals extends LuceneTestCase {
         tokens.add(t);
         tokens.add(createToken("fin", 7, 9));
         TokenStream ts = new TokenStream(Token.TOKEN_ATTRIBUTE_FACTORY) {
-          final AttributeImpl reusableToken = (AttributeImpl) addAttribute(TermAttribute.class);
+          final AttributeImpl reusableToken = (AttributeImpl) addAttribute(CharTermAttribute.class);
           Iterator it = tokens.iterator();
 
           @Override
@@ -601,16 +601,12 @@ public class TestIndicesEquals extends LuceneTestCase {
 
   private static Token createToken(String term, int start, int offset)
   {
-    Token token = new Token(start, offset);
-    token.setTermBuffer(term);
-    return token;
+    return new Token(term, start, offset);
   }
 
   private static Token createToken(String term, int start, int offset, String type)
   {
-    Token token = new Token(start, offset, type);
-    token.setTermBuffer(term);
-    return token;
+    return new Token(term, start, offset, type);
   }
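Both files above collapse the old two-step Token construction into the convenience constructor. Side by side, outside the patch and with assumed era-appropriate signatures, the first form being the deprecated API this patch removes:

    import org.apache.lucene.analysis.Token;

    class Tokens {
      // Old style: construct by offsets, then set the term text.
      static Token oldStyle(String term, int start, int end) {
        Token token = new Token(start, end);
        token.setTermBuffer(term); // deprecated along with TermAttribute
        return token;
      }

      // New style, as used throughout this patch.
      static Token newStyle(String term, int start, int end) {
        return new Token(term, start, end);
      }
    }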
diff --git a/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java b/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
index 6bb65ca3931..5671f476848 100644
--- a/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
+++ b/lucene/contrib/lucli/src/java/lucli/LuceneMethods.java
@@ -36,8 +36,8 @@ import jline.ConsoleReader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
@@ -303,14 +303,14 @@ class LuceneMethods {
     int position = 0;
     // Tokenize field and add to postingTable
     TokenStream stream = analyzer.tokenStream(fieldName, reader);
-    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stream.addAttribute(PositionIncrementAttribute.class);
 
     try {
       while (stream.incrementToken()) {
         position += (posIncrAtt.getPositionIncrement() - 1);
         position++;
-        String name = termAtt.term();
+        String name = termAtt.toString();
         Integer Count = tokenMap.get(name);
         if (Count == null) { // not in there yet
           tokenMap.put(name, Integer.valueOf(1)); //first one
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index 35c5a58bfb6..311fb0580d0 100644
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -30,9 +30,10 @@ import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.IndexReader;
@@ -51,6 +52,7 @@ import org.apache.lucene.search.Searcher;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.store.RAMDirectory; // for javadocs
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Constants; // for javadocs
 
 /**
@@ -276,8 +278,8 @@ public class MemoryIndex implements Serializable {
     return new TokenStream() {
       private Iterator iter = keywords.iterator();
       private int start = 0;
-      private TermAttribute termAtt = addAttribute(TermAttribute.class);
-      private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+      private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
       @Override
       public boolean incrementToken() {
@@ -289,8 +291,8 @@ public class MemoryIndex implements Serializable {
         String term = obj.toString();
         clearAttributes();
-        termAtt.setTermBuffer(term);
-        offsetAtt.setOffset(start, start+termAtt.termLength());
+        termAtt.setEmpty().append(term);
+        offsetAtt.setOffset(start, start+termAtt.length());
         start += term.length() + 1; // separate words by 1 (blank) character
         return true;
       }
@@ -340,13 +342,15 @@ public class MemoryIndex implements Serializable {
       int numOverlapTokens = 0;
       int pos = -1;
 
-      TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
+      TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
       PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class);
       OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
-
+      BytesRef ref = new BytesRef(10);
       stream.reset();
       while (stream.incrementToken()) {
-        String term = termAtt.term();
+        termAtt.toBytesRef(ref);
+        // TODO: support non-UTF8 strings (like numerics) here
+        String term = ref.utf8ToString();
         if (term.length() == 0) continue; // nothing to do
 //        if (DEBUG) System.err.println("token='" + term + "'");
         numTokens++;
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
index 84c76bd7dd0..cfef2072376 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/FuzzyLikeThisQuery.java
@@ -26,7 +26,7 @@ import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.BytesRef;
@@ -185,14 +185,14 @@ public class FuzzyLikeThisQuery extends Query
     {
       if(f.queryString==null) return;
       TokenStream ts=analyzer.tokenStream(f.fieldName,new StringReader(f.queryString));
-      TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+      CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
 
       int corpusNumDocs=reader.numDocs();
       Term internSavingTemplateTerm =new Term(f.fieldName); //optimization to avoid constructing new Term() objects
       HashSet processedTerms=new HashSet();
       while (ts.incrementToken())
       {
-          String term = termAtt.term();
+          String term = termAtt.toString();
           if(!processedTerms.contains(term))
           {
               processedTerms.add(term);
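MemoryIndex above is the one consumer that moves to TermToBytesRefAttribute instead, reading terms as bytes. Pulled out into a standalone loop it looks roughly like this; the calls mirror the ones in the hunk itself (toBytesRef, utf8ToString), since this attribute's API was still settling at the time, and the class name is invented:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
    import org.apache.lucene.util.BytesRef;

    class DumpBytesRefTerms {
      // Reads each term as UTF-8 bytes, converting to String only on demand.
      static void dump(TokenStream stream) throws Exception {
        TermToBytesRefAttribute termAtt = stream.addAttribute(TermToBytesRefAttribute.class);
        BytesRef ref = new BytesRef(10);
        stream.reset();
        while (stream.incrementToken()) {
          termAtt.toBytesRef(ref); // fill ref from the current token, as in the patch
          System.out.println(ref.utf8ToString());
        }
        stream.close();
      }
    }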
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
index f7970f8d8ff..d54e237dd4b 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/MoreLikeThis.java
@@ -32,7 +32,7 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.Term;
@@ -884,10 +884,10 @@ public final class MoreLikeThis {
     TokenStream ts = analyzer.tokenStream(fieldName, r);
     int tokenCount=0;
     // for every token
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     while (ts.incrementToken()) {
-      String word = termAtt.term();
+      String word = termAtt.toString();
       tokenCount++;
       if(tokenCount>maxNumTokensParsed)
       {
diff --git a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
index 6a780ad6256..5fd953bfc83 100644
--- a/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
+++ b/lucene/contrib/queries/src/java/org/apache/lucene/search/similar/SimilarityQueries.java
@@ -22,7 +22,7 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.search.BooleanQuery;
@@ -86,12 +86,12 @@ public final class SimilarityQueries
 	 throws IOException
   {
     TokenStream ts = a.tokenStream( field, new StringReader( body));
-    TermAttribute termAtt = ts.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
     BooleanQuery tmp = new BooleanQuery();
     Set already = new HashSet(); // ignore dups
     while (ts.incrementToken()) {
-      String word = termAtt.term();
+      String word = termAtt.toString();
       // ignore opt stop words
       if ( stop != null &&
         stop.contains( word)) continue;
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
index 1dac672ec36..7d9f2f3152d 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/analyzing/AnalyzingQueryParser.java
@@ -24,7 +24,7 @@ import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.Version;
@@ -107,7 +107,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 
     // get Analyzer from superclass and tokenize the term
     TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     int countTokens = 0;
 
     while (true) {
@@ -116,7 +116,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
       } catch (IOException e) {
         break;
       }
-      String term = termAtt.term();
+      String term = termAtt.toString();
       if (!"".equals(term)) {
         try {
           tlist.set(countTokens++, term);
@@ -190,7 +190,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
     // get Analyzer from superclass and tokenize the term
     TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
     List tlist = new ArrayList();
-    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
 
     while (true) {
       try {
@@ -198,7 +198,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
       } catch (IOException e) {
         break;
       }
-      tlist.add(termAtt.term());
+      tlist.add(termAtt.toString());
     }
 
     try {
@@ -237,13 +237,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
       throws ParseException {
     // get Analyzer from superclass and tokenize the term
     TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     String nextToken = null;
     boolean multipleTokens = false;
 
     try {
       if (source.incrementToken()) {
-        nextToken = termAtt.term();
+        nextToken = termAtt.toString();
       }
       multipleTokens = source.incrementToken();
     } catch (IOException e) {
@@ -273,13 +273,13 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
       throws ParseException {
     // get Analyzer from superclass and tokenize the terms
     TokenStream source = getAnalyzer().tokenStream(field, new StringReader(part1));
-    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     boolean multipleTokens = false;
 
     // part1
     try {
       if (source.incrementToken()) {
-        part1 = termAtt.term();
+        part1 = termAtt.toString();
       }
       multipleTokens = source.incrementToken();
     } catch (IOException e) {
@@ -297,11 +297,11 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 
     // part2
     source = getAnalyzer().tokenStream(field, new StringReader(part2));
-    termAtt = source.addAttribute(TermAttribute.class);
+    termAtt = source.addAttribute(CharTermAttribute.class);
 
     try {
       if (source.incrementToken()) {
-        part2 = termAtt.term();
+        part2 = termAtt.toString();
       }
       multipleTokens = source.incrementToken();
     } catch (IOException e) {
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
index b76ddf0d3c5..3ff9dfb3ae5 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
@@ -307,7 +307,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     List list = new ArrayList();
     int positionCount = 0;
     boolean severalTokensAtSamePosition = false;
-    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
 
     try {
@@ -328,7 +328,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
       return null;
     else if (list.size() == 1) {
       source.restoreState(list.get(0));
-      return new TermQuery(new Term(field, termAtt.term()));
+      return new TermQuery(new Term(field, termAtt.toString()));
     } else {
       if (severalTokensAtSamePosition) {
         if (positionCount == 1) {
@@ -337,7 +337,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
           for (int i = 0; i < list.size(); i++) {
             source.restoreState(list.get(i));
             TermQuery currentQuery = new TermQuery(
-                new Term(field, termAtt.term()));
+                new Term(field, termAtt.toString()));
             q.add(currentQuery, BooleanClause.Occur.SHOULD);
           }
           return q;
@@ -352,7 +352,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
              mpq.add(multiTerms.toArray(new Term[0]));
              multiTerms.clear();
            }
-            multiTerms.add(new Term(field, termAtt.term()));
+            multiTerms.add(new Term(field, termAtt.toString()));
           }
           mpq.add(multiTerms.toArray(new Term[0]));
           return mpq;
@@ -363,7 +363,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
       q.setSlop(phraseSlop);
       for (int i = 0; i < list.size(); i++) {
         source.restoreState(list.get(i));
-        q.add(new Term(field, termAtt.term()));
+        q.add(new Term(field, termAtt.toString()));
       }
       return q;
     }
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
index 9cd21242042..c8f740b4ea0 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
@@ -331,7 +331,7 @@ public class PrecedenceQueryParser {
     List list = new ArrayList();
     int positionCount = 0;
     boolean severalTokensAtSamePosition = false;
-    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
 
     try {
@@ -352,7 +352,7 @@ public class PrecedenceQueryParser {
       return null;
     else if (list.size() == 1) {
       source.restoreState(list.get(0));
-      return new TermQuery(new Term(field, termAtt.term()));
+      return new TermQuery(new Term(field, termAtt.toString()));
     } else {
       if (severalTokensAtSamePosition) {
         if (positionCount == 1) {
@@ -361,7 +361,7 @@ public class PrecedenceQueryParser {
           for (int i = 0; i < list.size(); i++) {
             source.restoreState(list.get(i));
             TermQuery currentQuery = new TermQuery(
-                new Term(field, termAtt.term()));
+                new Term(field, termAtt.toString()));
             q.add(currentQuery, BooleanClause.Occur.SHOULD);
           }
           return q;
@@ -376,7 +376,7 @@ public class PrecedenceQueryParser {
              mpq.add(multiTerms.toArray(new Term[0]));
              multiTerms.clear();
            }
-            multiTerms.add(new Term(field, termAtt.term()));
+            multiTerms.add(new Term(field, termAtt.toString()));
           }
           mpq.add(multiTerms.toArray(new Term[0]));
           return mpq;
@@ -387,7 +387,7 @@ public class PrecedenceQueryParser {
       q.setSlop(phraseSlop);
       for (int i = 0; i < list.size(); i++) {
         source.restoreState(list.get(i));
-        q.add(new Term(field, termAtt.term()));
+        q.add(new Term(field, termAtt.toString()));
       }
       return q;
     }
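The query-parser hunks above and below all funnel analyzed tokens into new Term(field, termAtt.toString()). Reduced to its essentials, a sketch rather than the parsers' actual branching logic (PhraseQuery stands in for their more elaborate term/phrase/multi-phrase cases; names are illustrative):

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.PhraseQuery;

    class AnalyzedPhrase {
      // Builds a phrase query from the terms the analyzer emits for the text.
      static PhraseQuery phrase(Analyzer analyzer, String field, String text) throws Exception {
        TokenStream source = analyzer.tokenStream(field, new StringReader(text));
        CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
        PhraseQuery query = new PhraseQuery();
        source.reset();
        while (source.incrementToken()) {
          query.add(new Term(field, termAtt.toString()));
        }
        source.close();
        return query;
      }
    }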
diff --git a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
index 7be5c9afafd..818b3f98c14 100644
--- a/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
+++ b/lucene/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/processors/AnalyzerQueryNodeProcessor.java
@@ -26,8 +26,8 @@ import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CachingTokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.queryParser.core.QueryNodeException;
 import org.apache.lucene.queryParser.core.config.QueryConfigHandler;
 import org.apache.lucene.queryParser.core.nodes.FieldQueryNode;
@@ -162,11 +162,11 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
         // ignore
       }
 
-      if (!buffer.hasAttribute(TermAttribute.class)) {
+      if (!buffer.hasAttribute(CharTermAttribute.class)) {
         return new NoTokenFoundQueryNode();
       }
 
-      TermAttribute termAtt = buffer.getAttribute(TermAttribute.class);
+      CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
 
       if (numTokens == 0) {
         return new NoTokenFoundQueryNode();
@@ -177,7 +177,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
           boolean hasNext;
           hasNext = buffer.incrementToken();
           assert hasNext == true;
-          term = termAtt.term();
+          term = termAtt.toString();
 
         } catch (IOException e) {
           // safe to ignore, because we know the number of tokens
@@ -197,7 +197,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
           try {
             boolean hasNext = buffer.incrementToken();
             assert hasNext == true;
-            term = termAtt.term();
+            term = termAtt.toString();
 
           } catch (IOException e) {
             // safe to ignore, because we know the number of tokens
@@ -224,7 +224,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
           try {
             boolean hasNext = buffer.incrementToken();
             assert hasNext == true;
-            term = termAtt.term();
+            term = termAtt.toString();
             if (posIncrAtt != null) {
               positionIncrement = posIncrAtt.getPositionIncrement();
             }
@@ -290,7 +290,7 @@ public class AnalyzerQueryNodeProcessor extends QueryNodeProcessorImpl {
         try {
           boolean hasNext = buffer.incrementToken();
           assert hasNext == true;
-          term = termAtt.term();
+          term = termAtt.toString();
 
           if (posIncrAtt != null) {
             positionIncrement = posIncrAtt.getPositionIncrement();
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
index b0907db3b4e..5f26ed078f0 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
@@ -23,8 +23,8 @@ import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.FuzzyQuery;
@@ -68,7 +68,7 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
     boolean inPhrase = false;
     int savedStart = 0, savedEnd = 0;
 
-    TermAttribute termAtt = addAttribute(TermAttribute.class);
+    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
     OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
 
     @Override
@@ -76,19 +76,19 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
       clearAttributes();
       if (inPhrase) {
         inPhrase = false;
-        termAtt.setTermBuffer("phrase2");
+        termAtt.setEmpty().append("phrase2");
         offsetAtt.setOffset(savedStart, savedEnd);
         return true;
       } else
         while(input.incrementToken())
-          if (termAtt.term().equals("phrase")) {
+          if (termAtt.toString().equals("phrase")) {
             inPhrase = true;
             savedStart = offsetAtt.startOffset();
             savedEnd = offsetAtt.endOffset();
-            termAtt.setTermBuffer("phrase1");
+            termAtt.setEmpty().append("phrase1");
             offsetAtt.setOffset(savedStart, savedEnd);
             return true;
-          } else if (!termAtt.term().equals("stop"))
+          } else if (!termAtt.toString().equals("stop"))
             return true;
       return false;
     }
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
index e98cc6f80a3..ea5907a366c 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerQPHelper.java
@@ -23,9 +23,9 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.queryParser.core.QueryNodeException;
 import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
@@ -163,24 +163,19 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
     private int prevStartOffset;
     private int prevEndOffset;
 
-    TermAttribute termAtt;
-    PositionIncrementAttribute posIncrAtt;
-    OffsetAttribute offsetAtt;
-    TypeAttribute typeAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
     public TestFilter(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
-      posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-      offsetAtt = addAttribute(OffsetAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
-
     }
 
     @Override
     public final boolean incrementToken() throws java.io.IOException {
       if (multiToken > 0) {
-        termAtt.setTermBuffer("multi" + (multiToken + 1));
+        termAtt.setEmpty().append("multi" + (multiToken + 1));
         offsetAtt.setOffset(prevStartOffset, prevEndOffset);
         typeAtt.setType(prevType);
         posIncrAtt.setPositionIncrement(0);
@@ -194,7 +189,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
         prevType = typeAtt.type();
         prevStartOffset = offsetAtt.startOffset();
         prevEndOffset = offsetAtt.endOffset();
-        String text = termAtt.term();
+        String text = termAtt.toString();
         if (text.equals("triplemulti")) {
           multiToken = 2;
           return true;
@@ -228,21 +223,19 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
 
   private class TestPosIncrementFilter extends TokenFilter {
 
-    TermAttribute termAtt;
-    PositionIncrementAttribute posIncrAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
     public TestPosIncrementFilter(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
-      posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     }
 
     @Override
     public final boolean incrementToken() throws java.io.IOException {
       while (input.incrementToken()) {
-        if (termAtt.term().equals("the")) {
+        if (termAtt.toString().equals("the")) {
           // stopword, do nothing
-        } else if (termAtt.term().equals("quick")) {
+        } else if (termAtt.toString().equals("quick")) {
           posIncrAtt.setPositionIncrement(2);
           return true;
         } else {
diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
index 8e56944a2ec..4f3b14a4e8b 100644
--- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
+++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestMultiAnalyzerWrapper.java
@@ -23,9 +23,9 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.util.LuceneTestCase;
@@ -157,24 +157,19 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
     private int prevStartOffset;
     private int prevEndOffset;
 
-    TermAttribute termAtt;
-    PositionIncrementAttribute posIncrAtt;
-    OffsetAttribute offsetAtt;
-    TypeAttribute typeAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+    private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+    private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
     public TestFilter(TokenStream in) {
       super(in);
-      termAtt = addAttribute(TermAttribute.class);
-      posIncrAtt = addAttribute(PositionIncrementAttribute.class);
-      offsetAtt = addAttribute(OffsetAttribute.class);
-      typeAtt = addAttribute(TypeAttribute.class);
-
     }
 
     @Override
     public final boolean incrementToken() throws java.io.IOException {
       if (multiToken > 0) {
-        termAtt.setTermBuffer("multi" + (multiToken + 1));
+        termAtt.setEmpty().append("multi" + (multiToken + 1));
         offsetAtt.setOffset(prevStartOffset, prevEndOffset);
         typeAtt.setType(prevType);
         posIncrAtt.setPositionIncrement(0);
@@ -188,7 +183,7 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
         prevType = typeAtt.type();
         prevStartOffset = offsetAtt.startOffset();
         prevEndOffset = offsetAtt.endOffset();
-        String text = termAtt.term();
+        String text = termAtt.toString();
         if (text.equals("triplemulti")) {
           multiToken = 2;
           return true;
@@ -222,21 +217,19 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
 
   private class TestPosIncrementFilter extends TokenFilter {
 
-    TermAttribute termAtt;
-    PositionIncrementAttribute posIncrAtt;
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+    private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
 
    public TestPosIncrementFilter(TokenStream in) {
TestPosIncrementFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); } @Override public final boolean incrementToken() throws java.io.IOException { while (input.incrementToken()) { - if (termAtt.term().equals("the")) { + if (termAtt.toString().equals("the")) { // stopword, do nothing - } else if (termAtt.term().equals("quick")) { + } else if (termAtt.toString().equals("quick")) { posIncrAtt.setPositionIncrement(2); return true; } else { diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java index addbca26476..4d3e3840355 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java @@ -37,8 +37,8 @@ import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; @@ -96,8 +96,8 @@ public class TestQPHelper extends LocalizedTestCase { public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static final class QPTestFilter extends TokenFilter { - TermAttribute termAtt; - OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Filter which discards the token 'stop' and which expands the token @@ -105,8 +105,6 @@ public class TestQPHelper extends LocalizedTestCase { */ public QPTestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); } boolean inPhrase = false; @@ -117,19 +115,19 @@ public class TestQPHelper extends LocalizedTestCase { if (inPhrase) { inPhrase = false; clearAttributes(); - termAtt.setTermBuffer("phrase2"); + termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else while (input.incrementToken()) { - if (termAtt.term().equals("phrase")) { + if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); savedEnd = offsetAtt.endOffset(); - termAtt.setTermBuffer("phrase1"); + termAtt.setEmpty().append("phrase1"); offsetAtt.setOffset(savedStart, savedEnd); return true; - } else if (!termAtt.term().equals("stop")) + } else if (!termAtt.toString().equals("stop")) return true; } return false; @@ -1158,7 +1156,7 @@ public class TestQPHelper extends LocalizedTestCase { private class CannedTokenStream extends TokenStream { private int upto = 0; final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class); - final TermAttribute term = addAttribute(TermAttribute.class); + final CharTermAttribute term = addAttribute(CharTermAttribute.class); @Override public boolean incrementToken() { clearAttributes(); @@ -1167,16 +1165,16 @@ public class TestQPHelper extends LocalizedTestCase { } if (upto == 
0) { posIncr.setPositionIncrement(1); - term.setTermBuffer("a"); + term.setEmpty().append("a"); } else if (upto == 1) { posIncr.setPositionIncrement(1); - term.setTermBuffer("b"); + term.setEmpty().append("b"); } else if (upto == 2) { posIncr.setPositionIncrement(0); - term.setTermBuffer("c"); + term.setEmpty().append("c"); } else { posIncr.setPositionIncrement(0); - term.setTermBuffer("d"); + term.setEmpty().append("d"); } upto++; return true; diff --git a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java index b3a28dbe1b0..fc18e2ce98f 100644 --- a/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java +++ b/lucene/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java @@ -36,7 +36,7 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.DateField; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.Document; @@ -93,8 +93,8 @@ public class TestQueryParserWrapper extends LocalizedTestCase { public static Analyzer qpAnalyzer = new QPTestAnalyzer(); public static final class QPTestFilter extends TokenFilter { - TermAttribute termAtt; - OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Filter which discards the token 'stop' and which expands the token @@ -102,8 +102,6 @@ public class TestQueryParserWrapper extends LocalizedTestCase { */ public QPTestFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); } boolean inPhrase = false; @@ -114,19 +112,19 @@ public class TestQueryParserWrapper extends LocalizedTestCase { if (inPhrase) { inPhrase = false; clearAttributes(); - termAtt.setTermBuffer("phrase2"); + termAtt.setEmpty().append("phrase2"); offsetAtt.setOffset(savedStart, savedEnd); return true; } else while (input.incrementToken()) { - if (termAtt.term().equals("phrase")) { + if (termAtt.toString().equals("phrase")) { inPhrase = true; savedStart = offsetAtt.startOffset(); savedEnd = offsetAtt.endOffset(); - termAtt.setTermBuffer("phrase1"); + termAtt.setEmpty().append("phrase1"); offsetAtt.setOffset(savedStart, savedEnd); return true; - } else if (!termAtt.term().equals("stop")) + } else if (!termAtt.toString().equals("stop")) return true; } return false; diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java index d0b1f46f825..0e573e85555 100755 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynExpand.java @@ -29,7 +29,7 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import 
org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -117,10 +117,10 @@ public final class SynExpand { // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.tokenStream( field, new StringReader( query)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - String word = termAtt.term(); + String word = termAtt.toString(); if ( already.add( word)) top.add( word); } diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java index 07dd65b6116..894e7494908 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynLookup.java @@ -28,7 +28,7 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -125,10 +125,10 @@ public class SynLookup { // [1] Parse query into separate words so that when we expand we can avoid dups TokenStream ts = a.tokenStream( field, new StringReader( query)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - String word = termAtt.term(); + String word = termAtt.toString(); if ( already.add( word)) top.add( word); } diff --git a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java index 47af190bf5a..e4b45a0c691 100644 --- a/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java +++ b/lucene/contrib/wordnet/src/java/org/apache/lucene/wordnet/SynonymTokenFilter.java @@ -21,8 +21,8 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -45,9 +45,9 @@ public class SynonymTokenFilter extends TokenFilter { private AttributeSource.State current = null; private int todo = 0; - private TermAttribute termAtt; - private TypeAttribute typeAtt; - private PositionIncrementAttribute posIncrAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); /** * Creates an instance for the given underlying stream and synonym table. 
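The SynonymTokenFilter hunks above and below repeat the two moves this patch makes everywhere: attribute registration migrates from the constructor body into final field initializers, and setTermBuffer(String) becomes setEmpty().append(String). A minimal sketch of the resulting filter shape, not part of the patch itself (the class name and the lowercasing rule are placeholders for illustration):

import java.io.IOException;
import java.util.Locale;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class LowerCaseSketchFilter extends TokenFilter {
  // New idiom: look the attribute up once, at the declaration site.
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public LowerCaseSketchFilter(TokenStream in) {
    super(in); // no addAttribute calls remain in the constructor
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    // CharTermAttribute is a CharSequence; toString() materializes the term text.
    String lowered = termAtt.toString().toLowerCase(Locale.ENGLISH);
    termAtt.setEmpty().append(lowered); // replaces the removed setTermBuffer(String)
    return true;
  }
}

Field initializers run only after super(in) has set up the AttributeSource, which is why the declaration-site addAttribute calls are safe.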
@@ -71,10 +71,6 @@ public class SynonymTokenFilter extends TokenFilter { this.synonyms = synonyms; this.maxSynonyms = maxSynonyms; - - this.termAtt = addAttribute(TermAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); } /** Returns the next token in the stream, or null at EOS. */ @@ -89,7 +85,7 @@ public class SynonymTokenFilter extends TokenFilter { if (!input.incrementToken()) return false; // EOS; iterator exhausted - stack = synonyms.getSynonyms(termAtt.term()); // push onto stack + stack = synonyms.getSynonyms(termAtt.toString()); // push onto stack if (stack.length > maxSynonyms) randomize(stack); index = 0; current = captureState(); @@ -110,7 +106,7 @@ public class SynonymTokenFilter extends TokenFilter { */ protected boolean createToken(String synonym, AttributeSource.State current) { restoreState(current); - termAtt.setTermBuffer(synonym); + termAtt.setEmpty().append(synonym); typeAtt.setType(SYNONYM_TOKEN_TYPE); posIncrAtt.setPositionIncrement(0); return true; diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java index 9c256d43f81..b96cf7bab4b 100644 --- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java +++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/LikeThisQueryBuilder.java @@ -10,7 +10,7 @@ import java.util.Set; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.search.similar.MoreLikeThisQuery; import org.apache.lucene.search.Query; import org.apache.lucene.xmlparser.DOMUtils; @@ -77,11 +77,11 @@ public class LikeThisQueryBuilder implements QueryBuilder { for (int i = 0; i < fields.length; i++) { TokenStream ts = analyzer.tokenStream(fields[i],new StringReader(stopWords)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); try { while(ts.incrementToken()) { - stopWordsSet.add(termAtt.term()); + stopWordsSet.add(termAtt.toString()); } } catch(IOException ioe) diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java index ae79d6d0475..957f655c989 100644 --- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java +++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/SpanOrTermsBuilder.java @@ -6,7 +6,7 @@ import java.util.ArrayList; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; @@ -56,10 +56,10 @@ public class SpanOrTermsBuilder extends SpanBuilderBase { ArrayList clausesList=new ArrayList(); TokenStream ts=analyzer.tokenStream(fieldName,new StringReader(value)); - TermAttribute termAtt = 
ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.term())); + SpanTermQuery stq=new SpanTermQuery(new Term(fieldName, termAtt.toString())); clausesList.add(stq); } SpanOrQuery soq=new SpanOrQuery(clausesList.toArray(new SpanQuery[clausesList.size()])); diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java index 7373a95b8c2..edc3daf98f2 100644 --- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java +++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsFilterBuilder.java @@ -5,7 +5,7 @@ import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; import org.apache.lucene.search.TermsFilter; @@ -57,7 +57,7 @@ public class TermsFilterBuilder implements FilterBuilder String text = DOMUtils.getNonBlankTextOrFail(e); String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName"); TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); try { @@ -65,11 +65,11 @@ public class TermsFilterBuilder implements FilterBuilder while (ts.incrementToken()) { if (term == null) { - term = new Term(fieldName, termAtt.term()); + term = new Term(fieldName, termAtt.toString()); } else { // create from previous to save fieldName.intern overhead - term = term.createTerm(termAtt.term()); + term = term.createTerm(termAtt.toString()); } tf.addTerm(term); } diff --git a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java index 83e6bb336f4..63fe23bbbb4 100644 --- a/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java +++ b/lucene/contrib/xml-query-parser/src/java/org/apache/lucene/xmlparser/builders/TermsQueryBuilder.java @@ -5,7 +5,7 @@ import java.io.StringReader; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; @@ -57,16 +57,16 @@ public class TermsQueryBuilder implements QueryBuilder { TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(text)); try { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); Term term = null; while (ts.incrementToken()) { if (term == null) { - term = new Term(fieldName, termAtt.term()); + term = new Term(fieldName, termAtt.toString()); } else { // create from previous to save fieldName.intern overhead - term = term.createTerm(termAtt.term()); + term = 
term.createTerm(termAtt.toString()); } bq.add(new BooleanClause(new TermQuery(term),BooleanClause.Occur.SHOULD)); } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java index 8a74505bd7d..0a74366810a 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicNormalizationFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography. @@ -29,21 +29,18 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; */ public final class ArabicNormalizationFilter extends TokenFilter { - - private final ArabicNormalizer normalizer; - private final TermAttribute termAtt; + private final ArabicNormalizer normalizer = new ArabicNormalizer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public ArabicNormalizationFilter(TokenStream input) { super(input); - normalizer = new ArabicNormalizer(); - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + int newlen = normalizer.normalize(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); return true; } return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java index f45e98d65da..636481e123b 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemFilter.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words.. 
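The stemming and normalization filters that follow (Arabic, Bulgarian, Czech, Hindi, and the Persian and Indic normalizers) all rewrite the term in place, so the migration there swaps termBuffer()/termLength()/setTermLength() for buffer()/length()/setLength(). A minimal sketch of that pattern under the same TokenFilter API; the trailing-'s' rule is a stand-in for a real stemmer:

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public final class TrailingSSketchFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public TrailingSSketchFilter(TokenStream in) {
    super(in);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    char[] buf = termAtt.buffer(); // was termAtt.termBuffer()
    int len = termAtt.length();    // was termAtt.termLength()
    if (len > 1 && buf[len - 1] == 's') {
      termAtt.setLength(len - 1);  // was termAtt.setTermLength(newlen)
    }
    return true;
  }
}

Mutating the shared buffer and recording the new length avoids allocating a String per token, which is why the stemmers below prefer it over the toString()/setEmpty().append() route used by the String-based filters (Brazilian, German, French).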
@@ -35,24 +35,20 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; * @see KeywordMarkerFilter */ public final class ArabicStemFilter extends TokenFilter { - - private final ArabicStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final ArabicStemmer stemmer = new ArabicStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public ArabicStemFilter(TokenStream input) { super(input); - stemmer = new ArabicStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java index 30387f0a694..e4264201ec9 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/bg/BulgarianStemFilter.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link BulgarianStemmer} to stem Bulgarian @@ -35,23 +35,20 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; *

*/ public final class BulgarianStemFilter extends TokenFilter { - private final BulgarianStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final BulgarianStemmer stemmer = new BulgarianStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public BulgarianStemFilter(final TokenStream input) { super(input); - stemmer = new BulgarianStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + final int newlen = stemmer.stem(termAtt.buffer(), termAtt.length()); + termAtt.setLength(newlen); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java index 6bbb2d83312..a6c42eb9943 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/br/BrazilianStemFilter.java @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link BrazilianStemmer}. @@ -41,10 +41,10 @@ public final class BrazilianStemFilter extends TokenFilter { /** * {@link BrazilianStemmer} in use by this filter. */ - private BrazilianStemmer stemmer = null; + private BrazilianStemmer stemmer = new BrazilianStemmer(); private Set exclusions = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); /** * Creates a new BrazilianStemFilter @@ -53,9 +53,6 @@ public final class BrazilianStemFilter extends TokenFilter { */ public BrazilianStemFilter(TokenStream in) { super(in); - stemmer = new BrazilianStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** @@ -74,13 +71,13 @@ public final class BrazilianStemFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final String term = termAtt.term(); + final String term = termAtt.toString(); // Check the exclusion table. if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) { final String s = stemmer.stem(term); // If not stemmed, don't waste the time adjusting the token. 
if ((s != null) && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java index 1eb14d0af91..d907d51d499 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cjk/CJKTokenizer.java @@ -22,7 +22,7 @@ import java.io.Reader; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -98,9 +98,9 @@ public final class CJKTokenizer extends Tokenizer { */ private boolean preIsTokened = false; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); //~ Constructors ----------------------------------------------------------- @@ -111,23 +111,14 @@ public final class CJKTokenizer extends Tokenizer { */ public CJKTokenizer(Reader in) { super(in); - init(); } public CJKTokenizer(AttributeSource source, Reader in) { super(source, in); - init(); } public CJKTokenizer(AttributeFactory factory, Reader in) { super(factory, in); - init(); - } - - private void init() { - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } //~ Methods ---------------------------------------------------------------- @@ -287,7 +278,7 @@ public final class CJKTokenizer extends Tokenizer { } if (length > 0) { - termAtt.setTermBuffer(buffer, 0, length); + termAtt.copyBuffer(buffer, 0, length); offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); typeAtt.setType(TOKEN_TYPE_NAMES[tokenType]); return true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java index af30be18af6..22cd6cda90c 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseFilter.java @@ -23,7 +23,7 @@ import java.util.Arrays; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.StopFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -61,21 +61,20 @@ public final class ChineseFilter extends TokenFilter { private CharArraySet stopTable; - private TermAttribute termAtt; + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public ChineseFilter(TokenStream in) { super(in); stopTable = new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList(STOP_WORDS), false); - termAtt = addAttribute(TermAttribute.class); } @Override public boolean 
incrementToken() throws IOException { while (input.incrementToken()) { - char text[] = termAtt.termBuffer(); - int termLength = termAtt.termLength(); + char text[] = termAtt.buffer(); + int termLength = termAtt.length(); // why not key off token type here assuming ChineseTokenizer comes first? if (!stopTable.contains(text, 0, termLength)) { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java index 7af1d4da3a6..c3f50998da9 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cn/ChineseTokenizer.java @@ -23,8 +23,8 @@ import java.io.Reader; import org.apache.lucene.analysis.standard.StandardTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.AttributeSource; @@ -62,24 +62,16 @@ public final class ChineseTokenizer extends Tokenizer { public ChineseTokenizer(Reader in) { super(in); - init(); } public ChineseTokenizer(AttributeSource source, Reader in) { super(source, in); - init(); } public ChineseTokenizer(AttributeFactory factory, Reader in) { super(factory, in); - init(); } - - private void init() { - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - } - + private int offset = 0, bufferIndex=0, dataLen=0; private final static int MAX_WORD_LEN = 255; private final static int IO_BUFFER_SIZE = 1024; @@ -90,8 +82,8 @@ public final class ChineseTokenizer extends Tokenizer { private int length; private int start; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); private final void push(char c) { @@ -105,7 +97,7 @@ public final class ChineseTokenizer extends Tokenizer { if (length>0) { //System.out.println(new String(buffer, 0, //length)); - termAtt.setTermBuffer(buffer, 0, length); + termAtt.copyBuffer(buffer, 0, length); offsetAtt.setOffset(correctOffset(start), correctOffset(start+length)); return true; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java index 4038b72bb5e..0d82cbff197 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/CompoundWordTokenFilterBase.java @@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -69,12 +69,12 @@ public abstract class 
CompoundWordTokenFilterBase extends TokenFilter { protected final int maxSubwordSize; protected final boolean onlyLongestMatch; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private FlagsAttribute flagsAtt; - private PositionIncrementAttribute posIncAtt; - private TypeAttribute typeAtt; - private PayloadAttribute payloadAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); private final Token wrapper = new Token(); /** @@ -160,13 +160,6 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter { this.dictionary = new CharArraySet(matchVersion, dictionary.size(), false); addAllLowerCase(this.dictionary, dictionary); } - - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - flagsAtt = addAttribute(FlagsAttribute.class); - posIncAtt = addAttribute(PositionIncrementAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); } /** @@ -192,7 +185,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter { private final void setToken(final Token token) throws IOException { clearAttributes(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.buffer(), 0, token.length()); flagsAtt.setFlags(token.getFlags()); typeAtt.setType(token.type()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -210,7 +203,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter { if (!input.incrementToken()) return false; - wrapper.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + wrapper.copyBuffer(termAtt.buffer(), 0, termAtt.length()); wrapper.setStartOffset(offsetAtt.startOffset()); wrapper.setEndOffset(offsetAtt.endOffset()); wrapper.setFlags(flagsAtt.getFlags()); @@ -248,7 +241,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter { protected final Token createToken(final int offset, final int length, final Token prototype) { int newStart = prototype.startOffset() + offset; - Token t = prototype.clone(prototype.termBuffer(), offset, length, newStart, newStart+length); + Token t = prototype.clone(prototype.buffer(), offset, length, newStart, newStart+length); t.setPositionIncrement(0); return t; } @@ -258,7 +251,7 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter { tokens.add((Token) token.clone()); // Only words longer than minWordSize get processed - if (token.termLength() < this.minWordSize) { + if (token.length() < this.minWordSize) { return; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java index 6d8374a1530..ade9b314db8 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/compound/DictionaryCompoundWordTokenFilter.java @@ -191,22 +191,22 @@ public class 
DictionaryCompoundWordTokenFilter extends CompoundWordTokenFilterBa @Override protected void decomposeInternal(final Token token) { // Only words longer than minWordSize get processed - if (token.termLength() < this.minWordSize) { + if (token.length() < this.minWordSize) { return; } - char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.termBuffer()); + char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.buffer()); - for (int i=0;itoken.termLength()) { + if(i+j>token.length()) { break; } if(dictionary.contains(lowerCaseTermBuffer, i, j)) { if (this.onlyLongestMatch) { if (longestMatchToken!=null) { - if (longestMatchToken.termLength() exit if (hyphens == null) { return; } final int[] hyp = hyphens.getHyphenationPoints(); - char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.termBuffer()); + char[] lowerCaseTermBuffer=makeLowerCaseCopy(token.buffer()); for (int i = 0; i < hyp.length; ++i) { int remaining = hyp.length - i; @@ -335,7 +335,7 @@ public class HyphenationCompoundWordTokenFilter extends if (dictionary == null || dictionary.contains(lowerCaseTermBuffer, start, partLength)) { if (this.onlyLongestMatch) { if (longestMatchToken != null) { - if (longestMatchToken.termLength() < partLength) { + if (longestMatchToken.length() < partLength) { longestMatchToken = createToken(start, partLength, token); } } else { @@ -352,7 +352,7 @@ public class HyphenationCompoundWordTokenFilter extends // characters if (this.onlyLongestMatch) { if (longestMatchToken != null) { - if (longestMatchToken.termLength() < partLength - 1) { + if (longestMatchToken.length() < partLength - 1) { longestMatchToken = createToken(start, partLength - 1, token); } } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java index 486d33c5c48..4f0e17fea39 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/cz/CzechStemFilter.java @@ -6,7 +6,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -37,23 +37,20 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; * @see KeywordMarkerFilter */ public final class CzechStemFilter extends TokenFilter { - private final CzechStemmer stemmer; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CzechStemmer stemmer = new CzechStemmer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public CzechStemFilter(TokenStream input) { super(input); - stemmer = new CzechStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final int newlen = stemmer.stem(termAtt.termBuffer(), termAtt.termLength()); - termAtt.setTermLength(newlen); + final int newlen = stemmer.stem(termAtt.buffer(), 
termAtt.length()); + termAtt.setLength(newlen); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java index dcba092e318..be408321c0b 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/de/GermanStemFilter.java @@ -24,7 +24,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that stems German words. @@ -45,11 +45,11 @@ public final class GermanStemFilter extends TokenFilter /** * The actual token in the input stream. */ - private GermanStemmer stemmer = null; + private GermanStemmer stemmer = new GermanStemmer(); private Set exclusionSet = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); /** * Creates a {@link GermanStemFilter} instance @@ -58,9 +58,6 @@ public final class GermanStemFilter extends TokenFilter public GermanStemFilter( TokenStream in ) { super(in); - stemmer = new GermanStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** @@ -80,13 +77,13 @@ public final class GermanStemFilter extends TokenFilter @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - String term = termAtt.term(); + String term = termAtt.toString(); // Check the exclusion table. if (!keywordAttr.isKeyword() && (exclusionSet == null || !exclusionSet.contains(term))) { String s = stemmer.stem(term); // If not stemmed, don't waste the time adjusting the token. 
if ((s != null) && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java index 3ea5bd67e97..20c1b1720ca 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fa/PersianNormalizationFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the @@ -30,22 +30,19 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; */ public final class PersianNormalizationFilter extends TokenFilter { - - private final PersianNormalizer normalizer; - private final TermAttribute termAtt; + private final PersianNormalizer normalizer = new PersianNormalizer(); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public PersianNormalizationFilter(TokenStream input) { super(input); - normalizer = new PersianNormalizer(); - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final int newlen = normalizer.normalize(termAtt.termBuffer(), termAtt - .termLength()); - termAtt.setTermLength(newlen); + final int newlen = normalizer.normalize(termAtt.buffer(), + termAtt.length()); + termAtt.setLength(newlen); return true; } return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java index bf2c9876716..97b7922b29c 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/ElisionFilter.java @@ -23,7 +23,7 @@ import java.util.Arrays; import org.apache.lucene.analysis.standard.StandardTokenizer; // for javadocs import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -37,7 +37,7 @@ import org.apache.lucene.util.Version; */ public final class ElisionFilter extends TokenFilter { private CharArraySet articles = CharArraySet.EMPTY_SET; - private final TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private static final CharArraySet DEFAULT_ARTICLES = CharArraySet.unmodifiableSet( new CharArraySet(Version.LUCENE_CURRENT, Arrays.asList( "l", "m", "t", "qu", "n", "s", "j"), true)); @@ -100,7 +100,6 @@ public final class ElisionFilter extends TokenFilter { super(input); this.articles = CharArraySet.unmodifiableSet( new CharArraySet(matchVersion, articles, true)); - termAtt = addAttribute(TermAttribute.class); } /** @@ -115,13 +114,13 @@ public final class ElisionFilter extends TokenFilter { } /** - * Increments the {@link TokenStream} with a {@link TermAttribute} without elisioned 
start + * Increments the {@link TokenStream} with a {@link CharTermAttribute} without elisioned start */ @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - char[] termBuffer = termAtt.termBuffer(); - int termLength = termAtt.termLength(); + char[] termBuffer = termAtt.buffer(); + int termLength = termAtt.length(); int minPoz = Integer.MAX_VALUE; for (int i = 0; i < apostrophes.length; i++) { @@ -137,8 +136,8 @@ public final class ElisionFilter extends TokenFilter { // An apostrophe has been found. If the prefix is an article strip it off. if (minPoz != Integer.MAX_VALUE - && articles.contains(termAtt.termBuffer(), 0, minPoz)) { - termAtt.setTermBuffer(termAtt.termBuffer(), minPoz + 1, termAtt.termLength() - (minPoz + 1)); + && articles.contains(termAtt.buffer(), 0, minPoz)) { + termAtt.copyBuffer(termAtt.buffer(), minPoz + 1, termAtt.length() - (minPoz + 1)); } return true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java index fa61deae642..6e5d1ebd648 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchStemFilter.java @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; import java.util.HashSet; @@ -51,17 +51,14 @@ public final class FrenchStemFilter extends TokenFilter { /** * The actual token in the input stream. */ - private FrenchStemmer stemmer = null; + private FrenchStemmer stemmer = new FrenchStemmer(); private Set exclusions = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public FrenchStemFilter( TokenStream in ) { - super(in); - stemmer = new FrenchStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); + super(in); } /** @@ -82,14 +79,14 @@ public final class FrenchStemFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - String term = termAtt.term(); + String term = termAtt.toString(); // Check the exclusion table if ( !keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains( term )) ) { String s = stemmer.stem( term ); // If not stemmed, don't waste the time adjusting the token. 
if ((s != null) && !s.equals( term ) ) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java index cad610b561d..b8da0723ea3 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiNormalizationFilter.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // javadoc import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link HindiNormalizer} to normalize the @@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; public final class HindiNormalizationFilter extends TokenFilter { private final HindiNormalizer normalizer = new HindiNormalizer(); - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); public HindiNormalizationFilter(TokenStream input) { @@ -50,8 +50,8 @@ public final class HindiNormalizationFilter extends TokenFilter { public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAtt.isKeyword()) - termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), - termAtt.termLength())); + termAtt.setLength(normalizer.normalize(termAtt.buffer(), + termAtt.length())); return true; } return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java index cd470a407cc..946fe347825 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/hi/HindiStemFilter.java @@ -22,13 +22,13 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link HindiStemmer} to stem Hindi words. 
*/ public final class HindiStemFilter extends TokenFilter { - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); private final HindiStemmer stemmer = new HindiStemmer(); @@ -40,7 +40,7 @@ public final class HindiStemFilter extends TokenFilter { public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAtt.isKeyword()) - termAtt.setTermLength(stemmer.stem(termAtt.termBuffer(), termAtt.termLength())); + termAtt.setLength(stemmer.stem(termAtt.buffer(), termAtt.length())); return true; } else { return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java index dd006349fad..de485b02c33 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/in/IndicNormalizationFilter.java @@ -21,14 +21,14 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that applies {@link IndicNormalizer} to normalize text * in Indian Languages. */ public final class IndicNormalizationFilter extends TokenFilter { - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final IndicNormalizer normalizer = new IndicNormalizer(); public IndicNormalizationFilter(TokenStream input) { @@ -38,7 +38,7 @@ public final class IndicNormalizationFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - termAtt.setTermLength(normalizer.normalize(termAtt.termBuffer(), termAtt.termLength())); + termAtt.setLength(normalizer.normalize(termAtt.buffer(), termAtt.length())); return true; } else { return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java index a4396da74e5..4d17af04eac 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternAnalyzer.java @@ -30,8 +30,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -332,8 +332,8 @@ public final class PatternAnalyzer extends Analyzer { private Matcher matcher; private int pos = 0; private static final Locale locale = Locale.getDefault(); - private TermAttribute termAtt = addAttribute(TermAttribute.class); - private OffsetAttribute offsetAtt = 
addAttribute(OffsetAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public PatternTokenizer(String str, Pattern pattern, boolean toLowerCase) { this.str = str; @@ -360,7 +360,7 @@ public final class PatternAnalyzer extends Analyzer { if (start != end) { // non-empty match (header/trailer) String text = str.substring(start, end); if (toLowerCase) text = text.toLowerCase(locale); - termAtt.setTermBuffer(text); + termAtt.setEmpty().append(text); offsetAtt.setOffset(start, end); return true; } @@ -392,8 +392,8 @@ public final class PatternAnalyzer extends Analyzer { private final boolean toLowerCase; private final Set stopWords; private static final Locale locale = Locale.getDefault(); - private TermAttribute termAtt = addAttribute(TermAttribute.class); - private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); public FastStringTokenizer(String str, boolean isLetter, boolean toLowerCase, Set stopWords) { this.str = str; @@ -446,7 +446,7 @@ public final class PatternAnalyzer extends Analyzer { { return false; } - termAtt.setTermBuffer(text); + termAtt.setEmpty().append(text); offsetAtt.setOffset(start, i); return true; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java index c2ee148e3f9..5fb20f56f8f 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PrefixAwareTokenFilter.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.index.Payload; @@ -44,14 +44,14 @@ public class PrefixAwareTokenFilter extends TokenStream { private TokenStream prefix; private TokenStream suffix; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posIncrAtt; private PayloadAttribute payloadAtt; private OffsetAttribute offsetAtt; private TypeAttribute typeAtt; private FlagsAttribute flagsAtt; - private TermAttribute p_termAtt; + private CharTermAttribute p_termAtt; private PositionIncrementAttribute p_posIncrAtt; private PayloadAttribute p_payloadAtt; private OffsetAttribute p_offsetAtt; @@ -64,14 +64,14 @@ public class PrefixAwareTokenFilter extends TokenStream { this.prefix = prefix; prefixExhausted = false; - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); payloadAtt = addAttribute(PayloadAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); typeAtt = addAttribute(TypeAttribute.class); flagsAtt = addAttribute(FlagsAttribute.class); - p_termAtt = 
prefix.addAttribute(TermAttribute.class); + p_termAtt = prefix.addAttribute(CharTermAttribute.class); p_posIncrAtt = prefix.addAttribute(PositionIncrementAttribute.class); p_payloadAtt = prefix.addAttribute(PayloadAttribute.class); p_offsetAtt = prefix.addAttribute(OffsetAttribute.class); @@ -115,7 +115,7 @@ public class PrefixAwareTokenFilter extends TokenStream { private void setCurrentToken(Token token) { if (token == null) return; clearAttributes(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.buffer(), 0, token.length()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -125,7 +125,7 @@ public class PrefixAwareTokenFilter extends TokenStream { private Token getNextPrefixInputToken(Token token) throws IOException { if (!prefix.incrementToken()) return null; - token.setTermBuffer(p_termAtt.termBuffer(), 0, p_termAtt.termLength()); + token.copyBuffer(p_termAtt.buffer(), 0, p_termAtt.length()); token.setPositionIncrement(p_posIncrAtt.getPositionIncrement()); token.setFlags(p_flagsAtt.getFlags()); token.setOffset(p_offsetAtt.startOffset(), p_offsetAtt.endOffset()); @@ -136,7 +136,7 @@ public class PrefixAwareTokenFilter extends TokenStream { private Token getNextSuffixInputToken(Token token) throws IOException { if (!suffix.incrementToken()) return null; - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setPositionIncrement(posIncrAtt.getPositionIncrement()); token.setFlags(flagsAtt.getFlags()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java index 05de91a2063..8a401368ca6 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SingleTokenTokenStream.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenStream} containing a single token. 
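
The PrefixAwareTokenFilter hunks above capture the most common rewrite in this patch: setTermBuffer(buf, 0, len) and termBuffer()/termLength() become copyBuffer(buf, 0, len) and buffer()/length(). Below is a minimal sketch of that idiom, assuming the Lucene 3.1-era analysis API used throughout this patch; the filter name and its side buffer are hypothetical, not part of the patch.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical filter: copies each term into a private buffer, mirroring
// the copyBuffer()/buffer()/length() idiom used in PrefixAwareTokenFilter.
public final class TermCopyingFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private char[] lastTerm = new char[8];
  private int lastTermLength = 0;

  public TermCopyingFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    // buffer() exposes the attribute's backing array, which is reused across
    // tokens, so the chars must be copied if they need to survive the next
    // incrementToken() call.
    if (lastTerm.length < termAtt.length()) {
      lastTerm = new char[termAtt.length()];
    }
    System.arraycopy(termAtt.buffer(), 0, lastTerm, 0, termAtt.length());
    lastTermLength = termAtt.length();
    return true;
  }
}
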
@@ -41,7 +41,7 @@ public final class SingleTokenTokenStream extends TokenStream { assert token != null; this.singleToken = (Token) token.clone(); - tokenAtt = (AttributeImpl) addAttribute(TermAttribute.class); + tokenAtt = (AttributeImpl) addAttribute(CharTermAttribute.class); assert (tokenAtt instanceof Token); } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java index 54cb798a0ce..65c0e160552 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/StemmerOverrideFilter.java @@ -23,7 +23,7 @@ import java.util.Map; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.util.Version; @@ -34,7 +34,7 @@ import org.apache.lucene.util.Version; public final class StemmerOverrideFilter extends TokenFilter { private final CharArrayMap dictionary; - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class); /** @@ -56,9 +56,9 @@ public final class StemmerOverrideFilter extends TokenFilter { public boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAtt.isKeyword()) { // don't muck with already-keyworded terms - String stem = dictionary.get(termAtt.termBuffer(), 0, termAtt.termLength()); + String stem = dictionary.get(termAtt.buffer(), 0, termAtt.length()); if (stem != null) { - termAtt.setTermBuffer(stem); + termAtt.setEmpty().append(stem); keywordAtt.setKeyword(true); } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java index fa85dd8ec93..55fa29b777c 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenFilter.java @@ -20,7 +20,7 @@ package org.apache.lucene.analysis.ngram; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import java.io.IOException; @@ -72,8 +72,8 @@ public final class EdgeNGramTokenFilter extends TokenFilter { private int curGramSize; private int tokStart; - private final TermAttribute termAtt; - private final OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Creates EdgeNGramTokenFilter that can generate n-grams in the sizes of the given range @@ -101,8 +101,6 @@ public final class EdgeNGramTokenFilter extends TokenFilter { this.minGram = minGram; this.maxGram = maxGram; this.side = 
side; - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); } /** @@ -124,8 +122,8 @@ public final class EdgeNGramTokenFilter extends TokenFilter { if (!input.incrementToken()) { return false; } else { - curTermBuffer = termAtt.termBuffer().clone(); - curTermLength = termAtt.termLength(); + curTermBuffer = termAtt.buffer().clone(); + curTermLength = termAtt.length(); curGramSize = minGram; tokStart = offsetAtt.startOffset(); } @@ -138,7 +136,7 @@ public final class EdgeNGramTokenFilter extends TokenFilter { int end = start + curGramSize; clearAttributes(); offsetAtt.setOffset(tokStart + start, tokStart + end); - termAtt.setTermBuffer(curTermBuffer, start, curGramSize); + termAtt.copyBuffer(curTermBuffer, start, curGramSize); curGramSize++; return true; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java index efb49a90519..b11814b0dc1 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/EdgeNGramTokenizer.java @@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram; */ import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.AttributeSource; import java.io.IOException; @@ -37,8 +37,8 @@ public final class EdgeNGramTokenizer extends Tokenizer { public static final int DEFAULT_MAX_GRAM_SIZE = 1; public static final int DEFAULT_MIN_GRAM_SIZE = 1; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** Specifies which side of the input the n-gram should be generated from */ public static enum Side { @@ -173,10 +173,6 @@ public final class EdgeNGramTokenizer extends Tokenizer { this.minGram = minGram; this.maxGram = maxGram; this.side = side; - - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); - } /** Returns the next token in the stream, or null at EOS. */ @@ -206,7 +202,7 @@ public final class EdgeNGramTokenizer extends Tokenizer { // grab gramSize chars from front or back int start = side == Side.FRONT ? 
0 : inLen - gramSize; int end = start + gramSize; - termAtt.setTermBuffer(inStr, start, gramSize); + termAtt.setEmpty().append(inStr, start, end); offsetAtt.setOffset(correctOffset(start), correctOffset(end)); gramSize++; return true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java index 41b956357ac..c73208bf36b 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenFilter.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Tokenizes the input into n-grams of the given size(s). @@ -39,8 +39,8 @@ public final class NGramTokenFilter extends TokenFilter { private int curPos; private int tokStart; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Creates NGramTokenFilter with given min and max n-grams. @@ -58,9 +58,6 @@ public final class NGramTokenFilter extends TokenFilter { } this.minGram = minGram; this.maxGram = maxGram; - - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); } /** @@ -79,8 +76,8 @@ public final class NGramTokenFilter extends TokenFilter { if (!input.incrementToken()) { return false; } else { - curTermBuffer = termAtt.termBuffer().clone(); - curTermLength = termAtt.termLength(); + curTermBuffer = termAtt.buffer().clone(); + curTermLength = termAtt.length(); curGramSize = minGram; curPos = 0; tokStart = offsetAtt.startOffset(); @@ -89,7 +86,7 @@ public final class NGramTokenFilter extends TokenFilter { while (curGramSize <= maxGram) { while (curPos+curGramSize <= curTermLength) { // while there is input clearAttributes(); - termAtt.setTermBuffer(curTermBuffer, curPos, curGramSize); + termAtt.copyBuffer(curTermBuffer, curPos, curGramSize); offsetAtt.setOffset(tokStart + curPos, tokStart + curPos + curGramSize); curPos++; return true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java index 81804d937e4..e7137c9ecbe 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ngram/NGramTokenizer.java @@ -18,8 +18,8 @@ package org.apache.lucene.analysis.ngram; */ import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.util.AttributeSource; import java.io.IOException; @@ -39,8 +39,8 @@ public final class NGramTokenizer extends Tokenizer { private String inStr; private boolean started = false; - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; + private final CharTermAttribute termAtt = 
addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); /** * Creates NGramTokenizer with given min and max n-grams. @@ -94,9 +94,6 @@ public final class NGramTokenizer extends Tokenizer { } this.minGram = minGram; this.maxGram = maxGram; - - this.termAtt = addAttribute(TermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); } /** Returns the next token in the stream, or null at EOS. */ @@ -123,7 +120,7 @@ public final class NGramTokenizer extends Tokenizer { int oldPos = pos; pos++; - termAtt.setTermBuffer(inStr, oldPos, gramSize); + termAtt.setEmpty().append(inStr, oldPos, oldPos+gramSize); offsetAtt.setOffset(correctOffset(oldPos), correctOffset(oldPos+gramSize)); return true; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java index 65ab9cbec32..1b9d0d0e4f4 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/nl/DutchStemFilter.java @@ -28,7 +28,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.snowball.SnowballFilter; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * A {@link TokenFilter} that stems Dutch words. @@ -52,17 +52,14 @@ public final class DutchStemFilter extends TokenFilter { /** * The actual token in the input stream. */ - private DutchStemmer stemmer = null; + private DutchStemmer stemmer = new DutchStemmer(); private Set exclusions = null; - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public DutchStemFilter(TokenStream _in) { super(_in); - stemmer = new DutchStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** @@ -99,14 +96,14 @@ public final class DutchStemFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final String term = termAtt.term(); + final String term = termAtt.toString(); // Check the exclusion table. if (!keywordAttr.isKeyword() && (exclusions == null || !exclusions.contains(term))) { final String s = stemmer.stem(term); // If not stemmed, don't waste the time adjusting the token. 
if ((s != null) && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java index b032f76a8d8..1f86beb4eae 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** @@ -39,15 +39,13 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; public final class DelimitedPayloadTokenFilter extends TokenFilter { public static final char DEFAULT_DELIMITER = '|'; private final char delimiter; - private final TermAttribute termAtt; - private final PayloadAttribute payAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class); private final PayloadEncoder encoder; public DelimitedPayloadTokenFilter(TokenStream input, char delimiter, PayloadEncoder encoder) { super(input); - termAtt = addAttribute(TermAttribute.class); - payAtt = addAttribute(PayloadAttribute.class); this.delimiter = delimiter; this.encoder = encoder; } @@ -55,12 +53,12 @@ public final class DelimitedPayloadTokenFilter extends TokenFilter { @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - final int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + final int length = termAtt.length(); for (int i = 0; i < length; i++) { if (buffer[i] == delimiter) { payAtt.setPayload(encoder.encode(buffer, i + 1, (length - (i + 1)))); - termAtt.setTermLength(i); // simply set a new length + termAtt.setLength(i); // simply set a new length return true; } } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java index 9b2af214c4a..8ec5f700f76 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilter.java @@ -35,16 +35,14 @@ public class NumericPayloadTokenFilter extends TokenFilter { private String typeMatch; private Payload thePayload; - private PayloadAttribute payloadAtt; - private TypeAttribute typeAtt; + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public NumericPayloadTokenFilter(TokenStream input, float payload, String typeMatch) { super(input); //Need to encode the payload thePayload = new Payload(PayloadHelper.encodeFloat(payload)); this.typeMatch = typeMatch; - payloadAtt = addAttribute(PayloadAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override diff --git 
a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java index 28c590c1945..24c16db2c06 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TokenOffsetPayloadTokenFilter.java @@ -33,13 +33,11 @@ import org.apache.lucene.index.Payload; * **/ public class TokenOffsetPayloadTokenFilter extends TokenFilter { - protected OffsetAttribute offsetAtt; - protected PayloadAttribute payAtt; + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PayloadAttribute payAtt = addAttribute(PayloadAttribute.class); public TokenOffsetPayloadTokenFilter(TokenStream input) { super(input); - offsetAtt = addAttribute(OffsetAttribute.class); - payAtt = addAttribute(PayloadAttribute.class); } @Override diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java index e7be3b38911..eaf7647adac 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilter.java @@ -33,13 +33,11 @@ import java.io.IOException; * **/ public class TypeAsPayloadTokenFilter extends TokenFilter { - private PayloadAttribute payloadAtt; - private TypeAttribute typeAtt; + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); public TypeAsPayloadTokenFilter(TokenStream input) { super(input); - payloadAtt = addAttribute(PayloadAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java index 90321dbecad..d7fea12b6c3 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/position/PositionFilter.java @@ -35,7 +35,7 @@ public final class PositionFilter extends TokenFilter { /** The first token must have non-zero positionIncrement **/ private boolean firstTokenPositioned = false; - private PositionIncrementAttribute posIncrAtt; + private PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); /** * Constructs a PositionFilter that assigns a position increment of zero to @@ -45,7 +45,6 @@ public final class PositionFilter extends TokenFilter { */ public PositionFilter(final TokenStream input) { super(input); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); } /** diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java index ef9d2f996b2..7f67d0a7cee 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/reverse/ReverseStringFilter.java @@ -19,7 +19,7 @@ package org.apache.lucene.analysis.reverse; import 
org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.Version; import java.io.IOException; @@ -42,7 +42,7 @@ import java.io.IOException; */ public final class ReverseStringFilter extends TokenFilter { - private TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final char marker; private final Version matchVersion; private static final char NOMARKER = '\uFFFF'; @@ -131,20 +131,19 @@ public final class ReverseStringFilter extends TokenFilter { super(in); this.matchVersion = matchVersion; this.marker = marker; - termAtt = addAttribute(TermAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - int len = termAtt.termLength(); + int len = termAtt.length(); if (marker != NOMARKER) { len++; - termAtt.resizeTermBuffer(len); - termAtt.termBuffer()[len - 1] = marker; + termAtt.resizeBuffer(len); + termAtt.buffer()[len - 1] = marker; } - reverse( matchVersion, termAtt.termBuffer(), 0, len ); - termAtt.setTermLength(len); + reverse( matchVersion, termAtt.buffer(), 0, len ); + termAtt.setLength(len); return true; } else { return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java index 6b96e16220d..86f3e2b63df 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Normalizes token text to lower case. 
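
The ReverseStringFilter hunk above shows the in-place mutation idiom: resizeTermBuffer/termBuffer/setTermLength map onto resizeBuffer()/buffer()/setLength(), where resizeBuffer() only guarantees capacity and setLength() fixes the new logical length. A minimal sketch under the same API assumptions follows; the filter name and marker value are hypothetical.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical filter: appends a marker character in place, mirroring the
// resizeBuffer()/setLength() sequence in ReverseStringFilter.
public final class MarkerAppendingFilter extends TokenFilter {
  private static final char MARKER = '\u0001'; // placeholder marker value
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public MarkerAppendingFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    final int len = termAtt.length();
    termAtt.resizeBuffer(len + 1);   // ensure capacity only
    termAtt.buffer()[len] = MARKER;  // write directly into the backing array
    termAtt.setLength(len + 1);      // then commit the new logical length
    return true;
  }
}
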
@@ -32,20 +32,19 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute; @Deprecated public final class RussianLowerCaseFilter extends TokenFilter { - private TermAttribute termAtt; + private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public RussianLowerCaseFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); } @Override public final boolean incrementToken() throws IOException { if (input.incrementToken()) { - char[] chArray = termAtt.termBuffer(); - int chLen = termAtt.termLength(); + char[] chArray = termAtt.buffer(); + int chLen = termAtt.length(); for (int i = 0; i < chLen; i++) { chArray[i] = Character.toLowerCase(chArray[i]); diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java index 11655a87a6b..7e62e02d997 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/ru/RussianStemFilter.java @@ -22,7 +22,7 @@ import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter; // for java import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.ru.RussianStemmer;//javadoc @link import org.apache.lucene.analysis.snowball.SnowballFilter; // javadoc @link @@ -51,17 +51,14 @@ public final class RussianStemFilter extends TokenFilter /** * The actual token in the input stream. */ - private RussianStemmer stemmer = null; + private RussianStemmer stemmer = new RussianStemmer(); - private final TermAttribute termAtt; - private final KeywordAttribute keywordAttr; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public RussianStemFilter(TokenStream in) { super(in); - stemmer = new RussianStemmer(); - termAtt = addAttribute(TermAttribute.class); - keywordAttr = addAttribute(KeywordAttribute.class); } /** * Returns the next token in the stream, or null at EOS @@ -71,10 +68,10 @@ public final class RussianStemFilter extends TokenFilter { if (input.incrementToken()) { if(!keywordAttr.isKeyword()) { - final String term = termAtt.term(); + final String term = termAtt.toString(); final String s = stemmer.stem(term); if (s != null && !s.equals(term)) - termAtt.setTermBuffer(s); + termAtt.setEmpty().append(s); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java index f0bf4871408..cccd8cd1c33 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleFilter.java @@ -137,10 +137,10 @@ public final class ShingleFilter extends TokenFilter { */ private boolean isOutputHere = false; - private final CharTermAttribute termAtt; - private final OffsetAttribute offsetAtt; - private final PositionIncrementAttribute posIncrAtt; - private final TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + 
private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); /** @@ -155,10 +155,6 @@ public final class ShingleFilter extends TokenFilter { super(input); setMaxShingleSize(maxShingleSize); setMinShingleSize(minShingleSize); - this.termAtt = addAttribute(CharTermAttribute.class); - this.offsetAtt = addAttribute(OffsetAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); } /** diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java index ec6eee07dd4..a21ff3711e2 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/shingle/ShingleMatrixFilter.java @@ -31,11 +31,11 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.EmptyTokenStream; import org.apache.lucene.analysis.payloads.PayloadHelper; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column.Row; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.index.Payload; @@ -193,14 +193,14 @@ public final class ShingleMatrixFilter extends TokenStream { private TokenStream input; - private TermAttribute termAtt; + private CharTermAttribute termAtt; private PositionIncrementAttribute posIncrAtt; private PayloadAttribute payloadAtt; private OffsetAttribute offsetAtt; private TypeAttribute typeAtt; private FlagsAttribute flagsAtt; - private TermAttribute in_termAtt; + private CharTermAttribute in_termAtt; private PositionIncrementAttribute in_posIncrAtt; private PayloadAttribute in_payloadAtt; private OffsetAttribute in_offsetAtt; @@ -229,7 +229,7 @@ public final class ShingleMatrixFilter extends TokenStream { this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle; this.settingsCodec = settingsCodec; - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); payloadAtt = addAttribute(PayloadAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); @@ -239,7 +239,7 @@ public final class ShingleMatrixFilter extends TokenStream { // set the input to be an empty token stream, we already have the data. 
this.input = new EmptyTokenStream(); - in_termAtt = input.addAttribute(TermAttribute.class); + in_termAtt = input.addAttribute(CharTermAttribute.class); in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class); in_payloadAtt = input.addAttribute(PayloadAttribute.class); in_offsetAtt = input.addAttribute(OffsetAttribute.class); @@ -311,14 +311,14 @@ public final class ShingleMatrixFilter extends TokenStream { this.spacerCharacter = spacerCharacter; this.ignoringSinglePrefixOrSuffixShingle = ignoringSinglePrefixOrSuffixShingle; this.settingsCodec = settingsCodec; - termAtt = addAttribute(TermAttribute.class); + termAtt = addAttribute(CharTermAttribute.class); posIncrAtt = addAttribute(PositionIncrementAttribute.class); payloadAtt = addAttribute(PayloadAttribute.class); offsetAtt = addAttribute(OffsetAttribute.class); typeAtt = addAttribute(TypeAttribute.class); flagsAtt = addAttribute(FlagsAttribute.class); - in_termAtt = input.addAttribute(TermAttribute.class); + in_termAtt = input.addAttribute(CharTermAttribute.class); in_posIncrAtt = input.addAttribute(PositionIncrementAttribute.class); in_payloadAtt = input.addAttribute(PayloadAttribute.class); in_offsetAtt = input.addAttribute(OffsetAttribute.class); @@ -377,7 +377,7 @@ public final class ShingleMatrixFilter extends TokenStream { if (token == null) return false; clearAttributes(); - termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); + termAtt.copyBuffer(token.buffer(), 0, token.length()); posIncrAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); @@ -388,7 +388,7 @@ public final class ShingleMatrixFilter extends TokenStream { private Token getNextInputToken(Token token) throws IOException { if (!input.incrementToken()) return null; - token.setTermBuffer(in_termAtt.termBuffer(), 0, in_termAtt.termLength()); + token.copyBuffer(in_termAtt.buffer(), 0, in_termAtt.length()); token.setPositionIncrement(in_posIncrAtt.getPositionIncrement()); token.setFlags(in_flagsAtt.getFlags()); token.setOffset(in_offsetAtt.startOffset(), in_offsetAtt.endOffset()); @@ -399,7 +399,7 @@ public final class ShingleMatrixFilter extends TokenStream { private Token getNextToken(Token token) throws IOException { if (!this.incrementToken()) return null; - token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength()); + token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setPositionIncrement(posIncrAtt.getPositionIncrement()); token.setFlags(flagsAtt.getFlags()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); @@ -441,7 +441,7 @@ public final class ShingleMatrixFilter extends TokenStream { for (int i = 0; i < currentShingleLength; i++) { Token shingleToken = currentPermuationTokens.get(i + currentPermutationTokensStartOffset); - termLength += shingleToken.termLength(); + termLength += shingleToken.length(); shingle.add(shingleToken); } if (spacerCharacter != null) { @@ -459,9 +459,9 @@ public final class ShingleMatrixFilter extends TokenStream { if (spacerCharacter != null && sb.length() > 0) { sb.append(spacerCharacter); } - sb.append(shingleToken.termBuffer(), 0, shingleToken.termLength()); + sb.append(shingleToken.buffer(), 0, shingleToken.length()); } - reusableToken.setTermBuffer(sb.toString()); + reusableToken.setEmpty().append(sb); updateToken(reusableToken, shingle, currentPermutationTokensStartOffset, currentPermutationRows, currentPermuationTokens); return reusableToken; diff --git 
a/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java index 9270e974ec8..63142468cec 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/sinks/DateRecognizerSinkFilter.java @@ -21,7 +21,7 @@ import java.text.DateFormat; import java.text.ParseException; import java.util.Date; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.util.AttributeSource; /** @@ -34,7 +34,7 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter { public static final String DATE_TYPE = "date"; protected DateFormat dateFormat; - protected TermAttribute termAtt; + protected CharTermAttribute termAtt; /** * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object. @@ -50,10 +50,10 @@ public class DateRecognizerSinkFilter extends TeeSinkTokenFilter.SinkFilter { @Override public boolean accept(AttributeSource source) { if (termAtt == null) { - termAtt = source.addAttribute(TermAttribute.class); + termAtt = source.addAttribute(CharTermAttribute.class); } try { - Date date = dateFormat.parse(termAtt.term());//We don't care about the date, just that we can parse it as a date + Date date = dateFormat.parse(termAtt.toString());//We don't care about the date, just that we can parse it as a date if (date != null) { return true; } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java index feea896d959..c69d4707bb4 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LowerCaseFilter; import org.apache.lucene.analysis.tokenattributes.KeywordAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tr.TurkishLowerCaseFilter; // javadoc @link import org.tartarus.snowball.SnowballProgram; @@ -42,7 +42,7 @@ public final class SnowballFilter extends TokenFilter { private final SnowballProgram stemmer; - private final TermAttribute termAtt = addAttribute(TermAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); private final KeywordAttribute keywordAttr = addAttribute(KeywordAttribute.class); public SnowballFilter(TokenStream input, SnowballProgram stemmer) { @@ -76,16 +76,16 @@ public final class SnowballFilter extends TokenFilter { public final boolean incrementToken() throws IOException { if (input.incrementToken()) { if (!keywordAttr.isKeyword()) { - char termBuffer[] = termAtt.termBuffer(); - final int length = termAtt.termLength(); + char termBuffer[] = termAtt.buffer(); + final int length = termAtt.length(); stemmer.setCurrent(termBuffer, length); stemmer.stem(); final char finalTerm[] = stemmer.getCurrentBuffer(); final int newLength = stemmer.getCurrentBufferLength(); if (finalTerm != termBuffer) - 
termAtt.setTermBuffer(finalTerm, 0, newLength); + termAtt.copyBuffer(finalTerm, 0, newLength); else - termAtt.setTermLength(newLength); + termAtt.setLength(newLength); } return true; } else { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java index 4dcc21178b3..7b6a5ca45ca 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymFilter.java @@ -133,7 +133,7 @@ public final class SynonymFilter extends TokenFilter { OffsetAttribute lastOffsetAtt = lastTok.addAttribute(OffsetAttribute.class); newOffsetAtt.setOffset(newOffsetAtt.startOffset(), lastOffsetAtt.endOffset()); - newTermAtt.copyBuffer(repTok.termBuffer(), 0, repTok.termLength()); + newTermAtt.copyBuffer(repTok.buffer(), 0, repTok.length()); repPos += repTok.getPositionIncrement(); if (i==0) repPos=origPos; // make position of first token equal to original diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java index 1959b6e021c..e3a60abe6e4 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/synonym/SynonymMap.java @@ -103,8 +103,7 @@ public class SynonymMap { List ret = new ArrayList(strings.size()); for (String str : strings) { //Token newTok = new Token(str,0,0,"SYNONYM"); - Token newTok = new Token(0,0,"SYNONYM"); - newTok.setTermBuffer(str.toCharArray(), 0, str.length()); + Token newTok = new Token(str, 0,0,"SYNONYM"); ret.add(newTok); } return ret; @@ -137,7 +136,7 @@ public class SynonymMap { while(tok1!=null || tok2!=null) { while (tok1 != null && (pos1 <= pos2 || tok2==null)) { Token tok = new Token(tok1.startOffset(), tok1.endOffset(), tok1.type()); - tok.setTermBuffer(tok1.termBuffer(), 0, tok1.termLength()); + tok.copyBuffer(tok1.buffer(), 0, tok1.length()); tok.setPositionIncrement(pos1-pos); result.add(tok); pos=pos1; @@ -146,7 +145,7 @@ public class SynonymMap { } while (tok2 != null && (pos2 <= pos1 || tok1==null)) { Token tok = new Token(tok2.startOffset(), tok2.endOffset(), tok2.type()); - tok.setTermBuffer(tok2.termBuffer(), 0, tok2.termLength()); + tok.copyBuffer(tok2.buffer(), 0, tok2.length()); tok.setPositionIncrement(pos2-pos); result.add(tok); pos=pos2; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java index 6b9cf374582..923c4fcbe8e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/tr/TurkishLowerCaseFilter.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * Normalizes Turkish token text to lower case. 
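
The SnowballFilter hunk above illustrates a third recurring pattern: a stemmer may hand back a different char[] than the one it was given, so the caller either copies the new array in with copyBuffer() or, for an in-place edit, merely adjusts the length with setLength(). The sketch below shows that decision with a no-op stand-in for the stemmer; the class and method names are hypothetical and the stand-in does not implement any real stemming algorithm.

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// Hypothetical filter mirroring SnowballFilter's copyBuffer()-versus-setLength() branch.
public final class BufferSwapSketchFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  public BufferSwapSketchFilter(TokenStream input) {
    super(input);
  }

  // Stand-in for a stemmer that may edit the array in place or return a
  // brand-new array (as a SnowballProgram can); here it edits in place.
  private char[] process(char[] buf, int len) {
    return buf;
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    final char[] buffer = termAtt.buffer();
    final int length = termAtt.length();
    final char[] result = process(buffer, length);
    final int newLength = length; // a real stemmer would report its own result length
    if (result != buffer) {
      termAtt.copyBuffer(result, 0, newLength); // different array: copy it in
    } else {
      termAtt.setLength(newLength);             // same array: fix the length only
    }
    return true;
  }
}
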
@@ -37,7 +37,7 @@ public final class TurkishLowerCaseFilter extends TokenFilter { private static final int LATIN_SMALL_LETTER_I = '\u0069'; private static final int LATIN_SMALL_LETTER_DOTLESS_I = '\u0131'; private static final int COMBINING_DOT_ABOVE = '\u0307'; - private final TermAttribute termAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); /** * Create a new TurkishLowerCaseFilter, that normalizes Turkish token text @@ -47,7 +47,6 @@ public final class TurkishLowerCaseFilter extends TokenFilter { */ public TurkishLowerCaseFilter(TokenStream in) { super(in); - termAtt = addAttribute(TermAttribute.class); } @Override @@ -55,8 +54,8 @@ public final class TurkishLowerCaseFilter extends TokenFilter { boolean iOrAfter = false; if (input.incrementToken()) { - final char[] buffer = termAtt.termBuffer(); - int length = termAtt.termLength(); + final char[] buffer = termAtt.buffer(); + int length = termAtt.length(); for (int i = 0; i < length;) { final int ch = Character.codePointAt(buffer, i); @@ -88,7 +87,7 @@ public final class TurkishLowerCaseFilter extends TokenFilter { i += Character.toChars(Character.toLowerCase(ch), buffer, i); } - termAtt.setTermLength(length); + termAtt.setLength(length); return true; } else return false; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java index 4ff201d5caa..0d4cae87d3f 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizer.java @@ -18,10 +18,10 @@ package org.apache.lucene.analysis.wikipedia; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.AttributeSource; @@ -116,11 +116,11 @@ public final class WikipediaTokenizer extends Tokenizer { private Set untokenizedTypes = Collections.emptySet(); private Iterator tokens = null; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; - private PositionIncrementAttribute posIncrAtt; - private TermAttribute termAtt; - private FlagsAttribute flagsAtt; + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); /** * Creates a new instance of the {@link WikipediaTokenizer}. 
Attaches the @@ -176,12 +176,7 @@ public final class WikipediaTokenizer extends Tokenizer { private void init(int tokenOutput, Set untokenizedTypes) { this.tokenOutput = tokenOutput; - this.untokenizedTypes = untokenizedTypes; - this.offsetAtt = addAttribute(OffsetAttribute.class); - this.typeAtt = addAttribute(TypeAttribute.class); - this.posIncrAtt = addAttribute(PositionIncrementAttribute.class); - this.termAtt = addAttribute(TermAttribute.class); - this.flagsAtt = addAttribute(FlagsAttribute.class); + this.untokenizedTypes = untokenizedTypes; } /* @@ -245,8 +240,9 @@ public final class WikipediaTokenizer extends Tokenizer { lastPos = currPos + numAdded; } //trim the buffer + // TODO: this is inefficient String s = buffer.toString().trim(); - termAtt.setTermBuffer(s.toCharArray(), 0, s.length()); + termAtt.setEmpty().append(s); offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length())); flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG); //The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos @@ -283,8 +279,9 @@ public final class WikipediaTokenizer extends Tokenizer { lastPos = currPos + numAdded; } //trim the buffer + // TODO: this is inefficient String s = buffer.toString().trim(); - termAtt.setTermBuffer(s.toCharArray(), 0, s.length()); + termAtt.setEmpty().append(s); offsetAtt.setOffset(correctOffset(theStart), correctOffset(theStart + s.length())); flagsAtt.setFlags(UNTOKENIZED_TOKEN_FLAG); //The way the loop is written, we will have proceeded to the next token. We need to pushback the scanner to lastPos @@ -298,7 +295,7 @@ public final class WikipediaTokenizer extends Tokenizer { private void setupToken() { scanner.getText(termAtt); final int start = scanner.yychar(); - offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.termLength())); + offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length())); } /* diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java index 6b703a03a89..34735b05e08 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 17.05.10 14:51 */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/31/10 3:11 PM */ package org.apache.lucene.analysis.wikipedia; @@ -19,14 +19,14 @@ package org.apache.lucene.analysis.wikipedia; * limitations under the License. 
*/ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 17.05.10 14:51 from the specification file - * C:/Users/Uwe Schindler/Projects/lucene/newtrunk/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex + * on 5/31/10 3:11 PM from the specification file + * C:/Users/rmuir/workspace/solrcene/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex */ class WikipediaTokenizerImpl { @@ -37,16 +37,16 @@ class WikipediaTokenizerImpl { private static final int ZZ_BUFFERSIZE = 16384; /** lexical states */ - public static final int CATEGORY_STATE = 2; - public static final int DOUBLE_EQUALS_STATE = 14; + public static final int THREE_SINGLE_QUOTES_STATE = 10; public static final int EXTERNAL_LINK_STATE = 6; + public static final int DOUBLE_EQUALS_STATE = 14; public static final int INTERNAL_LINK_STATE = 4; public static final int DOUBLE_BRACE_STATE = 16; - public static final int FIVE_SINGLE_QUOTES_STATE = 12; - public static final int STRING = 18; - public static final int TWO_SINGLE_QUOTES_STATE = 8; + public static final int CATEGORY_STATE = 2; public static final int YYINITIAL = 0; - public static final int THREE_SINGLE_QUOTES_STATE = 10; + public static final int STRING = 18; + public static final int FIVE_SINGLE_QUOTES_STATE = 12; + public static final int TWO_SINGLE_QUOTES_STATE = 8; /** * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l @@ -487,8 +487,8 @@ public final int getPositionIncrement(){ /** * Fills Lucene token with the current token text. */ -final void getText(TermAttribute t) { - t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); +final void getText(CharTermAttribute t) { + t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); } final int setText(StringBuilder buffer){ @@ -803,184 +803,184 @@ final int setText(StringBuilder buffer){ zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? 
zzAction : ZZ_ACTION[zzAction]) { - case 25: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE); - } - case 46: break; - case 30: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/ - } - case 47: break; - case 41: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/ - } - case 48: break; - case 14: - { yybegin(STRING); numWikiTokensSeen++; return currentTokType; - } - case 49: break; - case 23: - { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE); - } - case 50: break; - case 34: - { positionInc = 1; return NUM; - } - case 51: break; - case 18: - { /* ignore STRING */ - } - case 52: break; - case 12: - { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/ - } - case 53: break; - case 37: - { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/ - } - case 54: break; - case 31: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); - } - case 55: break; - case 10: - { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); - } - case 56: break; - case 38: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/ - } - case 57: break; - case 19: - { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/ - } - case 58: break; - case 11: - { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); - } - case 59: break; - case 1: - { numWikiTokensSeen = 0; positionInc = 1; - } - case 60: break; - case 33: - { positionInc = 1; return HOST; - } - case 61: break; - case 3: - { positionInc = 1; return CJ; - } - case 62: break; - case 17: - { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType; - } - case 63: break; - case 32: - { positionInc = 1; return APOSTROPHE; - } - case 64: break; - case 8: - { /* ignore */ - } - case 65: break; - case 4: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE); - } - case 66: break; - case 2: - { positionInc = 1; return ALPHANUM; - } - case 67: break; - case 26: - { yybegin(YYINITIAL); - } - case 68: break; - case 43: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE); - } - case 69: break; - case 36: - { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); - } - case 70: break; - case 13: - { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); - } - case 71: break; - case 24: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE); - } - case 72: break; - case 27: - { numLinkToks = 0; yybegin(YYINITIAL); - } - case 73: break; - case 15: - { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); - } - case 74: break; - case 28: - { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); - } - case 75: break; - case 39: - { positionInc = 1; return ACRONYM; - } - case 76: break; - case 29: - { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); - } - case 77: break; - case 7: - { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType; - } - case 78: break; case 16: { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType; } - case 79: break; + case 46: break; + case 39: + { positionInc = 1; return ACRONYM; + } + case 
47: break; + case 8: + { /* ignore */ + } + case 48: break; case 20: { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); } - case 80: break; + case 49: break; case 35: { positionInc = 1; return COMPANY; } + case 50: break; + case 4: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE); + } + case 51: break; + case 25: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE); + } + case 52: break; + case 43: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE); + } + case 53: break; + case 22: + { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;} + } + case 54: break; + case 34: + { positionInc = 1; return NUM; + } + case 55: break; + case 32: + { positionInc = 1; return APOSTROPHE; + } + case 56: break; + case 23: + { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE); + } + case 57: break; + case 21: + { yybegin(STRING); return currentTokType;/*pipe*/ + } + case 58: break; + case 2: + { positionInc = 1; return ALPHANUM; + } + case 59: break; + case 29: + { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); + } + case 60: break; + case 17: + { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType; + } + case 61: break; + case 44: + { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); + } + case 62: break; + case 26: + { yybegin(YYINITIAL); + } + case 63: break; + case 3: + { positionInc = 1; return CJ; + } + case 64: break; + case 38: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/ + } + case 65: break; + case 15: + { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); + } + case 66: break; + case 30: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/ + } + case 67: break; + case 6: + { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType; + } + case 68: break; + case 5: + { positionInc = 1; + } + case 69: break; + case 19: + { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/ + } + case 70: break; + case 42: + { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType; + } + case 71: break; + case 27: + { numLinkToks = 0; yybegin(YYINITIAL); + } + case 72: break; + case 11: + { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); + } + case 73: break; + case 13: + { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); + } + case 74: break; + case 14: + { yybegin(STRING); numWikiTokensSeen++; return currentTokType; + } + case 75: break; + case 45: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); + } + case 76: break; + case 28: + { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); + } + case 77: break; + case 37: + { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/ + } + case 78: break; + case 9: + { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType; + } + case 79: break; + case 7: + { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType; + } + case 80: break; + case 24: + 
{ numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE); + } case 81: break; case 40: { positionInc = 1; return EMAIL; } case 82: break; - case 42: - { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType; + case 1: + { numWikiTokensSeen = 0; positionInc = 1; } case 83: break; - case 6: - { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType; + case 18: + { /* ignore STRING */ } case 84: break; - case 44: - { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); + case 36: + { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); } case 85: break; - case 5: - { positionInc = 1; + case 33: + { positionInc = 1; return HOST; } case 86: break; - case 9: - { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType; + case 31: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); } case 87: break; - case 45: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); + case 41: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/ } case 88: break; - case 22: - { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;} + case 12: + { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/ } case 89: break; - case 21: - { yybegin(STRING); return currentTokType;/*pipe*/ + case 10: + { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); } case 90: break; default: diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex index d012a59e71a..477c55bd030 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex @@ -17,7 +17,7 @@ package org.apache.lucene.analysis.wikipedia; * limitations under the License. */ -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %% @@ -81,8 +81,8 @@ public final int getPositionIncrement(){ /** * Fills Lucene token with the current token text. */ -final void getText(TermAttribute t) { - t.setTermBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); +final void getText(CharTermAttribute t) { + t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead); } final int setText(StringBuilder buffer){ diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java index 28bfbf69572..ebf5f541449 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/compound/TestCompoundWordTokenFilter.java @@ -17,8 +17,6 @@ package org.apache.lucene.analysis.compound; * limitations under the License. 
*/ -import java.io.File; -import java.io.FileInputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; @@ -27,7 +25,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.compound.hyphenation.HyphenationTree; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { public void testHyphenationCompoundWordsDA() throws Exception { @@ -176,15 +174,15 @@ public class TestCompoundWordTokenFilter extends BaseTokenStreamTestCase { CompoundWordTokenFilterBase.DEFAULT_MIN_SUBWORD_SIZE, CompoundWordTokenFilterBase.DEFAULT_MAX_SUBWORD_SIZE, false); - TermAttribute termAtt = tf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = tf.getAttribute(CharTermAttribute.class); assertTrue(tf.incrementToken()); - assertEquals("Rindfleischüberwachungsgesetz", termAtt.term()); + assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString()); assertTrue(tf.incrementToken()); - assertEquals("Rind", termAtt.term()); + assertEquals("Rind", termAtt.toString()); wsTokenizer.reset(new StringReader("Rindfleischüberwachungsgesetz")); tf.reset(); assertTrue(tf.incrementToken()); - assertEquals("Rindfleischüberwachungsgesetz", termAtt.term()); + assertEquals("Rindfleischüberwachungsgesetz", termAtt.toString()); } private Reader getHyphenationReader() throws Exception { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java index d7b23c8069a..42514679e46 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/fr/TestElision.java @@ -28,6 +28,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TermAttribute; /** @@ -50,9 +51,9 @@ public class TestElision extends BaseTokenStreamTestCase { private List filter(TokenFilter filter) throws IOException { List tas = new ArrayList(); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); while (filter.incrementToken()) { - tas.add(termAtt.term()); + tas.add(termAtt.toString()); } return tas; } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java index a266fff3395..80da0991355 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAndSuffixAwareTokenFilter.java @@ -41,8 +41,6 @@ public class TestPrefixAndSuffixAwareTokenFilter extends BaseTokenStreamTestCase private static Token createToken(String term, int start, int offset) { - Token token = new Token(start, offset); - token.setTermBuffer(term); - 
return token; + return new Token(term, start, offset); } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java index c7c9ae5efba..e470e3e28f5 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestPrefixAwareTokenFilter.java @@ -52,8 +52,6 @@ public class TestPrefixAwareTokenFilter extends BaseTokenStreamTestCase { private static Token createToken(String term, int start, int offset) { - Token token = new Token(start, offset); - token.setTermBuffer(term); - return token; + return new Token(term, start, offset); } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java index 75b8b88cb69..946f9787c4c 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestRemoveDuplicatesTokenFilter.java @@ -51,7 +51,7 @@ public class TestRemoveDuplicatesTokenFilter extends BaseTokenStreamTestCase { if (toks.hasNext()) { clearAttributes(); Token tok = toks.next(); - termAtt.setEmpty().append(tok.term()); + termAtt.setEmpty().append(tok); offsetAtt.setOffset(tok.startOffset(), tok.endOffset()); posIncAtt.setPositionIncrement(tok.getPositionIncrement()); return true; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java index 1253e56057a..094378629da 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestSingleTokenTokenFilter.java @@ -22,14 +22,14 @@ import java.io.IOException; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.AttributeImpl; import org.apache.lucene.analysis.Token; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class TestSingleTokenTokenFilter extends LuceneTestCase { public void test() throws IOException { Token token = new Token(); SingleTokenTokenStream ts = new SingleTokenTokenStream(token); - AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(TermAttribute.class); + AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(CharTermAttribute.class); assertTrue(tokenAtt instanceof Token); ts.reset(); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java index 6439d6bb76b..9b4d31d877f 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTrimFilter.java @@ -97,7 +97,7 @@ public class TestTrimFilter extends BaseTokenStreamTestCase { else { clearAttributes(); Token token = tokens[index++]; - 
termAtt.setEmpty().append(token.term()); + termAtt.setEmpty().append(token); offsetAtt.setOffset(token.startOffset(), token.endOffset()); posIncAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java index dc1e53fb5de..3e0ed8b5f61 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/DelimitedPayloadTokenFilterTest.java @@ -18,8 +18,8 @@ package org.apache.lucene.analysis.payloads; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.index.Payload; import org.apache.lucene.util.LuceneTestCase; @@ -32,7 +32,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter (new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), DelimitedPayloadTokenFilter.DEFAULT_DELIMITER, new IdentityEncoder()); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, "JJ".getBytes("UTF-8")); @@ -70,7 +70,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { public void testFloatEncoding() throws Exception { String test = "The quick|1.0 red|2.0 fox|3.5 jumped|0.5 over the lazy|5 brown|99.3 dogs|83.7"; DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new FloatEncoder()); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeFloat(1.0f)); @@ -88,7 +88,7 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { public void testIntEncoding() throws Exception { String test = "The quick|1 red|2 fox|3 jumped over the lazy|5 brown|99 dogs|83"; DelimitedPayloadTokenFilter filter = new DelimitedPayloadTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)), '|', new IntegerEncoder()); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); PayloadAttribute payAtt = filter.getAttribute(PayloadAttribute.class); assertTermEquals("The", filter, termAtt, payAtt, null); assertTermEquals("quick", filter, termAtt, payAtt, PayloadHelper.encodeInt(1)); @@ -104,10 +104,10 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { } void assertTermEquals(String expected, TokenStream stream, byte[] expectPay) throws Exception { - TermAttribute termAtt = 
stream.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.getAttribute(CharTermAttribute.class); PayloadAttribute payloadAtt = stream.getAttribute(PayloadAttribute.class); assertTrue(stream.incrementToken()); - assertEquals(expected, termAtt.term()); + assertEquals(expected, termAtt.toString()); Payload payload = payloadAtt.getPayload(); if (payload != null) { assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length); @@ -121,9 +121,9 @@ public class DelimitedPayloadTokenFilterTest extends LuceneTestCase { } - void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception { + void assertTermEquals(String expected, TokenStream stream, CharTermAttribute termAtt, PayloadAttribute payAtt, byte[] expectPay) throws Exception { assertTrue(stream.incrementToken()); - assertEquals(expected, termAtt.term()); + assertEquals(expected, termAtt.toString()); Payload payload = payAtt.getPayload(); if (payload != null) { assertTrue(payload.length() + " does not equal: " + expectPay.length, payload.length() == expectPay.length); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java index 7cc9a4a56d5..aa6b2cd4606 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/NumericPayloadTokenFilterTest.java @@ -20,8 +20,8 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.io.IOException; @@ -39,11 +39,11 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase { NumericPayloadTokenFilter nptf = new NumericPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test))), 3, "D"); boolean seenDogs = false; - TermAttribute termAtt = nptf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class); TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class); while (nptf.incrementToken()) { - if (termAtt.term().equals("dogs")) { + if (termAtt.toString().equals("dogs")) { seenDogs = true; assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true); assertTrue("payloadAtt.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null); @@ -60,19 +60,17 @@ public class NumericPayloadTokenFilterTest extends BaseTokenStreamTestCase { } private final class WordTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private WordTokenFilter(TokenStream input) { super(input); - termAtt = 
addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - if (termAtt.term().equals("dogs")) + if (termAtt.toString().equals("dogs")) typeAtt.setType("D"); return true; } else { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java index aacebe85894..35fa092e02e 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/payloads/TypeAsPayloadTokenFilterTest.java @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import java.io.IOException; @@ -39,12 +39,12 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase { TypeAsPayloadTokenFilter nptf = new TypeAsPayloadTokenFilter(new WordTokenFilter(new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(test)))); int count = 0; - TermAttribute termAtt = nptf.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = nptf.getAttribute(CharTermAttribute.class); TypeAttribute typeAtt = nptf.getAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = nptf.getAttribute(PayloadAttribute.class); while (nptf.incrementToken()) { - assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0])))); + assertTrue(typeAtt.type() + " is not null and it should be", typeAtt.type().equals(String.valueOf(Character.toUpperCase(termAtt.buffer()[0])))); assertTrue("nextToken.getPayload() is null and it shouldn't be", payloadAtt.getPayload() != null); String type = new String(payloadAtt.getPayload().getData(), "UTF-8"); assertTrue(type + " is not equal to " + typeAtt.type(), type.equals(typeAtt.type()) == true); @@ -55,19 +55,17 @@ public class TypeAsPayloadTokenFilterTest extends BaseTokenStreamTestCase { } private final class WordTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private WordTokenFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override public boolean incrementToken() throws IOException { if (input.incrementToken()) { - typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.termBuffer()[0]))); + typeAtt.setType(String.valueOf(Character.toUpperCase(termAtt.buffer()[0]))); return true; } else { return false; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java index ed12a7f607b..ea3938acdda 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java +++ 
b/modules/analysis/common/src/test/org/apache/lucene/analysis/position/PositionFilterTest.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.shingle.ShingleFilter; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; public class PositionFilterTest extends BaseTokenStreamTestCase { @@ -30,19 +30,18 @@ public class PositionFilterTest extends BaseTokenStreamTestCase { protected int index = 0; protected String[] testToken; - protected TermAttribute termAtt; + protected final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); public TestTokenStream(String[] testToken) { super(); this.testToken = testToken; - termAtt = addAttribute(TermAttribute.class); } @Override public final boolean incrementToken() throws IOException { clearAttributes(); if (index < testToken.length) { - termAtt.setTermBuffer(testToken[index++]); + termAtt.setEmpty().append(testToken[index++]); return true; } else { return false; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java index 7ed432a9ddf..26512acb75d 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java @@ -26,7 +26,6 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LetterTokenizer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; @@ -176,9 +175,6 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, new WhitespaceAnalyzer(TEST_VERSION_CURRENT)); a.addStopWords(reader, 10); TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring")); - TermAttribute termAtt = ts.getAttribute(TermAttribute.class); - assertTrue(ts.incrementToken()); - assertEquals("this", termAtt.term()); - assertFalse(ts.incrementToken()); + assertTokenStreamContents(ts, new String[] { "this" }); } } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java index b55b7353a30..a6896e504a8 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/reverse/TestReverseStringFilter.java @@ -21,46 +21,22 @@ import java.io.StringReader; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.BaseTokenStreamTestCase; -import org.apache.lucene.util.Version; public class TestReverseStringFilter extends BaseTokenStreamTestCase { public void testFilter() throws Exception { TokenStream stream = new 
WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader("Do have a nice day")); // 1-4 length string ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream); - TermAttribute text = filter.getAttribute(TermAttribute.class); - assertTrue(filter.incrementToken()); - assertEquals("oD", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("evah", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("a", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("ecin", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("yad", text.term()); - assertFalse(filter.incrementToken()); + assertTokenStreamContents(filter, new String[] { "oD", "evah", "a", "ecin", "yad" }); } public void testFilterWithMark() throws Exception { TokenStream stream = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader( "Do have a nice day")); // 1-4 length string ReverseStringFilter filter = new ReverseStringFilter(TEST_VERSION_CURRENT, stream, '\u0001'); - TermAttribute text = filter - .getAttribute(TermAttribute.class); - assertTrue(filter.incrementToken()); - assertEquals("\u0001oD", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001evah", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001a", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001ecin", text.term()); - assertTrue(filter.incrementToken()); - assertEquals("\u0001yad", text.term()); - assertFalse(filter.incrementToken()); + assertTokenStreamContents(filter, + new String[] { "\u0001oD", "\u0001evah", "\u0001a", "\u0001ecin", "\u0001yad" }); } public void testReverseString() throws Exception { diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java index f08b9fbdb17..45d97898278 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/ru/TestRussianAnalyzer.java @@ -17,17 +17,13 @@ package org.apache.lucene.analysis.ru; * limitations under the License. */ -import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; -import java.io.Reader; -import java.io.StringReader; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; @@ -65,8 +61,8 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase new RussianLetterTokenizer(TEST_VERSION_CURRENT, sampleUnicode); - TermAttribute text = in.getAttribute(TermAttribute.class); - TermAttribute sampleText = sample.getAttribute(TermAttribute.class); + CharTermAttribute text = in.getAttribute(CharTermAttribute.class); + CharTermAttribute sampleText = sample.getAttribute(CharTermAttribute.class); for (;;) { @@ -76,34 +72,21 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase boolean nextSampleToken = sample.incrementToken(); assertEquals( "Unicode", - text.term(), + text.toString(), nextSampleToken == false ? 
null - : sampleText.term()); + : sampleText.toString()); } inWords.close(); sampleUnicode.close(); } - public void testDigitsInRussianCharset() + /** Check that RussianAnalyzer doesn't discard any numbers */ + public void testDigitsInRussianCharset() throws IOException { - Reader reader = new StringReader("text 1000"); - RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT); - TokenStream stream = ra.tokenStream("", reader); - - TermAttribute termText = stream.getAttribute(TermAttribute.class); - try { - assertTrue(stream.incrementToken()); - assertEquals("text", termText.term()); - assertTrue(stream.incrementToken()); - assertEquals("RussianAnalyzer's tokenizer skips numbers from input text", "1000", termText.term()); - assertFalse(stream.incrementToken()); - } - catch (IOException e) - { - fail("unexpected IOException"); - } + RussianAnalyzer ra = new RussianAnalyzer(TEST_VERSION_CURRENT); + assertAnalyzesTo(ra, "text 1000", new String[] { "text", "1000" }); } /** @deprecated remove this test in Lucene 4.0: stopwords changed */ diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java index ba7346c5889..29c3a0f7b74 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/ShingleAnalyzerWrapperTest.java @@ -26,8 +26,8 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.LetterTokenizer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexWriter; @@ -159,11 +159,11 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { int j = -1; PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { j += posIncrAtt.getPositionIncrement(); - String termText = termAtt.term(); + String termText = termAtt.toString(); q.add(new Term("content", termText), j); } @@ -186,10 +186,10 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase { TokenStream ts = analyzer.tokenStream("content", new StringReader("test sentence")); - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); while (ts.incrementToken()) { - String termText = termAtt.term(); + String termText = termAtt.toString(); q.add(new TermQuery(new Term("content", termText)), BooleanClause.Occur.SHOULD); } diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java index 363b97dfc0e..a6f3e112aa8 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java +++
b/modules/analysis/common/src/test/org/apache/lucene/analysis/shingle/TestShingleMatrixFilter.java @@ -31,7 +31,12 @@ import org.apache.lucene.analysis.miscellaneous.SingleTokenTokenStream; import org.apache.lucene.analysis.payloads.PayloadHelper; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix; import org.apache.lucene.analysis.shingle.ShingleMatrixFilter.Matrix.Column; -import org.apache.lucene.analysis.tokenattributes.*; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { @@ -415,7 +420,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { private Token tokenFactory(String text, int posIncr, int startOffset, int endOffset) { Token token = new Token(startOffset, endOffset); - token.setTermBuffer(text); + token.setEmpty().append(text); token.setPositionIncrement(posIncr); return token; } @@ -427,7 +432,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset) { Token token = new Token(startOffset, endOffset); - token.setTermBuffer(text); + token.setEmpty().append(text); token.setPositionIncrement(posIncr); ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); return token; @@ -435,7 +440,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { private Token tokenFactory(String text, int posIncr, float weight, int startOffset, int endOffset, ShingleMatrixFilter.TokenPositioner positioner) { Token token = new Token(startOffset, endOffset); - token.setTermBuffer(text); + token.setEmpty().append(text); token.setPositionIncrement(posIncr); ShingleMatrixFilter.defaultSettingsCodec.setWeight(token, weight); ShingleMatrixFilter.defaultSettingsCodec.setTokenPositioner(token, positioner); @@ -445,20 +450,20 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { // assert-methods start here private void assertNext(TokenStream ts, String text) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); + assertEquals(text, termAtt.toString()); } private void assertNext(TokenStream ts, String text, int positionIncrement, float boost, int startOffset, int endOffset) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); + assertEquals(text, termAtt.toString()); assertEquals(positionIncrement, posIncrAtt.getPositionIncrement()); assertEquals(boost, payloadAtt.getPayload() == null ? 
1f : PayloadHelper.decodeFloat(payloadAtt.getPayload().getData()), 0); assertEquals(startOffset, offsetAtt.startOffset()); @@ -466,11 +471,11 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { } private void assertNext(TokenStream ts, String text, int startOffset, int endOffset) throws IOException { - TermAttribute termAtt = ts.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); assertTrue(ts.incrementToken()); - assertEquals(text, termAtt.term()); + assertEquals(text, termAtt.toString()); assertEquals(startOffset, offsetAtt.startOffset()); assertEquals(endOffset, offsetAtt.endOffset()); } @@ -478,7 +483,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { private static Token createToken(String term, int start, int offset) { Token token = new Token(start, offset); - token.setTermBuffer(term); + token.setEmpty().append(term); return token; } @@ -486,21 +491,15 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { public final static class TokenListStream extends TokenStream { private Collection tokens; - TermAttribute termAtt; - PositionIncrementAttribute posIncrAtt; - PayloadAttribute payloadAtt; - OffsetAttribute offsetAtt; - TypeAttribute typeAtt; - FlagsAttribute flagsAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); public TokenListStream(Collection tokens) { this.tokens = tokens; - termAtt = addAttribute(TermAttribute.class); - posIncrAtt = addAttribute(PositionIncrementAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - flagsAtt = addAttribute(FlagsAttribute.class); } private Iterator iterator; @@ -515,7 +514,7 @@ public class TestShingleMatrixFilter extends BaseTokenStreamTestCase { } Token prototype = iterator.next(); clearAttributes(); - termAtt.setTermBuffer(prototype.termBuffer(), 0, prototype.termLength()); + termAtt.copyBuffer(prototype.buffer(), 0, prototype.length()); posIncrAtt.setPositionIncrement(prototype.getPositionIncrement()); flagsAtt.setFlags(prototype.getFlags()); offsetAtt.setOffset(prototype.startOffset(), prototype.endOffset()); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java index bb3fe3c546a..ab623452dd3 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/sinks/TokenTypeSinkTokenizerTest.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.core.WhitespaceTokenizer; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import 
org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase { @@ -41,11 +41,11 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase { boolean seenDogs = false; - TermAttribute termAtt = ttf.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = ttf.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = ttf.addAttribute(TypeAttribute.class); ttf.reset(); while (ttf.incrementToken()) { - if (termAtt.term().equals("dogs")) { + if (termAtt.toString().equals("dogs")) { seenDogs = true; assertTrue(typeAtt.type() + " is not equal to " + "D", typeAtt.type().equals("D") == true); } else { @@ -64,20 +64,18 @@ public class TokenTypeSinkTokenizerTest extends BaseTokenStreamTestCase { } private class WordTokenFilter extends TokenFilter { - private TermAttribute termAtt; - private TypeAttribute typeAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); private WordTokenFilter(TokenStream input) { super(input); - termAtt = addAttribute(TermAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); } @Override public final boolean incrementToken() throws IOException { if (!input.incrementToken()) return false; - if (termAtt.term().equals("dogs")) { + if (termAtt.toString().equals("dogs")) { typeAtt.setType("D"); } return true; diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java index 4b456633e56..63a4e23e4b7 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/snowball/TestSnowball.java @@ -22,11 +22,11 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.Payload; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.util.Version; @@ -93,7 +93,7 @@ public class TestSnowball extends BaseTokenStreamTestCase { public void testFilterTokens() throws Exception { SnowballFilter filter = new SnowballFilter(new TestTokenStream(), "English"); - TermAttribute termAtt = filter.getAttribute(TermAttribute.class); + CharTermAttribute termAtt = filter.getAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = filter.getAttribute(OffsetAttribute.class); TypeAttribute typeAtt = filter.getAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = filter.getAttribute(PayloadAttribute.class); @@ -102,7 +102,7 @@ public class TestSnowball extends BaseTokenStreamTestCase { filter.incrementToken(); - assertEquals("accent", termAtt.term()); + assertEquals("accent", termAtt.toString()); assertEquals(2, offsetAtt.startOffset()); assertEquals(7, offsetAtt.endOffset()); assertEquals("wrd", typeAtt.type()); @@ -112,27 
+112,21 @@ public class TestSnowball extends BaseTokenStreamTestCase { } private final class TestTokenStream extends TokenStream { - private TermAttribute termAtt; - private OffsetAttribute offsetAtt; - private TypeAttribute typeAtt; - private PayloadAttribute payloadAtt; - private PositionIncrementAttribute posIncAtt; - private FlagsAttribute flagsAtt; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final PayloadAttribute payloadAtt = addAttribute(PayloadAttribute.class); + private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); TestTokenStream() { super(); - termAtt = addAttribute(TermAttribute.class); - offsetAtt = addAttribute(OffsetAttribute.class); - typeAtt = addAttribute(TypeAttribute.class); - payloadAtt = addAttribute(PayloadAttribute.class); - posIncAtt = addAttribute(PositionIncrementAttribute.class); - flagsAtt = addAttribute(FlagsAttribute.class); } @Override public boolean incrementToken() { clearAttributes(); - termAtt.setTermBuffer("accents"); + termAtt.setEmpty().append("accents"); offsetAtt.setOffset(2, 7); typeAtt.setType("wrd"); posIncAtt.setPositionIncrement(3); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java index a8cbff57a7d..7cb690ee517 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/synonym/TestSynonymFilter.java @@ -404,7 +404,7 @@ public class TestSynonymFilter extends BaseTokenStreamTestCase { else { clearAttributes(); Token token = tokens[index++]; - termAtt.setEmpty().append(token.term()); + termAtt.setEmpty().append(token); offsetAtt.setOffset(token.startOffset(), token.endOffset()); posIncAtt.setPositionIncrement(token.getPositionIncrement()); flagsAtt.setFlags(token.getFlags()); diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java index 300595ba273..28d5634dc45 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerTest.java @@ -20,30 +20,20 @@ package org.apache.lucene.analysis.wikipedia; import java.io.StringReader; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; import java.util.Set; import java.util.HashSet; import org.apache.lucene.analysis.BaseTokenStreamTestCase; import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import static org.apache.lucene.analysis.wikipedia.WikipediaTokenizer.*; /** - * - * + * Basic Tests for {@link WikipediaTokenizer} **/ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { protected static final String 
LINK_PHRASES = "click [[link here again]] click [http://lucene.apache.org here again] [[Category:a b c d]]"; - public WikipediaTokenizerTest(String s) { - super(s); - } - public void testSimple() throws Exception { String text = "This is a [[Category:foo]]"; WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(text)); @@ -51,216 +41,85 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { new String[] { "This", "is", "a", "foo" }, new int[] { 0, 5, 8, 21 }, new int[] { 4, 7, 9, 24 }, - new String[] { "", "", "", WikipediaTokenizer.CATEGORY }, + new String[] { "", "", "", CATEGORY }, new int[] { 1, 1, 1, 1, }, text.length()); } public void testHandwritten() throws Exception { - //make sure all tokens are in only one type - String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " + - "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] " + - "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " + - " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. " + - "==heading== ===sub head=== followed by some text [[Category:blah| ]] " + - "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed." + - "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" + - " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" + - " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] Citation martian code"; - Map tcm = new HashMap();//map tokens to types - tcm.put("link", WikipediaTokenizer.INTERNAL_LINK); - tcm.put("display", WikipediaTokenizer.INTERNAL_LINK); - tcm.put("info", WikipediaTokenizer.INTERNAL_LINK); - - tcm.put("http://lucene.apache.org", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("http://foo.boo.com/test/test/", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("http://foo.boo.com/test/test/test.html", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("http://foo.boo.com/test/test/test.html?g=b&c=d", WikipediaTokenizer.EXTERNAL_LINK_URL); - tcm.put("Test", WikipediaTokenizer.EXTERNAL_LINK); + // make sure all tokens are in only one type + String test = "[[link]] This is a [[Category:foo]] Category This is a linked [[:Category:bar none withstanding]] " + + "Category This is (parens) This is a [[link]] This is an external URL [http://lucene.apache.org] " + + "Here is ''italics'' and ''more italics'', '''bold''' and '''''five quotes''''' " + + " This is a [[link|display info]] This is a period. Here is $3.25 and here is 3.50. Here's Johnny. " + + "==heading== ===sub head=== followed by some text [[Category:blah| ]] " + + "''[[Category:ital_cat]]'' here is some that is ''italics [[Category:foo]] but is never closed." 
+ + "'''same [[Category:foo]] goes for this '''''and2 [[Category:foo]] and this" + + " [http://foo.boo.com/test/test/ Test Test] [http://foo.boo.com/test/test/test.html Test Test]" + + " [http://foo.boo.com/test/test/test.html?g=b&c=d Test Test] Citation martian code"; - //alphanums - tcm.put("This", ""); - tcm.put("is", ""); - tcm.put("a", ""); - tcm.put("Category", ""); - tcm.put("linked", ""); - tcm.put("parens", ""); - tcm.put("external", ""); - tcm.put("URL", ""); - tcm.put("and", ""); - tcm.put("period", ""); - tcm.put("Here", ""); - tcm.put("Here's", ""); - tcm.put("here", ""); - tcm.put("Johnny", ""); - tcm.put("followed", ""); - tcm.put("by", ""); - tcm.put("text", ""); - tcm.put("that", ""); - tcm.put("but", ""); - tcm.put("never", ""); - tcm.put("closed", ""); - tcm.put("goes", ""); - tcm.put("for", ""); - tcm.put("this", ""); - tcm.put("an", ""); - tcm.put("some", ""); - tcm.put("martian", ""); - tcm.put("code", ""); - - tcm.put("foo", WikipediaTokenizer.CATEGORY); - tcm.put("bar", WikipediaTokenizer.CATEGORY); - tcm.put("none", WikipediaTokenizer.CATEGORY); - tcm.put("withstanding", WikipediaTokenizer.CATEGORY); - tcm.put("blah", WikipediaTokenizer.CATEGORY); - tcm.put("ital", WikipediaTokenizer.CATEGORY); - tcm.put("cat", WikipediaTokenizer.CATEGORY); - - tcm.put("italics", WikipediaTokenizer.ITALICS); - tcm.put("more", WikipediaTokenizer.ITALICS); - tcm.put("bold", WikipediaTokenizer.BOLD); - tcm.put("same", WikipediaTokenizer.BOLD); - tcm.put("five", WikipediaTokenizer.BOLD_ITALICS); - tcm.put("and2", WikipediaTokenizer.BOLD_ITALICS); - tcm.put("quotes", WikipediaTokenizer.BOLD_ITALICS); - - tcm.put("heading", WikipediaTokenizer.HEADING); - tcm.put("sub", WikipediaTokenizer.SUB_HEADING); - tcm.put("head", WikipediaTokenizer.SUB_HEADING); - - tcm.put("Citation", WikipediaTokenizer.CITATION); - - tcm.put("3.25", ""); - tcm.put("3.50", ""); WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test)); - int count = 0; - int numItalics = 0; - int numBoldItalics = 0; - int numCategory = 0; - int numCitation = 0; - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class); - - while (tf.incrementToken()) { - String tokText = termAtt.term(); - //System.out.println("Text: " + tokText + " Type: " + token.type()); - String expectedType = tcm.get(tokText); - assertTrue("expectedType is null and it shouldn't be for: " + tf.toString(), expectedType != null); - assertTrue(typeAtt.type() + " is not equal to " + expectedType + " for " + tf.toString(), typeAtt.type().equals(expectedType) == true); - count++; - if (typeAtt.type().equals(WikipediaTokenizer.ITALICS) == true){ - numItalics++; - } else if (typeAtt.type().equals(WikipediaTokenizer.BOLD_ITALICS) == true){ - numBoldItalics++; - } else if (typeAtt.type().equals(WikipediaTokenizer.CATEGORY) == true){ - numCategory++; - } - else if (typeAtt.type().equals(WikipediaTokenizer.CITATION) == true){ - numCitation++; - } - } - assertTrue("We have not seen enough tokens: " + count + " is not >= " + tcm.size(), count >= tcm.size()); - assertTrue(numItalics + " does not equal: " + 4 + " for numItalics", numItalics == 4); - assertTrue(numBoldItalics + " does not equal: " + 3 + " for numBoldItalics", numBoldItalics == 3); - assertTrue(numCategory + " does not equal: " + 10 + " for numCategory", numCategory == 10); - assertTrue(numCitation + " does not equal: " + 1 + " for numCitation", numCitation == 1); + assertTokenStreamContents(tf, + new String[] {"link", "This", 
"is", "a", + "foo", "Category", "This", "is", "a", "linked", "bar", "none", + "withstanding", "Category", "This", "is", "parens", "This", "is", "a", + "link", "This", "is", "an", "external", "URL", + "http://lucene.apache.org", "Here", "is", "italics", "and", "more", + "italics", "bold", "and", "five", "quotes", "This", "is", "a", "link", + "display", "info", "This", "is", "a", "period", "Here", "is", "3.25", + "and", "here", "is", "3.50", "Here's", "Johnny", "heading", "sub", + "head", "followed", "by", "some", "text", "blah", "ital", "cat", + "here", "is", "some", "that", "is", "italics", "foo", "but", "is", + "never", "closed", "same", "foo", "goes", "for", "this", "and2", "foo", + "and", "this", "http://foo.boo.com/test/test/", "Test", "Test", + "http://foo.boo.com/test/test/test.html", "Test", "Test", + "http://foo.boo.com/test/test/test.html?g=b&c=d", "Test", "Test", + "Citation", "martian", "code"}, + new String[] {INTERNAL_LINK, + "", "", "", CATEGORY, "", + "", "", "", "", CATEGORY, + CATEGORY, CATEGORY, "", "", "", + "", "", "", "", INTERNAL_LINK, + "", "", "", "", "", + EXTERNAL_LINK_URL, "", "", ITALICS, "", + ITALICS, ITALICS, BOLD, "", BOLD_ITALICS, BOLD_ITALICS, + "", "", "", INTERNAL_LINK, INTERNAL_LINK, + INTERNAL_LINK, "", "", "", "", + "", "", "", "", "", + "", "", "", "", HEADING, + SUB_HEADING, SUB_HEADING, "", "", "", + "", CATEGORY, CATEGORY, CATEGORY, "", "", + "", "", "", ITALICS, CATEGORY, + "", "", "", "", BOLD, CATEGORY, + "", "", "", BOLD_ITALICS, CATEGORY, + "", "", EXTERNAL_LINK_URL, EXTERNAL_LINK, + EXTERNAL_LINK, EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, + EXTERNAL_LINK_URL, EXTERNAL_LINK, EXTERNAL_LINK, CITATION, + "", ""}); } public void testLinkPhrases() throws Exception { - WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(LINK_PHRASES)); checkLinkPhrases(tf); - } private void checkLinkPhrases(WikipediaTokenizer tf) throws IOException { - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "click", termAtt.term().equals("click") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", termAtt.term().equals("link") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - //The link, and here should be at the same position for phrases to work - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "again", - termAtt.term().equals("again") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "click", - termAtt.term().equals("click") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org", - 
termAtt.term().equals("http://lucene.apache.org") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 0, posIncrAtt.getPositionIncrement() == 0); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "again", - termAtt.term().equals("again") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "a", - termAtt.term().equals("a") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "b", - termAtt.term().equals("b") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "c", - termAtt.term().equals("c") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "d", - termAtt.term().equals("d") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - - assertFalse(tf.incrementToken()); + assertTokenStreamContents(tf, + new String[] { "click", "link", "here", "again", "click", + "http://lucene.apache.org", "here", "again", "a", "b", "c", "d" }, + new int[] { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1 }); } public void testLinks() throws Exception { String test = "[http://lucene.apache.org/java/docs/index.html#news here] [http://lucene.apache.org/java/docs/index.html?b=c here] [https://lucene.apache.org/java/docs/index.html?b=c here]"; WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test)); - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html#news", - termAtt.term().equals("http://lucene.apache.org/java/docs/index.html#news") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - tf.incrementToken();//skip here - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "http://lucene.apache.org/java/docs/index.html?b=c", - termAtt.term().equals("http://lucene.apache.org/java/docs/index.html?b=c") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - tf.incrementToken();//skip here - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "https://lucene.apache.org/java/docs/index.html?b=c", - termAtt.term().equals("https://lucene.apache.org/java/docs/index.html?b=c") == true); - assertTrue(typeAtt.type() + " is not equal to " + WikipediaTokenizer.EXTERNAL_LINK_URL, 
typeAtt.type().equals(WikipediaTokenizer.EXTERNAL_LINK_URL) == true); - - assertTrue(tf.incrementToken()); - assertFalse(tf.incrementToken()); + assertTokenStreamContents(tf, + new String[] { "http://lucene.apache.org/java/docs/index.html#news", "here", + "http://lucene.apache.org/java/docs/index.html?b=c", "here", + "https://lucene.apache.org/java/docs/index.html?b=c", "here" }, + new String[] { EXTERNAL_LINK_URL, EXTERNAL_LINK, + EXTERNAL_LINK_URL, EXTERNAL_LINK, + EXTERNAL_LINK_URL, EXTERNAL_LINK, }); } public void testLucene1133() throws Exception { @@ -272,73 +131,13 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { checkLinkPhrases(tf); String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.UNTOKENIZED_ONLY, untoks); - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class); - OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "a b c d", - termAtt.term().equals("a b c d") == true); - assertTrue(posIncrAtt.getPositionIncrement() + " does not equal: " + 1, posIncrAtt.getPositionIncrement() == 1); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 11, offsetAtt.startOffset() == 11); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 18, offsetAtt.endOffset() == 18); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "e f g", - termAtt.term().equals("e f g") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 32, offsetAtt.startOffset() == 32); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 37, offsetAtt.endOffset() == 37); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", - termAtt.term().equals("link") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 42, offsetAtt.startOffset() == 42); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 46, offsetAtt.endOffset() == 46); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "here", - termAtt.term().equals("here") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 47, offsetAtt.startOffset() == 47); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 51, offsetAtt.endOffset() == 51); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "link", - termAtt.term().equals("link") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 56, offsetAtt.startOffset() == 56); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 60, offsetAtt.endOffset() == 60); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "there", - termAtt.term().equals("there") == true); - - assertTrue(offsetAtt.startOffset() + " does not equal: " + 61, offsetAtt.startOffset() == 61); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 66, offsetAtt.endOffset() == 66); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "italics here", - termAtt.term().equals("italics here") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 71, offsetAtt.startOffset() == 71); - assertTrue(offsetAtt.endOffset() + " does not 
equal: " + 83, offsetAtt.endOffset() == 83); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "something", - termAtt.term().equals("something") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 86, offsetAtt.startOffset() == 86); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 95, offsetAtt.endOffset() == 95); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "more italics", - termAtt.term().equals("more italics") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 98, offsetAtt.startOffset() == 98); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 110, offsetAtt.endOffset() == 110); - - assertTrue(tf.incrementToken()); - assertTrue(termAtt.term() + " is not equal to " + "h i j", - termAtt.term().equals("h i j") == true); - assertTrue(offsetAtt.startOffset() + " does not equal: " + 124, offsetAtt.startOffset() == 124); - assertTrue(offsetAtt.endOffset() + " does not equal: " + 133, offsetAtt.endOffset() == 133); - - assertFalse(tf.incrementToken()); + assertTokenStreamContents(tf, + new String[] { "a b c d", "e f g", "link", "here", "link", + "there", "italics here", "something", "more italics", "h i j" }, + new int[] { 11, 32, 42, 47, 56, 61, 71, 86, 98, 124 }, + new int[] { 18, 37, 46, 51, 60, 66, 83, 95, 110, 133 }, + new int[] { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 } + ); } public void testBoth() throws Exception { @@ -348,211 +147,26 @@ public class WikipediaTokenizerTest extends BaseTokenStreamTestCase { String test = "[[Category:a b c d]] [[Category:e f g]] [[link here]] [[link there]] ''italics here'' something ''more italics'' [[Category:h i j]]"; //should output all the indivual tokens plus the untokenized tokens as well. Untokenized tokens WikipediaTokenizer tf = new WikipediaTokenizer(new StringReader(test), WikipediaTokenizer.BOTH, untoks); - TermAttribute termAtt = tf.addAttribute(TermAttribute.class); - TypeAttribute typeAtt = tf.addAttribute(TypeAttribute.class); - PositionIncrementAttribute posIncrAtt = tf.addAttribute(PositionIncrementAttribute.class); - OffsetAttribute offsetAtt = tf.addAttribute(OffsetAttribute.class); + assertTokenStreamContents(tf, + new String[] { "a b c d", "a", "b", "c", "d", "e f g", "e", "f", "g", + "link", "here", "link", "there", "italics here", "italics", "here", + "something", "more italics", "more", "italics", "h i j", "h", "i", "j" }, + new int[] { 11, 11, 13, 15, 17, 32, 32, 34, 36, 42, 47, 56, 61, 71, 71, 79, 86, 98, 98, 103, 124, 124, 128, 132 }, + new int[] { 18, 12, 14, 16, 18, 37, 33, 35, 37, 46, 51, 60, 66, 83, 78, 83, 95, 110, 102, 110, 133, 125, 129, 133 }, + new int[] { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1 } + ); + + // now check the flags, TODO: add way to check flags from BaseTokenStreamTestCase? 
diff --git a/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java b/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
index 6309b2e4163..c1e48fbbed8 100644
--- a/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
+++ b/modules/analysis/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
@@ -23,7 +23,7 @@ import com.ibm.icu.text.RawCollationKey;
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 
 import java.io.IOException;
@@ -70,7 +70,7 @@ import java.io.IOException;
 public final class ICUCollationKeyFilter extends TokenFilter {
   private Collator collator = null;
   private RawCollationKey reusableKey = new RawCollationKey();
-  private TermAttribute termAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
 
   /**
    *
@@ -80,23 +80,22 @@ public final class ICUCollationKeyFilter extends TokenFilter {
   public ICUCollationKeyFilter(TokenStream input, Collator collator) {
     super(input);
     this.collator = collator;
-    termAtt = addAttribute(TermAttribute.class);
   }
 
   @Override
   public boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      char[] termBuffer = termAtt.termBuffer();
-      String termText = new String(termBuffer, 0, termAtt.termLength());
+      char[] termBuffer = termAtt.buffer();
+      String termText = new String(termBuffer, 0, termAtt.length());
       collator.getRawCollationKey(termText, reusableKey);
       int encodedLength = IndexableBinaryStringTools.getEncodedLength(
           reusableKey.bytes, 0, reusableKey.size);
       if (encodedLength > termBuffer.length) {
-        termAtt.resizeTermBuffer(encodedLength);
+        termAtt.resizeBuffer(encodedLength);
       }
-      termAtt.setTermLength(encodedLength);
+      termAtt.setLength(encodedLength);
       IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
-          termAtt.termBuffer(), 0, encodedLength);
+          termAtt.buffer(), 0, encodedLength);
       return true;
     } else {
       return false;
diff --git a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
index 3d79a3f3ed3..bdb71e22122 100644
--- a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
+++ b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/SentenceTokenizer.java
@@ -21,8 +21,8 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
 
@@ -44,29 +44,20 @@ public final class SentenceTokenizer extends Tokenizer {
 
   private int tokenStart = 0, tokenEnd = 0;
 
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   public SentenceTokenizer(Reader reader) {
     super(reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeSource source, Reader reader) {
     super(source, reader);
-    init();
   }
 
   public SentenceTokenizer(AttributeFactory factory, Reader reader) {
     super(factory, reader);
-    init();
-  }
-
-  private void init() {
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
 
   @Override
@@ -112,7 +103,7 @@ public final class SentenceTokenizer extends Tokenizer {
     if (buffer.length() == 0)
       return false;
     else {
-      termAtt.setTermBuffer(buffer.toString());
+      termAtt.setEmpty().append(buffer);
       offsetAtt.setOffset(correctOffset(tokenStart), correctOffset(tokenEnd));
       typeAtt.setType("sentence");
       return true;
diff --git a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
index 6999b0a2e60..6f0ecea5dd3 100644
--- a/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
+++ b/modules/analysis/smartcn/src/java/org/apache/lucene/analysis/cn/smart/WordTokenFilter.java
@@ -24,8 +24,8 @@ import java.util.List;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.cn.smart.hhmm.SegToken;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 
 /**
@@ -40,9 +40,9 @@ public final class WordTokenFilter extends TokenFilter {
 
   private List tokenBuffer;
 
-  private TermAttribute termAtt;
-  private OffsetAttribute offsetAtt;
-  private TypeAttribute typeAtt;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
 
   /**
    * Construct a new WordTokenizer.
@@ -52,9 +52,6 @@ public final class WordTokenFilter extends TokenFilter {
   public WordTokenFilter(TokenStream in) {
     super(in);
     this.wordSegmenter = new WordSegmenter();
-    termAtt = addAttribute(TermAttribute.class);
-    offsetAtt = addAttribute(OffsetAttribute.class);
-    typeAtt = addAttribute(TypeAttribute.class);
   }
 
   @Override
@@ -63,7 +60,7 @@ public final class WordTokenFilter extends TokenFilter {
       // there are no remaining tokens from the current sentence... are there more sentences?
       if (input.incrementToken()) {
         // a new sentence is available: process it.
-        tokenBuffer = wordSegmenter.segmentSentence(termAtt.term(), offsetAtt.startOffset());
+        tokenBuffer = wordSegmenter.segmentSentence(termAtt.toString(), offsetAtt.startOffset());
         tokenIter = tokenBuffer.iterator();
         /*
          * it should not be possible to have a sentence with 0 words, check just in case.
@@ -79,7 +76,7 @@ public final class WordTokenFilter extends TokenFilter {
       clearAttributes();
       // There are remaining tokens from the current sentence, return the next one.
       SegToken nextWord = tokenIter.next();
-      termAtt.setTermBuffer(nextWord.charArray, 0, nextWord.charArray.length);
+      termAtt.copyBuffer(nextWord.charArray, 0, nextWord.charArray.length);
       offsetAtt.setOffset(nextWord.startOffset, nextWord.endOffset);
       typeAtt.setType("word");
       return true;
diff --git a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
index 0e563f389a1..5ad44b3dad4 100644
--- a/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
+++ b/solr/src/java/org/apache/solr/analysis/BufferedTokenStream.java
@@ -150,7 +150,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
       return null;
     } else {
       Token token = new Token();
-      token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+      token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
       token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       token.setType(typeAtt.type());
       token.setFlags(flagsAtt.getFlags());
@@ -163,7 +163,7 @@ public abstract class BufferedTokenStream extends TokenFilter {
   /** old api emulation for back compat */
   private boolean writeToken(Token token) throws IOException {
     clearAttributes();
-    termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+    termAtt.copyBuffer(token.buffer(), 0, token.length());
     offsetAtt.setOffset(token.startOffset(), token.endOffset());
     typeAtt.setType(token.type());
     flagsAtt.setFlags(token.getFlags());
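BufferedTokenStream copies token text in both directions, and the patch settles on copyBuffer for both. A condensed sketch of the two directions, assuming Token exposes the same buffer()/length()/copyBuffer() surface as CharTermAttribute (which is what these hunks rely on); the helper class and method names are hypothetical.

import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

final class TokenCopySketch {
  // attribute -> Token; old form: token.setTermBuffer(att.buffer(), 0, len)
  static Token attributeToToken(CharTermAttribute termAtt) {
    Token token = new Token();
    token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
    return token;
  }

  // Token -> attribute; old form: att.copyBuffer(token.termBuffer(), 0, token.termLength())
  static void tokenToAttribute(Token token, CharTermAttribute termAtt) {
    termAtt.copyBuffer(token.buffer(), 0, token.length());
  }
}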
diff --git a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
index 188d522cd80..f086fadad6f 100644
--- a/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
+++ b/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
@@ -163,12 +163,12 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     while (tokenStream.incrementToken()) {
       Token token = new Token();
       if (termAtt != null) {
-        token.setTermBuffer(termAtt.toString());
+        token.setEmpty().append(termAtt);
       }
       if (bytesAtt != null) {
         bytesAtt.toBytesRef(bytes);
         // TODO: This is incorrect when numeric fields change in later lucene versions. It should use BytesRef directly!
-        token.setTermBuffer(bytes.utf8ToString());
+        token.setEmpty().append(bytes.utf8ToString());
       }
       token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       token.setType(typeAtt.type());
@@ -208,10 +208,10 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
 
     for (Token token : tokens) {
       NamedList tokenNamedList = new SimpleOrderedMap();
-      String text = fieldType.indexedToReadable(token.term());
+      String text = fieldType.indexedToReadable(token.toString());
       tokenNamedList.add("text", text);
-      if (!text.equals(token.term())) {
-        tokenNamedList.add("raw_text", token.term());
+      if (!text.equals(token.toString())) {
+        tokenNamedList.add("raw_text", token.toString());
       }
       tokenNamedList.add("type", token.type());
       tokenNamedList.add("start", token.startOffset());
@@ -220,7 +220,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
       position += token.getPositionIncrement();
       tokenNamedList.add("position", position);
 
-      if (context.getTermsToMatch().contains(token.term())) {
+      if (context.getTermsToMatch().contains(token.toString())) {
         tokenNamedList.add("match", true);
       }
 
@@ -292,7 +292,7 @@ public abstract class AnalysisRequestHandlerBase extends RequestHandlerBase {
     public boolean incrementToken() throws IOException {
       if (tokenIterator.hasNext()) {
         Token next = tokenIterator.next();
-        termAtt.copyBuffer(next.termBuffer(), 0, next.termLength());
+        termAtt.copyBuffer(next.buffer(), 0, next.length());
         typeAtt.setType(next.type());
         offsetAtt.setOffset(next.startOffset(), next.endOffset());
         flagsAtt.setFlags(next.getFlags());
diff --git a/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
index 3a53e09ad25..0f0aead5ec4 100644
--- a/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
+++ b/solr/src/java/org/apache/solr/handler/DocumentAnalysisRequestHandler.java
@@ -221,7 +221,7 @@ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase {
       try {
         List tokens = analyzeValue(request.getQuery(), fieldType.getQueryAnalyzer());
         for (Token token : tokens) {
-          termsToMatch.add(token.term());
+          termsToMatch.add(token.toString());
         }
       } catch (Exception e) {
         // ignore analysis exceptions since we are applying arbitrary text to all fields
diff --git a/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java b/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
index d4c2021d486..a670af6bcef 100644
--- a/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
+++ b/solr/src/java/org/apache/solr/handler/FieldAnalysisRequestHandler.java
@@ -227,7 +227,7 @@ public class FieldAnalysisRequestHandler extends AnalysisRequestHandlerBase {
     if (queryValue != null && analysisRequest.isShowMatch()) {
       List tokens = analyzeValue(queryValue, fieldType.getQueryAnalyzer());
       for (Token token : tokens) {
-        termsToMatch.add(token.term());
+        termsToMatch.add(token.toString());
       }
     }
 
diff --git a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
index 8d373b221cc..f5fdd8005ee 100644
--- a/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
+++ b/solr/src/java/org/apache/solr/handler/component/SpellCheckComponent.java
@@ -337,10 +337,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
         // create token
         SpellCheckResponse.Suggestion suggestion = origVsSuggestion.get(original);
-        Token token = new Token();
-        token.setTermBuffer(original);
-        token.setStartOffset(suggestion.getStartOffset());
-        token.setEndOffset(suggestion.getEndOffset());
+        Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());
 
         // get top 'count' suggestions out of 'sugQueue.size()' candidates
         SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
@@ -382,7 +379,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
 
       while (ts.incrementToken()){
         Token token = new Token();
-        token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+        token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
         token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
         token.setType(typeAtt.type());
         token.setFlags(flagsAtt.getFlags());
@@ -461,7 +458,7 @@ public class SpellCheckComponent extends SearchComponent implements SolrCoreAwar
       if (hasFreqInfo) {
         isCorrectlySpelled = isCorrectlySpelled && spellingResult.getTokenFrequency(inputToken) > 0;
       }
-      result.add(new String(inputToken.termBuffer(), 0, inputToken.termLength()), suggestionList);
+      result.add(new String(inputToken.buffer(), 0, inputToken.length()), suggestionList);
     }
   }
   if (hasFreqInfo) {
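Two further Token idioms appear in the Solr hunks just above: the three-argument constructor that folds setTermBuffer/setStartOffset/setEndOffset into one call, and setEmpty().append(...) for replacing a token's text. A hypothetical sketch of both:

import org.apache.lucene.analysis.Token;

final class TokenBuildSketch {
  // old form: new Token(); setTermBuffer(text); setStartOffset(s); setEndOffset(e)
  static Token make(String text, int start, int end) {
    return new Token(text, start, end);
  }

  // old form: token.setTermBuffer(newText)
  static void retext(Token token, String newText) {
    // setEmpty() clears the buffer and returns the attribute, so the
    // append can be chained.
    token.setEmpty().append(newText);
  }
}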
diff --git a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
index 52a4d08ca0a..2654b35e01e 100644
--- a/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
+++ b/solr/src/java/org/apache/solr/spelling/AbstractLuceneSpellChecker.java
@@ -136,7 +136,7 @@ public abstract class AbstractLuceneSpellChecker extends SolrSpellChecker {
     reader = determineReader(reader);
     Term term = field != null ? new Term(field, "") : null;
     for (Token token : tokens) {
-      String tokenText = new String(token.termBuffer(), 0, token.termLength());
+      String tokenText = new String(token.buffer(), 0, token.length());
       String[] suggestions = spellChecker.suggestSimilar(tokenText,
           (int) Math.max(count, AbstractLuceneSpellChecker.DEFAULT_SUGGESTION_COUNT),
          field != null ? reader : null, //workaround LUCENE-1295
          field,
diff --git a/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java b/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
index 8060e1bb998..0825781bdb0 100644
--- a/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
+++ b/solr/src/java/org/apache/solr/spelling/SpellingQueryConverter.java
@@ -113,7 +113,7 @@ public class SpellingQueryConverter extends QueryConverter {
       stream.reset();
       while (stream.incrementToken()) {
         Token token = new Token();
-        token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+        token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
         token.setStartOffset(matcher.start());
         token.setEndOffset(matcher.end());
         token.setFlags(flagsAtt.getFlags());
diff --git a/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java b/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
index b1352041aed..6c4baa44e86 100644
--- a/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
+++ b/solr/src/test/org/apache/solr/analysis/TestBufferedTokenStream.java
@@ -35,9 +35,9 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
   public static class AB_Q_Stream extends BufferedTokenStream {
     public AB_Q_Stream(TokenStream input) {super(input);}
     protected Token process(Token t) throws IOException {
-      if ("A".equals(new String(t.termBuffer(), 0, t.termLength()))) {
+      if ("A".equals(new String(t.buffer(), 0, t.length()))) {
         Token t2 = read();
-        if (t2!=null && "B".equals(new String(t2.termBuffer(), 0, t2.termLength()))) t.setTermBuffer("Q");
+        if (t2!=null && "B".equals(new String(t2.buffer(), 0, t2.length()))) t.setEmpty().append("Q");
         if (t2!=null) pushBack(t2);
       }
       return t;
@@ -48,8 +48,8 @@ public class TestBufferedTokenStream extends BaseTokenTestCase {
   public static class AB_AAB_Stream extends BufferedTokenStream {
     public AB_AAB_Stream(TokenStream input) {super(input);}
     protected Token process(Token t) throws IOException {
-      if ("A".equals(new String(t.termBuffer(), 0, t.termLength())) &&
-          "B".equals(new String(peek(1).termBuffer(), 0, peek(1).termLength())))
+      if ("A".equals(new String(t.buffer(), 0, t.length())) &&
+          "B".equals(new String(peek(1).buffer(), 0, peek(1).length())))
         write((Token)t.clone());
       return t;
     }
diff --git a/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java b/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java
index 4a629171011..6eb8a17f2c0 100644
--- a/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java
+++ b/solr/src/test/org/apache/solr/analysis/TestRemoveDuplicatesTokenFilterFactory.java
@@ -52,7 +52,7 @@ public class TestRemoveDuplicatesTokenFilterFactory extends BaseTokenTestCase {
         if (toks.hasNext()) {
           clearAttributes();
           Token tok = toks.next();
-          termAtt.setEmpty().append(tok.term());
+          termAtt.setEmpty().append(tok);
          offsetAtt.setOffset(tok.startOffset(), tok.endOffset());
          posIncAtt.setPositionIncrement(tok.getPositionIncrement());
          return true;
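Reading a term back out shows up in two equivalent forms across these changes: new String(buffer(), 0, length()) makes the copy explicit, while toString() is the shorter route when a String is wanted. A small illustrative sketch (names are placeholders):

import org.apache.lucene.analysis.Token;

final class TermTextSketch {
  static boolean matches(Token token, String expected) {
    // Both expressions read the same characters from the term buffer.
    String viaBuffer = new String(token.buffer(), 0, token.length());
    String viaToString = token.toString();
    return expected.equals(viaBuffer) && expected.equals(viaToString);
  }
}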
diff --git a/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java b/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
index 27b4103bf46..65d555800fa 100644
--- a/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
+++ b/solr/src/test/org/apache/solr/analysis/TestSynonymMap.java
@@ -262,7 +262,7 @@ public class TestSynonymMap extends TestCase {
     Token[] tokens = ((SynonymMap)map.submap.get( src )).synonyms;
     boolean inc = false;
     for( Token token : tokens ){
-      if( exp.equals( new String(token.termBuffer(), 0, token.termLength()) ) )
+      if( exp.equals( new String(token.buffer(), 0, token.length()) ) )
         inc = true;
     }
     assertTrue( inc );
diff --git a/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java b/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
index 3b13cadc2fd..56b7e39bf7c 100644
--- a/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
+++ b/solr/src/test/org/apache/solr/spelling/SimpleQueryConverter.java
@@ -54,7 +54,7 @@ class SimpleQueryConverter extends SpellingQueryConverter{
     ts.reset();
     while (ts.incrementToken()){
       Token tok = new Token();
-      tok.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+      tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
       tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
       tok.setFlags(flagsAtt.getFlags());
       tok.setPayload(payloadAtt.getPayload());
diff --git a/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java b/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
index a6ba3fd37ec..39739e0cf97 100644
--- a/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
+++ b/solr/src/test/org/apache/solr/spelling/SpellingQueryConverterTest.java
@@ -88,7 +88,7 @@ public class SpellingQueryConverterTest {
     for (Token token : tokens) {
       int start = token.startOffset();
       int end = token.endOffset();
-      if (!s.substring(start, end).equals(token.term()))  return false;
+      if (!s.substring(start, end).equals(token.toString()))  return false;
     }
     return true;
   }
diff --git a/solr/src/webapp/web/admin/analysis.jsp b/solr/src/webapp/web/admin/analysis.jsp
index 5a9f2f965d4..d5832f6c293 100644
--- a/solr/src/webapp/web/admin/analysis.jsp
+++ b/solr/src/webapp/web/admin/analysis.jsp
@@ -223,7 +223,7 @@
       public boolean incrementToken() throws IOException {
         if (iter.hasNext()) {
           Token token = iter.next();
-          termAtt.copyBuffer(token.termBuffer(), 0, token.termLength());
+          termAtt.copyBuffer(token.buffer(), 0, token.length());
          offsetAtt.setOffset(token.startOffset(), token.endOffset());
          typeAtt.setType(token.type());
          flagsAtt.setFlags(token.getFlags());
@@ -267,7 +267,7 @@
           break;
         else {
           Token token = new Token();
-          token.setTermBuffer(termAtt.buffer(), 0, termAtt.length());
+          token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
           token.setType(typeAtt.type());
           token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
           token.setPayload(payloadAtt.getPayload());
@@ -289,13 +289,13 @@
     }
 
     public boolean equals(Object o) {
-      return ((Tok)o).token.term().equals(token.term());
+      return ((Tok)o).token.toString().equals(token.toString());
     }
     public int hashCode() {
-      return token.term().hashCode();
+      return token.toString().hashCode();
     }
     public String toString() {
-      return token.term();
+      return token.toString();
     }
   }
 
@@ -377,7 +377,7 @@
   boolean needRaw=false;
   int pos=0;
   for (Token t : tokens) {
-    if (!t.term().equals(ft.indexedToReadable(t.term()))) {
+    if (!t.toString().equals(ft.indexedToReadable(t.toString()))) {
       needRaw=true;
     }
 
@@ -426,7 +426,7 @@
     printRow(out,"term text", arr, new ToStr() {
       public String toStr(Object o) {
-        return ft.indexedToReadable( ((Tok)o).token.term() );
+        return ft.indexedToReadable( ((Tok)o).token.toString() );
      }
    }
    ,true
@@ -438,7 +438,7 @@
     printRow(out,"raw text", arr, new ToStr() {
       public String toStr(Object o) {
         // page is UTF-8, so anything goes.
-        return ((Tok)o).token.term();
+        return ((Tok)o).token.toString();
      }
    }
    ,true