Mirror of https://github.com/apache/lucene.git (synced 2025-02-06 01:58:44 +00:00)
LUCENE-1946, LUCENE-1753: Remove deprecated TokenStream API. What a pity, my wonderful backwards layer is gone! :-( Enforce decorator pattern by making the rest of TokenStreams final.
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@824116 13f79535-47bb-0310-9956-ffa450edef68
parent 35f0f926a2
commit 4cded8042c
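For context, the migration this commit completes replaces the deprecated `Token`-based consumption methods (`next()` / `next(Token)`) with the attribute-based API. A minimal sketch of the two styles, using only calls that appear in this diff (the `consume` method and its name are illustrative, not part of the commit):

import java.io.IOException;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

class TokenStreamMigrationExample {
  // Old style (removed by this commit):
  //   final Token reusableToken = new Token();
  //   for (Token t = stream.next(reusableToken); t != null; t = stream.next(reusableToken)) {
  //     ... t.term() ...
  //   }

  // New style: register attributes once, then advance the stream.
  static void consume(TokenStream stream) throws IOException {
    TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
      System.out.println(termAtt.term()); // attribute values change in place on each call
    }
    stream.end();
    stream.close();
  }
}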
@@ -32,17 +32,20 @@ API Changes
   (Michael Busch)
 
 * LUCENE-1957: Remove Filter.bits(IndexReader) method and make
   Filter.getDocIdSet(IndexReader) abstract. (Michael Busch)
 
 * LUCENE-1960: Remove deprecated Field.Store.COMPRESS. (Michael Busch)
 
 * LUCENE-1961: Remove remaining deprecations from document package.
   (Michael Busch)
 
 * LUCENE-1968: Remove deprecated methods in PriorityQueue. (Michael Busch)
 
 * LUCENE-1970: Remove deprecated methods in DocIdSetIterator and make
   new ones abstract. (Michael Busch)
 
+* LUCENE-1753: Make not yet final TokenStreams final to enforce
+  decorator pattern (Uwe Schindler)
+
 Bug fixes
@@ -577,7 +577,7 @@
     </delete>
   </target>
 
-  <target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser, javacc-contrib-surround"/>
+  <target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser, javacc-contrib-surround, javacc-contrib-precedence"/>
 
   <target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
     <invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
@@ -605,6 +605,13 @@
     />
   </target>
 
+  <target name="javacc-contrib-precedence" depends="init,javacc-check" if="javacc.present">
+    <ant target="javacc"
+         dir="contrib/misc"
+         antfile="build.xml"
+    />
+  </target>
+
  <!-- ================================================================== -->
  <!-- Build the JFlex files into the source tree                         -->
  <!-- ================================================================== -->
@@ -164,18 +164,6 @@ public abstract class CompoundWordTokenFilterBase extends TokenFilter {
     }
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   protected static final void addAllLowerCase(Set target, Collection col) {
     Iterator iter=col.iterator();
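The same mechanical edit recurs in most of the analyzer files below: the two deprecated `next()` bridges are deleted and the class is declared `final`, so new behavior must be added by wrapping streams rather than subclassing — the decorator pattern named in the commit message. After this change a filter is written roughly like the sketch below (`ExampleLowerCaseFilter` is hypothetical, not part of this commit):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

// Hypothetical post-LUCENE-1753 filter: incrementToken() is the only token producer.
public final class ExampleLowerCaseFilter extends TokenFilter {
  private final TermAttribute termAtt;

  public ExampleLowerCaseFilter(TokenStream input) {
    super(input);
    termAtt = addAttribute(TermAttribute.class);
  }

  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) return false; // delegate to the wrapped stream
    final char[] buffer = termAtt.termBuffer();
    for (int i = 0; i < termAtt.termLength(); i++) {
      buffer[i] = Character.toLowerCase(buffer[i]); // mutate the shared attribute in place
    }
    return true;
  }
}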
@@ -36,7 +36,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
  *
  * @see <a href="http://fr.wikipedia.org/wiki/%C3%89lision">Elision in Wikipedia</a>
  */
-public class ElisionFilter extends TokenFilter {
+public final class ElisionFilter extends TokenFilter {
   private Set articles = null;
   private TermAttribute termAtt;
 
@@ -109,16 +109,4 @@ public class ElisionFilter extends TokenFilter {
       return false;
     }
   }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
 }
@@ -25,21 +25,10 @@ import java.io.IOException;
 /**
  * An always exhausted token stream.
  */
-public class EmptyTokenStream extends TokenStream {
+public final class EmptyTokenStream extends TokenStream {
 
   public final boolean incrementToken() throws IOException {
     return false;
   }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
 }
@@ -62,18 +62,6 @@ public class PrefixAndSuffixAwareTokenFilter extends TokenStream {
   public final boolean incrementToken() throws IOException {
     return suffix.incrementToken();
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     suffix.reset();
@@ -111,18 +111,6 @@ public class PrefixAwareTokenFilter extends TokenStream {
     return true;
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   private void setCurrentToken(Token token) {
     if (token == null) return;
     termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength());
@@ -27,7 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 /**
  * A {@link TokenStream} containing a single token.
  */
-public class SingleTokenTokenStream extends TokenStream {
+public final class SingleTokenTokenStream extends TokenStream {
 
   private boolean exhausted = false;
 
@@ -42,7 +42,7 @@ public class SingleTokenTokenStream extends TokenStream {
     this.singleToken = (Token) token.clone();
 
     tokenAtt = (AttributeImpl) addAttribute(TermAttribute.class);
-    assert (tokenAtt instanceof Token || tokenAtt.getClass().getName().equals("org.apache.lucene.analysis.TokenWrapper"));
+    assert (tokenAtt instanceof Token);
   }
 
   public final boolean incrementToken() throws IOException {
@@ -55,18 +55,6 @@ public class SingleTokenTokenStream extends TokenStream {
       return true;
     }
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     exhausted = false;
@@ -31,7 +31,7 @@ import java.io.IOException;
 * This {@link TokenFilter} create n-grams from the beginning edge or ending edge of a input token.
 * </p>
 */
-public class EdgeNGramTokenFilter extends TokenFilter {
+public final class EdgeNGramTokenFilter extends TokenFilter {
   public static final Side DEFAULT_SIDE = Side.FRONT;
   public static final int DEFAULT_MAX_GRAM_SIZE = 1;
   public static final int DEFAULT_MIN_GRAM_SIZE = 1;
@@ -149,18 +149,6 @@ public class EdgeNGramTokenFilter extends TokenFilter {
         curTermBuffer = null;
       }
     }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     super.reset();
@@ -33,7 +33,7 @@ import java.io.Reader;
 * MaxGram can't be larger than 1024 because of limitation.
 * </p>
 */
-public class EdgeNGramTokenizer extends Tokenizer {
+public final class EdgeNGramTokenizer extends Tokenizer {
   public static final Side DEFAULT_SIDE = Side.FRONT;
   public static final int DEFAULT_MAX_GRAM_SIZE = 1;
   public static final int DEFAULT_MIN_GRAM_SIZE = 1;
@@ -217,18 +217,6 @@ public class EdgeNGramTokenizer extends Tokenizer {
     final int finalOffset = inLen;
     this.offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset(Reader input) throws IOException {
     super.reset(input);
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 /**
  * Tokenizes the input into n-grams of the given size(s).
 */
-public class NGramTokenFilter extends TokenFilter {
+public final class NGramTokenFilter extends TokenFilter {
   public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
   public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
 
@@ -97,18 +97,6 @@ public class NGramTokenFilter extends TokenFilter {
         curTermBuffer = null;
       }
     }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     super.reset();
@@ -29,7 +29,7 @@ import java.io.Reader;
 /**
  * Tokenizes the input into n-grams of the given size(s).
 */
-public class NGramTokenizer extends Tokenizer {
+public final class NGramTokenizer extends Tokenizer {
   public static final int DEFAULT_MIN_NGRAM_SIZE = 1;
   public static final int DEFAULT_MAX_NGRAM_SIZE = 2;
 
@@ -134,18 +134,6 @@ public class NGramTokenizer extends Tokenizer {
     this.offsetAtt.setOffset(finalOffset, finalOffset);
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset(Reader input) throws IOException {
     super.reset(input);
     reset();
@@ -57,16 +57,4 @@ public class NumericPayloadTokenFilter extends TokenFilter {
       return false;
     }
   }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
 }
@@ -55,16 +55,4 @@ public class TokenOffsetPayloadTokenFilter extends TokenFilter {
       return false;
     }
   }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
 }
@@ -55,16 +55,4 @@ public class TypeAsPayloadTokenFilter extends TokenFilter {
       return false;
     }
   }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
 }
@@ -28,7 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 * except the first return token which retains its original positionIncrement value.
 * The default positionIncrement value is zero.
 */
-public class PositionFilter extends TokenFilter {
+public final class PositionFilter extends TokenFilter {
 
   /** Position increment to assign to all but the first token - default = 0 */
   private int positionIncrement = 0;
@@ -75,18 +75,6 @@ public class PositionFilter extends TokenFilter {
     }
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     super.reset();
     firstTokenPositioned = false;
@@ -41,7 +41,7 @@ import org.apache.lucene.util.AttributeSource;
 * <p>This filter handles position increments > 1 by inserting filler tokens
 * (tokens with termtext "_"). It does not handle a position increment of 0.
 */
-public class ShingleFilter extends TokenFilter {
+public final class ShingleFilter extends TokenFilter {
 
   private LinkedList shingleBuf = new LinkedList();
   private StringBuilder[] shingles;
@@ -327,18 +327,6 @@ public class ShingleFilter extends TokenFilter {
 
     return true;
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     super.reset();
@@ -114,7 +114,7 @@ import org.apache.lucene.index.Payload;
 * <b>NOTE:</b> This filter might not behave correctly if used with custom Attributes, i.e. Attributes other than
 * the ones located in org.apache.lucene.analysis.tokenattributes.
 */
-public class ShingleMatrixFilter extends TokenStream {
+public final class ShingleMatrixFilter extends TokenStream {
 
  public static Character defaultSpacerCharacter = new Character('_');
  public static TokenSettingsCodec defaultSettingsCodec = new OneDimensionalNonWeightedTokenSettingsCodec();
@@ -393,16 +393,15 @@ public class ShingleMatrixFilter extends TokenStream {
     return token;
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
+  private Token getNextToken(Token token) throws IOException {
+    if (!this.incrementToken()) return null;
+    token.setTermBuffer(termAtt.termBuffer(), 0, termAtt.termLength());
+    token.setPositionIncrement(posIncrAtt.getPositionIncrement());
+    token.setFlags(flagsAtt.getFlags());
+    token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
+    token.setType(typeAtt.type());
+    token.setPayload(payloadAtt.getPayload());
+    return token;
   }
 
   private static final Token request_next_token = new Token();
@@ -429,7 +428,7 @@ public class ShingleMatrixFilter extends TokenStream {
     if (ignoringSinglePrefixOrSuffixShingle
         && currentShingleLength == 1
         && ((currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isFirst() || (currentPermutationRows.get(currentPermutationTokensStartOffset)).getColumn().isLast())) {
-      return next(reusableToken);
+      return getNextToken(reusableToken);
     }
 
     int termLength = 0;
@@ -1,88 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.SinkTokenizer;
-import org.apache.lucene.analysis.TeeSinkTokenFilter;
-import org.apache.lucene.analysis.Token;
-
-import java.text.DateFormat;
-import java.text.SimpleDateFormat;
-import java.text.ParseException;
-import java.util.List;
-import java.util.Date;
-
-
-/**
- * Attempts to parse the {@link org.apache.lucene.analysis.Token#termBuffer()} as a Date using a {@link java.text.DateFormat}.
- * If the value is a Date, it will add it to the sink.
- * <p/>
- * Also marks the sink token with {@link org.apache.lucene.analysis.Token#type()} equal to {@link #DATE_TYPE}
- *
- * @deprecated Use {@link DateRecognizerSinkFilter} and {@link TeeSinkTokenFilter} instead.
- **/
-public class DateRecognizerSinkTokenizer extends SinkTokenizer {
-  public static final String DATE_TYPE = "date";
-
-  protected DateFormat dateFormat;
-
-  /**
-   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
-   */
-  public DateRecognizerSinkTokenizer() {
-    this(null, SimpleDateFormat.getDateInstance());
-  }
-
-  public DateRecognizerSinkTokenizer(DateFormat dateFormat) {
-    this(null, dateFormat);
-  }
-
-  /**
-   * Uses {@link java.text.SimpleDateFormat#getDateInstance()} as the {@link java.text.DateFormat} object.
-   * @param input The input list of Tokens that are already Dates.  They should be marked as type {@link #DATE_TYPE} for completeness
-   */
-  public DateRecognizerSinkTokenizer(List/*<Token>*/ input) {
-    this(input, SimpleDateFormat.getDateInstance());
-  }
-
-  /**
-   *
-   * @param input
-   * @param dateFormat The date format to use to try and parse the date.  Note, this SinkTokenizer makes no attempt to synchronize the DateFormat object
-   */
-  public DateRecognizerSinkTokenizer(List/*<Token>*/ input, DateFormat dateFormat) {
-    super(input);
-    this.dateFormat = dateFormat;
-  }
-
-
-  public void add(Token t) {
-    //Check to see if this token is a date
-    if (t != null) {
-      try {
-        Date date = dateFormat.parse(t.term());//We don't care about the date, just that we can parse it as a date
-        if (date != null) {
-          t.setType(DATE_TYPE);
-          super.add(t);
-        }
-      } catch (ParseException e) {
-
-      }
-    }
-
-  }
-}
@@ -1,56 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-
-import org.apache.lucene.analysis.SinkTokenizer;
-import org.apache.lucene.analysis.TeeSinkTokenFilter;
-import org.apache.lucene.analysis.Token;
-
-import java.io.IOException;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-
-/**
- * Counts the tokens as they go by and saves to the internal list those between the range of lower and upper, exclusive of upper
- * @deprecated Use {@link TokenRangeSinkFilter} and {@link TeeSinkTokenFilter} instead.
- **/
-public class TokenRangeSinkTokenizer extends SinkTokenizer {
-  private int lower;
-  private int upper;
-  private int count;
-
-  public TokenRangeSinkTokenizer(int lower, int upper) {
-    this.lower = lower;
-    this.upper = upper;
-  }
-
-  public TokenRangeSinkTokenizer(int initCap, int lower, int upper) {
-    super(initCap);
-    this.lower = lower;
-    this.upper = upper;
-  }
-
-  public void add(Token t) {
-    if (count >= lower && count < upper){
-      super.add(t);
-    }
-    count++;
-  }
-
-  public void reset() throws IOException {
-    count = 0;
-  }
-}
@@ -1,56 +0,0 @@
-package org.apache.lucene.analysis.sinks;
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.analysis.SinkTokenizer;
-import org.apache.lucene.analysis.TeeSinkTokenFilter;
-import org.apache.lucene.analysis.Token;
-
-import java.util.List;
-
-
-/**
- * If the {@link org.apache.lucene.analysis.Token#type()} matches the passed in <code>typeToMatch</code> then
- * add it to the sink
- *
- * @deprecated Use {@link TokenTypeSinkFilter} and {@link TeeSinkTokenFilter} instead.
- **/
-public class TokenTypeSinkTokenizer extends SinkTokenizer {
-
-  private String typeToMatch;
-
-  public TokenTypeSinkTokenizer(String typeToMatch) {
-    this.typeToMatch = typeToMatch;
-  }
-
-  public TokenTypeSinkTokenizer(int initCap, String typeToMatch) {
-    super(initCap);
-    this.typeToMatch = typeToMatch;
-  }
-
-  public TokenTypeSinkTokenizer(List/*<Token>*/ input, String typeToMatch) {
-    super(input);
-    this.typeToMatch = typeToMatch;
-  }
-
-  public void add(Token t) {
-    //check to see if this is a Category
-    if (t != null && typeToMatch.equals(t.type())){
-      super.add(t);
-    }
-  }
-}
@@ -32,7 +32,7 @@ import java.text.BreakIterator;
 * Token that is Thai into separate Token(s) for each Thai word.
 * @version 0.2
 */
-public class ThaiWordFilter extends TokenFilter {
+public final class ThaiWordFilter extends TokenFilter {
 
   private BreakIterator breaker = null;
 
@@ -82,18 +82,6 @@ public class ThaiWordFilter extends TokenFilter {
     return false;
   }
 
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next(final Token reusableToken) throws java.io.IOException {
-    return super.next(reusableToken);
-  }
-
-  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
-   * not be overridden. Delegates to the backwards compatibility layer. */
-  public final Token next() throws java.io.IOException {
-    return super.next();
-  }
-
   public void reset() throws IOException {
     super.reset();
     thaiState = null;
@@ -20,25 +20,29 @@ package org.apache.lucene.analysis.miscellaneous;
 import java.io.IOException;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 
 public class TestSingleTokenTokenFilter extends LuceneTestCase {
 
   public void test() throws IOException {
-    final Token reusableToken = new Token();
-
     Token token = new Token();
     SingleTokenTokenStream ts = new SingleTokenTokenStream(token);
+    AttributeImpl tokenAtt = (AttributeImpl) ts.addAttribute(TermAttribute.class);
+    assertTrue(tokenAtt instanceof Token);
     ts.reset();
 
-    assertEquals(token, ts.next(reusableToken));
-    assertNull(ts.next(reusableToken));
+    assertTrue(ts.incrementToken());
+    assertEquals(token, tokenAtt);
+    assertFalse(ts.incrementToken());
 
     token = new Token("hallo", 10, 20, "someType");
     ts.setToken(token);
     ts.reset();
 
-    assertEquals(token, ts.next(reusableToken));
-    assertNull(ts.next(reusableToken));
+    assertTrue(ts.incrementToken());
+    assertEquals(token, tokenAtt);
+    assertFalse(ts.incrementToken());
   }
 }
@@ -21,7 +21,6 @@ import java.io.Reader;
 import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@@ -59,8 +58,6 @@ public class ReadTokensTask extends PerfTask {
     super.tearDown();
   }
 
-  Token token = new Token();
-
   public int doLogic() throws Exception {
     List fields = doc.getFields();
     final int numField = fields.size();
@@ -96,7 +93,7 @@ public class ReadTokensTask extends PerfTask {
       // reset the TokenStream to the first token
       stream.reset();
 
-      while(stream.next(token) != null)
+      while(stream.incrementToken())
        tokenCount++;
    }
    totalTokenCount += tokenCount;
@@ -45,6 +45,7 @@ import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.Similarity;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.AttributeImpl;
 
 /**
  * This class, similar to {@link org.apache.lucene.index.IndexWriter}, has no locking mechanism.
@@ -524,14 +525,27 @@ public class InstantiatedIndexWriter {
             // reset the TokenStream to the first token
             tokenStream.reset();
 
-            final Token reusableToken = new Token();
-            for (Token nextToken = tokenStream.next(reusableToken); nextToken != null; nextToken = tokenStream.next(reusableToken)) {
-              tokens.add((Token) nextToken.clone()); // the vector will be built on commit.
+            while (tokenStream.incrementToken()) {
+              // TODO: this is a simple workaround to still work with tokens, not very effective, but as far as I know, this writer should get removed soon:
+              final Token token = new Token();
+              for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {
+                final AttributeImpl att = atts.next();
+                try {
+                  att.copyTo(token);
+                } catch (Exception e) {
+                  // ignore unsupported attributes,
+                  // this may fail to copy some attributes, if a special combined AttributeImpl is used, that
+                  // implements basic attributes supported by Token and also other customized ones in one class.
+                }
+              }
+              tokens.add(token); // the vector will be built on commit.
               fieldSetting.fieldLength++;
               if (fieldSetting.fieldLength > maxFieldLength) {
                 break;
               }
             }
+            tokenStream.end();
+            tokenStream.close();
           } else {
             // untokenized
             String fieldVal = field.stringValue();
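The try/copyTo loop in the hunk above is the generic way to rebuild a legacy Token from whatever attribute implementations a stream happens to carry: every AttributeImpl can copy its values into a compatible target, and Token implements the standard attribute interfaces. Condensed into a helper, the idea looks roughly like this (a sketch only; the class and toToken are not part of this commit):

import java.util.Iterator;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.AttributeImpl;

class TokenCopyExample {
  // Rebuild a legacy Token from the stream's current attribute values.
  static Token toToken(TokenStream tokenStream) {
    final Token token = new Token();
    for (Iterator<AttributeImpl> atts = tokenStream.getAttributeImplsIterator(); atts.hasNext();) {
      final AttributeImpl att = atts.next();
      try {
        att.copyTo(token); // Token implements the standard attribute interfaces
      } catch (Exception e) {
        // a combined custom AttributeImpl may reject Token as a target; skip it
      }
    }
    return token;
  }
}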
@@ -39,6 +39,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.analysis.tokenattributes.*;
 
 /**
 Verifies that Lucene PatternAnalyzer and normal Lucene Analyzers have the same behaviour,
@@ -141,16 +142,30 @@ public class PatternAnalyzerTest extends LuceneTestCase {
     boolean toLowerCase = toLower != 0;
 
     for (int run=0; run < runs; run++) {
-      List tokens1 = null; List tokens2 = null;
+      TokenStream tokens1 = null; TokenStream tokens2 = null;
       try {
-        if (usePattern) tokens1 = getTokens(patternTokenStream(text, lettersOnly, toLowerCase, stopWords));
-        if (useLucene) tokens2 = getTokens(luceneTokenStream(text, lettersOnly, toLowerCase, stopWords));
-        if (usePattern && useLucene) assertEquals(tokens1, tokens2);
+        if (usePattern) tokens1 = patternTokenStream(text, lettersOnly, toLowerCase, stopWords);
+        if (useLucene) tokens2 = luceneTokenStream(text, lettersOnly, toLowerCase, stopWords);
+        if (usePattern && useLucene) {
+          final TermAttribute termAtt1 = tokens1.addAttribute(TermAttribute.class),
+            termAtt2 = tokens2.addAttribute(TermAttribute.class);
+          final OffsetAttribute offsetAtt1 = tokens1.addAttribute(OffsetAttribute.class),
+            offsetAtt2 = tokens2.addAttribute(OffsetAttribute.class);
+          final PositionIncrementAttribute posincrAtt1 = tokens1.addAttribute(PositionIncrementAttribute.class),
+            posincrAtt2 = tokens2.addAttribute(PositionIncrementAttribute.class);
+          while (tokens1.incrementToken()) {
+            assertTrue(tokens2.incrementToken());
+            assertEquals(termAtt1, termAtt2);
+            assertEquals(offsetAtt1, offsetAtt2);
+            assertEquals(posincrAtt1, posincrAtt2);
+          }
+          assertFalse(tokens2.incrementToken());
+          tokens1.end(); tokens1.close();
+          tokens2.end(); tokens2.close();
+        }
       } catch (Throwable t) {
         if (t instanceof OutOfMemoryError) t.printStackTrace();
         System.out.println("fatal error at file=" + file + ", letters="+ lettersOnly + ", toLowerCase=" + toLowerCase + ", stopwords=" + (stopWords != null ? "english" : "none"));
         System.out.println("\n\ntokens1=" + toString(tokens1));
         System.out.println("\n\ntokens2=" + toString(tokens2));
         throw t;
       }
     }
@@ -194,50 +209,6 @@ public class PatternAnalyzerTest extends LuceneTestCase {
     return stream;
   }
 
-  private List getTokens(TokenStream stream) throws IOException {
-    ArrayList tokens = new ArrayList();
-    final Token reusableToken = new Token();
-    for (Token nextToken = stream.next(reusableToken); nextToken != null; nextToken = stream.next(reusableToken)) {
-      tokens.add(nextToken.clone());
-    }
-    return tokens;
-  }
-
-  private void assertEquals(List tokens1, List tokens2) {
-    int size = Math.min(tokens1.size(), tokens2.size());
-    int i=0;
-    try {
-      for (; i < size; i++) {
-        Token t1 = (Token) tokens1.get(i);
-        Token t2 = (Token) tokens2.get(i);
-        if (!(t1.term().equals(t2.term()))) throw new IllegalStateException("termText");
-        if (t1.startOffset() != t2.startOffset()) throw new IllegalStateException("startOffset");
-        if (t1.endOffset() != t2.endOffset()) throw new IllegalStateException("endOffset");
-        if (!(t1.type().equals(t2.type()))) throw new IllegalStateException("type");
-      }
-      if (tokens1.size() != tokens2.size()) throw new IllegalStateException("size1=" + tokens1.size() + ", size2=" + tokens2.size());
-    }
-
-    catch (IllegalStateException e) {
-      if (size > 0) {
-        System.out.println("i=" + i + ", size=" + size);
-        System.out.println("t1[size]='" + ((Token) tokens1.get(size-1)).term() + "'");
-        System.out.println("t2[size]='" + ((Token) tokens2.get(size-1)).term() + "'");
-      }
-      throw e;
-    }
-  }
-
-  private String toString(List tokens) {
-    if (tokens == null) return "null";
-    String str = "[";
-    for (int i=0; i < tokens.size(); i++) {
-      Token t1 = (Token) tokens.get(i);
-      str = str + "'" + t1.term() + "', ";
-    }
-    return str + "]";
-  }
-
   // trick to detect default platform charset
   private static final Charset DEFAULT_PLATFORM_CHARSET =
     Charset.forName(new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding());
@@ -1,4 +1,5 @@
-/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
+/* JavaCCOptions:STATIC=false */
 package org.apache.lucene.queryParser.precedence;
 
 /**
@@ -27,14 +28,14 @@ public interface CharStream {
 
   /**
    * Returns the column position of the character last read.
-   * @deprecated 
+   * @deprecated
    * @see #getEndColumn
   */
  int getColumn();
 
  /**
   * Returns the line number of the character last read.
-   * @deprecated 
+   * @deprecated
   * @see #getEndLine
  */
 int getLine();
@@ -79,7 +80,7 @@ public interface CharStream {
 char BeginToken() throws java.io.IOException;
 
 /**
-   * Returns a string made up of characters from the marked token beginning 
+   * Returns a string made up of characters from the marked token beginning
  * to the current buffer position. Implementations have the choice of returning
  * anything that they want to. For example, for efficiency, one might decide
  * to just return null, which is a valid implementation.
@@ -108,3 +109,4 @@ public interface CharStream {
 void Done();
 
}
+/* JavaCC - OriginalChecksum=8cc617b193267dc876ef9699367c8186 (do not edit this line) */
@@ -1,4 +1,5 @@
-/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
+/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 4.1 */
+/* JavaCCOptions:KEEP_LINE_COL=null */
 package org.apache.lucene.queryParser.precedence;
 
 /**
@@ -51,6 +52,7 @@ public class ParseException extends Exception {
     specialConstructor = false;
   }
 
+  /** Constructor with message. */
   public ParseException(String message) {
     super(message);
     specialConstructor = false;
@@ -98,19 +100,19 @@ public class ParseException extends Exception {
     if (!specialConstructor) {
       return super.getMessage();
     }
-    String expected = "";
+    StringBuffer expected = new StringBuffer();
     int maxSize = 0;
     for (int i = 0; i < expectedTokenSequences.length; i++) {
       if (maxSize < expectedTokenSequences[i].length) {
         maxSize = expectedTokenSequences[i].length;
       }
       for (int j = 0; j < expectedTokenSequences[i].length; j++) {
-        expected += tokenImage[expectedTokenSequences[i][j]] + " ";
+        expected.append(tokenImage[expectedTokenSequences[i][j]]).append(' ');
       }
       if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
-        expected += "...";
+        expected.append("...");
      }
-      expected += eol + " ";
+      expected.append(eol).append(" ");
    }
    String retval = "Encountered \"";
    Token tok = currentToken.next;
@@ -120,8 +122,11 @@ public class ParseException extends Exception {
        retval += tokenImage[0];
        break;
      }
+      retval += " " + tokenImage[tok.kind];
+      retval += " \"";
      retval += add_escapes(tok.image);
-      tok = tok.next;
+      retval += " \"";
+      tok = tok.next;
    }
    retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
    retval += "." + eol;
@@ -130,7 +135,7 @@ public class ParseException extends Exception {
    } else {
      retval += "Was expecting one of:" + eol + " ";
    }
-    retval += expected;
+    retval += expected.toString();
    return retval;
  }
 
@@ -138,7 +143,7 @@ public class ParseException extends Exception {
   * The end of line string for this machine.
   */
  protected String eol = System.getProperty("line.separator", "\n");
 
 
  /**
   * Used to convert raw characters to their escaped version
   * when these raw version cannot be used as part of an ASCII
@@ -190,3 +195,4 @@ public class ParseException extends Exception {
  }
 
}
+/* JavaCC - OriginalChecksum=15fbbe38a36c8ac9e2740d030624c321 (do not edit this line) */
@@ -12,6 +12,7 @@ import java.util.Vector;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.*;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.search.BooleanClause;
@@ -25,6 +26,7 @@ import org.apache.lucene.search.RangeQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.util.Parameter;
+import org.apache.lucene.util.AttributeSource;
 
 /**
  * Experimental query parser variant designed to handle operator precedence
@@ -70,6 +72,9 @@ import org.apache.lucene.util.Parameter;
 * documentation</a>.
 * </p>
 *
+ * @author Brian Goetz
+ * @author Peter Halacsy
+ * @author Tatu Saloranta
 */
 
 public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
@@ -205,7 +210,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
   * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
   * are considered optional: for example <code>capital of Hungary</code> is equal to
   * <code>capital OR of OR Hungary</code>.<br/>
-   * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
+   * In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
   * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
   */
  public void setDefaultOperator(Operator op) {
@@ -314,48 +319,40 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     // PhraseQuery, or nothing based on the term count
 
     TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
-    List list = new ArrayList();
-    final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token();
-    org.apache.lucene.analysis.Token nextToken;
+    List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
     int positionCount = 0;
     boolean severalTokensAtSamePosition = false;
+    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
+    PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);
 
-    while (true) {
-      try {
-        nextToken = source.next(reusableToken);
-      }
-      catch (IOException e) {
-        nextToken = null;
-      }
-      if (nextToken == null)
-        break;
-      list.add(nextToken.clone());
-      if (nextToken.getPositionIncrement() == 1)
-        positionCount++;
-      else
-        severalTokensAtSamePosition = true;
-    }
     try {
+      while (source.incrementToken()) {
+        list.add(source.captureState());
+        if (posincrAtt.getPositionIncrement() == 1)
+          positionCount++;
+        else
+          severalTokensAtSamePosition = true;
+      }
+      source.end();
       source.close();
-    }
-    catch (IOException e) {
-      // ignore
+    } catch (IOException e) {
+      // ignore, should never happen for StringReaders
     }
 
     if (list.size() == 0)
       return null;
     else if (list.size() == 1) {
-      nextToken = (org.apache.lucene.analysis.Token) list.get(0);
-      return new TermQuery(new Term(field, nextToken.term()));
+      source.restoreState(list.get(0));
+      return new TermQuery(new Term(field, termAtt.term()));
     } else {
       if (severalTokensAtSamePosition) {
         if (positionCount == 1) {
           // no phrase query:
           BooleanQuery q = new BooleanQuery();
           for (int i = 0; i < list.size(); i++) {
-            nextToken = (org.apache.lucene.analysis.Token) list.get(i);
+            source.restoreState(list.get(i));
             TermQuery currentQuery = new TermQuery(
-                new Term(field, nextToken.term()));
+                new Term(field, termAtt.term()));
             q.add(currentQuery, BooleanClause.Occur.SHOULD);
           }
           return q;
@@ -363,14 +360,14 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
         else {
           // phrase query:
           MultiPhraseQuery mpq = new MultiPhraseQuery();
-          List multiTerms = new ArrayList();
+          List<Term> multiTerms = new ArrayList<Term>();
           for (int i = 0; i < list.size(); i++) {
-            nextToken = (org.apache.lucene.analysis.Token) list.get(i);
-            if (nextToken.getPositionIncrement() == 1 && multiTerms.size() > 0) {
+            source.restoreState(list.get(i));
+            if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
               mpq.add((Term[])multiTerms.toArray(new Term[0]));
               multiTerms.clear();
             }
-            multiTerms.add(new Term(field, nextToken.term()));
+            multiTerms.add(new Term(field, termAtt.term()));
           }
           mpq.add((Term[])multiTerms.toArray(new Term[0]));
           return mpq;
@@ -380,8 +377,8 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
         PhraseQuery q = new PhraseQuery();
         q.setSlop(phraseSlop);
         for (int i = 0; i < list.size(); i++) {
-          q.add(new Term(field, ((org.apache.lucene.analysis.Token)
-            list.get(i)).term()));
+          source.restoreState(list.get(i));
+          q.add(new Term(field, termAtt.term()));
         }
         return q;
       }
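The rewritten getFieldQuery above is also the clearest illustration of the state-capture idiom that replaces Token cloning: captureState() snapshots the current values of every attribute into a detached AttributeSource.State, and restoreState() writes a snapshot back so the previously obtained attribute references (termAtt, posincrAtt) expose the captured values again. In isolation the pattern looks like this (a sketch; the class and method names are illustrative):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;

class CaptureStateExample {
  // Buffer a whole stream as attribute-state snapshots, then replay it.
  static void replay(TokenStream source) throws IOException {
    List<AttributeSource.State> states = new ArrayList<AttributeSource.State>();
    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
    while (source.incrementToken()) {
      states.add(source.captureState()); // snapshot of every attribute's current value
    }
    source.end();
    source.close();
    for (AttributeSource.State state : states) {
      source.restoreState(state); // termAtt now exposes the captured values again
      System.out.println(termAtt.term());
    }
  }
}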
@@ -1093,39 +1090,42 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     throw new Error("Missing return statement in function");
   }
 
-  final private boolean jj_2_1(int xla) {
+  private boolean jj_2_1(int xla) {
     jj_la = xla; jj_lastpos = jj_scanpos = token;
     try { return !jj_3_1(); }
     catch(LookaheadSuccess ls) { return true; }
     finally { jj_save(0, xla); }
   }
 
-  final private boolean jj_3_1() {
+  private boolean jj_3_1() {
     if (jj_scan_token(TERM)) return true;
     if (jj_scan_token(COLON)) return true;
     return false;
   }
 
+  /** Generated Token Manager. */
   public PrecedenceQueryParserTokenManager token_source;
-  public Token token, jj_nt;
+  /** Current token. */
+  public Token token;
+  /** Next token. */
+  public Token jj_nt;
   private int jj_ntk;
   private Token jj_scanpos, jj_lastpos;
   private int jj_la;
-  public boolean lookingAhead = false;
   private boolean jj_semLA;
   private int jj_gen;
   final private int[] jj_la1 = new int[24];
   static private int[] jj_la1_0;
   static {
-    jj_la1_0();
+    jj_la1_init_0();
   }
-  private static void jj_la1_0() {
+  private static void jj_la1_init_0() {
     jj_la1_0 = new int[] {0x180,0x180,0xe00,0xe00,0xfb1f00,0x100,0x80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
   }
   final private JJCalls[] jj_2_rtns = new JJCalls[1];
   private boolean jj_rescan = false;
   private int jj_gc = 0;
 
+  /** Constructor with user supplied CharStream. */
   public PrecedenceQueryParser(CharStream stream) {
     token_source = new PrecedenceQueryParserTokenManager(stream);
     token = new Token();
@@ -1135,6 +1135,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
+  /** Reinitialise. */
   public void ReInit(CharStream stream) {
     token_source.ReInit(stream);
     token = new Token();
@@ -1144,6 +1145,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
+  /** Constructor with generated Token Manager. */
   public PrecedenceQueryParser(PrecedenceQueryParserTokenManager tm) {
     token_source = tm;
     token = new Token();
@@ -1153,6 +1155,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
+  /** Reinitialise. */
   public void ReInit(PrecedenceQueryParserTokenManager tm) {
     token_source = tm;
     token = new Token();
@@ -1162,7 +1165,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
   }
 
-  final private Token jj_consume_token(int kind) throws ParseException {
+  private Token jj_consume_token(int kind) throws ParseException {
     Token oldToken;
     if ((oldToken = token).next != null) token = token.next;
     else token = token.next = token_source.getNextToken();
@@ -1188,7 +1191,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 
   static private final class LookaheadSuccess extends java.lang.Error { }
   final private LookaheadSuccess jj_ls = new LookaheadSuccess();
-  final private boolean jj_scan_token(int kind) {
+  private boolean jj_scan_token(int kind) {
     if (jj_scanpos == jj_lastpos) {
       jj_la--;
       if (jj_scanpos.next == null) {
@@ -1209,6 +1212,8 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     return false;
   }
 
+
+  /** Get the next Token. */
   final public Token getNextToken() {
     if (token.next != null) token = token.next;
     else token = token.next = token_source.getNextToken();
@@ -1217,8 +1222,9 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     return token;
   }
 
+  /** Get the specific Token. */
   final public Token getToken(int index) {
-    Token t = lookingAhead ? jj_scanpos : token;
+    Token t = token;
     for (int i = 0; i < index; i++) {
       if (t.next != null) t = t.next;
       else t = t.next = token_source.getNextToken();
@@ -1226,14 +1232,14 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     return t;
   }
 
-  final private int jj_ntk() {
+  private int jj_ntk() {
     if ((jj_nt=token.next) == null)
       return (jj_ntk = (token.next=token_source.getNextToken()).kind);
     else
       return (jj_ntk = jj_nt.kind);
   }
 
-  private java.util.Vector jj_expentries = new java.util.Vector();
+  private java.util.List jj_expentries = new java.util.ArrayList();
   private int[] jj_expentry;
   private int jj_kind = -1;
   private int[] jj_lasttokens = new int[100];
@@ -1248,31 +1254,26 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
       for (int i = 0; i < jj_endpos; i++) {
         jj_expentry[i] = jj_lasttokens[i];
       }
-      boolean exists = false;
-      for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) {
-        int[] oldentry = (int[])(e.nextElement());
+      jj_entries_loop: for (java.util.Iterator it = jj_expentries.iterator(); it.hasNext();) {
+        int[] oldentry = (int[])(it.next());
         if (oldentry.length == jj_expentry.length) {
-          exists = true;
           for (int i = 0; i < jj_expentry.length; i++) {
             if (oldentry[i] != jj_expentry[i]) {
-              exists = false;
-              break;
+              continue jj_entries_loop;
             }
           }
-          if (exists) break;
+          jj_expentries.add(jj_expentry);
+          break jj_entries_loop;
         }
       }
-      if (!exists) jj_expentries.addElement(jj_expentry);
       if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
     }
   }
 
+  /** Generate ParseException. */
   public ParseException generateParseException() {
-    jj_expentries.removeAllElements();
+    jj_expentries.clear();
     boolean[] la1tokens = new boolean[32];
     for (int i = 0; i < 32; i++) {
       la1tokens[i] = false;
     }
     if (jj_kind >= 0) {
       la1tokens[jj_kind] = true;
       jj_kind = -1;
@@ -1290,7 +1291,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
       if (la1tokens[i]) {
         jj_expentry = new int[1];
         jj_expentry[0] = i;
-        jj_expentries.addElement(jj_expentry);
+        jj_expentries.add(jj_expentry);
       }
     }
     jj_endpos = 0;
@@ -1298,20 +1299,23 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     jj_add_error_token(0, 0);
     int[][] exptokseq = new int[jj_expentries.size()][];
     for (int i = 0; i < jj_expentries.size(); i++) {
-      exptokseq[i] = (int[])jj_expentries.elementAt(i);
+      exptokseq[i] = (int[])jj_expentries.get(i);
     }
     return new ParseException(token, exptokseq, tokenImage);
   }
 
+  /** Enable tracing. */
   final public void enable_tracing() {
   }
 
+  /** Disable tracing. */
   final public void disable_tracing() {
   }
 
-  final private void jj_rescan_token() {
+  private void jj_rescan_token() {
     jj_rescan = true;
     for (int i = 0; i < 1; i++) {
       try {
         JJCalls p = jj_2_rtns[i];
         do {
           if (p.gen > jj_gen) {
@@ -1322,11 +1326,12 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
           }
           p = p.next;
         } while (p != null);
       } catch(LookaheadSuccess ls) { }
     }
     jj_rescan = false;
   }
 
-  final private void jj_save(int index, int xla) {
+  private void jj_save(int index, int xla) {
     JJCalls p = jj_2_rtns[index];
     while (p.gen > jj_gen) {
       if (p.next == null) { p = p.next = new JJCalls(); break; }
@ -36,6 +36,7 @@ import java.util.Vector;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
@ -49,6 +50,7 @@ import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
import org.apache.lucene.util.AttributeSource;

/**
* Experimental query parser variant designed to handle operator precedence
@ -341,48 +343,40 @@ public class PrecedenceQueryParser {
// PhraseQuery, or nothing based on the term count

TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
List list = new ArrayList();
final org.apache.lucene.analysis.Token reusableToken = new org.apache.lucene.analysis.Token();
org.apache.lucene.analysis.Token nextToken;
List<AttributeSource.State> list = new ArrayList<AttributeSource.State>();
int positionCount = 0;
boolean severalTokensAtSamePosition = false;
TermAttribute termAtt = source.addAttribute(TermAttribute.class);
PositionIncrementAttribute posincrAtt = source.addAttribute(PositionIncrementAttribute.class);

while (true) {
try {
nextToken = source.next(reusableToken);
}
catch (IOException e) {
nextToken = null;
}
if (nextToken == null)
break;
list.add(nextToken.clone());
if (nextToken.getPositionIncrement() == 1)
positionCount++;
else
severalTokensAtSamePosition = true;
}
try {
while (source.incrementToken()) {
list.add(source.captureState());
if (posincrAtt.getPositionIncrement() == 1)
positionCount++;
else
severalTokensAtSamePosition = true;
}
source.end();
source.close();
}
catch (IOException e) {
// ignore
} catch (IOException e) {
// ignore, should never happen for StringReaders
}

if (list.size() == 0)
return null;
else if (list.size() == 1) {
nextToken = (org.apache.lucene.analysis.Token) list.get(0);
return new TermQuery(new Term(field, nextToken.term()));
source.restoreState(list.get(0));
return new TermQuery(new Term(field, termAtt.term()));
} else {
if (severalTokensAtSamePosition) {
if (positionCount == 1) {
// no phrase query:
BooleanQuery q = new BooleanQuery();
for (int i = 0; i < list.size(); i++) {
nextToken = (org.apache.lucene.analysis.Token) list.get(i);
source.restoreState(list.get(i));
TermQuery currentQuery = new TermQuery(
new Term(field, nextToken.term()));
new Term(field, termAtt.term()));
q.add(currentQuery, BooleanClause.Occur.SHOULD);
}
return q;
@ -390,14 +384,14 @@ public class PrecedenceQueryParser {
else {
// phrase query:
MultiPhraseQuery mpq = new MultiPhraseQuery();
List multiTerms = new ArrayList();
List<Term> multiTerms = new ArrayList<Term>();
for (int i = 0; i < list.size(); i++) {
nextToken = (org.apache.lucene.analysis.Token) list.get(i);
if (nextToken.getPositionIncrement() == 1 && multiTerms.size() > 0) {
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, nextToken.term()));
multiTerms.add(new Term(field, termAtt.term()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
return mpq;
@ -407,8 +401,8 @@ public class PrecedenceQueryParser {
PhraseQuery q = new PhraseQuery();
q.setSlop(phraseSlop);
for (int i = 0; i < list.size(); i++) {
q.add(new Term(field, ((org.apache.lucene.analysis.Token)
list.get(i)).term()));
source.restoreState(list.get(i));
q.add(new Term(field, termAtt.term()));
}
return q;
}
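
The hunk above is the core of the migration: the old Token-returning next() loop becomes incrementToken() plus per-token AttributeSource.State snapshots. A minimal self-contained sketch of the new consumption pattern (not part of this commit; the analyzer, field and text are placeholders):

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;

class NewApiConsumerSketch {
  // Collects one AttributeSource.State per token, as getFieldQuery now does.
  List<AttributeSource.State> collect(Analyzer analyzer, String field, String text) throws IOException {
    TokenStream source = analyzer.tokenStream(field, new StringReader(text));
    // Attributes are registered once; incrementToken() refills them in place.
    TermAttribute termAtt = source.addAttribute(TermAttribute.class);
    List<AttributeSource.State> states = new ArrayList<AttributeSource.State>();
    while (source.incrementToken()) {
      states.add(source.captureState());   // snapshot of all attributes for this token
    }
    source.end();
    source.close();
    if (!states.isEmpty()) {
      source.restoreState(states.get(0));  // the attributes now hold the first token again
      String firstTerm = termAtt.term();   // e.g. to feed a TermQuery
    }
    return states;
  }
}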
@ -700,7 +694,7 @@ PARSER_END(PrecedenceQueryParser)
}

<DEFAULT, RangeIn, RangeEx> SKIP : {
<<_WHITESPACE>>
< <_WHITESPACE>>
}

// OG: to support prefix queries:

@ -1,45 +1,86 @@
/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserConstants.java */
package org.apache.lucene.queryParser.precedence;


/**
* Token literal values and constants.
* Generated by org.javacc.parser.OtherFilesGen#start()
*/
public interface PrecedenceQueryParserConstants {

/** End of File. */
int EOF = 0;
/** RegularExpression Id. */
int _NUM_CHAR = 1;
/** RegularExpression Id. */
int _ESCAPED_CHAR = 2;
/** RegularExpression Id. */
int _TERM_START_CHAR = 3;
/** RegularExpression Id. */
int _TERM_CHAR = 4;
/** RegularExpression Id. */
int _WHITESPACE = 5;
/** RegularExpression Id. */
int AND = 7;
/** RegularExpression Id. */
int OR = 8;
/** RegularExpression Id. */
int NOT = 9;
/** RegularExpression Id. */
int PLUS = 10;
/** RegularExpression Id. */
int MINUS = 11;
/** RegularExpression Id. */
int LPAREN = 12;
/** RegularExpression Id. */
int RPAREN = 13;
/** RegularExpression Id. */
int COLON = 14;
/** RegularExpression Id. */
int CARAT = 15;
/** RegularExpression Id. */
int QUOTED = 16;
/** RegularExpression Id. */
int TERM = 17;
/** RegularExpression Id. */
int FUZZY_SLOP = 18;
/** RegularExpression Id. */
int PREFIXTERM = 19;
/** RegularExpression Id. */
int WILDTERM = 20;
/** RegularExpression Id. */
int RANGEIN_START = 21;
/** RegularExpression Id. */
int RANGEEX_START = 22;
/** RegularExpression Id. */
int NUMBER = 23;
/** RegularExpression Id. */
int RANGEIN_TO = 24;
/** RegularExpression Id. */
int RANGEIN_END = 25;
/** RegularExpression Id. */
int RANGEIN_QUOTED = 26;
/** RegularExpression Id. */
int RANGEIN_GOOP = 27;
/** RegularExpression Id. */
int RANGEEX_TO = 28;
/** RegularExpression Id. */
int RANGEEX_END = 29;
/** RegularExpression Id. */
int RANGEEX_QUOTED = 30;
/** RegularExpression Id. */
int RANGEEX_GOOP = 31;

/** Lexical state. */
int Boost = 0;
/** Lexical state. */
int RangeEx = 1;
/** Lexical state. */
int RangeIn = 2;
/** Lexical state. */
int DEFAULT = 3;

/** Literal token values. */
String[] tokenImage = {
"<EOF>",
"<_NUM_CHAR>",

@ -10,6 +10,7 @@ import java.util.Locale;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
@ -23,10 +24,15 @@ import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Parameter;
import org.apache.lucene.util.AttributeSource;

/** Token Manager. */
public class PrecedenceQueryParserTokenManager implements PrecedenceQueryParserConstants
{

/** Debug output. */
public java.io.PrintStream debugStream = System.out;
/** Set debug output. */
public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }
private final int jjStopStringLiteralDfa_3(int pos, long active0)
{
@ -40,21 +46,13 @@ private final int jjStartNfa_3(int pos, long active0)
{
return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1);
}
private final int jjStopAtPos(int pos, int kind)
private int jjStopAtPos(int pos, int kind)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
return pos + 1;
}
private final int jjStartNfaWithStates_3(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_3(state, pos + 1);
}
private final int jjMoveStringLiteralDfa0_3()
private int jjMoveStringLiteralDfa0_3()
{
switch(curChar)
{
@ -78,50 +76,19 @@ private final int jjMoveStringLiteralDfa0_3()
return jjMoveNfa_3(0, 0);
}
}
private final void jjCheckNAdd(int state)
{
if (jjrounds[state] != jjround)
{
jjstateSet[jjnewStateCnt++] = state;
jjrounds[state] = jjround;
}
}
private final void jjAddStates(int start, int end)
{
do {
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
} while (start++ != end);
}
private final void jjCheckNAddTwoStates(int state1, int state2)
{
jjCheckNAdd(state1);
jjCheckNAdd(state2);
}
private final void jjCheckNAddStates(int start, int end)
{
do {
jjCheckNAdd(jjnextStates[start]);
} while (start++ != end);
}
private final void jjCheckNAddStates(int start)
{
jjCheckNAdd(jjnextStates[start]);
jjCheckNAdd(jjnextStates[start + 1]);
}
static final long[] jjbitVec0 = {
0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
};
static final long[] jjbitVec2 = {
0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
};
private final int jjMoveNfa_3(int startState, int curPos)
private int jjMoveNfa_3(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 33;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -129,7 +96,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -251,7 +218,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -399,7 +366,7 @@ private final int jjMoveNfa_3(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -468,15 +435,7 @@ private final int jjStartNfa_1(int pos, long active0)
{
return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1);
}
private final int jjStartNfaWithStates_1(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_1(state, pos + 1);
}
private final int jjMoveStringLiteralDfa0_1()
private int jjMoveStringLiteralDfa0_1()
{
switch(curChar)
{
@ -488,7 +447,7 @@ private final int jjMoveStringLiteralDfa0_1()
return jjMoveNfa_1(0, 0);
}
}
private final int jjMoveStringLiteralDfa1_1(long active0)
private int jjMoveStringLiteralDfa1_1(long active0)
{
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) {
@ -506,14 +465,21 @@ private final int jjMoveStringLiteralDfa1_1(long active0)
}
return jjStartNfa_1(0, active0);
}
private final int jjMoveNfa_1(int startState, int curPos)
private int jjStartNfaWithStates_1(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_1(state, pos + 1);
}
private int jjMoveNfa_1(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 5;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -521,7 +487,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -566,7 +532,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -592,7 +558,7 @@ private final int jjMoveNfa_1(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -625,18 +591,17 @@ private final int jjMoveNfa_1(int startState, int curPos)
catch(java.io.IOException e) { return curPos; }
}
}
private final int jjMoveStringLiteralDfa0_0()
private int jjMoveStringLiteralDfa0_0()
{
return jjMoveNfa_0(0, 0);
}
private final int jjMoveNfa_0(int startState, int curPos)
private int jjMoveNfa_0(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 3;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -644,7 +609,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -673,7 +638,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -688,7 +653,7 @@ private final int jjMoveNfa_0(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -728,15 +693,7 @@ private final int jjStartNfa_2(int pos, long active0)
{
return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1);
}
private final int jjStartNfaWithStates_2(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_2(state, pos + 1);
}
private final int jjMoveStringLiteralDfa0_2()
private int jjMoveStringLiteralDfa0_2()
{
switch(curChar)
{
@ -748,7 +705,7 @@ private final int jjMoveStringLiteralDfa0_2()
return jjMoveNfa_2(0, 0);
}
}
private final int jjMoveStringLiteralDfa1_2(long active0)
private int jjMoveStringLiteralDfa1_2(long active0)
{
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) {
@ -766,14 +723,21 @@ private final int jjMoveStringLiteralDfa1_2(long active0)
}
return jjStartNfa_2(0, active0);
}
private final int jjMoveNfa_2(int startState, int curPos)
private int jjStartNfaWithStates_2(int pos, int kind, int state)
{
jjmatchedKind = kind;
jjmatchedPos = pos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return pos + 1; }
return jjMoveNfa_2(state, pos + 1);
}
private int jjMoveNfa_2(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
jjnewStateCnt = 5;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
int kind = 0x7fffffff;
for (;;)
{
if (++jjround == 0x7fffffff)
@ -781,7 +745,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
if (curChar < 64)
{
long l = 1L << curChar;
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -826,7 +790,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
else if (curChar < 128)
{
long l = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -852,7 +816,7 @@ private final int jjMoveNfa_2(int startState, int curPos)
long l1 = 1L << (hiByte & 077);
int i2 = (curChar & 0xff) >> 6;
long l2 = 1L << (curChar & 077);
MatchLoop: do
do
{
switch(jjstateSet[--i])
{
@ -895,22 +859,28 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
{
case 0:
return ((jjbitVec2[i2] & l2) != 0L);
default :
default :
if ((jjbitVec0[i1] & l1) != 0L)
return true;
return false;
}
}

/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50",
"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117",
"\135", null, null, "\124\117", "\175", null, null, };

/** Lexer state names. */
public static final String[] lexStateNames = {
"Boost",
"RangeEx",
"RangeIn",
"DEFAULT",
"Boost",
"RangeEx",
"RangeIn",
"DEFAULT",
};

/** Lex State array. */
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1,
3, -1, -1, -1, 3, -1, -1,
@ -925,15 +895,18 @@ protected CharStream input_stream;
private final int[] jjrounds = new int[33];
private final int[] jjstateSet = new int[66];
protected char curChar;
public PrecedenceQueryParserTokenManager(CharStream stream)
{
/** Constructor. */
public PrecedenceQueryParserTokenManager(CharStream stream){
input_stream = stream;
}
public PrecedenceQueryParserTokenManager(CharStream stream, int lexState)
{

/** Constructor. */
public PrecedenceQueryParserTokenManager(CharStream stream, int lexState){
this(stream);
SwitchTo(lexState);
}

/** Reinitialise parser. */
public void ReInit(CharStream stream)
{
jjmatchedPos = jjnewStateCnt = 0;
@ -941,18 +914,22 @@ public void ReInit(CharStream stream)
input_stream = stream;
ReInitRounds();
}
private final void ReInitRounds()
private void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 33; i-- > 0;)
jjrounds[i] = 0x80000000;
}

/** Reinitialise parser. */
public void ReInit(CharStream stream, int lexState)
{
ReInit(stream);
SwitchTo(lexState);
}

/** Switch to specified lex state. */
public void SwitchTo(int lexState)
{
if (lexState >= 4 || lexState < 0)
@ -963,14 +940,25 @@ public void SwitchTo(int lexState)

protected Token jjFillToken()
{
Token t = Token.newToken(jjmatchedKind);
t.kind = jjmatchedKind;
final Token t;
final String curTokenImage;
final int beginLine;
final int endLine;
final int beginColumn;
final int endColumn;
String im = jjstrLiteralImages[jjmatchedKind];
t.image = (im == null) ? input_stream.GetImage() : im;
t.beginLine = input_stream.getBeginLine();
t.beginColumn = input_stream.getBeginColumn();
t.endLine = input_stream.getEndLine();
t.endColumn = input_stream.getEndColumn();
curTokenImage = (im == null) ? input_stream.GetImage() : im;
beginLine = input_stream.getBeginLine();
beginColumn = input_stream.getBeginColumn();
endLine = input_stream.getEndLine();
endColumn = input_stream.getEndColumn();
t = Token.newToken(jjmatchedKind, curTokenImage);

t.beginLine = beginLine;
t.endLine = endLine;
t.beginColumn = beginColumn;
t.endColumn = endColumn;

return t;
}

@ -981,22 +969,21 @@ int jjround;
int jjmatchedPos;
int jjmatchedKind;

/** Get the next Token. */
public Token getNextToken()
{
int kind;
Token specialToken = null;
Token matchedToken;
int curPos = 0;

EOFLoop :
for (;;)
{
try
{
{
try
{
curChar = input_stream.BeginToken();
}
}
catch(java.io.IOException e)
{
{
jjmatchedKind = 0;
matchedToken = jjFillToken();
return matchedToken;
@ -1066,4 +1053,31 @@ public Token getNextToken()
}
}

private void jjCheckNAdd(int state)
{
if (jjrounds[state] != jjround)
{
jjstateSet[jjnewStateCnt++] = state;
jjrounds[state] = jjround;
}
}
private void jjAddStates(int start, int end)
{
do {
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
} while (start++ != end);
}
private void jjCheckNAddTwoStates(int state1, int state2)
{
jjCheckNAdd(state1);
jjCheckNAdd(state2);
}

private void jjCheckNAddStates(int start, int end)
{
do {
jjCheckNAdd(jjnextStates[start]);
} while (start++ != end);
}

}

@ -1,4 +1,5 @@
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
/* Generated By:JavaCC: Do not edit this line. Token.java Version 4.1 */
/* JavaCCOptions:TOKEN_EXTENDS=,KEEP_LINE_COL=null */
package org.apache.lucene.queryParser.precedence;

/**
@ -14,12 +15,14 @@ public class Token {
*/
public int kind;

/**
* beginLine and beginColumn describe the position of the first character
* of this token; endLine and endColumn describe the position of the
* last character of this token.
*/
public int beginLine, beginColumn, endLine, endColumn;
/** The line number of the first character of this Token. */
public int beginLine;
/** The column number of the first character of this Token. */
public int beginColumn;
/** The line number of the last character of this Token. */
public int endLine;
/** The column number of the last character of this Token. */
public int endColumn;

/**
* The string image of the token.
@ -50,6 +53,40 @@ public class Token {
*/
public Token specialToken;

/**
* An optional attribute value of the Token.
* Tokens which are not used as syntactic sugar will often contain
* meaningful values that will be used later on by the compiler or
* interpreter. This attribute value is often different from the image.
* Any subclass of Token that actually wants to return a non-null value can
* override this method as appropriate.
*/
public Object getValue() {
return null;
}

/**
* No-argument constructor
*/
public Token() {}

/**
* Constructs a new token for the specified Image.
*/
public Token(int kind)
{
this(kind, null);
}

/**
* Constructs a new token for the specified Image and Kind.
*/
public Token(int kind, String image)
{
this.kind = kind;
this.image = image;
}

/**
* Returns the image.
*/
@ -63,19 +100,25 @@ public class Token {
* can create and return subclass objects based on the value of ofKind.
* Simply add the cases to the switch for all those special cases.
* For example, if you have a subclass of Token called IDToken that
* you want to create if ofKind is ID, simlpy add something like :
* you want to create if ofKind is ID, simply add something like :
*
* case MyParserConstants.ID : return new IDToken();
* case MyParserConstants.ID : return new IDToken(ofKind, image);
*
* to the following switch statement. Then you can cast matchedToken
* variable to the appropriate type and use it in your lexical actions.
* variable to the appropriate type and use sit in your lexical actions.
*/
public static final Token newToken(int ofKind)
public static Token newToken(int ofKind, String image)
{
switch(ofKind)
{
default : return new Token(ofKind, image);
}
}

public static Token newToken(int ofKind)
{
switch(ofKind)
{
default : return new Token();
return newToken(ofKind, null);
}

}
/* JavaCC - OriginalChecksum=0dc5808f2ab8aac8775ea9175fa2cb51 (do not edit this line) */

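The newToken javadoc above describes the extension point; a hypothetical subclass following that pattern might look like this (IDToken and the getValue override are illustrative only, not part of this commit):

class IDToken extends Token {
  IDToken(int ofKind, String image) {
    super(ofKind, image);
  }
  public Object getValue() {
    return image;   // expose the matched identifier text to later phases
  }
}
// and in Token.newToken(int, String) one would add:
//   case MyParserConstants.ID : return new IDToken(ofKind, image);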
@ -1,19 +1,22 @@
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 4.1 */
/* JavaCCOptions: */
package org.apache.lucene.queryParser.precedence;

/** Token Manager Error. */
public class TokenMgrError extends Error
{

/*
* Ordinals for various reasons why an Error of this type can be thrown.
*/

/**
* Lexical error occured.
* Lexical error occurred.
*/
static final int LEXICAL_ERROR = 0;

/**
* An attempt wass made to create a second instance of a static token manager.
* An attempt was made to create a second instance of a static token manager.
*/
static final int STATIC_LEXER_ERROR = 1;

@ -34,7 +37,7 @@ public class TokenMgrError extends Error
int errorCode;

/**
* Replaces unprintable characters by their espaced (or unicode escaped)
* Replaces unprintable characters by their escaped (or unicode escaped)
* equivalents in the given string
*/
protected static final String addEscapes(String str) {
@ -85,12 +88,12 @@ public class TokenMgrError extends Error
/**
* Returns a detailed message for the Error when it is thrown by the
* token manager to indicate a lexical error.
* Parameters :
* EOFSeen : indicates if EOF caused the lexicl error
* curLexState : lexical state in which this error occured
* errorLine : line number when the error occured
* errorColumn : column number when the error occured
* errorAfter : prefix that was seen before this error occured
* Parameters :
* EOFSeen : indicates if EOF caused the lexical error
* curLexState : lexical state in which this error occurred
* errorLine : line number when the error occurred
* errorColumn : column number when the error occurred
* errorAfter : prefix that was seen before this error occurred
* curchar : the offending character
* Note: You can customize the lexical error message by modifying this method.
*/
@ -105,7 +108,7 @@ public class TokenMgrError extends Error
/**
* You can also modify the body of this method to customize your error messages.
* For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
* of end-users concern, so you can return something like :
* of end-users concern, so you can return something like :
*
* "Internal Error : Please file a bug report .... "
*
@ -119,15 +122,19 @@ public class TokenMgrError extends Error
* Constructors of various flavors follow.
*/

/** No arg constructor. */
public TokenMgrError() {
}

/** Constructor with message and reason. */
public TokenMgrError(String message, int reason) {
super(message);
errorCode = reason;
}

/** Full Constructor. */
public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=43f834e732189cec173ab002c94ae97f (do not edit this line) */

@ -22,7 +22,7 @@ import java.io.Reader;
import junit.framework.TestCase;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.ISOLatin1AccentFilter;
import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
@ -111,7 +111,7 @@ class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer {
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
result = new StandardFilter(result);
result = new ISOLatin1AccentFilter(result);
result = new ASCIIFoldingFilter(result);
result = new LowerCaseFilter(result);
return result;
}

@ -181,48 +181,6 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {

}

/*
* (non-Javadoc)
*
* @see
* org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis
* .Token)
*/
@Override
public Token next(Token reusableToken) throws IOException {

if (multiToken > 0) {
reusableToken.setTermBuffer("multi" + (multiToken + 1));
reusableToken.setStartOffset(prevStartOffset);
reusableToken.setEndOffset(prevEndOffset);
reusableToken.setType(prevType);
reusableToken.setPositionIncrement(0);
multiToken--;
return reusableToken;
} else {
boolean next = (reusableToken = input.next(token)) != null;
if (next == false) {
return null;
}
prevType = reusableToken.type();
prevStartOffset = reusableToken.startOffset();
prevEndOffset = reusableToken.endOffset();
String text = reusableToken.term();
if (text.equals("triplemulti")) {
multiToken = 2;
return reusableToken;
} else if (text.equals("multi")) {
multiToken = 1;
return reusableToken;
} else {
return reusableToken;
}
}

}

private Token token = new Token();

public final boolean incrementToken() throws java.io.IOException {
if (multiToken > 0) {
termAtt.setTermBuffer("multi" + (multiToken + 1));
@ -282,30 +240,6 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}

private Token token = new Token();

/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
@Override
public Token next(Token reusableToken) throws IOException {
while (null != (reusableToken = input.next(token))) {
String term = reusableToken.term();
if (term.equals("the")) {
// stopword, do nothing
} else if (term.equals("quick")) {
reusableToken.setPositionIncrement(2);
return reusableToken;
} else {
reusableToken.setPositionIncrement(1);
return reusableToken;
}
}
return null;
}

public final boolean incrementToken() throws java.io.IOException {
while (input.incrementToken()) {
if (termAtt.term().equals("the")) {

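The deletions above show the general shape of the test-filter migration: a next(Token) override becomes incrementToken() plus attributes fetched once in the constructor. As a self-contained sketch (not part of the commit; it mirrors the position-increment test filter above):

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

final class PosIncrFilterSketch extends TokenFilter {
  private final TermAttribute termAtt;
  private final PositionIncrementAttribute posIncrAtt;

  PosIncrFilterSketch(TokenStream input) {
    super(input);
    termAtt = addAttribute(TermAttribute.class);
    posIncrAtt = addAttribute(PositionIncrementAttribute.class);
  }

  public boolean incrementToken() throws IOException {
    while (input.incrementToken()) {
      String term = termAtt.term();
      if (term.equals("the")) {
        // stopword: skip it, exactly like the old next(Token) loop
      } else if (term.equals("quick")) {
        posIncrAtt.setPositionIncrement(2);
        return true;
      } else {
        posIncrAtt.setPositionIncrement(1);
        return true;
      }
    }
    return false;   // end of stream replaces returning null
  }
}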
@ -175,48 +175,6 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {

}

/*
* (non-Javadoc)
*
* @see
* org.apache.lucene.analysis.TokenStream#next(org.apache.lucene.analysis
* .Token)
*/
@Override
public Token next(Token reusableToken) throws IOException {

if (multiToken > 0) {
reusableToken.setTermBuffer("multi" + (multiToken + 1));
reusableToken.setStartOffset(prevStartOffset);
reusableToken.setEndOffset(prevEndOffset);
reusableToken.setType(prevType);
reusableToken.setPositionIncrement(0);
multiToken--;
return reusableToken;
} else {
boolean next = (reusableToken = input.next(token)) != null;
if (next == false) {
return null;
}
prevType = reusableToken.type();
prevStartOffset = reusableToken.startOffset();
prevEndOffset = reusableToken.endOffset();
String text = reusableToken.term();
if (text.equals("triplemulti")) {
multiToken = 2;
return reusableToken;
} else if (text.equals("multi")) {
multiToken = 1;
return reusableToken;
} else {
return reusableToken;
}
}

}

private Token token = new Token();

public final boolean incrementToken() throws java.io.IOException {
if (multiToken > 0) {
termAtt.setTermBuffer("multi" + (multiToken + 1));
@ -276,30 +234,6 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
}

private Token token = new Token();

/*
* (non-Javadoc)
*
* @see org.apache.lucene.analysis.TokenStream#next()
*/
@Override
public Token next(Token reusableToken) throws IOException {
while (null != (reusableToken = input.next(token))) {
String term = reusableToken.term();
if (term.equals("the")) {
// stopword, do nothing
} else if (term.equals("quick")) {
reusableToken.setPositionIncrement(2);
return reusableToken;
} else {
reusableToken.setPositionIncrement(1);
return reusableToken;
}
}
return null;
}

public final boolean incrementToken() throws java.io.IOException {
while (input.incrementToken()) {
if (termAtt.term().equals("the")) {

@ -359,8 +359,8 @@ public class TestMultiFieldQPHelper extends LuceneTestCase {
}

private static class EmptyTokenStream extends TokenStream {
public Token next(final Token reusableToken) {
return null;
public boolean incrementToken() {
return false;
}
}
}

@ -357,8 +357,8 @@ public class TestMultiFieldQueryParserWrapper extends LuceneTestCase {
}

private static class EmptyTokenStream extends TokenStream {
public Token next(final Token reusableToken) {
return null;
public boolean incrementToken() {
return false;
}
}
}

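Under the new API the whole end-of-stream contract is the boolean return value, as the two hunks above show; a complete empty stream is just this sketch (not part of the commit):

import org.apache.lucene.analysis.TokenStream;

final class EmptyTokenStreamSketch extends TokenStream {
  public boolean incrementToken() {
    return false;   // no tokens; never populates any attributes
  }
}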
@ -113,33 +113,6 @@ public class TestQPHelper extends LocalizedTestCase {
boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;

public Token next(Token reusableToken) throws IOException {
Token token = reusableToken;

if (inPhrase) {
inPhrase = false;
token.setTermBuffer("phrase2");
token.setStartOffset(savedStart);
token.setEndOffset(savedEnd);
return reusableToken;
} else
while ((token = this.input.next(reusableToken)) != null) {
if (token.term().equals("phrase")) {
inPhrase = true;
savedStart = token.startOffset();
savedEnd = token.endOffset();
token.setTermBuffer("phrase1");
token.setStartOffset(savedStart);
token.setEndOffset(savedEnd);
return token;
} else if (!token.term().equals("stop"))
return token;
}

return null;

}

public boolean incrementToken() throws IOException {
if (inPhrase) {
inPhrase = false;
@ -1185,12 +1158,12 @@ public class TestQPHelper extends LocalizedTestCase {

private class CannedTokenStream extends TokenStream {
private int upto = 0;
final PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
final TermAttribute term = addAttribute(TermAttribute.class);
public boolean incrementToken() {
if (upto == 4) {
return false;
}
PositionIncrementAttribute posIncr = addAttribute(PositionIncrementAttribute.class);
TermAttribute term = addAttribute(TermAttribute.class);
if (upto == 0) {
posIncr.setPositionIncrement(1);
term.setTermBuffer("a");

@ -108,33 +108,6 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
boolean inPhrase = false;
int savedStart = 0, savedEnd = 0;

public Token next(Token reusableToken) throws IOException {
Token token = reusableToken;

if (inPhrase) {
inPhrase = false;
token.setTermBuffer("phrase2");
token.setStartOffset(savedStart);
token.setEndOffset(savedEnd);
return reusableToken;
} else
while ((token = this.input.next(reusableToken)) != null) {
if (token.term().equals("phrase")) {
inPhrase = true;
savedStart = token.startOffset();
savedEnd = token.endOffset();
token.setTermBuffer("phrase1");
token.setStartOffset(savedStart);
token.setEndOffset(savedEnd);
return token;
} else if (!token.term().equals("stop"))
return token;
}

return null;

}

public boolean incrementToken() throws IOException {
if (inPhrase) {
inPhrase = false;

@ -30,7 +30,7 @@ import org.tartarus.snowball.SnowballProgram;
*
* Available stemmers are listed in {@link org.tartarus.snowball.ext}.
*/
public class SnowballFilter extends TokenFilter {
public final class SnowballFilter extends TokenFilter {

private SnowballProgram stemmer;

@ -78,16 +78,4 @@ public class SnowballFilter extends TokenFilter {
return false;
}
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next(final Token reusableToken) throws java.io.IOException {
return super.next(reusableToken);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next() throws java.io.IOException {
return super.next();
}
}

@ -39,7 +39,7 @@ import java.util.*;
* EXPERIMENTAL !!!!!!!!!
* NOTE: This Tokenizer is considered experimental and the grammar is subject to change in the trunk and in follow up releases.
*/
public class WikipediaTokenizer extends Tokenizer {
public final class WikipediaTokenizer extends Tokenizer {
public static final String INTERNAL_LINK = "il";
public static final String EXTERNAL_LINK = "el";
//The URL part of the link, i.e. the first token
@ -187,18 +187,6 @@ public class WikipediaTokenizer extends Tokenizer {
this.termAtt = addAttribute(TermAttribute.class);
this.flagsAtt = addAttribute(FlagsAttribute.class);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next(final Token reusableToken) throws java.io.IOException {
return super.next(reusableToken);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next() throws java.io.IOException {
return super.next();
}

/*
* (non-Javadoc)

@ -33,7 +33,7 @@ import org.apache.lucene.util.AttributeSource;
* {@link TokenStream#reset()}, which repositions the
* stream to the first Token.
*/
public class CachingTokenFilter extends TokenFilter {
public final class CachingTokenFilter extends TokenFilter {
private List cache = null;
private Iterator iterator = null;
private AttributeSource.State finalState;
@ -41,18 +41,6 @@ public class CachingTokenFilter extends TokenFilter {
public CachingTokenFilter(TokenStream input) {
super(input);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next(final Token reusableToken) throws IOException {
return super.next(reusableToken);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next() throws IOException {
return super.next();
}

public final boolean incrementToken() throws IOException {
if (cache == null) {

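Making CachingTokenFilter final enforces the decorator pattern the commit message names: behavior is customized by wrapping, not by subclassing. A sketch of such a chain (not part of the commit; the filter choices are arbitrary):

import java.io.Reader;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

class DecoratorChainSketch {
  TokenStream build(Reader reader) {
    TokenStream ts = new WhitespaceTokenizer(reader);
    ts = new LowerCaseFilter(ts);        // decorate: put custom behavior here
    return new CachingTokenFilter(ts);   // then cache the finished chain
  }
}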
@ -114,18 +114,6 @@ public abstract class CharTokenizer extends Tokenizer {
offsetAtt.setOffset(finalOffset, finalOffset);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next(final Token reusableToken) throws IOException {
return super.next(reusableToken);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next() throws IOException {
return super.next();
}

public void reset(Reader input) throws IOException {
super.reset(input);
bufferIndex = 0;

@ -26,10 +26,12 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
* For instance, 'à' will be replaced by 'a'.
* <p>
*
* @deprecated in favor of {@link ASCIIFoldingFilter} which covers a superset
* of Latin 1. This class will be removed in Lucene 3.0.
* @deprecated If you build a new index, use {@link ASCIIFoldingFilter}
* which covers a superset of Latin 1.
* This class is included for use with existing
* indexes and will be removed in a future release (possibly Lucene 4.0).
*/
public class ISOLatin1AccentFilter extends TokenFilter {
public final class ISOLatin1AccentFilter extends TokenFilter {
public ISOLatin1AccentFilter(TokenStream input) {
super(input);
termAtt = addAttribute(TermAttribute.class);
@ -58,18 +60,6 @@ public class ISOLatin1AccentFilter extends TokenFilter {
return false;
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next(final Token reusableToken) throws java.io.IOException {
return super.next(reusableToken);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next() throws java.io.IOException {
return super.next();
}

/**
* To replace accented characters in a String by unaccented equivalents.
*/

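For new indexes the deprecation note above points to ASCIIFoldingFilter; a sketch of the recommended chain, mirroring the ASCIIAnalyzer change in the tests earlier in this commit (not itself part of the commit):

import java.io.Reader;
import org.apache.lucene.analysis.ASCIIFoldingFilter;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

class FoldingAnalyzerSketch extends Analyzer {
  public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result = new StandardTokenizer(reader);
    result = new StandardFilter(result);
    result = new ASCIIFoldingFilter(result);  // supersedes ISOLatin1AccentFilter
    return new LowerCaseFilter(result);
  }
}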
@ -27,7 +27,7 @@ import org.apache.lucene.util.AttributeSource;
/**
* Emits the entire input as a single token.
*/
public class KeywordTokenizer extends Tokenizer {
public final class KeywordTokenizer extends Tokenizer {

private static final int DEFAULT_BUFFER_SIZE = 256;

@ -88,18 +88,6 @@ public class KeywordTokenizer extends Tokenizer {
offsetAtt.setOffset(finalOffset, finalOffset);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next(final Token reusableToken) throws IOException {
return super.next(reusableToken);
}

/** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
* not be overridden. Delegates to the backwards compatibility layer. */
public final Token next() throws IOException {
return super.next();
}

public void reset(Reader input) throws IOException {
super.reset(input);
this.done = false;

@ -1,110 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.analysis;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;


/**
* A SinkTokenizer can be used to cache Tokens for use in an Analyzer
* <p/>
* WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API.
* If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers
* the same functionality.
* @see TeeTokenFilter
* @deprecated Use {@link TeeSinkTokenFilter} instead
*
**/
public class SinkTokenizer extends Tokenizer {
protected List/*<Token>*/ lst = new ArrayList/*<Token>*/();
protected Iterator/*<Token>*/ iter;

public SinkTokenizer(List/*<Token>*/ input) {
this.lst = input;
if (this.lst == null) this.lst = new ArrayList/*<Token>*/();
}

public SinkTokenizer() {
this.lst = new ArrayList/*<Token>*/();
}

public SinkTokenizer(int initCap){
this.lst = new ArrayList/*<Token>*/(initCap);
}

/**
* Get the tokens in the internal List.
* <p/>
* WARNING: Adding tokens to this list requires the {@link #reset()} method to be called in order for them
* to be made available. Also, this Tokenizer does nothing to protect against {@link java.util.ConcurrentModificationException}s
* in the case of adds happening while {@link #next(org.apache.lucene.analysis.Token)} is being called.
* <p/>
* WARNING: Since this SinkTokenizer can be reset and the cached tokens made available again, do not modify them. Modify clones instead.
*
* @return A List of {@link org.apache.lucene.analysis.Token}s
*/
public List/*<Token>*/ getTokens() {
return lst;
}

/**
* Returns the next token out of the list of cached tokens
* @return The next {@link org.apache.lucene.analysis.Token} in the Sink.
* @throws IOException
*/
public Token next(final Token reusableToken) throws IOException {
assert reusableToken != null;
if (iter == null) iter = lst.iterator();
// Since this TokenStream can be reset we have to maintain the tokens as immutable
if (iter.hasNext()) {
Token nextToken = (Token) iter.next();
return (Token) nextToken.clone();
}
return null;
}

/**
* Override this method to cache only certain tokens, or new tokens based
* on the old tokens.
*
* @param t The {@link org.apache.lucene.analysis.Token} to add to the sink
*/
public void add(Token t) {
if (t == null) return;
lst.add((Token) t.clone());
}

public void close() throws IOException {
//nothing to close
input = null;
lst = null;
}

/**
* Reset the internal data structures to the start at the front of the list of tokens. Should be called
* if tokens were added to the list after an invocation of {@link #next(Token)}
* @throws IOException
*/
public void reset() throws IOException {
iter = lst.iterator();
}
}

@ -1,80 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.lucene.analysis;

import java.io.IOException;


/**
* Works in conjunction with the SinkTokenizer to provide the ability to set aside tokens
* that have already been analyzed. This is useful in situations where multiple fields share
* many common analysis steps and then go their separate ways.
* <p/>
* It is also useful for doing things like entity extraction or proper noun analysis as
* part of the analysis workflow and saving off those tokens for use in another field.
*
* <pre>
SinkTokenizer sink1 = new SinkTokenizer();
SinkTokenizer sink2 = new SinkTokenizer();

TokenStream source1 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader1), sink1), sink2);
TokenStream source2 = new TeeTokenFilter(new TeeTokenFilter(new WhitespaceTokenizer(reader2), sink1), sink2);

TokenStream final1 = new LowerCaseFilter(source1);
TokenStream final2 = source2;
TokenStream final3 = new EntityDetect(sink1);
TokenStream final4 = new URLDetect(sink2);

d.add(new Field("f1", final1));
d.add(new Field("f2", final2));
d.add(new Field("f3", final3));
d.add(new Field("f4", final4));
* </pre>
* In this example, <code>sink1</code> and <code>sink2<code> will both get tokens from both
* <code>reader1</code> and <code>reader2</code> after whitespace tokenizer
* and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired.
* It is important, that tees are consumed before sinks (in the above example, the field names must be
* less the sink's field names).
* Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene
<p/>
*
* See <a href="http://issues.apache.org/jira/browse/LUCENE-1058">LUCENE-1058</a>.
* <p/>
* WARNING: {@link TeeTokenFilter} and {@link SinkTokenizer} only work with the old TokenStream API.
* If you switch to the new API, you need to use {@link TeeSinkTokenFilter} instead, which offers
* the same functionality.

* @see SinkTokenizer
* @deprecated Use {@link TeeSinkTokenFilter} instead
**/
public class TeeTokenFilter extends TokenFilter {
SinkTokenizer sink;

public TeeTokenFilter(TokenStream input, SinkTokenizer sink) {
super(input);
this.sink = sink;
}

public Token next(final Token reusableToken) throws IOException {
assert reusableToken != null;
Token nextToken = input.next(reusableToken);
sink.add(nextToken);
return nextToken;
}

}

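The two files deleted above are replaced by TeeSinkTokenFilter, whose javadoc their own warnings point to. A sketch of the equivalent wiring under the new API (not part of the commit; reader1/reader2 are placeholders as in the old javadoc example):

import java.io.Reader;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TeeSinkTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

class TeeSinkSketch {
  void wire(Reader reader1, Reader reader2) {
    TeeSinkTokenFilter source1 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader1));
    TeeSinkTokenFilter source2 = new TeeSinkTokenFilter(new WhitespaceTokenizer(reader2));
    // each sink replays what its tee(s) saw; hook source2 into source1's sinks too
    TeeSinkTokenFilter.SinkTokenStream sink1 = source1.newSinkTokenStream();
    TeeSinkTokenFilter.SinkTokenStream sink2 = source1.newSinkTokenStream();
    source2.addSinkTokenStream(sink1);
    source2.addSinkTokenStream(sink2);
    TokenStream final1 = new LowerCaseFilter(source1);
    TokenStream final2 = source2;
    // consume final1 and final2 first, then sink1 and sink2
  }
}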
@ -867,9 +867,6 @@ public class Token extends AttributeImpl
if (payload !=null) {
to.payload = (Payload) payload.clone();
}
// remove the following optimization in 3.0 when old TokenStream API removed:
} else if (target instanceof TokenWrapper) {
((TokenWrapper) target).delegate = (Token) this.clone();
} else {
initTermBuffer();
((TermAttribute) target).setTermBuffer(termBuffer, 0, termLength);

@ -18,18 +18,10 @@ package org.apache.lucene.analysis;
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.IdentityHashMap;
|
||||
|
||||
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.util.Attribute;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
@ -78,103 +70,19 @@ import org.apache.lucene.util.AttributeSource;
 * You can find some example code for the new API in the analysis package level
 * Javadoc.
 * <p>
 * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>
 * , e. g. for buffering purposes (see {@link CachingTokenFilter},
 * Sometimes it is desirable to capture a current state of a <code>TokenStream</code>,
 * e.g. for buffering purposes (see {@link CachingTokenFilter},
 * {@link TeeSinkTokenFilter}). For this usecase
 * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState}
 * can be used.
 */
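A minimal sketch of the captureState/restoreState idiom described above: a hypothetical filter that emits every token twice by snapshotting the attribute state and replaying it on the next call. Only the AttributeSource methods named in this diff are assumed.

// Hypothetical illustration, not part of this commit: duplicates each token
// by capturing the full attribute state and restoring it one call later.
public final class RepeatTokenFilter extends TokenFilter {
  private AttributeSource.State lastState; // snapshot of all attributes
  private boolean pendingRepeat = false;

  public RepeatTokenFilter(TokenStream input) {
    super(input);
  }

  public boolean incrementToken() throws IOException {
    if (pendingRepeat) {
      restoreState(lastState); // replay the buffered token state
      pendingRepeat = false;
      return true;
    }
    if (input.incrementToken()) {
      lastState = captureState(); // buffer the current state for the repeat
      pendingRepeat = true;
      return true;
    }
    return false;
  }
}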
public abstract class TokenStream extends AttributeSource {

  /** @deprecated Remove this when old API is removed! */
  private static final AttributeFactory DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY
    = new TokenWrapperAttributeFactory(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY);

  /** @deprecated Remove this when old API is removed! */
  private final TokenWrapper tokenWrapper;

  /** @deprecated Remove this when old API is removed! */
  private static boolean onlyUseNewAPI = false;

  /** @deprecated Remove this when old API is removed! */
  private final MethodSupport supportedMethods = getSupportedMethods(this.getClass());

  /** @deprecated Remove this when old API is removed! */
  private static final class MethodSupport {
    final boolean hasIncrementToken, hasReusableNext, hasNext;

    MethodSupport(Class<? extends TokenStream> clazz) {
      hasIncrementToken = isMethodOverridden(clazz, "incrementToken");
      hasReusableNext = isMethodOverridden(clazz, "next", Token.class);
      hasNext = isMethodOverridden(clazz, "next");
    }

    private static boolean isMethodOverridden(Class<? extends TokenStream> clazz, String name, Class... params) {
      try {
        return clazz.getMethod(name, params).getDeclaringClass() != TokenStream.class;
      } catch (NoSuchMethodException e) {
        // should not happen
        throw new RuntimeException(e);
      }
    }
  }

  /** @deprecated Remove this when old API is removed! */
  private static final IdentityHashMap<Class<? extends TokenStream>,MethodSupport> knownMethodSupport =
    new IdentityHashMap<Class<? extends TokenStream>,MethodSupport>();

  /** @deprecated Remove this when old API is removed! */
  private static MethodSupport getSupportedMethods(Class<? extends TokenStream> clazz) {
    MethodSupport supportedMethods;
    synchronized(knownMethodSupport) {
      supportedMethods = knownMethodSupport.get(clazz);
      if (supportedMethods == null) {
        knownMethodSupport.put(clazz, supportedMethods = new MethodSupport(clazz));
      }
    }
    return supportedMethods;
  }

  /** @deprecated Remove this when old API is removed! */
  private static final class TokenWrapperAttributeFactory extends AttributeFactory {
    private final AttributeFactory delegate;

    private TokenWrapperAttributeFactory(AttributeFactory delegate) {
      this.delegate = delegate;
    }

    public AttributeImpl createAttributeInstance(Class<? extends Attribute> attClass) {
      return attClass.isAssignableFrom(TokenWrapper.class)
        ? new TokenWrapper()
        : delegate.createAttributeInstance(attClass);
    }

    // this is needed for TeeSinkTokenStream's check for compatibility of AttributeSource,
    // so two TokenStreams using old API have the same AttributeFactory wrapped by this one.
    public boolean equals(Object other) {
      if (this == other) return true;
      if (other instanceof TokenWrapperAttributeFactory) {
        final TokenWrapperAttributeFactory af = (TokenWrapperAttributeFactory) other;
        return this.delegate.equals(af.delegate);
      }
      return false;
    }

    public int hashCode() {
      return delegate.hashCode() ^ 0x0a45ff31;
    }
  }

  /**
   * A TokenStream using the default attribute factory.
   */
  protected TokenStream() {
    super(onlyUseNewAPI
      ? AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY
      : TokenStream.DEFAULT_TOKEN_WRAPPER_ATTRIBUTE_FACTORY
    );
    tokenWrapper = initTokenWrapper(null);
    check();
    super();
  }

/**
@ -182,102 +90,13 @@ public abstract class TokenStream extends AttributeSource {
   */
  protected TokenStream(AttributeSource input) {
    super(input);
    tokenWrapper = initTokenWrapper(input);
    check();
  }

  /**
   * A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances.
   */
  protected TokenStream(AttributeFactory factory) {
    super(onlyUseNewAPI
      ? factory
      : new TokenWrapperAttributeFactory(factory)
    );
    tokenWrapper = initTokenWrapper(null);
    check();
    super(factory);
  }

  /** @deprecated Remove this when old API is removed! */
  private TokenWrapper initTokenWrapper(AttributeSource input) {
    if (onlyUseNewAPI) {
      // no wrapper needed
      return null;
    } else {
      // if possible get the wrapper from the filter's input stream
      if (input instanceof TokenStream && ((TokenStream) input).tokenWrapper != null) {
        return ((TokenStream) input).tokenWrapper;
      }
      // check that all attributes are implemented by the same TokenWrapper instance
      final Attribute att = addAttribute(TermAttribute.class);
      if (att instanceof TokenWrapper &&
          addAttribute(TypeAttribute.class) == att &&
          addAttribute(PositionIncrementAttribute.class) == att &&
          addAttribute(FlagsAttribute.class) == att &&
          addAttribute(OffsetAttribute.class) == att &&
          addAttribute(PayloadAttribute.class) == att
      ) {
        return (TokenWrapper) att;
      } else {
        throw new UnsupportedOperationException(
          "If onlyUseNewAPI is disabled, all basic Attributes must be implemented by the internal class "+
          "TokenWrapper. Please make sure, that all TokenStreams/TokenFilters in this chain have been "+
          "instantiated with this flag disabled and do not add any custom instances for the basic Attributes!"
        );
      }
    }
  }

  /** @deprecated Remove this when old API is removed! */
  private void check() {
    if (onlyUseNewAPI && !supportedMethods.hasIncrementToken) {
      throw new UnsupportedOperationException(getClass().getName()+" does not implement incrementToken() which is needed for onlyUseNewAPI.");
    }

    // a TokenStream subclass must at least implement one of the methods!
    if (!(supportedMethods.hasIncrementToken || supportedMethods.hasNext || supportedMethods.hasReusableNext)) {
      throw new UnsupportedOperationException(getClass().getName()+" does not implement any of incrementToken(), next(Token), next().");
    }
  }

  /**
   * For extra performance you can globally enable the new
   * {@link #incrementToken} API using {@link Attribute}s. There will be a
   * small, but in most cases negligible performance increase by enabling this,
   * but it only works if <b>all</b> <code>TokenStream</code>s use the new API and
   * implement {@link #incrementToken}. This setting can only be enabled
   * globally.
   * <P>
   * This setting only affects <code>TokenStream</code>s instantiated after this
   * call. All <code>TokenStream</code>s already created use the other setting.
   * <P>
   * All core {@link Analyzer}s are compatible with this setting; if you have
   * your own <code>TokenStream</code>s that are also compatible, you should enable
   * this.
   * <P>
   * When enabled, tokenization may throw {@link UnsupportedOperationException}s
   * if the whole tokenizer chain is not compatible, e.g. one of the
   * <code>TokenStream</code>s does not implement the new <code>TokenStream</code> API.
   * <P>
   * The default is <code>false</code>, so there is the fallback to the old API
   * available.
   *
   * @deprecated This setting will no longer be needed in Lucene 3.0 as the old
   * API will be removed.
   */
  public static void setOnlyUseNewAPI(boolean onlyUseNewAPI) {
    TokenStream.onlyUseNewAPI = onlyUseNewAPI;
  }

  /**
   * Returns whether only the new API is used.
   *
   * @see #setOnlyUseNewAPI
   * @deprecated This setting will no longer be needed in Lucene 3.0 as
   * the old API will be removed.
   */
  public static boolean getOnlyUseNewAPI() {
    return onlyUseNewAPI;
  }
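For context, the removed switch was flipped once at startup, before any streams were constructed; a sketch (reader stands in for a real java.io.Reader):

// Sketch: opt in globally, then build streams; chains that do not implement
// incrementToken() fail fast with UnsupportedOperationException.
TokenStream.setOnlyUseNewAPI(true);
TokenStream stream = new StandardTokenizer(reader); // created after the call, so new API only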

  /**
@ -293,7 +112,7 @@ public abstract class TokenStream extends AttributeSource {
   * <p>
   * This method is called for every token of a document, so an efficient
   * implementation is crucial for good performance. To avoid calls to
   * {@link #addAttribute(Class)} and {@link #getAttribute(Class)} or downcasts,
   * {@link #addAttribute(Class)} and {@link #getAttribute(Class)},
   * references to all {@link AttributeImpl}s that this stream uses should be
   * retrieved during instantiation.
   * <p>
@ -303,25 +122,8 @@ public abstract class TokenStream extends AttributeSource {
   * {@link #incrementToken()}.
   *
   * @return false for end of stream; true otherwise
   *
   * <p>
   * <b>Note that this method will be defined abstract in Lucene
   * 3.0.</b>
   */
  public boolean incrementToken() throws IOException {
    assert tokenWrapper != null;

    final Token token;
    if (supportedMethods.hasReusableNext) {
      token = next(tokenWrapper.delegate);
    } else {
      assert supportedMethods.hasNext;
      token = next();
    }
    if (token == null) return false;
    tokenWrapper.delegate = token;
    return true;
  }
  public abstract boolean incrementToken() throws IOException;
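A minimal sketch of what implementing the now-abstract method looks like, following the advice above to fetch attribute references once during construction instead of per token. The filter itself is hypothetical; the cast is redundant where addAttribute is generic.

// Hypothetical filter: upper-cases each term in place via the new API.
public final class UpperCaseFilter extends TokenFilter {
  private final TermAttribute termAtt; // retrieved once, per the Javadoc above

  public UpperCaseFilter(TokenStream input) {
    super(input);
    termAtt = (TermAttribute) addAttribute(TermAttribute.class);
  }

  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false; // end of stream, no attribute state touched
    }
    final char[] buffer = termAtt.termBuffer();
    final int length = termAtt.termLength();
    for (int i = 0; i < length; i++) {
      buffer[i] = Character.toUpperCase(buffer[i]);
    }
    return true;
  }
}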

  /**
   * This method is called by the consumer after the last token has been
@ -340,83 +142,6 @@ public abstract class TokenStream extends AttributeSource {
    // do nothing by default
  }

  /**
   * Returns the next token in the stream, or null at EOS. When possible, the
   * input Token should be used as the returned Token (this gives fastest
   * tokenization performance), but this is not required and a new Token may be
   * returned. Callers may re-use a single Token instance for successive calls
   * to this method.
   * <p>
   * This implicitly defines a "contract" between consumers (callers of this
   * method) and producers (implementations of this method that are the source
   * for tokens):
   * <ul>
   * <li>A consumer must fully consume the previously returned {@link Token}
   * before calling this method again.</li>
   * <li>A producer must call {@link Token#clear()} before setting the fields in
   * it and returning it</li>
   * </ul>
   * Also, the producer must make no assumptions about a {@link Token} after it
   * has been returned: the caller may arbitrarily change it. If the producer
   * needs to hold onto the {@link Token} for subsequent calls, it must clone()
   * it before storing it. Note that a {@link TokenFilter} is considered a
   * consumer.
   *
   * @param reusableToken a {@link Token} that may or may not be used to return;
   * this parameter should never be null (the callee is not required to
   * check for null before using it, but it is a good idea to assert that
   * it is not null.)
   * @return next {@link Token} in the stream or null if end-of-stream was hit
   * @deprecated The new {@link #incrementToken()} and {@link AttributeSource}
   * APIs should be used instead.
   */
  public Token next(final Token reusableToken) throws IOException {
    assert reusableToken != null;

    if (tokenWrapper == null)
      throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");

    if (supportedMethods.hasIncrementToken) {
      tokenWrapper.delegate = reusableToken;
      return incrementToken() ? tokenWrapper.delegate : null;
    } else {
      assert supportedMethods.hasNext;
      return next();
    }
  }

  /**
   * Returns the next {@link Token} in the stream, or null at EOS.
   *
   * @deprecated The returned Token is a "full private copy" (not re-used across
   * calls to {@link #next()}) but will be slower than calling
   * {@link #next(Token)} or using the new {@link #incrementToken()}
   * method with the new {@link AttributeSource} API.
   */
  public Token next() throws IOException {
    if (tokenWrapper == null)
      throw new UnsupportedOperationException("This TokenStream only supports the new Attributes API.");

    final Token nextToken;
    if (supportedMethods.hasIncrementToken) {
      final Token savedDelegate = tokenWrapper.delegate;
      tokenWrapper.delegate = new Token();
      nextToken = incrementToken() ? tokenWrapper.delegate : null;
      tokenWrapper.delegate = savedDelegate;
    } else {
      assert supportedMethods.hasReusableNext;
      nextToken = next(new Token());
    }

    if (nextToken != null) {
      Payload p = nextToken.getPayload();
      if (p != null) {
        nextToken.setPayload((Payload) p.clone());
      }
    }
    return nextToken;
  }

  /**
   * Resets this stream to the beginning. This is an optional operation, so
   * subclasses may or may not implement this method. {@link #reset()} is not needed for
@ -1,166 +0,0 @@
package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.AttributeImpl;

/**
 * This class wraps a Token and supplies a single attribute instance
 * where the delegate token can be replaced.
 * @deprecated Will be removed, when old TokenStream API is removed.
 */
final class TokenWrapper extends AttributeImpl
  implements Cloneable, TermAttribute, TypeAttribute, PositionIncrementAttribute,
             FlagsAttribute, OffsetAttribute, PayloadAttribute {

  Token delegate;

  TokenWrapper() {
    this(new Token());
  }

  TokenWrapper(Token delegate) {
    this.delegate = delegate;
  }

  // TermAttribute:

  public String term() {
    return delegate.term();
  }

  public void setTermBuffer(char[] buffer, int offset, int length) {
    delegate.setTermBuffer(buffer, offset, length);
  }

  public void setTermBuffer(String buffer) {
    delegate.setTermBuffer(buffer);
  }

  public void setTermBuffer(String buffer, int offset, int length) {
    delegate.setTermBuffer(buffer, offset, length);
  }

  public char[] termBuffer() {
    return delegate.termBuffer();
  }

  public char[] resizeTermBuffer(int newSize) {
    return delegate.resizeTermBuffer(newSize);
  }

  public int termLength() {
    return delegate.termLength();
  }

  public void setTermLength(int length) {
    delegate.setTermLength(length);
  }

  // TypeAttribute:

  public String type() {
    return delegate.type();
  }

  public void setType(String type) {
    delegate.setType(type);
  }

  public void setPositionIncrement(int positionIncrement) {
    delegate.setPositionIncrement(positionIncrement);
  }

  public int getPositionIncrement() {
    return delegate.getPositionIncrement();
  }

  // FlagsAttribute

  public int getFlags() {
    return delegate.getFlags();
  }

  public void setFlags(int flags) {
    delegate.setFlags(flags);
  }

  // OffsetAttribute

  public int startOffset() {
    return delegate.startOffset();
  }

  public void setOffset(int startOffset, int endOffset) {
    delegate.setOffset(startOffset, endOffset);
  }

  public int endOffset() {
    return delegate.endOffset();
  }

  // PayloadAttribute

  public Payload getPayload() {
    return delegate.getPayload();
  }

  public void setPayload(Payload payload) {
    delegate.setPayload(payload);
  }

  // AttributeImpl

  public void clear() {
    delegate.clear();
  }

  public String toString() {
    return delegate.toString();
  }

  public int hashCode() {
    return delegate.hashCode();
  }

  public boolean equals(Object other) {
    if (other instanceof TokenWrapper) {
      return ((TokenWrapper) other).delegate.equals(this.delegate);
    }
    return false;
  }

  public Object clone() {
    return new TokenWrapper((Token) delegate.clone());
  }

  public void copyTo(AttributeImpl target) {
    if (target instanceof TokenWrapper) {
      ((TokenWrapper) target).delegate = (Token) this.delegate.clone();
    } else {
      this.delegate.copyTo(target);
    }
  }
}
@ -45,7 +45,7 @@ import org.apache.lucene.util.AttributeSource;
 * directory to your project and maintaining your own grammar-based tokenizer.
 */

public class StandardTokenizer extends Tokenizer {
public final class StandardTokenizer extends Tokenizer {
  /** A private instance of the JFlex-constructed scanner */
  private final StandardTokenizerImpl scanner;

@ -209,18 +209,6 @@ public class StandardTokenizer extends Tokenizer {
    offsetAtt.setOffset(finalOffset, finalOffset);
  }

  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
   * not be overridden. Delegates to the backwards compatibility layer. */
  public final Token next(final Token reusableToken) throws IOException {
    return super.next(reusableToken);
  }

  /** @deprecated Will be removed in Lucene 3.0. This method is final, as it should
   * not be overridden. Delegates to the backwards compatibility layer. */
  public final Token next() throws IOException {
    return super.next();
  }
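With StandardTokenizer now final, it can no longer be subclassed; per the decorator pattern this commit enforces, custom behavior is added by wrapping instead. A sketch (constructor signatures vary across versions; reader is a hypothetical java.io.Reader):

// Sketch: compose behavior by wrapping the final tokenizer in filters.
TokenStream stream = new StandardTokenizer(reader);
stream = new StandardFilter(stream);   // decorator: post-process StandardTokenizer tokens
stream = new LowerCaseFilter(stream);  // decorator: lower-case terms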

  /*
   * (non-Javadoc)
   *
@ -0,0 +1,111 @@
package org.apache.lucene.analysis;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.tokenattributes.TermAttribute;

import java.io.StringReader;

public class TestISOLatin1AccentFilter extends BaseTokenStreamTestCase {
  public void testU() throws Exception {
    TokenStream stream = new WhitespaceTokenizer(new StringReader("Des mot clés À LA CHAÎNE À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï IJ Ð Ñ Ò Ó Ô Õ Ö Ø Œ Þ Ù Ú Û Ü Ý Ÿ à á â ã ä å æ ç è é ê ë ì í î ï ij ð ñ ò ó ô õ ö ø œ ß þ ù ú û ü ý ÿ fi fl"));
    ISOLatin1AccentFilter filter = new ISOLatin1AccentFilter(stream);
    TermAttribute termAtt = filter.getAttribute(TermAttribute.class);
    assertTermEquals("Des", filter, termAtt);
    assertTermEquals("mot", filter, termAtt);
    assertTermEquals("cles", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("LA", filter, termAtt);
    assertTermEquals("CHAINE", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("A", filter, termAtt);
    assertTermEquals("AE", filter, termAtt);
    assertTermEquals("C", filter, termAtt);
    assertTermEquals("E", filter, termAtt);
    assertTermEquals("E", filter, termAtt);
    assertTermEquals("E", filter, termAtt);
    assertTermEquals("E", filter, termAtt);
    assertTermEquals("I", filter, termAtt);
    assertTermEquals("I", filter, termAtt);
    assertTermEquals("I", filter, termAtt);
    assertTermEquals("I", filter, termAtt);
    assertTermEquals("IJ", filter, termAtt);
    assertTermEquals("D", filter, termAtt);
    assertTermEquals("N", filter, termAtt);
    assertTermEquals("O", filter, termAtt);
    assertTermEquals("O", filter, termAtt);
    assertTermEquals("O", filter, termAtt);
    assertTermEquals("O", filter, termAtt);
    assertTermEquals("O", filter, termAtt);
    assertTermEquals("O", filter, termAtt);
    assertTermEquals("OE", filter, termAtt);
    assertTermEquals("TH", filter, termAtt);
    assertTermEquals("U", filter, termAtt);
    assertTermEquals("U", filter, termAtt);
    assertTermEquals("U", filter, termAtt);
    assertTermEquals("U", filter, termAtt);
    assertTermEquals("Y", filter, termAtt);
    assertTermEquals("Y", filter, termAtt);
    assertTermEquals("a", filter, termAtt);
    assertTermEquals("a", filter, termAtt);
    assertTermEquals("a", filter, termAtt);
    assertTermEquals("a", filter, termAtt);
    assertTermEquals("a", filter, termAtt);
    assertTermEquals("a", filter, termAtt);
    assertTermEquals("ae", filter, termAtt);
    assertTermEquals("c", filter, termAtt);
    assertTermEquals("e", filter, termAtt);
    assertTermEquals("e", filter, termAtt);
    assertTermEquals("e", filter, termAtt);
    assertTermEquals("e", filter, termAtt);
    assertTermEquals("i", filter, termAtt);
    assertTermEquals("i", filter, termAtt);
    assertTermEquals("i", filter, termAtt);
    assertTermEquals("i", filter, termAtt);
    assertTermEquals("ij", filter, termAtt);
    assertTermEquals("d", filter, termAtt);
    assertTermEquals("n", filter, termAtt);
    assertTermEquals("o", filter, termAtt);
    assertTermEquals("o", filter, termAtt);
    assertTermEquals("o", filter, termAtt);
    assertTermEquals("o", filter, termAtt);
    assertTermEquals("o", filter, termAtt);
    assertTermEquals("o", filter, termAtt);
    assertTermEquals("oe", filter, termAtt);
    assertTermEquals("ss", filter, termAtt);
    assertTermEquals("th", filter, termAtt);
    assertTermEquals("u", filter, termAtt);
    assertTermEquals("u", filter, termAtt);
    assertTermEquals("u", filter, termAtt);
    assertTermEquals("u", filter, termAtt);
    assertTermEquals("y", filter, termAtt);
    assertTermEquals("y", filter, termAtt);
    assertTermEquals("fi", filter, termAtt);
    assertTermEquals("fl", filter, termAtt);
    assertFalse(filter.incrementToken());
  }

  void assertTermEquals(String expected, TokenStream stream, TermAttribute termAtt) throws Exception {
    assertTrue(stream.incrementToken());
    assertEquals(expected, termAtt.term());
  }
}
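The assertTermEquals helper above is one instance of the general new-API consumer loop; a sketch (analyzer and reader are hypothetical, and the cast is redundant where addAttribute is generic):

// Sketch: generic consumer of any TokenStream via the attribute API.
TokenStream ts = analyzer.tokenStream("field", reader);
TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
while (ts.incrementToken()) {  // advance to the next token
  System.out.println(term.term());
}
ts.end();   // record end-of-stream state such as the final offset
ts.close();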
@ -18,8 +18,12 @@ package org.apache.lucene.analysis;
 */

import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.tokenattributes.TestSimpleAttributeImpls;
import org.apache.lucene.analysis.tokenattributes.*;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Attribute;
import org.apache.lucene.util.AttributeImpl;

import java.io.StringReader;

public class TestToken extends LuceneTestCase {

@ -219,4 +223,33 @@ public class TestToken extends LuceneTestCase {
    assertEquals(pl, copy.getPayload());
    assertNotSame(pl, copy.getPayload());
  }

  public interface SenselessAttribute extends Attribute {}

  public static final class SenselessAttributeImpl extends AttributeImpl implements SenselessAttribute {
    public void copyTo(AttributeImpl target) {}
    public void clear() {}
    public boolean equals(Object o) { return (o instanceof SenselessAttributeImpl); }
    public int hashCode() { return 0; }
  }

  public void testTokenAttributeFactory() throws Exception {
    TokenStream ts = new WhitespaceTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"));

    assertTrue("SenselessAttribute is not implemented by SenselessAttributeImpl",
      ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);

    assertTrue("TermAttribute is not implemented by Token",
      ts.addAttribute(TermAttribute.class) instanceof Token);
    assertTrue("OffsetAttribute is not implemented by Token",
      ts.addAttribute(OffsetAttribute.class) instanceof Token);
    assertTrue("FlagsAttribute is not implemented by Token",
      ts.addAttribute(FlagsAttribute.class) instanceof Token);
    assertTrue("PayloadAttribute is not implemented by Token",
      ts.addAttribute(PayloadAttribute.class) instanceof Token);
    assertTrue("PositionIncrementAttribute is not implemented by Token",
      ts.addAttribute(PositionIncrementAttribute.class) instanceof Token);
    assertTrue("TypeAttribute is not implemented by Token",
      ts.addAttribute(TypeAttribute.class) instanceof Token);
  }
}
@ -125,4 +125,20 @@ public class TestAttributeSource extends LuceneTestCase {
    assertEquals("Token should only be printed once", "("+tok.toString()+")", src.toString());
  }

  public void testDefaultAttributeFactory() throws Exception {
    AttributeSource src = new AttributeSource();

    assertTrue("TermAttribute is not implemented by TermAttributeImpl",
      src.addAttribute(TermAttribute.class) instanceof TermAttributeImpl);
    assertTrue("OffsetAttribute is not implemented by OffsetAttributeImpl",
      src.addAttribute(OffsetAttribute.class) instanceof OffsetAttributeImpl);
    assertTrue("FlagsAttribute is not implemented by FlagsAttributeImpl",
      src.addAttribute(FlagsAttribute.class) instanceof FlagsAttributeImpl);
    assertTrue("PayloadAttribute is not implemented by PayloadAttributeImpl",
      src.addAttribute(PayloadAttribute.class) instanceof PayloadAttributeImpl);
    assertTrue("PositionIncrementAttribute is not implemented by PositionIncrementAttributeImpl",
      src.addAttribute(PositionIncrementAttribute.class) instanceof PositionIncrementAttributeImpl);
    assertTrue("TypeAttribute is not implemented by TypeAttributeImpl",
      src.addAttribute(TypeAttribute.class) instanceof TypeAttributeImpl);
  }
}