LUCENE-6000: Removed StandardTokenizerInterface

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1630292 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Ryan Ernst 2014-10-09 05:58:07 +00:00
parent d5191d5674
commit 947a05c9c1
11 changed files with 19 additions and 92 deletions

View File

@ -156,6 +156,10 @@ API Changes
* LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods. * LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods.
(Robert Muir) (Robert Muir)
* LUCENE-6000: Removed StandardTokenizerInterface. Tokenizers now use
their jflex impl directly.
(Ryan Ernst)
Bug Fixes Bug Fixes
* LUCENE-5650: Enforce read-only access to any path outside the temporary * LUCENE-5650: Enforce read-only access to any path outside the temporary

View File

@ -49,7 +49,7 @@ import org.apache.lucene.util.AttributeFactory;
public final class ClassicTokenizer extends Tokenizer { public final class ClassicTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */ /** A private instance of the JFlex-constructed scanner */
private StandardTokenizerInterface scanner; private ClassicTokenizerImpl scanner;
public static final int ALPHANUM = 0; public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1; public static final int APOSTROPHE = 1;
@ -135,7 +135,7 @@ public final class ClassicTokenizer extends Tokenizer {
while(true) { while(true) {
int tokenType = scanner.getNextToken(); int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) { if (tokenType == ClassicTokenizerImpl.YYEOF) {
return false; return false;
} }

View File

@ -33,7 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* This class implements the classic lucene StandardTokenizer up until 3.0 * This class implements the classic lucene StandardTokenizer up until 3.0
*/ */
class ClassicTokenizerImpl implements StandardTokenizerInterface { class ClassicTokenizerImpl {
/** This character denotes the end of file */ /** This character denotes the end of file */
public static final int YYEOF = -1; public static final int YYEOF = -1;

View File

@ -33,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%% %%
%class ClassicTokenizerImpl %class ClassicTokenizerImpl
%implements StandardTokenizerInterface
%unicode 3.0 %unicode 3.0
%integer %integer
%function getNextToken %function getNextToken

View File

@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeFactory;
public final class StandardTokenizer extends Tokenizer { public final class StandardTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */ /** A private instance of the JFlex-constructed scanner */
private StandardTokenizerInterface scanner; private StandardTokenizerImpl scanner;
// TODO: how can we remove these old types?! // TODO: how can we remove these old types?!
public static final int ALPHANUM = 0; public static final int ALPHANUM = 0;
@ -150,7 +150,7 @@ public final class StandardTokenizer extends Tokenizer {
while(true) { while(true) {
int tokenType = scanner.getNextToken(); int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) { if (tokenType == StandardTokenizerImpl.YYEOF) {
return false; return false;
} }

View File

@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* </ul> * </ul>
*/ */
public final class StandardTokenizerImpl implements StandardTokenizerInterface { public final class StandardTokenizerImpl {
/** This character denotes the end of file */ /** This character denotes the end of file */
public static final int YYEOF = -1; public static final int YYEOF = -1;
@ -804,7 +804,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
zzAtEOF = true; zzAtEOF = true;
{ {
return StandardTokenizerInterface.YYEOF; return YYEOF;
} }
} }
else { else {

View File

@ -43,7 +43,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%final %final
%public %public
%class StandardTokenizerImpl %class StandardTokenizerImpl
%implements StandardTokenizerInterface
%function getNextToken %function getNextToken
%char %char
%buffer 255 %buffer 255
@ -118,7 +117,7 @@ ComplexContextEx = \p{LB:Complex_Context}
// UAX#29 WB1. sot ÷ // UAX#29 WB1. sot ÷
// WB2. ÷ eot // WB2. ÷ eot
// //
<<EOF>> { return StandardTokenizerInterface.YYEOF; } <<EOF>> { return YYEOF; }
// UAX#29 WB8. Numeric × Numeric // UAX#29 WB8. Numeric × Numeric
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric // WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric

View File

@ -1,74 +0,0 @@
package org.apache.lucene.analysis.standard;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import java.io.Reader;
import java.io.IOException;
/**
* Internal interface for supporting versioned grammars.
* @lucene.internal
*/
public interface StandardTokenizerInterface {

  /**
   * Token type signalling end of file; this is the value
   * {@link #getNextToken()} hands back once the input is exhausted.
   */
  int YYEOF = -1;

  /**
   * Writes the text of the current match into the given term attribute.
   *
   * @param t the attribute that receives the matched text
   */
  void getText(CharTermAttribute t);

  /**
   * Reports the current position.
   *
   * @return the current position
   */
  int yychar();

  /**
   * Points the scanner at a new input stream. The previous reader is
   * <b>not</b> closed by this call.
   *
   * Every internal variable is reset; the previous input stream
   * <b>cannot</b> be reused, because the internal buffer is discarded and lost.
   * The lexical state becomes {@code ZZ_INITIAL}.
   *
   * @param reader the new input stream
   */
  void yyreset(Reader reader);

  /**
   * Reports the length of the matched text region.
   *
   * @return the length of the matched text region
   */
  int yylength();

  /**
   * Continues scanning until the next regular expression matches,
   * the end of the input is reached, or an I/O error happens.
   *
   * @return the next token, or {@link #YYEOF} at end of stream
   * @exception IOException if any I/O error occurs
   */
  int getNextToken() throws IOException;

  /**
   * Sets the size of the scanner buffer, measured in chars.
   *
   * @param numChars the scanner buffer size in chars
   */
  void setBufferSize(int numChars);
}

View File

@ -47,7 +47,7 @@ import org.apache.lucene.util.AttributeFactory;
public final class UAX29URLEmailTokenizer extends Tokenizer { public final class UAX29URLEmailTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */ /** A private instance of the JFlex-constructed scanner */
private final StandardTokenizerInterface scanner; private final UAX29URLEmailTokenizerImpl scanner;
public static final int ALPHANUM = 0; public static final int ALPHANUM = 0;
public static final int NUM = 1; public static final int NUM = 1;
@ -108,7 +108,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
this.scanner = getScanner(); this.scanner = getScanner();
} }
private StandardTokenizerInterface getScanner() { private UAX29URLEmailTokenizerImpl getScanner() {
return new UAX29URLEmailTokenizerImpl(input); return new UAX29URLEmailTokenizerImpl(input);
} }
@ -127,7 +127,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
while(true) { while(true) {
int tokenType = scanner.getNextToken(); int tokenType = scanner.getNextToken();
if (tokenType == StandardTokenizerInterface.YYEOF) { if (tokenType == UAX29URLEmailTokenizerImpl.YYEOF) {
return false; return false;
} }

View File

@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
* </ul> * </ul>
*/ */
public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterface { public final class UAX29URLEmailTokenizerImpl {
/** This character denotes the end of file */ /** This character denotes the end of file */
public static final int YYEOF = -1; public static final int YYEOF = -1;
@ -7204,11 +7204,11 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
zzAtEOF = true; zzAtEOF = true;
switch (zzLexicalState) { switch (zzLexicalState) {
case YYINITIAL: { case YYINITIAL: {
return StandardTokenizerInterface.YYEOF; return YYEOF;
} }
case 2910: break; case 2910: break;
case AVOID_BAD_URL: { case AVOID_BAD_URL: {
return StandardTokenizerInterface.YYEOF; return YYEOF;
} }
case 2911: break; case 2911: break;
default: default:

View File

@ -46,7 +46,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
%final %final
%public %public
%class UAX29URLEmailTokenizerImpl %class UAX29URLEmailTokenizerImpl
%implements StandardTokenizerInterface
%function getNextToken %function getNextToken
%char %char
%xstate AVOID_BAD_URL %xstate AVOID_BAD_URL
@ -208,7 +207,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
// UAX#29 WB1. sot ÷ // UAX#29 WB1. sot ÷
// WB2. ÷ eot // WB2. ÷ eot
// //
<<EOF>> { return StandardTokenizerInterface.YYEOF; } <<EOF>> { return YYEOF; }
{URL} { yybegin(YYINITIAL); return URL_TYPE; } {URL} { yybegin(YYINITIAL); return URL_TYPE; }