mirror of https://github.com/apache/lucene.git
LUCENE-6000: Removed StandardTokenizerInterface
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1630292 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d5191d5674
commit
947a05c9c1
|
@ -156,6 +156,10 @@ API Changes
|
||||||
* LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods.
|
* LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods.
|
||||||
(Robert Muir)
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-6000: Removed StandardTokenizerInterface. Tokenizers now use
|
||||||
|
their jflex impl directly.
|
||||||
|
(Ryan Ernst)
|
||||||
|
|
||||||
Bug Fixes
|
Bug Fixes
|
||||||
|
|
||||||
* LUCENE-5650: Enforce read-only access to any path outside the temporary
|
* LUCENE-5650: Enforce read-only access to any path outside the temporary
|
||||||
|
|
|
@ -49,7 +49,7 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
public final class ClassicTokenizer extends Tokenizer {
|
public final class ClassicTokenizer extends Tokenizer {
|
||||||
/** A private instance of the JFlex-constructed scanner */
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
private StandardTokenizerInterface scanner;
|
private ClassicTokenizerImpl scanner;
|
||||||
|
|
||||||
public static final int ALPHANUM = 0;
|
public static final int ALPHANUM = 0;
|
||||||
public static final int APOSTROPHE = 1;
|
public static final int APOSTROPHE = 1;
|
||||||
|
@ -135,7 +135,7 @@ public final class ClassicTokenizer extends Tokenizer {
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
|
||||||
if (tokenType == StandardTokenizerInterface.YYEOF) {
|
if (tokenType == ClassicTokenizerImpl.YYEOF) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -33,7 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* This class implements the classic lucene StandardTokenizer up until 3.0
|
* This class implements the classic lucene StandardTokenizer up until 3.0
|
||||||
*/
|
*/
|
||||||
|
|
||||||
class ClassicTokenizerImpl implements StandardTokenizerInterface {
|
class ClassicTokenizerImpl {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
|
|
@ -33,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%%
|
%%
|
||||||
|
|
||||||
%class ClassicTokenizerImpl
|
%class ClassicTokenizerImpl
|
||||||
%implements StandardTokenizerInterface
|
|
||||||
%unicode 3.0
|
%unicode 3.0
|
||||||
%integer
|
%integer
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
public final class StandardTokenizer extends Tokenizer {
|
public final class StandardTokenizer extends Tokenizer {
|
||||||
/** A private instance of the JFlex-constructed scanner */
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
private StandardTokenizerInterface scanner;
|
private StandardTokenizerImpl scanner;
|
||||||
|
|
||||||
// TODO: how can we remove these old types?!
|
// TODO: how can we remove these old types?!
|
||||||
public static final int ALPHANUM = 0;
|
public static final int ALPHANUM = 0;
|
||||||
|
@ -150,7 +150,7 @@ public final class StandardTokenizer extends Tokenizer {
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
|
||||||
if (tokenType == StandardTokenizerInterface.YYEOF) {
|
if (tokenType == StandardTokenizerImpl.YYEOF) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
public final class StandardTokenizerImpl {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
@ -804,7 +804,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface {
|
||||||
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
if (zzInput == YYEOF && zzStartRead == zzCurrentPos) {
|
||||||
zzAtEOF = true;
|
zzAtEOF = true;
|
||||||
{
|
{
|
||||||
return StandardTokenizerInterface.YYEOF;
|
return YYEOF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
|
@ -43,7 +43,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%final
|
%final
|
||||||
%public
|
%public
|
||||||
%class StandardTokenizerImpl
|
%class StandardTokenizerImpl
|
||||||
%implements StandardTokenizerInterface
|
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%char
|
%char
|
||||||
%buffer 255
|
%buffer 255
|
||||||
|
@ -118,7 +117,7 @@ ComplexContextEx = \p{LB:Complex_Context}
|
||||||
// UAX#29 WB1. sot ÷
|
// UAX#29 WB1. sot ÷
|
||||||
// WB2. ÷ eot
|
// WB2. ÷ eot
|
||||||
//
|
//
|
||||||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
<<EOF>> { return YYEOF; }
|
||||||
|
|
||||||
// UAX#29 WB8. Numeric × Numeric
|
// UAX#29 WB8. Numeric × Numeric
|
||||||
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
// WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric
|
||||||
|
|
|
@ -1,74 +0,0 @@
|
||||||
package org.apache.lucene.analysis.standard;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
||||||
* contributor license agreements. See the NOTICE file distributed with
|
|
||||||
* this work for additional information regarding copyright ownership.
|
|
||||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
|
||||||
* (the "License"); you may not use this file except in compliance with
|
|
||||||
* the License. You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
|
||||||
|
|
||||||
import java.io.Reader;
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Internal interface for supporting versioned grammars.
|
|
||||||
* @lucene.internal
|
|
||||||
*/
|
|
||||||
public interface StandardTokenizerInterface {
|
|
||||||
|
|
||||||
/** This token type, as returned from {@link #getNextToken()}, denotes the end of file */
|
|
||||||
public static final int YYEOF = -1;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Copies the matched text into the CharTermAttribute
|
|
||||||
*/
|
|
||||||
public void getText(CharTermAttribute t);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the current position.
|
|
||||||
*/
|
|
||||||
public int yychar();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Resets the scanner to read from a new input stream.
|
|
||||||
* Does not close the old reader.
|
|
||||||
*
|
|
||||||
* All internal variables are reset, the old input stream
|
|
||||||
* <b>cannot</b> be reused (internal buffer is discarded and lost).
|
|
||||||
* Lexical state is set to <tt>ZZ_INITIAL</tt>.
|
|
||||||
*
|
|
||||||
* @param reader the new input stream
|
|
||||||
*/
|
|
||||||
public void yyreset(Reader reader);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the length of the matched text region.
|
|
||||||
*/
|
|
||||||
public int yylength();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Resumes scanning until the next regular expression is matched,
|
|
||||||
* the end of input is encountered or an I/O-Error occurs.
|
|
||||||
*
|
|
||||||
* @return the next token, {@link #YYEOF} on end of stream
|
|
||||||
* @exception IOException if any I/O-Error occurs
|
|
||||||
*/
|
|
||||||
public int getNextToken() throws IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the scanner buffer size in chars
|
|
||||||
*/
|
|
||||||
public void setBufferSize(int numChars);
|
|
||||||
}
|
|
|
@ -47,7 +47,7 @@ import org.apache.lucene.util.AttributeFactory;
|
||||||
|
|
||||||
public final class UAX29URLEmailTokenizer extends Tokenizer {
|
public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
/** A private instance of the JFlex-constructed scanner */
|
/** A private instance of the JFlex-constructed scanner */
|
||||||
private final StandardTokenizerInterface scanner;
|
private final UAX29URLEmailTokenizerImpl scanner;
|
||||||
|
|
||||||
public static final int ALPHANUM = 0;
|
public static final int ALPHANUM = 0;
|
||||||
public static final int NUM = 1;
|
public static final int NUM = 1;
|
||||||
|
@ -108,7 +108,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
this.scanner = getScanner();
|
this.scanner = getScanner();
|
||||||
}
|
}
|
||||||
|
|
||||||
private StandardTokenizerInterface getScanner() {
|
private UAX29URLEmailTokenizerImpl getScanner() {
|
||||||
return new UAX29URLEmailTokenizerImpl(input);
|
return new UAX29URLEmailTokenizerImpl(input);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,7 +127,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
||||||
while(true) {
|
while(true) {
|
||||||
int tokenType = scanner.getNextToken();
|
int tokenType = scanner.getNextToken();
|
||||||
|
|
||||||
if (tokenType == StandardTokenizerInterface.YYEOF) {
|
if (tokenType == UAX29URLEmailTokenizerImpl.YYEOF) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
* </ul>
|
* </ul>
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterface {
|
public final class UAX29URLEmailTokenizerImpl {
|
||||||
|
|
||||||
/** This character denotes the end of file */
|
/** This character denotes the end of file */
|
||||||
public static final int YYEOF = -1;
|
public static final int YYEOF = -1;
|
||||||
|
@ -7204,11 +7204,11 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf
|
||||||
zzAtEOF = true;
|
zzAtEOF = true;
|
||||||
switch (zzLexicalState) {
|
switch (zzLexicalState) {
|
||||||
case YYINITIAL: {
|
case YYINITIAL: {
|
||||||
return StandardTokenizerInterface.YYEOF;
|
return YYEOF;
|
||||||
}
|
}
|
||||||
case 2910: break;
|
case 2910: break;
|
||||||
case AVOID_BAD_URL: {
|
case AVOID_BAD_URL: {
|
||||||
return StandardTokenizerInterface.YYEOF;
|
return YYEOF;
|
||||||
}
|
}
|
||||||
case 2911: break;
|
case 2911: break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -46,7 +46,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||||
%final
|
%final
|
||||||
%public
|
%public
|
||||||
%class UAX29URLEmailTokenizerImpl
|
%class UAX29URLEmailTokenizerImpl
|
||||||
%implements StandardTokenizerInterface
|
|
||||||
%function getNextToken
|
%function getNextToken
|
||||||
%char
|
%char
|
||||||
%xstate AVOID_BAD_URL
|
%xstate AVOID_BAD_URL
|
||||||
|
@ -208,7 +207,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
||||||
// UAX#29 WB1. sot ÷
|
// UAX#29 WB1. sot ÷
|
||||||
// WB2. ÷ eot
|
// WB2. ÷ eot
|
||||||
//
|
//
|
||||||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
<<EOF>> { return YYEOF; }
|
||||||
|
|
||||||
{URL} { yybegin(YYINITIAL); return URL_TYPE; }
|
{URL} { yybegin(YYINITIAL); return URL_TYPE; }
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue