From 947a05c9c1adf390ed2281ce2cddbc16c475252a Mon Sep 17 00:00:00 2001 From: Ryan Ernst Date: Thu, 9 Oct 2014 05:58:07 +0000 Subject: [PATCH] LUCENE-6000: Removed StandardTokenizerInterface git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1630292 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 4 + .../analysis/standard/ClassicTokenizer.java | 4 +- .../standard/ClassicTokenizerImpl.java | 2 +- .../standard/ClassicTokenizerImpl.jflex | 1 - .../analysis/standard/StandardTokenizer.java | 4 +- .../standard/StandardTokenizerImpl.java | 4 +- .../standard/StandardTokenizerImpl.jflex | 3 +- .../standard/StandardTokenizerInterface.java | 74 ------------------- .../standard/UAX29URLEmailTokenizer.java | 6 +- .../standard/UAX29URLEmailTokenizerImpl.java | 6 +- .../standard/UAX29URLEmailTokenizerImpl.jflex | 3 +- 11 files changed, 19 insertions(+), 92 deletions(-) delete mode 100644 lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8f064ed4d38..b05028e188e 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -156,6 +156,10 @@ API Changes * LUCENE-5998: Simplify Field/SegmentInfoFormat to read+write methods. (Robert Muir) +* LUCENE-6000: Removed StandardTokenizerInterface. Tokenizers now use + their jflex impl directly. + (Ryan Ernst) + Bug Fixes * LUCENE-5650: Enforce read-only access to any path outside the temporary diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java index 118a41cb8b0..339ab8bb116 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizer.java @@ -49,7 +49,7 @@ import org.apache.lucene.util.AttributeFactory; public final class ClassicTokenizer extends Tokenizer { /** A private instance of the JFlex-constructed scanner */ - private StandardTokenizerInterface scanner; + private ClassicTokenizerImpl scanner; public static final int ALPHANUM = 0; public static final int APOSTROPHE = 1; @@ -135,7 +135,7 @@ public final class ClassicTokenizer extends Tokenizer { while(true) { int tokenType = scanner.getNextToken(); - if (tokenType == StandardTokenizerInterface.YYEOF) { + if (tokenType == ClassicTokenizerImpl.YYEOF) { return false; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java index 86303ea2e93..a457868c7ac 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java @@ -33,7 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * This class implements the classic lucene StandardTokenizer up until 3.0 */ -class ClassicTokenizerImpl implements StandardTokenizerInterface { +class ClassicTokenizerImpl { /** This character denotes the end of file */ public static final int YYEOF = -1; diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex index 5dc5eab3727..6c22b7dc462 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex @@ -33,7 +33,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %% %class ClassicTokenizerImpl -%implements StandardTokenizerInterface %unicode 3.0 %integer %function getNextToken diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java index fe145f5d75b..2585e924301 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizer.java @@ -39,7 +39,7 @@ import org.apache.lucene.util.AttributeFactory; public final class StandardTokenizer extends Tokenizer { /** A private instance of the JFlex-constructed scanner */ - private StandardTokenizerInterface scanner; + private StandardTokenizerImpl scanner; // TODO: how can we remove these old types?! public static final int ALPHANUM = 0; @@ -150,7 +150,7 @@ public final class StandardTokenizer extends Tokenizer { while(true) { int tokenType = scanner.getNextToken(); - if (tokenType == StandardTokenizerInterface.YYEOF) { + if (tokenType == StandardTokenizerImpl.YYEOF) { return false; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java index 0e47d9b9d58..3fa948e72e8 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java @@ -39,7 +39,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * */ -public final class StandardTokenizerImpl implements StandardTokenizerInterface { +public final class StandardTokenizerImpl { /** This character denotes the end of file */ public static final int YYEOF = -1; @@ -804,7 +804,7 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { zzAtEOF = true; { - return StandardTokenizerInterface.YYEOF; + return YYEOF; } } else { diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex index 5318ef46378..d99b17d51d5 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex @@ -43,7 +43,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %final %public %class StandardTokenizerImpl -%implements StandardTokenizerInterface %function getNextToken %char %buffer 255 @@ -118,7 +117,7 @@ ComplexContextEx = \p{LB:Complex_Context} // UAX#29 WB1. sot ÷ // WB2. ÷ eot // -<> { return StandardTokenizerInterface.YYEOF; } +<> { return YYEOF; } // UAX#29 WB8. Numeric × Numeric // WB11. Numeric (MidNum | MidNumLet | Single_Quote) × Numeric diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java deleted file mode 100644 index 9c199fe228c..00000000000 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerInterface.java +++ /dev/null @@ -1,74 +0,0 @@ -package org.apache.lucene.analysis.standard; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -import java.io.Reader; -import java.io.IOException; - -/** - * Internal interface for supporting versioned grammars. - * @lucene.internal - */ -public interface StandardTokenizerInterface { - - /** This token type, as returned from {@link #getNextToken()}, denotes the end of file */ - public static final int YYEOF = -1; - - /** - * Copies the matched text into the CharTermAttribute - */ - public void getText(CharTermAttribute t); - - /** - * Returns the current position. - */ - public int yychar(); - - /** - * Resets the scanner to read from a new input stream. - * Does not close the old reader. - * - * All internal variables are reset, the old input stream - * cannot be reused (internal buffer is discarded and lost). - * Lexical state is set to ZZ_INITIAL. - * - * @param reader the new input stream - */ - public void yyreset(Reader reader); - - /** - * Returns the length of the matched text region. - */ - public int yylength(); - - /** - * Resumes scanning until the next regular expression is matched, - * the end of input is encountered or an I/O-Error occurs. - * - * @return the next token, {@link #YYEOF} on end of stream - * @exception IOException if any I/O-Error occurs - */ - public int getNextToken() throws IOException; - - /** - * Sets the scanner buffer size in chars - */ - public void setBufferSize(int numChars); -} diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java index 704efc94194..1d1c944d7f0 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizer.java @@ -47,7 +47,7 @@ import org.apache.lucene.util.AttributeFactory; public final class UAX29URLEmailTokenizer extends Tokenizer { /** A private instance of the JFlex-constructed scanner */ - private final StandardTokenizerInterface scanner; + private final UAX29URLEmailTokenizerImpl scanner; public static final int ALPHANUM = 0; public static final int NUM = 1; @@ -108,7 +108,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { this.scanner = getScanner(); } - private StandardTokenizerInterface getScanner() { + private UAX29URLEmailTokenizerImpl getScanner() { return new UAX29URLEmailTokenizerImpl(input); } @@ -127,7 +127,7 @@ public final class UAX29URLEmailTokenizer extends Tokenizer { while(true) { int tokenType = scanner.getNextToken(); - if (tokenType == StandardTokenizerInterface.YYEOF) { + if (tokenType == UAX29URLEmailTokenizerImpl.YYEOF) { return false; } diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java index 96bc6c2320a..e0a43fbf392 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java @@ -42,7 +42,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; * */ -public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterface { +public final class UAX29URLEmailTokenizerImpl { /** This character denotes the end of file */ public static final int YYEOF = -1; @@ -7204,11 +7204,11 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf zzAtEOF = true; switch (zzLexicalState) { case YYINITIAL: { - return StandardTokenizerInterface.YYEOF; + return YYEOF; } case 2910: break; case AVOID_BAD_URL: { - return StandardTokenizerInterface.YYEOF; + return YYEOF; } case 2911: break; default: diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex index 0da77a7cfd7..2aef7248535 100644 --- a/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex +++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex @@ -46,7 +46,6 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; %final %public %class UAX29URLEmailTokenizerImpl -%implements StandardTokenizerInterface %function getNextToken %char %xstate AVOID_BAD_URL @@ -208,7 +207,7 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) // UAX#29 WB1. sot ÷ // WB2. ÷ eot // - <> { return StandardTokenizerInterface.YYEOF; } + <> { return YYEOF; } {URL} { yybegin(YYINITIAL); return URL_TYPE; }