LUCENE-1150: make StandardAnalyzer tokenizer constants public again (public access was accidentally removed with LUCENE-966)

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@616248 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-01-29 10:51:44 +00:00
parent 66b5df6788
commit 38f8d4e837
9 changed files with 115 additions and 63 deletions

View File

@ -12,6 +12,10 @@ API Changes
pre-existing constructors; these will be removed in release 3.0.
(Steven Rowe via Mike McCandless)
2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly;
this was accidentally lost with LUCENE-966. (Nicolas Lalevée via
Mike McCandless)
Bug fixes
New features

View File

@ -46,6 +46,25 @@ public class WikipediaTokenizer extends Tokenizer {
public static final String HEADING = "h";
public static final String SUB_HEADING = "sh";
// Public integer ids, numbered consecutively from 0 to 17.
// WikipediaTokenizerImpl assigns its own constants of the same name
// (without the _ID suffix) from these values, so the numbering is
// defined here only.
public static final int ALPHANUM_ID = 0;
public static final int APOSTROPHE_ID = 1;
public static final int ACRONYM_ID = 2;
public static final int COMPANY_ID = 3;
public static final int EMAIL_ID = 4;
public static final int HOST_ID = 5;
public static final int NUM_ID = 6;
public static final int CJ_ID = 7;
public static final int INTERNAL_LINK_ID = 8;
public static final int EXTERNAL_LINK_ID = 9;
public static final int CITATION_ID = 10;
public static final int CATEGORY_ID = 11;
public static final int BOLD_ID = 12;
public static final int ITALICS_ID = 13;
public static final int BOLD_ITALICS_ID = 14;
public static final int HEADING_ID = 15;
public static final int SUB_HEADING_ID = 16;
public static final int EXTERNAL_LINK_URL_ID = 17;
public static final int TOKENS_ONLY = 0;
public static final int UNTOKENIZED_ONLY = 1;
public static final int BOTH = 2;

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.4.1 on 1/16/08 10:31 AM */
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
package org.apache.lucene.wikipedia.analysis;
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 1/16/08 10:31 AM from the specification file
* <tt>/Volumes/User/grantingersoll/projects/lucene/Lucene-Trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
* on 1/25/08 6:54 PM from the specification file
* <tt>/tango/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@ -425,24 +425,24 @@ class WikipediaTokenizerImpl {
/* user code: */
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
public static final int INTERNAL_LINK = 8;
public static final int EXTERNAL_LINK = 9;
public static final int CITATION = 10;
public static final int CATEGORY = 11;
public static final int BOLD = 12;
public static final int ITALICS = 13;
public static final int BOLD_ITALICS = 14;
public static final int HEADING = 15;
public static final int SUB_HEADING = 16;
public static final int EXTERNAL_LINK_URL = 17;
public static final int ALPHANUM = WikipediaTokenizer.ALPHANUM_ID;
public static final int APOSTROPHE = WikipediaTokenizer.APOSTROPHE_ID;
public static final int ACRONYM = WikipediaTokenizer.ACRONYM_ID;
public static final int COMPANY = WikipediaTokenizer.COMPANY_ID;
public static final int EMAIL = WikipediaTokenizer.EMAIL_ID;
public static final int HOST = WikipediaTokenizer.HOST_ID;
public static final int NUM = WikipediaTokenizer.NUM_ID;
public static final int CJ = WikipediaTokenizer.CJ_ID;
public static final int INTERNAL_LINK = WikipediaTokenizer.INTERNAL_LINK_ID;
public static final int EXTERNAL_LINK = WikipediaTokenizer.EXTERNAL_LINK_ID;
public static final int CITATION = WikipediaTokenizer.CITATION_ID;
public static final int CATEGORY = WikipediaTokenizer.CATEGORY_ID;
public static final int BOLD = WikipediaTokenizer.BOLD_ID;
public static final int ITALICS = WikipediaTokenizer.ITALICS_ID;
public static final int BOLD_ITALICS = WikipediaTokenizer.BOLD_ITALICS_ID;
public static final int HEADING = WikipediaTokenizer.HEADING_ID;
public static final int SUB_HEADING = WikipediaTokenizer.SUB_HEADING_ID;
public static final int EXTERNAL_LINK_URL = WikipediaTokenizer.EXTERNAL_LINK_URL_ID;
private int currentTokType;

View File

@ -30,24 +30,24 @@ import org.apache.lucene.analysis.Token;
%{
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
public static final int INTERNAL_LINK = 8;
public static final int EXTERNAL_LINK = 9;
public static final int CITATION = 10;
public static final int CATEGORY = 11;
public static final int BOLD = 12;
public static final int ITALICS = 13;
public static final int BOLD_ITALICS = 14;
public static final int HEADING = 15;
public static final int SUB_HEADING = 16;
public static final int EXTERNAL_LINK_URL = 17;
public static final int ALPHANUM = WikipediaTokenizer.ALPHANUM_ID;
public static final int APOSTROPHE = WikipediaTokenizer.APOSTROPHE_ID;
public static final int ACRONYM = WikipediaTokenizer.ACRONYM_ID;
public static final int COMPANY = WikipediaTokenizer.COMPANY_ID;
public static final int EMAIL = WikipediaTokenizer.EMAIL_ID;
public static final int HOST = WikipediaTokenizer.HOST_ID;
public static final int NUM = WikipediaTokenizer.NUM_ID;
public static final int CJ = WikipediaTokenizer.CJ_ID;
public static final int INTERNAL_LINK = WikipediaTokenizer.INTERNAL_LINK_ID;
public static final int EXTERNAL_LINK = WikipediaTokenizer.EXTERNAL_LINK_ID;
public static final int CITATION = WikipediaTokenizer.CITATION_ID;
public static final int CATEGORY = WikipediaTokenizer.CATEGORY_ID;
public static final int BOLD = WikipediaTokenizer.BOLD_ID;
public static final int ITALICS = WikipediaTokenizer.ITALICS_ID;
public static final int BOLD_ITALICS = WikipediaTokenizer.BOLD_ITALICS_ID;
public static final int HEADING = WikipediaTokenizer.HEADING_ID;
public static final int SUB_HEADING = WikipediaTokenizer.SUB_HEADING_ID;
public static final int EXTERNAL_LINK_URL = WikipediaTokenizer.EXTERNAL_LINK_URL_ID;
private int currentTokType;

View File

@ -41,8 +41,24 @@ import org.apache.lucene.analysis.Tokenizer;
*/
public class StandardTokenizer extends Tokenizer {
/** A private instance of the JFlex-constructed scanner */
private final StandardTokenizerImpl scanner;
/** A private instance of the JFlex-constructed scanner */
private final StandardTokenizerImpl scanner;
// Public integer ids (0 through 7, plus the deprecated ACRONYM_DEP = 8).
// StandardTokenizerImpl assigns its own same-named constants from these
// values, so the numbering is defined here only.
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs. It is deprecated and will be removed in the next
* release.
*/
public static final int ACRONYM_DEP = 8;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.4.1 on 12/18/07 9:22 PM */
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
package org.apache.lucene.analysis.standard;
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 12/18/07 9:22 PM from the specification file
* <tt>/Volumes/User/grantingersoll/projects/lucene/java/lucene-clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
* on 1/25/08 6:54 PM from the specification file
* <tt>/tango/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
*/
class StandardTokenizerImpl {
@ -283,20 +283,20 @@ class StandardTokenizerImpl {
/* user code: */
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs. It is deprecated and will be removed in the next
* release.
*/
public static final int ACRONYM_DEP = 8;
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",

View File

@ -30,20 +30,20 @@ import org.apache.lucene.analysis.Token;
%{
public static final int ALPHANUM = 0;
public static final int APOSTROPHE = 1;
public static final int ACRONYM = 2;
public static final int COMPANY = 3;
public static final int EMAIL = 4;
public static final int HOST = 5;
public static final int NUM = 6;
public static final int CJ = 7;
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
public static final int ACRONYM = StandardTokenizer.ACRONYM;
public static final int COMPANY = StandardTokenizer.COMPANY;
public static final int EMAIL = StandardTokenizer.EMAIL;
public static final int HOST = StandardTokenizer.HOST;
public static final int NUM = StandardTokenizer.NUM;
public static final int CJ = StandardTokenizer.CJ;
/**
* @deprecated this solves a bug where HOSTs that end with '.' are identified
* as ACRONYMs. It is deprecated and will be removed in the next
* release.
*/
public static final int ACRONYM_DEP = 8;
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",

View File

@ -437,7 +437,7 @@ public class FSDirectory extends Directory {
// Inherit javadoc
public IndexInput openInput(String name) throws IOException {
return new FSIndexInput(new File(directory, name));
return openInput(name, BufferedIndexInput.BUFFER_SIZE);
}
// Inherit javadoc

View File

@ -23,6 +23,7 @@ import java.util.LinkedList;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.index.Payload;
import org.apache.lucene.analysis.standard.StandardTokenizer;
public class TestAnalyzers extends LuceneTestCase {
@ -118,6 +119,18 @@ public class TestAnalyzers extends LuceneTestCase {
verifyPayload(ts);
}
// Compile-time check only: the method body merely references each
// StandardTokenizer constant listed below, so the build breaks if any
// of them stops being publicly accessible.
public void _testStandardConstants() {
  final int[] publiclyVisible = {
    StandardTokenizer.ALPHANUM,
    StandardTokenizer.APOSTROPHE,
    StandardTokenizer.ACRONYM,
    StandardTokenizer.COMPANY,
    StandardTokenizer.EMAIL,
    StandardTokenizer.HOST,
    StandardTokenizer.NUM,
    StandardTokenizer.CJ
  };
}
}
class BuffTokenFilter extends TokenFilter {