LUCENE-1150: put back public tokenImage/TOKEN_TYPES in StandardTokenizer and WikipediaTokenizer

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@648183 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2008-04-15 08:48:41 +00:00
parent a6ff3c93df
commit 44daebab7c
7 changed files with 55 additions and 71 deletions

View File

@ -65,6 +65,31 @@ public class WikipediaTokenizer extends Tokenizer {
public static final int SUB_HEADING_ID = 16;
public static final int EXTERNAL_LINK_URL_ID = 17;
/**
 * String token types that correspond to token type int constants.
 * The array is indexed by the int token type constants declared in this
 * class; for example, <code>TOKEN_TYPES[SUB_HEADING_ID]</code> is
 * {@link #SUB_HEADING} and <code>TOKEN_TYPES[EXTERNAL_LINK_URL_ID]</code>
 * is {@link #EXTERNAL_LINK_URL}. The order of the entries must therefore
 * stay in sync with the values of those constants.
 */
public static final String [] TOKEN_TYPES = new String [] {
// Indices 0-7: literal type names
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
// Indices 8-17: Wikipedia-specific types, taken from this class's String constants
INTERNAL_LINK,
EXTERNAL_LINK,
CITATION,
CATEGORY,
BOLD,
ITALICS,
BOLD_ITALICS,
HEADING,
SUB_HEADING,
EXTERNAL_LINK_URL
};
/**
 * Alias for {@link #TOKEN_TYPES}; this is the same array instance,
 * not a copy.
 *
 * @deprecated Please use {@link #TOKEN_TYPES} instead
 */
public static final String [] tokenImage = TOKEN_TYPES;
public static final int TOKENS_ONLY = 0;
public static final int UNTOKENIZED_ONLY = 1;
public static final int BOTH = 2;

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
/* The following code was generated by JFlex 1.4.1 on 4/15/08 4:31 AM */
package org.apache.lucene.wikipedia.analysis;
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 1/25/08 6:54 PM from the specification file
* <tt>/tango/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
* on 4/15/08 4:31 AM from the specification file
* <tt>/mnt2/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@ -454,26 +454,7 @@ private int numLinkToks = 0;
//see https://issues.apache.org/jira/browse/LUCENE-1133
private int numWikiTokensSeen = 0;
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
WikipediaTokenizer.INTERNAL_LINK,
WikipediaTokenizer.EXTERNAL_LINK,
WikipediaTokenizer.CITATION,
WikipediaTokenizer.CATEGORY,
WikipediaTokenizer.BOLD,
WikipediaTokenizer.ITALICS,
WikipediaTokenizer.BOLD_ITALICS,
WikipediaTokenizer.HEADING,
WikipediaTokenizer.SUB_HEADING,
WikipediaTokenizer.EXTERNAL_LINK_URL
};
public static final String [] TOKEN_TYPES = WikipediaTokenizer.TOKEN_TYPES;
/**
Returns the number of tokens seen inside a category or link, etc.

View File

@ -59,26 +59,7 @@ private int numLinkToks = 0;
//see https://issues.apache.org/jira/browse/LUCENE-1133
private int numWikiTokensSeen = 0;
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
WikipediaTokenizer.INTERNAL_LINK,
WikipediaTokenizer.EXTERNAL_LINK,
WikipediaTokenizer.CITATION,
WikipediaTokenizer.CATEGORY,
WikipediaTokenizer.BOLD,
WikipediaTokenizer.ITALICS,
WikipediaTokenizer.BOLD_ITALICS,
WikipediaTokenizer.HEADING,
WikipediaTokenizer.SUB_HEADING,
WikipediaTokenizer.EXTERNAL_LINK_URL
};
public static final String [] TOKEN_TYPES = WikipediaTokenizer.TOKEN_TYPES;
/**
Returns the number of tokens seen inside a category or link, etc.
@ -346,4 +327,4 @@ DOUBLE_EQUALS = "="{2}
//ITALICS = ''({ALPHANUM}+{WHITESPACE}*)+''
//BOLD_ITALICS = '''''({ALPHANUM}+{WHITESPACE}*)+'''''
//HEADING = "="{2}({ALPHANUM}+{WHITESPACE}*)+"="{2}
//SUB_HEADING ="="{3}({ALPHANUM}+{WHITESPACE}*)+"="{3}
//SUB_HEADING ="="{3}({ALPHANUM}+{WHITESPACE}*)+"="{3}

View File

@ -59,7 +59,23 @@ public class StandardTokenizer extends Tokenizer {
* release.
*/
public static final int ACRONYM_DEP = 8;
/**
 * String token types that correspond to token type int constants.
 * The array is indexed by the int token type constants declared in this
 * class; for example, <code>TOKEN_TYPES[ACRONYM_DEP]</code> is
 * <code>"&lt;ACRONYM_DEP&gt;"</code>. The order of the entries must
 * therefore stay in sync with the values of those constants.
 */
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
// Index 8 matches ACRONYM_DEP
"<ACRONYM_DEP>"
};
/**
 * Alias for {@link #TOKEN_TYPES}; this is the same array instance,
 * not a copy.
 *
 * @deprecated Please use {@link #TOKEN_TYPES} instead
 */
public static final String [] tokenImage = TOKEN_TYPES;
/**
* Specifies whether deprecated acronyms should be replaced with HOST type.
* This is false by default to support backward compatibility.

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
/* The following code was generated by JFlex 1.4.1 on 4/15/08 4:31 AM */
package org.apache.lucene.analysis.standard;
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 1/25/08 6:54 PM from the specification file
* <tt>/tango/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
* on 4/15/08 4:31 AM from the specification file
* <tt>/mnt2/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
*/
class StandardTokenizerImpl {
@ -298,17 +298,7 @@ public static final int CJ = StandardTokenizer.CJ;
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"
};
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
public final int yychar()
{

View File

@ -45,17 +45,7 @@ public static final int CJ = StandardTokenizer.CJ;
*/
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
public static final String [] TOKEN_TYPES = new String [] {
"<ALPHANUM>",
"<APOSTROPHE>",
"<ACRONYM>",
"<COMPANY>",
"<EMAIL>",
"<HOST>",
"<NUM>",
"<CJ>",
"<ACRONYM_DEP>"
};
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
public final int yychar()
{

View File

@ -119,7 +119,7 @@ public class TestAnalyzers extends LuceneTestCase {
verifyPayload(ts);
}
// Just a compile time test, to ensure the
// LUCENE-1150: Just a compile time test, to ensure the
// StandardTokenizer constants remain publicly accessible
public void _testStandardConstants() {
int x = StandardTokenizer.ALPHANUM;
@ -130,6 +130,7 @@ public class TestAnalyzers extends LuceneTestCase {
x = StandardTokenizer.HOST;
x = StandardTokenizer.NUM;
x = StandardTokenizer.CJ;
String[] y = StandardTokenizer.TOKEN_TYPES;
}
}