mirror of https://github.com/apache/lucene.git
LUCENE-1150: put back public tokenImage/TOKEN_TYPES in StandardTokenizer and WikipediaTokenizer
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@648183 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a6ff3c93df
commit
44daebab7c
|
@ -65,6 +65,31 @@ public class WikipediaTokenizer extends Tokenizer {
|
|||
public static final int SUB_HEADING_ID = 16;
|
||||
public static final int EXTERNAL_LINK_URL_ID = 17;
|
||||
|
||||
/** String token types that correspond to token type int constants */
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
"<APOSTROPHE>",
|
||||
"<ACRONYM>",
|
||||
"<COMPANY>",
|
||||
"<EMAIL>",
|
||||
"<HOST>",
|
||||
"<NUM>",
|
||||
"<CJ>",
|
||||
INTERNAL_LINK,
|
||||
EXTERNAL_LINK,
|
||||
CITATION,
|
||||
CATEGORY,
|
||||
BOLD,
|
||||
ITALICS,
|
||||
BOLD_ITALICS,
|
||||
HEADING,
|
||||
SUB_HEADING,
|
||||
EXTERNAL_LINK_URL
|
||||
};
|
||||
|
||||
/** @deprecated Please use {@link #TOKEN_TYPES} instead */
|
||||
public static final String [] tokenImage = TOKEN_TYPES;
|
||||
|
||||
public static final int TOKENS_ONLY = 0;
|
||||
public static final int UNTOKENIZED_ONLY = 1;
|
||||
public static final int BOTH = 2;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
|
||||
/* The following code was generated by JFlex 1.4.1 on 4/15/08 4:31 AM */
|
||||
|
||||
package org.apache.lucene.wikipedia.analysis;
|
||||
|
||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
|
||||
* on 1/25/08 6:54 PM from the specification file
|
||||
* <tt>/tango/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
|
||||
* on 4/15/08 4:31 AM from the specification file
|
||||
* <tt>/mnt2/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class WikipediaTokenizerImpl {
|
||||
|
||||
|
@ -454,26 +454,7 @@ private int numLinkToks = 0;
|
|||
//see https://issues.apache.org/jira/browse/LUCENE-1133
|
||||
private int numWikiTokensSeen = 0;
|
||||
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
"<APOSTROPHE>",
|
||||
"<ACRONYM>",
|
||||
"<COMPANY>",
|
||||
"<EMAIL>",
|
||||
"<HOST>",
|
||||
"<NUM>",
|
||||
"<CJ>",
|
||||
WikipediaTokenizer.INTERNAL_LINK,
|
||||
WikipediaTokenizer.EXTERNAL_LINK,
|
||||
WikipediaTokenizer.CITATION,
|
||||
WikipediaTokenizer.CATEGORY,
|
||||
WikipediaTokenizer.BOLD,
|
||||
WikipediaTokenizer.ITALICS,
|
||||
WikipediaTokenizer.BOLD_ITALICS,
|
||||
WikipediaTokenizer.HEADING,
|
||||
WikipediaTokenizer.SUB_HEADING,
|
||||
WikipediaTokenizer.EXTERNAL_LINK_URL
|
||||
};
|
||||
public static final String [] TOKEN_TYPES = WikipediaTokenizer.TOKEN_TYPES;
|
||||
|
||||
/**
|
||||
Returns the number of tokens seen inside a category or link, etc.
|
||||
|
|
|
@ -59,26 +59,7 @@ private int numLinkToks = 0;
|
|||
//see https://issues.apache.org/jira/browse/LUCENE-1133
|
||||
private int numWikiTokensSeen = 0;
|
||||
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
"<APOSTROPHE>",
|
||||
"<ACRONYM>",
|
||||
"<COMPANY>",
|
||||
"<EMAIL>",
|
||||
"<HOST>",
|
||||
"<NUM>",
|
||||
"<CJ>",
|
||||
WikipediaTokenizer.INTERNAL_LINK,
|
||||
WikipediaTokenizer.EXTERNAL_LINK,
|
||||
WikipediaTokenizer.CITATION,
|
||||
WikipediaTokenizer.CATEGORY,
|
||||
WikipediaTokenizer.BOLD,
|
||||
WikipediaTokenizer.ITALICS,
|
||||
WikipediaTokenizer.BOLD_ITALICS,
|
||||
WikipediaTokenizer.HEADING,
|
||||
WikipediaTokenizer.SUB_HEADING,
|
||||
WikipediaTokenizer.EXTERNAL_LINK_URL
|
||||
};
|
||||
public static final String [] TOKEN_TYPES = WikipediaTokenizer.TOKEN_TYPES;
|
||||
|
||||
/**
|
||||
Returns the number of tokens seen inside a category or link, etc.
|
||||
|
@ -346,4 +327,4 @@ DOUBLE_EQUALS = "="{2}
|
|||
//ITALICS = ''({ALPHANUM}+{WHITESPACE}*)+''
|
||||
//BOLD_ITALICS = '''''({ALPHANUM}+{WHITESPACE}*)+'''''
|
||||
//HEADING = "="{2}({ALPHANUM}+{WHITESPACE}*)+"="{2}
|
||||
//SUB_HEADING ="="{3}({ALPHANUM}+{WHITESPACE}*)+"="{3}
|
||||
//SUB_HEADING ="="{3}({ALPHANUM}+{WHITESPACE}*)+"="{3}
|
||||
|
|
|
@ -59,7 +59,23 @@ public class StandardTokenizer extends Tokenizer {
|
|||
* release.
|
||||
*/
|
||||
public static final int ACRONYM_DEP = 8;
|
||||
|
||||
|
||||
/** String token types that correspond to token type int constants */
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
"<APOSTROPHE>",
|
||||
"<ACRONYM>",
|
||||
"<COMPANY>",
|
||||
"<EMAIL>",
|
||||
"<HOST>",
|
||||
"<NUM>",
|
||||
"<CJ>",
|
||||
"<ACRONYM_DEP>"
|
||||
};
|
||||
|
||||
/** @deprecated Please use {@link #TOKEN_TYPES} instead */
|
||||
public static final String [] tokenImage = TOKEN_TYPES;
|
||||
|
||||
/**
|
||||
* Specifies whether deprecated acronyms should be replaced with HOST type.
|
||||
* This is false by default to support backward compatibility.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
|
||||
/* The following code was generated by JFlex 1.4.1 on 4/15/08 4:31 AM */
|
||||
|
||||
package org.apache.lucene.analysis.standard;
|
||||
|
||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
|
||||
* on 1/25/08 6:54 PM from the specification file
|
||||
* <tt>/tango/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
|
||||
* on 4/15/08 4:31 AM from the specification file
|
||||
* <tt>/mnt2/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class StandardTokenizerImpl {
|
||||
|
||||
|
@ -298,17 +298,7 @@ public static final int CJ = StandardTokenizer.CJ;
|
|||
*/
|
||||
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
|
||||
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
"<APOSTROPHE>",
|
||||
"<ACRONYM>",
|
||||
"<COMPANY>",
|
||||
"<EMAIL>",
|
||||
"<HOST>",
|
||||
"<NUM>",
|
||||
"<CJ>",
|
||||
"<ACRONYM_DEP>"
|
||||
};
|
||||
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
|
||||
|
||||
public final int yychar()
|
||||
{
|
||||
|
|
|
@ -45,17 +45,7 @@ public static final int CJ = StandardTokenizer.CJ;
|
|||
*/
|
||||
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
|
||||
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
"<APOSTROPHE>",
|
||||
"<ACRONYM>",
|
||||
"<COMPANY>",
|
||||
"<EMAIL>",
|
||||
"<HOST>",
|
||||
"<NUM>",
|
||||
"<CJ>",
|
||||
"<ACRONYM_DEP>"
|
||||
};
|
||||
public static final String [] TOKEN_TYPES = StandardTokenizer.TOKEN_TYPES;
|
||||
|
||||
public final int yychar()
|
||||
{
|
||||
|
|
|
@ -119,7 +119,7 @@ public class TestAnalyzers extends LuceneTestCase {
|
|||
verifyPayload(ts);
|
||||
}
|
||||
|
||||
// Just a compile time test, to ensure the
|
||||
// LUCENE-1150: Just a compile time test, to ensure the
|
||||
// StandardAnalyzer constants remain publicly accessible
|
||||
public void _testStandardConstants() {
|
||||
int x = StandardTokenizer.ALPHANUM;
|
||||
|
@ -130,6 +130,7 @@ public class TestAnalyzers extends LuceneTestCase {
|
|||
x = StandardTokenizer.HOST;
|
||||
x = StandardTokenizer.NUM;
|
||||
x = StandardTokenizer.CJ;
|
||||
String[] y = StandardTokenizer.TOKEN_TYPES;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue