mirror of https://github.com/apache/lucene.git
LUCENE-1150: make StandardAnalyzer tokenizer constants public again (public access was accidentally removed with LUCENE-966)
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@616248 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
66b5df6788
commit
38f8d4e837
|
@ -12,6 +12,10 @@ API Changes
|
|||
pre-existing constructors; these will be removed in release 3.0.
|
||||
(Steven Rowe via Mike McCandless)
|
||||
|
||||
2. LUCENE-1150: Re-expose StandardTokenizer's constants publicly;
|
||||
this was accidentally lost with LUCENE-966. (Nicolas Lalevée via
|
||||
Mike McCandless)
|
||||
|
||||
Bug fixes
|
||||
|
||||
New features
|
||||
|
|
|
@ -46,6 +46,25 @@ public class WikipediaTokenizer extends Tokenizer {
|
|||
public static final String HEADING = "h";
|
||||
public static final String SUB_HEADING = "sh";
|
||||
|
||||
public static final int ALPHANUM_ID = 0;
|
||||
public static final int APOSTROPHE_ID = 1;
|
||||
public static final int ACRONYM_ID = 2;
|
||||
public static final int COMPANY_ID = 3;
|
||||
public static final int EMAIL_ID = 4;
|
||||
public static final int HOST_ID = 5;
|
||||
public static final int NUM_ID = 6;
|
||||
public static final int CJ_ID = 7;
|
||||
public static final int INTERNAL_LINK_ID = 8;
|
||||
public static final int EXTERNAL_LINK_ID = 9;
|
||||
public static final int CITATION_ID = 10;
|
||||
public static final int CATEGORY_ID = 11;
|
||||
public static final int BOLD_ID = 12;
|
||||
public static final int ITALICS_ID = 13;
|
||||
public static final int BOLD_ITALICS_ID = 14;
|
||||
public static final int HEADING_ID = 15;
|
||||
public static final int SUB_HEADING_ID = 16;
|
||||
public static final int EXTERNAL_LINK_URL_ID = 17;
|
||||
|
||||
public static final int TOKENS_ONLY = 0;
|
||||
public static final int UNTOKENIZED_ONLY = 1;
|
||||
public static final int BOTH = 2;
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.4.1 on 1/16/08 10:31 AM */
|
||||
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
|
||||
|
||||
package org.apache.lucene.wikipedia.analysis;
|
||||
|
||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
|
||||
* on 1/16/08 10:31 AM from the specification file
|
||||
* <tt>/Volumes/User/grantingersoll/projects/lucene/Lucene-Trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
|
||||
* on 1/25/08 6:54 PM from the specification file
|
||||
* <tt>/tango/mike/src/lucene.clean/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class WikipediaTokenizerImpl {
|
||||
|
||||
|
@ -425,24 +425,24 @@ class WikipediaTokenizerImpl {
|
|||
|
||||
/* user code: */
|
||||
|
||||
public static final int ALPHANUM = 0;
|
||||
public static final int APOSTROPHE = 1;
|
||||
public static final int ACRONYM = 2;
|
||||
public static final int COMPANY = 3;
|
||||
public static final int EMAIL = 4;
|
||||
public static final int HOST = 5;
|
||||
public static final int NUM = 6;
|
||||
public static final int CJ = 7;
|
||||
public static final int INTERNAL_LINK = 8;
|
||||
public static final int EXTERNAL_LINK = 9;
|
||||
public static final int CITATION = 10;
|
||||
public static final int CATEGORY = 11;
|
||||
public static final int BOLD = 12;
|
||||
public static final int ITALICS = 13;
|
||||
public static final int BOLD_ITALICS = 14;
|
||||
public static final int HEADING = 15;
|
||||
public static final int SUB_HEADING = 16;
|
||||
public static final int EXTERNAL_LINK_URL = 17;
|
||||
public static final int ALPHANUM = WikipediaTokenizer.ALPHANUM_ID;
|
||||
public static final int APOSTROPHE = WikipediaTokenizer.APOSTROPHE_ID;
|
||||
public static final int ACRONYM = WikipediaTokenizer.ACRONYM_ID;
|
||||
public static final int COMPANY = WikipediaTokenizer.COMPANY_ID;
|
||||
public static final int EMAIL = WikipediaTokenizer.EMAIL_ID;
|
||||
public static final int HOST = WikipediaTokenizer.HOST_ID;
|
||||
public static final int NUM = WikipediaTokenizer.NUM_ID;
|
||||
public static final int CJ = WikipediaTokenizer.CJ_ID;
|
||||
public static final int INTERNAL_LINK = WikipediaTokenizer.INTERNAL_LINK_ID;
|
||||
public static final int EXTERNAL_LINK = WikipediaTokenizer.EXTERNAL_LINK_ID;
|
||||
public static final int CITATION = WikipediaTokenizer.CITATION_ID;
|
||||
public static final int CATEGORY = WikipediaTokenizer.CATEGORY_ID;
|
||||
public static final int BOLD = WikipediaTokenizer.BOLD_ID;
|
||||
public static final int ITALICS = WikipediaTokenizer.ITALICS_ID;
|
||||
public static final int BOLD_ITALICS = WikipediaTokenizer.BOLD_ITALICS_ID;
|
||||
public static final int HEADING = WikipediaTokenizer.HEADING_ID;
|
||||
public static final int SUB_HEADING = WikipediaTokenizer.SUB_HEADING_ID;
|
||||
public static final int EXTERNAL_LINK_URL = WikipediaTokenizer.EXTERNAL_LINK_URL_ID;
|
||||
|
||||
|
||||
private int currentTokType;
|
||||
|
|
|
@ -30,24 +30,24 @@ import org.apache.lucene.analysis.Token;
|
|||
|
||||
%{
|
||||
|
||||
public static final int ALPHANUM = 0;
|
||||
public static final int APOSTROPHE = 1;
|
||||
public static final int ACRONYM = 2;
|
||||
public static final int COMPANY = 3;
|
||||
public static final int EMAIL = 4;
|
||||
public static final int HOST = 5;
|
||||
public static final int NUM = 6;
|
||||
public static final int CJ = 7;
|
||||
public static final int INTERNAL_LINK = 8;
|
||||
public static final int EXTERNAL_LINK = 9;
|
||||
public static final int CITATION = 10;
|
||||
public static final int CATEGORY = 11;
|
||||
public static final int BOLD = 12;
|
||||
public static final int ITALICS = 13;
|
||||
public static final int BOLD_ITALICS = 14;
|
||||
public static final int HEADING = 15;
|
||||
public static final int SUB_HEADING = 16;
|
||||
public static final int EXTERNAL_LINK_URL = 17;
|
||||
public static final int ALPHANUM = WikipediaTokenizer.ALPHANUM_ID;
|
||||
public static final int APOSTROPHE = WikipediaTokenizer.APOSTROPHE_ID;
|
||||
public static final int ACRONYM = WikipediaTokenizer.ACRONYM_ID;
|
||||
public static final int COMPANY = WikipediaTokenizer.COMPANY_ID;
|
||||
public static final int EMAIL = WikipediaTokenizer.EMAIL_ID;
|
||||
public static final int HOST = WikipediaTokenizer.HOST_ID;
|
||||
public static final int NUM = WikipediaTokenizer.NUM_ID;
|
||||
public static final int CJ = WikipediaTokenizer.CJ_ID;
|
||||
public static final int INTERNAL_LINK = WikipediaTokenizer.INTERNAL_LINK_ID;
|
||||
public static final int EXTERNAL_LINK = WikipediaTokenizer.EXTERNAL_LINK_ID;
|
||||
public static final int CITATION = WikipediaTokenizer.CITATION_ID;
|
||||
public static final int CATEGORY = WikipediaTokenizer.CATEGORY_ID;
|
||||
public static final int BOLD = WikipediaTokenizer.BOLD_ID;
|
||||
public static final int ITALICS = WikipediaTokenizer.ITALICS_ID;
|
||||
public static final int BOLD_ITALICS = WikipediaTokenizer.BOLD_ITALICS_ID;
|
||||
public static final int HEADING = WikipediaTokenizer.HEADING_ID;
|
||||
public static final int SUB_HEADING = WikipediaTokenizer.SUB_HEADING_ID;
|
||||
public static final int EXTERNAL_LINK_URL = WikipediaTokenizer.EXTERNAL_LINK_URL_ID;
|
||||
|
||||
|
||||
private int currentTokType;
|
||||
|
|
|
@ -41,8 +41,24 @@ import org.apache.lucene.analysis.Tokenizer;
|
|||
*/
|
||||
|
||||
public class StandardTokenizer extends Tokenizer {
|
||||
/** A private instance of the JFlex-constructed scanner */
|
||||
private final StandardTokenizerImpl scanner;
|
||||
/** A private instance of the JFlex-constructed scanner */
|
||||
private final StandardTokenizerImpl scanner;
|
||||
|
||||
public static final int ALPHANUM = 0;
|
||||
public static final int APOSTROPHE = 1;
|
||||
public static final int ACRONYM = 2;
|
||||
public static final int COMPANY = 3;
|
||||
public static final int EMAIL = 4;
|
||||
public static final int HOST = 5;
|
||||
public static final int NUM = 6;
|
||||
public static final int CJ = 7;
|
||||
|
||||
/**
|
||||
* @deprecated this solves a bug where HOSTs that end with '.' are identified
|
||||
* as ACRONYMs. It is deprecated and will be removed in the next
|
||||
* release.
|
||||
*/
|
||||
public static final int ACRONYM_DEP = 8;
|
||||
|
||||
/**
|
||||
* Specifies whether deprecated acronyms should be replaced with HOST type.
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.4.1 on 12/18/07 9:22 PM */
|
||||
/* The following code was generated by JFlex 1.4.1 on 1/25/08 6:54 PM */
|
||||
|
||||
package org.apache.lucene.analysis.standard;
|
||||
|
||||
|
@ -25,8 +25,8 @@ import org.apache.lucene.analysis.Token;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
|
||||
* on 12/18/07 9:22 PM from the specification file
|
||||
* <tt>/Volumes/User/grantingersoll/projects/lucene/java/lucene-clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
|
||||
* on 1/25/08 6:54 PM from the specification file
|
||||
* <tt>/tango/mike/src/lucene.clean/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class StandardTokenizerImpl {
|
||||
|
||||
|
@ -283,20 +283,20 @@ class StandardTokenizerImpl {
|
|||
|
||||
/* user code: */
|
||||
|
||||
public static final int ALPHANUM = 0;
|
||||
public static final int APOSTROPHE = 1;
|
||||
public static final int ACRONYM = 2;
|
||||
public static final int COMPANY = 3;
|
||||
public static final int EMAIL = 4;
|
||||
public static final int HOST = 5;
|
||||
public static final int NUM = 6;
|
||||
public static final int CJ = 7;
|
||||
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
|
||||
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
|
||||
public static final int ACRONYM = StandardTokenizer.ACRONYM;
|
||||
public static final int COMPANY = StandardTokenizer.COMPANY;
|
||||
public static final int EMAIL = StandardTokenizer.EMAIL;
|
||||
public static final int HOST = StandardTokenizer.HOST;
|
||||
public static final int NUM = StandardTokenizer.NUM;
|
||||
public static final int CJ = StandardTokenizer.CJ;
|
||||
/**
|
||||
* @deprecated this solves a bug where HOSTs that end with '.' are identified
|
||||
* as ACRONYMs. It is deprecated and will be removed in the next
|
||||
* release.
|
||||
*/
|
||||
public static final int ACRONYM_DEP = 8;
|
||||
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
|
||||
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
|
|
|
@ -30,20 +30,20 @@ import org.apache.lucene.analysis.Token;
|
|||
|
||||
%{
|
||||
|
||||
public static final int ALPHANUM = 0;
|
||||
public static final int APOSTROPHE = 1;
|
||||
public static final int ACRONYM = 2;
|
||||
public static final int COMPANY = 3;
|
||||
public static final int EMAIL = 4;
|
||||
public static final int HOST = 5;
|
||||
public static final int NUM = 6;
|
||||
public static final int CJ = 7;
|
||||
public static final int ALPHANUM = StandardTokenizer.ALPHANUM;
|
||||
public static final int APOSTROPHE = StandardTokenizer.APOSTROPHE;
|
||||
public static final int ACRONYM = StandardTokenizer.ACRONYM;
|
||||
public static final int COMPANY = StandardTokenizer.COMPANY;
|
||||
public static final int EMAIL = StandardTokenizer.EMAIL;
|
||||
public static final int HOST = StandardTokenizer.HOST;
|
||||
public static final int NUM = StandardTokenizer.NUM;
|
||||
public static final int CJ = StandardTokenizer.CJ;
|
||||
/**
|
||||
* @deprecated this solves a bug where HOSTs that end with '.' are identified
|
||||
* as ACRONYMs. It is deprecated and will be removed in the next
|
||||
* release.
|
||||
*/
|
||||
public static final int ACRONYM_DEP = 8;
|
||||
public static final int ACRONYM_DEP = StandardTokenizer.ACRONYM_DEP;
|
||||
|
||||
public static final String [] TOKEN_TYPES = new String [] {
|
||||
"<ALPHANUM>",
|
||||
|
|
|
@ -437,7 +437,7 @@ public class FSDirectory extends Directory {
|
|||
|
||||
// Inherit javadoc
|
||||
public IndexInput openInput(String name) throws IOException {
|
||||
return new FSIndexInput(new File(directory, name));
|
||||
return openInput(name, BufferedIndexInput.BUFFER_SIZE);
|
||||
}
|
||||
|
||||
// Inherit javadoc
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.LinkedList;
|
|||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.index.Payload;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
public class TestAnalyzers extends LuceneTestCase {
|
||||
|
||||
|
@ -118,6 +119,18 @@ public class TestAnalyzers extends LuceneTestCase {
|
|||
verifyPayload(ts);
|
||||
}
|
||||
|
||||
// Just a compile time test, to ensure the
|
||||
// StandardTokenizer constants remain publicly accessible
|
||||
public void _testStandardConstants() {
|
||||
int x = StandardTokenizer.ALPHANUM;
|
||||
x = StandardTokenizer.APOSTROPHE;
|
||||
x = StandardTokenizer.ACRONYM;
|
||||
x = StandardTokenizer.COMPANY;
|
||||
x = StandardTokenizer.EMAIL;
|
||||
x = StandardTokenizer.HOST;
|
||||
x = StandardTokenizer.NUM;
|
||||
x = StandardTokenizer.CJ;
|
||||
}
|
||||
}
|
||||
|
||||
class BuffTokenFilter extends TokenFilter {
|
||||
|
|
Loading…
Reference in New Issue