mirror of https://github.com/apache/lucene.git
LUCENE-1103: Internal links should increment as all tokens do, since the first token is valid too
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608989 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b18f6ae959
commit
79e09db401
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.4.1 on 1/4/08 3:07 PM */
|
||||
/* The following code was generated by JFlex 1.4.1 on 1/4/08 3:30 PM */
|
||||
|
||||
package org.apache.lucene.wikipedia.analysis;
|
||||
|
||||
|
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Token;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
|
||||
* on 1/4/08 3:07 PM from the specification file
|
||||
* on 1/4/08 3:30 PM from the specification file
|
||||
* <tt>/Volumes/User/grantingersoll/projects/lucene/Lucene-Trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
|
||||
*/
|
||||
class WikipediaTokenizerImpl {
|
||||
|
@ -790,85 +790,85 @@ final void getText(Token t, int tokType) {
|
|||
{ numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);
|
||||
}
|
||||
case 47: break;
|
||||
case 4:
|
||||
{ positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
|
||||
}
|
||||
case 48: break;
|
||||
case 39:
|
||||
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/
|
||||
}
|
||||
case 48: break;
|
||||
case 49: break;
|
||||
case 11:
|
||||
{ currentTokType = ITALICS; yybegin(STRING); return currentTokType;/*italics*/
|
||||
}
|
||||
case 49: break;
|
||||
case 50: break;
|
||||
case 23:
|
||||
{ positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);
|
||||
}
|
||||
case 50: break;
|
||||
case 51: break;
|
||||
case 5:
|
||||
{ yybegin(CATEGORY_STATE); return currentTokType;
|
||||
}
|
||||
case 51: break;
|
||||
case 52: break;
|
||||
case 36:
|
||||
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/
|
||||
}
|
||||
case 52: break;
|
||||
case 53: break;
|
||||
case 8:
|
||||
{ if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
|
||||
}
|
||||
case 53: break;
|
||||
case 54: break;
|
||||
case 24:
|
||||
{ positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);
|
||||
}
|
||||
case 54: break;
|
||||
case 55: break;
|
||||
case 22:
|
||||
{ positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);
|
||||
}
|
||||
case 55: break;
|
||||
case 56: break;
|
||||
case 41:
|
||||
{ positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);
|
||||
}
|
||||
case 56: break;
|
||||
case 57: break;
|
||||
case 18:
|
||||
{ yybegin(STRING); return currentTokType;/* STRING ALPHANUM*/
|
||||
}
|
||||
case 57: break;
|
||||
case 58: break;
|
||||
case 21:
|
||||
{ positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}
|
||||
}
|
||||
case 58: break;
|
||||
case 59: break;
|
||||
case 1:
|
||||
{ positionInc = 1;
|
||||
}
|
||||
case 59: break;
|
||||
case 60: break;
|
||||
case 43:
|
||||
{ numBalanced = 0;currentTokType = CATEGORY;yybegin(CATEGORY_STATE);
|
||||
}
|
||||
case 60: break;
|
||||
case 61: break;
|
||||
case 25:
|
||||
{ yybegin(YYINITIAL);
|
||||
}
|
||||
case 61: break;
|
||||
case 62: break;
|
||||
case 40:
|
||||
{ positionInc = 1; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
|
||||
}
|
||||
case 62: break;
|
||||
case 63: break;
|
||||
case 19:
|
||||
{ numBalanced = 0;currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE);
|
||||
}
|
||||
case 63: break;
|
||||
case 64: break;
|
||||
case 13:
|
||||
{ yybegin(STRING);return currentTokType;
|
||||
}
|
||||
case 64: break;
|
||||
case 65: break;
|
||||
case 38:
|
||||
{ positionInc = 1; return EMAIL;
|
||||
}
|
||||
case 65: break;
|
||||
case 66: break;
|
||||
case 37:
|
||||
{ positionInc = 1; return ACRONYM;
|
||||
}
|
||||
case 66: break;
|
||||
case 4:
|
||||
{ positionInc = 1;currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
|
||||
}
|
||||
case 67: break;
|
||||
case 17:
|
||||
{ /* ignore STRING */
|
||||
|
@ -926,21 +926,21 @@ final void getText(Token t, int tokType) {
|
|||
{ currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); return currentTokType;
|
||||
}
|
||||
case 81: break;
|
||||
case 6:
|
||||
{ yybegin(INTERNAL_LINK_STATE); return currentTokType;
|
||||
}
|
||||
case 82: break;
|
||||
case 2:
|
||||
{ positionInc = 1; return ALPHANUM;
|
||||
}
|
||||
case 82: break;
|
||||
case 83: break;
|
||||
case 33:
|
||||
{ positionInc = 1; return COMPANY;
|
||||
}
|
||||
case 83: break;
|
||||
case 84: break;
|
||||
case 10:
|
||||
{ currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);
|
||||
}
|
||||
case 84: break;
|
||||
case 6:
|
||||
{ if (numLinkToks == 1){positionInc = 0;} else{positionInc = 1;} yybegin(INTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
|
||||
}
|
||||
case 85: break;
|
||||
case 26:
|
||||
{ numLinkToks = 0; yybegin(YYINITIAL);
|
||||
|
|
|
@ -193,7 +193,7 @@ DOUBLE_EQUALS = "="{2}
|
|||
//tokens within the link are incremented
|
||||
{DOUBLE_BRACKET} {positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);}
|
||||
{DOUBLE_BRACKET_CAT} {positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);}
|
||||
{EXTERNAL_LINK} {positionInc = 1;currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);}
|
||||
{EXTERNAL_LINK} {positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);}
|
||||
{TWO_SINGLE_QUOTES} {positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}}
|
||||
{DOUBLE_EQUALS} {positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);}
|
||||
{DOUBLE_BRACE} {positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);}
|
||||
|
@ -205,7 +205,7 @@ DOUBLE_EQUALS = "="{2}
|
|||
<INTERNAL_LINK_STATE>{
|
||||
//First {ALPHANUM} is the link itself; it is a valid token too, so it is no longer forced to position increment 0 and increments like every other token (LUCENE-1103)
|
||||
//This is slightly different from EXTERNAL_LINK_STATE because that one has an explicit grammar for capturing the URL
|
||||
{ALPHANUM} {if (numLinkToks == 1){positionInc = 0;} else{positionInc = 1;} yybegin(INTERNAL_LINK_STATE); numLinkToks++; return currentTokType;}
|
||||
{ALPHANUM} {yybegin(INTERNAL_LINK_STATE); return currentTokType;}
|
||||
{DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL);}
|
||||
//ignore
|
||||
. | {WHITESPACE} { positionInc = 1; }
|
||||
|
|
|
@ -171,7 +171,7 @@ public class WikipediaTokenizerTest extends TestCase {
|
|||
assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here",
|
||||
new String(token.termBuffer(), 0, token.termLength()).equals("here") == true);
|
||||
//The first token of the link ("here") is a valid token too, so it advances the position like every other token (LUCENE-1103)
|
||||
assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0);
|
||||
assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1);
|
||||
token = tf.next(token);
|
||||
assertTrue("token is null and it shouldn't be", token != null);
|
||||
assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "again",
|
||||
|
|
Loading…
Reference in New Issue