LUCENE-1103: Internal links should increment as all tokens do, since the first token is valid too

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@608989 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2008-01-04 20:36:23 +00:00
parent b18f6ae959
commit 79e09db401
3 changed files with 33 additions and 33 deletions

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.4.1 on 1/4/08 3:07 PM */
/* The following code was generated by JFlex 1.4.1 on 1/4/08 3:30 PM */
package org.apache.lucene.wikipedia.analysis;
@ -25,7 +25,7 @@ import org.apache.lucene.analysis.Token;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.4.1
* on 1/4/08 3:07 PM from the specification file
* on 1/4/08 3:30 PM from the specification file
* <tt>/Volumes/User/grantingersoll/projects/lucene/Lucene-Trunk/contrib/wikipedia/src/java/org/apache/lucene/wikipedia/analysis/WikipediaTokenizerImpl.jflex</tt>
*/
class WikipediaTokenizerImpl {
@ -790,85 +790,85 @@ final void getText(Token t, int tokType) {
{ numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);
}
case 47: break;
case 4:
{ positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
}
case 48: break;
case 39:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/
}
case 48: break;
case 49: break;
case 11:
{ currentTokType = ITALICS; yybegin(STRING); return currentTokType;/*italics*/
}
case 49: break;
case 50: break;
case 23:
{ positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);
}
case 50: break;
case 51: break;
case 5:
{ yybegin(CATEGORY_STATE); return currentTokType;
}
case 51: break;
case 52: break;
case 36:
{ numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/
}
case 52: break;
case 53: break;
case 8:
{ if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
}
case 53: break;
case 54: break;
case 24:
{ positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);
}
case 54: break;
case 55: break;
case 22:
{ positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);
}
case 55: break;
case 56: break;
case 41:
{ positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);
}
case 56: break;
case 57: break;
case 18:
{ yybegin(STRING); return currentTokType;/* STRING ALPHANUM*/
}
case 57: break;
case 58: break;
case 21:
{ positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}
}
case 58: break;
case 59: break;
case 1:
{ positionInc = 1;
}
case 59: break;
case 60: break;
case 43:
{ numBalanced = 0;currentTokType = CATEGORY;yybegin(CATEGORY_STATE);
}
case 60: break;
case 61: break;
case 25:
{ yybegin(YYINITIAL);
}
case 61: break;
case 62: break;
case 40:
{ positionInc = 1; yybegin(EXTERNAL_LINK_STATE); return currentTokType;
}
case 62: break;
case 63: break;
case 19:
{ numBalanced = 0;currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE);
}
case 63: break;
case 64: break;
case 13:
{ yybegin(STRING);return currentTokType;
}
case 64: break;
case 65: break;
case 38:
{ positionInc = 1; return EMAIL;
}
case 65: break;
case 66: break;
case 37:
{ positionInc = 1; return ACRONYM;
}
case 66: break;
case 4:
{ positionInc = 1;currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);
}
case 67: break;
case 17:
{ /* ignore STRING */
@ -926,21 +926,21 @@ final void getText(Token t, int tokType) {
{ currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); return currentTokType;
}
case 81: break;
case 6:
{ yybegin(INTERNAL_LINK_STATE); return currentTokType;
}
case 82: break;
case 2:
{ positionInc = 1; return ALPHANUM;
}
case 82: break;
case 83: break;
case 33:
{ positionInc = 1; return COMPANY;
}
case 83: break;
case 84: break;
case 10:
{ currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);
}
case 84: break;
case 6:
{ if (numLinkToks == 1){positionInc = 0;} else{positionInc = 1;} yybegin(INTERNAL_LINK_STATE); numLinkToks++; return currentTokType;
}
case 85: break;
case 26:
{ numLinkToks = 0; yybegin(YYINITIAL);

View File

@ -193,7 +193,7 @@ DOUBLE_EQUALS = "="{2}
//tokens within the link are incremented
{DOUBLE_BRACKET} {positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);}
{DOUBLE_BRACKET_CAT} {positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);}
{EXTERNAL_LINK} {positionInc = 1;currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);}
{EXTERNAL_LINK} {positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);}
{TWO_SINGLE_QUOTES} {positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}}
{DOUBLE_EQUALS} {positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);}
{DOUBLE_BRACE} {positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);}
@ -205,7 +205,7 @@ DOUBLE_EQUALS = "="{2}
<INTERNAL_LINK_STATE>{
//{ALPHANUM} tokens inside the link, including the first one, do not reset the position increment; they increment like all other tokens
//This is slightly different from EXTERNAL_LINK_STATE because that one has an explicit grammar for capturing the URL
{ALPHANUM} {if (numLinkToks == 1){positionInc = 0;} else{positionInc = 1;} yybegin(INTERNAL_LINK_STATE); numLinkToks++; return currentTokType;}
{ALPHANUM} {yybegin(INTERNAL_LINK_STATE); return currentTokType;}
{DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL);}
//ignore
. | {WHITESPACE} { positionInc = 1; }

View File

@ -171,7 +171,7 @@ public class WikipediaTokenizerTest extends TestCase {
assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "here",
new String(token.termBuffer(), 0, token.termLength()).equals("here") == true);
//Tokens inside an internal link increment the position like all other tokens, so "here" is expected to have a position increment of 1
assertTrue(token.getPositionIncrement() + " does not equal: " + 0, token.getPositionIncrement() == 0);
assertTrue(token.getPositionIncrement() + " does not equal: " + 1, token.getPositionIncrement() == 1);
token = tf.next(token);
assertTrue("token is null and it shouldn't be", token != null);
assertTrue(new String(token.termBuffer(), 0, token.termLength()) + " is not equal to " + "again",