From ec2b654231e69fe1d0751a3fe39199465e08af7c Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Fri, 30 Sep 2011 16:23:24 +0000 Subject: [PATCH] LUCENE-3477: add explicit breaks in jflex sources so we don't hit compiler warnings; fix a couple other warnings git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1177723 13f79535-47bb-0310-9956-ffa450edef68 --- .../standard/ClassicTokenizerImpl.java | 16 +- .../standard/ClassicTokenizerImpl.jflex | 2 +- .../standard/SUPPLEMENTARY.jflex-macro | 2 +- .../standard/StandardTokenizerImpl.java | 14 +- .../standard/StandardTokenizerImpl.jflex | 2 +- .../standard/UAX29URLEmailTokenizerImpl.java | 16 +- .../standard/UAX29URLEmailTokenizerImpl.jflex | 2 +- .../std31/StandardTokenizerImpl31.java | 18 +- .../std31/StandardTokenizerImpl31.jflex | 2 +- .../std31/UAX29URLEmailTokenizerImpl31.java | 20 +- .../std31/UAX29URLEmailTokenizerImpl31.jflex | 2 +- .../wikipedia/WikipediaTokenizerImpl.java | 497 +++++++++--------- .../wikipedia/WikipediaTokenizerImpl.jflex | 86 +-- .../query/QueryAutoStopWordAnalyzerTest.java | 2 +- .../byTask/tasks/WriteLineDocTaskTest.java | 1 + 15 files changed, 344 insertions(+), 338 deletions(-) diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java index 1b9bf9f2674..32e990dd079 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ package org.apache.lucene.analysis.standard; @@ -33,8 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 2/9/11 11:45 AM from the specification file - * C:/Users/rmuir/workspace/lucene-2911/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex + * on 9/30/11 12:10 PM from the specification file + * /lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex */ class ClassicTokenizerImpl implements StandardTokenizerInterface { @@ -694,17 +694,17 @@ public final void getText(CharTermAttribute t) { { return HOST; } case 13: break; - case 1: - { /* ignore */ - } - case 14: break; case 8: { return ACRONYM_DEP; } - case 15: break; + case 14: break; case 5: { return NUM; } + case 15: break; + case 1: + { /* Break so we don't hit fall-through warning: */ break;/* ignore */ + } case 16: break; case 9: { return ACRONYM; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex index ce2bf056369..bb50a6ff46a 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ClassicTokenizerImpl.jflex @@ -127,4 +127,4 @@ WHITESPACE = \r\n | [ \r\n\t\f] {ACRONYM_DEP} { return ACRONYM_DEP; } /** Ignore the rest */ -. | {WHITESPACE} { /* ignore */ } +. | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro index c505bf46c15..53d36f7d6f7 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro @@ -14,7 +14,7 @@ * limitations under the License. */ -// Generated using ICU4J 4.6.0.0 on Wednesday, February 9, 2011 4:45:11 PM UTC +// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC // by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java index 2c2d46ac2b5..cf9593944ae 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/4/11 4:07 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ package org.apache.lucene.analysis.standard; @@ -1074,21 +1074,21 @@ public final class StandardTokenizerImpl implements StandardTokenizerInterface { { return IDEOGRAPHIC_TYPE; } case 12: break; - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ - } - case 13: break; case 8: { return HANGUL_TYPE; } - case 14: break; + case 13: break; case 3: { return NUMERIC_TYPE; } - case 15: break; + case 14: break; case 7: { return HIRAGANA_TYPE; } + case 15: break; + case 1: + { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } case 16: break; default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex index e624074b2f8..8238851fc86 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl.jflex @@ -189,4 +189,4 @@ HiraganaEx = {Hiragana} ({Format} | {Extend})* // WB3b. ÷ (Newline | CR | LF) // WB14. Any ÷ Any // -[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } +[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java index 9e29febeb5a..82597dd0e3e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/4/11 7:48 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ package org.apache.lucene.analysis.standard; @@ -3712,25 +3712,25 @@ public final class UAX29URLEmailTokenizerImpl implements StandardTokenizerInterf { return SOUTH_EAST_ASIAN_TYPE; } case 12: break; + case 1: + { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } + case 13: break; case 10: { return URL_TYPE; } - case 13: break; + case 14: break; case 9: { return EMAIL_TYPE; } - case 14: break; + case 15: break; case 4: { return KATAKANA_TYPE; } - case 15: break; + case 16: break; case 6: { return IDEOGRAPHIC_TYPE; } - case 16: break; - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ - } case 17: break; case 8: { return HANGUL_TYPE; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex index 3051c1755b6..707293faacf 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/UAX29URLEmailTokenizerImpl.jflex @@ -277,4 +277,4 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) // WB3b. ÷ (Newline | CR | LF) // WB14. Any ÷ Any // -[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } +[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java index 3a9eadee383..ab2b9c2955a 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/4/11 4:07 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ package org.apache.lucene.analysis.standard.std31; @@ -34,8 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 8/4/11 4:07 PM from the specification file - * /home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex + * on 9/30/11 12:10 PM from the specification file + * /lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex */ public final class StandardTokenizerImpl31 implements StandardTokenizerInterface { @@ -1055,21 +1055,21 @@ public final class StandardTokenizerImpl31 implements StandardTokenizerInterface { return IDEOGRAPHIC_TYPE; } case 12: break; - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ - } - case 13: break; case 8: { return HANGUL_TYPE; } - case 14: break; + case 13: break; case 3: { return NUMERIC_TYPE; } - case 15: break; + case 14: break; case 7: { return HIRAGANA_TYPE; } + case 15: break; + case 1: + { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } case 16: break; default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex index a60fb10d915..c4b5dc9fb04 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/StandardTokenizerImpl31.jflex @@ -181,4 +181,4 @@ ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})* // WB3b. ÷ (Newline | CR | LF) // WB14. Any ÷ Any // -[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } +[^] { /* Break so we don't hit fall-through warning: */ break; /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java index 41de96ea8d1..592f0911040 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 8/4/11 7:33 PM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:10 PM */ package org.apache.lucene.analysis.standard.std31; @@ -34,8 +34,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 8/4/11 7:33 PM from the specification file - * /home/rmuir/workspace/lucene-clean-trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex + * on 9/30/11 12:10 PM from the specification file + * /lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex */ public final class UAX29URLEmailTokenizerImpl31 implements StandardTokenizerInterface { @@ -3622,25 +3622,25 @@ public final class UAX29URLEmailTokenizerImpl31 implements StandardTokenizerInte { return SOUTH_EAST_ASIAN_TYPE; } case 12: break; + case 1: + { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ + } + case 13: break; case 10: { return URL_TYPE; } - case 13: break; + case 14: break; case 9: { return EMAIL_TYPE; } - case 14: break; + case 15: break; case 4: { return KATAKANA_TYPE; } - case 15: break; + case 16: break; case 6: { return IDEOGRAPHIC_TYPE; } - case 16: break; - case 1: - { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ - } case 17: break; case 8: { return HANGUL_TYPE; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex index e406831b191..bb949a6d77e 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex @@ -266,4 +266,4 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost}) // WB3b. ÷ (Newline | CR | LF) // WB14. Any ÷ Any // -[^] { /* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } +[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ } diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java index b9254c1d44c..ed7389b1888 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.java @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 2/9/11 11:45 AM */ +/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 9/30/11 12:11 PM */ package org.apache.lucene.analysis.wikipedia; @@ -25,8 +25,8 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; /** * This class is a scanner generated by * JFlex 1.5.0-SNAPSHOT - * on 2/9/11 11:45 AM from the specification file - * C:/Users/rmuir/workspace/lucene-2911/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex + * on 9/30/11 12:11 PM from the specification file + * /lucene/jflex/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex */ class WikipediaTokenizerImpl { @@ -92,15 +92,16 @@ class WikipediaTokenizerImpl { "\1\13\1\14\1\10\1\15\1\16\1\15\1\17\1\20"+ "\1\10\1\21\1\10\4\22\1\23\1\22\1\24\1\25"+ "\1\26\3\0\1\27\14\0\1\30\1\31\1\32\1\33"+ - "\1\11\1\0\1\34\1\35\1\0\1\36\1\0\1\37"+ - "\3\0\1\40\1\41\2\42\1\41\2\43\2\0\1\42"+ - "\1\0\14\42\1\41\3\0\1\11\1\44\3\0\1\45"+ - "\1\46\5\0\1\47\4\0\1\47\2\0\2\47\2\0"+ - "\1\11\5\0\1\31\1\41\1\42\1\50\3\0\1\11"+ - "\2\0\1\51\30\0\1\52\2\0\1\53\1\54\1\55"; + "\1\11\1\0\1\34\1\35\1\36\1\0\1\37\1\0"+ + "\1\40\3\0\1\41\1\42\2\43\1\42\2\44\2\0"+ + "\1\43\1\0\14\43\1\42\3\0\1\11\1\45\3\0"+ + "\1\46\1\47\5\0\1\50\4\0\1\50\2\0\2\50"+ + "\2\0\1\11\5\0\1\31\1\42\1\43\1\51\3\0"+ + "\1\11\2\0\1\52\30\0\1\53\2\0\1\54\1\55"+ + "\1\56"; private static int [] zzUnpackAction() { - int [] result = new int[183]; + int [] result = new int[184]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; @@ -134,23 +135,23 @@ class WikipediaTokenizerImpl { "\0\u070c\0\u0738\0\u0764\0\u0790\0\u01b8\0\u01b8\0\u07bc\0\u07e8"+ "\0\u0814\0\u01b8\0\u0840\0\u086c\0\u0898\0\u08c4\0\u08f0\0\u091c"+ "\0\u0948\0\u0974\0\u09a0\0\u09cc\0\u09f8\0\u0a24\0\u0a50\0\u0a7c"+ - "\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u0b2c\0\u0b58"+ - "\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c\0\u0cb8"+ - "\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0\0\u0dec"+ - "\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20\0\u0f4c"+ - "\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080\0\u10ac"+ - "\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8\0\u11b4"+ - "\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8\0\u1314"+ - "\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0\0\u141c"+ - "\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8\0\u1550"+ - "\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684\0\u16b0"+ - "\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8\0\u17e4"+ - "\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918\0\u1944"+ - "\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78\0\u1aa4"+ - "\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8"; + "\0\u01b8\0\u01b8\0\u0aa8\0\u0ad4\0\u0b00\0\u0b00\0\u01b8\0\u0b2c"+ + "\0\u0b58\0\u0b84\0\u0bb0\0\u0bdc\0\u0c08\0\u0c34\0\u0c60\0\u0c8c"+ + "\0\u0cb8\0\u0ce4\0\u0d10\0\u0898\0\u0d3c\0\u0d68\0\u0d94\0\u0dc0"+ + "\0\u0dec\0\u0e18\0\u0e44\0\u0e70\0\u0e9c\0\u0ec8\0\u0ef4\0\u0f20"+ + "\0\u0f4c\0\u0f78\0\u0fa4\0\u0fd0\0\u0ffc\0\u1028\0\u1054\0\u1080"+ + "\0\u10ac\0\u10d8\0\u01b8\0\u1104\0\u1130\0\u115c\0\u1188\0\u01b8"+ + "\0\u11b4\0\u11e0\0\u120c\0\u1238\0\u1264\0\u1290\0\u12bc\0\u12e8"+ + "\0\u1314\0\u1340\0\u136c\0\u1398\0\u13c4\0\u086c\0\u09f8\0\u13f0"+ + "\0\u141c\0\u1448\0\u1474\0\u14a0\0\u14cc\0\u14f8\0\u1524\0\u01b8"+ + "\0\u1550\0\u157c\0\u15a8\0\u15d4\0\u1600\0\u162c\0\u1658\0\u1684"+ + "\0\u16b0\0\u01b8\0\u16dc\0\u1708\0\u1734\0\u1760\0\u178c\0\u17b8"+ + "\0\u17e4\0\u1810\0\u183c\0\u1868\0\u1894\0\u18c0\0\u18ec\0\u1918"+ + "\0\u1944\0\u1970\0\u199c\0\u19c8\0\u19f4\0\u1a20\0\u1a4c\0\u1a78"+ + "\0\u1aa4\0\u1ad0\0\u1afc\0\u1b28\0\u1b54\0\u01b8\0\u01b8\0\u01b8"; private static int [] zzUnpackRowMap() { - int [] result = new int[183]; + int [] result = new int[184]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; @@ -208,115 +209,115 @@ class WikipediaTokenizerImpl { "\3\0\3\40\16\0\4\40\7\0\2\40\1\113\12\40"+ "\3\0\3\40\2\0\1\114\67\0\4\44\7\0\15\44"+ "\3\0\3\44\24\0\1\36\55\0\1\115\43\0\4\47"+ - "\7\0\15\47\3\0\3\47\26\0\1\116\37\0\1\111"+ - "\57\0\4\52\7\0\15\52\3\0\3\52\11\0\1\117"+ + "\7\0\15\47\3\0\3\47\26\0\1\116\37\0\1\117"+ + "\57\0\4\52\7\0\15\52\3\0\3\52\11\0\1\120"+ "\4\0\4\70\7\0\15\70\3\0\3\70\16\0\4\54"+ - "\7\0\15\54\3\0\3\54\47\0\1\111\6\0\1\120"+ - "\63\0\1\121\57\0\4\62\7\0\15\62\3\0\3\62"+ - "\24\0\1\56\55\0\1\122\43\0\4\70\7\0\15\70"+ - "\3\0\3\70\14\0\1\36\1\0\4\123\1\0\3\124"+ - "\3\0\15\123\3\0\3\123\14\0\1\36\1\0\4\123"+ - "\1\0\3\124\3\0\3\123\1\125\11\123\3\0\3\123"+ - "\16\0\1\126\1\0\1\126\10\0\15\126\3\0\3\126"+ - "\16\0\1\127\1\130\1\131\1\132\7\0\15\127\3\0"+ - "\3\127\16\0\1\133\1\0\1\133\10\0\15\133\3\0"+ - "\3\133\16\0\1\134\1\135\1\134\1\135\7\0\15\134"+ - "\3\0\3\134\16\0\1\136\2\137\1\140\7\0\15\136"+ - "\3\0\3\136\16\0\1\100\2\141\10\0\15\100\3\0"+ - "\3\100\16\0\1\142\2\143\1\144\7\0\15\142\3\0"+ - "\3\142\16\0\4\135\7\0\15\135\3\0\3\135\16\0"+ - "\1\145\2\146\1\147\7\0\15\145\3\0\3\145\16\0"+ - "\1\150\2\151\1\152\7\0\15\150\3\0\3\150\16\0"+ - "\1\153\1\143\1\154\1\144\7\0\15\153\3\0\3\153"+ - "\16\0\1\155\2\130\1\132\7\0\15\155\3\0\3\155"+ - "\30\0\1\156\1\157\64\0\1\160\27\0\4\40\7\0"+ - "\2\40\1\161\12\40\3\0\3\40\2\0\1\162\101\0"+ - "\1\163\1\164\40\0\4\70\7\0\6\70\1\165\6\70"+ - "\3\0\3\70\2\0\1\166\63\0\1\167\71\0\1\170"+ - "\1\171\34\0\1\172\1\0\1\36\1\0\4\123\1\0"+ - "\3\124\3\0\15\123\3\0\3\123\16\0\4\173\1\0"+ - "\3\124\3\0\15\173\3\0\3\173\12\0\1\172\1\0"+ - "\1\36\1\0\4\123\1\0\3\124\3\0\10\123\1\174"+ - "\4\123\3\0\3\123\2\0\1\73\13\0\1\126\1\0"+ - "\1\126\10\0\15\126\3\0\3\126\3\0\1\175\1\0"+ - "\1\102\2\176\6\0\1\127\1\130\1\131\1\132\7\0"+ - "\15\127\3\0\3\127\3\0\1\177\1\0\1\102\2\200"+ - "\1\0\1\201\3\0\1\201\3\130\1\132\7\0\15\130"+ - "\3\0\3\130\3\0\1\202\1\0\1\102\2\200\1\0"+ - "\1\201\3\0\1\201\1\131\1\130\1\131\1\132\7\0"+ - "\15\131\3\0\3\131\3\0\1\203\1\0\1\102\2\176"+ - "\6\0\4\132\7\0\15\132\3\0\3\132\3\0\1\204"+ - "\2\0\1\204\7\0\1\134\1\135\1\134\1\135\7\0"+ - "\15\134\3\0\3\134\3\0\1\204\2\0\1\204\7\0"+ - "\4\135\7\0\15\135\3\0\3\135\3\0\1\176\1\0"+ - "\1\102\2\176\6\0\1\136\2\137\1\140\7\0\15\136"+ - "\3\0\3\136\3\0\1\200\1\0\1\102\2\200\1\0"+ - "\1\201\3\0\1\201\3\137\1\140\7\0\15\137\3\0"+ - "\3\137\3\0\1\176\1\0\1\102\2\176\6\0\4\140"+ - "\7\0\15\140\3\0\3\140\3\0\1\201\2\0\2\201"+ - "\1\0\1\201\3\0\1\201\3\141\10\0\15\141\3\0"+ - "\3\141\3\0\1\106\1\0\1\102\2\77\1\0\1\100"+ - "\3\0\1\100\1\142\2\143\1\144\7\0\15\142\3\0"+ - "\3\142\3\0\1\101\1\0\1\102\2\103\1\0\1\104"+ - "\3\0\1\104\3\143\1\144\7\0\15\143\3\0\3\143"+ + "\7\0\15\54\3\0\3\54\47\0\1\117\6\0\1\121"+ + "\63\0\1\122\57\0\4\62\7\0\15\62\3\0\3\62"+ + "\24\0\1\56\55\0\1\123\43\0\4\70\7\0\15\70"+ + "\3\0\3\70\14\0\1\36\1\0\4\124\1\0\3\125"+ + "\3\0\15\124\3\0\3\124\14\0\1\36\1\0\4\124"+ + "\1\0\3\125\3\0\3\124\1\126\11\124\3\0\3\124"+ + "\16\0\1\127\1\0\1\127\10\0\15\127\3\0\3\127"+ + "\16\0\1\130\1\131\1\132\1\133\7\0\15\130\3\0"+ + "\3\130\16\0\1\134\1\0\1\134\10\0\15\134\3\0"+ + "\3\134\16\0\1\135\1\136\1\135\1\136\7\0\15\135"+ + "\3\0\3\135\16\0\1\137\2\140\1\141\7\0\15\137"+ + "\3\0\3\137\16\0\1\100\2\142\10\0\15\100\3\0"+ + "\3\100\16\0\1\143\2\144\1\145\7\0\15\143\3\0"+ + "\3\143\16\0\4\136\7\0\15\136\3\0\3\136\16\0"+ + "\1\146\2\147\1\150\7\0\15\146\3\0\3\146\16\0"+ + "\1\151\2\152\1\153\7\0\15\151\3\0\3\151\16\0"+ + "\1\154\1\144\1\155\1\145\7\0\15\154\3\0\3\154"+ + "\16\0\1\156\2\131\1\133\7\0\15\156\3\0\3\156"+ + "\30\0\1\157\1\160\64\0\1\161\27\0\4\40\7\0"+ + "\2\40\1\162\12\40\3\0\3\40\2\0\1\163\101\0"+ + "\1\164\1\165\40\0\4\70\7\0\6\70\1\166\6\70"+ + "\3\0\3\70\2\0\1\167\63\0\1\170\71\0\1\171"+ + "\1\172\34\0\1\173\1\0\1\36\1\0\4\124\1\0"+ + "\3\125\3\0\15\124\3\0\3\124\16\0\4\174\1\0"+ + "\3\125\3\0\15\174\3\0\3\174\12\0\1\173\1\0"+ + "\1\36\1\0\4\124\1\0\3\125\3\0\10\124\1\175"+ + "\4\124\3\0\3\124\2\0\1\73\13\0\1\127\1\0"+ + "\1\127\10\0\15\127\3\0\3\127\3\0\1\176\1\0"+ + "\1\102\2\177\6\0\1\130\1\131\1\132\1\133\7\0"+ + "\15\130\3\0\3\130\3\0\1\200\1\0\1\102\2\201"+ + "\1\0\1\202\3\0\1\202\3\131\1\133\7\0\15\131"+ + "\3\0\3\131\3\0\1\203\1\0\1\102\2\201\1\0"+ + "\1\202\3\0\1\202\1\132\1\131\1\132\1\133\7\0"+ + "\15\132\3\0\3\132\3\0\1\204\1\0\1\102\2\177"+ + "\6\0\4\133\7\0\15\133\3\0\3\133\3\0\1\205"+ + "\2\0\1\205\7\0\1\135\1\136\1\135\1\136\7\0"+ + "\15\135\3\0\3\135\3\0\1\205\2\0\1\205\7\0"+ + "\4\136\7\0\15\136\3\0\3\136\3\0\1\177\1\0"+ + "\1\102\2\177\6\0\1\137\2\140\1\141\7\0\15\137"+ + "\3\0\3\137\3\0\1\201\1\0\1\102\2\201\1\0"+ + "\1\202\3\0\1\202\3\140\1\141\7\0\15\140\3\0"+ + "\3\140\3\0\1\177\1\0\1\102\2\177\6\0\4\141"+ + "\7\0\15\141\3\0\3\141\3\0\1\202\2\0\2\202"+ + "\1\0\1\202\3\0\1\202\3\142\10\0\15\142\3\0"+ + "\3\142\3\0\1\106\1\0\1\102\2\77\1\0\1\100"+ + "\3\0\1\100\1\143\2\144\1\145\7\0\15\143\3\0"+ + "\3\143\3\0\1\101\1\0\1\102\2\103\1\0\1\104"+ + "\3\0\1\104\3\144\1\145\7\0\15\144\3\0\3\144"+ "\3\0\1\106\1\0\1\102\2\77\1\0\1\100\3\0"+ - "\1\100\4\144\7\0\15\144\3\0\3\144\3\0\1\77"+ - "\1\0\1\102\2\77\1\0\1\100\3\0\1\100\1\145"+ - "\2\146\1\147\7\0\15\145\3\0\3\145\3\0\1\103"+ - "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\3\146"+ - "\1\147\7\0\15\146\3\0\3\146\3\0\1\77\1\0"+ - "\1\102\2\77\1\0\1\100\3\0\1\100\4\147\7\0"+ - "\15\147\3\0\3\147\3\0\1\100\2\0\2\100\1\0"+ - "\1\100\3\0\1\100\1\150\2\151\1\152\7\0\15\150"+ - "\3\0\3\150\3\0\1\104\2\0\2\104\1\0\1\104"+ - "\3\0\1\104\3\151\1\152\7\0\15\151\3\0\3\151"+ + "\1\100\4\145\7\0\15\145\3\0\3\145\3\0\1\77"+ + "\1\0\1\102\2\77\1\0\1\100\3\0\1\100\1\146"+ + "\2\147\1\150\7\0\15\146\3\0\3\146\3\0\1\103"+ + "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\3\147"+ + "\1\150\7\0\15\147\3\0\3\147\3\0\1\77\1\0"+ + "\1\102\2\77\1\0\1\100\3\0\1\100\4\150\7\0"+ + "\15\150\3\0\3\150\3\0\1\100\2\0\2\100\1\0"+ + "\1\100\3\0\1\100\1\151\2\152\1\153\7\0\15\151"+ + "\3\0\3\151\3\0\1\104\2\0\2\104\1\0\1\104"+ + "\3\0\1\104\3\152\1\153\7\0\15\152\3\0\3\152"+ "\3\0\1\100\2\0\2\100\1\0\1\100\3\0\1\100"+ - "\4\152\7\0\15\152\3\0\3\152\3\0\1\205\1\0"+ - "\1\102\2\77\1\0\1\100\3\0\1\100\1\153\1\143"+ - "\1\154\1\144\7\0\15\153\3\0\3\153\3\0\1\206"+ - "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\1\154"+ - "\1\143\1\154\1\144\7\0\15\154\3\0\3\154\3\0"+ - "\1\203\1\0\1\102\2\176\6\0\1\155\2\130\1\132"+ - "\7\0\15\155\3\0\3\155\31\0\1\157\54\0\1\207"+ - "\64\0\1\210\26\0\4\40\7\0\15\40\3\0\1\40"+ - "\1\211\1\40\31\0\1\164\54\0\1\212\35\0\1\36"+ - "\1\0\4\123\1\0\3\124\3\0\3\123\1\213\11\123"+ - "\3\0\3\123\2\0\1\214\102\0\1\171\54\0\1\215"+ - "\34\0\1\216\52\0\1\172\3\0\4\173\7\0\15\173"+ - "\3\0\3\173\12\0\1\172\1\0\1\217\1\0\4\123"+ - "\1\0\3\124\3\0\15\123\3\0\3\123\16\0\1\220"+ - "\1\132\1\220\1\132\7\0\15\220\3\0\3\220\16\0"+ - "\4\140\7\0\15\140\3\0\3\140\16\0\4\144\7\0"+ - "\15\144\3\0\3\144\16\0\4\147\7\0\15\147\3\0"+ - "\3\147\16\0\4\152\7\0\15\152\3\0\3\152\16\0"+ - "\1\221\1\144\1\221\1\144\7\0\15\221\3\0\3\221"+ - "\16\0\4\132\7\0\15\132\3\0\3\132\16\0\4\222"+ - "\7\0\15\222\3\0\3\222\33\0\1\223\61\0\1\224"+ - "\30\0\4\40\6\0\1\225\15\40\3\0\2\40\1\226"+ - "\33\0\1\227\32\0\1\172\1\0\1\36\1\0\4\123"+ - "\1\0\3\124\3\0\10\123\1\230\4\123\3\0\3\123"+ - "\2\0\1\231\104\0\1\232\36\0\4\233\7\0\15\233"+ - "\3\0\3\233\3\0\1\175\1\0\1\102\2\176\6\0"+ - "\1\220\1\132\1\220\1\132\7\0\15\220\3\0\3\220"+ - "\3\0\1\205\1\0\1\102\2\77\1\0\1\100\3\0"+ - "\1\100\1\221\1\144\1\221\1\144\7\0\15\221\3\0"+ - "\3\221\3\0\1\204\2\0\1\204\7\0\4\222\7\0"+ - "\15\222\3\0\3\222\34\0\1\234\55\0\1\235\26\0"+ - "\1\236\60\0\4\40\6\0\1\225\15\40\3\0\3\40"+ - "\34\0\1\237\31\0\1\172\1\0\1\111\1\0\4\123"+ - "\1\0\3\124\3\0\15\123\3\0\3\123\34\0\1\240"+ - "\32\0\1\241\2\0\4\233\7\0\15\233\3\0\3\233"+ - "\35\0\1\242\62\0\1\243\20\0\1\244\77\0\1\245"+ - "\53\0\1\246\32\0\1\36\1\0\4\173\1\0\3\124"+ - "\3\0\15\173\3\0\3\173\36\0\1\247\53\0\1\250"+ - "\33\0\4\251\7\0\15\251\3\0\3\251\36\0\1\252"+ - "\53\0\1\253\54\0\1\254\61\0\1\255\11\0\1\256"+ - "\12\0\4\251\7\0\15\251\3\0\3\251\37\0\1\257"+ - "\53\0\1\260\54\0\1\261\22\0\1\13\62\0\4\262"+ - "\7\0\15\262\3\0\3\262\40\0\1\263\53\0\1\264"+ - "\43\0\1\265\26\0\2\262\1\0\2\262\1\0\2\262"+ - "\2\0\5\262\7\0\15\262\3\0\4\262\27\0\1\266"+ - "\53\0\1\267\24\0"; + "\4\153\7\0\15\153\3\0\3\153\3\0\1\206\1\0"+ + "\1\102\2\77\1\0\1\100\3\0\1\100\1\154\1\144"+ + "\1\155\1\145\7\0\15\154\3\0\3\154\3\0\1\207"+ + "\1\0\1\102\2\103\1\0\1\104\3\0\1\104\1\155"+ + "\1\144\1\155\1\145\7\0\15\155\3\0\3\155\3\0"+ + "\1\204\1\0\1\102\2\177\6\0\1\156\2\131\1\133"+ + "\7\0\15\156\3\0\3\156\31\0\1\160\54\0\1\210"+ + "\64\0\1\211\26\0\4\40\7\0\15\40\3\0\1\40"+ + "\1\212\1\40\31\0\1\165\54\0\1\213\35\0\1\36"+ + "\1\0\4\124\1\0\3\125\3\0\3\124\1\214\11\124"+ + "\3\0\3\124\2\0\1\215\102\0\1\172\54\0\1\216"+ + "\34\0\1\217\52\0\1\173\3\0\4\174\7\0\15\174"+ + "\3\0\3\174\12\0\1\173\1\0\1\220\1\0\4\124"+ + "\1\0\3\125\3\0\15\124\3\0\3\124\16\0\1\221"+ + "\1\133\1\221\1\133\7\0\15\221\3\0\3\221\16\0"+ + "\4\141\7\0\15\141\3\0\3\141\16\0\4\145\7\0"+ + "\15\145\3\0\3\145\16\0\4\150\7\0\15\150\3\0"+ + "\3\150\16\0\4\153\7\0\15\153\3\0\3\153\16\0"+ + "\1\222\1\145\1\222\1\145\7\0\15\222\3\0\3\222"+ + "\16\0\4\133\7\0\15\133\3\0\3\133\16\0\4\223"+ + "\7\0\15\223\3\0\3\223\33\0\1\224\61\0\1\225"+ + "\30\0\4\40\6\0\1\226\15\40\3\0\2\40\1\227"+ + "\33\0\1\230\32\0\1\173\1\0\1\36\1\0\4\124"+ + "\1\0\3\125\3\0\10\124\1\231\4\124\3\0\3\124"+ + "\2\0\1\232\104\0\1\233\36\0\4\234\7\0\15\234"+ + "\3\0\3\234\3\0\1\176\1\0\1\102\2\177\6\0"+ + "\1\221\1\133\1\221\1\133\7\0\15\221\3\0\3\221"+ + "\3\0\1\206\1\0\1\102\2\77\1\0\1\100\3\0"+ + "\1\100\1\222\1\145\1\222\1\145\7\0\15\222\3\0"+ + "\3\222\3\0\1\205\2\0\1\205\7\0\4\223\7\0"+ + "\15\223\3\0\3\223\34\0\1\235\55\0\1\236\26\0"+ + "\1\237\60\0\4\40\6\0\1\226\15\40\3\0\3\40"+ + "\34\0\1\240\31\0\1\173\1\0\1\117\1\0\4\124"+ + "\1\0\3\125\3\0\15\124\3\0\3\124\34\0\1\241"+ + "\32\0\1\242\2\0\4\234\7\0\15\234\3\0\3\234"+ + "\35\0\1\243\62\0\1\244\20\0\1\245\77\0\1\246"+ + "\53\0\1\247\32\0\1\36\1\0\4\174\1\0\3\125"+ + "\3\0\15\174\3\0\3\174\36\0\1\250\53\0\1\251"+ + "\33\0\4\252\7\0\15\252\3\0\3\252\36\0\1\253"+ + "\53\0\1\254\54\0\1\255\61\0\1\256\11\0\1\257"+ + "\12\0\4\252\7\0\15\252\3\0\3\252\37\0\1\260"+ + "\53\0\1\261\54\0\1\262\22\0\1\13\62\0\4\263"+ + "\7\0\15\263\3\0\3\263\40\0\1\264\53\0\1\265"+ + "\43\0\1\266\26\0\2\263\1\0\2\263\1\0\2\263"+ + "\2\0\5\263\7\0\15\263\3\0\4\263\27\0\1\267"+ + "\53\0\1\270\24\0"; private static int [] zzUnpackTrans() { int [] result = new int[7040]; @@ -359,15 +360,15 @@ class WikipediaTokenizerImpl { private static final String ZZ_ATTRIBUTE_PACKED_0 = "\12\0\1\11\7\1\1\11\3\1\1\11\6\1\1\11"+ "\2\1\1\11\14\1\1\11\6\1\2\11\3\0\1\11"+ - "\14\0\2\1\2\11\1\1\1\0\2\1\1\0\1\1"+ - "\1\0\1\1\3\0\7\1\2\0\1\1\1\0\15\1"+ - "\3\0\1\1\1\11\3\0\1\1\1\11\5\0\1\1"+ - "\4\0\1\1\2\0\2\1\2\0\1\1\5\0\1\11"+ - "\3\1\3\0\1\1\2\0\1\11\30\0\1\1\2\0"+ - "\3\11"; + "\14\0\2\1\2\11\1\1\1\0\2\1\1\11\1\0"+ + "\1\1\1\0\1\1\3\0\7\1\2\0\1\1\1\0"+ + "\15\1\3\0\1\1\1\11\3\0\1\1\1\11\5\0"+ + "\1\1\4\0\1\1\2\0\2\1\2\0\1\1\5\0"+ + "\1\11\3\1\3\0\1\1\2\0\1\11\30\0\1\1"+ + "\2\0\3\11"; private static int [] zzUnpackAttribute() { - int [] result = new int[183]; + int [] result = new int[184]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; @@ -809,186 +810,190 @@ final int setText(StringBuilder buffer){ zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { + case 44: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break; + } + case 47: break; + case 37: + { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break; + } + case 48: break; case 16: { currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType; } - case 46: break; - case 39: - { positionInc = 1; return ACRONYM; - } - case 47: break; - case 8: - { /* ignore */ - } - case 48: break; - case 20: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); - } case 49: break; - case 35: - { positionInc = 1; return COMPANY; + case 20: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; } case 50: break; - case 4: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE); + case 40: + { positionInc = 1; return ACRONYM; } case 51: break; - case 25: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE); + case 5: + { positionInc = 1; /* Break so we don't hit fall-through warning: */ break; } case 52: break; - case 43: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE); + case 36: + { positionInc = 1; return COMPANY; } case 53: break; - case 22: - { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;} + case 10: + { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break; } case 54: break; - case 34: - { positionInc = 1; return NUM; + case 15: + { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break; } case 55: break; - case 32: - { positionInc = 1; return APOSTROPHE; + case 22: + { numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break; } case 56: break; - case 23: - { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE); + case 35: + { positionInc = 1; return NUM; } case 57: break; + case 33: + { positionInc = 1; return APOSTROPHE; + } + case 58: break; case 21: { yybegin(STRING); return currentTokType;/*pipe*/ } - case 58: break; + case 59: break; + case 18: + { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ + } + case 60: break; case 2: { positionInc = 1; return ALPHANUM; } - case 59: break; - case 29: - { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); + case 61: break; + case 1: + { numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break; } - case 60: break; + case 62: break; case 17: { yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType; } - case 61: break; - case 44: - { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); - } - case 62: break; - case 26: - { yybegin(YYINITIAL); - } case 63: break; + case 39: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/ + } + case 64: break; + case 29: + { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; + } + case 65: break; + case 46: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break; + } + case 66: break; + case 27: + { numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break; + } + case 67: break; + case 4: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break; + } + case 68: break; + case 38: + { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/ + } + case 69: break; + case 13: + { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; + } + case 70: break; case 3: { positionInc = 1; return CJ; } - case 64: break; - case 38: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/ + case 71: break; + case 45: + { currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break; } - case 65: break; - case 15: - { currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); - } - case 66: break; - case 30: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/ - } - case 67: break; + case 72: break; case 6: { yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType; } - case 68: break; - case 5: - { positionInc = 1; + case 73: break; + case 11: + { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break; } - case 69: break; + case 74: break; + case 25: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break; + } + case 75: break; + case 8: + { /* Break so we don't hit fall-through warning: */ break;/* ignore */ + } + case 76: break; case 19: { yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/ } - case 70: break; - case 42: + case 77: break; + case 43: { positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType; } - case 71: break; - case 27: - { numLinkToks = 0; yybegin(YYINITIAL); + case 78: break; + case 42: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/ } - case 72: break; - case 11: - { currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); + case 79: break; + case 30: + { yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break; } - case 73: break; - case 13: - { currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); - } - case 74: break; + case 80: break; case 14: { yybegin(STRING); numWikiTokensSeen++; return currentTokType; } - case 75: break; - case 45: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); - } - case 76: break; - case 28: - { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); - } - case 77: break; - case 37: - { numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/ - } - case 78: break; + case 81: break; case 9: { if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType; } - case 79: break; + case 82: break; case 7: { yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType; } - case 80: break; - case 24: - { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE); - } - case 81: break; - case 40: + case 83: break; + case 41: { positionInc = 1; return EMAIL; } - case 82: break; - case 1: - { numWikiTokensSeen = 0; positionInc = 1; - } - case 83: break; - case 18: - { /* ignore STRING */ - } case 84: break; - case 36: - { currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); + case 28: + { currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; } case 85: break; - case 33: - { positionInc = 1; return HOST; + case 23: + { numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break; } case 86: break; - case 31: - { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); + case 34: + { positionInc = 1; return HOST; } case 87: break; - case 41: - { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/ + case 32: + { numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break; } case 88: break; case 12: { currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/ } case 89: break; - case 10: - { numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); + case 24: + { numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break; } case 90: break; + case 31: + { numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/ + } + case 91: break; + case 26: + { yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break; + } + case 92: break; default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { zzAtEOF = true; diff --git a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex index 477c55bd030..48461434428 100644 --- a/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex +++ b/modules/analysis/common/src/java/org/apache/lucene/analysis/wikipedia/WikipediaTokenizerImpl.jflex @@ -192,108 +192,108 @@ DOUBLE_EQUALS = "="{2} //First {ALPHANUM} is always the link, set positioninc to 1 for double bracket, but then inside the internal link state //set it to 0 for the next token, such that the link and the first token are in the same position, but then subsequent //tokens within the link are incremented - {DOUBLE_BRACKET} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);} - {DOUBLE_BRACKET_CAT} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);} - {EXTERNAL_LINK} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);} - {TWO_SINGLE_QUOTES} {numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}} - {DOUBLE_EQUALS} {numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);} - {DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);} - {CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);} + {DOUBLE_BRACKET} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = INTERNAL_LINK; yybegin(INTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_BRACKET_CAT} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CATEGORY; yybegin(CATEGORY_STATE);/* Break so we don't hit fall-through warning: */ break;} + {EXTERNAL_LINK} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = EXTERNAL_LINK_URL; yybegin(EXTERNAL_LINK_STATE);/* Break so we don't hit fall-through warning: */ break;} + {TWO_SINGLE_QUOTES} {numWikiTokensSeen = 0; positionInc = 1; if (numBalanced == 0){numBalanced++;yybegin(TWO_SINGLE_QUOTES_STATE);} else{numBalanced = 0;}/* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_EQUALS} {numWikiTokensSeen = 0; positionInc = 1; yybegin(DOUBLE_EQUALS_STATE);/* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_BRACE} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;} + {CITATION} {numWikiTokensSeen = 0; positionInc = 1; currentTokType = CITATION; yybegin(DOUBLE_BRACE_STATE);/* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; } + . | {WHITESPACE} |{INFOBOX} {numWikiTokensSeen = 0; positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } { //First {ALPHANUM} is always the link, set position to 0 for these //This is slightly different from EXTERNAL_LINK_STATE because that one has an explicit grammar for capturing the URL {ALPHANUM} {yybegin(INTERNAL_LINK_STATE); numWikiTokensSeen++; return currentTokType;} - {DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL);} + {DOUBLE_BRACKET_CLOSE} {numLinkToks = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { positionInc = 1; } + . | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } { //increment the link token, but then don't increment the tokens after that which are still in the link ("http://"|"https://"){HOST}("/"?({ALPHANUM}|{P}|\?|"&"|"="|"#")*)* {positionInc = 1; numWikiTokensSeen++; yybegin(EXTERNAL_LINK_STATE); return currentTokType;} {ALPHANUM} {if (numLinkToks == 0){positionInc = 0;} else{positionInc = 1;} numWikiTokensSeen++; currentTokType = EXTERNAL_LINK; yybegin(EXTERNAL_LINK_STATE); numLinkToks++; return currentTokType;} - "]" {numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL);} - {WHITESPACE} { positionInc = 1; } + "]" {numLinkToks = 0; positionInc = 0; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} + {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } { {ALPHANUM} {yybegin(CATEGORY_STATE); numWikiTokensSeen++; return currentTokType;} - {DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);} + {DOUBLE_BRACKET_CLOSE} {yybegin(YYINITIAL);/* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { positionInc = 1; } + . | {WHITESPACE} { positionInc = 1; /* Break so we don't hit fall-through warning: */ break;} } //italics { - "'" {currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE);} - "'''" {currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE);} + "'" {currentTokType = BOLD; yybegin(THREE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;} + "'''" {currentTokType = BOLD_ITALICS; yybegin(FIVE_SINGLE_QUOTES_STATE); /* Break so we don't hit fall-through warning: */ break;} {ALPHANUM} {currentTokType = ITALICS; numWikiTokensSeen++; yybegin(STRING); return currentTokType;/*italics*/} //we can have links inside, let those override - {DOUBLE_BRACKET} {currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);} - {DOUBLE_BRACKET_CAT} {currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);} - {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);} + {DOUBLE_BRACKET} {currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_BRACKET_CAT} {currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;} + {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* ignore */ } + . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } //bold { {ALPHANUM} {yybegin(STRING); numWikiTokensSeen++; return currentTokType;} //we can have links inside, let those override - {DOUBLE_BRACKET} {currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);} - {DOUBLE_BRACKET_CAT} {currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);} - {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);} + {DOUBLE_BRACKET} {currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_BRACKET_CAT} {currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;} + {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* ignore */ } + . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } //bold italics { {ALPHANUM} {yybegin(STRING); numWikiTokensSeen++; return currentTokType;} //we can have links inside, let those override - {DOUBLE_BRACKET} {currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE);} - {DOUBLE_BRACKET_CAT} {currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE);} - {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE);} + {DOUBLE_BRACKET} {currentTokType = INTERNAL_LINK; numWikiTokensSeen = 0; yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_BRACKET_CAT} {currentTokType = CATEGORY; numWikiTokensSeen = 0; yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;} + {EXTERNAL_LINK} {currentTokType = EXTERNAL_LINK; numWikiTokensSeen = 0; yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* ignore */ } + . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } { - "=" {currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING);} + "=" {currentTokType = SUB_HEADING; numWikiTokensSeen = 0; yybegin(STRING); /* Break so we don't hit fall-through warning: */ break;} {ALPHANUM} {currentTokType = HEADING; yybegin(DOUBLE_EQUALS_STATE); numWikiTokensSeen++; return currentTokType;} - {DOUBLE_EQUALS} {yybegin(YYINITIAL);} + {DOUBLE_EQUALS} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* ignore */ } + . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } { {ALPHANUM} {yybegin(DOUBLE_BRACE_STATE); numWikiTokensSeen = 0; return currentTokType;} - {DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL);} - {CITATION_CLOSE} {yybegin(YYINITIAL);} + {DOUBLE_BRACE_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} + {CITATION_CLOSE} {yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;} //ignore - . | {WHITESPACE} { /* ignore */ } + . | {WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } } { - "'''''" {numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end bold italics*/} - "'''" {numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL);/*end bold*/} - "''" {numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end italics*/} - "===" {numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL);/*end sub header*/} + "'''''" {numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold italics*/} + "'''" {numBalanced = 0;currentTokType = ALPHANUM;yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end bold*/} + "''" {numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end italics*/} + "===" {numBalanced = 0;currentTokType = ALPHANUM; yybegin(YYINITIAL); /* Break so we don't hit fall-through warning: */ break;/*end sub header*/} {ALPHANUM} {yybegin(STRING); numWikiTokensSeen++; return currentTokType;/* STRING ALPHANUM*/} //we can have links inside, let those override - {DOUBLE_BRACKET} {numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE);} - {DOUBLE_BRACKET_CAT} {numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE);} - {EXTERNAL_LINK} {numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE);} + {DOUBLE_BRACKET} {numBalanced = 0; numWikiTokensSeen = 0; currentTokType = INTERNAL_LINK;yybegin(INTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} + {DOUBLE_BRACKET_CAT} {numBalanced = 0; numWikiTokensSeen = 0; currentTokType = CATEGORY;yybegin(CATEGORY_STATE); /* Break so we don't hit fall-through warning: */ break;} + {EXTERNAL_LINK} {numBalanced = 0; numWikiTokensSeen = 0; currentTokType = EXTERNAL_LINK;yybegin(EXTERNAL_LINK_STATE); /* Break so we don't hit fall-through warning: */ break;} {PIPE} {yybegin(STRING); return currentTokType;/*pipe*/} - .|{WHITESPACE} { /* ignore STRING */ } + .|{WHITESPACE} { /* Break so we don't hit fall-through warning: */ break;/* ignore STRING */ } } @@ -315,7 +315,7 @@ DOUBLE_EQUALS = "="{2} //end wikipedia /** Ignore the rest */ -. | {WHITESPACE}|{TAGS} { /* ignore */ } +. | {WHITESPACE}|{TAGS} { /* Break so we don't hit fall-through warning: */ break;/* ignore */ } //INTERNAL_LINK = "["{2}({ALPHANUM}+{WHITESPACE}*)+"]"{2} diff --git a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java index f041bcd4837..b0769ad4f46 100644 --- a/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java +++ b/modules/analysis/common/src/test/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzerTest.java @@ -64,7 +64,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase { public void testNoStopwords() throws Exception { // Note: an empty list of fields passed in - protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.EMPTY_LIST, 1); + protectedAnalyzer = new QueryAutoStopWordAnalyzer(TEST_VERSION_CURRENT, appAnalyzer, reader, Collections.emptyList(), 1); TokenStream protectedTokenStream = protectedAnalyzer.tokenStream("variedField", new StringReader("quick")); assertTokenStreamContents(protectedTokenStream, new String[]{"quick"}); diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java index c133f96e9fc..26c0f4e7379 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java @@ -161,6 +161,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { break; case GZIP: in = csFactory.createCompressorInputStream(CompressorStreamFactory.GZIP, in); + break; case PLAIN: break; // nothing to do default: