mirror of https://github.com/apache/lucene.git
LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto: scheme is prepended.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1302265 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1991b61b49
commit
c4f72f61ac
|
@ -972,6 +972,9 @@ Bug fixes
|
|||
* LUCENE-3876: Fix bug where positions for a document exceeding
|
||||
Integer.MAX_VALUE/2 would produce a corrupt index.
|
||||
(Simon Willnauer, Mike McCandless, Robert Muir)
|
||||
|
||||
* LUCENE-3880: UAX29URLEmailTokenizer now recognizes emails when the mailto:
|
||||
scheme is prepended. (Kai Gülzau, Steve Rowe)
|
||||
|
||||
Optimizations
|
||||
|
||||
|
|
|
@ -98,6 +98,9 @@
|
|||
<jflex file="src/java/org/apache/lucene/analysis/standard/std31/UAX29URLEmailTokenizerImpl31.jflex"
|
||||
outdir="src/java/org/apache/lucene/analysis/standard/std31"
|
||||
nobak="on" />
|
||||
<jflex file="src/java/org/apache/lucene/analysis/standard/std34/UAX29URLEmailTokenizerImpl34.jflex"
|
||||
outdir="src/java/org/apache/lucene/analysis/standard/std34"
|
||||
nobak="on" />
|
||||
</target>
|
||||
|
||||
<target name="clean-jflex">
|
||||
|
|
|
@ -15,8 +15,8 @@
|
|||
*/
|
||||
|
||||
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
|
||||
// file version from Thursday, August 4, 2011 11:34:20 AM UTC
|
||||
// generated on Thursday, August 4, 2011 11:46:19 PM UTC
|
||||
// file version from Sunday, March 18, 2012 4:34:02 AM UTC
|
||||
// generated on Sunday, March 18, 2012 4:02:55 PM UTC
|
||||
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
|
||||
|
||||
ASCIITLD = "." (
|
||||
|
@ -79,6 +79,7 @@ ASCIITLD = "." (
|
|||
| [cC][rR]
|
||||
| [cC][uU]
|
||||
| [cC][vV]
|
||||
| [cC][wW]
|
||||
| [cC][xX]
|
||||
| [cC][yY]
|
||||
| [cC][zZ]
|
||||
|
@ -247,6 +248,7 @@ ASCIITLD = "." (
|
|||
| [sS][tT]
|
||||
| [sS][uU]
|
||||
| [sS][vV]
|
||||
| [sS][xX]
|
||||
| [sS][yY]
|
||||
| [sS][zZ]
|
||||
| [tT][cC]
|
||||
|
@ -288,6 +290,7 @@ ASCIITLD = "." (
|
|||
| [xX][nN]--3[eE]0[bB]707[eE]
|
||||
| [xX][nN]--45[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
|
||||
| [xX][nN]--80[aA][oO]21[aA]
|
||||
| [xX][nN]--90[aA]3[aA][cC]
|
||||
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
|
||||
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
|
||||
|
|
|
@ -23,8 +23,8 @@ import java.io.InputStreamReader;
|
|||
import java.io.Reader;
|
||||
|
||||
import org.apache.lucene.analysis.Tokenizer;
|
||||
import org.apache.lucene.analysis.standard.std31.StandardTokenizerImpl31;
|
||||
import org.apache.lucene.analysis.standard.std31.UAX29URLEmailTokenizerImpl31;
|
||||
import org.apache.lucene.analysis.standard.std34.UAX29URLEmailTokenizerImpl34;
|
||||
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
@ -128,8 +128,10 @@ public final class UAX29URLEmailTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
private static StandardTokenizerInterface getScannerFor(Version matchVersion, Reader input) {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_34)) {
|
||||
if (matchVersion.onOrAfter(Version.LUCENE_36)) {
|
||||
return new UAX29URLEmailTokenizerImpl(input);
|
||||
} else if (matchVersion.onOrAfter(Version.LUCENE_34)) {
|
||||
return new UAX29URLEmailTokenizerImpl34(input);
|
||||
} else {
|
||||
return new UAX29URLEmailTokenizerImpl31(input);
|
||||
}
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -205,6 +205,10 @@ EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
|||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
||||
|
||||
{URL} { return URL_TYPE; }
|
||||
|
||||
// LUCENE-3880: Prevent "mailto:test" from being recognized as a single <ALPHANUM> token in "mailto:test@example.org", so that the address following "mailto:" can be matched as an <EMAIL>
|
||||
[mM][aA][iI][lL][tT][oO] / ":" {EMAIL} { return WORD_TYPE; }
|
||||
|
||||
{EMAIL} { return EMAIL_TYPE; }
|
||||
|
||||
// UAX#29 WB8. Numeric × Numeric
|
||||
|
|
|
@ -0,0 +1,334 @@
|
|||
/*
|
||||
* Copyright 2001-2005 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Generated from IANA Root Zone Database <http://www.internic.net/zones/root.zone>
|
||||
// file version from Thursday, August 4, 2011 11:34:20 AM UTC
|
||||
// generated on Thursday, August 4, 2011 11:46:19 PM UTC
|
||||
// by org.apache.lucene.analysis.standard.GenerateJflexTLDMacros
|
||||
|
||||
ASCIITLD = "." (
|
||||
[aA][cC]
|
||||
| [aA][dD]
|
||||
| [aA][eE]
|
||||
| [aA][eE][rR][oO]
|
||||
| [aA][fF]
|
||||
| [aA][gG]
|
||||
| [aA][iI]
|
||||
| [aA][lL]
|
||||
| [aA][mM]
|
||||
| [aA][nN]
|
||||
| [aA][oO]
|
||||
| [aA][qQ]
|
||||
| [aA][rR]
|
||||
| [aA][rR][pP][aA]
|
||||
| [aA][sS]
|
||||
| [aA][sS][iI][aA]
|
||||
| [aA][tT]
|
||||
| [aA][uU]
|
||||
| [aA][wW]
|
||||
| [aA][xX]
|
||||
| [aA][zZ]
|
||||
| [bB][aA]
|
||||
| [bB][bB]
|
||||
| [bB][dD]
|
||||
| [bB][eE]
|
||||
| [bB][fF]
|
||||
| [bB][gG]
|
||||
| [bB][hH]
|
||||
| [bB][iI]
|
||||
| [bB][iI][zZ]
|
||||
| [bB][jJ]
|
||||
| [bB][mM]
|
||||
| [bB][nN]
|
||||
| [bB][oO]
|
||||
| [bB][rR]
|
||||
| [bB][sS]
|
||||
| [bB][tT]
|
||||
| [bB][vV]
|
||||
| [bB][wW]
|
||||
| [bB][yY]
|
||||
| [bB][zZ]
|
||||
| [cC][aA]
|
||||
| [cC][aA][tT]
|
||||
| [cC][cC]
|
||||
| [cC][dD]
|
||||
| [cC][fF]
|
||||
| [cC][gG]
|
||||
| [cC][hH]
|
||||
| [cC][iI]
|
||||
| [cC][kK]
|
||||
| [cC][lL]
|
||||
| [cC][mM]
|
||||
| [cC][nN]
|
||||
| [cC][oO]
|
||||
| [cC][oO][mM]
|
||||
| [cC][oO][oO][pP]
|
||||
| [cC][rR]
|
||||
| [cC][uU]
|
||||
| [cC][vV]
|
||||
| [cC][xX]
|
||||
| [cC][yY]
|
||||
| [cC][zZ]
|
||||
| [dD][eE]
|
||||
| [dD][jJ]
|
||||
| [dD][kK]
|
||||
| [dD][mM]
|
||||
| [dD][oO]
|
||||
| [dD][zZ]
|
||||
| [eE][cC]
|
||||
| [eE][dD][uU]
|
||||
| [eE][eE]
|
||||
| [eE][gG]
|
||||
| [eE][rR]
|
||||
| [eE][sS]
|
||||
| [eE][tT]
|
||||
| [eE][uU]
|
||||
| [fF][iI]
|
||||
| [fF][jJ]
|
||||
| [fF][kK]
|
||||
| [fF][mM]
|
||||
| [fF][oO]
|
||||
| [fF][rR]
|
||||
| [gG][aA]
|
||||
| [gG][bB]
|
||||
| [gG][dD]
|
||||
| [gG][eE]
|
||||
| [gG][fF]
|
||||
| [gG][gG]
|
||||
| [gG][hH]
|
||||
| [gG][iI]
|
||||
| [gG][lL]
|
||||
| [gG][mM]
|
||||
| [gG][nN]
|
||||
| [gG][oO][vV]
|
||||
| [gG][pP]
|
||||
| [gG][qQ]
|
||||
| [gG][rR]
|
||||
| [gG][sS]
|
||||
| [gG][tT]
|
||||
| [gG][uU]
|
||||
| [gG][wW]
|
||||
| [gG][yY]
|
||||
| [hH][kK]
|
||||
| [hH][mM]
|
||||
| [hH][nN]
|
||||
| [hH][rR]
|
||||
| [hH][tT]
|
||||
| [hH][uU]
|
||||
| [iI][dD]
|
||||
| [iI][eE]
|
||||
| [iI][lL]
|
||||
| [iI][mM]
|
||||
| [iI][nN]
|
||||
| [iI][nN][fF][oO]
|
||||
| [iI][nN][tT]
|
||||
| [iI][oO]
|
||||
| [iI][qQ]
|
||||
| [iI][rR]
|
||||
| [iI][sS]
|
||||
| [iI][tT]
|
||||
| [jJ][eE]
|
||||
| [jJ][mM]
|
||||
| [jJ][oO]
|
||||
| [jJ][oO][bB][sS]
|
||||
| [jJ][pP]
|
||||
| [kK][eE]
|
||||
| [kK][gG]
|
||||
| [kK][hH]
|
||||
| [kK][iI]
|
||||
| [kK][mM]
|
||||
| [kK][nN]
|
||||
| [kK][pP]
|
||||
| [kK][rR]
|
||||
| [kK][wW]
|
||||
| [kK][yY]
|
||||
| [kK][zZ]
|
||||
| [lL][aA]
|
||||
| [lL][bB]
|
||||
| [lL][cC]
|
||||
| [lL][iI]
|
||||
| [lL][kK]
|
||||
| [lL][rR]
|
||||
| [lL][sS]
|
||||
| [lL][tT]
|
||||
| [lL][uU]
|
||||
| [lL][vV]
|
||||
| [lL][yY]
|
||||
| [mM][aA]
|
||||
| [mM][cC]
|
||||
| [mM][dD]
|
||||
| [mM][eE]
|
||||
| [mM][gG]
|
||||
| [mM][hH]
|
||||
| [mM][iI][lL]
|
||||
| [mM][kK]
|
||||
| [mM][lL]
|
||||
| [mM][mM]
|
||||
| [mM][nN]
|
||||
| [mM][oO]
|
||||
| [mM][oO][bB][iI]
|
||||
| [mM][pP]
|
||||
| [mM][qQ]
|
||||
| [mM][rR]
|
||||
| [mM][sS]
|
||||
| [mM][tT]
|
||||
| [mM][uU]
|
||||
| [mM][uU][sS][eE][uU][mM]
|
||||
| [mM][vV]
|
||||
| [mM][wW]
|
||||
| [mM][xX]
|
||||
| [mM][yY]
|
||||
| [mM][zZ]
|
||||
| [nN][aA]
|
||||
| [nN][aA][mM][eE]
|
||||
| [nN][cC]
|
||||
| [nN][eE]
|
||||
| [nN][eE][tT]
|
||||
| [nN][fF]
|
||||
| [nN][gG]
|
||||
| [nN][iI]
|
||||
| [nN][lL]
|
||||
| [nN][oO]
|
||||
| [nN][pP]
|
||||
| [nN][rR]
|
||||
| [nN][uU]
|
||||
| [nN][zZ]
|
||||
| [oO][mM]
|
||||
| [oO][rR][gG]
|
||||
| [pP][aA]
|
||||
| [pP][eE]
|
||||
| [pP][fF]
|
||||
| [pP][gG]
|
||||
| [pP][hH]
|
||||
| [pP][kK]
|
||||
| [pP][lL]
|
||||
| [pP][mM]
|
||||
| [pP][nN]
|
||||
| [pP][rR]
|
||||
| [pP][rR][oO]
|
||||
| [pP][sS]
|
||||
| [pP][tT]
|
||||
| [pP][wW]
|
||||
| [pP][yY]
|
||||
| [qQ][aA]
|
||||
| [rR][eE]
|
||||
| [rR][oO]
|
||||
| [rR][sS]
|
||||
| [rR][uU]
|
||||
| [rR][wW]
|
||||
| [sS][aA]
|
||||
| [sS][bB]
|
||||
| [sS][cC]
|
||||
| [sS][dD]
|
||||
| [sS][eE]
|
||||
| [sS][gG]
|
||||
| [sS][hH]
|
||||
| [sS][iI]
|
||||
| [sS][jJ]
|
||||
| [sS][kK]
|
||||
| [sS][lL]
|
||||
| [sS][mM]
|
||||
| [sS][nN]
|
||||
| [sS][oO]
|
||||
| [sS][rR]
|
||||
| [sS][tT]
|
||||
| [sS][uU]
|
||||
| [sS][vV]
|
||||
| [sS][yY]
|
||||
| [sS][zZ]
|
||||
| [tT][cC]
|
||||
| [tT][dD]
|
||||
| [tT][eE][lL]
|
||||
| [tT][fF]
|
||||
| [tT][gG]
|
||||
| [tT][hH]
|
||||
| [tT][jJ]
|
||||
| [tT][kK]
|
||||
| [tT][lL]
|
||||
| [tT][mM]
|
||||
| [tT][nN]
|
||||
| [tT][oO]
|
||||
| [tT][pP]
|
||||
| [tT][rR]
|
||||
| [tT][rR][aA][vV][eE][lL]
|
||||
| [tT][tT]
|
||||
| [tT][vV]
|
||||
| [tT][wW]
|
||||
| [tT][zZ]
|
||||
| [uU][aA]
|
||||
| [uU][gG]
|
||||
| [uU][kK]
|
||||
| [uU][sS]
|
||||
| [uU][yY]
|
||||
| [uU][zZ]
|
||||
| [vV][aA]
|
||||
| [vV][cC]
|
||||
| [vV][eE]
|
||||
| [vV][gG]
|
||||
| [vV][iI]
|
||||
| [vV][nN]
|
||||
| [vV][uU]
|
||||
| [wW][fF]
|
||||
| [wW][sS]
|
||||
| [xX][nN]--0[zZ][wW][mM]56[dD]
|
||||
| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]
|
||||
| [xX][nN]--3[eE]0[bB]707[eE]
|
||||
| [xX][nN]--45[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]
|
||||
| [xX][nN]--90[aA]3[aA][cC]
|
||||
| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]
|
||||
| [xX][nN]--[cC][lL][cC][hH][cC]0[eE][aA]0[bB]2[gG]2[aA]9[gG][cC][dD]
|
||||
| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]
|
||||
| [xX][nN]--[fF][iI][qQ][sS]8[sS]
|
||||
| [xX][nN]--[fF][iI][qQ][zZ]9[sS]
|
||||
| [xX][nN]--[fF][pP][cC][rR][jJ]9[cC]3[dD]
|
||||
| [xX][nN]--[fF][zZ][cC]2[cC]9[eE]2[cC]
|
||||
| [xX][nN]--[gG]6[wW]251[dD]
|
||||
| [xX][nN]--[gG][eE][cC][rR][jJ]9[cC]
|
||||
| [xX][nN]--[hH]2[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]
|
||||
| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]
|
||||
| [xX][nN]--[jJ]6[wW]193[gG]
|
||||
| [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]
|
||||
| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]
|
||||
| [xX][nN]--[kK][pP][rR][wW]13[dD]
|
||||
| [xX][nN]--[kK][pP][rR][yY]57[dD]
|
||||
| [xX][nN]--[lL][gG][bB][bB][aA][tT]1[aA][dD]8[jJ]
|
||||
| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]
|
||||
| [xX][nN]--[mM][gG][bB][aA][yY][hH]7[gG][pP][aA]
|
||||
| [xX][nN]--[mM][gG][bB][bB][hH]1[aA]71[eE]
|
||||
| [xX][nN]--[mM][gG][bB][cC]0[aA]9[aA][zZ][cC][gG]
|
||||
| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]
|
||||
| [xX][nN]--[oO]3[cC][wW]4[hH]
|
||||
| [xX][nN]--[oO][gG][bB][pP][fF]8[fF][lL]
|
||||
| [xX][nN]--[pP]1[aA][iI]
|
||||
| [xX][nN]--[pP][gG][bB][sS]0[dD][hH]
|
||||
| [xX][nN]--[sS]9[bB][rR][jJ]9[cC]
|
||||
| [xX][nN]--[wW][gG][bB][hH]1[cC]
|
||||
| [xX][nN]--[wW][gG][bB][lL]6[aA]
|
||||
| [xX][nN]--[xX][kK][cC]2[aA][lL]3[hH][yY][eE]2[aA]
|
||||
| [xX][nN]--[xX][kK][cC]2[dD][lL]3[aA]5[eE][eE]0[hH]
|
||||
| [xX][nN]--[yY][fF][rR][oO]4[iI]67[oO]
|
||||
| [xX][nN]--[yY][gG][bB][iI]2[aA][mM][mM][xX]
|
||||
| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]
|
||||
| [xX][xX][xX]
|
||||
| [yY][eE]
|
||||
| [yY][tT]
|
||||
| [zZ][aA]
|
||||
| [zZ][mM]
|
||||
| [zZ][wW]
|
||||
) "."? // Accept trailing root (empty) domain
|
||||
|
|
@ -0,0 +1,125 @@
|
|||
/*
|
||||
* Copyright 2010 The Apache Software Foundation.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
// Generated using ICU4J 4.8.0.0 on Friday, September 30, 2011 4:10:42 PM UTC
|
||||
// by org.apache.lucene.analysis.icu.GenerateJFlexSupplementaryMacros
|
||||
|
||||
|
||||
ALetterSupp = (
|
||||
([\ud80d][\uDC00-\uDC2E])
|
||||
| ([\ud80c][\uDC00-\uDFFF])
|
||||
| ([\ud809][\uDC00-\uDC62])
|
||||
| ([\ud808][\uDC00-\uDF6E])
|
||||
| ([\ud81a][\uDC00-\uDE38])
|
||||
| ([\ud804][\uDC03-\uDC37\uDC83-\uDCAF])
|
||||
| ([\ud835][\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB])
|
||||
| ([\ud801][\uDC00-\uDC9D])
|
||||
| ([\ud800][\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDE80-\uDE9C\uDEA0-\uDED0\uDF00-\uDF1E\uDF30-\uDF4A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5])
|
||||
| ([\ud803][\uDC00-\uDC48])
|
||||
| ([\ud802][\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDD00-\uDD15\uDD20-\uDD39\uDE00\uDE10-\uDE13\uDE15-\uDE17\uDE19-\uDE33\uDE60-\uDE7C\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72])
|
||||
)
|
||||
FormatSupp = (
|
||||
([\ud804][\uDCBD])
|
||||
| ([\ud834][\uDD73-\uDD7A])
|
||||
| ([\udb40][\uDC01\uDC20-\uDC7F])
|
||||
)
|
||||
ExtendSupp = (
|
||||
([\ud804][\uDC00-\uDC02\uDC38-\uDC46\uDC80-\uDC82\uDCB0-\uDCBA])
|
||||
| ([\ud834][\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44])
|
||||
| ([\ud800][\uDDFD])
|
||||
| ([\udb40][\uDD00-\uDDEF])
|
||||
| ([\ud802][\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F])
|
||||
)
|
||||
NumericSupp = (
|
||||
([\ud804][\uDC66-\uDC6F])
|
||||
| ([\ud835][\uDFCE-\uDFFF])
|
||||
| ([\ud801][\uDCA0-\uDCA9])
|
||||
)
|
||||
KatakanaSupp = (
|
||||
([\ud82c][\uDC00])
|
||||
)
|
||||
MidLetterSupp = (
|
||||
[]
|
||||
)
|
||||
MidNumSupp = (
|
||||
[]
|
||||
)
|
||||
MidNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ExtendNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ExtendNumLetSupp = (
|
||||
[]
|
||||
)
|
||||
ComplexContextSupp = (
|
||||
[]
|
||||
)
|
||||
HanSupp = (
|
||||
([\ud87e][\uDC00-\uDE1D])
|
||||
| ([\ud86b][\uDC00-\uDFFF])
|
||||
| ([\ud86a][\uDC00-\uDFFF])
|
||||
| ([\ud869][\uDC00-\uDED6\uDF00-\uDFFF])
|
||||
| ([\ud868][\uDC00-\uDFFF])
|
||||
| ([\ud86e][\uDC00-\uDC1D])
|
||||
| ([\ud86d][\uDC00-\uDF34\uDF40-\uDFFF])
|
||||
| ([\ud86c][\uDC00-\uDFFF])
|
||||
| ([\ud863][\uDC00-\uDFFF])
|
||||
| ([\ud862][\uDC00-\uDFFF])
|
||||
| ([\ud861][\uDC00-\uDFFF])
|
||||
| ([\ud860][\uDC00-\uDFFF])
|
||||
| ([\ud867][\uDC00-\uDFFF])
|
||||
| ([\ud866][\uDC00-\uDFFF])
|
||||
| ([\ud865][\uDC00-\uDFFF])
|
||||
| ([\ud864][\uDC00-\uDFFF])
|
||||
| ([\ud858][\uDC00-\uDFFF])
|
||||
| ([\ud859][\uDC00-\uDFFF])
|
||||
| ([\ud85a][\uDC00-\uDFFF])
|
||||
| ([\ud85b][\uDC00-\uDFFF])
|
||||
| ([\ud85c][\uDC00-\uDFFF])
|
||||
| ([\ud85d][\uDC00-\uDFFF])
|
||||
| ([\ud85e][\uDC00-\uDFFF])
|
||||
| ([\ud85f][\uDC00-\uDFFF])
|
||||
| ([\ud850][\uDC00-\uDFFF])
|
||||
| ([\ud851][\uDC00-\uDFFF])
|
||||
| ([\ud852][\uDC00-\uDFFF])
|
||||
| ([\ud853][\uDC00-\uDFFF])
|
||||
| ([\ud854][\uDC00-\uDFFF])
|
||||
| ([\ud855][\uDC00-\uDFFF])
|
||||
| ([\ud856][\uDC00-\uDFFF])
|
||||
| ([\ud857][\uDC00-\uDFFF])
|
||||
| ([\ud849][\uDC00-\uDFFF])
|
||||
| ([\ud848][\uDC00-\uDFFF])
|
||||
| ([\ud84b][\uDC00-\uDFFF])
|
||||
| ([\ud84a][\uDC00-\uDFFF])
|
||||
| ([\ud84d][\uDC00-\uDFFF])
|
||||
| ([\ud84c][\uDC00-\uDFFF])
|
||||
| ([\ud84f][\uDC00-\uDFFF])
|
||||
| ([\ud84e][\uDC00-\uDFFF])
|
||||
| ([\ud841][\uDC00-\uDFFF])
|
||||
| ([\ud840][\uDC00-\uDFFF])
|
||||
| ([\ud843][\uDC00-\uDFFF])
|
||||
| ([\ud842][\uDC00-\uDFFF])
|
||||
| ([\ud845][\uDC00-\uDFFF])
|
||||
| ([\ud844][\uDC00-\uDFFF])
|
||||
| ([\ud847][\uDC00-\uDFFF])
|
||||
| ([\ud846][\uDC00-\uDFFF])
|
||||
)
|
||||
HiraganaSupp = (
|
||||
([\ud83c][\uDE00])
|
||||
| ([\ud82c][\uDC01])
|
||||
)
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,272 @@
|
|||
package org.apache.lucene.analysis.standard.std34;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizerInterface;
|
||||
import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
|
||||
/**
|
||||
* This class implements UAX29URLEmailTokenizer, except with a bug
|
||||
* (https://issues.apache.org/jira/browse/LUCENE-3880) where a "mailto:"
|
||||
* URI scheme prepended to an email address will disrupt recognition
|
||||
* of the email address.
|
||||
* @deprecated This class is only for exact backwards compatibility
|
||||
*/
|
||||
@Deprecated
|
||||
%%
|
||||
|
||||
%unicode 6.0
|
||||
%integer
|
||||
%final
|
||||
%public
|
||||
%class UAX29URLEmailTokenizerImpl34
|
||||
%implements StandardTokenizerInterface
|
||||
%function getNextToken
|
||||
%char
|
||||
|
||||
%include src/java/org/apache/lucene/analysis/standard/SUPPLEMENTARY.jflex-macro
|
||||
ALetter = ([\p{WB:ALetter}] | {ALetterSupp})
|
||||
Format = ([\p{WB:Format}] | {FormatSupp})
|
||||
Numeric = ([\p{WB:Numeric}] | {NumericSupp})
|
||||
Extend = ([\p{WB:Extend}] | {ExtendSupp})
|
||||
Katakana = ([\p{WB:Katakana}] | {KatakanaSupp})
|
||||
MidLetter = ([\p{WB:MidLetter}] | {MidLetterSupp})
|
||||
MidNum = ([\p{WB:MidNum}] | {MidNumSupp})
|
||||
MidNumLet = ([\p{WB:MidNumLet}] | {MidNumLetSupp})
|
||||
ExtendNumLet = ([\p{WB:ExtendNumLet}] | {ExtendNumLetSupp})
|
||||
ComplexContext = ([\p{LB:Complex_Context}] | {ComplexContextSupp})
|
||||
Han = ([\p{Script:Han}] | {HanSupp})
|
||||
Hiragana = ([\p{Script:Hiragana}] | {HiraganaSupp})
|
||||
|
||||
// Script=Hangul & Aletter
|
||||
HangulEx = (!(!\p{Script:Hangul}|!\p{WB:ALetter})) ({Format} | {Extend})*
|
||||
// UAX#29 WB4. X (Extend | Format)* --> X
|
||||
//
|
||||
ALetterEx = {ALetter} ({Format} | {Extend})*
|
||||
// TODO: Convert hard-coded full-width numeric range to property intersection (something like [\p{Full-Width}&&\p{Numeric}]) once JFlex supports it
|
||||
NumericEx = ({Numeric} | [\uFF10-\uFF19]) ({Format} | {Extend})*
|
||||
KatakanaEx = {Katakana} ({Format} | {Extend})*
|
||||
MidLetterEx = ({MidLetter} | {MidNumLet}) ({Format} | {Extend})*
|
||||
MidNumericEx = ({MidNum} | {MidNumLet}) ({Format} | {Extend})*
|
||||
ExtendNumLetEx = {ExtendNumLet} ({Format} | {Extend})*
|
||||
|
||||
HanEx = {Han} ({Format} | {Extend})*
|
||||
HiraganaEx = {Hiragana} ({Format} | {Extend})*
|
||||
|
||||
// URL and E-mail syntax specifications:
|
||||
//
|
||||
// RFC-952: DOD INTERNET HOST TABLE SPECIFICATION
|
||||
// RFC-1035: DOMAIN NAMES - IMPLEMENTATION AND SPECIFICATION
|
||||
// RFC-1123: Requirements for Internet Hosts - Application and Support
|
||||
// RFC-1738: Uniform Resource Locators (URL)
|
||||
// RFC-3986: Uniform Resource Identifier (URI): Generic Syntax
|
||||
// RFC-5234: Augmented BNF for Syntax Specifications: ABNF
|
||||
// RFC-5321: Simple Mail Transfer Protocol
|
||||
// RFC-5322: Internet Message Format
|
||||
|
||||
%include src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
|
||||
|
||||
DomainLabel = [A-Za-z0-9] ([-A-Za-z0-9]* [A-Za-z0-9])?
|
||||
DomainNameStrict = {DomainLabel} ("." {DomainLabel})* {ASCIITLD}
|
||||
DomainNameLoose = {DomainLabel} ("." {DomainLabel})*
|
||||
|
||||
IPv4DecimalOctet = "0"{0,2} [0-9] | "0"? [1-9][0-9] | "1" [0-9][0-9] | "2" ([0-4][0-9] | "5" [0-5])
|
||||
IPv4Address = {IPv4DecimalOctet} ("." {IPv4DecimalOctet}){3}
|
||||
IPv6Hex16Bit = [0-9A-Fa-f]{1,4}
|
||||
IPv6LeastSignificant32Bits = {IPv4Address} | ({IPv6Hex16Bit} ":" {IPv6Hex16Bit})
|
||||
IPv6Address = ({IPv6Hex16Bit} ":"){6} {IPv6LeastSignificant32Bits}
|
||||
| "::" ({IPv6Hex16Bit} ":"){5} {IPv6LeastSignificant32Bits}
|
||||
| {IPv6Hex16Bit}? "::" ({IPv6Hex16Bit} ":"){4} {IPv6LeastSignificant32Bits}
|
||||
| (({IPv6Hex16Bit} ":"){0,1} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){3} {IPv6LeastSignificant32Bits}
|
||||
| (({IPv6Hex16Bit} ":"){0,2} {IPv6Hex16Bit})? "::" ({IPv6Hex16Bit} ":"){2} {IPv6LeastSignificant32Bits}
|
||||
| (({IPv6Hex16Bit} ":"){0,3} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit} ":" {IPv6LeastSignificant32Bits}
|
||||
| (({IPv6Hex16Bit} ":"){0,4} {IPv6Hex16Bit})? "::" {IPv6LeastSignificant32Bits}
|
||||
| (({IPv6Hex16Bit} ":"){0,5} {IPv6Hex16Bit})? "::" {IPv6Hex16Bit}
|
||||
| (({IPv6Hex16Bit} ":"){0,6} {IPv6Hex16Bit})? "::"
|
||||
|
||||
URIunreserved = [-._~A-Za-z0-9]
|
||||
URIpercentEncoded = "%" [0-9A-Fa-f]{2}
|
||||
URIsubDelims = [!$&'()*+,;=]
|
||||
URIloginSegment = ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims})*
|
||||
URIlogin = {URIloginSegment} (":" {URIloginSegment})? "@"
|
||||
URIquery = "?" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
|
||||
URIfragment = "#" ({URIunreserved} | {URIpercentEncoded} | {URIsubDelims} | [:@/?])*
|
||||
URIport = ":" [0-9]{1,5}
|
||||
URIhostStrict = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameStrict}
|
||||
URIhostLoose = ("[" {IPv6Address} "]") | {IPv4Address} | {DomainNameLoose}
|
||||
|
||||
URIauthorityStrict = {URIhostStrict} {URIport}?
|
||||
URIauthorityLoose = {URIlogin}? {URIhostLoose} {URIport}?
|
||||
|
||||
HTTPsegment = ({URIunreserved} | {URIpercentEncoded} | [;:@&=])*
|
||||
HTTPpath = ("/" {HTTPsegment})*
|
||||
HTTPscheme = [hH][tT][tT][pP][sS]? "://"
|
||||
HTTPurlFull = {HTTPscheme} {URIauthorityLoose} {HTTPpath}? {URIquery}? {URIfragment}?
|
||||
// {HTTPurlNoScheme} excludes {URIlogin}, because it could otherwise accept e-mail addresses
|
||||
HTTPurlNoScheme = {URIauthorityStrict} {HTTPpath}? {URIquery}? {URIfragment}?
|
||||
HTTPurl = {HTTPurlFull} | {HTTPurlNoScheme}
|
||||
|
||||
FTPorFILEsegment = ({URIunreserved} | {URIpercentEncoded} | [?:@&=])*
|
||||
FTPorFILEpath = "/" {FTPorFILEsegment} ("/" {FTPorFILEsegment})*
|
||||
FTPtype = ";" [tT][yY][pP][eE] "=" [aAiIdD]
|
||||
FTPscheme = [fF][tT][pP] "://"
|
||||
FTPurl = {FTPscheme} {URIauthorityLoose} {FTPorFILEpath} {FTPtype}? {URIfragment}?
|
||||
|
||||
FILEscheme = [fF][iI][lL][eE] "://"
|
||||
FILEurl = {FILEscheme} {URIhostLoose}? {FTPorFILEpath} {URIfragment}?
|
||||
|
||||
URL = {HTTPurl} | {FTPurl} | {FILEurl}
|
||||
|
||||
EMAILquotedString = [\"] ([\u0001-\u0008\u000B\u000C\u000E-\u0021\u0023-\u005B\u005D-\u007E] | [\\] [\u0000-\u007F])* [\"]
|
||||
EMAILatomText = [A-Za-z0-9!#$%&'*+-/=?\^_`{|}~]
|
||||
EMAILlabel = {EMAILatomText}+ | {EMAILquotedString}
|
||||
EMAILlocalPart = {EMAILlabel} ("." {EMAILlabel})*
|
||||
EMAILdomainLiteralText = [\u0001-\u0008\u000B\u000C\u000E-\u005A\u005E-\u007F] | [\\] [\u0000-\u007F]
|
||||
// DFA minimization allows {IPv6Address} and {IPv4Address} to be included
|
||||
// in the {EMAILbracketedHost} definition without incurring any size penalties,
|
||||
// since {EMAILdomainLiteralText} recognizes all valid IP addresses.
|
||||
// The IP address regexes are included in {EMAILbracketedHost} simply as a
|
||||
// reminder that they are acceptable bracketed host forms.
|
||||
EMAILbracketedHost = "[" ({EMAILdomainLiteralText}* | {IPv4Address} | [iI][pP][vV] "6:" {IPv6Address}) "]"
|
||||
EMAIL = {EMAILlocalPart} "@" ({DomainNameStrict} | {EMAILbracketedHost})
|
||||
|
||||
|
||||
%{
|
||||
/** Alphanumeric sequences */
|
||||
public static final int WORD_TYPE = UAX29URLEmailTokenizer.ALPHANUM;
|
||||
|
||||
/** Numbers */
|
||||
public static final int NUMERIC_TYPE = UAX29URLEmailTokenizer.NUM;
|
||||
|
||||
/**
|
||||
* Chars in class \p{Line_Break = Complex_Context} are from South East Asian
|
||||
* scripts (Thai, Lao, Myanmar, Khmer, etc.). Sequences of these are kept
|
||||
* together as a single token rather than broken up, because the logic
|
||||
* required to break them at word boundaries is too complex for UAX#29.
|
||||
* <p>
|
||||
* See Unicode Line Breaking Algorithm: http://www.unicode.org/reports/tr14/#SA
|
||||
*/
|
||||
public static final int SOUTH_EAST_ASIAN_TYPE = UAX29URLEmailTokenizer.SOUTHEAST_ASIAN;
|
||||
|
||||
public static final int IDEOGRAPHIC_TYPE = UAX29URLEmailTokenizer.IDEOGRAPHIC;
|
||||
|
||||
public static final int HIRAGANA_TYPE = UAX29URLEmailTokenizer.HIRAGANA;
|
||||
|
||||
public static final int KATAKANA_TYPE = UAX29URLEmailTokenizer.KATAKANA;
|
||||
|
||||
public static final int HANGUL_TYPE = UAX29URLEmailTokenizer.HANGUL;
|
||||
|
||||
public static final int EMAIL_TYPE = UAX29URLEmailTokenizer.EMAIL;
|
||||
|
||||
public static final int URL_TYPE = UAX29URLEmailTokenizer.URL;
|
||||
|
||||
public final int yychar()
|
||||
{
|
||||
return yychar;
|
||||
}
|
||||
|
||||
/**
|
||||
* Fills CharTermAttribute with the current token text.
|
||||
*/
|
||||
public final void getText(CharTermAttribute t) {
|
||||
t.copyBuffer(zzBuffer, zzStartRead, zzMarkedPos-zzStartRead);
|
||||
}
|
||||
%}
|
||||
|
||||
%%
|
||||
|
||||
// UAX#29 WB1. sot ÷
|
||||
// WB2. ÷ eot
|
||||
//
|
||||
<<EOF>> { return StandardTokenizerInterface.YYEOF; }
|
||||
|
||||
{URL} { return URL_TYPE; }
|
||||
{EMAIL} { return EMAIL_TYPE; }
|
||||
|
||||
// UAX#29 WB8. Numeric × Numeric
|
||||
// WB11. Numeric (MidNum | MidNumLet) × Numeric
|
||||
// WB12. Numeric × (MidNum | MidNumLet) Numeric
|
||||
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
|
||||
//
|
||||
{ExtendNumLetEx}* {NumericEx} ({ExtendNumLetEx}+ {NumericEx}
|
||||
| {MidNumericEx} {NumericEx}
|
||||
| {NumericEx})*
|
||||
{ExtendNumLetEx}*
|
||||
{ return NUMERIC_TYPE; }
|
||||
|
||||
// subset of the below for typing purposes only!
|
||||
{HangulEx}+
|
||||
{ return HANGUL_TYPE; }
|
||||
|
||||
{KatakanaEx}+
|
||||
{ return KATAKANA_TYPE; }
|
||||
|
||||
// UAX#29 WB5. ALetter × ALetter
|
||||
// WB6. ALetter × (MidLetter | MidNumLet) ALetter
|
||||
// WB7. ALetter (MidLetter | MidNumLet) × ALetter
|
||||
// WB9. ALetter × Numeric
|
||||
// WB10. Numeric × ALetter
|
||||
// WB13. Katakana × Katakana
|
||||
// WB13a. (ALetter | Numeric | Katakana | ExtendNumLet) × ExtendNumLet
|
||||
// WB13b. ExtendNumLet × (ALetter | Numeric | Katakana)
|
||||
//
|
||||
{ExtendNumLetEx}* ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
|
||||
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
|
||||
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ )
|
||||
({ExtendNumLetEx}+ ( {KatakanaEx} ({ExtendNumLetEx}* {KatakanaEx})*
|
||||
| ( {NumericEx} ({ExtendNumLetEx}+ {NumericEx} | {MidNumericEx} {NumericEx} | {NumericEx})*
|
||||
| {ALetterEx} ({ExtendNumLetEx}+ {ALetterEx} | {MidLetterEx} {ALetterEx} | {ALetterEx})* )+ ) )*
|
||||
{ExtendNumLetEx}*
|
||||
{ return WORD_TYPE; }
|
||||
|
||||
|
||||
// From UAX #29:
|
||||
//
|
||||
// [C]haracters with the Line_Break property values of Contingent_Break (CB),
|
||||
// Complex_Context (SA/South East Asian), and XX (Unknown) are assigned word
|
||||
// boundary property values based on criteria outside of the scope of this
|
||||
// annex. That means that satisfactory treatment of languages like Chinese
|
||||
// or Thai requires special handling.
|
||||
//
|
||||
// In Unicode 6.0, only one character has the \p{Line_Break = Contingent_Break}
|
||||
// property: U+FFFC ( ￼ ) OBJECT REPLACEMENT CHARACTER.
|
||||
//
|
||||
// In the ICU implementation of UAX#29, \p{Line_Break = Complex_Context}
|
||||
// character sequences (from South East Asian scripts like Thai, Myanmar, Khmer,
|
||||
// Lao, etc.) are kept together. This grammar does the same below.
|
||||
//
|
||||
// See also the Unicode Line Breaking Algorithm:
|
||||
//
|
||||
// http://www.unicode.org/reports/tr14/#SA
|
||||
//
|
||||
{ComplexContext}+ { return SOUTH_EAST_ASIAN_TYPE; }
|
||||
|
||||
// UAX#29 WB14. Any ÷ Any
|
||||
//
|
||||
{HanEx} { return IDEOGRAPHIC_TYPE; }
|
||||
{HiraganaEx} { return HIRAGANA_TYPE; }
|
||||
|
||||
|
||||
// UAX#29 WB3. CR × LF
|
||||
// WB3a. (Newline | CR | LF) ÷
|
||||
// WB3b. ÷ (Newline | CR | LF)
|
||||
// WB14. Any ÷ Any
|
||||
//
|
||||
[^] { /* Break so we don't hit fall-through warning: */ break;/* Not numeric, word, ideographic, hiragana, or SE Asian -- ignore it. */ }
|
|
@ -352,6 +352,31 @@ public class TestUAX29URLEmailTokenizer extends BaseTokenStreamTestCase {
|
|||
(emailAnalyzer, randomTextWithEmails, emails);
|
||||
}
|
||||
|
||||
/**
 * Checks the LUCENE-3880 behavior for input that starts with the "mailto:" scheme.
 * <p>
 * The first assertion expects "mailto:test@example.org" to be split into the word
 * "mailto" (type {@code <ALPHANUM>}) followed by the address "test@example.org"
 * (type {@code <EMAIL>}).
 * <p>
 * The second assertion pins the tokens currently expected for a longer mailto: URI
 * with two recipients, a cc field, a subject and a percent-encoded body. The inline
 * TODO comments mark the parts of that expected output that are not yet ideal
 * (leading ',' and '?cc=' kept inside EMAIL tokens, "%20" escapes left undecoded).
 */
public void testMailtoSchemeEmails () throws Exception {
|
||||
// See LUCENE-3880
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo(a, "mailto:test@example.org",
|
||||
new String[] {"mailto", "test@example.org"},
|
||||
new String[] { "<ALPHANUM>", "<EMAIL>" });
|
||||
|
||||
// TODO: Support full mailto: scheme URIs. See RFC 6068: http://tools.ietf.org/html/rfc6068
|
||||
BaseTokenStreamTestCase.assertAnalyzesTo
|
||||
(a, "mailto:personA@example.com,personB@example.com?cc=personC@example.com"
|
||||
+ "&subject=Subjectivity&body=Corpusivity%20or%20something%20like%20that",
|
||||
new String[] { "mailto",
|
||||
"personA@example.com",
|
||||
// TODO: recognize ',' address delimiter. Also, see examples of ';' delimiter use at: http://www.mailto.co.uk/
|
||||
",personB@example.com",
|
||||
"?cc=personC@example.com", // TODO: split field keys/values
|
||||
"subject", "Subjectivity",
|
||||
"body", "Corpusivity", "20or", "20something","20like", "20that" }, // TODO: Hex decoding + re-tokenization
|
||||
new String[] { "<ALPHANUM>",
|
||||
"<EMAIL>",
|
||||
"<EMAIL>",
|
||||
"<EMAIL>",
|
||||
"<ALPHANUM>", "<ALPHANUM>",
|
||||
"<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
|
||||
}
|
||||
|
||||
public void testURLs() throws Exception {
|
||||
Reader reader = null;
|
||||
String randomTextWithURLs;
|
||||
|
|
Loading…
Reference in New Issue