diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/category-hashtag.js.es6 b/app/assets/javascripts/pretty-text/engines/discourse-markdown/category-hashtag.js.es6 index 5e997dfab66..dbe68836060 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/category-hashtag.js.es6 +++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/category-hashtag.js.es6 @@ -44,7 +44,7 @@ function addHashtag(buffer, matches, state) { export function setup(helper) { helper.registerPlugin(md => { const rule = { - matcher: /#([\u00C0-\u1FFF\u2C00-\uD7FF\w-:]{1,101})/, + matcher: /#([\u00C0-\u1FFF\u2C00-\uD7FF\w:-]{1,101})/, onMatch: addHashtag }; diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/mentions.js.es6 b/app/assets/javascripts/pretty-text/engines/discourse-markdown/mentions.js.es6 index d35500b9912..894fcc029a4 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/mentions.js.es6 +++ b/app/assets/javascripts/pretty-text/engines/discourse-markdown/mentions.js.es6 @@ -20,14 +20,44 @@ function addMention(buffer, matches, state) { export function setup(helper) { helper.registerOptions((opts, siteSettings) => { opts.features.mentions = !!siteSettings.enable_mentions; + opts.features.unicodeUsernames = !!siteSettings.unicode_usernames; }); helper.registerPlugin(md => { const rule = { - matcher: /@(\w[\w.-]{0,58}\w)|@(\w)/, + matcher: mentionRegex(md.options.discourse.features.unicodeUsernames), onMatch: addMention }; md.core.textPostProcess.ruler.push("mentions", rule); }); } + +function mentionRegex(unicodeUsernames) { + if (unicodeUsernames) { + try { + // Create the regex from a string, because Babel doesn't understand + // Unicode property escapes and completely mangles the regexp. 
+ const alnum = "\\p{Alphabetic}\\p{Mark}\\p{Decimal_Number}"; + return new RegExp( + `@([${alnum}_][${alnum}._-]{0,58}[${alnum}])|@([${alnum}_])`, + "u" + ); + } catch (e) { + if (!(e instanceof SyntaxError)) throw e; + + // Fallback for older browsers and MiniRacer. + // Created with regexpu-core@4.5.4 by executing the following in nodejs: + // + // const rewritePattern = require('regexpu-core') + // new RegExp(rewritePattern(/[\p{Alphabetic}\p{Mark}\p{Decimal_Number}]/u.source, 'u', { 'unicodePropertyEscape': true })) + const alnum = /(?:[0-9A-Za-z\xAA\xB5\xBA\xC0-\xD6\xD8-\xF6\xF8-\u02C1\u02C6-\u02D1\u02E0-\u02E4\u02EC\u02EE\u0300-\u0374\u0376\u0377\u037A-\u037D\u037F\u0386\u0388-\u038A\u038C\u038E-\u03A1\u03A3-\u03F5\u03F7-\u0481\u0483-\u052F\u0531-\u0556\u0559\u0560-\u0588\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u05D0-\u05EA\u05EF-\u05F2\u0610-\u061A\u0620-\u0669\u066E-\u06D3\u06D5-\u06DC\u06E1-\u06E8\u06DF-\u06E4\u06ED-\u06F9\u06EA-\u06FC\u06FF\u0710-\u074A\u074D-\u07B1\u07C0-\u07F5\u07FA\u07FD\u0800-\u082D\u0840-\u085B\u0860-\u086A\u08A0-\u08B4\u08B6-\u08BD\u08D3-\u08E1\u08E3-\u0963\u0966-\u096F\u0971-\u0983\u0985-\u098C\u098F\u0990\u0993-\u09A8\u09AA-\u09B0\u09B2\u09B6-\u09B9\u09BC-\u09C4\u09C7\u09C8\u09CB-\u09CE\u09D7\u09DC\u09DD\u09DF-\u09E3\u09E6-\u09F1\u09FC\u09FE\u0A01-\u0A03\u0A05-\u0A0A\u0A0F\u0A10\u0A13-\u0A28\u0A2A-\u0A30\u0A32\u0A33\u0A35\u0A36\u0A38\u0A39\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A59-\u0A5C\u0A5E\u0A66-\u0A75\u0A81-\u0A83\u0A85-\u0A8D\u0A8F-\u0A91\u0A93-\u0AA8\u0AAA-\u0AB0\u0AB2\u0AB3\u0AB5-\u0AB9\u0ABC-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AD0\u0AE0-\u0AE3\u0AE6-\u0AEF\u0AF9-\u0AFF\u0B01-\u0B03\u0B05-\u0B0C\u0B0F\u0B10\u0B13-\u0B28\u0B2A-\u0B30\u0B32\u0B33\u0B35-\u0B39\u0B3C-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B5C\u0B5D\u0B5F-\u0B63\u0B66-\u0B6F\u0B71\u0B82\u0B83\u0B85-\u0B8A\u0B8E-\u0B90\u0B92-\u0B95\u0B99\u0B9A\u0B9C\u0B9E\u0B9F\u0BA3\u0BA4\u0BA8-\u0BAA\u0BAE-\u0BB9\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0
BCD\u0BD0\u0BD7\u0BE6-\u0BEF\u0C00-\u0C0C\u0C0E-\u0C10\u0C12-\u0C28\u0C2A-\u0C39\u0C3D-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C58-\u0C5A\u0C60-\u0C63\u0C66-\u0C6F\u0C80-\u0C83\u0C85-\u0C8C\u0C8E-\u0C90\u0C92-\u0CA8\u0CAA-\u0CB3\u0CB5-\u0CB9\u0CBC-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CDE\u0CE0-\u0CE3\u0CE6-\u0CEF\u0CF1\u0CF2\u0D00-\u0D03\u0D05-\u0D0C\u0D0E-\u0D10\u0D12-\u0D44\u0D46-\u0D48\u0D4A-\u0D4E\u0D54-\u0D57\u0D5F-\u0D63\u0D66-\u0D6F\u0D7A-\u0D7F\u0D82\u0D83\u0D85-\u0D96\u0D9A-\u0DB1\u0DB3-\u0DBB\u0DBD\u0DC0-\u0DC6\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DE6-\u0DEF\u0DF2\u0DF3\u0E01-\u0E3A\u0E40-\u0E4E\u0E50-\u0E59\u0E81\u0E82\u0E84\u0E86-\u0E8A\u0E8C-\u0EA3\u0EA5\u0EA7-\u0EBD\u0EC0-\u0EC4\u0EC6\u0EC8-\u0ECD\u0ED0-\u0ED9\u0EDC-\u0EDF\u0F00\u0F18\u0F19\u0F20-\u0F29\u0F35\u0F37\u0F39\u0F3E-\u0F47\u0F49-\u0F6C\u0F71-\u0F84\u0F86-\u0F97\u0F99-\u0FBC\u0FC6\u1000-\u1049\u1050-\u109D\u10A0-\u10C5\u10C7\u10CD\u10D0-\u10FA\u10FC-\u1248\u124A-\u124D\u1250-\u1256\u1258\u125A-\u125D\u1260-\u1288\u128A-\u128D\u1290-\u12B0\u12B2-\u12B5\u12B8-\u12BE\u12C0\u12C2-\u12C5\u12C8-\u12D6\u12D8-\u1310\u1312-\u1315\u1318-\u135A\u135D-\u135F\u1380-\u138F\u13A0-\u13F5\u13F8-\u13FD\u1401-\u166C\u166F-\u167F\u1681-\u169A\u16A0-\u16EA\u16EE-\u16F8\u1700-\u170C\u170E-\u1714\u1720-\u1734\u1740-\u1753\u1760-\u176C\u176E-\u1770\u1772\u1773\u1780-\u17D3\u17D7\u17DC\u17DD\u17E0-\u17E9\u180B-\u180D\u1810-\u1819\u1820-\u1878\u1880-\u18AA\u18B0-\u18F5\u1900-\u191E\u1920-\u192B\u1930-\u193B\u1946-\u196D\u1970-\u1974\u1980-\u19AB\u19B0-\u19C9\u19D0-\u19D9\u1A00-\u1A1B\u1A20-\u1A5E\u1A60-\u1A7C\u1A7F-\u1A89\u1A90-\u1A99\u1AA7\u1AB0-\u1ABE\u1B00-\u1B4B\u1B50-\u1B59\u1B6B-\u1B73\u1B80-\u1BF3\u1C00-\u1C37\u1C40-\u1C49\u1C4D-\u1C7D\u1C80-\u1C88\u1C90-\u1CBA\u1CBD-\u1CBF\u1CD0-\u1CD2\u1CD4-\u1CFA\u1D00-\u1DF9\u1DFB-\u1F15\u1F18-\u1F1D\u1F20-\u1F45\u1F48-\u1F4D\u1F50-\u1F57\u1F59\u1F5B\u1F5D\u1F5F-\u1F7D\u1F80-\u1FB4\u1FB6-\u1FBC\u1FBE\u1FC2-\u1FC4\u1FC6-\u1FCC\u1FD0-\u1FD3\u1FD6-\u1FDB\u1
FE0-\u1FEC\u1FF2-\u1FF4\u1FF6-\u1FFC\u2071\u207F\u2090-\u209C\u20D0-\u20F0\u2102\u2107\u210A-\u2113\u2115\u2119-\u211D\u2124\u2126\u2128\u212A-\u212D\u212F-\u2139\u213C-\u213F\u2145-\u2149\u214E\u2160-\u2188\u24B6-\u24E9\u2C00-\u2C2E\u2C30-\u2C5E\u2C60-\u2CE4\u2CEB-\u2CF3\u2D00-\u2D25\u2D27\u2D2D\u2D30-\u2D67\u2D6F\u2D7F-\u2D96\u2DA0-\u2DA6\u2DA8-\u2DAE\u2DB0-\u2DB6\u2DB8-\u2DBE\u2DC0-\u2DC6\u2DC8-\u2DCE\u2DD0-\u2DD6\u2DD8-\u2DDE\u2DE0-\u2DFF\u2E2F\u3005-\u3007\u3021-\u302F\u3031-\u3035\u3038-\u303C\u3041-\u3096\u3099\u309A\u309D-\u309F\u30A1-\u30FA\u30FC-\u30FF\u3105-\u312F\u3131-\u318E\u31A0-\u31BA\u31F0-\u31FF\u3400-\u4DB5\u4E00-\u9FEF\uA000-\uA48C\uA4D0-\uA4FD\uA500-\uA60C\uA610-\uA62B\uA640-\uA672\uA674-\uA67D\uA67F-\uA6F1\uA717-\uA71F\uA722-\uA788\uA78B-\uA7BF\uA7C2-\uA7C6\uA7F7-\uA827\uA840-\uA873\uA880-\uA8C5\uA8D0-\uA8D9\uA8E0-\uA8F7\uA8FB\uA8FD-\uA92D\uA930-\uA953\uA960-\uA97C\uA980-\uA9C0\uA9CF-\uA9D9\uA9E0-\uA9FE\uAA00-\uAA36\uAA40-\uAA4D\uAA50-\uAA59\uAA60-\uAA76\uAA7A-\uAAC2\uAADB-\uAADD\uAAE0-\uAAEF\uAAF2-\uAAF6\uAB01-\uAB06\uAB09-\uAB0E\uAB11-\uAB16\uAB20-\uAB26\uAB28-\uAB2E\uAB30-\uAB5A\uAB5C-\uAB67\uAB70-\uABEA\uABEC\uABED\uABF0-\uABF9\uAC00-\uD7A3\uD7B0-\uD7C6\uD7CB-\uD7FB\uF900-\uFA6D\uFA70-\uFAD9\uFB00-\uFB06\uFB13-\uFB17\uFB1D-\uFB28\uFB2A-\uFB36\uFB38-\uFB3C\uFB3E\uFB40\uFB41\uFB43\uFB44\uFB46-\uFBB1\uFBD3-\uFD3D\uFD50-\uFD8F\uFD92-\uFDC7\uFDF0-\uFDFB\uFE00-\uFE0F\uFE20-\uFE2F\uFE70-\uFE74\uFE76-\uFEFC\uFF10-\uFF19\uFF21-\uFF3A\uFF41-\uFF5A\uFF66-\uFFBE\uFFC2-\uFFC7\uFFCA-\uFFCF\uFFD2-\uFFD7\uFFDA-\uFFDC]|\uD800[\uDC00-\uDC0B\uDC0D-\uDC26\uDC28-\uDC3A\uDC3C\uDC3D\uDC3F-\uDC4D\uDC50-\uDC5D\uDC80-\uDCFA\uDD40-\uDD74\uDDFD\uDE80-\uDE9C\uDEA0-\uDED0\uDEE0\uDF00-\uDF1F\uDF2D-\uDF4A\uDF50-\uDF7A\uDF80-\uDF9D\uDFA0-\uDFC3\uDFC8-\uDFCF\uDFD1-\uDFD5]|\uD801[\uDC00-\uDC9D\uDCA0-\uDCA9\uDCB0-\uDCD3\uDCD8-\uDCFB\uDD00-\uDD27\uDD30-\uDD63\uDE00-\uDF36\uDF40-\uDF55\uDF60-\uDF67]|\uD802[\uDC00-\uDC05\uDC08\uDC0A-\uDC35\uDC37\uDC38\uDC3C\uDC3F-\uDC55\uDC60-\u
DC76\uDC80-\uDC9E\uDCE0-\uDCF2\uDCF4\uDCF5\uDD00-\uDD15\uDD20-\uDD39\uDD80-\uDDB7\uDDBE\uDDBF\uDE00-\uDE03\uDE05\uDE06\uDE0C-\uDE13\uDE15-\uDE17\uDE19-\uDE35\uDE38-\uDE3A\uDE3F\uDE60-\uDE7C\uDE80-\uDE9C\uDEC0-\uDEC7\uDEC9-\uDEE6\uDF00-\uDF35\uDF40-\uDF55\uDF60-\uDF72\uDF80-\uDF91]|\uD803[\uDC00-\uDC48\uDC80-\uDCB2\uDCC0-\uDCF2\uDD00-\uDD27\uDD30-\uDD39\uDF00-\uDF1C\uDF27\uDF30-\uDF50\uDFE0-\uDFF6]|\uD804[\uDC00-\uDC46\uDC66-\uDC6F\uDC82-\uDCBA\uDC7F-\uDC82\uDCD0-\uDCE8\uDCF0-\uDCF9\uDD00-\uDD34\uDD36-\uDD3F\uDD44-\uDD46\uDD50-\uDD73\uDD76\uDD80-\uDDC4\uDDC9-\uDDCC\uDDD0-\uDDDA\uDDDC\uDE00-\uDE11\uDE13-\uDE37\uDE3E\uDE80-\uDE86\uDE88\uDE8A-\uDE8D\uDE8F-\uDE9D\uDE9F-\uDEA8\uDEB0-\uDEEA\uDEF0-\uDEF9\uDF00-\uDF03\uDF05-\uDF0C\uDF0F\uDF10\uDF13-\uDF28\uDF2A-\uDF30\uDF32\uDF33\uDF35-\uDF39\uDF3B-\uDF44\uDF47\uDF48\uDF4B-\uDF4D\uDF50\uDF57\uDF5D-\uDF63\uDF66-\uDF6C\uDF70-\uDF74]|\uD805[\uDC00-\uDC4A\uDC50-\uDC59\uDC5E\uDC5F\uDC80-\uDCC5\uDCC7\uDCD0-\uDCD9\uDD80-\uDDB5\uDDB8-\uDDC0\uDDD8-\uDDDD\uDE00-\uDE40\uDE44\uDE50-\uDE59\uDE80-\uDEB8\uDEC0-\uDEC9\uDF00-\uDF1A\uDF1D-\uDF2B\uDF30-\uDF39]|\uD806[\uDC00-\uDC3A\uDCA0-\uDCE9\uDCFF\uDDA0-\uDDA7\uDDAA-\uDDD7\uDDDA-\uDDE1\uDDE3\uDDE4\uDE00-\uDE3E\uDE47\uDE50-\uDE99\uDE9D\uDEC0-\uDEF8]|\uD807[\uDC00-\uDC08\uDC0A-\uDC36\uDC38-\uDC40\uDC50-\uDC59\uDC72-\uDC8F\uDC92-\uDCA7\uDCA9-\uDCB6\uDD00-\uDD06\uDD08\uDD09\uDD0B-\uDD36\uDD3A\uDD3C\uDD3D\uDD3F-\uDD47\uDD50-\uDD59\uDD60-\uDD65\uDD67\uDD68\uDD6A-\uDD8E\uDD90\uDD91\uDD93-\uDD98\uDDA0-\uDDA9\uDEE0-\uDEF6]|\uD808[\uDC00-\uDF99]|\uD809[\uDC00-\uDC6E\uDC80-\uDD43]|[\uD80C\uD81C-\uD820\uD840-\uD868\uD86A-\uD86C\uD86F-\uD872\uD874-\uD879][\uDC00-\uDFFF]|\uD80D[\uDC00-\uDC2E]|\uD811[\uDC00-\uDE46]|\uD81A[\uDC00-\uDE38\uDE40-\uDE5E\uDE60-\uDE69\uDED0-\uDEED\uDEF0-\uDEF4\uDF00-\uDF36\uDF40-\uDF43\uDF50-\uDF59\uDF63-\uDF77\uDF7D-\uDF8F]|\uD81B[\uDE40-\uDE7F\uDF00-\uDF4A\uDF4F-\uDF87\uDF8F-\uDF9F\uDFE0\uDFE1\uDFE3]|\uD821[\uDC00-\uDFF7]|\uD822[\uDC00-\uDEF2]|\uD82C[\uDC00-\uDD1E\uDD50-\uDD52\
uDD64-\uDD67\uDD70-\uDEFB]|\uD82F[\uDC00-\uDC6A\uDC70-\uDC7C\uDC80-\uDC88\uDC90-\uDC99\uDC9D\uDC9E]|\uD834[\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]|\uD835[\uDC00-\uDC54\uDC56-\uDC9C\uDC9E\uDC9F\uDCA2\uDCA5\uDCA6\uDCA9-\uDCAC\uDCAE-\uDCB9\uDCBB\uDCBD-\uDCC3\uDCC5-\uDD05\uDD07-\uDD0A\uDD0D-\uDD14\uDD16-\uDD1C\uDD1E-\uDD39\uDD3B-\uDD3E\uDD40-\uDD44\uDD46\uDD4A-\uDD50\uDD52-\uDEA5\uDEA8-\uDEC0\uDEC2-\uDEDA\uDEDC-\uDEFA\uDEFC-\uDF14\uDF16-\uDF34\uDF36-\uDF4E\uDF50-\uDF6E\uDF70-\uDF88\uDF8A-\uDFA8\uDFAA-\uDFC2\uDFC4-\uDFCB\uDFCE-\uDFFF]|\uD836[\uDE00-\uDE36\uDE3B-\uDE6C\uDE75\uDE84\uDE9B-\uDE9F\uDEA1-\uDEAF]|\uD838[\uDC00-\uDC06\uDC08-\uDC18\uDC1B-\uDC21\uDC23\uDC24\uDC26-\uDC2A\uDD00-\uDD2C\uDD30-\uDD3D\uDD40-\uDD49\uDD4E\uDEC0-\uDEF9]|\uD83A[\uDC00-\uDCC4\uDCD0-\uDCD6\uDD00-\uDD4B\uDD50-\uDD59]|\uD83B[\uDE00-\uDE03\uDE05-\uDE1F\uDE21\uDE22\uDE24\uDE27\uDE29-\uDE32\uDE34-\uDE37\uDE39\uDE3B\uDE42\uDE47\uDE49\uDE4B\uDE4D-\uDE4F\uDE51\uDE52\uDE54\uDE57\uDE59\uDE5B\uDE5D\uDE5F\uDE61\uDE62\uDE64\uDE67-\uDE6A\uDE6C-\uDE72\uDE74-\uDE77\uDE79-\uDE7C\uDE7E\uDE80-\uDE89\uDE8B-\uDE9B\uDEA1-\uDEA3\uDEA5-\uDEA9\uDEAB-\uDEBB]|\uD83C[\uDD30-\uDD49\uDD50-\uDD69\uDD70-\uDD89]|\uD869[\uDC00-\uDED6\uDF00-\uDFFF]|\uD86D[\uDC00-\uDF34\uDF40-\uDFFF]|\uD86E[\uDC00-\uDC1D\uDC20-\uDFFF]|\uD873[\uDC00-\uDEA1\uDEB0-\uDFFF]|\uD87A[\uDC00-\uDFE0]|\uD87E[\uDC00-\uDE1D]|\uDB40[\uDD00-\uDDEF])/ + .source; + return new RegExp( + `@((?:_|${alnum})(?:[._-]|${alnum}){0,58}${alnum})|@(?:(_|${alnum}))` + ); + } + } else { + return /@(\w[\w.-]{0,58}[^\W_])|@(\w)/; + } +} diff --git a/app/assets/javascripts/pretty-text/engines/discourse-markdown/text-post-process.js.es6 b/app/assets/javascripts/pretty-text/engines/discourse-markdown/text-post-process.js.es6 index 666c1c52b87..2abd1205988 100644 --- a/app/assets/javascripts/pretty-text/engines/discourse-markdown/text-post-process.js.es6 +++ 
b/app/assets/javascripts/pretty-text/engines/discourse-markdown/text-post-process.js.es6 @@ -15,9 +15,14 @@ export class TextPostProcessRuler { this.matcherIndex = []; - let rules = this.rules.map( - r => "(" + r.rule.matcher.toString().slice(1, -1) + ")" - ); + const rules = []; + const flags = new Set("g"); + + this.rules.forEach(r => { + const matcher = r.rule.matcher; + rules.push(`(${matcher.source})`); + matcher.flags.split("").forEach(f => flags.add(f)); + }); let i; let regexString = ""; @@ -41,7 +46,7 @@ export class TextPostProcessRuler { last = "x".match(regex).length - 1; } - this.matcher = new RegExp(rules.join("|"), "g"); + this.matcher = new RegExp(rules.join("|"), [...flags].join("")); return this.matcher; } diff --git a/app/controllers/users_controller.rb b/app/controllers/users_controller.rb index 4e719423378..972c1b6e449 100644 --- a/app/controllers/users_controller.rb +++ b/app/controllers/users_controller.rb @@ -313,7 +313,7 @@ class UsersController < ApplicationController params.require(:username) if !params[:email].present? return render(json: success_json) end - username = params[:username] + username = params[:username]&.unicode_normalize target_user = user_from_params_or_current_user diff --git a/app/jobs/regular/update_username.rb b/app/jobs/regular/update_username.rb index 6df365d5824..150dec13554 100644 --- a/app/jobs/regular/update_username.rb +++ b/app/jobs/regular/update_username.rb @@ -5,16 +5,27 @@ module Jobs def execute(args) @user_id = args[:user_id] - @old_username = args[:old_username] - @new_username = args[:new_username] + @old_username = args[:old_username].unicode_normalize + @new_username = args[:new_username].unicode_normalize @avatar_img = PrettyText.avatar_img(args[:avatar_template], "tiny") - @raw_mention_regex = /(?:(? 
0 + def self.username_exists?(username) + username = normalize_username(username) + DB.exec(User::USERNAME_EXISTS_SQL, username: username) > 0 end def username_validator username_format_validator || begin - lower = username.downcase - existing = DB.query( - USERNAME_EXISTS_SQL, username: lower + USERNAME_EXISTS_SQL, + username: self.class.normalize_username(username) ) user_id = existing.select { |u| u.is_user }.first&.id diff --git a/app/models/username_validator.rb b/app/models/username_validator.rb index eeb4df859de..465d3a1b6ac 100644 --- a/app/models/username_validator.rb +++ b/app/models/username_validator.rb @@ -16,9 +16,10 @@ class UsernameValidator end def initialize(username) - @username = username + @username = username&.unicode_normalize @errors = [] end + attr_accessor :errors attr_reader :username @@ -27,10 +28,11 @@ class UsernameValidator end def valid_format? - username_exist? + username_present? username_length_min? username_length_max? username_char_valid? + username_char_whitelisted? username_first_char_valid? username_last_char_valid? username_no_double_special? @@ -39,62 +41,103 @@ class UsernameValidator end CONFUSING_EXTENSIONS ||= /\.(js|json|css|htm|html|xml|jpg|jpeg|png|gif|bmp|ico|tif|tiff|woff)$/i + MAX_CHARS ||= 60 + + ASCII_INVALID_CHAR_PATTERN ||= /[^\w.-]/ + UNICODE_INVALID_CHAR_PATTERN ||= /[^\p{Alnum}\p{M}._-]/ + INVALID_LEADING_CHAR_PATTERN ||= /^[^\p{Alnum}\p{M}_]+/ + INVALID_TRAILING_CHAR_PATTERN ||= /[^\p{Alnum}\p{M}]+$/ + REPEATED_SPECIAL_CHAR_PATTERN ||= /[-_.]{2,}/ private - def username_exist? + def username_present? return unless errors.empty? - unless username + + if username.blank? self.errors << I18n.t(:'user.username.blank') end end def username_length_min? return unless errors.empty? - if username.length < User.username_length.begin + + if username_grapheme_clusters.size < User.username_length.begin self.errors << I18n.t(:'user.username.short', min: User.username_length.begin) end end def username_length_max? 
return unless errors.empty? - if username.length > User.username_length.end + + if username_grapheme_clusters.size > User.username_length.end self.errors << I18n.t(:'user.username.long', max: User.username_length.end) + elsif username.length > MAX_CHARS + self.errors << I18n.t(:'user.username.too_long') end end def username_char_valid? return unless errors.empty? - if username =~ /[^\w.-]/ + + if self.class.invalid_char_pattern.match?(username) + self.errors << I18n.t(:'user.username.characters') + end + end + + def username_char_whitelisted? + return unless errors.empty? && self.class.char_whitelist_exists? + + if username.chars.any? { |c| !self.class.whitelisted_char?(c) } self.errors << I18n.t(:'user.username.characters') end end def username_first_char_valid? return unless errors.empty? - if username[0] =~ /\W/ + + if INVALID_LEADING_CHAR_PATTERN.match?(username_grapheme_clusters.first) self.errors << I18n.t(:'user.username.must_begin_with_alphanumeric_or_underscore') end end def username_last_char_valid? return unless errors.empty? - if username[-1] =~ /[^A-Za-z0-9]/ + + if INVALID_TRAILING_CHAR_PATTERN.match?(username_grapheme_clusters.last) self.errors << I18n.t(:'user.username.must_end_with_alphanumeric') end end def username_no_double_special? return unless errors.empty? - if username =~ /[-_.]{2,}/ + + if REPEATED_SPECIAL_CHAR_PATTERN.match?(username) self.errors << I18n.t(:'user.username.must_not_contain_two_special_chars_in_seq') end end def username_does_not_end_with_confusing_suffix? return unless errors.empty? - if username =~ CONFUSING_EXTENSIONS + + if CONFUSING_EXTENSIONS.match?(username) self.errors << I18n.t(:'user.username.must_not_end_with_confusing_suffix') end end + + def username_grapheme_clusters + @username_grapheme_clusters ||= username.grapheme_clusters + end + + def self.invalid_char_pattern + SiteSetting.unicode_usernames ? UNICODE_INVALID_CHAR_PATTERN : ASCII_INVALID_CHAR_PATTERN + end + + def self.char_whitelist_exists? 
+ SiteSetting.unicode_usernames && SiteSetting.unicode_username_character_whitelist_regex.present? + end + + def self.whitelisted_char?(c) + c.match?(/[\w.-]/) || c.match?(SiteSetting.unicode_username_character_whitelist_regex) + end end diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 99a811a0426..86ade46efe7 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -912,9 +912,9 @@ en: submit: "Save preferences" digest_frequency: title: "You are receiving summary emails %{frequency}" - select_title: 'Set summary emails frequency to:' - - never: 'never' + select_title: "Set summary emails frequency to:" + + never: "never" every_30_minutes: "every 30 minutes" every_hour: "hourly" daily: "daily" @@ -922,7 +922,6 @@ en: every_month: "every month" every_six_months: "every six months" - user_api_key: title: "Authorize application access" authorize: "Authorize" @@ -1474,6 +1473,8 @@ en: min_username_length: "Minimum username length in characters. WARNING: if any existing users or groups have names shorter than this, your site will break!" max_username_length: "Maximum username length in characters. WARNING: if any existing users or groups have names longer than this, your site will break!" + unicode_usernames: "Allow usernames and group names to contain Unicode letters and numbers." + unicode_username_character_whitelist: "Regular expression to allow only some Unicode characters within usernames. ASCII letters and numbers will always be allowed and don't need to be included in the whitelist." reserved_usernames: "Usernames for which signup is not allowed. Wildcard symbol * can be used to match any character zero or more times." @@ -2066,6 +2067,10 @@ en: low_weight_invalid: "You cannot set the weight to be greater or equal to 1 or smaller than 'category_search_priority_very_low_weight'." 
high_weight_invalid: "You cannot set the weight to be greater or equal to 1 or greater than 'category_search_priority_very_high_weight'." very_high_weight_invalid: "You cannot set the weight to be smaller than 'category_search_priority_high_weight'." + unicode_username_whitelist: + regex_invalid: "The regular expression is invalid: %{error}" + leading_trailing_slash: "The regular expression must not start and end with a slash." + unicode_usernames_avatars: "The internal system avatars do not support Unicode usernames." placeholder: sso_provider_secrets: @@ -2220,6 +2225,7 @@ en: username: short: "must be at least %{min} characters" long: "must be no more than %{max} characters" + too_long: "is too long" characters: "must only include numbers, letters, dashes, and underscores" unique: "must be unique" blank: "must be present" @@ -4272,8 +4278,7 @@ en: privacy: title: "Access" - description: - "
Is your community open to everyone, or is it restricted by membership, invitation, or approval? If you prefer, you can set things up privately, then switch over to public later.
" + description: "Is your community open to everyone, or is it restricted by membership, invitation, or approval? If you prefer, you can set things up privately, then switch over to public later.
" fields: privacy: diff --git a/config/routes.rb b/config/routes.rb index a81e46e1c4e..e30d78263d0 100644 --- a/config/routes.rb +++ b/config/routes.rb @@ -6,7 +6,7 @@ require_dependency "homepage_constraint" require_dependency "permalink_constraint" # The following constants have been replaced with `RouteFormat` and are deprecated. -USERNAME_ROUTE_FORMAT = /[\w.\-]+?/ unless defined? USERNAME_ROUTE_FORMAT +USERNAME_ROUTE_FORMAT = /[%\w.\-]+?/ unless defined? USERNAME_ROUTE_FORMAT BACKUP_ROUTE_FORMAT = /.+\.(sql\.gz|tar\.gz|tgz)/i unless defined? BACKUP_ROUTE_FORMAT Discourse::Application.routes.draw do diff --git a/config/site_settings.yml b/config/site_settings.yml index aec3fef5f2e..5ec0106d476 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -450,6 +450,15 @@ users: min: 8 max: 60 validator: "MaxUsernameLengthValidator" + unicode_usernames: + default: false + client: true + validator: "UnicodeUsernameValidator" + unicode_username_character_whitelist: + validator: "UnicodeUsernameWhitelistValidator" + default: "" + locale_default: + de: "[äöüßÄÖÜẞ]" reserved_usernames: type: list list_type: compact @@ -1107,8 +1116,9 @@ files: default: true client: true shadowed_by_global: true + validator: "ExternalSystemAvatarsValidator" external_system_avatars_url: - default: "/letter_avatar_proxy/v3/letter/{first_letter}/{color}/{size}.png" + default: "/letter_avatar_proxy/v4/letter/{first_letter}/{color}/{size}.png" client: true regex: '^((https?:)?\/)?\/.+[^\/]' shadowed_by_global: true diff --git a/lib/route_format.rb b/lib/route_format.rb index 0ebd43fbdf0..14bc0398b1c 100644 --- a/lib/route_format.rb +++ b/lib/route_format.rb @@ -1,7 +1,7 @@ module RouteFormat def self.username - /[\w.\-]+?/ + /[%\w.\-]+?/ end def self.backup diff --git a/lib/user_name_suggester.rb b/lib/user_name_suggester.rb index afff6f1223b..d9c72edebd6 100644 --- a/lib/user_name_suggester.rb +++ b/lib/user_name_suggester.rb @@ -1,33 +1,35 @@ module UserNameSuggester 
GENERIC_NAMES = ['i', 'me', 'info', 'support', 'admin', 'webmaster', 'hello', 'mail', 'office', 'contact', 'team'] - def self.suggest(name, allow_username = nil) - return unless name.present? - name = parse_name_from_email(name) - find_available_username_based_on(name, allow_username) + def self.suggest(name_or_email, allowed_username = nil) + return unless name_or_email.present? + + name = parse_name_from_email(name_or_email) + find_available_username_based_on(name, allowed_username) end - def self.parse_name_from_email(name) - if name =~ User::EMAIL - # When 'walter@white.com' take 'walter' - name = Regexp.last_match[1] - # When 'me@eviltrout.com' take 'eviltrout' - name = Regexp.last_match[2] if GENERIC_NAMES.include?(name) - end + def self.parse_name_from_email(name_or_email) + return name_or_email if name_or_email !~ User::EMAIL + + # When 'walter@white.com' take 'walter' + name = Regexp.last_match[1] + + # When 'me@eviltrout.com' take 'eviltrout' + name = Regexp.last_match[2] if GENERIC_NAMES.include?(name) name end - def self.find_available_username_based_on(name, allow_username = nil) + def self.find_available_username_based_on(name, allowed_username = nil) name = fix_username(name) i = 1 attempt = name - until attempt == allow_username || User.username_available?(attempt) || i > 100 + until attempt == allowed_username || User.username_available?(attempt) || i > 100 suffix = i.to_s - max_length = User.username_length.end - suffix.length - 1 - attempt = "#{name[0..max_length]}#{suffix}" + max_length = User.username_length.end - suffix.length + attempt = "#{truncate(name, max_length)}#{suffix}" i += 1 end - until attempt == allow_username || User.username_available?(attempt) || i > 200 + until attempt == allowed_username || User.username_available?(attempt) || i > 200 attempt = SecureRandom.hex[1..SiteSetting.max_username_length] i += 1 end @@ -39,28 +41,45 @@ module UserNameSuggester end def self.sanitize_username(name) - name = 
ActiveSupport::Inflector.transliterate(name.to_s) - # 1. replace characters that aren't allowed with '_' - name.gsub!(UsernameValidator::CONFUSING_EXTENSIONS, "_") - name.gsub!(/[^\w.-]/, "_") - # 2. removes unallowed leading characters - name.gsub!(/^\W+/, "") - # 3. removes unallowed trailing characters - name = remove_unallowed_trailing_characters(name) - # 4. unify special characters - name.gsub!(/[-_.]{2,}/, "_") - name - end + name = name.to_s - def self.remove_unallowed_trailing_characters(name) - name.gsub!(/[^A-Za-z0-9]+$/, "") + if SiteSetting.unicode_usernames + name.unicode_normalize! + else + name = ActiveSupport::Inflector.transliterate(name) + end + + name.gsub!(UsernameValidator.invalid_char_pattern, '_') + name = name.chars.map { |c| UsernameValidator.whitelisted_char?(c) ? c : '_' }.join if UsernameValidator.char_whitelist_exists? + name.gsub!(UsernameValidator::INVALID_LEADING_CHAR_PATTERN, '') + name.gsub!(UsernameValidator::CONFUSING_EXTENSIONS, "_") + name.gsub!(UsernameValidator::INVALID_TRAILING_CHAR_PATTERN, '') + name.gsub!(UsernameValidator::REPEATED_SPECIAL_CHAR_PATTERN, '_') name end def self.rightsize_username(name) - name = name[0, User.username_length.end] - name = remove_unallowed_trailing_characters(name) - name.ljust(User.username_length.begin, '1') + name = truncate(name, User.username_length.end) + name.gsub!(UsernameValidator::INVALID_TRAILING_CHAR_PATTERN, '') + + missing_char_count = User.username_length.begin - name.grapheme_clusters.size + name << '1' * missing_char_count if missing_char_count > 0 + name end + def self.truncate(name, max_grapheme_clusters) + clusters = name.grapheme_clusters + + if clusters.size > max_grapheme_clusters + clusters = clusters[0..max_grapheme_clusters - 1] + name = clusters.join + end + + while name.length > UsernameValidator::MAX_CHARS + clusters.pop + name = clusters.join + end + + name + end end diff --git a/lib/validators/external_system_avatars_validator.rb 
b/lib/validators/external_system_avatars_validator.rb new file mode 100644 index 00000000000..663da04d4fa --- /dev/null +++ b/lib/validators/external_system_avatars_validator.rb @@ -0,0 +1,13 @@ +class ExternalSystemAvatarsValidator + def initialize(opts = {}) + @opts = opts + end + + def valid_value?(value) + @valid = value == "t" || !SiteSetting.unicode_usernames + end + + def error_message + I18n.t("site_settings.errors.unicode_usernames_avatars") if !@valid + end +end diff --git a/lib/validators/unicode_username_validator.rb b/lib/validators/unicode_username_validator.rb new file mode 100644 index 00000000000..9b81f9235ee --- /dev/null +++ b/lib/validators/unicode_username_validator.rb @@ -0,0 +1,13 @@ +class UnicodeUsernameValidator + def initialize(opts = {}) + @opts = opts + end + + def valid_value?(value) + @valid = SiteSetting.external_system_avatars_enabled || value == "f" + end + + def error_message + I18n.t("site_settings.errors.unicode_usernames_avatars") if !@valid + end +end diff --git a/lib/validators/unicode_username_whitelist_validator.rb b/lib/validators/unicode_username_whitelist_validator.rb new file mode 100644 index 00000000000..f3e4691f8f7 --- /dev/null +++ b/lib/validators/unicode_username_whitelist_validator.rb @@ -0,0 +1,26 @@ +class UnicodeUsernameWhitelistValidator + def initialize(opts = {}) + @opts = opts + end + + def valid_value?(value) + @error_message = nil + return true if value.blank? + + if value.match?(/^\/.*\/[imxo]*$/) + @error_message = I18n.t("site_settings.errors.unicode_username_whitelist.leading_trailing_slash") + else + begin + Regexp.new(value) + rescue RegexpError => e + @error_message = I18n.t("site_settings.errors.unicode_username_whitelist.regex_invalid", error: e.message) + end + end + + @error_message.blank? 
+ end + + def error_message + @error_message + end +end diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb index 8b6763c3c5e..902f8046526 100644 --- a/spec/components/pretty_text_spec.rb +++ b/spec/components/pretty_text_spec.rb @@ -336,6 +336,21 @@ describe PrettyText do expect(PrettyText.cook(". http://test/@sam")).not_to include('mention') end + context "with Unicode usernames disabled" do + before { SiteSetting.unicode_usernames = false } + + it 'does not detect mention' do + expect(PrettyText.cook("Hello @狮子")).to_not include("mention") + end + end + + context "with Unicode usernames enabled" do + before { SiteSetting.unicode_usernames = true } + + it 'does detect mention' do + expect(PrettyText.cook("Hello @狮子")).to match_html 'Hello @狮子
' + end + end end describe "code fences" do diff --git a/spec/components/user_name_suggester_spec.rb b/spec/components/user_name_suggester_spec.rb index a9137d8b747..37f4a37a931 100644 --- a/spec/components/user_name_suggester_spec.rb +++ b/spec/components/user_name_suggester_spec.rb @@ -2,16 +2,10 @@ require 'rails_helper' require 'user_name_suggester' describe UserNameSuggester do - - describe 'name heuristics' do - it 'is able to guess a decent username from an email' do - expect(UserNameSuggester.suggest('bob@bob.com')).to eq('bob') - end - end - describe '.suggest' do before do - User.stubs(:username_length).returns(3..15) + SiteSetting.min_username_length = 3 + SiteSetting.max_username_length = 15 end it "doesn't raise an error on nil username" do @@ -26,10 +20,6 @@ describe UserNameSuggester do expect(UserNameSuggester.suggest("Darth%^Vader")).to eq('Darth_Vader') end - it "transliterates some characters" do - expect(UserNameSuggester.suggest("Jørn")).to eq('Jorn') - end - it 'adds 1 to an existing username' do user = Fabricate(:user) expect(UserNameSuggester.suggest(user.username)).to eq("#{user.username}1") @@ -39,6 +29,10 @@ describe UserNameSuggester do expect(UserNameSuggester.suggest('a')).to eq('a11') end + it 'is able to guess a decent username from an email' do + expect(UserNameSuggester.suggest('bob@example.com')).to eq('bob') + end + it "has a special case for me and i emails" do expect(UserNameSuggester.suggest('me@eviltrout.com')).to eq('eviltrout') expect(UserNameSuggester.suggest('i@eviltrout.com')).to eq('eviltrout') @@ -106,6 +100,57 @@ describe UserNameSuggester do User.stubs(:username_length).returns(8..8) expect(UserNameSuggester.suggest('uuuuuuu_u')).to eq('uuuuuuu1') end - end + context "with Unicode usernames disabled" do + before { SiteSetting.unicode_usernames = false } + + it "transliterates some characters" do + expect(UserNameSuggester.suggest('Jørn')).to eq('Jorn') + end + + it "replaces Unicode characters" do + 
expect(UserNameSuggester.suggest('طائر')).to eq('111') + expect(UserNameSuggester.suggest('πουλί')).to eq('111') + end + end + + context "with Unicode usernames enabled" do + before { SiteSetting.unicode_usernames = true } + + it "does not transliterate" do + expect(UserNameSuggester.suggest("Jørn")).to eq('Jørn') + end + + it "does not replace Unicode characters" do + expect(UserNameSuggester.suggest('طائر')).to eq('طائر') + expect(UserNameSuggester.suggest('πουλί')).to eq('πουλί') + end + + it "shortens usernames by counting grapheme clusters" do + SiteSetting.max_username_length = 10 + expect(UserNameSuggester.suggest('बहुत-लंबा-उपयोगकर्ता-नाम')).to eq('बहुत-लंबा-उपयो') + end + + it "adds numbers if it's too short" do + expect(UserNameSuggester.suggest('鳥')).to eq('鳥11') + + # grapheme cluster consists of 3 code points + expect(UserNameSuggester.suggest('য়া')).to eq('য়া11') + end + + it "normalizes usernames" do + actual = 'Löwe' # NFD, "Lo\u0308we" + expected = 'Löwe' # NFC, "L\u00F6we" + + expect(UserNameSuggester.suggest(actual)).to eq(expected) + end + + it "does not suggest a username longer than max column size" do + SiteSetting.max_username_length = 40 + + expect(UserNameSuggester.suggest('য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া')) + .to eq('য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া-য়া') + end + end + end end diff --git a/spec/components/validators/external_system_avatars_validator_spec.rb b/spec/components/validators/external_system_avatars_validator_spec.rb new file mode 100644 index 00000000000..1057f20b3f3 --- /dev/null +++ b/spec/components/validators/external_system_avatars_validator_spec.rb @@ -0,0 +1,25 @@ +require 'rails_helper' + +describe ExternalSystemAvatarsValidator do + subject { described_class.new } + + it "disallows disabling external system avatars when Unicode usernames are enabled" do + SiteSetting.unicode_usernames = true + + expect(subject.valid_value?("f")).to eq(false) + 
expect(subject.error_message).to eq(I18n.t("site_settings.errors.unicode_usernames_avatars")) + + expect(subject.valid_value?("t")).to eq(true) + expect(subject.error_message).to be_blank + end + + it "allows disabling external system avatars when Unicode usernames are disabled" do + SiteSetting.unicode_usernames = false + + expect(subject.valid_value?("t")).to eq(true) + expect(subject.error_message).to be_blank + + expect(subject.valid_value?("f")).to eq(true) + expect(subject.error_message).to be_blank + end +end diff --git a/spec/components/validators/unicode_username_validator_spec.rb b/spec/components/validators/unicode_username_validator_spec.rb new file mode 100644 index 00000000000..d8d8f59cb13 --- /dev/null +++ b/spec/components/validators/unicode_username_validator_spec.rb @@ -0,0 +1,25 @@ +require 'rails_helper' + +describe UnicodeUsernameValidator do + subject { described_class.new } + + it "disallows Unicode usernames when external system avatars are disabled" do + SiteSetting.external_system_avatars_enabled = false + + expect(subject.valid_value?("t")).to eq(false) + expect(subject.error_message).to eq(I18n.t("site_settings.errors.unicode_usernames_avatars")) + + expect(subject.valid_value?("f")).to eq(true) + expect(subject.error_message).to be_blank + end + + it "allows Unicode usernames when external system avatars are enabled" do + SiteSetting.external_system_avatars_enabled = true + + expect(subject.valid_value?("t")).to eq(true) + expect(subject.error_message).to be_blank + + expect(subject.valid_value?("f")).to eq(true) + expect(subject.error_message).to be_blank + end +end diff --git a/spec/components/validators/unicode_username_whitelist_validator_spec.rb b/spec/components/validators/unicode_username_whitelist_validator_spec.rb new file mode 100644 index 00000000000..8176e120483 --- /dev/null +++ b/spec/components/validators/unicode_username_whitelist_validator_spec.rb @@ -0,0 +1,36 @@ +require 'rails_helper' + +describe 
UnicodeUsernameWhitelistValidator do + subject { described_class.new } + + it "allows an empty whitelist" do + expect(subject.valid_value?("")).to eq(true) + expect(subject.error_message).to be_blank + end + + it "disallows leading and trailing slashes" do + expected_error = I18n.t("site_settings.errors.unicode_username_whitelist.leading_trailing_slash") + + expect(subject.valid_value?("/foo/")).to eq(false) + expect(subject.error_message).to eq(expected_error) + + expect(subject.valid_value?("foo/")).to eq(true) + expect(subject.error_message).to be_blank + + expect(subject.valid_value?("/foo")).to eq(true) + expect(subject.error_message).to be_blank + + expect(subject.valid_value?("f/o/o")).to eq(true) + expect(subject.error_message).to be_blank + + expect(subject.valid_value?("/foo/i")).to eq(false) + expect(subject.error_message).to eq(expected_error) + end + + it "detects invalid regular expressions" do + expected_error = I18n.t("site_settings.errors.unicode_username_whitelist.regex_invalid", error: "") + + expect(subject.valid_value?("\\p{Foo}")).to eq(false) + expect(subject.error_message).to start_with(expected_error) + end +end diff --git a/spec/models/group_spec.rb b/spec/models/group_spec.rb index 5c196e90483..45988094cd6 100644 --- a/spec/models/group_spec.rb +++ b/spec/models/group_spec.rb @@ -844,4 +844,13 @@ describe Group do group = Group.find(group.id) expect(group.flair_url).to eq("fab fa-bandcamp") end + + context "Unicode usernames and group names" do + before { SiteSetting.unicode_usernames = true } + + it "should normalize the name" do + group = Fabricate(:group, name: "Bücherwurm") # NFD + expect(group.name).to eq("Bücherwurm") # NFC + end + end end diff --git a/spec/models/user_spec.rb b/spec/models/user_spec.rb index a2b2d59fad3..94a1fb9536e 100644 --- a/spec/models/user_spec.rb +++ b/spec/models/user_spec.rb @@ -477,13 +477,13 @@ describe User do describe 'username format' do def assert_bad(username) - user = Fabricate.build(:user) + user 
= Fabricate(:user) user.username = username expect(user.valid?).to eq(false) end def assert_good(username) - user = Fabricate.build(:user) + user = Fabricate(:user) user.username = username expect(user.valid?).to eq(true) end @@ -494,39 +494,78 @@ describe User do assert_good("abcde") end - %w{ first.last - first first-last - _name first_last + context 'when Unicode usernames are disabled' do + before { SiteSetting.unicode_usernames = false } + + %w{ + first.last + first + first-last + _name + first_last mc.hammer_nose UPPERCASE sgif - }.each do |username| - it "allows #{username}" do - assert_good(username) + }.each do |username| + it "allows #{username}" do + assert_good(username) + end + end + + %w{ + traildot. + has\ space + double__underscore + with%symbol + Exclamation! + @twitter + my@email.com + .tester + sa$sy + sam.json + sam.xml + sam.html + sam.htm + sam.js + sam.woff + sam.Png + sam.gif + }.each do |username| + it "disallows #{username}" do + assert_bad(username) + end end end - %w{ - traildot. - has\ space - double__underscore - with%symbol - Exclamation! - @twitter - my@email.com - .tester - sa$sy - sam.json - sam.xml - sam.html - sam.htm - sam.js - sam.woff - sam.Png - sam.gif - }.each do |username| - it "disallows #{username}" do - assert_bad(username) + context 'when Unicode usernames are enabled' do + before { SiteSetting.unicode_usernames = true } + + %w{ + Джофрэй + Джо.фрэй + Джофр-эй + Д.жофрэй + 乔夫雷 + 乔夫_雷 + _乔夫雷 + }.each do |username| + it "allows #{username}" do + assert_good(username) + end + end + + %w{ + .Джофрэй + Джофрэй. + Джо\ фрэй + Джоф__рэй + 乔夫雷.js + 乔夫雷. 
+ 乔夫%雷 + }.each do |username| + it "disallows #{username}" do + assert_bad(username) + end end end end @@ -540,12 +579,12 @@ describe User do it "should not allow saving if username is reused" do @codinghorror.username = @user.username - expect(@codinghorror.save).to eq(false) + expect(@codinghorror.save).to eq(false) end it "should not allow saving if username is reused in different casing" do @codinghorror.username = @user.username.upcase - expect(@codinghorror.save).to eq(false) + expect(@codinghorror.save).to eq(false) end end @@ -585,6 +624,21 @@ describe User do Fabricate(:group, name: 'foo') expect(User.username_available?('Foo')).to eq(false) end + + context "with Unicode usernames enabled" do + before { SiteSetting.unicode_usernames = true } + + it 'returns false when the username is taken, but the Unicode normalization form is different' do + Fabricate(:user, username: "L\u00F6we") # NFC + requested_username = "Lo\u0308we" # NFD + expect(User.username_available?(requested_username)).to eq(false) + end + + it 'returns false when the username is taken and the case differs' do + Fabricate(:user, username: 'LÖWE') + expect(User.username_available?('löwe')).to eq(false) + end + end end describe '.reserved_username?' 
do @@ -597,7 +651,7 @@ describe User do end it 'should not allow usernames matched against an expession' do - SiteSetting.reserved_usernames = 'test)|*admin*|foo*|*bar|abc.def' + SiteSetting.reserved_usernames = "test)|*admin*|foo*|*bar|abc.def|löwe|ka\u0308fer" expect(User.reserved_username?('test')).to eq(false) expect(User.reserved_username?('abc9def')).to eq(false) @@ -610,6 +664,11 @@ describe User do expect(User.reserved_username?('bar.foo')).to eq(false) expect(User.reserved_username?('foo.bar')).to eq(true) expect(User.reserved_username?('baz.bar')).to eq(true) + + expect(User.reserved_username?('LÖwe')).to eq(true) + expect(User.reserved_username?("Lo\u0308we")).to eq(true) # NFD + expect(User.reserved_username?('löwe')).to eq(true) # NFC + expect(User.reserved_username?('käfer')).to eq(true) # NFC end end @@ -989,6 +1048,14 @@ describe User do expect(found_user).to eq bob end + it 'finds users with Unicode username' do + SiteSetting.unicode_usernames = true + user = Fabricate(:user, username: 'löwe') + + expect(User.find_by_username('LÖWE')).to eq(user) # NFC + expect(User.find_by_username("LO\u0308WE")).to eq(user) # NFD + expect(User.find_by_username("lo\u0308we")).to eq(user) # NFD + end end describe "#new_user_posting_on_first_day?" do @@ -1099,9 +1166,9 @@ describe User do before do Jobs.run_immediately! 
PostCreator.new(Fabricate(:user), - raw: 'whatever this is a raw post', - topic_id: topic.id, - reply_to_post_number: post.post_number).create + raw: 'whatever this is a raw post', + topic_id: topic.id, + reply_to_post_number: post.post_number).create end it "resets the `posted_too_much` threshold" do @@ -1170,7 +1237,7 @@ describe User do expect(user.small_avatar_url).to eq("//test.localhost/letter_avatar/sam/45/#{LetterAvatar.version}.png") SiteSetting.external_system_avatars_enabled = true - expect(user.small_avatar_url).to eq("//test.localhost/letter_avatar_proxy/v3/letter/s/5f9b8f/45.png") + expect(user.small_avatar_url).to eq("//test.localhost/letter_avatar_proxy/v4/letter/s/5f9b8f/45.png") end end @@ -1193,7 +1260,7 @@ describe User do describe "update_posts_read!" do context "with a UserVisit record" do let!(:user) { Fabricate(:user) } - let!(:now) { Time.zone.now } + let!(:now) { Time.zone.now } before { user.update_last_seen!(now) } it "with existing UserVisit record, increments the posts_read value" do @@ -1301,17 +1368,17 @@ describe User do before do PostCreator.new(Discourse.system_user, - title: "Welcome to our Discourse", - raw: "This is a welcome message", - archetype: Archetype.private_message, - target_usernames: [unactivated_old_with_system_pm.username], + title: "Welcome to our Discourse", + raw: "This is a welcome message", + archetype: Archetype.private_message, + target_usernames: [unactivated_old_with_system_pm.username], ).create PostCreator.new(user, - title: "Welcome to our Discourse", - raw: "This is a welcome message", - archetype: Archetype.private_message, - target_usernames: [unactivated_old_with_human_pm.username], + title: "Welcome to our Discourse", + raw: "This is a welcome message", + archetype: Archetype.private_message, + target_usernames: [unactivated_old_with_human_pm.username], ).create end @@ -1357,10 +1424,10 @@ describe User do let!(:group) { Fabricate(:group, - automatic_membership_email_domains: "bar.com|wat.com", - 
grant_trust_level: 1, - title: "bars and wats", - primary_group: true + automatic_membership_email_domains: "bar.com|wat.com", + grant_trust_level: 1, + title: "bars and wats", + primary_group: true ) } @@ -1394,10 +1461,10 @@ describe User do it "get attributes from the group" do user = Fabricate.build(:user, - active: true, - trust_level: 0, - email: "foo@bar.com", - password: "strongpassword4Uguys" + active: true, + trust_level: 0, + email: "foo@bar.com", + password: "strongpassword4Uguys" ) user.password_required! @@ -1659,8 +1726,8 @@ describe User do end.first expect(message.data[:recent]).to eq([ - [notification2.id, true], [notification.id, false] - ]) + [notification2.id, true], [notification.id, false] + ]) end end @@ -2015,4 +2082,26 @@ describe User do expect(Discourse.system_user).not_to be_human end end + + context "Unicode username" do + before { SiteSetting.unicode_usernames = true } + + let(:user) { Fabricate(:user, username: "Lo\u0308we") } # NFD + + it "normalizes usernames" do + expect(user.username).to eq("L\u00F6we") # NFC + expect(user.username_lower).to eq("l\u00F6we") # NFC + end + + describe ".username_exists?" do + it "normalizes username before executing query" do + expect(User.username_exists?(user.username)).to eq(true) + expect(User.username_exists?("Lo\u0308we")).to eq(true) # NFD + expect(User.username_exists?("L\u00F6we")).to eq(true) # NFC + expect(User.username_exists?("LO\u0308WE")).to eq(true) # NFD + expect(User.username_exists?("l\u00D6wE")).to eq(true) # NFC + expect(User.username_exists?("foo")).to eq(false) + end + end + end end diff --git a/spec/models/username_validator_spec.rb b/spec/models/username_validator_spec.rb index 700cea48505..e85f613220d 100644 --- a/spec/models/username_validator_spec.rb +++ b/spec/models/username_validator_spec.rb @@ -1,17 +1,205 @@ require 'rails_helper' describe UsernameValidator do - context "#valid_format?" 
do - it 'returns true when username is both valid and available' do - expect(UsernameValidator.new('Available').valid_format?).to eq true + def expect_valid(*usernames) + usernames.each do |username| + validator = UsernameValidator.new(username) + + aggregate_failures do + expect(validator.valid_format?).to eq(true), "expected '#{username}' to be valid" + expect(validator.errors).to be_empty + end + end + end + + def expect_invalid(*usernames, error_message:) + usernames.each do |username| + validator = UsernameValidator.new(username) + + aggregate_failures do + expect(validator.valid_format?).to eq(false), "expected '#{username}' to be invalid" + expect(validator.errors).to include(error_message) + end + end + end + + shared_examples 'ASCII username' do + it 'is invalid when the username is blank' do + expect_invalid('', error_message: I18n.t(:'user.username.blank')) end - it 'returns true when the username is valid but not available' do - expect(UsernameValidator.new(Fabricate(:user).username).valid_format?).to eq true + it 'is invalid when the username is too short' do + SiteSetting.min_username_length = 4 + + expect_invalid('a', 'ab', 'abc', + error_message: I18n.t(:'user.username.short', min: 4)) end - it 'returns false when the username is not valid' do - expect(UsernameValidator.new('not valid.name').valid_format?).to eq false + it 'is valid when the username has the minimum lenght' do + SiteSetting.min_username_length = 4 + + expect_valid('abcd') + end + + it 'is invalid when the username is too long' do + SiteSetting.max_username_length = 8 + + expect_invalid('abcdefghi', + error_message: I18n.t(:'user.username.long', max: 8)) + end + + it 'is valid when the username has the maximum lenght' do + SiteSetting.max_username_length = 8 + + expect_valid('abcdefgh') + end + + it 'is valid when the username contains alphanumeric characters, dots, underscores and dashes' do + expect_valid('ab-cd.123_ABC-xYz') + end + + it 'is invalid when the username contains 
non-alphanumeric characters other than dots, underscores and dashes' do + expect_invalid('abc|', 'a#bc', 'abc xyz', + error_message: I18n.t(:'user.username.characters')) + end + + it 'is valid when the username starts with a alphanumeric character or underscore' do + expect_valid('abcd', '1abc', '_abc') + end + + it 'is invalid when the username starts with a dot or dash' do + expect_invalid('.abc', '-abc', + error_message: I18n.t(:'user.username.must_begin_with_alphanumeric_or_underscore')) + end + + it 'is valid when the username ends with a alphanumeric character' do + expect_valid('abcd', 'abc9') + end + + it 'is invalid when the username ends with an underscore, a dot or dash' do + expect_invalid('abc_', 'abc.', 'abc-', + error_message: I18n.t(:'user.username.must_end_with_alphanumeric')) + end + + it 'is invalid when the username contains consecutive underscores, dots or dashes' do + expect_invalid('a__bc', 'a..bc', 'a--bc', + error_message: I18n.t(:'user.username.must_not_contain_two_special_chars_in_seq')) + end + + it 'is invalid when the username ends with certain file extensions' do + expect_invalid('abc.json', 'abc.png', + error_message: I18n.t(:'user.username.must_not_end_with_confusing_suffix')) + end + end + + context 'when Unicode usernames are disabled' do + before { SiteSetting.unicode_usernames = false } + + include_examples 'ASCII username' + + it 'is invalid when the username contains non-ASCII characters except dots, underscores and dashes' do + expect_invalid('abcö', 'abc象', + error_message: I18n.t(:'user.username.characters')) + end + end + + context 'when Unicode usernames are enabled' do + before { SiteSetting.unicode_usernames = true } + + context "ASCII usernames" do + include_examples 'ASCII username' + end + + context "Unicode usernames" do + before { SiteSetting.min_username_length = 1 } + + it 'is invalid when the username is too short' do + SiteSetting.min_username_length = 3 + + expect_invalid('鳥', 'পাখি', + error_message: 
I18n.t(:'user.username.short', min: 3)) + end + + it 'is valid when the username has the minimum lenght' do + SiteSetting.min_username_length = 2 + + expect_valid('পাখি', 'طائر') + end + + it 'is invalid when the username is too long' do + SiteSetting.max_username_length = 8 + + expect_invalid('חוטב_עצים', 'Holzfäller', + error_message: I18n.t(:'user.username.long', max: 8)) + end + + it 'is valid when the username has the maximum lenght' do + SiteSetting.max_username_length = 9 + + expect_valid('Дровосек', 'چوب-لباسی', 'தமிழ்-தமிழ்') + end + + it 'is invalid when the username has too many Unicode codepoints' do + SiteSetting.max_username_length = 30 + + expect_invalid('য়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়ায়া', + error_message: I18n.t(:'user.username.too_long')) + end + + it 'is valid when the username contains Unicode letters' do + expect_valid('鳥', 'طائر', 'թռչուն', 'πουλί', 'পাখি', 'madár', '새', + 'پرنده', 'птица', 'fågel', 'นก', 'پرندے', 'ציפור') + end + + it 'is valid when the username contains numbers from the Nd or Nl Unicode category' do + expect_valid('arabic٠١٢٣٤٥٦٧٨٩', 'bengali০১২৩৪৫৬৭৮৯', 'romanⅥ', 'hangzhou〺') + end + + it 'is invalid when the username contains numbers from the No Unicode category' do + expect_invalid('circled㊸', 'fraction¾', + error_message: I18n.t(:'user.username.characters')) + end + + it 'is invalid when the username contains symbols or emojis' do + SiteSetting.min_username_length = 1 + + expect_invalid('©', '⇨', '“', '±', '‿', '😃', '🚗', + error_message: I18n.t(:'user.username.characters')) + end + + it 'is invalid when the username contains zero width join characters' do + expect_invalid('ണ്', 'র্যাম', + error_message: I18n.t(:'user.username.characters')) + end + + it 'is valid when the username ends with a Unicode Mark' do + expect_valid('தமிழ்') + end + + it 'allows all Unicode letters when the whitelist is empty' do + expect_valid('鳥') + end + + context "with Unicode whitelist" do + before { 
SiteSetting.unicode_username_character_whitelist = "[äöüÄÖÜß]" } + + it 'is invalid when username contains non-whitelisted letters' do + expect_invalid('鳥', 'francès', error_message: I18n.t(:'user.username.characters')) + end + + it 'is valid when username contains only whitelisted letters' do + expect_valid('Löwe', 'Ötzi') + end + + it 'is valid when username contains only ASCII letters and numbers regardless of whitelist' do + expect_valid('a-z_A-Z.0-9') + end + + it 'is valid after resetting the site setting' do + SiteSetting.unicode_username_character_whitelist = "" + expect_valid('鳥') + end + end end end end diff --git a/spec/services/username_changer_spec.rb b/spec/services/username_changer_spec.rb index 15bb760a13f..ef3ce2f9cb1 100644 --- a/spec/services/username_changer_spec.rb +++ b/spec/services/username_changer_spec.rb @@ -97,7 +97,7 @@ describe UsernameChanger do block.call(post) if block - UsernameChanger.change(user, 'bar') + UsernameChanger.change(user, args[:target_username] || 'bar') post.reload end @@ -130,13 +130,13 @@ describe UsernameChanger do expect(post.raw).to eq(".@bar -@bar %@bar _@bar ,@bar ;@bar @@bar") expect(post.cooked).to match_html(<<~HTML) -.@bar - -@bar - %@bar - _@bar - ,@bar - ;@bar - @@bar
+.@bar + -@bar + %@bar + _@bar + ,@bar + ;@bar + @@bar
HTML end @@ -147,38 +147,55 @@ describe UsernameChanger do expect(post.cooked).to eq(%Q()) end - it 'replaces mentions when there are trailing symbols' do - post = create_post_and_change_username(raw: "@foo. @foo, @foo: @foo; @foo-") + it 'replaces Markdown formatted mentions' do + post = create_post_and_change_username(raw: "**@foo** *@foo* _@foo_ ~~@foo~~") - expect(post.raw).to eq("@bar. @bar, @bar: @bar; @bar-") + expect(post.raw).to eq("**@bar** *@bar* _@bar_ ~~@bar~~") expect(post.cooked).to match_html(<<~HTML) -@bar. - @bar, - @bar: - @bar; - @bar-
+ HTML end - it 'does not replace mention when followed by an underscore' do - post = create_post_and_change_username(raw: "@foo_") + it 'replaces mentions when there are trailing symbols' do + post = create_post_and_change_username(raw: "@foo. @foo, @foo: @foo; @foo_ @foo-") - expect(post.raw).to eq("@foo_") - expect(post.cooked).to eq(%Q(@foo_
)) + expect(post.raw).to eq("@bar. @bar, @bar: @bar; @bar_ @bar-") + expect(post.cooked).to match_html(<<~HTML) +@bar. + @bar, + @bar: + @bar; + @bar_ + @bar-
+ HTML + end + + it 'does not replace mention in cooked when mention contains a trailing underscore' do + # Older versions of Discourse detected a trailing underscore as part of a username. + # That doesn't happen anymore, so we need to create the `cooked` for this test manually. + post = create_post_and_change_username(raw: "@foobar @foo") do |p| + p.update_columns(raw: p.raw.gsub("@foobar", "@foo_"), cooked: p.cooked.gsub("@foobar", "@foo_")) + end + + expect(post.raw).to eq("@bar_ @bar") + expect(post.cooked).to eq(%Q(@foo_ @bar
)) end it 'does not replace mentions when there are leading alphanumeric chars' do - post = create_post_and_change_username(raw: "a@foo 2@foo") + post = create_post_and_change_username(raw: "@foo a@foo 2@foo") - expect(post.raw).to eq("a@foo 2@foo") - expect(post.cooked).to eq(%Q(a@foo 2@foo
)) + expect(post.raw).to eq("@bar a@foo 2@foo") + expect(post.cooked).to eq(%Q(@bar a@foo 2@foo
)) end it 'does not replace username within email address' do - post = create_post_and_change_username(raw: "mail@foo.com") + post = create_post_and_change_username(raw: "@foo mail@foo.com") - expect(post.raw).to eq("mail@foo.com") - expect(post.cooked).to eq(%Q()) + expect(post.raw).to eq("@bar mail@foo.com") + expect(post.cooked).to eq(%Q()) end it 'does not replace username in a mention of a similar username' do @@ -191,11 +208,11 @@ describe UsernameChanger do expect(post.raw).to eq("@bar @foobar @foo-bar @foo_bar @foo1") expect(post.cooked).to match_html(<<~HTML) -@bar - @foobar - @foo-bar - @foo_bar - @foo1
+@bar + @foobar + @foo-bar + @foo_bar + @foo1
HTML end @@ -253,6 +270,44 @@ describe UsernameChanger do expect(post.raw).to eq('@bar and @someuser') expect(post.cooked).to match_html('') end + + context "Unicode usernames" do + before { SiteSetting.unicode_usernames = true } + let(:user) { Fabricate(:user, username: 'թռչուն') } + + it 'it correctly updates mentions' do + post = create_post_and_change_username(raw: "Hello @թռչուն", target_username: 'птица') + + expect(post.raw).to eq("Hello @птица") + expect(post.cooked).to eq(%Q(Hello @птица
)) + end + + it 'does not replace mentions when there are leading alphanumeric chars' do + post = create_post_and_change_username(raw: "Hello @թռչուն 鳥@թռչուն 2@թռչուն ٩@թռչուն", target_username: 'птица') + + expect(post.raw).to eq("Hello @птица 鳥@թռչուն 2@թռչուն ٩@թռչուն") + expect(post.cooked).to eq(%Q(Hello @птица 鳥@թռչուն 2@թռչուն ٩@թռչուն
)) + end + + it 'does not replace username in a mention of a similar username' do + Fabricate(:user, username: 'թռչուն鳥') + Fabricate(:user, username: 'թռչուն-鳥') + Fabricate(:user, username: 'թռչուն_鳥') + Fabricate(:user, username: 'թռչուն٩') + + post = create_post_and_change_username(raw: "@թռչուն @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩", target_username: 'птица') + + expect(post.raw).to eq("@птица @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩") + expect(post.cooked).to match_html(<<~HTML) +@птица + @թռչուն鳥 + @թռչուն-鳥 + @թռչուն_鳥 + @թռչուն٩
+ HTML + end + + end end context 'quotes' do diff --git a/test/javascripts/helpers/site-settings.js b/test/javascripts/helpers/site-settings.js index c449c3c0348..6dd997e67c8 100644 --- a/test/javascripts/helpers/site-settings.js +++ b/test/javascripts/helpers/site-settings.js @@ -94,7 +94,8 @@ Discourse.SiteSettingsOriginal = { emoji_set: "emoji_one", desktop_category_page_style: "categories_and_latest_topics", enable_mentions: true, - enable_personal_messages: true + enable_personal_messages: true, + unicode_usernames: false }; Discourse.SiteSettings = jQuery.extend( true, diff --git a/test/javascripts/lib/pretty-text-test.js.es6 b/test/javascripts/lib/pretty-text-test.js.es6 index 9495122a87c..90c503f2c46 100644 --- a/test/javascripts/lib/pretty-text-test.js.es6 +++ b/test/javascripts/lib/pretty-text-test.js.es6 @@ -555,6 +555,48 @@ QUnit.test("Mentions", assert => { 'a @sam c
', "it allows mentions within HTML tags" ); + + assert.cooked( + "@_sam @1sam @ab-cd.123_ABC-xYz @sam1", + '@_sam @1sam @ab-cd.123_ABC-xYz @sam1
', + "it detects mentions of valid usernames" + ); + + assert.cooked( + "@.sam @-sam @sam. @sam_ @sam-", + '@.sam @-sam @sam. @sam_ @sam-
', + "it does not detect mentions of invalid usernames" + ); + + assert.cookedOptions( + "Hello @狮子", + { siteSettings: { unicode_usernames: false } }, + "Hello @狮子
", + "it does not detect mentions of Unicode usernames" + ); +}); + +QUnit.test("Mentions - Unicode usernames enabled", assert => { + assert.cookedOptions( + "Hello @狮子", + { siteSettings: { unicode_usernames: true } }, + 'Hello @狮子
', + "it detects mentions of Unicode usernames" + ); + + assert.cookedOptions( + "@狮子 @_狮子 @1狮子 @狮-ø.١٢٣_Ö-ழ் @狮子1", + { siteSettings: { unicode_usernames: true } }, + '@狮子 @_狮子 @1狮子 @狮-ø.١٢٣_Ö-ழ் @狮子1
', + "it detects mentions of valid Unicode usernames" + ); + + assert.cookedOptions( + "@.狮子 @-狮子 @狮子. @狮子_ @狮子-", + { siteSettings: { unicode_usernames: true } }, + '@.狮子 @-狮子 @狮子. @狮子_ @狮子-
', + "it does not detect mentions of invalid Unicode usernames" + ); }); QUnit.test("Mentions - disabled", assert => {