/*! * twitter-text-js 1.5.0 * * Copyright 2011 Twitter, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this work except in compliance with the License. * You may obtain a copy of the License at: * * http://www.apache.org/licenses/LICENSE-2.0 */ if (typeof window === "undefined" || window === null) { window = { twttr: {} }; } if (window.twttr == null) { window.twttr = {}; } if (typeof twttr === "undefined" || twttr === null) { twttr = {}; } (function() { twttr.txt = {}; twttr.txt.regexen = {}; var HTML_ENTITIES = { '&': '&', '>': '>', '<': '<', '"': '"', "'": ''' }; // HTML escaping twttr.txt.htmlEscape = function(text) { return text && text.replace(/[&"'><]/g, function(character) { return HTML_ENTITIES[character]; }); }; // Builds a RegExp function regexSupplant(regex, flags) { flags = flags || ""; if (typeof regex !== "string") { if (regex.global && flags.indexOf("g") < 0) { flags += "g"; } if (regex.ignoreCase && flags.indexOf("i") < 0) { flags += "i"; } if (regex.multiline && flags.indexOf("m") < 0) { flags += "m"; } regex = regex.source; } return new RegExp(regex.replace(/#\{(\w+)\}/g, function(match, name) { var newRegex = twttr.txt.regexen[name] || ""; if (typeof newRegex !== "string") { newRegex = newRegex.source; } return newRegex; }), flags); } twttr.txt.regexSupplant = regexSupplant; // simple string interpolation function stringSupplant(str, values) { return str.replace(/#\{(\w+)\}/g, function(match, name) { return values[name] || ""; }); } twttr.txt.stringSupplant = stringSupplant; function addCharsToCharClass(charClass, start, end) { var s = String.fromCharCode(start); if (end !== start) { s += "-" + String.fromCharCode(end); } charClass.push(s); return charClass; } twttr.txt.addCharsToCharClass = addCharsToCharClass; // Space is more than %20, U+3000 for example is the full-width space used with Kanji. Provide a short-hand // to access both the list of characters and a pattern suitible for use with String#split // Taken from: ActiveSupport::Multibyte::Handlers::UTF8Handler::UNICODE_WHITESPACE var fromCode = String.fromCharCode; var UNICODE_SPACES = [ fromCode(0x0020), // White_Space # Zs SPACE fromCode(0x0085), // White_Space # Cc fromCode(0x00A0), // White_Space # Zs NO-BREAK SPACE fromCode(0x1680), // White_Space # Zs OGHAM SPACE MARK fromCode(0x180E), // White_Space # Zs MONGOLIAN VOWEL SEPARATOR fromCode(0x2028), // White_Space # Zl LINE SEPARATOR fromCode(0x2029), // White_Space # Zp PARAGRAPH SEPARATOR fromCode(0x202F), // White_Space # Zs NARROW NO-BREAK SPACE fromCode(0x205F), // White_Space # Zs MEDIUM MATHEMATICAL SPACE fromCode(0x3000) // White_Space # Zs IDEOGRAPHIC SPACE ]; addCharsToCharClass(UNICODE_SPACES, 0x009, 0x00D); // White_Space # Cc [5] .. addCharsToCharClass(UNICODE_SPACES, 0x2000, 0x200A); // White_Space # Zs [11] EN QUAD..HAIR SPACE var INVALID_CHARS = [ fromCode(0xFFFE), fromCode(0xFEFF), // BOM fromCode(0xFFFF) // Special ]; addCharsToCharClass(INVALID_CHARS, 0x202A, 0x202E); // Directional change twttr.txt.regexen.spaces_group = regexSupplant(UNICODE_SPACES.join("")); twttr.txt.regexen.spaces = regexSupplant("[" + UNICODE_SPACES.join("") + "]"); twttr.txt.regexen.invalid_chars_group = regexSupplant(INVALID_CHARS.join("")); twttr.txt.regexen.punct = /\!'#%&'\(\)*\+,\\\-\.\/:;<=>\?@\[\]\^_{|}~\$/; var nonLatinHashtagChars = []; // Cyrillic addCharsToCharClass(nonLatinHashtagChars, 0x0400, 0x04ff); // Cyrillic addCharsToCharClass(nonLatinHashtagChars, 0x0500, 0x0527); // Cyrillic Supplement addCharsToCharClass(nonLatinHashtagChars, 0x2de0, 0x2dff); // Cyrillic Extended A addCharsToCharClass(nonLatinHashtagChars, 0xa640, 0xa69f); // Cyrillic Extended B // Hebrew addCharsToCharClass(nonLatinHashtagChars, 0x0591, 0x05bf); // Hebrew addCharsToCharClass(nonLatinHashtagChars, 0x05c1, 0x05c2); addCharsToCharClass(nonLatinHashtagChars, 0x05c4, 0x05c5); addCharsToCharClass(nonLatinHashtagChars, 0x05c7, 0x05c7); addCharsToCharClass(nonLatinHashtagChars, 0x05d0, 0x05ea); addCharsToCharClass(nonLatinHashtagChars, 0x05f0, 0x05f4); addCharsToCharClass(nonLatinHashtagChars, 0xfb12, 0xfb28); // Hebrew Presentation Forms addCharsToCharClass(nonLatinHashtagChars, 0xfb2a, 0xfb36); addCharsToCharClass(nonLatinHashtagChars, 0xfb38, 0xfb3c); addCharsToCharClass(nonLatinHashtagChars, 0xfb3e, 0xfb3e); addCharsToCharClass(nonLatinHashtagChars, 0xfb40, 0xfb41); addCharsToCharClass(nonLatinHashtagChars, 0xfb43, 0xfb44); addCharsToCharClass(nonLatinHashtagChars, 0xfb46, 0xfb4f); // Arabic addCharsToCharClass(nonLatinHashtagChars, 0x0610, 0x061a); // Arabic addCharsToCharClass(nonLatinHashtagChars, 0x0620, 0x065f); addCharsToCharClass(nonLatinHashtagChars, 0x066e, 0x06d3); addCharsToCharClass(nonLatinHashtagChars, 0x06d5, 0x06dc); addCharsToCharClass(nonLatinHashtagChars, 0x06de, 0x06e8); addCharsToCharClass(nonLatinHashtagChars, 0x06ea, 0x06ef); addCharsToCharClass(nonLatinHashtagChars, 0x06fa, 0x06fc); addCharsToCharClass(nonLatinHashtagChars, 0x06ff, 0x06ff); addCharsToCharClass(nonLatinHashtagChars, 0x0750, 0x077f); // Arabic Supplement addCharsToCharClass(nonLatinHashtagChars, 0x08a0, 0x08a0); // Arabic Extended A addCharsToCharClass(nonLatinHashtagChars, 0x08a2, 0x08ac); addCharsToCharClass(nonLatinHashtagChars, 0x08e4, 0x08fe); addCharsToCharClass(nonLatinHashtagChars, 0xfb50, 0xfbb1); // Arabic Pres. Forms A addCharsToCharClass(nonLatinHashtagChars, 0xfbd3, 0xfd3d); addCharsToCharClass(nonLatinHashtagChars, 0xfd50, 0xfd8f); addCharsToCharClass(nonLatinHashtagChars, 0xfd92, 0xfdc7); addCharsToCharClass(nonLatinHashtagChars, 0xfdf0, 0xfdfb); addCharsToCharClass(nonLatinHashtagChars, 0xfe70, 0xfe74); // Arabic Pres. Forms B addCharsToCharClass(nonLatinHashtagChars, 0xfe76, 0xfefc); addCharsToCharClass(nonLatinHashtagChars, 0x200c, 0x200c); // Zero-Width Non-Joiner // Thai addCharsToCharClass(nonLatinHashtagChars, 0x0e01, 0x0e3a); addCharsToCharClass(nonLatinHashtagChars, 0x0e40, 0x0e4e); // Hangul (Korean) addCharsToCharClass(nonLatinHashtagChars, 0x1100, 0x11ff); // Hangul Jamo addCharsToCharClass(nonLatinHashtagChars, 0x3130, 0x3185); // Hangul Compatibility Jamo addCharsToCharClass(nonLatinHashtagChars, 0xA960, 0xA97F); // Hangul Jamo Extended-A addCharsToCharClass(nonLatinHashtagChars, 0xAC00, 0xD7AF); // Hangul Syllables addCharsToCharClass(nonLatinHashtagChars, 0xD7B0, 0xD7FF); // Hangul Jamo Extended-B addCharsToCharClass(nonLatinHashtagChars, 0xFFA1, 0xFFDC); // half-width Hangul // Japanese and Chinese addCharsToCharClass(nonLatinHashtagChars, 0x30A1, 0x30FA); // Katakana (full-width) addCharsToCharClass(nonLatinHashtagChars, 0x30FC, 0x30FE); // Katakana Chouon and iteration marks (full-width) addCharsToCharClass(nonLatinHashtagChars, 0xFF66, 0xFF9F); // Katakana (half-width) addCharsToCharClass(nonLatinHashtagChars, 0xFF70, 0xFF70); // Katakana Chouon (half-width) addCharsToCharClass(nonLatinHashtagChars, 0xFF10, 0xFF19); // \ addCharsToCharClass(nonLatinHashtagChars, 0xFF21, 0xFF3A); // - Latin (full-width) addCharsToCharClass(nonLatinHashtagChars, 0xFF41, 0xFF5A); // / addCharsToCharClass(nonLatinHashtagChars, 0x3041, 0x3096); // Hiragana addCharsToCharClass(nonLatinHashtagChars, 0x3099, 0x309E); // Hiragana voicing and iteration mark addCharsToCharClass(nonLatinHashtagChars, 0x3400, 0x4DBF); // Kanji (CJK Extension A) addCharsToCharClass(nonLatinHashtagChars, 0x4E00, 0x9FFF); // Kanji (Unified) // -- Disabled as it breaks the Regex. //addCharsToCharClass(nonLatinHashtagChars, 0x20000, 0x2A6DF); // Kanji (CJK Extension B) addCharsToCharClass(nonLatinHashtagChars, 0x2A700, 0x2B73F); // Kanji (CJK Extension C) addCharsToCharClass(nonLatinHashtagChars, 0x2B740, 0x2B81F); // Kanji (CJK Extension D) addCharsToCharClass(nonLatinHashtagChars, 0x2F800, 0x2FA1F); // Kanji (CJK supplement) addCharsToCharClass(nonLatinHashtagChars, 0x3003, 0x3003); // Kanji iteration mark addCharsToCharClass(nonLatinHashtagChars, 0x3005, 0x3005); // Kanji iteration mark addCharsToCharClass(nonLatinHashtagChars, 0x303B, 0x303B); // Han iteration mark twttr.txt.regexen.nonLatinHashtagChars = regexSupplant(nonLatinHashtagChars.join("")); var latinAccentChars = []; // Latin accented characters (subtracted 0xD7 from the range, it's a confusable multiplication sign. Looks like "x") addCharsToCharClass(latinAccentChars, 0x00c0, 0x00d6); addCharsToCharClass(latinAccentChars, 0x00d8, 0x00f6); addCharsToCharClass(latinAccentChars, 0x00f8, 0x00ff); // Latin Extended A and B addCharsToCharClass(latinAccentChars, 0x0100, 0x024f); // assorted IPA Extensions addCharsToCharClass(latinAccentChars, 0x0253, 0x0254); addCharsToCharClass(latinAccentChars, 0x0256, 0x0257); addCharsToCharClass(latinAccentChars, 0x0259, 0x0259); addCharsToCharClass(latinAccentChars, 0x025b, 0x025b); addCharsToCharClass(latinAccentChars, 0x0263, 0x0263); addCharsToCharClass(latinAccentChars, 0x0268, 0x0268); addCharsToCharClass(latinAccentChars, 0x026f, 0x026f); addCharsToCharClass(latinAccentChars, 0x0272, 0x0272); addCharsToCharClass(latinAccentChars, 0x0289, 0x0289); addCharsToCharClass(latinAccentChars, 0x028b, 0x028b); // Okina for Hawaiian (it *is* a letter character) addCharsToCharClass(latinAccentChars, 0x02bb, 0x02bb); // Combining diacritics addCharsToCharClass(latinAccentChars, 0x0300, 0x036f); // Latin Extended Additional addCharsToCharClass(latinAccentChars, 0x1e00, 0x1eff); twttr.txt.regexen.latinAccentChars = regexSupplant(latinAccentChars.join("")); // A hashtag must contain characters, numbers and underscores, but not all numbers. twttr.txt.regexen.hashSigns = /[##]/; twttr.txt.regexen.hashtagAlpha = regexSupplant(/[a-z_#{latinAccentChars}#{nonLatinHashtagChars}]/i); twttr.txt.regexen.hashtagAlphaNumeric = regexSupplant(/[a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}]/i); twttr.txt.regexen.endHashtagMatch = regexSupplant(/^(?:#{hashSigns}|:\/\/)/); twttr.txt.regexen.hashtagBoundary = regexSupplant(/(?:^|$|[^&a-z0-9_#{latinAccentChars}#{nonLatinHashtagChars}])/); twttr.txt.regexen.validHashtag = regexSupplant(/(#{hashtagBoundary})(#{hashSigns})(#{hashtagAlphaNumeric}*#{hashtagAlpha}#{hashtagAlphaNumeric}*)/gi); // Mention related regex collection twttr.txt.regexen.validMentionPrecedingChars = /(?:^|[^a-zA-Z0-9_!#$%&*@@]|RT:?)/; twttr.txt.regexen.atSigns = /[@@]/; twttr.txt.regexen.validMentionOrList = regexSupplant( '(#{validMentionPrecedingChars})' + // $1: Preceding character '(#{atSigns})' + // $2: At mark '([a-zA-Z0-9_]{1,20})' + // $3: Screen name '(\/[a-zA-Z][a-zA-Z0-9_\-]{0,24})?' // $4: List (optional) , 'g'); twttr.txt.regexen.validReply = regexSupplant(/^(?:#{spaces})*#{atSigns}([a-zA-Z0-9_]{1,20})/); twttr.txt.regexen.endMentionMatch = regexSupplant(/^(?:#{atSigns}|[#{latinAccentChars}]|:\/\/)/); // URL related regex collection twttr.txt.regexen.validUrlPrecedingChars = regexSupplant(/(?:[^A-Za-z0-9@@$###{invalid_chars_group}]|^)/); twttr.txt.regexen.invalidUrlWithoutProtocolPrecedingChars = /[-_.\/]$/; twttr.txt.regexen.invalidDomainChars = stringSupplant("#{punct}#{spaces_group}#{invalid_chars_group}", twttr.txt.regexen); twttr.txt.regexen.validDomainChars = regexSupplant(/[^#{invalidDomainChars}]/); twttr.txt.regexen.validSubdomain = regexSupplant(/(?:(?:#{validDomainChars}(?:[_-]|#{validDomainChars})*)?#{validDomainChars}\.)/); twttr.txt.regexen.validDomainName = regexSupplant(/(?:(?:#{validDomainChars}(?:-|#{validDomainChars})*)?#{validDomainChars}\.)/); twttr.txt.regexen.validGTLD = regexSupplant(/(?:(?:aero|asia|biz|cat|com|coop|edu|gov|info|int|jobs|mil|mobi|museum|name|net|org|pro|tel|travel|xxx)(?=[^0-9a-zA-Z]|$))/); twttr.txt.regexen.validCCTLD = regexSupplant(/(?:(?:ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cs|cu|cv|cx|cy|cz|dd|de|dj|dk|dm|do|dz|ec|ee|eg|eh|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|ss|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|za|zm|zw)(?=[^0-9a-zA-Z]|$))/); twttr.txt.regexen.validPunycode = regexSupplant(/(?:xn--[0-9a-z]+)/); twttr.txt.regexen.validDomain = regexSupplant(/(?:#{validSubdomain}*#{validDomainName}(?:#{validGTLD}|#{validCCTLD}|#{validPunycode}))/); twttr.txt.regexen.validAsciiDomain = regexSupplant(/(?:(?:[a-z0-9#{latinAccentChars}]+)\.)+(?:#{validGTLD}|#{validCCTLD}|#{validPunycode})/gi); twttr.txt.regexen.invalidShortDomain = regexSupplant(/^#{validDomainName}#{validCCTLD}$/); twttr.txt.regexen.validPortNumber = regexSupplant(/[0-9]+/); twttr.txt.regexen.validGeneralUrlPathChars = regexSupplant(/[a-z0-9!\*';:=\+,\.\$\/%#\[\]\-_~|&#{latinAccentChars}]/i); // Allow URL paths to contain balanced parens // 1. Used in Wikipedia URLs like /Primer_(film) // 2. Used in IIS sessions like /S(dfd346)/ twttr.txt.regexen.validUrlBalancedParens = regexSupplant(/\(#{validGeneralUrlPathChars}+\)/i); // Valid end-of-path chracters (so /foo. does not gobble the period). // 1. Allow =&# for empty URL parameters and other URL-join artifacts twttr.txt.regexen.validUrlPathEndingChars = regexSupplant(/[\+\-a-z0-9=_#\/#{latinAccentChars}]|(?:#{validUrlBalancedParens})/i); // Allow @ in a url, but only in the middle. Catch things like http://example.com/@user/ twttr.txt.regexen.validUrlPath = regexSupplant('(?:' + '(?:' + '#{validGeneralUrlPathChars}*' + '(?:#{validUrlBalancedParens}#{validGeneralUrlPathChars}*)*' + '#{validUrlPathEndingChars}'+ ')|(?:@#{validGeneralUrlPathChars}+\/)'+ ')', 'i'); twttr.txt.regexen.validUrlQueryChars = /[a-z0-9!?\*'\(\);:&=\+\$\/%#\[\]\-_\.,~|]/i; twttr.txt.regexen.validUrlQueryEndingChars = /[a-z0-9_&=#\/]/i; twttr.txt.regexen.extractUrl = regexSupplant( '(' + // $1 total match '(#{validUrlPrecedingChars})' + // $2 Preceeding chracter '(' + // $3 URL '(https?:\\/\\/)?' + // $4 Protocol (optional) '(#{validDomain})' + // $5 Domain(s) '(?::(#{validPortNumber}))?' + // $6 Port number (optional) '(\\/#{validUrlPath}*)?' + // $7 URL Path '(\\?#{validUrlQueryChars}*#{validUrlQueryEndingChars})?' + // $8 Query String ')' + ')' , 'gi'); twttr.txt.regexen.validTcoUrl = /^https?:\/\/t\.co\/[a-z0-9]+/i; // cashtag related regex twttr.txt.regexen.cashtag = /[a-z]{1,6}(?:[._][a-z]{1,2})?/i; twttr.txt.regexen.validCashtag = regexSupplant('(?:^|#{spaces})\\$(#{cashtag})(?=$|\\s|[#{punct}])', 'gi'); // These URL validation pattern strings are based on the ABNF from RFC 3986 twttr.txt.regexen.validateUrlUnreserved = /[a-z0-9\-._~]/i; twttr.txt.regexen.validateUrlPctEncoded = /(?:%[0-9a-f]{2})/i; twttr.txt.regexen.validateUrlSubDelims = /[!$&'()*+,;=]/i; twttr.txt.regexen.validateUrlPchar = regexSupplant('(?:' + '#{validateUrlUnreserved}|' + '#{validateUrlPctEncoded}|' + '#{validateUrlSubDelims}|' + '[:|@]' + ')', 'i'); twttr.txt.regexen.validateUrlScheme = /(?:[a-z][a-z0-9+\-.]*)/i; twttr.txt.regexen.validateUrlUserinfo = regexSupplant('(?:' + '#{validateUrlUnreserved}|' + '#{validateUrlPctEncoded}|' + '#{validateUrlSubDelims}|' + ':' + ')*', 'i'); twttr.txt.regexen.validateUrlDecOctet = /(?:[0-9]|(?:[1-9][0-9])|(?:1[0-9]{2})|(?:2[0-4][0-9])|(?:25[0-5]))/i; twttr.txt.regexen.validateUrlIpv4 = regexSupplant(/(?:#{validateUrlDecOctet}(?:\.#{validateUrlDecOctet}){3})/i); // Punting on real IPv6 validation for now twttr.txt.regexen.validateUrlIpv6 = /(?:\[[a-f0-9:\.]+\])/i; // Also punting on IPvFuture for now twttr.txt.regexen.validateUrlIp = regexSupplant('(?:' + '#{validateUrlIpv4}|' + '#{validateUrlIpv6}' + ')', 'i'); // This is more strict than the rfc specifies twttr.txt.regexen.validateUrlSubDomainSegment = /(?:[a-z0-9](?:[a-z0-9_\-]*[a-z0-9])?)/i; twttr.txt.regexen.validateUrlDomainSegment = /(?:[a-z0-9](?:[a-z0-9\-]*[a-z0-9])?)/i; twttr.txt.regexen.validateUrlDomainTld = /(?:[a-z](?:[a-z0-9\-]*[a-z0-9])?)/i; twttr.txt.regexen.validateUrlDomain = regexSupplant(/(?:(?:#{validateUrlSubDomainSegment]}\.)*(?:#{validateUrlDomainSegment]}\.)#{validateUrlDomainTld})/i); twttr.txt.regexen.validateUrlHost = regexSupplant('(?:' + '#{validateUrlIp}|' + '#{validateUrlDomain}' + ')', 'i'); // Unencoded internationalized domains - this doesn't check for invalid UTF-8 sequences twttr.txt.regexen.validateUrlUnicodeSubDomainSegment = /(?:(?:[a-z0-9]|[^\u0000-\u007f])(?:(?:[a-z0-9_\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i; twttr.txt.regexen.validateUrlUnicodeDomainSegment = /(?:(?:[a-z0-9]|[^\u0000-\u007f])(?:(?:[a-z0-9\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i; twttr.txt.regexen.validateUrlUnicodeDomainTld = /(?:(?:[a-z]|[^\u0000-\u007f])(?:(?:[a-z0-9\-]|[^\u0000-\u007f])*(?:[a-z0-9]|[^\u0000-\u007f]))?)/i; twttr.txt.regexen.validateUrlUnicodeDomain = regexSupplant(/(?:(?:#{validateUrlUnicodeSubDomainSegment}\.)*(?:#{validateUrlUnicodeDomainSegment}\.)#{validateUrlUnicodeDomainTld})/i); twttr.txt.regexen.validateUrlUnicodeHost = regexSupplant('(?:' + '#{validateUrlIp}|' + '#{validateUrlUnicodeDomain}' + ')', 'i'); twttr.txt.regexen.validateUrlPort = /[0-9]{1,5}/; twttr.txt.regexen.validateUrlUnicodeAuthority = regexSupplant( '(?:(#{validateUrlUserinfo})@)?' + // $1 userinfo '(#{validateUrlUnicodeHost})' + // $2 host '(?::(#{validateUrlPort}))?' //$3 port , "i"); twttr.txt.regexen.validateUrlAuthority = regexSupplant( '(?:(#{validateUrlUserinfo})@)?' + // $1 userinfo '(#{validateUrlHost})' + // $2 host '(?::(#{validateUrlPort}))?' // $3 port , "i"); twttr.txt.regexen.validateUrlPath = regexSupplant(/(\/#{validateUrlPchar}*)*/i); twttr.txt.regexen.validateUrlQuery = regexSupplant(/(#{validateUrlPchar}|\/|\?)*/i); twttr.txt.regexen.validateUrlFragment = regexSupplant(/(#{validateUrlPchar}|\/|\?)*/i); // Modified version of RFC 3986 Appendix B twttr.txt.regexen.validateUrlUnencoded = regexSupplant( '^' + // Full URL '(?:' + '([^:/?#]+):\\/\\/' + // $1 Scheme ')?' + '([^/?#]*)' + // $2 Authority '([^?#]*)' + // $3 Path '(?:' + '\\?([^#]*)' + // $4 Query ')?' + '(?:' + '#(.*)' + // $5 Fragment ')?$' , "i"); // Default CSS class for auto-linked lists (along with the url class) var DEFAULT_LIST_CLASS = "tweet-url list-slug"; // Default CSS class for auto-linked usernames (along with the url class) var DEFAULT_USERNAME_CLASS = "tweet-url username"; // Default CSS class for auto-linked hashtags (along with the url class) var DEFAULT_HASHTAG_CLASS = "tweet-url hashtag"; // Default CSS class for auto-linked cashtags (along with the url class) var DEFAULT_CASHTAG_CLASS = "tweet-url cashtag"; // Options which should not be passed as HTML attributes var OPTIONS_NOT_ATTRIBUTES = {'urlClass':true, 'listClass':true, 'usernameClass':true, 'hashtagClass':true, 'cashtagClass':true, 'usernameUrlBase':true, 'listUrlBase':true, 'hashtagUrlBase':true, 'cashtagUrlBase':true, 'usernameUrlBlock':true, 'listUrlBlock':true, 'hashtagUrlBlock':true, 'linkUrlBlock':true, 'usernameIncludeSymbol':true, 'suppressLists':true, 'suppressNoFollow':true, 'suppressDataScreenName':true, 'urlEntities':true, 'symbolTag':true, 'textWithSymbolTag':true, 'urlTarget':true, 'invisibleTagAttrs':true, 'linkAttributeBlock':true, 'linkTextBlock': true }; var BOOLEAN_ATTRIBUTES = {'disabled':true, 'readonly':true, 'multiple':true, 'checked':true}; // Simple object cloning function for simple objects function clone(o) { var r = {}; for (var k in o) { if (o.hasOwnProperty(k)) { r[k] = o[k]; } } return r; } twttr.txt.tagAttrs = function(attributes) { var htmlAttrs = ""; for (var k in attributes) { var v = attributes[k]; if (BOOLEAN_ATTRIBUTES[k]) { v = v ? k : null; } if (v == null) continue; htmlAttrs += " " + twttr.txt.htmlEscape(k) + "=\"" + twttr.txt.htmlEscape(v.toString()) + "\""; } return htmlAttrs; }; twttr.txt.linkToText = function(entity, text, attributes, options) { if (!options.suppressNoFollow) { attributes.rel = "nofollow"; } // if linkAttributeBlock is specified, call it to modify the attributes if (options.linkAttributeBlock) { options.linkAttributeBlock(entity, attributes); } // if linkTextBlock is specified, call it to get a new/modified link text if (options.linkTextBlock) { text = options.linkTextBlock(entity, text); } var d = { text: text, attr: twttr.txt.tagAttrs(attributes) }; return stringSupplant("#{text}", d); }; twttr.txt.linkToTextWithSymbol = function(entity, symbol, text, attributes, options) { var taggedSymbol = options.symbolTag ? "<" + options.symbolTag + ">" + symbol + "" : symbol; text = twttr.txt.htmlEscape(text); var taggedText = options.textWithSymbolTag ? "<" + options.textWithSymbolTag + ">" + text + "" : text; if (options.usernameIncludeSymbol || !symbol.match(twttr.txt.regexen.atSigns)) { return twttr.txt.linkToText(entity, taggedSymbol + taggedText, attributes, options); } else { return taggedSymbol + twttr.txt.linkToText(entity, taggedText, attributes, options); } }; twttr.txt.linkToHashtag = function(entity, text, options) { var hash = text.substring(entity.indices[0], entity.indices[0] + 1); var hashtag = twttr.txt.htmlEscape(entity.hashtag); var attrs = clone(options.htmlAttrs || {}); attrs.href = options.hashtagUrlBase + hashtag; attrs.title = "#" + hashtag; attrs["class"] = options.hashtagClass; return twttr.txt.linkToTextWithSymbol(entity, hash, hashtag, attrs, options); }; twttr.txt.linkToCashtag = function(entity, text, options) { var cashtag = twttr.txt.htmlEscape(entity.cashtag); var attrs = clone(options.htmlAttrs || {}); attrs.href = options.cashtagUrlBase + cashtag; attrs.title = "$" + cashtag; attrs["class"] = options.cashtagClass; return twttr.txt.linkToTextWithSymbol(entity, "$", cashtag, attrs, options); }; twttr.txt.linkToMentionAndList = function(entity, text, options) { var at = text.substring(entity.indices[0], entity.indices[0] + 1); var user = twttr.txt.htmlEscape(entity.screenName); var slashListname = twttr.txt.htmlEscape(entity.listSlug); var isList = entity.listSlug && !options.suppressLists; var attrs = clone(options.htmlAttrs || {}); attrs["class"] = (isList ? options.listClass : options.usernameClass); attrs.href = isList ? options.listUrlBase + user + slashListname : options.usernameUrlBase + user; if (!isList && !options.suppressDataScreenName) { attrs['data-screen-name'] = user; } return twttr.txt.linkToTextWithSymbol(entity, at, isList ? user + slashListname : user, attrs, options); }; twttr.txt.linkToUrl = function(entity, text, options) { var url = entity.url; var displayUrl = url; var linkText = twttr.txt.htmlEscape(displayUrl); // If the caller passed a urlEntities object (provided by a Twitter API // response with include_entities=true), we use that to render the display_url // for each URL instead of it's underlying t.co URL. var urlEntity = (options.urlEntities && options.urlEntities[url]) || entity; if (urlEntity.display_url) { linkText = twttr.txt.linkTextWithEntity(urlEntity, options); } var attrs = clone(options.htmlAttrs || {}); attrs.href = url; // set class only if urlClass is specified. if (options.urlClass) { attrs["class"] = options.urlClass; } // set target only if urlTarget is specified. if (options.urlTarget) { attrs.target = options.urlTarget; } if (!options.title && urlEntity.display_url) { attrs.title = urlEntity.expanded_url; } return twttr.txt.linkToText(entity, linkText, attrs, options); }; twttr.txt.linkTextWithEntity = function (entity, options) { var displayUrl = entity.display_url; var expandedUrl = entity.expanded_url; // Goal: If a user copies and pastes a tweet containing t.co'ed link, the resulting paste // should contain the full original URL (expanded_url), not the display URL. // // Method: Whenever possible, we actually emit HTML that contains expanded_url, and use // font-size:0 to hide those parts that should not be displayed (because they are not part of display_url). // Elements with font-size:0 get copied even though they are not visible. // Note that display:none doesn't work here. Elements with display:none don't get copied. // // Additionally, we want to *display* ellipses, but we don't want them copied. To make this happen we // wrap the ellipses in a tco-ellipsis class and provide an onCopy handler that sets display:none on // everything with the tco-ellipsis class. // // Exception: pic.twitter.com images, for which expandedUrl = "https://twitter.com/#!/username/status/1234/photo/1 // For those URLs, display_url is not a substring of expanded_url, so we don't do anything special to render the elided parts. // For a pic.twitter.com URL, the only elided part will be the "https://", so this is fine. var displayUrlSansEllipses = displayUrl.replace(/…/g, ""); // We have to disregard ellipses for matching // Note: we currently only support eliding parts of the URL at the beginning or the end. // Eventually we may want to elide parts of the URL in the *middle*. If so, this code will // become more complicated. We will probably want to create a regexp out of display URL, // replacing every ellipsis with a ".*". if (expandedUrl.indexOf(displayUrlSansEllipses) != -1) { var displayUrlIndex = expandedUrl.indexOf(displayUrlSansEllipses); var v = { displayUrlSansEllipses: displayUrlSansEllipses, // Portion of expandedUrl that precedes the displayUrl substring beforeDisplayUrl: expandedUrl.substr(0, displayUrlIndex), // Portion of expandedUrl that comes after displayUrl afterDisplayUrl: expandedUrl.substr(displayUrlIndex + displayUrlSansEllipses.length), precedingEllipsis: displayUrl.match(/^…/) ? "…" : "", followingEllipsis: displayUrl.match(/…$/) ? "…" : "" }; for (var k in v) { if (v.hasOwnProperty(k)) { v[k] = twttr.txt.htmlEscape(v[k]); } } // As an example: The user tweets "hi http://longdomainname.com/foo" // This gets shortened to "hi http://t.co/xyzabc", with display_url = "…nname.com/foo" // This will get rendered as: // // … // // http://longdomai // // // nname.com/foo // // //   // … // v['invisible'] = options.invisibleTagAttrs; return stringSupplant("#{precedingEllipsis} #{beforeDisplayUrl}#{displayUrlSansEllipses}#{afterDisplayUrl} #{followingEllipsis}", v); } return displayUrl; }; twttr.txt.autoLinkEntities = function(text, entities, options) { options = clone(options || {}); options.hashtagClass = options.hashtagClass || DEFAULT_HASHTAG_CLASS; options.hashtagUrlBase = options.hashtagUrlBase || "https://twitter.com/#!/search?q=%23"; options.cashtagClass = options.cashtagClass || DEFAULT_CASHTAG_CLASS; options.cashtagUrlBase = options.cashtagUrlBase || "https://twitter.com/#!/search?q=%24"; options.listClass = options.listClass || DEFAULT_LIST_CLASS; options.usernameClass = options.usernameClass || DEFAULT_USERNAME_CLASS; options.usernameUrlBase = options.usernameUrlBase || "https://twitter.com/"; options.listUrlBase = options.listUrlBase || "https://twitter.com/"; options.htmlAttrs = twttr.txt.extractHtmlAttrsFromOptions(options); options.invisibleTagAttrs = options.invisibleTagAttrs || "style='position:absolute;left:-9999px;'"; // remap url entities to hash var urlEntities, i, len; if(options.urlEntities) { urlEntities = {}; for(i = 0, len = options.urlEntities.length; i < len; i++) { urlEntities[options.urlEntities[i].url] = options.urlEntities[i]; } options.urlEntities = urlEntities; } var result = ""; var beginIndex = 0; // sort entities by start index entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; }); for (var i = 0; i < entities.length; i++) { var entity = entities[i]; result += text.substring(beginIndex, entity.indices[0]); if (entity.url) { result += twttr.txt.linkToUrl(entity, text, options); } else if (entity.hashtag) { result += twttr.txt.linkToHashtag(entity, text, options); } else if (entity.screenName) { result += twttr.txt.linkToMentionAndList(entity, text, options); } else if (entity.cashtag) { result += twttr.txt.linkToCashtag(entity, text, options); } beginIndex = entity.indices[1]; } result += text.substring(beginIndex, text.length); return result; }; twttr.txt.autoLinkWithJSON = function(text, json, options) { // concatenate all entities var entities = []; for (var key in json) { entities = entities.concat(json[key]); } // map JSON entity to twitter-text entity for (var i = 0; i < entities.length; i++) { entity = entities[i]; if (entity.screen_name) { // this is @mention entity.screenName = entity.screen_name; } else if (entity.text) { // this is #hashtag entity.hashtag = entity.text; } } // modify indices to UTF-16 twttr.txt.modifyIndicesFromUnicodeToUTF16(text, entities); return twttr.txt.autoLinkEntities(text, entities, options); }; twttr.txt.extractHtmlAttrsFromOptions = function(options) { var htmlAttrs = {}; for (var k in options) { var v = options[k]; if (OPTIONS_NOT_ATTRIBUTES[k]) continue; if (BOOLEAN_ATTRIBUTES[k]) { v = v ? k : null; } if (v == null) continue; htmlAttrs[k] = v; } return htmlAttrs; }; twttr.txt.autoLink = function(text, options) { var entities = twttr.txt.extractEntitiesWithIndices(text, {extractUrlWithoutProtocol: false}); return twttr.txt.autoLinkEntities(text, entities, options); }; twttr.txt.autoLinkUsernamesOrLists = function(text, options) { var entities = twttr.txt.extractMentionsOrListsWithIndices(text); return twttr.txt.autoLinkEntities(text, entities, options); }; twttr.txt.autoLinkHashtags = function(text, options) { var entities = twttr.txt.extractHashtagsWithIndices(text); return twttr.txt.autoLinkEntities(text, entities, options); }; twttr.txt.autoLinkCashtags = function(text, options) { var entities = twttr.txt.extractCashtagsWithIndices(text); return twttr.txt.autoLinkEntities(text, entities, options); }; twttr.txt.autoLinkUrlsCustom = function(text, options) { var entities = twttr.txt.extractUrlsWithIndices(text, {extractUrlWithoutProtocol: false}); return twttr.txt.autoLinkEntities(text, entities, options); }; twttr.txt.removeOverlappingEntities = function(entities) { entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; }); var prev = entities[0]; for (var i = 1; i < entities.length; i++) { if (prev.indices[1] > entities[i].indices[0]) { entities.splice(i, 1); i--; } else { prev = entities[i]; } } }; twttr.txt.extractEntitiesWithIndices = function(text, options) { var entities = twttr.txt.extractUrlsWithIndices(text, options) .concat(twttr.txt.extractMentionsOrListsWithIndices(text)) .concat(twttr.txt.extractHashtagsWithIndices(text, {checkUrlOverlap: false})) .concat(twttr.txt.extractCashtagsWithIndices(text)); if (entities.length == 0) { return []; } twttr.txt.removeOverlappingEntities(entities); return entities; }; twttr.txt.extractMentions = function(text) { var screenNamesOnly = [], screenNamesWithIndices = twttr.txt.extractMentionsWithIndices(text); for (var i = 0; i < screenNamesWithIndices.length; i++) { var screenName = screenNamesWithIndices[i].screenName; screenNamesOnly.push(screenName); } return screenNamesOnly; }; twttr.txt.extractMentionsWithIndices = function(text) { var mentions = []; var mentionsOrLists = twttr.txt.extractMentionsOrListsWithIndices(text); for (var i = 0 ; i < mentionsOrLists.length; i++) { mentionOrList = mentionsOrLists[i]; if (mentionOrList.listSlug == '') { mentions.push({ screenName: mentionOrList.screenName, indices: mentionOrList.indices }); } } return mentions; }; /** * Extract list or user mentions. * (Presence of listSlug indicates a list) */ twttr.txt.extractMentionsOrListsWithIndices = function(text) { if (!text || !text.match(twttr.txt.regexen.atSigns)) { return []; } var possibleNames = [], position = 0; text.replace(twttr.txt.regexen.validMentionOrList, function(match, before, atSign, screenName, slashListname, offset, chunk) { var after = chunk.slice(offset + match.length); if (!after.match(twttr.txt.regexen.endMentionMatch)) { slashListname = slashListname || ''; var startPosition = text.indexOf(atSign + screenName + slashListname, position); position = startPosition + screenName.length + slashListname.length + 1; possibleNames.push({ screenName: screenName, listSlug: slashListname, indices: [startPosition, position] }); } }); return possibleNames; }; twttr.txt.extractReplies = function(text) { if (!text) { return null; } var possibleScreenName = text.match(twttr.txt.regexen.validReply); if (!possibleScreenName || RegExp.rightContext.match(twttr.txt.regexen.endMentionMatch)) { return null; } return possibleScreenName[1]; }; twttr.txt.extractUrls = function(text, options) { var urlsOnly = [], urlsWithIndices = twttr.txt.extractUrlsWithIndices(text, options); for (var i = 0; i < urlsWithIndices.length; i++) { urlsOnly.push(urlsWithIndices[i].url); } return urlsOnly; }; twttr.txt.extractUrlsWithIndices = function(text, options) { if (!options) { options = {extractUrlsWithoutProtocol: true}; } if (!text || (options.extractUrlsWithoutProtocol ? !text.match(/\./) : !text.match(/:/))) { return []; } var urls = []; while (twttr.txt.regexen.extractUrl.exec(text)) { var before = RegExp.$2, url = RegExp.$3, protocol = RegExp.$4, domain = RegExp.$5, path = RegExp.$7; var endPosition = twttr.txt.regexen.extractUrl.lastIndex, startPosition = endPosition - url.length; // if protocol is missing and domain contains non-ASCII characters, // extract ASCII-only domains. if (!protocol) { if (!options.extractUrlsWithoutProtocol || before.match(twttr.txt.regexen.invalidUrlWithoutProtocolPrecedingChars)) { continue; } var lastUrl = null, lastUrlInvalidMatch = false, asciiEndPosition = 0; domain.replace(twttr.txt.regexen.validAsciiDomain, function(asciiDomain) { var asciiStartPosition = domain.indexOf(asciiDomain, asciiEndPosition); asciiEndPosition = asciiStartPosition + asciiDomain.length; lastUrl = { url: asciiDomain, indices: [startPosition + asciiStartPosition, startPosition + asciiEndPosition] }; lastUrlInvalidMatch = asciiDomain.match(twttr.txt.regexen.invalidShortDomain); if (!lastUrlInvalidMatch) { urls.push(lastUrl); } }); // no ASCII-only domain found. Skip the entire URL. if (lastUrl == null) { continue; } // lastUrl only contains domain. Need to add path and query if they exist. if (path) { if (lastUrlInvalidMatch) { urls.push(lastUrl); } lastUrl.url = url.replace(domain, lastUrl.url); lastUrl.indices[1] = endPosition; } } else { // In the case of t.co URLs, don't allow additional path characters. if (url.match(twttr.txt.regexen.validTcoUrl)) { url = RegExp.lastMatch; endPosition = startPosition + url.length; } urls.push({ url: url, indices: [startPosition, endPosition] }); } } return urls; }; twttr.txt.extractHashtags = function(text) { var hashtagsOnly = [], hashtagsWithIndices = twttr.txt.extractHashtagsWithIndices(text); for (var i = 0; i < hashtagsWithIndices.length; i++) { hashtagsOnly.push(hashtagsWithIndices[i].hashtag); } return hashtagsOnly; }; twttr.txt.extractHashtagsWithIndices = function(text, options) { if (!options) { options = {checkUrlOverlap: true}; } if (!text || !text.match(twttr.txt.regexen.hashSigns)) { return []; } var tags = [], position = 0; text.replace(twttr.txt.regexen.validHashtag, function(match, before, hash, hashText, offset, chunk) { var after = chunk.slice(offset + match.length); if (after.match(twttr.txt.regexen.endHashtagMatch)) return; var startPosition = text.indexOf(hash + hashText, position); position = startPosition + hashText.length + 1; tags.push({ hashtag: hashText, indices: [startPosition, position] }); }); if (options.checkUrlOverlap) { // also extract URL entities var urls = twttr.txt.extractUrlsWithIndices(text); if (urls.length > 0) { var entities = tags.concat(urls); // remove overlap twttr.txt.removeOverlappingEntities(entities); // only push back hashtags tags = []; for (var i = 0; i < entities.length; i++) { if (entities[i].hashtag) { tags.push(entities[i]); } } } } return tags; }; twttr.txt.extractCashtags = function(text) { var cashtagsOnly = [], cashtagsWithIndices = twttr.txt.extractCashtagsWithIndices(text); for (var i = 0; i < cashtagsWithIndices.length; i++) { cashtagsOnly.push(cashtagsWithIndices[i].cashtag); } return cashtagsOnly; }; twttr.txt.extractCashtagsWithIndices = function(text) { if (!text || text.indexOf("$") == -1) { return []; } var tags = [], position = 0; text.replace(twttr.txt.regexen.validCashtag, function(match, cashtag, offset, chunk) { // cashtag doesn't contain $ sign, so need to decrement index by 1. var startPosition = text.indexOf(cashtag, position) - 1; position = startPosition + cashtag.length + 1; tags.push({ cashtag: cashtag, indices: [startPosition, position] }); }); return tags; }; twttr.txt.modifyIndicesFromUnicodeToUTF16 = function(text, entities) { twttr.txt.convertUnicodeIndices(text, entities, false); }; twttr.txt.modifyIndicesFromUTF16ToUnicode = function(text, entities) { twttr.txt.convertUnicodeIndices(text, entities, true); }; twttr.txt.convertUnicodeIndices = function(text, entities, indicesInUTF16) { if (entities.length == 0) { return; } var charIndex = 0; var codePointIndex = 0; // sort entities by start index entities.sort(function(a,b){ return a.indices[0] - b.indices[0]; }); var entityIndex = 0; var entity = entities[0]; while (charIndex < text.length) { if (entity.indices[0] == (indicesInUTF16 ? charIndex : codePointIndex)) { var len = entity.indices[1] - entity.indices[0]; entity.indices[0] = indicesInUTF16 ? codePointIndex : charIndex; entity.indices[1] = entity.indices[0] + len; entityIndex++; if (entityIndex == entities.length) { // no more entity break; } entity = entities[entityIndex]; } var c = text.charCodeAt(charIndex); if (0xD800 <= c && c <= 0xDBFF && charIndex < text.length - 1) { // Found high surrogate char c = text.charCodeAt(charIndex + 1); if (0xDC00 <= c && c <= 0xDFFF) { // Found surrogate pair charIndex++; } } codePointIndex++; charIndex++; } }; // this essentially does text.split(/<|>/) // except that won't work in IE, where empty strings are ommitted // so "<>".split(/<|>/) => [] in IE, but is ["", "", ""] in all others // but "<<".split("<") => ["", "", ""] twttr.txt.splitTags = function(text) { var firstSplits = text.split("<"), secondSplits, allSplits = [], split; for (var i = 0; i < firstSplits.length; i += 1) { split = firstSplits[i]; if (!split) { allSplits.push(""); } else { secondSplits = split.split(">"); for (var j = 0; j < secondSplits.length; j += 1) { allSplits.push(secondSplits[j]); } } } return allSplits; }; twttr.txt.hitHighlight = function(text, hits, options) { var defaultHighlightTag = "em"; hits = hits || []; options = options || {}; if (hits.length === 0) { return text; } var tagName = options.tag || defaultHighlightTag, tags = ["<" + tagName + ">", ""], chunks = twttr.txt.splitTags(text), i, j, result = "", chunkIndex = 0, chunk = chunks[0], prevChunksLen = 0, chunkCursor = 0, startInChunk = false, chunkChars = chunk, flatHits = [], index, hit, tag, placed, hitSpot; for (i = 0; i < hits.length; i += 1) { for (j = 0; j < hits[i].length; j += 1) { flatHits.push(hits[i][j]); } } for (index = 0; index < flatHits.length; index += 1) { hit = flatHits[index]; tag = tags[index % 2]; placed = false; while (chunk != null && hit >= prevChunksLen + chunk.length) { result += chunkChars.slice(chunkCursor); if (startInChunk && hit === prevChunksLen + chunkChars.length) { result += tag; placed = true; } if (chunks[chunkIndex + 1]) { result += "<" + chunks[chunkIndex + 1] + ">"; } prevChunksLen += chunkChars.length; chunkCursor = 0; chunkIndex += 2; chunk = chunks[chunkIndex]; chunkChars = chunk; startInChunk = false; } if (!placed && chunk != null) { hitSpot = hit - prevChunksLen; result += chunkChars.slice(chunkCursor, hitSpot) + tag; chunkCursor = hitSpot; if (index % 2 === 0) { startInChunk = true; } else { startInChunk = false; } } else if(!placed) { placed = true; result += tag; } } if (chunk != null) { if (chunkCursor < chunkChars.length) { result += chunkChars.slice(chunkCursor); } for (index = chunkIndex + 1; index < chunks.length; index += 1) { result += (index % 2 === 0 ? chunks[index] : "<" + chunks[index] + ">"); } } return result; }; var MAX_LENGTH = 140; // Characters not allowed in Tweets var INVALID_CHARACTERS = [ // BOM fromCode(0xFFFE), fromCode(0xFEFF), // Special fromCode(0xFFFF), // Directional Change fromCode(0x202A), fromCode(0x202B), fromCode(0x202C), fromCode(0x202D), fromCode(0x202E) ]; // Returns the length of Tweet text with consideration to t.co URL replacement twttr.txt.getTweetLength = function(text, options) { if (!options) { options = { short_url_length: 20, short_url_length_https: 21 }; } var textLength = text.length; var urlsWithIndices = twttr.txt.extractUrlsWithIndices(text); for (var i = 0; i < urlsWithIndices.length; i++) { // Subtract the length of the original URL textLength += urlsWithIndices[i].indices[0] - urlsWithIndices[i].indices[1]; // Add 21 characters for URL starting with https:// // Otherwise add 20 characters if (urlsWithIndices[i].url.toLowerCase().match(/^https:\/\//)) { textLength += options.short_url_length_https; } else { textLength += options.short_url_length; } } return textLength; }; // Check the text for any reason that it may not be valid as a Tweet. This is meant as a pre-validation // before posting to api.twitter.com. There are several server-side reasons for Tweets to fail but this pre-validation // will allow quicker feedback. // // Returns false if this text is valid. Otherwise one of the following strings will be returned: // // "too_long": if the text is too long // "empty": if the text is nil or empty // "invalid_characters": if the text contains non-Unicode or any of the disallowed Unicode characters twttr.txt.isInvalidTweet = function(text) { if (!text) { return "empty"; } // Determine max length independent of URL length if (twttr.txt.getTweetLength(text) > MAX_LENGTH) { return "too_long"; } for (var i = 0; i < INVALID_CHARACTERS.length; i++) { if (text.indexOf(INVALID_CHARACTERS[i]) >= 0) { return "invalid_characters"; } } return false; }; twttr.txt.isValidTweetText = function(text) { return !twttr.txt.isInvalidTweet(text); }; twttr.txt.isValidUsername = function(username) { if (!username) { return false; } var extracted = twttr.txt.extractMentions(username); // Should extract the username minus the @ sign, hence the .slice(1) return extracted.length === 1 && extracted[0] === username.slice(1); }; var VALID_LIST_RE = regexSupplant(/^#{validMentionOrList}$/); twttr.txt.isValidList = function(usernameList) { var match = usernameList.match(VALID_LIST_RE); // Must have matched and had nothing before or after return !!(match && match[1] == "" && match[4]); }; twttr.txt.isValidHashtag = function(hashtag) { if (!hashtag) { return false; } var extracted = twttr.txt.extractHashtags(hashtag); // Should extract the hashtag minus the # sign, hence the .slice(1) return extracted.length === 1 && extracted[0] === hashtag.slice(1); }; twttr.txt.isValidUrl = function(url, unicodeDomains, requireProtocol) { if (unicodeDomains == null) { unicodeDomains = true; } if (requireProtocol == null) { requireProtocol = true; } if (!url) { return false; } var urlParts = url.match(twttr.txt.regexen.validateUrlUnencoded); if (!urlParts || urlParts[0] !== url) { return false; } var scheme = urlParts[1], authority = urlParts[2], path = urlParts[3], query = urlParts[4], fragment = urlParts[5]; if (!( (!requireProtocol || (isValidMatch(scheme, twttr.txt.regexen.validateUrlScheme) && scheme.match(/^https?$/i))) && isValidMatch(path, twttr.txt.regexen.validateUrlPath) && isValidMatch(query, twttr.txt.regexen.validateUrlQuery, true) && isValidMatch(fragment, twttr.txt.regexen.validateUrlFragment, true) )) { return false; } return (unicodeDomains && isValidMatch(authority, twttr.txt.regexen.validateUrlUnicodeAuthority)) || (!unicodeDomains && isValidMatch(authority, twttr.txt.regexen.validateUrlAuthority)); }; function isValidMatch(string, regex, optional) { if (!optional) { // RegExp["$&"] is the text of the last match // blank strings are ok, but are falsy, so we check stringiness instead of truthiness return ((typeof string === "string") && string.match(regex) && RegExp["$&"] === string); } // RegExp["$&"] is the text of the last match return (!string || (string.match(regex) && RegExp["$&"] === string)); } if (typeof module != 'undefined' && module.exports) { module.exports = twttr.txt; } }());