From e823f568a7f0812e058c87b573292c30aefc046e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?R=C3=A9gis=20Hanol?=
Date: Fri, 29 Aug 2014 13:11:56 +0200
Subject: [PATCH] FIX: preprocessing regexes in vBulletin importer

---
Review notes (this section is not part of the applied change):

* Strips "[attach]N[/attach]" tags.
* Moves the chevron -> HTML entity escaping from the end of the
  preprocessing to right after the newline/tab normalisation. Chevrons
  inside single-backtick spans are parked as U+2603 while the rest are
  escaped, then put back unescaped.
* Adds "[URL=...]...[/URL]" -> "[text](target)"; bare [URL] and [MP3]
  tags are now removed and their contents kept as-is.
* "[MENTION=id]" now accepts optional double quotes around the id.
* [HTML], [PHP], [CODE], [HIGHLIGHT] and [SAMP] are rewritten tag by tag
  instead of as matched open/close pairs; [HIGHLIGHT="lang"] opens a
  fence tagged with the downcased language.
* "[VIDEO=youtube;...]" no longer matches greedily up to the last
  [/VIDEO].

NOTE(review): the new [URL=...] pattern still captures the link text with
a greedy "(.+)", so two such tags on one line would be matched as a single
link. Consider "(.+?)", as done here for the [VIDEO] pattern.

 script/import_scripts/vbulletin.rb | 58 +++++++++++++++++------------
 1 file changed, 34 insertions(+), 24 deletions(-)

diff --git a/script/import_scripts/vbulletin.rb b/script/import_scripts/vbulletin.rb
index 09c5a4513fa..7c34a1cb7f3 100644
--- a/script/import_scripts/vbulletin.rb
+++ b/script/import_scripts/vbulletin.rb
@@ -350,8 +350,26 @@ class ImportScripts::VBulletin < ImportScripts::Base
     raw = raw.gsub(/(\\r)?\\n/, "\n")
              .gsub("\\t", "\t")
 
+    # remove attachments
+    raw = raw.gsub(/\[attach\]\d+\[\/attach\]/i, "")
+
+    # replace all chevrons with HTML entities
+    # NOTE: must be before any of the "quote" processing
+    raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
+             .gsub("<", "&lt;")
+             .gsub("\u2603", "<")
+
+    raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
+             .gsub(">", "&gt;")
+             .gsub("\u2603", ">")
+
+    # [URL=...]...[/URL]
+    raw = raw.gsub(/\[url="?(.+?)"?\](.+)\[\/url\]/i) { "[#{$2}](#{$1})" }
+
     # [URL]...[/URL]
-    raw = raw.gsub(/\[url\](.+?)\[\/url\]/i) { $1.to_s }
+    # [MP3]...[/MP3]
+    raw = raw.gsub(/\[\/?url\]/i, "")
+             .gsub(/\[\/?mp3\]/i, "")
 
     # [MENTION][/MENTION]
     raw = raw.gsub(/\[mention\](.+?)\[\/mention\]/i) do
@@ -363,7 +381,7 @@ class ImportScripts::VBulletin < ImportScripts::Base
     end
 
     # [MENTION=][/MENTION]
-    raw = raw.gsub(/\[mention=(\d+)\](.+?)\[\/mention\]/i) do
+    raw = raw.gsub(/\[mention="?(\d+)"?\](.+?)\[\/mention\]/i) do
       user_id, old_username = $1, $2
       if user = @users.select { |u| u[:userid] == user_id }.first
         old_username = @old_username_to_new_usernames[user[:username]] || user[:username]
@@ -384,37 +402,29 @@ class ImportScripts::VBulletin < ImportScripts::Base
     end
 
     # [HTML]...[/HTML]
+    raw = raw.gsub(/\[html\]/i, "\n```html\n")
+             .gsub(/\[\/html\]/i, "\n```\n")
+
     # [PHP]...[/PHP]
-    ["html", "php"].each do |language|
-      raw = raw.gsub(/\[#{language}\](.+?)\[\/#{language}\]/im) { "\n```#{language}\n#{$1}\n```\n" }
-    end
+    raw = raw.gsub(/\[php\]/i, "\n```php\n")
+             .gsub(/\[\/php\]/i, "\n```\n")
+
+    # [HIGHLIGHT="..."]
+    raw = raw.gsub(/\[highlight="?(\w+)"?\]/i) { "\n```#{$1.downcase}\n" }
 
     # [CODE]...[/CODE]
-    raw = raw.gsub(/\[code\](.+?)\[\/code\]/im) { "\n```\n#{$1}\n```\n" }
+    # [HIGHLIGHT]...[/HIGHLIGHT]
+    raw = raw.gsub(/\[\/?code\]/i, "\n```\n")
+             .gsub(/\[\/?highlight\]/i, "\n```\n")
 
-    # [HIGHLIGHT="..."]...[/HIGHLIGHT]
-    raw = raw.gsub(/\[highlight(?:[^\]]*)\](.+)\[\/highlight\]/im) { "\n```\n#{$1}\n```\n" }
-
-    # [SAMP]...[SAMP]
-    raw = raw.gsub(/\[samp\](.+?)\[\/samp\]/i) { "`#{$1}`" }
+    # [SAMP]...[/SAMP]
+    raw = raw.gsub(/\[\/?samp\]/i, "`")
 
     # [YOUTUBE][/YOUTUBE]
     raw = raw.gsub(/\[youtube\](.+?)\[\/youtube\]/i) { "\n//youtu.be/#{$1}\n" }
 
     # [VIDEO=youtube;]...[/VIDEO]
-    raw = raw.gsub(/\[video=youtube;([^\]]+)\].*\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
-
-    # [MP3][/MP3]
-    raw = raw.gsub(/\[MP3\](.+?)\[\/MP3\]/i) { "\n#{$1}\n" }
-
-    # replace all chevrons with HTML entities
-    raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub("<", "\u2603") + "`" }
-             .gsub("<", "&lt;")
-             .gsub("\u2603", "<")
-
-    raw = raw.gsub(/`([^`]+)`/im) { "`" + $1.gsub(">", "\u2603") + "`" }
-             .gsub(">", "&gt;")
-             .gsub("\u2603", ">")
+    raw = raw.gsub(/\[video=youtube;([^\]]+)\].*?\[\/video\]/i) { "\n//youtu.be/#{$1}\n" }
 
     raw
   end