From aeb511e8ffb230d88c8bf2d59d31fee27012396d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Mon, 28 May 2018 11:02:19 +0200 Subject: [PATCH] FEATURE: SMF1 importer --- script/import_scripts/base.rb | 103 ++- .../import_scripts/base/lookup_container.rb | 10 +- script/import_scripts/smf1.rb | 633 ++++++++++++++++++ 3 files changed, 686 insertions(+), 60 deletions(-) create mode 100644 script/import_scripts/smf1.rb diff --git a/script/import_scripts/base.rb b/script/import_scripts/base.rb index c8d41e29320..fc50cc13cd6 100644 --- a/script/import_scripts/base.rb +++ b/script/import_scripts/base.rb @@ -159,7 +159,7 @@ class ImportScripts::Base results.each do |result| g = yield(result) - if group_id_from_imported_group_id(g[:id]) + if g.nil? || group_id_from_imported_group_id(g[:id]) skipped += 1 else new_group = create_group(g, g[:id]) @@ -174,7 +174,7 @@ class ImportScripts::Base end end - print_status created + skipped + failed + (opts[:offset] || 0), total + print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("groups")) end [created, skipped] @@ -182,11 +182,13 @@ class ImportScripts::Base def create_group(opts, import_id) opts = opts.dup.tap { |o| o.delete(:id) } - import_name = opts[:name] + + import_name = opts[:name].presence || opts[:full_name] opts[:name] = UserNameSuggester.suggest(import_name) - existing = Group.where(name: opts[:name]).first - return existing if existing && existing.custom_fields["import_id"].to_s == (import_id.to_s) + existing = Group.find_by(name: opts[:name]) + return existing if existing && existing.custom_fields["import_id"].to_s == import_id.to_s + g = existing || Group.new(opts) g.custom_fields["import_id"] = import_id g.custom_fields["import_name"] = import_name @@ -208,6 +210,7 @@ class ImportScripts::Base existing = existing.where(name: 'import_id') .joins('JOIN import_ids ON val = value') .count + if existing == import_ids.length puts "Skipping #{import_ids.length} already 
imported #{type}" return true @@ -266,7 +269,7 @@ class ImportScripts::Base end end - print_status created + skipped + failed + (opts[:offset] || 0), total + print_status(created + skipped + failed + (opts[:offset] || 0), total, get_start_time("users")) end [created, skipped] @@ -425,7 +428,7 @@ class ImportScripts::Base created += 1 end - print_status created + skipped, total + print_status(created + skipped, total, get_start_time("categories")) end [created, skipped] @@ -599,7 +602,7 @@ class ImportScripts::Base end end - print_status created + skipped + (opts[:offset] || 0), total + print_status(created + skipped + (opts[:offset] || 0), total, get_start_time("bookmarks")) end [created, skipped] @@ -616,12 +619,12 @@ class ImportScripts::Base query.find_each do |topic| topic.update_status('closed', true, Discourse.system_user) closed_count += 1 - print_status(closed_count, total_count) + print_status(closed_count, total_count, get_start_time("close_inactive_topics")) end end def update_topic_status - puts "", "updating topic status" + puts "", "Updating topic status" Topic.exec_sql(<<~SQL) UPDATE topics AS t @@ -650,12 +653,12 @@ class ImportScripts::Base end def update_bumped_at - puts "", "updating bumped_at on topics" + puts "", "Updating bumped_at on topics" Post.exec_sql("update topics t set bumped_at = COALESCE((select max(created_at) from posts where topic_id = t.id and post_type = #{Post.types[:regular]}), bumped_at)") end def update_last_posted_at - puts "", "updating last posted at on users" + puts "", "Updating last posted at on users" sql = <<-SQL WITH lpa AS ( @@ -677,20 +680,18 @@ class ImportScripts::Base def update_user_stats puts "", "Updating topic reply counts..." - start_time = Time.now - progress_count = 0 - total_count = User.real.count + count = 0 + total = User.real.count - User.find_each do |u| + User.real.find_each do |u| u.create_user_stat if u.user_stat.nil? 
us = u.user_stat us.update_topic_reply_count us.save - progress_count += 1 - print_status(progress_count, total_count, start_time) + print_status(count += 1, total, get_start_time("user_stats")) end - puts "." "Updating first_post_created_at..." + puts "", "Updating first_post_created_at..." sql = <<-SQL WITH sub AS ( @@ -708,7 +709,7 @@ class ImportScripts::Base User.exec_sql(sql) - puts "Updating user post_count..." + puts "", "Updating user post_count..." sql = <<-SQL WITH sub AS ( @@ -726,7 +727,7 @@ class ImportScripts::Base User.exec_sql(sql) - puts "Updating user topic_count..." + puts "", "Updating user topic_count..." sql = <<-SQL WITH sub AS ( @@ -747,96 +748,88 @@ class ImportScripts::Base # scripts that are able to import last_seen_at from the source data should override this method def update_last_seen_at - puts "", "updating last seen at on users" + puts "", "Updating last seen at on users" User.exec_sql("UPDATE users SET last_seen_at = created_at WHERE last_seen_at IS NULL") User.exec_sql("UPDATE users SET last_seen_at = last_posted_at WHERE last_posted_at IS NOT NULL") end def update_feature_topic_users - puts "", "updating featured topic users" + puts "", "Updating featured topic users" - total_count = Topic.count - progress_count = 0 + count = 0 + total = Topic.count Topic.find_each do |topic| topic.feature_topic_users - progress_count += 1 - print_status(progress_count, total_count) + print_status(count += 1, total, get_start_time("feature_topic_user")) end end def reset_topic_counters - puts "", "resetting topic counters" + puts "", "Resetting topic counters" - total_count = Topic.count - progress_count = 0 + count = 0 + total = Topic.count Topic.find_each do |topic| Topic.reset_highest(topic.id) - progress_count += 1 - print_status(progress_count, total_count) + print_status(count += 1, total, get_start_time("topic_counters")) end end def update_category_featured_topics - puts "", "updating featured topics in categories" + puts "", "Updating 
featured topics in categories" - total_count = Category.count - progress_count = 0 + count = 0 + total = Category.count Category.find_each do |category| CategoryFeaturedTopic.feature_topics_for(category) - progress_count += 1 - print_status(progress_count, total_count) + print_status(count += 1, total, get_start_time("category_featured_topics")) end end def update_topic_count_replies - puts "", "updating user topic reply counts" + puts "", "Updating user topic reply counts" - total_count = User.real.count - progress_count = 0 + count = 0 + total = User.real.count User.real.find_each do |u| u.user_stat.update_topic_reply_count u.user_stat.save! - progress_count += 1 - print_status(progress_count, total_count) + print_status(count += 1, total, get_start_time("topic_count_replies")) end end def update_tl0 - puts "", "setting users with no posts to trust level 0" + puts "", "Setting users with no posts to trust level 0" - total_count = User.count - progress_count = 0 + count = 0 + total = User.count User.includes(:user_stat).find_each do |user| begin user.update_columns(trust_level: 0) if user.trust_level > 0 && user.post_count == 0 rescue Discourse::InvalidAccess - nil end - progress_count += 1 - print_status(progress_count, total_count) + print_status(count += 1, total, get_start_time("update_tl0")) end end def update_user_signup_date_based_on_first_post - puts "", "setting users' signup date based on the date of their first post" + puts "", "Setting users' signup date based on the date of their first post" - total_count = User.count - progress_count = 0 + count = 0 + total = User.count User.find_each do |user| - first = user.posts.order('created_at ASC').first - if first + if first = user.posts.order('created_at ASC').first user.created_at = first.created_at user.save! 
end - progress_count += 1 - print_status(progress_count, total_count) + print_status(count += 1, total, get_start_time("user_signup")) end end diff --git a/script/import_scripts/base/lookup_container.rb b/script/import_scripts/base/lookup_container.rb index 86513775a28..8922b0e2358 100644 --- a/script/import_scripts/base/lookup_container.rb +++ b/script/import_scripts/base/lookup_container.rb @@ -1,31 +1,31 @@ module ImportScripts class LookupContainer def initialize - puts 'loading existing groups...' + puts 'Loading existing groups...' @groups = {} GroupCustomField.where(name: 'import_id').pluck(:group_id, :value).each do |group_id, import_id| @groups[import_id] = group_id end - puts 'loading existing users...' + puts 'Loading existing users...' @users = {} UserCustomField.where(name: 'import_id').pluck(:user_id, :value).each do |user_id, import_id| @users[import_id] = user_id end - puts 'loading existing categories...' + puts 'Loading existing categories...' @categories = {} CategoryCustomField.where(name: 'import_id').pluck(:category_id, :value).each do |category_id, import_id| @categories[import_id] = category_id end - puts 'loading existing posts...' + puts 'Loading existing posts...' @posts = {} PostCustomField.where(name: 'import_id').pluck(:post_id, :value).each do |post_id, import_id| @posts[import_id] = post_id end - puts 'loading existing topics...' + puts 'Loading existing topics...' 
@topics = {} Post.joins(:topic).pluck('posts.id, posts.topic_id, posts.post_number, topics.slug').each do |p| @topics[p[0]] = { diff --git a/script/import_scripts/smf1.rb b/script/import_scripts/smf1.rb new file mode 100644 index 00000000000..3cfc5bc681f --- /dev/null +++ b/script/import_scripts/smf1.rb @@ -0,0 +1,633 @@ +require "mysql2" +require "htmlentities" +require File.expand_path(File.dirname(__FILE__) + "/base.rb") + +class ImportScripts::Smf1 < ImportScripts::Base + + BATCH_SIZE ||= 5000 + UPLOADS_DIR ||= "/mnt/hgfs/downloads/attachments" + + def initialize + super + + @htmlentities = HTMLEntities.new + + puts "Loading existing usernames..." + + @old_to_new_usernames = UserCustomField.joins(:user).where(name: "import_username").pluck("value", "users.username").to_h + + puts "Loading pm mapping..." + + @pm_mapping = {} + + Topic + .joins(:topic_allowed_users) + .where(archetype: Archetype.private_message) + .where("title NOT ILIKE 'Re: %'") + .group(:id) + .order(:id) + .pluck("string_agg(topic_allowed_users.user_id::text, ',' ORDER BY topic_allowed_users.user_id), title, topics.id") + .each do |users, title, topic_id| + @pm_mapping[users] ||= {} + @pm_mapping[users][title] ||= [] + @pm_mapping[users][title] << topic_id + end + + @client = Mysql2::Client.new( + host: ENV["DB_HOST"] || "localhost", + username: ENV["DB_USER"] || "root", + password: ENV["DB_PW"], + database: ENV["DB_NAME"], + ) + end + + def execute + check_version! + + SiteSetting.permalink_normalizations = "/(.+)\\?.*/\\1" + + import_groups + import_users + + import_categories + import_posts + import_personal_posts + + import_attachments + + import_likes + import_feedbacks + + import_banned_domains + import_banned_emails + import_banned_ips + end + + def check_version! 
+ version = mysql_query("SELECT value FROM smf_settings WHERE variable = 'smfVersion' LIMIT 1").first["value"] + fail "Incompatible version (#{version})" unless version&.start_with?("1.") + end + + def import_groups + puts "", "Importing groups..." + + # skip administrators/moderators + groups = mysql_query("SELECT id_group, groupName FROM smf_membergroups WHERE id_group > 3").to_a + + create_groups(groups) do |g| + next if g["groupName"].blank? + + { + id: g["id_group"], + full_name: g["groupName"], + } + end + end + + def import_users + puts "", "Importing users..." + + last_user_id = -1 + total = mysql_query("SELECT COUNT(*) count FROM smf_members").first["count"] + + batches(BATCH_SIZE) do |offset| + users = mysql_query(<<~SQL + SELECT m.id_member + , memberName + , dateRegistered + , id_group + , lastLogin + , realName + , emailAddress + , personalText + , CASE WHEN birthdate > '1900-01-01' THEN birthdate ELSE NULL END birthdate + , websiteUrl + , location + , usertitle + , memberIP + , memberIP2 + , is_activated + , additionalGroups + , id_attach + , attachmentType + , filename + FROM smf_members m + LEFT JOIN smf_attachments a ON a.id_member = m.id_member + WHERE m.id_member > #{last_user_id} + ORDER BY m.id_member + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if users.empty? 
+ + last_user_id = users[-1]["id_member"] + user_ids = users.map { |u| u["id_member"] } + + next if all_records_exist?(:users, user_ids) + + create_users(users, total: total, offset: offset) do |u| + created_at = Time.zone.at(u["dateRegistered"]) + group_ids = [u["id_group"], *u["additionalGroups"].split(",").map(&:to_i)].uniq + + { + id: u["id_member"], + username: u["memberName"], + created_at: created_at, + first_seen_at: created_at, + primary_group_id: group_id_from_imported_group_id(u["id_group"]), + admin: group_ids.include?(1), + moderator: group_ids.include?(2) || group_ids.include?(3), + last_seen_at: Time.zone.at(u["lastLogin"]), + name: u["realName"].presence, + email: u["emailAddress"], + bio_raw: pre_process_raw(u["personalText"].presence), + date_of_birth: u["birthdate"], + website: u["websiteUrl"].presence, + location: u["location"].presence, + title: u["usertitle"].presence, + registration_ip_address: u["memberIP"], + ip_address: u["memberIP2"], + active: u["is_activated"] == 1, + approved: u["is_activated"] == 1, + post_create_action: proc do |user| + # usernames + @old_to_new_usernames[u["memberName"]] = user.username + + # groups + GroupUser.transaction do + group_ids.each do |gid| + (group_id = group_id_from_imported_group_id(gid)) && GroupUser.find_or_create_by(user: user, group_id: group_id) + end + end + + # avatar + avatar_url = if u["attachmentType"] == 0 && u["id_attach"].present? + "https://yoyoexpert.com/forums/index.php?action=dlattach;attach=#{u["id_attach"]};type=avatar" + elsif u["attachmentType"] == 1 && u["filename"].present? + "https://yoyoexpert.com/forums/avatar-members/#{u["filename"]}" + end + + if avatar_url.present? + UserAvatar.import_url_for_user(avatar_url, user) rescue nil + end + end + } + end + end + end + + def import_categories + puts "", "Importing categories..." 
+ + categories = mysql_query(<<~SQL + SELECT id_board + , id_parent + , boardOrder + , name + , description + FROM smf_boards + ORDER BY id_parent, id_board + SQL + ).to_a + + parent_categories = categories.select { |c| c["id_parent"] == 0 } + children_categories = categories.select { |c| c["id_parent"] != 0 } + + create_categories(parent_categories) do |c| + { + id: c["id_board"], + name: c["name"], + description: pre_process_raw(c["description"].presence), + position: c["boardOrder"], + post_create_action: proc do |category| + Permalink.find_or_create_by(url: "forums/index.php/board,#{c["id_board"]}.0.html", category_id: category.id) + end, + } + end + + create_categories(children_categories) do |c| + { + id: c["id_board"], + parent_category_id: category_id_from_imported_category_id(c["id_parent"]), + name: c["name"], + description: pre_process_raw(c["description"].presence), + position: c["boardOrder"], + post_create_action: proc do |category| + Permalink.find_or_create_by(url: "forums/index.php/board,#{c["id_board"]}.0.html", category_id: category.id) + end, + } + end + end + + def import_posts + puts "", "Importing posts..." + + last_post_id = -1 + total = mysql_query("SELECT COUNT(*) count FROM smf_messages").first["count"] + + batches(BATCH_SIZE) do |offset| + posts = mysql_query(<<~SQL + SELECT m.id_msg + , m.id_topic + , m.id_board + , m.posterTime + , m.id_member + , m.subject + , m.body + , t.isSticky + , t.id_first_msg + , t.numViews + FROM smf_messages m + JOIN smf_topics t ON t.id_topic = m.id_topic + WHERE m.id_msg > #{last_post_id} + ORDER BY m.id_msg + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if posts.empty? 
+ + last_post_id = posts[-1]["id_msg"] + post_ids = posts.map { |p| p["id_msg"] } + + next if all_records_exist?(:post, post_ids) + + create_posts(posts, total: total, offset: offset) do |p| + created_at = Time.at(p["posterTime"]) + + post = { + id: p["id_msg"], + created_at: created_at, + user_id: user_id_from_imported_user_id(p["id_member"]) || -1, + raw: pre_process_raw(p["body"]), + } + + if p["id_msg"] == p["id_first_msg"] + post[:category] = category_id_from_imported_category_id(p["id_board"]) + post[:title] = @htmlentities.decode(p["subject"]) + post[:views] = p["numViews"] + post[:pinned_at] = created_at if p["isSticky"] == 1 + post[:post_create_action] = proc do |pp| + Permalink.find_or_create_by(url: "forums/index.php/topic,#{p["id_topic"]}.0.html", topic_id: pp.topic_id) + end + elsif parent = topic_lookup_from_imported_post_id(p["id_first_msg"]) + post[:topic_id] = parent[:topic_id] + post[:post_create_action] = proc do |pp| + Permalink.find_or_create_by(url: "forums/index.php/topic,#{p["id_topic"]}.msg#{p["id_msg"]}.html", post_id: pp.id) + end + else + next + end + + post + end + end + end + + def import_personal_posts + puts "", "Importing personal posts..." + + last_post_id = -1 + total = mysql_query("SELECT COUNT(*) count FROM smf_personal_messages WHERE deletedBySender = 0").first["count"] + + batches(BATCH_SIZE) do |offset| + posts = mysql_query(<<~SQL + SELECT id_pm + , id_member_from + , msgtime + , subject + , body + , (SELECT GROUP_CONCAT(id_member) FROM smf_pm_recipients r WHERE r.id_pm = pm.id_pm) recipients + FROM smf_personal_messages pm + WHERE deletedBySender = 0 + AND id_pm > #{last_post_id} + ORDER BY id_pm + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if posts.empty? 
+ + last_post_id = posts[-1]["id_pm"] + post_ids = posts.map { |p| "pm-#{p["id_pm"]}" } + + next if all_records_exist?(:post, post_ids) + + create_posts(posts, total: total, offset: offset) do |p| + next unless user_id = user_id_from_imported_user_id(p["id_member_from"]) + next if p["recipients"].blank? + recipients = p["recipients"].split(",").map { |id| user_id_from_imported_user_id(id) }.compact.uniq + next if recipients.empty? + + id = "pm-#{p["id_pm"]}" + next if post_id_from_imported_post_id(id) + + post = { + id: id, + created_at: Time.at(p["msgtime"]), + user_id: user_id, + raw: pre_process_raw(p["body"]), + } + + users = (recipients + [user_id]).sort.uniq.join(",") + title = @htmlentities.decode(p["subject"]) + + if topic_id = find_pm_topic_id(users, title) + post[:topic_id] = topic_id + else + post[:archetype] = Archetype.private_message + post[:title] = title + post[:target_usernames] = User.where(id: recipients).pluck(:username) + post[:post_create_action] = proc do |p| + @pm_mapping[users] ||= {} + @pm_mapping[users][title] ||= [] + @pm_mapping[users][title] << p.topic_id + end + end + + post + end + end + end + + def find_pm_topic_id(users, title) + return unless title.start_with?("Re: ") + + return unless @pm_mapping[users] + + title = title.gsub(/^(Re: )+/i, "") + return unless @pm_mapping[users][title] + + @pm_mapping[users][title][-1] + end + + def import_attachments + puts "", "Importing attachments..." + + count = 0 + last_upload_id = -1 + total = mysql_query("SELECT COUNT(*) count FROM smf_attachments WHERE id_msg IS NOT NULL").first["count"] + + batches(BATCH_SIZE) do |offset| + uploads = mysql_query(<<~SQL + SELECT id_attach + , id_msg + , filename + , file_hash + FROM smf_attachments + WHERE id_msg IS NOT NULL + AND id_attach > #{last_upload_id} + ORDER BY id_attach + LIMIT #{BATCH_SIZE} + SQL + ).to_a + + break if uploads.empty? 
+ + last_upload_id = uploads[-1]["id_attach"] + + uploads.each do |u| + count += 1 + + next unless post = PostCustomField.joins(:post).find_by(name: "import_id", value: u["id_msg"].to_s)&.post + + path = File.join(UPLOADS_DIR, "#{u["id_attach"]}_#{u["file_hash"]}") + next unless File.exist?(path) && File.size(path) > 0 + + if upload = create_upload(post.user_id, path, u["filename"]) + html = html_for_upload(upload, u["filename"]) + unless post.raw[html] + post.raw += "\n\n#{html}\n\n" + post.save + PostUpload.create(upload: upload, post: post) + end + end + + print_status(count, total, get_start_time("attachments")) + end + end + end + + def import_likes + return if mysql_query("SHOW TABLES LIKE 'smf_thank_you_post'").first.nil? + + puts "", "Importing likes..." + + count = 0 + total = mysql_query("SELECT COUNT(*) count FROM smf_thank_you_post WHERE thx_time > 0").first["count"] + like = PostActionType.types[:like] + + mysql_query("SELECT id_msg, id_member, thx_time FROM smf_thank_you_post WHERE thx_time > 0 ORDER BY id_thx_post").each do |l| + print_status(count += 1, total, get_start_time("likes")) + next unless post_id = post_id_from_imported_post_id(l["id_msg"]) + next unless user_id = user_id_from_imported_user_id(l["id_member"]) + next if PostAction.where(post_action_type_id: like, post_id: post_id, user_id: user_id).exists? + PostAction.create(post_action_type_id: like, post_id: post_id, user_id: user_id, created_at: Time.at(l["thx_time"])) + end + end + + FEEDBACKS ||= -"feedbacks" + + def import_feedbacks + return if mysql_query("SHOW TABLES LIKE 'smf_feedback'").first.nil? + + puts "", "Importing feedbacks..." 
+ + User.register_custom_field_type(FEEDBACKS, :json) + + count = 0 + total = mysql_query("SELECT COUNT(*) count FROM smf_feedback WHERE approved").first["count"] + + mysql_query(<<~SQL + SELECT feedbackid + , id_member + , feedbackmember_id + , saledate + , saletype + , salevalue + , comment_short + , comment_long + FROM smf_feedback + WHERE approved + ORDER BY feedbackid + SQL + ).each do |f| + print_status(count += 1, total, get_start_time("feedbacks")) + next unless user_id_from = user_id_from_imported_user_id(f["id_member"]) + next unless user_id_to = user_id_from_imported_user_id(f["feedbackmember_id"]) + next unless user = User.find_by(id: user_id_to) + + feedbacks = user.custom_fields[FEEDBACKS] || [] + next if feedbacks.find { |ff| ff["id"] == f["feedbackid"] } + + feedbacks << { + id: f["feedbackid"], + created_at: Time.at(f["saledate"]), + from: user_id_from, + type: f["saletype"], + value: f["salevalue"], + comment_short: @htmlentities.decode(f["comment_short"]).strip.presence, + comment_long: @htmlentities.decode(f["comment_long"]).strip.presence, + } + + user.custom_fields[FEEDBACKS] = feedbacks.to_json + user.save_custom_fields + end + end + + def import_banned_domains + puts "", "Importing banned email domains..." + + blacklist = SiteSetting.email_domains_blacklist.split("|") + banned_domains = mysql_query("SELECT SUBSTRING(email_address, 3) domain FROM smf_ban_items WHERE email_address RLIKE '^%@[^%]+$' GROUP BY email_address").map { |r| r["domain"] } + + SiteSetting.email_domains_blacklist = (blacklist + banned_domains).uniq.sort.join("|") + end + + def import_banned_emails + puts "", "Importing banned emails..." 
+ + count = 0 + + banned_emails = mysql_query("SELECT email_address FROM smf_ban_items WHERE email_address RLIKE '^[^%]+@[^%]+$' GROUP BY email_address").map { |r| r["email_address"] } + banned_emails.each do |email| + print_status(count += 1, banned_emails.size, get_start_time("banned_emails")) + ScreenedEmail.find_or_create_by(email: email) + end + end + + def import_banned_ips + puts "", "Importing banned IPs..." + + count = 0 + + banned_ips = mysql_query(<<~SQL + SELECT CONCAT_WS('.', ip_low1, ip_low2, ip_low3, ip_low4) low + , CONCAT_WS('.', ip_high1, ip_high2, ip_high3, ip_high4) high + , hits + FROM smf_ban_items + WHERE (ip_low1 + ip_low2 + ip_low3 + ip_low4 + ip_high1 + ip_high2 + ip_high3 + ip_high4) > 0 + GROUP BY low, high, hits; + SQL + ).to_a + + banned_ips.each do |r| + print_status(count += 1, banned_ips.size, get_start_time("banned_ips")) + if r["low"] == r["high"] + if !ScreenedIpAddress.where("? <<= ip_address", r["low"]).exists? + ScreenedIpAddress.create(ip_address: r["low"], match_count: r["hits"]) + end + else + low_values = r["low"].split(".").map(&:to_i) + high_values = r["high"].split(".").map(&:to_i) + first_diff = low_values.zip(high_values).count { |a, b| a == b } + first_diff -= 1 if low_values[first_diff] == 0 && high_values[first_diff] == 255 + prefix = low_values[0...first_diff] + suffix = [0] * (3 - first_diff) + mask = 8 * (first_diff + 1) + values = (low_values[first_diff]..high_values[first_diff]) + hits = (r["hits"] / [1, values.count].max).floor + values.each do |v| + range_values = prefix + [v] + suffix + ip_address = "#{range_values.join(".")}/#{mask}" + if !ScreenedIpAddress.where("? <<= ip_address", ip_address).exists? + ScreenedIpAddress.create(ip_address: ip_address, match_count: hits) + end + end + end + end + + ScreenedIpAddress.where(last_match_at: nil).update_all(last_match_at: Time.new(2000, 01, 01)) + + puts "", "Rolling up..." 
+ ScreenedIpAddress.roll_up + end + + IGNORED_BBCODE ||= %w{ + black blue center color email flash font glow green img iurl left list move + red right shadow size table time white + } + + def pre_process_raw(raw) + return "" if raw.blank? + + raw = @htmlentities.decode(raw) + + # [acronym] + raw.gsub!(/\[acronym=([^\]]+)\](.*?)\[\/acronym\]/im) { %{<abbr title="#{$1}">#{$2}</abbr>} } + + # [br] + raw.gsub!(/\[br\]/i, "\n") + raw.gsub!(/<br\s*\/?>/i, "\n") + # [hr] + raw.gsub!(/\[hr\]/i, "<hr/>")
+ + # [sub] + raw.gsub!(/\[sub\](.*?)\[\/sub\]/im) { "<sub>#{$1}</sub>" } + # [sup] + raw.gsub!(/\[sup\](.*?)\[\/sup\]/im) { "<sup>#{$1}</sup>" } + + # [html] + raw.gsub!(/\[html\]/i, "\n```html\n") + raw.gsub!(/\[\/html\]/i, "\n```\n") + + # [php] + raw.gsub!(/\[php\]/i, "\n```php\n") + raw.gsub!(/\[\/php\]/i, "\n```\n") + + # [code] + raw.gsub!(/\[\/?code\]/i, "\n```\n") + + # [pre] + raw.gsub!(/\[\/?pre\]/i, "\n```\n") + + # [tt] + raw.gsub!(/\[\/?tt\]/i, "`") + + # [ftp] + raw.gsub!(/\[ftp/i, "[url") + raw.gsub!(/\[\/ftp\]/i, "[/url]") + + # [me] + raw.gsub!(/\[me=([^\]]*)\](.*?)\[\/me\]/im) { "_\\* #{$1} #{$2}_" } + + # [li] + raw.gsub!(/\[li\](.*?)\[\/li\]/im) { "- #{$1}" } + + # puts [youtube] on their own line + raw.gsub!(/\[youtube\](.*?)\[\/youtube\]/im) { "\n#{$1}\n" } + + IGNORED_BBCODE.each { |code| raw.gsub!(/\[#{code}[^\]]*\](.*?)\[\/#{code}\]/im, '\1') } + + # ensure [/quote] are on their own line + raw.gsub!(/\s*\[\/quote\]\s*/im, "\n[/quote]\n") + + # [quote] + raw.gsub!(/\s*\[quote (.+?)\]\s/im) { + params = $1 + post_id = params[/msg(\d+)/, 1] + username = params[/author=(.+) link=/, 1] + username = @old_to_new_usernames[username] if @old_to_new_usernames.has_key?(username) + + if t = topic_lookup_from_imported_post_id(post_id) + %{\n[quote="#{username},post:#{t[:post_number]},topic:#{t[:topic_id]}"]\n} + else + %{\n[quote="#{username}"]\n} + end + } + + # remove tapatalk mess + raw.gsub!(/Sent from .+? using \[url=.*?\].+?\[\/url\]/i, "") + raw.gsub!(/Sent from .+? using .+?\z/i, "") + + raw + end + + def mysql_query(sql) + @client.query(sql) + end + +end + +ImportScripts::Smf1.new.perform