160 lines
4.2 KiB
Ruby
160 lines
4.2 KiB
Ruby
|
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||
|
require 'mongo'
|
||
|
|
||
|
# Import YahooGroups data as exported into MongoDB by:
|
||
|
# https://github.com/jonbartlett/yahoo-groups-export
|
||
|
#
|
||
|
# Optionally paste these lines into your shell before running this:
|
||
|
#
|
||
|
# =begin
|
||
|
# export CATEGORY_ID=<CATEGORY_ID>
|
||
|
# =end
|
||
|
|
||
|
class ImportScripts::YahooGroup < ImportScripts::Base
|
||
|
|
||
|
MONGODB_HOST = '192.168.10.1:27017'
|
||
|
MONGODB_DB = 'syncro'
|
||
|
|
||
|
def initialize
|
||
|
super
|
||
|
|
||
|
client = Mongo::Client.new([ MONGODB_HOST ], database: MONGODB_DB)
|
||
|
db = client.database
|
||
|
Mongo::Logger.logger.level = Logger::FATAL
|
||
|
puts "connected to db...."
|
||
|
|
||
|
@collection = client[:posts]
|
||
|
|
||
|
@user_profile_map = {}
|
||
|
|
||
|
end
|
||
|
|
||
|
def execute
|
||
|
puts "", "Importing from Mongodb...."
|
||
|
|
||
|
import_users
|
||
|
import_discussions
|
||
|
|
||
|
puts "", "Done"
|
||
|
end
|
||
|
|
||
|
def import_users
|
||
|
|
||
|
puts '', "Importing users"
|
||
|
|
||
|
# fetch distinct list of Yahoo "profile" names
|
||
|
profiles = @collection.aggregate(
|
||
|
[
|
||
|
{ "$group": { "_id": { profile: "$ygData.profile" } } }
|
||
|
]
|
||
|
)
|
||
|
|
||
|
user_id = 0
|
||
|
|
||
|
create_users(profiles.to_a) do |u|
|
||
|
|
||
|
user_id = user_id + 1
|
||
|
|
||
|
# fetch last message for profile to pickup latest user info as this may have changed
|
||
|
user_info = @collection.find("ygData.profile": u["_id"]["profile"]).sort("ygData.msgId": -1).limit(1).to_a[0]
|
||
|
|
||
|
# Store user_id to profile lookup
|
||
|
@user_profile_map.store(user_info["ygData"]["profile"], user_id)
|
||
|
|
||
|
puts "User created: #{user_info["ygData"]["profile"]}"
|
||
|
|
||
|
user =
|
||
|
{
|
||
|
id: user_id, # yahoo "userId" sequence appears to have changed mid forum life so generate this
|
||
|
username: user_info["ygData"]["profile"],
|
||
|
name: user_info["ygData"]["authorName"],
|
||
|
email: user_info["ygData"]["from"], # mandatory
|
||
|
created_at: Time.now
|
||
|
}
|
||
|
user
|
||
|
end
|
||
|
|
||
|
puts "#{user_id} users created"
|
||
|
|
||
|
end
|
||
|
|
||
|
def import_discussions
|
||
|
puts "", "Importing discussions"
|
||
|
|
||
|
topics_count = 0
|
||
|
posts_count = 0
|
||
|
|
||
|
topics = @collection.aggregate(
|
||
|
[
|
||
|
{ "$group": { "_id": { topicId: "$ygData.topicId" } } }
|
||
|
]
|
||
|
).to_a
|
||
|
|
||
|
# for each distinct topicId found
|
||
|
topics.each_with_index do |t, tidx|
|
||
|
|
||
|
# create "topic" post first.
|
||
|
# fetch topic document
|
||
|
topic_post = @collection.find("ygData.msgId": t["_id"]["topicId"]).to_a[0]
|
||
|
next if topic_post.nil?
|
||
|
|
||
|
puts "Topic: #{tidx + 1} / #{topics.count()} (#{sprintf('%.2f', ((tidx + 1).to_f / topics.count().to_f) * 100)}%) Subject: #{topic_post["ygData"]["subject"]}"
|
||
|
|
||
|
if topic_post["ygData"]["subject"].to_s.empty?
|
||
|
topic_title = "No Subject"
|
||
|
else
|
||
|
topic_title = topic_post["ygData"]["subject"]
|
||
|
end
|
||
|
|
||
|
topic = {
|
||
|
id: tidx + 1,
|
||
|
user_id: @user_profile_map[topic_post["ygData"]["profile"]] || -1,
|
||
|
raw: topic_post["ygData"]["messageBody"],
|
||
|
created_at: Time.at(topic_post["ygData"]["postDate"].to_i),
|
||
|
cook_method: Post.cook_methods[:raw_html],
|
||
|
title: topic_title,
|
||
|
category: ENV['CATEGORY_ID'],
|
||
|
custom_fields: { import_id: topic_post["ygData"]["msgId"] }
|
||
|
}
|
||
|
|
||
|
topics_count += 1
|
||
|
|
||
|
# create topic post
|
||
|
parent_post = create_post(topic, topic[:id])
|
||
|
|
||
|
# find all posts for topic id
|
||
|
posts = @collection.find("ygData.topicId": topic_post["ygData"]["topicId"]).to_a
|
||
|
|
||
|
posts.each_with_index do |p, pidx|
|
||
|
|
||
|
# skip over first post as this is created by topic above
|
||
|
next if p["ygData"]["msgId"] == topic_post["ygData"]["topicId"]
|
||
|
|
||
|
puts " Post: #{pidx + 1} / #{posts.count()}"
|
||
|
|
||
|
post = {
|
||
|
id: pidx + 1,
|
||
|
topic_id: parent_post[:topic_id],
|
||
|
user_id: @user_profile_map[p["ygData"]["profile"]] || -1,
|
||
|
raw: p["ygData"]["messageBody"],
|
||
|
created_at: Time.at(p["ygData"]["postDate"].to_i),
|
||
|
cook_method: Post.cook_methods[:raw_html],
|
||
|
custom_fields: { import_id: p["ygData"]["msgId"] }
|
||
|
}
|
||
|
|
||
|
child_post = create_post(post, post[:id])
|
||
|
|
||
|
posts_count += 1
|
||
|
|
||
|
end
|
||
|
|
||
|
end
|
||
|
|
||
|
puts "", "Imported #{topics_count} topics with #{topics_count + posts_count} posts."
|
||
|
|
||
|
end
|
||
|
|
||
|
end
|
||
|
|
||
|
ImportScripts::YahooGroup.new.perform
|