discourse/script/import_scripts/jive_api.rb

320 lines
12 KiB
Ruby
Raw Normal View History

2017-04-24 16:03:12 -04:00
require "nokogiri"
2017-04-12 13:16:45 -04:00
require "htmlentities"
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
2017-05-16 04:22:44 -04:00
# https://developers.jivesoftware.com/api/v3/cloud/rest/index.html
2017-04-12 13:16:45 -04:00
class ImportScripts::JiveApi < ImportScripts::Base
2017-04-24 16:03:12 -04:00
# Page size used when listing people and likes.
USER_COUNT ||= 1000
# Page size used when listing contents, comments, messages and bookmarks.
POST_COUNT ||= 100

# Guardian built for the Discourse system user.
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)

# Jive places to import. Each entry may contain:
#   jive_object:   entity descriptor (type, id) used to look the place up
#   filters:       optional content filters (:type and/or :created_after)
#   authenticated: when truthy, requests for this place send credentials
#   category_id:   value stored as the :category of every imported topic, or
#   category:      callable receiving the content hash and returning that value
TO_IMPORT ||= [
  # Announcement & News
  {
    jive_object: { type: 37, id: 1004 },
    filters: { created_after: 1.year.ago, type: "post" },
    category_id: 7,
  },
  # Questions & Answers / General Discussions
  {
    jive_object: { type: 14, id: 2006 },
    filters: { created_after: 6.months.ago, type: "discussion" },
    category: ->(c) { c["question"] ? 5 : 21 },
  },
  # Anywhere beta
  {
    jive_object: { type: 14, id: 2052 },
    filters: { created_after: 6.months.ago, type: "discussion" },
    category_id: 22,
  },
  # Tips & Tricks
  { jive_object: { type: 37, id: 1284 }, filters: { type: "post" }, category_id: 6 },
  { jive_object: { type: 37, id: 1319 }, filters: { type: "post" }, category_id: 6 },
  { jive_object: { type: 37, id: 1177 }, filters: { type: "post" }, category_id: 6 },
  { jive_object: { type: 37, id: 1165 }, filters: { type: "post" }, category_id: 6 },
  # Ambassadors
  {
    jive_object: { type: 700, id: 1001 },
    filters: { type: "discussion" },
    authenticated: true,
    category_id: 8,
  },
  # Experts
  {
    jive_object: { type: 700, id: 1034 },
    filters: { type: "discussion" },
    authenticated: true,
    category_id: 15,
  },
  # Feature Requests
  { jive_object: { type: 14, id: 2015 }, filters: { type: "idea" }, category_id: 31 },
]
2017-04-12 13:16:45 -04:00
# Runs the base initializer, prepares an HTML entity decoder and reads the
# Jive connection settings (BASE_URI, USERNAME, PASSWORD) from the environment.
def initialize
  super

  @htmlentities = HTMLEntities.new
  @base_uri, @username, @password = ENV.values_at("BASE_URI", "USERNAME", "PASSWORD")
end
# Runs every import step in order; returns the result of the last step.
def execute
  update_existing_users
  import_users
  import_contents
  import_bookmarks
  mark_topics_as_solved
end
2017-05-16 04:22:44 -04:00
# Moves the `created_at` of existing human users back to their Jive
# "initialLogin" date whenever that date is earlier than the stored value.
# Does nothing once more than 100 human users exist.
def update_existing_users
  puts "", "updating existing users..."

  # we just need to do this once
  return if User.human_users.limit(101).count > 100

  User.human_users.find_each do |user|
    person = get("people/email/#{user.email}?fields=initialLogin,-resources", true)
    next unless person && person["initialLogin"].present?

    first_login = DateTime.parse(person["initialLogin"])
    user.update_columns(created_at: first_login) if user.created_at > first_login
  end
end
2017-04-12 13:16:45 -04:00
# Imports Jive people as Discourse users, one page at a time.
#
# Paging starts USER_COUNT records before the number of "import_id" user
# custom fields already stored (never below 0) and then follows the
# "startIndex" found in each response's "next" link. The loop ends on a
# short page, when there is no "next" link, or when a page cannot be fetched.
def import_users
  puts "", "importing users..."

  imported_users = 0
  start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max

  loop do
    users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
    people = users && users["list"]

    # `get` returns nil once its retries are exhausted; stop with a message
    # instead of failing on a nil response.
    if people.nil?
      puts "could not fetch users (startIndex=#{start_index}), stopping user import"
      break
    end

    create_users(people, offset: imported_users) do |user|
      emails = Array(user["emails"])
      # Prefer the primary address; fall back to the first one listed so a
      # person without a flagged primary email does not abort the import.
      email = emails.find { |e| e["primary"] } || emails.first

      {
        id: user["id"],
        created_at: user["initialLogin"],
        email: email && email["value"],
        username: user["mentionName"],
        name: user["displayName"],
        avatar_url: user["thumbnailUrl"],
      }
    end

    break if people.size < USER_COUNT || users["links"].blank? || users["links"]["next"].blank?

    imported_users += people.size
    start_index = users["links"]["next"][/startIndex=(\d+)/, 1]
    break unless start_index
  end
end
2017-05-16 04:22:44 -04:00
# Looks up the Jive place behind every TO_IMPORT entry and imports the
# contents of the first place returned. Entries whose lookup yields no
# place are skipped.
def import_contents
  puts "", "importing contents..."

  TO_IMPORT.each do |to_import|
    puts Time.now

    type, id = to_import[:jive_object].values_at(:type, :id)
    query = "places?fields=placeID,name,-resources&filter=entityDescriptor(#{type},#{id})"
    response = get(query, to_import[:authenticated])
    next unless response && response["list"].present?

    import_place_contents(response["list"].first, to_import)
  end
end
# Imports every published content item of a Jive place as a topic, followed
# by its likes and replies (comments and/or messages).
#
# place     - Hash describing the place; "name" and "placeID" are read.
# to_import - the TO_IMPORT entry for the place; :filters, :authenticated
#             and :category / :category_id are read.
def import_place_contents(place, to_import)
  puts "", "importing contents for '#{place["name"]}'..."

  start_index = 0

  # Collect the filters in an array so no string literal gets mutated.
  filters = ["filter=status(published)"]
  if (wanted = to_import[:filters])
    filters << "filter=type(#{wanted[:type]})" if wanted[:type].present?
    filters << "filter=creationDate(null,#{wanted[:created_after].strftime("%Y-%m-%dT%TZ")})" if wanted[:created_after].present?
  end
  filters = filters.join("&")

  loop do
    contents = get("places/#{place["placeID"]}/contents?#{filters}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
    list = contents && contents["list"]

    # `get` returns nil once its retries are exhausted; give up on this
    # place instead of failing on a nil response.
    if list.nil?
      puts "could not fetch contents of '#{place["name"]}' (startIndex=#{start_index}), skipping the rest of this place"
      break
    end

    list.each do |content|
      custom_fields = { import_id: content["contentID"] }
      custom_fields[:import_permalink] = content["permalink"] if content["permalink"].present?

      topic = {
        id: content["contentID"],
        created_at: content["published"],
        title: @htmlentities.decode(content["subject"]),
        raw: process_raw(content["content"]["text"]),
        user_id: user_id_from_imported_user_id(content["author"]["id"]) || Discourse::SYSTEM_USER_ID,
        views: content["viewCount"],
        custom_fields: custom_fields,
      }

      # A callable :category decides per content item; otherwise the fixed
      # :category_id is used.
      topic[:category] = to_import[:category] ? to_import[:category].call(content) : to_import[:category_id]

      # Reuse the post when the content was imported by an earlier run.
      post_id = post_id_from_imported_post_id(topic[:id])
      parent_post = post_id ? Post.unscoped.find_by(id: post_id) : create_post(topic, topic[:id])

      # create_post only registers results that are Post instances, so other
      # return values are possible; only a real Post can receive likes/replies.
      next unless parent_post.is_a?(Post) && parent_post.id && parent_post.topic_id

      import_likes(content["contentID"], parent_post.id) if content["likeCount"].to_i > 0

      next unless content["replyCount"].to_i > 0

      import_comments(content["contentID"], parent_post.topic_id, to_import) if content.dig("resources", "comments").present?
      import_messages(content["contentID"], parent_post.topic_id, to_import) if content.dig("resources", "messages").present?
    end

    break if list.size < POST_COUNT || contents["links"].blank? || contents["links"]["next"].blank?

    start_index = contents["links"]["next"][/startIndex=(\d+)/, 1]
    break unless start_index
  end
end
2017-05-16 04:22:44 -04:00
# Imports the likes of a Jive content item onto a Discourse post.
#
# content_id - Jive content ID whose likes are listed.
# post_id    - ID of the Discourse post receiving the likes.
#
# Likes from people without an imported user, and likes that already exist,
# are skipped. Requests are always authenticated.
def import_likes(content_id, post_id)
  start_index = 0
  like_type = PostActionType.types[:like]

  loop do
    likes = get("contents/#{content_id}/likes?count=#{USER_COUNT}&startIndex=#{start_index}", true)
    list = likes && likes["list"]

    # `get` returns nil once its retries are exhausted; stop instead of
    # failing on a nil response.
    if list.nil?
      puts "could not fetch likes of content #{content_id} (startIndex=#{start_index}), skipping"
      break
    end

    list.each do |like|
      user_id = user_id_from_imported_user_id(like["id"])
      next unless user_id
      next if PostAction.exists?(user_id: user_id, post_id: post_id, post_action_type_id: like_type)

      PostAction.act(User.find(user_id), Post.find(post_id), like_type)
    end

    break if list.size < USER_COUNT || likes["links"].blank? || likes["links"]["next"].blank?

    start_index = likes["links"]["next"][/startIndex=(\d+)/, 1]
    break unless start_index
  end
end
2017-05-16 04:22:44 -04:00
# Imports the comments of a Jive content item as replies in a topic.
#
# content_id - Jive content ID whose comments are listed.
# topic_id   - ID of the Discourse topic the replies are added to.
# to_import  - the TO_IMPORT entry; only :authenticated is read.
#
# Comments that were already imported are skipped.
def import_comments(content_id, topic_id, to_import)
  start_index = 0

  loop do
    comments = get("contents/#{content_id}/comments?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
    list = comments && comments["list"]

    # `get` returns nil once its retries are exhausted; stop instead of
    # failing on a nil response.
    if list.nil?
      puts "could not fetch comments of content #{content_id} (startIndex=#{start_index}), skipping"
      break
    end

    list.each do |comment|
      next if post_id_from_imported_post_id(comment["id"])

      post = {
        id: comment["id"],
        created_at: comment["published"],
        topic_id: topic_id,
        user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
        raw: process_raw(comment["content"]["text"]),
        custom_fields: { import_id: comment["id"] },
      }

      # Link the reply to its parent when the parent has been imported.
      if (parent_post_id = comment["parentID"]).to_i > 0
        parent = topic_lookup_from_imported_post_id(parent_post_id)
        post[:reply_to_post_number] = parent[:post_number] if parent
      end

      created_post = create_post(post, post[:id])

      # create_post only registers results that are Post instances, so other
      # truthy return values are possible; those have no usable id.
      next unless created_post.is_a?(Post)

      # NOTE(review): this lists the likes of the parent content (content_id),
      # not those of the comment itself — confirm this is intended.
      import_likes(content_id, created_post.id) if comment["likeCount"].to_i > 0
    end

    break if list.size < POST_COUNT || comments["links"].blank? || comments["links"]["next"].blank?

    start_index = comments["links"]["next"][/startIndex=(\d+)/, 1]
    break unless start_index
  end
end
2017-05-16 04:22:44 -04:00
# Imports the messages (discussion replies) of a Jive content item as
# replies in a topic.
#
# content_id - Jive content ID whose messages are listed.
# topic_id   - ID of the Discourse topic the replies are added to.
# to_import  - the TO_IMPORT entry; only :authenticated is read.
#
# Messages that were already imported are skipped. A message flagged as
# "answer" gets the `is_accepted_answer` post custom field.
def import_messages(content_id, topic_id, to_import)
  start_index = 0

  loop do
    messages = get("messages/contents/#{content_id}?hierarchical=false&count=#{POST_COUNT}&startIndex=#{start_index}", to_import[:authenticated])
    list = messages && messages["list"]

    # `get` returns nil once its retries are exhausted; stop instead of
    # failing on a nil response.
    if list.nil?
      puts "could not fetch messages of content #{content_id} (startIndex=#{start_index}), skipping"
      break
    end

    list.each do |message|
      next if post_id_from_imported_post_id(message["id"])

      post = {
        id: message["id"],
        created_at: message["published"],
        topic_id: topic_id,
        user_id: user_id_from_imported_user_id(message["author"]["id"]) || Discourse::SYSTEM_USER_ID,
        raw: process_raw(message["content"]["text"]),
        custom_fields: { import_id: message["id"] },
      }
      post[:custom_fields][:is_accepted_answer] = true if message["answer"]

      # Link the reply to its parent when the parent has been imported.
      if (parent_post_id = message["parentID"].to_i) > 0
        parent = topic_lookup_from_imported_post_id(parent_post_id)
        post[:reply_to_post_number] = parent[:post_number] if parent
      end

      created_post = create_post(post, post[:id])

      # create_post only registers results that are Post instances, so other
      # truthy return values are possible; those have no usable id.
      next unless created_post.is_a?(Post)

      # NOTE(review): this lists the likes of the parent content (content_id),
      # not those of the message itself — confirm this is intended.
      import_likes(content_id, created_post.id) if message["likeCount"].to_i > 0
    end

    break if list.size < POST_COUNT || messages["links"].blank? || messages["links"]["next"].blank?

    start_index = messages["links"]["next"][/startIndex=(\d+)/, 1]
    break unless start_index
  end
end
# Creates the post through the parent implementation and, when a Post comes
# back, registers it with add_post and add_topic. Returns whatever the
# parent implementation returned.
def create_post(options, import_id)
  super(options, import_id).tap do |created|
    case created
    when Post
      add_post(import_id, created)
      add_topic(created)
    end
  end
end
# Imports Jive favorites as bookmark post actions.
#
# A favorite is skipped when its author or favorited object is missing,
# when either has not been imported, or when the bookmark already exists.
def import_bookmarks
  puts "", "importing bookmarks..."

  start_index = 0
  fields = "fields=author.id,favoriteObject.id,-resources,-author.resources,-favoriteObject.resources"
  filter = "&filter=creationDate(null,2016-01-01T00:00:00Z)"
  bookmark_type = PostActionType.types[:bookmark]

  loop do
    favorites = get("contents?#{fields}&filter=type(favorite)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
    list = favorites && favorites["list"]

    # `get` returns nil once its retries are exhausted; stop instead of
    # failing on a nil response.
    if list.nil?
      puts "could not fetch bookmarks (startIndex=#{start_index}), stopping bookmark import"
      break
    end

    list.each do |favorite|
      author_id = favorite.dig("author", "id")
      target_id = favorite.dig("favoriteObject", "id")
      next unless author_id && target_id

      user_id = user_id_from_imported_user_id(author_id)
      next unless user_id

      post_id = post_id_from_imported_post_id(target_id)
      next unless post_id

      next if PostAction.exists?(user_id: user_id, post_id: post_id, post_action_type_id: bookmark_type)

      PostAction.act(User.find(user_id), Post.find(post_id), bookmark_type)
    end

    break if list.size < POST_COUNT || favorites["links"].blank? || favorites["links"]["next"].blank?

    start_index = favorites["links"]["next"][/startIndex=(\d+)/, 1]
    break unless start_index
  end
end
2017-04-24 16:03:12 -04:00
# Converts Jive HTML into Markdown.
#
# raw - HTML string of a content item, comment or message.
#
# Inline emoticons become :emoji: codes (unknown ones are removed), profile
# links become @mentions, links to Jive documents are trimmed to their
# canonical ".../docs/DOC-<n>" form and other links into the Jive instance
# are unwrapped. Returns the Markdown produced by HtmlToMarkdown.
def process_raw(raw)
  doc = Nokogiri::HTML.fragment(raw)

  # convert emoticon
  doc.css("span.emoticon-inline").each do |span|
    name = span["class"][/emoticon_(\w+)/, 1]&.downcase
    name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
  end

  # convert mentions
  doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }

  # fix links -- only possible when the Jive base URI is configured; with a
  # blank one every link would match (or Regexp.escape would raise on nil)
  if @base_uri.present?
    doc_link = %r{#{Regexp.escape(@base_uri)}/docs/DOC-\d+}

    doc.css("a[href]").each do |a|
      if (canonical = a["href"][doc_link])
        a["href"] = canonical
      elsif a["href"][@base_uri]
        # any other link into the Jive instance: keep only its inner HTML
        a.replace(a.inner_html)
      end
    end
  end

  # Fall back to the whole fragment when the ".jive-rendered-content"
  # wrapper is absent instead of calling to_html on nil.
  html = (doc.at(".jive-rendered-content") || doc).to_html
  HtmlToMarkdown.new(html, keep_img_tags: true).to_markdown
end
2017-04-12 13:16:45 -04:00
# Writes an 'accepted_answer_post_id' topic custom field for posts carrying
# the 'is_accepted_answer' post custom field.
#
# Topics that already have an 'accepted_answer_post_id' field are left
# alone, so re-running the importer does not insert duplicate rows.
def mark_topics_as_solved
  puts "", "Marking topics as solved..."

  PostAction.exec_sql <<~SQL
    INSERT INTO topic_custom_fields (name, value, topic_id, created_at, updated_at)
    SELECT 'accepted_answer_post_id', pcf.post_id, p.topic_id, p.created_at, p.created_at
      FROM post_custom_fields pcf
      JOIN posts p ON p.id = pcf.post_id
     WHERE pcf.name = 'is_accepted_answer'
       AND NOT EXISTS (
             SELECT 1
               FROM topic_custom_fields tcf
              WHERE tcf.topic_id = p.topic_id
                AND tcf.name = 'accepted_answer_post_id'
           )
  SQL
end
# Performs a GET against the Jive core v3 API with curl and parses the JSON
# response.
#
# query         - path and query string appended to "<BASE_URI>/api/core/v3/".
# authenticated - when truthy, the configured username/password are sent.
#
# Returns the parsed JSON, or nil when the request or the parsing still
# fails after the retries.
def get(query, authenticated=false)
  tries ||= 3

  # curl is started with an argument vector (no shell), so quotes, `$` or
  # backticks in the credentials or the URL are passed through literally.
  command = ["curl", "--silent"]
  command.push("--user", "#{@username}:#{@password}") if authenticated
  command << "#{@base_uri}/api/core/v3/#{query}"

  puts command.join(" ") if ENV["VERBOSE"] == "1"

  JSON.parse(IO.popen(command, &:read))
rescue
  # retried up to 3 more times; falls through to nil afterwards
  retry if (tries -= 1) >= 0
end
end
# Script entry point: build the importer and start it. `perform` is not
# defined in this file -- presumably inherited from ImportScripts::Base.
ImportScripts::JiveApi.new.perform