update JIVE API importer
This commit is contained in:
parent
d5630d6160
commit
29ddb3a611
|
@ -1,9 +1,12 @@
|
|||
require "nokogiri"
|
||||
require "htmlentities"
|
||||
require_relative "./../../lib/html_to_markdown.rb"
|
||||
require File.expand_path(File.dirname(__FILE__) + "/base.rb")
|
||||
|
||||
class ImportScripts::JiveApi < ImportScripts::Base
|
||||
|
||||
COUNT ||= 100
|
||||
USER_COUNT ||= 1000
|
||||
POST_COUNT ||= 100
|
||||
STAFF_GUARDIAN ||= Guardian.new(Discourse.system_user)
|
||||
|
||||
def initialize
|
||||
|
@ -26,10 +29,10 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
puts "", "importing users..."
|
||||
|
||||
imported_users = 0
|
||||
start_index = [0, Math.floor(PostCustomField.where(name: "import_id").count / COUNT.to_f) - COUNT].max
|
||||
start_index = [0, UserCustomField.where(name: "import_id").count - USER_COUNT].max
|
||||
|
||||
loop do
|
||||
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{COUNT}&startIndex=#{start_index}", true)
|
||||
users = get("people/@all?fields=initialLogin,emails,displayName,mentionName,thumbnailUrl,-resources&count=#{USER_COUNT}&startIndex=#{start_index}", true)
|
||||
create_users(users["list"], offset: imported_users) do |user|
|
||||
{
|
||||
id: user["id"],
|
||||
|
@ -41,7 +44,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
}
|
||||
end
|
||||
|
||||
break if users["list"].size < COUNT || users["links"].blank? || users["links"]["next"].blank?
|
||||
break if users["list"].size < USER_COUNT || users["links"].blank? || users["links"]["next"].blank?
|
||||
imported_users += users["list"].size
|
||||
break unless start_index = users["links"]["next"][/startIndex=(\d+)/, 1]
|
||||
end
|
||||
|
@ -51,25 +54,23 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
puts "", "importing discussions & questions..."
|
||||
|
||||
start_index = 0
|
||||
fields = "fields=published,tags,contentID,author.id,content.text,subject,viewCount,question,-resources,-author.resources"
|
||||
fields = "fields=published,contentID,author.id,content.text,subject,viewCount,question,-resources,-author.resources"
|
||||
filter = "&filter=creationDate(null,2017-01-01T00:00:00Z)"
|
||||
|
||||
loop do
|
||||
discussions = get("contents?#{fields}&filter=status(published)&filter=type(discussion)#{filter}&sort=dateCreatedAsc&count=#{COUNT}&startIndex=#{start_index}")
|
||||
discussions = get("contents?#{fields}&filter=status(published)&filter=type(discussion)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
|
||||
discussions["list"].each do |discussion|
|
||||
topic = {
|
||||
id: discussion["contentID"],
|
||||
created_at: discussion["published"],
|
||||
title: @htmlentities.decode(discussion["subject"]),
|
||||
raw: discussion["content"]["text"],
|
||||
raw: process_raw(discussion["content"]["text"]),
|
||||
user_id: user_id_from_imported_user_id(discussion["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
||||
# category: discussion["question"] ? 26 : 21,
|
||||
# category: discussion["question"] ? 5 : 21,
|
||||
views: discussion["viewCount"],
|
||||
cook_method: Post.cook_methods[:raw_html],
|
||||
custom_fields: { import_id: discussion["contentID"] },
|
||||
post_create_action: proc do |post|
|
||||
tags = discussion["tags"].compact.map(&:strip).select(&:present?)
|
||||
DiscourseTagging.tag_topic_by_names(post.topic, STAFF_GUARDIAN, tags) unless tags.empty?
|
||||
DiscourseTagging.tag_topic_by_names(post.topic, STAFF_GUARDIAN, ["legacy"])
|
||||
end
|
||||
}
|
||||
|
||||
|
@ -79,7 +80,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
import_comments(discussion["contentID"], parent_post.topic_id) if parent_post
|
||||
end
|
||||
|
||||
break if discussions["list"].size < COUNT || discussions["links"].blank? || discussions["links"]["next"].blank?
|
||||
break if discussions["list"].size < POST_COUNT || discussions["links"].blank? || discussions["links"]["next"].blank?
|
||||
break unless start_index = discussions["links"]["next"][/startIndex=(\d+)/, 1]
|
||||
end
|
||||
end
|
||||
|
@ -89,7 +90,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
fields = "fields=published,author.id,content.text,parent,answer,-resources,-author.resources"
|
||||
|
||||
loop do
|
||||
comments = get("messages/contents/#{discussion_id}?#{fields}&count=#{COUNT}&startIndex=#{start_index}")
|
||||
comments = get("messages/contents/#{discussion_id}?#{fields}&count=#{POST_COUNT}&startIndex=#{start_index}")
|
||||
comments["list"].each do |comment|
|
||||
next if post_id_from_imported_post_id(comment["id"])
|
||||
|
||||
|
@ -98,8 +99,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
created_at: comment["published"],
|
||||
topic_id: topic_id,
|
||||
user_id: user_id_from_imported_user_id(comment["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
||||
raw: comment["content"]["text"],
|
||||
cook_method: Post.cook_methods[:raw_html],
|
||||
raw: process_raw(comment["content"]["text"]),
|
||||
custom_fields: { import_id: comment["id"] },
|
||||
}
|
||||
post[:custom_fields][:is_accepted_answer] = true if comment["answer"]
|
||||
|
@ -113,7 +113,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
create_post(post, post[:id])
|
||||
end
|
||||
|
||||
break if comments["list"].size < COUNT || comments["links"].blank? || comments["links"]["next"].blank?
|
||||
break if comments["list"].size < POST_COUNT || comments["links"].blank? || comments["links"]["next"].blank?
|
||||
break unless start_index = comments["links"]["next"][/startIndex=(\d+)/, 1]
|
||||
end
|
||||
end
|
||||
|
@ -122,37 +122,61 @@ class ImportScripts::JiveApi < ImportScripts::Base
|
|||
puts "", "importing blog posts..."
|
||||
|
||||
start_index = 0
|
||||
fields = "fields=published,tags,contentID,author.id,content.text,subject,viewCount,permalink,-resources,-author.resources"
|
||||
fields = "fields=published,contentID,author.id,content.text,subject,viewCount,permalink,-resources,-author.resources"
|
||||
filter = "&filter=creationDate(null,2016-05-01T00:00:00Z)"
|
||||
|
||||
loop do
|
||||
posts = get("contents?#{fields}&filter=status(published)&filter=type(post)#{filter}&sort=dateCreatedAsc&count=#{COUNT}&startIndex=#{start_index}")
|
||||
posts = get("contents?#{fields}&filter=status(published)&filter=type(post)#{filter}&sort=dateCreatedAsc&count=#{POST_COUNT}&startIndex=#{start_index}")
|
||||
posts["list"].each do |post|
|
||||
next if post_id_from_imported_post_id(post["contentID"])
|
||||
pp = {
|
||||
id: post["contentID"],
|
||||
created_at: post["published"],
|
||||
title: @htmlentities.decode(post["subject"]),
|
||||
raw: post["content"]["text"],
|
||||
raw: process_raw(post["content"]["text"]),
|
||||
user_id: user_id_from_imported_user_id(post["author"]["id"]) || Discourse::SYSTEM_USER_ID,
|
||||
# category: 7,
|
||||
category: 7,
|
||||
views: post["viewCount"],
|
||||
cook_method: Post.cook_methods[:raw_html],
|
||||
custom_fields: { import_id: post["contentID"], import_permalink: post["permalink"] },
|
||||
post_create_action: proc do |p|
|
||||
tags = post["tags"].compact.map(&:strip).select(&:present?)
|
||||
DiscourseTagging.tag_topic_by_names(p.topic, STAFF_GUARDIAN, tags) unless tags.empty?
|
||||
DiscourseTagging.tag_topic_by_names(p.topic, STAFF_GUARDIAN, ["legacy"])
|
||||
end
|
||||
}
|
||||
|
||||
create_post(pp, pp[:id])
|
||||
end
|
||||
|
||||
break if posts["list"].size < COUNT || posts["links"].blank? || posts["links"]["next"].blank?
|
||||
break if posts["list"].size < POST_COUNT || posts["links"].blank? || posts["links"]["next"].blank?
|
||||
break unless start_index = posts["links"]["next"][/startIndex=(\d+)/, 1]
|
||||
end
|
||||
end
|
||||
|
||||
# Converts the HTML body of a Jive content item into Markdown.
#
# Steps, in order:
#   1. inline emoticon spans become `:name:` when Emoji recognises the name,
#      and are dropped otherwise;
#   2. small profile links become `@mention` text;
#   3. the `.jive-rendered-content` wrapper (or, when it is absent, the whole
#      fragment) is serialised and handed to HtmlToMarkdown.
#
# @param raw [String, nil] HTML text from the API; nil is treated as "".
# @return the result of HtmlToMarkdown#to_markdown for the cleaned HTML.
def process_raw(raw)
  doc = Nokogiri::HTML.fragment(raw.to_s)

  # convert emoticon
  doc.css("span.emoticon-inline").each do |span|
    name = span["class"][/emoticon_(\w+)/, 1]&.downcase
    name && Emoji.exists?(name) ? span.replace(":#{name}:") : span.remove
  end

  # convert mentions
  doc.css("a.jive-link-profile-small").each { |a| a.replace("@#{a.content}") }

  # fix links
  # doc.css("a[href]").each do |a|
  #   if a["href"]["#{@base_uri}/docs/DOC-"]
  #     a["href"] = a["href"][/#{Regexp.escape(@base_uri)}\/docs\/DOC-\d+/]
  #   elsif a["href"][@base_uri]
  #     a.replace(a.inner_html)
  #   end
  # end

  # `at` yields nil when no wrapper element is present; fall back to the whole
  # fragment instead of raising NoMethodError and aborting the import.
  content = doc.at(".jive-rendered-content") || doc

  HtmlToMarkdown.new(content.to_html).to_markdown
end
|
||||
|
||||
def mark_topics_as_solved
|
||||
puts "", "Marking topics as solved..."
|
||||
|
||||
|
|
Loading…
Reference in New Issue