improvements to the mbox import script
* ignores dot-files and empty emails
* new setting to prefer HTML over plaintext emails during import
* restores the original site settings at the end of the import
* fixes elided content of HTML mails not being put inside a details block
This commit is contained in:
parent
6dda87c49b
commit
32dd1e66be
|
@ -1 +1,2 @@
|
||||||
tmp/*
|
tmp/*
|
||||||
|
settings.local.yml
|
||||||
|
|
|
@ -13,10 +13,11 @@ module ImportScripts::Mbox
|
||||||
@database = Database.new(@settings.data_dir, @settings.batch_size)
|
@database = Database.new(@settings.data_dir, @settings.batch_size)
|
||||||
end
|
end
|
||||||
|
|
||||||
def change_site_settings
|
def get_site_settings_for_import
|
||||||
super
|
settings = super
|
||||||
|
settings[:enable_staged_users] = true
|
||||||
SiteSetting.enable_staged_users = true
|
settings[:incoming_email_prefer_html] = @settings.prefer_html
|
||||||
|
settings
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
protected
|
||||||
|
@ -120,7 +121,7 @@ module ImportScripts::Mbox
|
||||||
when Email::Receiver::formats[:markdown]
|
when Email::Receiver::formats[:markdown]
|
||||||
body = email_body
|
body = email_body
|
||||||
body << attachment_html if attachment_html.present?
|
body << attachment_html if attachment_html.present?
|
||||||
body << elided if elided.present?
|
body << Email::Receiver.elided_html(elided) if elided.present?
|
||||||
when Email::Receiver::formats[:plaintext]
|
when Email::Receiver::formats[:plaintext]
|
||||||
body = %|[plaintext]\n#{escape_tags(email_body)}\n[/plaintext]|
|
body = %|[plaintext]\n#{escape_tags(email_body)}\n[/plaintext]|
|
||||||
body << %|\n[attachments]\n#{escape_tags(attachment_html)}\n[/attachments]| if attachment_html.present?
|
body << %|\n[attachments]\n#{escape_tags(attachment_html)}\n[/attachments]| if attachment_html.present?
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
# PostgreSQL mailing lists
|
# PostgreSQL mailing lists
|
||||||
#data_dir: /data/import/postgres
|
#data_dir: /shared/import/data
|
||||||
#split_regex: "^From .*@postgresql.org.*"
|
#split_regex: "^From .*@postgresql.org.*"
|
||||||
|
|
||||||
# ruby-talk mailing list
|
# ruby-talk mailing list
|
||||||
data_dir: /data/import/ruby-talk/news/gmane/comp/lang/ruby
|
data_dir: /shared/import/data
|
||||||
split_regex: ""
|
split_regex: ""
|
||||||
|
|
||||||
default_trust_level: 1
|
default_trust_level: 1
|
||||||
|
prefer_html: false
|
||||||
|
|
|
@ -102,10 +102,12 @@ module ImportScripts::Mbox
|
||||||
|
|
||||||
if @split_regex.present?
|
if @split_regex.present?
|
||||||
each_mail(filename) do |raw_message, first_line_number, last_line_number|
|
each_mail(filename) do |raw_message, first_line_number, last_line_number|
|
||||||
yield read_mail_from_string(raw_message), filename, first_line_number, last_line_number
|
receiver = read_mail_from_string(raw_message)
|
||||||
|
yield receiver, filename, first_line_number, last_line_number if receiver.present?
|
||||||
end
|
end
|
||||||
else
|
else
|
||||||
yield read_mail_from_file(filename), filename
|
receiver = read_mail_from_file(filename)
|
||||||
|
yield receiver, filename if receiver.present?
|
||||||
end
|
end
|
||||||
|
|
||||||
mark_as_fully_indexed(category_name, filename)
|
mark_as_fully_indexed(category_name, filename)
|
||||||
|
@ -161,7 +163,7 @@ module ImportScripts::Mbox
|
||||||
end
|
end
|
||||||
|
|
||||||
def read_mail_from_string(raw_message)
|
def read_mail_from_string(raw_message)
|
||||||
Email::Receiver.new(raw_message)
|
Email::Receiver.new(raw_message) unless raw_message.blank?
|
||||||
end
|
end
|
||||||
|
|
||||||
def extract_reply_message_ids(mail)
|
def extract_reply_message_ids(mail)
|
||||||
|
@ -208,7 +210,12 @@ module ImportScripts::Mbox
|
||||||
end
|
end
|
||||||
|
|
||||||
def ignored_file?(filename, checksums)
|
def ignored_file?(filename, checksums)
|
||||||
File.directory?(filename) || metadata_file?(filename) || fully_indexed?(filename, checksums)
|
File.directory?(filename) || hidden_file?(filename) ||
|
||||||
|
metadata_file?(filename) || fully_indexed?(filename, checksums)
|
||||||
|
end
|
||||||
|
|
||||||
|
def hidden_file?(filename)
|
||||||
|
File.basename(filename).start_with?('.')
|
||||||
end
|
end
|
||||||
|
|
||||||
def metadata_file?(filename)
|
def metadata_file?(filename)
|
||||||
|
|
|
@ -11,12 +11,14 @@ module ImportScripts::Mbox
|
||||||
attr_reader :split_regex
|
attr_reader :split_regex
|
||||||
attr_reader :batch_size
|
attr_reader :batch_size
|
||||||
attr_reader :trust_level
|
attr_reader :trust_level
|
||||||
|
attr_reader :prefer_html
|
||||||
|
|
||||||
def initialize(yaml)
|
def initialize(yaml)
|
||||||
@data_dir = yaml['data_dir']
|
@data_dir = yaml['data_dir']
|
||||||
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
|
@split_regex = Regexp.new(yaml['split_regex']) unless yaml['split_regex'].empty?
|
||||||
@batch_size = 1000 # no need to make this actually configurable at the moment
|
@batch_size = 1000 # no need to make this actually configurable at the moment
|
||||||
@trust_level = yaml['default_trust_level']
|
@trust_level = yaml['default_trust_level']
|
||||||
|
@prefer_html = yaml['prefer_html']
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue