diff --git a/script/import_scripts/mbox/importer.rb b/script/import_scripts/mbox/importer.rb index 4615f105247..db0269d8883 100644 --- a/script/import_scripts/mbox/importer.rb +++ b/script/import_scripts/mbox/importer.rb @@ -93,7 +93,10 @@ module ImportScripts::Mbox next if all_records_exist?(:posts, rows.map { |row| row['msg_id'] }) create_posts(rows, total: total_count, offset: offset) do |row| - if row['in_reply_to'].blank? + if row['email_date'].blank? + puts "Date is missing. Skipping #{row['msg_id']}" + nil + elsif row['in_reply_to'].blank? map_first_post(row) else map_reply(row) @@ -163,8 +166,8 @@ module ImportScripts::Mbox ) end - def to_time(datetime) - Time.zone.at(DateTime.iso8601(datetime)) if datetime + def to_time(timestamp) + Time.zone.at(timestamp) if timestamp end end end diff --git a/script/import_scripts/mbox/settings.yml b/script/import_scripts/mbox/settings.yml index a4b449a3d6f..cac3996a90a 100644 --- a/script/import_scripts/mbox/settings.yml +++ b/script/import_scripts/mbox/settings.yml @@ -1,7 +1,7 @@ data_dir: /shared/import/data # mbox files -split_regex: "^From .+" +split_regex: "^From .+@.+" #split_regex: "^From .+@example.com.+" # individual emails @@ -11,7 +11,7 @@ split_regex: "^From .+" #split_regex: "^=========================================================================" default_trust_level: 1 -prefer_html: false +prefer_html: true staged: true index_only: false diff --git a/script/import_scripts/mbox/support/database.rb b/script/import_scripts/mbox/support/database.rb index 6121e23a5e8..a7415a0327c 100644 --- a/script/import_scripts/mbox/support/database.rb +++ b/script/import_scripts/mbox/support/database.rb @@ -97,19 +97,20 @@ module ImportScripts::Mbox @db.execute <<-SQL WITH RECURSIVE - messages(msg_id, level, email_date) AS ( - SELECT msg_id, 0 AS level, email_date + messages(msg_id, level, email_date, in_reply_to) AS ( + SELECT msg_id, 0 AS level, email_date, in_reply_to FROM email WHERE in_reply_to IS NULL UNION ALL - SELECT e.msg_id, m.level + 1, e.email_date + SELECT e.msg_id, m.level + 1, e.email_date, e.in_reply_to FROM email e JOIN messages m ON e.in_reply_to = m.msg_id - ORDER BY level, email_date, msg_id ) INSERT INTO email_order (msg_id) - SELECT msg_id - FROM messages + SELECT c.msg_id + FROM messages c + LEFT OUTER JOIN messages p ON (c.in_reply_to = p.msg_id) + ORDER BY MAX(c.email_date, p.email_date), c.level, c.email_date, c.msg_id SQL end @@ -175,7 +176,6 @@ module ImportScripts::Mbox @db.get_first_value <<-SQL SELECT COUNT(*) FROM email - WHERE email_date IS NOT NULL SQL end @@ -185,8 +185,7 @@ module ImportScripts::Mbox raw_message, body, elided, format, attachment_count, category FROM email e JOIN email_order o USING (msg_id) - WHERE email_date IS NOT NULL AND - o.ROWID > :last_row_id + WHERE o.ROWID > :last_row_id ORDER BY o.ROWID LIMIT #{@batch_size} SQL diff --git a/script/import_scripts/mbox/support/indexer.rb b/script/import_scripts/mbox/support/indexer.rb index dfaf74f68f7..1a2260525a6 100644 --- a/script/import_scripts/mbox/support/indexer.rb +++ b/script/import_scripts/mbox/support/indexer.rb @@ -74,7 +74,7 @@ module ImportScripts::Mbox from_email: from_email, from_name: from_display_name, subject: extract_subject(receiver, category_name), - email_date: parsed_email.date&.to_s, + email_date: timestamp(parsed_email.date), raw_message: receiver.raw_email, body: body, elided: elided, @@ -256,5 +256,9 @@ module ImportScripts::Mbox def monotonic_time Process.clock_gettime(Process::CLOCK_MONOTONIC) end + + def timestamp(datetime) + Time.zone.at(datetime).to_i if datetime + end end end