diff --git a/db/migrate/20210107005832_add_imap_missing_column_to_incoming_email.rb b/db/migrate/20210107005832_add_imap_missing_column_to_incoming_email.rb new file mode 100644 index 00000000000..b338dcb390f --- /dev/null +++ b/db/migrate/20210107005832_add_imap_missing_column_to_incoming_email.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: true + +class AddImapMissingColumnToIncomingEmail < ActiveRecord::Migration[6.0] + def change + add_column :incoming_emails, :imap_missing, :boolean, default: false, null: false + end +end diff --git a/lib/imap/providers/generic.rb b/lib/imap/providers/generic.rb index 4b82e405ac5..e331c32f6ce 100644 --- a/lib/imap/providers/generic.rb +++ b/lib/imap/providers/generic.rb @@ -10,6 +10,10 @@ module Imap attr_accessor :trashed_emails, :trash_uid_validity end + class SpamMailResponse + attr_accessor :spam_emails, :spam_uid_validity + end + class BasicMail attr_accessor :uid, :message_id @@ -172,14 +176,19 @@ module Imap end # Look for the special Trash XLIST attribute. - # TODO: It might be more efficient to just store this against the group. - # Another table is looking more and more attractive.... def trash_mailbox Discourse.cache.fetch("imap_trash_mailbox_#{account_digest}", expires_in: 30.minutes) do list_mailboxes(:Trash).first end end + # Look for the special Junk XLIST attribute. + def spam_mailbox + Discourse.cache.fetch("imap_spam_mailbox_#{account_digest}", expires_in: 30.minutes) do + list_mailboxes(:Junk).first + end + end + # open the trash mailbox for inspection or writing. after the yield we # close the trash and reopen the original mailbox to continue operations. # the normal open_mailbox call can be made if more extensive trash ops @@ -196,19 +205,26 @@ module Imap trash_uid_validity end + # open the spam mailbox for inspection or writing. after the yield we + # close the spam and reopen the original mailbox to continue operations. + # the normal open_mailbox call can be made if more extensive spam ops + # need to be done. + def open_spam_mailbox(write: false) + open_mailbox_before_spam = @open_mailbox_name + open_mailbox_before_spam_write = @open_mailbox_write + + spam_uid_validity = open_mailbox(spam_mailbox, write: write)[:uid_validity] + + yield(spam_uid_validity) if block_given? + + open_mailbox(open_mailbox_before_spam, write: open_mailbox_before_spam_write) + spam_uid_validity + end + def find_trashed_by_message_ids(message_ids) trashed_emails = [] trash_uid_validity = open_trash_mailbox do - header_message_id_terms = message_ids.map do |msgid| - "HEADER Message-ID '#{Email.message_id_rfc_format(msgid)}'" - end - - # OR clauses are written in Polish notation...so the query looks like this: - # OR OR HEADER Message-ID XXXX HEADER Message-ID XXXX HEADER Message-ID XXXX - or_clauses = 'OR ' * (header_message_id_terms.length - 1) - query = "#{or_clauses}#{header_message_id_terms.join(" ")}" - - trashed_email_uids = imap.uid_search(query) + trashed_email_uids = find_uids_by_message_ids(message_ids) if trashed_email_uids.any? trashed_emails = emails(trashed_email_uids, ["UID", "ENVELOPE"]).map do |e| BasicMail.new(message_id: Email.message_id_clean(e['ENVELOPE'].message_id), uid: e['UID']) @@ -222,6 +238,36 @@ module Imap end end + def find_spam_by_message_ids(message_ids) + spam_emails = [] + spam_uid_validity = open_spam_mailbox do + spam_email_uids = find_uids_by_message_ids(message_ids) + if spam_email_uids.any? + spam_emails = emails(spam_email_uids, ["UID", "ENVELOPE"]).map do |e| + BasicMail.new(message_id: Email.message_id_clean(e['ENVELOPE'].message_id), uid: e['UID']) + end + end + end + + SpamMailResponse.new.tap do |resp| + resp.spam_emails = spam_emails + resp.spam_uid_validity = spam_uid_validity + end + end + + def find_uids_by_message_ids(message_ids) + header_message_id_terms = message_ids.map do |msgid| + "HEADER Message-ID '#{Email.message_id_rfc_format(msgid)}'" + end + + # OR clauses are written in Polish notation...so the query looks like this: + # OR OR HEADER Message-ID XXXX HEADER Message-ID XXXX HEADER Message-ID XXXX + or_clauses = 'OR ' * (header_message_id_terms.length - 1) + query = "#{or_clauses}#{header_message_id_terms.join(" ")}" + + imap.uid_search(query) + end + def trash(uid) # MOVE is way easier than doing the COPY \Deleted EXPUNGE dance ourselves. # It is supported by Gmail and Outlook. diff --git a/lib/imap/sync.rb b/lib/imap/sync.rb index 99ae2b995c0..13bc364a298 100644 --- a/lib/imap/sync.rb +++ b/lib/imap/sync.rb @@ -180,15 +180,15 @@ module Imap # This can be done because Message-ID is unique on a mail server between mailboxes, # where the UID will have changed when moving into the Trash mailbox. We need to get # the new UID from the trash. + potential_spam = [] response = @provider.find_trashed_by_message_ids(missing_message_ids) existing_incoming.each do |incoming| matching_trashed = response.trashed_emails.find { |email| email.message_id == incoming.message_id } - # if the email is not in the trash then we don't know where it is... could - # be in any mailbox on the server or could be permanently deleted. TODO - # here would be some sort of more complex detection of "where in the world - # has this UID gone?" - next if !matching_trashed + if !matching_trashed + potential_spam << incoming + next + end # if we deleted the topic/post ourselves in discourse then the post will # not exist, and this sync is just updating the old UIDs to the new ones @@ -202,6 +202,34 @@ module Imap ImapSyncLog.debug("Updating incoming ID #{incoming.id} uid data FROM [UID #{incoming.imap_uid} | UIDVALIDITY #{incoming.imap_uid_validity}] TO [UID #{matching_trashed.uid} | UIDVALIDITY #{response.trash_uid_validity}] (TRASHED)", @group) incoming.update(imap_uid_validity: response.trash_uid_validity, imap_uid: matching_trashed.uid) end + + # This can be done because Message-ID is unique on a mail server between mailboxes, + # where the UID will have changed when moving into the Trash mailbox. We need to get + # the new UID from the spam. + response = @provider.find_spam_by_message_ids(missing_message_ids) + potential_spam.each do |incoming| + matching_spam = response.spam_emails.find { |email| email.message_id == incoming.message_id } + + # if the email is not in the trash or spam then we don't know where it is... could + # be in any mailbox on the server or could be permanently deleted. + if !matching_spam + ImapSyncLog.debug("Email for incoming ID #{incoming.id} (#{incoming.message_id}) could not be found in the group mailbox, trash, or spam. It could be in another mailbox or permanently deleted.", @group) + incoming.update(imap_missing: true) + next + end + + # if we deleted the topic/post ourselves in discourse then the post will + # not exist, and this sync is just updating the old UIDs to the new ones + # in the spam, and we don't need to re-destroy the post + if incoming.post + ImapSyncLog.debug("Deleting post ID #{incoming.post_id}, topic id #{incoming.topic_id}; email has been moved to spam on the IMAP server.", @group) + PostDestroyer.new(Discourse.system_user, incoming.post).destroy + end + + # the email has moved mailboxes, we don't want to try marking as spam again next time + ImapSyncLog.debug("Updating incoming ID #{incoming.id} uid data FROM [UID #{incoming.imap_uid} | UIDVALIDITY #{incoming.imap_uid_validity}] TO [UID #{matching_spam.uid} | UIDVALIDITY #{response.spam_uid_validity}] (SPAM)", @group) + incoming.update(imap_uid_validity: response.spam_uid_validity, imap_uid: matching_spam.uid) + end end def process_new_uids(new_uids, import_mode, all_old_uids_size, all_new_uids_size) diff --git a/spec/components/imap/sync_spec.rb b/spec/components/imap/sync_spec.rb index f78cbd2eacd..f6ebf44dbe6 100644 --- a/spec/components/imap/sync_spec.rb +++ b/spec/components/imap/sync_spec.rb @@ -321,6 +321,7 @@ describe Imap::Sync do provider.stubs(:uids).with(to: 100).returns([]) provider.stubs(:uids).with(from: 101).returns([]) + provider.stubs(:find_spam_by_message_ids).returns(stub(spam_emails: [])) provider.stubs(:find_trashed_by_message_ids).returns( stub( trashed_emails: [ @@ -341,6 +342,49 @@ describe Imap::Sync do expect(Topic.with_deleted.find(incoming_100.topic_id).deleted_at).not_to eq(nil) end + it "detects previously synced UIDs are missing and deletes the posts if they are in the spam/junk mailbox" do + sync_handler.process + incoming_100 = IncomingEmail.find_by(imap_uid: 100) + provider.stubs(:uids).with.returns([]) + + provider.stubs(:uids).with(to: 100).returns([]) + provider.stubs(:uids).with(from: 101).returns([]) + provider.stubs(:find_trashed_by_message_ids).returns(stub(trashed_emails: [])) + provider.stubs(:find_spam_by_message_ids).returns( + stub( + spam_emails: [ + stub( + uid: 10, + message_id: incoming_100.message_id + ) + ], + spam_uid_validity: 99 + ) + ) + sync_handler.process + + incoming_100.reload + expect(incoming_100.imap_uid_validity).to eq(99) + expect(incoming_100.imap_uid).to eq(10) + expect(Post.with_deleted.find(incoming_100.post_id).deleted_at).not_to eq(nil) + expect(Topic.with_deleted.find(incoming_100.topic_id).deleted_at).not_to eq(nil) + end + + it "marks the incoming email as IMAP missing if it cannot be found in spam or trash" do + sync_handler.process + incoming_100 = IncomingEmail.find_by(imap_uid: 100) + provider.stubs(:uids).with.returns([]) + + provider.stubs(:uids).with(to: 100).returns([]) + provider.stubs(:uids).with(from: 101).returns([]) + provider.stubs(:find_trashed_by_message_ids).returns(stub(trashed_emails: [])) + provider.stubs(:find_spam_by_message_ids).returns(stub(spam_emails: [])) + sync_handler.process + + incoming_100.reload + expect(incoming_100.imap_missing).to eq(true) + end + it "detects the topic being deleted on the discourse site and deletes on the IMAP server and does not attempt to delete again on discourse site when deleted already by us on the IMAP server" do SiteSetting.enable_imap_write = true @@ -385,6 +429,7 @@ describe Imap::Sync do provider.stubs(:uids).with(to: 100).returns([]) provider.stubs(:uids).with(from: 101).returns([]) + provider.stubs(:find_spam_by_message_ids).returns(stub(spam_emails: [])) provider.stubs(:find_trashed_by_message_ids).returns( stub( trashed_emails: [