From 47c1ea337ef9820f0f51a34b845dc74a9e961995 Mon Sep 17 00:00:00 2001 From: Sam Date: Thu, 12 Dec 2024 10:26:05 +1100 Subject: [PATCH] FIX: allow scanning of trashed posts and deleted users for test (#1024) When a post is trashed we should still be allowed to scan it. post.topic will be nil for a trashed topic, even if the post is trashed. --- lib/ai_moderation/spam_scanner.rb | 39 +++++++++++-------- .../requests/admin/ai_spam_controller_spec.rb | 6 ++- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/lib/ai_moderation/spam_scanner.rb b/lib/ai_moderation/spam_scanner.rb index 72b23886..3143357e 100644 --- a/lib/ai_moderation/spam_scanner.rb +++ b/lib/ai_moderation/spam_scanner.rb @@ -124,7 +124,7 @@ module DiscourseAi llm_model = llm_id ? LlmModel.find(llm_id) : settings.llm_model llm = llm_model.to_llm custom_instructions = custom_instructions || settings.custom_instructions.presence - context = build_context(post) + context = build_context(post, post.topic || Topic.with_deleted.find_by(id: post.topic_id)) prompt = completion_prompt(post, context: context, custom_instructions: custom_instructions) result = @@ -247,34 +247,39 @@ module DiscourseAi (result.present? && result.strip.downcase.start_with?("spam")) end - def self.build_context(post) + def self.build_context(post, topic = nil) + topic ||= post.topic context = [] # Clear distinction between reply and new topic if post.is_first_post? 
context << "NEW TOPIC POST ANALYSIS" - context << "- Topic title: #{post.topic.title}" - context << "- Category: #{post.topic.category&.name}" + context << "- Topic title: #{topic.title}" + context << "- Category: #{topic.category&.name}" else context << "REPLY POST ANALYSIS" - context << "- In topic: #{post.topic.title}" - context << "- Category: #{post.topic.category&.name}" - context << "- Topic started by: #{post.topic.user.username}" + context << "- In topic: #{topic.title}" + context << "- Category: #{topic.category&.name}" + context << "- Topic started by: #{topic.user&.username}" - # Include parent post context for replies - if post.reply_to_post.present? - parent = post.reply_to_post - context << "\nReplying to #{parent.user.username}'s post:" - context << "#{parent.raw[0..500]}..." if parent.raw.length > 500 - context << parent.raw if parent.raw.length <= 500 + if post.reply_to_post_number.present? + parent = + Post.with_deleted.find_by(topic_id: topic.id, post_number: post.reply_to_post_number) + if parent + context << "\nReplying to #{parent.user&.username}'s post:" + context << "#{parent.raw[0..500]}..." 
if parent.raw.length > 500 + context << parent.raw if parent.raw.length <= 500 + end end end context << "\nPost Author Information:" - context << "- Username: #{post.user.username}" - context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days" - context << "- Total posts: #{post.user.post_count}" - context << "- Trust level: #{post.user.trust_level}" + if post.user # during test we may not have a user + context << "- Username: #{post.user.username}" + context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days" + context << "- Total posts: #{post.user.post_count}" + context << "- Trust level: #{post.user.trust_level}" + end context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n" context << post.raw[0..MAX_RAW_SCAN_LENGTH] diff --git a/spec/requests/admin/ai_spam_controller_spec.rb b/spec/requests/admin/ai_spam_controller_spec.rb index 5f82d306..e0fb0bdf 100644 --- a/spec/requests/admin/ai_spam_controller_spec.rb +++ b/spec/requests/admin/ai_spam_controller_spec.rb @@ -120,7 +120,11 @@ RSpec.describe DiscourseAi::Admin::AiSpamController do before { sign_in(admin) } - it "can scan using post url" do + it "can scan using post url (even when trashed and user deleted)" do + User.where(id: spam_post2.user_id).delete_all + spam_post2.topic.trash! + spam_post2.trash! + llm2 = Fabricate(:llm_model, name: "DiffLLM") DiscourseAi::Completions::Llm.with_prepared_responses(["spam", "just because"]) do