From 2c609e165bdcf755fa631aa3d3caa57f5fa73891 Mon Sep 17 00:00:00 2001 From: Sam Date: Tue, 21 Jan 2025 17:51:21 +1100 Subject: [PATCH] FEATURE: Add user location info to spam scanner context (#1076) This adds registration and last-known IP information, along with the user's email, to the scanning context. This gives the spam scanner another hint about possibly malicious users: for example, an account that registered in India but is replying from Australia, or an email that is clearly a throwaway address. --- lib/ai_moderation/spam_scanner.rb | 42 ++++++++++++++++--- .../ai_moderation/spam_scanner_spec.rb | 26 ++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/lib/ai_moderation/spam_scanner.rb b/lib/ai_moderation/spam_scanner.rb index 4f0c49b0..e775246c 100644 --- a/lib/ai_moderation/spam_scanner.rb +++ b/lib/ai_moderation/spam_scanner.rb @@ -288,11 +288,16 @@ module DiscourseAi end context << "\nPost Author Information:" - if post.user # during test we may not have a user - context << "- Username: #{post.user.username}" - context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days" - context << "- Total posts: #{post.user.post_count}" - context << "- Trust level: #{post.user.trust_level}" + if user = post.user # during test we may not have a user + context << "- Username: #{user.username}\n" + context << "- Email: #{user.email}\n" + context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n" + context << "- Total posts: #{user.post_count}\n" + context << "- Trust level: #{user.trust_level}\n" + if info = location_info(user) + context << "- Registration Location: #{info[:registration]}\n" if info[:registration] + context << "- Last Location: #{info[:last]}\n" if info[:last] + end end context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n" @@ -300,6 +305,33 @@ module DiscourseAi context.join("\n") end + def self.location_info(user) + registration, last = nil + if user.ip_address.present? 
+ info = DiscourseIpInfo.get(user.ip_address, resolve_hostname: true) + last = "#{info[:location]} (#{info[:organization]})" if info && info[:location].present? + end + if user.registration_ip_address.present? + info = DiscourseIpInfo.get(user.registration_ip_address, resolve_hostname: true) + registration = "#{info[:location]} (#{info[:organization]})" if info && + info[:location].present? + end + + rval = nil + if registration || last + rval = { registration: registration } if registration + if last && last != registration + rval ||= {} + rval[:last] = last + end + end + + rval + rescue => e + Discourse.warn_exception(e, message: "Failed to lookup location info") + nil + end + def self.build_system_prompt(custom_instructions) base_prompt = +<<~PROMPT You are a spam detection system. Analyze the following post content and context. diff --git a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb index ea9c1b97..e3d3d97f 100644 --- a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb +++ b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb @@ -289,4 +289,30 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do expect(post.user.reload.silenced?).to eq(false) end end + + it "includes location information and email in context" do + user.update!(ip_address: "1.2.3.4", registration_ip_address: "5.6.7.8") + + ip_info_registration = { location: "New York", organization: "ISP1" } + ip_info_last = { location: "London", organization: "ISP2" } + + DiscourseIpInfo + .stubs(:get) + .with("5.6.7.8", resolve_hostname: true) + .returns(ip_info_registration) + DiscourseIpInfo.stubs(:get).with("1.2.3.4", resolve_hostname: true).returns(ip_info_last) + + prompts = nil + DiscourseAi::Completions::Llm.with_prepared_responses( + ["spam", "just because"], + ) do |_, _, _prompts| + prompts = _prompts + described_class.test_post(post) + end + + context = prompts.first.messages[1][:content] + expect(context).to 
include("Registration Location: New York (ISP1)") + expect(context).to include("Last Location: London (ISP2)") + expect(context).to include(user.email) + end end