FEATURE: Add user location info to spam scanner context (#1076)

This adds the user's email, together with registration and last known IP location information, to the scanning context. It gives the spam scanner another hint about possibly malicious users: for example, an account that registered in India but is replying from Australia, or an email that is clearly a throwaway address.
2025-07-01 20:12:15 +00:00 · 2025-01-21 17:51:21 +11:00 · 2025-01-21 17:51:21 +11:00 · 2c609e165b
commit 2c609e165b
parent 7957796e56
2 changed files with 63 additions and 5 deletions
--- a/lib/ai_moderation/spam_scanner.rb
+++ b/lib/ai_moderation/spam_scanner.rb
@ -288,11 +288,16 @@ module DiscourseAi
        end
        context << "\nPost Author Information:"
-        if post.user # during test we may not have a user
+        if user = post.user # during test we may not have a user
-          context << "- Username: #{post.user.username}"
+          context << "- Username: #{user.username}\n"
-          context << "- Account age: #{(Time.current - post.user.created_at).to_i / 86_400} days"
+          context << "- Email: #{user.email}\n"
-          context << "- Total posts: #{post.user.post_count}"
+          context << "- Account age: #{(Time.current - user.created_at).to_i / 86_400} days\n"
-          context << "- Trust level: #{post.user.trust_level}"
+          context << "- Total posts: #{user.post_count}\n"
          context << "- Trust level: #{user.trust_level}\n"
          if info = location_info(user)
            context << "- Registration Location: #{info[:registration]}\n" if info[:registration]
            context << "- Last Location: #{info[:last]}\n" if info[:last]
          end
        end
        context << "\nPost Content (first #{MAX_RAW_SCAN_LENGTH} chars):\n"
@ -300,6 +305,33 @@ module DiscourseAi
        context.join("\n")
      end
      # Summarises where +user+ registered and where they were last seen, using
      # DiscourseIpInfo lookups on the user's stored IP addresses.
      #
      # user - object responding to +ip_address+ and +registration_ip_address+.
      #
      # Returns a Hash with the optional keys +:registration+ and +:last+, each a
      # string of the form "Location (Organization)" (or just "Location" when the
      # lookup has no organization). +:last+ is omitted when it is identical to
      # +:registration+. Returns nil when no location could be resolved, or when
      # the lookup raises (the error is reported via Discourse.warn_exception).
      def self.location_info(user)
        # Shared lookup + formatting for both addresses; yields nil when the IP
        # is blank or the lookup has no usable location.
        # NOTE(review): resolve_hostname: true is kept as-is, but only :location
        # and :organization are read here - confirm the hostname lookup is needed.
        describe = lambda do |ip|
          next unless ip.present?

          info = DiscourseIpInfo.get(ip, resolve_hostname: true)
          next unless info && info[:location].present?

          organization = info[:organization]
          # Avoid emitting a dangling "()" when the organization is unknown.
          organization.present? ? "#{info[:location]} (#{organization})" : info[:location].to_s
        end

        last = describe.call(user.ip_address)
        registration = describe.call(user.registration_ip_address)

        locations = {}
        locations[:registration] = registration if registration
        # Only report the last location when it adds information.
        locations[:last] = last if last && last != registration
        locations.empty? ? nil : locations
      rescue => e
        Discourse.warn_exception(e, message: "Failed to lookup location info")
        nil
      end
      def self.build_system_prompt(custom_instructions)
        base_prompt = +<<~PROMPT
          You are a spam detection system. Analyze the following post content and context.
--- a/spec/lib/modules/ai_moderation/spam_scanner_spec.rb
+++ b/spec/lib/modules/ai_moderation/spam_scanner_spec.rb
@ -289,4 +289,30 @@ RSpec.describe DiscourseAi::AiModeration::SpamScanner do
      expect(post.user.reload.silenced?).to eq(false)
    end
  end
  it "includes location information and email in context" do
    user.update!(ip_address: "1.2.3.4", registration_ip_address: "5.6.7.8")

    # Stubbed DiscourseIpInfo results, keyed by IP: registration IP first,
    # then the last known IP.
    stubbed_lookups = {
      "5.6.7.8" => { location: "New York", organization: "ISP1" },
      "1.2.3.4" => { location: "London", organization: "ISP2" },
    }
    stubbed_lookups.each do |ip, ip_info|
      DiscourseIpInfo.stubs(:get).with(ip, resolve_hostname: true).returns(ip_info)
    end

    # Capture the prompts handed to the LLM while the post is scanned.
    captured_prompts = nil
    canned_responses = ["spam", "just because"]
    DiscourseAi::Completions::Llm.with_prepared_responses(canned_responses) do |_, _, seen_prompts|
      captured_prompts = seen_prompts
      described_class.test_post(post)
    end

    # The second message of the first prompt is read as the scan context.
    scan_context = captured_prompts.first.messages[1][:content]

    [
      "Registration Location: New York (ISP1)",
      "Last Location: London (ISP2)",
      user.email,
    ].each { |expected_fragment| expect(scan_context).to include(expected_fragment) }
  end
 end