From edba5c631fc4fe6d07ccbc2c697860e3439d1587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9gis=20Hanol?= Date: Sat, 12 Jul 2014 01:59:43 +0200 Subject: [PATCH 1/2] FEATURE: reject accounts created with an email address similar to a known spammer email --- app/models/screened_email.rb | 34 ++++++++++++++++++++++++++++- config/locales/server.en.yml | 2 ++ config/site_settings.yml | 1 + spec/models/screened_email_spec.rb | 35 ++++++++++++++++++++---------- 4 files changed, 60 insertions(+), 12 deletions(-) diff --git a/app/models/screened_email.rb b/app/models/screened_email.rb index f10c80a8bed..a7bb72f3ebf 100644 --- a/app/models/screened_email.rb +++ b/app/models/screened_email.rb @@ -17,11 +17,43 @@ class ScreenedEmail < ActiveRecord::Base end def self.should_block?(email) - screened_email = ScreenedEmail.find_by(email: email) + screened_emails = ScreenedEmail.order(created_at: :desc).limit(100) + + distances = {} + screened_emails.each { |se| distances[se.email] = levenshtein(se.email, email) } + + max_distance = SiteSetting.levenshtein_distance_spammer_emails + screened_email = screened_emails.select { |se| distances[se.email] <= max_distance } + .sort { |se| distances[se.email] } + .first + screened_email.record_match! if screened_email + screened_email && screened_email.action_type == actions[:block] end + def self.levenshtein(first, second) + matrix = [(0..first.length).to_a] + (1..second.length).each do |j| + matrix << [j] + [0] * (first.length) + end + + (1..second.length).each do |i| + (1..first.length).each do |j| + if first[j-1] == second[i-1] + matrix[i][j] = matrix[i-1][j-1] + else + matrix[i][j] = [ + matrix[i-1][j], + matrix[i][j-1], + matrix[i-1][j-1], + ].min + 1 + end + end + end + return matrix.last.last + end + end # == Schema Information diff --git a/config/locales/server.en.yml b/config/locales/server.en.yml index 55ec23ac3b7..ab12202442c 100644 --- a/config/locales/server.en.yml +++ b/config/locales/server.en.yml @@ -890,6 +890,8 @@ en: white_listed_spam_host_domains: "A pipe-delimited list of domains excluded from spam host testing, new users will be able to create an unrestricted count of posts with links to this domain" staff_like_weight: "Extra weighting factor given to likes when performed by staff." + levenshtein_distance_spammer_emails: "Number of characters different from a known spammer email." + reply_by_email_enabled: "Enable replying to topics via email" reply_by_email_address: "Template for reply by email incoming email address, for example: %{reply_key}@reply.example.com or replies+%{reply_key}@example.com" diff --git a/config/site_settings.yml b/config/site_settings.yml index a7a17f98717..ca749c54cbf 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -527,6 +527,7 @@ spam: white_listed_spam_host_domains: default: '' type: list + levenshtein_distance_spammer_emails: 2 rate_limits: unique_posts_mins: diff --git a/spec/models/screened_email_spec.rb b/spec/models/screened_email_spec.rb index 3e242034bf9..e607a7a0cf8 100644 --- a/spec/models/screened_email_spec.rb +++ b/spec/models/screened_email_spec.rb @@ -3,33 +3,34 @@ require 'spec_helper' describe ScreenedEmail do let(:email) { 'block@spamfromhome.org' } + let(:similar_email) { 'bl0ck@spamfromhome.org' } describe "new record" do it "sets a default action_type" do - described_class.create(email: email).action_type.should == described_class.actions[:block] + ScreenedEmail.create(email: email).action_type.should == ScreenedEmail.actions[:block] end it "last_match_at is null" do # If we manually load the table with some emails, we can see whether those emails # have ever been blocked by looking at last_match_at. - described_class.create(email: email).last_match_at.should be_nil + ScreenedEmail.create(email: email).last_match_at.should be_nil end end describe '#block' do context 'email is not being blocked' do it 'creates a new record with default action of :block' do - record = described_class.block(email) + record = ScreenedEmail.block(email) record.should_not be_new_record record.email.should == email - record.action_type.should == described_class.actions[:block] + record.action_type.should == ScreenedEmail.actions[:block] end it 'lets action_type be overriden' do - record = described_class.block(email, action_type: described_class.actions[:do_nothing]) + record = ScreenedEmail.block(email, action_type: ScreenedEmail.actions[:do_nothing]) record.should_not be_new_record record.email.should == email - record.action_type.should == described_class.actions[:do_nothing] + record.action_type.should == ScreenedEmail.actions[:do_nothing] end end @@ -37,22 +38,34 @@ describe ScreenedEmail do let!(:existing) { Fabricate(:screened_email, email: email) } it "doesn't create a new record" do - expect { described_class.block(email) }.to_not change { described_class.count } + expect { ScreenedEmail.block(email) }.to_not change { ScreenedEmail.count } end it "returns the existing record" do - described_class.block(email).should == existing + ScreenedEmail.block(email).should == existing end end end describe '#should_block?' do - subject { described_class.should_block?(email) } + subject { ScreenedEmail.should_block?(email) } it "returns false if a record with the email doesn't exist" do subject.should be_false end + it "returns true when there is a record with the email" do + ScreenedEmail.should_block?(email).should be_false + ScreenedEmail.create(email: email).save + ScreenedEmail.should_block?(email).should be_true + end + + it "returns true when there is a record with a similar email" do + ScreenedEmail.should_block?(email).should be_false + ScreenedEmail.create(email: similar_email).save + ScreenedEmail.should_block?(email).should be_true + end + shared_examples "when a ScreenedEmail record matches" do it "updates statistics" do Timecop.freeze(Time.zone.now) do @@ -63,13 +76,13 @@ describe ScreenedEmail do end context "action_type is :block" do - let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: described_class.actions[:block]) } + let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: ScreenedEmail.actions[:block]) } it { should be_true } include_examples "when a ScreenedEmail record matches" end context "action_type is :do_nothing" do - let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: described_class.actions[:do_nothing]) } + let!(:screened_email) { Fabricate(:screened_email, email: email, action_type: ScreenedEmail.actions[:do_nothing]) } it { should be_false } include_examples "when a ScreenedEmail record matches" end From d19a0bc06c684de8a4ad0d6e3a45950908a89904 Mon Sep 17 00:00:00 2001 From: Jeff Atwood Date: Fri, 11 Jul 2014 17:19:45 -0700 Subject: [PATCH 2/2] add safety for max levenshtein spammer distance --- config/site_settings.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/config/site_settings.yml b/config/site_settings.yml index ca749c54cbf..e3f7ee05185 100644 --- a/config/site_settings.yml +++ b/config/site_settings.yml @@ -527,7 +527,10 @@ spam: white_listed_spam_host_domains: default: '' type: list - levenshtein_distance_spammer_emails: 2 + levenshtein_distance_spammer_emails: + default: 2 + min: 0 + max: 3 rate_limits: unique_posts_mins: