Introduction of TextSentinel to enforce title and body quality.
This commit is contained in:
parent
94e58d733e
commit
40da901e5d
|
@ -38,6 +38,7 @@ class Post < ActiveRecord::Base
|
|||
|
||||
validates_presence_of :raw, :user_id, :topic_id
|
||||
validates :raw, length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length}
|
||||
validate :raw_quality
|
||||
validate :max_mention_validator
|
||||
validate :max_images_validator
|
||||
validate :max_links_validator
|
||||
|
@ -68,6 +69,18 @@ class Post < ActiveRecord::Base
|
|||
self.raw.strip! if self.raw.present?
|
||||
end
|
||||
|
||||
def raw_quality
|
||||
|
||||
sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy)
|
||||
if sentinel.valid?
|
||||
# It's possible the sentinel has cleaned up the title a bit
|
||||
self.raw = sentinel.text
|
||||
else
|
||||
errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
# Stop us from posting the same thing too quickly
|
||||
def unique_post_validator
|
||||
return if SiteSetting.unique_posts_mins == 0
|
||||
|
|
|
@ -126,6 +126,11 @@ class SiteSetting < ActiveRecord::Base
|
|||
setting(:basic_requires_read_posts, 100)
|
||||
setting(:basic_requires_time_spent_mins, 30)
|
||||
|
||||
# Entropy checks
|
||||
setting(:title_min_entropy, 10)
|
||||
setting(:body_min_entropy, 7)
|
||||
setting(:max_word_length, 30)
|
||||
|
||||
|
||||
def self.call_mothership?
|
||||
self.enforce_global_nicknames? and self.discourse_org_access_key.present?
|
||||
|
|
|
@ -2,6 +2,7 @@ require_dependency 'slug'
|
|||
require_dependency 'avatar_lookup'
|
||||
require_dependency 'topic_view'
|
||||
require_dependency 'rate_limiter'
|
||||
require_dependency 'text_sentinel'
|
||||
|
||||
class Topic < ActiveRecord::Base
|
||||
include RateLimiter::OnCreateRecord
|
||||
|
@ -18,13 +19,14 @@ class Topic < ActiveRecord::Base
|
|||
rate_limit :limit_topics_per_day
|
||||
rate_limit :limit_private_messages_per_day
|
||||
|
||||
validate :title_quality
|
||||
validates_presence_of :title
|
||||
validates :title, length: {in: SiteSetting.min_topic_title_length..SiteSetting.max_topic_title_length}
|
||||
|
||||
serialize :meta_data, ActiveRecord::Coders::Hstore
|
||||
|
||||
validate :unique_title
|
||||
validate :nuclear_option
|
||||
|
||||
|
||||
belongs_to :category
|
||||
has_many :posts
|
||||
|
@ -113,18 +115,20 @@ class Topic < ActiveRecord::Base
|
|||
errors.add(:title, I18n.t(:has_already_been_used)) if finder.exists?
|
||||
end
|
||||
|
||||
# This is bad, but people are screwing us on try.discourse.org - soon we'll replace with
|
||||
# a much more sane validation of odd characters to allow for other languages and such.
|
||||
def nuclear_option
|
||||
|
||||
# Let presence validation catch it if it's blank
|
||||
return if title.blank?
|
||||
|
||||
title.each_char do |c|
|
||||
unless (20..126).include?(c.ord)
|
||||
errors.add(:title, I18n.t(:invalid_characters))
|
||||
return
|
||||
end
|
||||
def title_quality
|
||||
# We don't care about quality on private messages
|
||||
return if private_message?
|
||||
|
||||
sentinel = TextSentinel.new(title,
|
||||
min_entropy: SiteSetting.title_min_entropy,
|
||||
max_word_length: SiteSetting.max_word_length,
|
||||
remove_interior_spaces: true)
|
||||
if sentinel.valid?
|
||||
# It's possible the sentinel has cleaned up the title a bit
|
||||
self.title = sentinel.text
|
||||
else
|
||||
errors.add(:title, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ en:
|
|||
just_posted_that: "is too similar to what you recently posted"
|
||||
has_already_been_used: "has already been used"
|
||||
invalid_characters: "contains invalid characters"
|
||||
is_invalid: "is invalid; try to be a little more descriptive"
|
||||
|
||||
activerecord:
|
||||
attributes:
|
||||
|
@ -302,6 +303,10 @@ en:
|
|||
email_time_window_mins: "How many minutes we wait before sending a user mail, to give them a chance to see it first."
|
||||
flush_timings_secs: "How frequently we flush timing data to the server, in seconds."
|
||||
|
||||
max_word_length: "The maximum word length in a topic title"
|
||||
title_min_entropy: "The minimum entropy for a topic title"
|
||||
body_min_entropy: "The minimum entropy for post body"
|
||||
|
||||
# This section is exported to the javascript for i18n in the admin section
|
||||
admin_js:
|
||||
type_to_filter: "Type to Filter..."
|
||||
|
@ -689,7 +694,7 @@ en:
|
|||
no_posted: "You haven't posted in any topics yet."
|
||||
no_popular: "There are no popular topics. That's sad."
|
||||
|
||||
topic:
|
||||
topic:
|
||||
create_in: 'Create {{categoryName}} Topic'
|
||||
create: 'Create Topic'
|
||||
create_long: 'Create a new Topic'
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
require 'iconv'
|
||||
|
||||
#
|
||||
# Given a string, tell us whether or not is acceptable. Also, remove stuff we don't like
|
||||
# such as leading / trailing space.
|
||||
#
|
||||
class TextSentinel
|
||||
|
||||
attr_accessor :text
|
||||
|
||||
def self.non_symbols_regexp
|
||||
/[\ -\/\[-\`\:-\@\{-\~]/m
|
||||
end
|
||||
|
||||
def initialize(text, opts=nil)
|
||||
if text.present?
|
||||
@text = Iconv.new('UTF-8//IGNORE', 'UTF-8').iconv(text.dup)
|
||||
end
|
||||
|
||||
@opts = opts || {}
|
||||
|
||||
if @text.present?
|
||||
@text.strip!
|
||||
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
|
||||
end
|
||||
end
|
||||
|
||||
# Entropy is a number of how many unique characters the string needs.
|
||||
def entropy
|
||||
return 0 if @text.blank?
|
||||
@entropy ||= @text.each_char.to_a.uniq.size
|
||||
end
|
||||
|
||||
def valid?
|
||||
|
||||
# Blank strings are not valid
|
||||
return false if @text.blank?
|
||||
|
||||
# Entropy check if required
|
||||
return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy])
|
||||
|
||||
# We don't have a comprehensive list of symbols, but this will eliminate some noise
|
||||
non_symbols = @text.gsub(TextSentinel.non_symbols_regexp, '').size
|
||||
return false if non_symbols == 0
|
||||
|
||||
# Don't allow super long strings without spaces
|
||||
|
||||
return false if @opts[:max_word_length] and @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/
|
||||
|
||||
# We don't allow all upper case content
|
||||
return false if @text == @text.upcase
|
||||
|
||||
true
|
||||
end
|
||||
|
||||
end
|
|
@ -11,7 +11,7 @@ describe PostCreator do
|
|||
|
||||
context 'new topic' do
|
||||
let(:category) { Fabricate(:category, user: user) }
|
||||
let(:basic_topic_params) { {title: 'hello world', raw: 'my name is fred', archetype_id: 1} }
|
||||
let(:basic_topic_params) { {title: 'hello world topic', raw: 'my name is fred', archetype_id: 1} }
|
||||
let(:image_sizes) { {'http://an.image.host/image.jpg' => {'width' => 111, 'height' => 222}} }
|
||||
|
||||
let(:creator) { PostCreator.new(user, basic_topic_params) }
|
||||
|
@ -83,7 +83,7 @@ describe PostCreator do
|
|||
let(:target_user1) { Fabricate(:coding_horror) }
|
||||
let(:target_user2) { Fabricate(:moderator) }
|
||||
let(:post) do
|
||||
PostCreator.create(user, title: 'hi there',
|
||||
PostCreator.create(user, title: 'hi there welcome to my topic',
|
||||
raw: 'this is my awesome message',
|
||||
archetype: Archetype.private_message,
|
||||
target_usernames: [target_user1.username, target_user2.username].join(','))
|
||||
|
|
|
@ -14,7 +14,7 @@ describe Search do
|
|||
context 'post indexing observer' do
|
||||
before do
|
||||
@category = Fabricate(:category, name: 'america')
|
||||
@topic = Fabricate(:topic, title: 'sam test', category: @category)
|
||||
@topic = Fabricate(:topic, title: 'sam test topic', category: @category)
|
||||
@post = Fabricate(:post, topic: @topic, raw: 'this <b>fun test</b> <img src="bla" title="my image">')
|
||||
@indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"]
|
||||
end
|
||||
|
@ -29,7 +29,7 @@ describe Search do
|
|||
end
|
||||
|
||||
it "should pick up on title updates" do
|
||||
@topic.title = "harpi"
|
||||
@topic.title = "harpi is the new title"
|
||||
@topic.save!
|
||||
@indexed = Topic.exec_sql("select search_data from posts_search where id = #{@post.id}").first["search_data"]
|
||||
|
||||
|
|
|
@ -0,0 +1,96 @@
|
|||
# encoding: utf-8
|
||||
|
||||
require 'spec_helper'
|
||||
require 'text_sentinel'
|
||||
require 'iconv'
|
||||
|
||||
describe TextSentinel do
|
||||
|
||||
|
||||
context "entropy" do
|
||||
|
||||
|
||||
it "returns 0 for an empty string" do
|
||||
TextSentinel.new("").entropy.should == 0
|
||||
end
|
||||
|
||||
it "returns 0 for a nil string" do
|
||||
TextSentinel.new(nil).entropy.should == 0
|
||||
end
|
||||
|
||||
it "returns 1 for a string with many leading spaces" do
|
||||
TextSentinel.new((" " * 10) + "x").entropy.should == 1
|
||||
end
|
||||
|
||||
it "returns 1 for one char, even repeated" do
|
||||
TextSentinel.new("a" * 10).entropy.should == 1
|
||||
end
|
||||
|
||||
it "returns an accurate count of many chars" do
|
||||
TextSentinel.new("evil trout is evil").entropy.should == 10
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "cleaning up" do
|
||||
|
||||
it "strips leading or trailing whitespace" do
|
||||
TextSentinel.new(" \t test \t ").text.should == "test"
|
||||
end
|
||||
|
||||
it "allows utf-8 chars" do
|
||||
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
|
||||
end
|
||||
|
||||
context "interior spaces" do
|
||||
|
||||
let(:spacey_string) { "hello there's weird spaces here." }
|
||||
|
||||
it "ignores intra spaces by default" do
|
||||
TextSentinel.new(spacey_string).text.should == spacey_string
|
||||
end
|
||||
|
||||
it "fixes intra spaces when enabled" do
|
||||
TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == "hello there's weird spaces here."
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "validity" do
|
||||
|
||||
let(:valid_string) { "This is a cool topic about Discourse" }
|
||||
|
||||
it "allows a valid string" do
|
||||
TextSentinel.new(valid_string).should be_valid
|
||||
end
|
||||
|
||||
it "doesn't allow all caps topics" do
|
||||
TextSentinel.new(valid_string.upcase).should_not be_valid
|
||||
end
|
||||
|
||||
it "enforces the minimum entropy" do
|
||||
TextSentinel.new(valid_string, min_entropy: 16).should be_valid
|
||||
end
|
||||
|
||||
it "enforces the minimum entropy" do
|
||||
TextSentinel.new(valid_string, min_entropy: 17).should_not be_valid
|
||||
end
|
||||
|
||||
it "doesn't allow a long alphanumeric string with no spaces" do
|
||||
TextSentinel.new("jfewjfoejwfojeojfoejofjeo38493824jfkjewfjeoifijeoijfoejofjeojfoewjfo834988394032jfiejoijofijeojfeojfojeofjewojfojeofjeowjfojeofjeojfoe3898439849032jfeijfwoijfoiewj",
|
||||
max_word_length: 30).should_not be_valid
|
||||
end
|
||||
|
||||
it "doesn't except junk symbols as a string" do
|
||||
TextSentinel.new("[[[").should_not be_valid
|
||||
TextSentinel.new("<<<").should_not be_valid
|
||||
TextSentinel.new("{{$!").should_not be_valid
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
|
||||
end
|
|
@ -11,11 +11,11 @@ describe TopicQuery do
|
|||
let(:admin) { Fabricate(:moderator) }
|
||||
|
||||
context 'a bunch of topics' do
|
||||
let!(:regular_topic) { Fabricate(:topic, title: 'regular', user: creator, bumped_at: 15.minutes.ago) }
|
||||
let!(:pinned_topic) { Fabricate(:topic, title: 'pinned', user: creator, pinned: true, bumped_at: 10.minutes.ago) }
|
||||
let!(:archived_topic) { Fabricate(:topic, title: 'archived', user: creator, archived: true, bumped_at: 6.minutes.ago) }
|
||||
let!(:invisible_topic) { Fabricate(:topic, title: 'invisible', user: creator, visible: false, bumped_at: 5.minutes.ago) }
|
||||
let!(:closed_topic) { Fabricate(:topic, title: 'closed', user: creator, closed: true, bumped_at: 1.minute.ago) }
|
||||
let!(:regular_topic) { Fabricate(:topic, title: 'this is a regular topic', user: creator, bumped_at: 15.minutes.ago) }
|
||||
let!(:pinned_topic) { Fabricate(:topic, title: 'this is a pinned topic', user: creator, pinned: true, bumped_at: 10.minutes.ago) }
|
||||
let!(:archived_topic) { Fabricate(:topic, title: 'this is an archived topic', user: creator, archived: true, bumped_at: 6.minutes.ago) }
|
||||
let!(:invisible_topic) { Fabricate(:topic, title: 'this is an invisible topic', user: creator, visible: false, bumped_at: 5.minutes.ago) }
|
||||
let!(:closed_topic) { Fabricate(:topic, title: 'this is a closed topic', user: creator, closed: true, bumped_at: 1.minute.ago) }
|
||||
|
||||
context 'list_popular' do
|
||||
let(:topics) { topic_query.list_popular.topics }
|
||||
|
|
|
@ -334,9 +334,9 @@ describe TopicsController do
|
|||
end
|
||||
|
||||
it 'allows a change of title' do
|
||||
xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'new title'
|
||||
xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'this is a new title for the topic'
|
||||
@topic.reload
|
||||
@topic.title.should == 'new title'
|
||||
@topic.title.should == 'this is a new title for the topic'
|
||||
end
|
||||
|
||||
it 'triggers a change of category' do
|
||||
|
|
|
@ -30,7 +30,7 @@ describe PostAlertObserver do
|
|||
context 'when editing a post' do
|
||||
it 'notifies a user of the revision' do
|
||||
lambda {
|
||||
post.revise(evil_trout, "world")
|
||||
post.revise(evil_trout, "world is the new body of the message")
|
||||
}.should change(post.user.notifications, :count).by(1)
|
||||
end
|
||||
end
|
||||
|
|
|
@ -5,9 +5,6 @@ require 'spec_helper'
|
|||
describe Topic do
|
||||
|
||||
it { should validate_presence_of :title }
|
||||
it { should_not allow_value("x" * (SiteSetting.max_topic_title_length + 1)).for(:title) }
|
||||
it { should_not allow_value("x").for(:title) }
|
||||
it { should_not allow_value((" " * SiteSetting.min_topic_title_length) + "x").for(:title) }
|
||||
|
||||
it { should belong_to :category }
|
||||
it { should belong_to :user }
|
||||
|
@ -26,14 +23,26 @@ describe Topic do
|
|||
|
||||
it { should rate_limit }
|
||||
|
||||
context 'topic title content' do
|
||||
context '.title_quality' do
|
||||
|
||||
it "strips a title when identifying length" do
|
||||
Fabricate.build(:topic, title: (" " * SiteSetting.min_topic_title_length) + "x").should_not be_valid
|
||||
end
|
||||
|
||||
it "doesn't allow a long title" do
|
||||
Fabricate.build(:topic, title: "x" * (SiteSetting.max_topic_title_length + 1)).should_not be_valid
|
||||
end
|
||||
|
||||
it "doesn't allow a short title" do
|
||||
Fabricate.build(:topic, title: "x" * (SiteSetting.min_topic_title_length + 1)).should_not be_valid
|
||||
end
|
||||
|
||||
it "allows a regular title with a few ascii characters" do
|
||||
Fabricate.build(:topic, title: "hello this is my cool topic! welcome: all;").should be_valid
|
||||
end
|
||||
|
||||
it "doesn't allow non standard ascii" do
|
||||
Fabricate.build(:topic, title: "Iñtërnâtiônàlizætiøn").should_not be_valid
|
||||
it "allows non ascii" do
|
||||
Fabricate.build(:topic, title: "Iñtërnâtiônàlizætiøn").should be_valid
|
||||
end
|
||||
|
||||
end
|
||||
|
@ -830,7 +839,7 @@ describe Topic do
|
|||
|
||||
context 'changing title' do
|
||||
before do
|
||||
topic.title = "new title"
|
||||
topic.title = "new title for the topic"
|
||||
topic.save
|
||||
end
|
||||
|
||||
|
|
Loading…
Reference in New Issue