auto replace rules in titles
This commit is contained in:
parent
33e3ad1603
commit
c5cf8be864
|
@ -40,12 +40,12 @@ class PostsController < ApplicationController
|
|||
post.image_sizes = params[:image_sizes] if params[:image_sizes].present?
|
||||
guardian.ensure_can_edit!(post)
|
||||
|
||||
# to stay consistent with the create api,
|
||||
# to stay consistent with the create api,
|
||||
# we should allow for title changes and category changes here
|
||||
# we should also move all of this to a post updater.
|
||||
if post.post_number == 1 && (params[:title] || params[:post][:category])
|
||||
if post.post_number == 1 && (params[:title] || params[:post][:category])
|
||||
post.topic.title = params[:title] if params[:title]
|
||||
Topic.transaction do
|
||||
Topic.transaction do
|
||||
post.topic.change_category(params[:post][:category])
|
||||
post.topic.save
|
||||
end
|
||||
|
|
|
@ -56,7 +56,9 @@ class TopicsController < ApplicationController
|
|||
topic.change_category(params[:category])
|
||||
end
|
||||
|
||||
render nothing: true
|
||||
# this is used to return the title to the client as it may have been
|
||||
# changed by "TextCleaner"
|
||||
render_serialized(topic, BasicTopicSerializer)
|
||||
end
|
||||
|
||||
def similar_to
|
||||
|
|
|
@ -52,16 +52,10 @@ class Post < ActiveRecord::Base
|
|||
end
|
||||
|
||||
def raw_quality
|
||||
sentinel = TextSentinel.new(raw, min_entropy: SiteSetting.body_min_entropy)
|
||||
if sentinel.valid?
|
||||
# It's possible the sentinel has cleaned up the title a bit
|
||||
self.raw = sentinel.text
|
||||
else
|
||||
errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||
end
|
||||
sentinel = TextSentinel.body_sentinel(raw)
|
||||
errors.add(:raw, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||
end
|
||||
|
||||
|
||||
# Stop us from posting the same thing too quickly
|
||||
def unique_post_validator
|
||||
return if SiteSetting.unique_posts_mins == 0
|
||||
|
|
|
@ -118,12 +118,7 @@ class PostAction < ActiveRecord::Base
|
|||
def message_quality
|
||||
return if message.blank?
|
||||
sentinel = TextSentinel.title_sentinel(message)
|
||||
if sentinel.valid?
|
||||
# It's possible the sentinel has cleaned up the title a bit
|
||||
self.message = sentinel.text
|
||||
else
|
||||
errors.add(:message, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||
end
|
||||
errors.add(:message, I18n.t(:is_invalid)) unless sentinel.valid?
|
||||
end
|
||||
|
||||
before_create do
|
||||
|
|
|
@ -42,6 +42,9 @@ class SiteSetting < ActiveRecord::Base
|
|||
# cf. https://github.com/discourse/discourse/pull/462#issuecomment-14991562
|
||||
client_setting(:category_colors, 'BF1E2E|F1592A|F7941D|9EB83B|3AB54A|12A89D|25AAE2|0E76BD|652D90|92278F|ED207B|8C6238|231F20|808281|B3B5B4|283890')
|
||||
|
||||
# auto-replace rules for title
|
||||
setting(:title_prettify, true)
|
||||
|
||||
client_setting(:max_upload_size_kb, 1024)
|
||||
|
||||
# settings only available server side
|
||||
|
|
|
@ -3,6 +3,7 @@ require_dependency 'avatar_lookup'
|
|||
require_dependency 'topic_view'
|
||||
require_dependency 'rate_limiter'
|
||||
require_dependency 'text_sentinel'
|
||||
require_dependency 'text_cleaner'
|
||||
|
||||
class Topic < ActiveRecord::Base
|
||||
include ActionView::Helpers
|
||||
|
@ -143,8 +144,8 @@ class Topic < ActiveRecord::Base
|
|||
|
||||
sentinel = TextSentinel.title_sentinel(title)
|
||||
if sentinel.valid?
|
||||
# It's possible the sentinel has cleaned up the title a bit
|
||||
self.title = sentinel.text
|
||||
# clean up the title
|
||||
self.title = TextCleaner.clean_title(sentinel.text)
|
||||
else
|
||||
errors.add(:title, I18n.t(:is_invalid))
|
||||
end
|
||||
|
|
|
@ -495,6 +495,8 @@ en:
|
|||
max_upload_size_kb: "The maximum size of files we allow users to upload in kb - be sure to configure the limit in nginx (client_max_body_size) / apache or proxy as well."
|
||||
max_similar_results: "How many similar topics to show a user while they are composing a new topic"
|
||||
|
||||
title_prettify: "Prevent common title typos and errors, including all caps, lowercase first character, multiple ! and ?, extra . at end, etc."
|
||||
|
||||
notification_types:
|
||||
mentioned: "%{display_username} mentioned you in %{link}"
|
||||
liked: "%{display_username} liked your post in %{link}"
|
||||
|
|
|
@ -502,6 +502,8 @@ fr:
|
|||
max_upload_size_kb: "La taille maximum des fichiers que les utilisateurs peuvent envoyer en Kb - assurez-vous de configurer également cette limite dans nginx (client_max_body_size) / apache ou votre proxy."
|
||||
max_similar_results: "Nombre de discussions similaires à afficher lorsqu'un utilisateur est en train de créer une nouvelle discussion"
|
||||
|
||||
title_prettify: "Corrige les coquilles les plus communes dans les titres (intégralité du titre en majuscule, première lettre en minuscule, de multiples ! et ?, un . inutile à la fin, etc.)"
|
||||
|
||||
notification_types:
|
||||
mentioned: "%{display_username} vous a mentionné dans %{link}"
|
||||
liked: "%{display_username} a aimé votre message dans %{link}"
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
#
|
||||
# Clean up a text
|
||||
#
|
||||
class TextCleaner
|
||||
|
||||
def self.title_options
|
||||
# cf. http://meta.discourse.org/t/should-we-have-auto-replace-rules-in-titles/5687
|
||||
{
|
||||
deduplicate_exclamation_marks: SiteSetting.title_prettify,
|
||||
deduplicate_question_marks: SiteSetting.title_prettify,
|
||||
replace_all_upper_case: SiteSetting.title_prettify,
|
||||
capitalize_first_letter: SiteSetting.title_prettify,
|
||||
remove_unnecessary_period: SiteSetting.title_prettify,
|
||||
remove_extraneous_space: SiteSetting.title_prettify && SiteSetting.default_locale == "en",
|
||||
fixes_interior_spaces: true,
|
||||
strip_whitespaces: true
|
||||
}
|
||||
end
|
||||
|
||||
def self.clean_title(title)
|
||||
TextCleaner.clean(title, TextCleaner.title_options)
|
||||
end
|
||||
|
||||
def self.clean(text, opts = {})
|
||||
# Replace !!!!! with a single !
|
||||
text.gsub!(/!+/, '!') if opts[:deduplicate_exclamation_marks]
|
||||
# Replace ????? with a single ?
|
||||
text.gsub!(/\?+/, '?') if opts[:deduplicate_question_marks]
|
||||
# Replace all-caps text with regular case letters
|
||||
text.tr!('A-Z', 'a-z') if opts[:replace_all_upper_case] && (text =~ /[A-Z]+/) && (text == text.upcase)
|
||||
# Capitalize first letter
|
||||
text.sub!(/\A([a-z])/) { |first| first.capitalize } if opts[:capitalize_first_letter]
|
||||
# Remove unnecessary period at the end
|
||||
text.sub!(/([^.])\.(\s*)\z/, '\1\2') if opts[:remove_unnecessary_period]
|
||||
# Remove extraneous space before the end punctuation
|
||||
text.sub!(/\s+([!?]\s*)\z/, '\1') if opts[:remove_extraneous_space]
|
||||
# Fixes interior spaces
|
||||
text.gsub!(/ +/, ' ') if opts[:fixes_interior_spaces]
|
||||
# Strip whitespaces
|
||||
text.strip! if opts[:strip_whitespaces]
|
||||
|
||||
text
|
||||
end
|
||||
|
||||
end
|
|
@ -1,31 +1,27 @@
|
|||
#
|
||||
# Given a string, tell us whether or not is acceptable. Also, remove stuff we don't like
|
||||
# such as leading / trailing space.
|
||||
# Given a string, tell us whether or not is acceptable.
|
||||
#
|
||||
class TextSentinel
|
||||
|
||||
attr_accessor :text
|
||||
|
||||
def initialize(text, opts=nil)
|
||||
@opts = opts || {}
|
||||
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '') if text.present?
|
||||
end
|
||||
|
||||
def self.non_symbols_regexp
|
||||
/[\ -\/\[-\`\:-\@\{-\~]/m
|
||||
end
|
||||
|
||||
def initialize(text, opts=nil)
|
||||
@opts = opts || {}
|
||||
|
||||
if text.present?
|
||||
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
||||
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
|
||||
@text.strip! if @opts[:strip]
|
||||
end
|
||||
def self.body_sentinel(text)
|
||||
TextSentinel.new(text, min_entropy: SiteSetting.body_min_entropy)
|
||||
end
|
||||
|
||||
def self.title_sentinel(text)
|
||||
TextSentinel.new(text,
|
||||
min_entropy: SiteSetting.title_min_entropy,
|
||||
max_word_length: SiteSetting.max_word_length,
|
||||
remove_interior_spaces: true,
|
||||
strip: true)
|
||||
max_word_length: SiteSetting.max_word_length)
|
||||
end
|
||||
|
||||
# Entropy is a number of how many unique characters the string needs.
|
||||
|
@ -35,7 +31,6 @@ class TextSentinel
|
|||
end
|
||||
|
||||
def valid?
|
||||
|
||||
# Blank strings are not valid
|
||||
return false if @text.blank? || @text.strip.blank?
|
||||
|
||||
|
@ -47,12 +42,12 @@ class TextSentinel
|
|||
return false if non_symbols == 0
|
||||
|
||||
# Don't allow super long strings without spaces
|
||||
|
||||
return false if @opts[:max_word_length] && @text =~ /\w{#{@opts[:max_word_length]},}(\s|$)/
|
||||
|
||||
# We don't allow all upper case content in english
|
||||
return false if (@text =~ /[A-Z]+/) && (@text == @text.upcase)
|
||||
|
||||
# It is valid
|
||||
true
|
||||
end
|
||||
|
||||
|
|
|
@ -0,0 +1,193 @@
|
|||
require 'spec_helper'
|
||||
require 'text_cleaner'
|
||||
|
||||
describe TextCleaner do
|
||||
|
||||
context "exclamation marks" do
|
||||
|
||||
let(:duplicated_string) { "my precious!!!!" }
|
||||
let(:deduplicated_string) { "my precious!" }
|
||||
|
||||
it "ignores multiple ! by default" do
|
||||
TextCleaner.clean(duplicated_string).should == duplicated_string
|
||||
end
|
||||
|
||||
it "deduplicates ! when enabled" do
|
||||
TextCleaner.clean(duplicated_string, deduplicate_exclamation_marks: true).should == deduplicated_string
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "question marks" do
|
||||
|
||||
let(:duplicated_string) { "please help me????" }
|
||||
let(:deduplicated_string) { "please help me?" }
|
||||
|
||||
it "ignores multiple ? by default" do
|
||||
TextCleaner.clean(duplicated_string).should == duplicated_string
|
||||
end
|
||||
|
||||
it "deduplicates ? when enabled" do
|
||||
TextCleaner.clean(duplicated_string, deduplicate_question_marks: true).should == deduplicated_string
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "all upper case text" do
|
||||
|
||||
let(:all_caps) { "ENTIRE TEXT IS ALL CAPS" }
|
||||
let(:almost_all_caps) { "ENTIRE TEXT iS ALL CAPS" }
|
||||
let(:regular_case) { "entire text is all caps" }
|
||||
|
||||
it "ignores all upper case text by default" do
|
||||
TextCleaner.clean(all_caps).should == all_caps
|
||||
end
|
||||
|
||||
it "replaces all upper case text with regular case letters when enabled" do
|
||||
TextCleaner.clean(all_caps, replace_all_upper_case: true).should == regular_case
|
||||
end
|
||||
|
||||
it "ignores almost all upper case text when enabled" do
|
||||
TextCleaner.clean(almost_all_caps, replace_all_upper_case: true).should == almost_all_caps
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "first letter" do
|
||||
|
||||
let(:lowercased) { "this is awesome" }
|
||||
let(:capitalized) { "This is awesome" }
|
||||
|
||||
it "ignores first letter case by default" do
|
||||
TextCleaner.clean(lowercased).should == lowercased
|
||||
TextCleaner.clean(capitalized).should == capitalized
|
||||
end
|
||||
|
||||
it "capitalizes first letter when enabled" do
|
||||
TextCleaner.clean(lowercased, capitalize_first_letter: true).should == capitalized
|
||||
TextCleaner.clean(capitalized, capitalize_first_letter: true).should == capitalized
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "period at the end" do
|
||||
|
||||
let(:with_period) { "oops." }
|
||||
let(:with_periods) { "oops..." }
|
||||
let(:without_period) { "oops" }
|
||||
|
||||
it "ignores unnecessary period at the end by default" do
|
||||
TextCleaner.clean(with_period).should == with_period
|
||||
end
|
||||
|
||||
it "removes unnecessary period at the end when enabled" do
|
||||
TextCleaner.clean(with_period, remove_unnecessary_period: true).should == without_period
|
||||
end
|
||||
|
||||
it "keeps ellipsis when enabled" do
|
||||
TextCleaner.clean(with_periods, remove_unnecessary_period: true).should == with_periods
|
||||
end
|
||||
|
||||
it "keeps trailing whitespaces when enabled" do
|
||||
TextCleaner.clean(with_periods + " ", remove_unnecessary_period: true).should == with_periods + " "
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "extraneous space" do
|
||||
|
||||
let(:with_space_exclamation) { "oops !" }
|
||||
let(:without_space_exclamation) { "oops!" }
|
||||
let(:with_space_question) { "oops ?" }
|
||||
let(:without_space_question) { "oops?" }
|
||||
|
||||
it "ignores extraneous space before the end punctuation by default" do
|
||||
TextCleaner.clean(with_space_exclamation).should == with_space_exclamation
|
||||
TextCleaner.clean(with_space_question).should == with_space_question
|
||||
end
|
||||
|
||||
it "removes extraneous space before the end punctuation when enabled" do
|
||||
TextCleaner.clean(with_space_exclamation, remove_extraneous_space: true).should == without_space_exclamation
|
||||
TextCleaner.clean(with_space_question, remove_extraneous_space: true).should == without_space_question
|
||||
end
|
||||
|
||||
it "keep trailing whitespaces when enabled" do
|
||||
TextCleaner.clean(with_space_exclamation + " ", remove_extraneous_space: true).should == without_space_exclamation + " "
|
||||
TextCleaner.clean(with_space_question + " ", remove_extraneous_space: true).should == without_space_question + " "
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "interior spaces" do
|
||||
|
||||
let(:spacey_string) { "hello there's weird spaces here." }
|
||||
let(:unspacey_string) { "hello there's weird spaces here." }
|
||||
|
||||
it "ignores interior spaces by default" do
|
||||
TextCleaner.clean(spacey_string).should == spacey_string
|
||||
end
|
||||
|
||||
it "fixes interior spaces when enabled" do
|
||||
TextCleaner.clean(spacey_string, fixes_interior_spaces: true).should == unspacey_string
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "leading and trailing whitespaces" do
|
||||
|
||||
let(:spacey_string) { " \t test \n " }
|
||||
let(:unspacey_string) { "test" }
|
||||
|
||||
it "ignores leading and trailing whitespaces by default" do
|
||||
TextCleaner.clean(spacey_string).should == spacey_string
|
||||
end
|
||||
|
||||
it "strips leading and trailing whitespaces when enabled" do
|
||||
TextCleaner.clean(spacey_string, strip_whitespaces: true).should == unspacey_string
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "title" do
|
||||
|
||||
it "fixes interior spaces" do
|
||||
TextCleaner.clean_title("Hello there").should == "Hello there"
|
||||
end
|
||||
|
||||
it "strips leading and trailing whitespaces" do
|
||||
TextCleaner.clean_title(" \t Hello there \n ").should == "Hello there"
|
||||
end
|
||||
|
||||
context "title_prettify site setting is enabled" do
|
||||
|
||||
before { SiteSetting.title_prettify = true }
|
||||
|
||||
it "deduplicates !" do
|
||||
TextCleaner.clean_title("Hello there!!!!").should == "Hello there!"
|
||||
end
|
||||
|
||||
it "deduplicates ?" do
|
||||
TextCleaner.clean_title("Hello there????").should == "Hello there?"
|
||||
end
|
||||
|
||||
it "replaces all upper case text with regular case letters" do
|
||||
TextCleaner.clean_title("HELLO THERE").should == "Hello there"
|
||||
end
|
||||
|
||||
it "capitalizes first letter" do
|
||||
TextCleaner.clean_title("hello there").should == "Hello there"
|
||||
end
|
||||
|
||||
it "removes unnecessary period at the end" do
|
||||
TextCleaner.clean_title("Hello there.").should == "Hello there"
|
||||
end
|
||||
|
||||
it "removes extraneous space before the end punctuation" do
|
||||
TextCleaner.clean_title("Hello there ?").should == "Hello there?"
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
end
|
|
@ -5,10 +5,12 @@ require 'text_sentinel'
|
|||
|
||||
describe TextSentinel do
|
||||
|
||||
it "allows utf-8 chars" do
|
||||
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
|
||||
end
|
||||
|
||||
context "entropy" do
|
||||
|
||||
|
||||
it "returns 0 for an empty string" do
|
||||
TextSentinel.new("").entropy.should == 0
|
||||
end
|
||||
|
@ -35,50 +37,6 @@ describe TextSentinel do
|
|||
|
||||
end
|
||||
|
||||
context "cleaning up" do
|
||||
|
||||
it "allows utf-8 chars" do
|
||||
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
|
||||
end
|
||||
|
||||
context "interior spaces" do
|
||||
|
||||
let(:spacey_string) { "hello there's weird spaces here." }
|
||||
let(:unspacey_string) { "hello there's weird spaces here." }
|
||||
|
||||
it "ignores intra spaces by default" do
|
||||
TextSentinel.new(spacey_string).text.should == spacey_string
|
||||
end
|
||||
|
||||
it "fixes intra spaces when enabled" do
|
||||
TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == unspacey_string
|
||||
end
|
||||
|
||||
it "fixes intra spaces in titles" do
|
||||
TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "stripping whitespace" do
|
||||
let(:spacey_string) { " \t test \t " }
|
||||
let(:unspacey_string) { "test" }
|
||||
|
||||
it "does not strip leading and trailing whitespace by default" do
|
||||
TextSentinel.new(spacey_string).text.should == spacey_string
|
||||
end
|
||||
|
||||
it "strips leading and trailing whitespace when enabled" do
|
||||
TextSentinel.new(spacey_string, strip: true).text.should == unspacey_string
|
||||
end
|
||||
|
||||
it "strips leading and trailing whitespace in titles" do
|
||||
TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
context "validity" do
|
||||
|
||||
let(:valid_string) { "This is a cool topic about Discourse" }
|
||||
|
@ -113,8 +71,6 @@ describe TextSentinel do
|
|||
TextSentinel.new("{{$!").should_not be_valid
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
|
|
@ -407,12 +407,13 @@ describe TopicsController do
|
|||
it 'succeeds' do
|
||||
xhr :put, :update, topic_id: @topic.id, slug: @topic.title
|
||||
response.should be_success
|
||||
::JSON.parse(response.body)['basic_topic'].should be_present
|
||||
end
|
||||
|
||||
it 'allows a change of title' do
|
||||
xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'this is a new title for the topic'
|
||||
xhr :put, :update, topic_id: @topic.id, slug: @topic.title, title: 'This is a new title for the topic'
|
||||
@topic.reload
|
||||
@topic.title.should == 'this is a new title for the topic'
|
||||
@topic.title.should == 'This is a new title for the topic'
|
||||
end
|
||||
|
||||
it 'triggers a change of category' do
|
||||
|
|
|
@ -111,20 +111,20 @@ describe Topic do
|
|||
end
|
||||
|
||||
context 'html in title' do
|
||||
let(:topic_bold) { Fabricate(:topic, title: "topic with <b>bold</b> text in its title" ) }
|
||||
let(:topic_image) { Fabricate(:topic, title: "topic with <img src='something'> image in its title" ) }
|
||||
let(:topic_script) { Fabricate(:topic, title: "<script>alert('title')</script> is my topic title" ) }
|
||||
let(:topic_bold) { Fabricate(:topic, title: "Topic with <b>bold</b> text in its title" ) }
|
||||
let(:topic_image) { Fabricate(:topic, title: "Topic with <img src='something'> image in its title" ) }
|
||||
let(:topic_script) { Fabricate(:topic, title: "Topic with <script>alert('title')</script> script in its title" ) }
|
||||
|
||||
it "escapes script contents" do
|
||||
topic_script.title.should == "is my topic title"
|
||||
topic_script.title.should == "Topic with script in its title"
|
||||
end
|
||||
|
||||
it "escapes bold contents" do
|
||||
topic_bold.title.should == "topic with bold text in its title"
|
||||
topic_bold.title.should == "Topic with bold text in its title"
|
||||
end
|
||||
|
||||
it "escapes bold contents" do
|
||||
topic_image.title.should == "topic with image in its title"
|
||||
it "escapes image contents" do
|
||||
topic_image.title.should == "Topic with image in its title"
|
||||
end
|
||||
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue