Merge pull request #172 from jeremybanks/master
Do not strip leading and trailing whitespace from raw posts
This commit is contained in:
commit
ae9bee2a91
|
@ -29,7 +29,7 @@ class Post < ActiveRecord::Base
|
||||||
has_many :post_actions
|
has_many :post_actions
|
||||||
|
|
||||||
validates_presence_of :raw, :user_id, :topic_id
|
validates_presence_of :raw, :user_id, :topic_id
|
||||||
validates :raw, length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length}
|
validates :raw, stripped_length: {in: SiteSetting.min_post_length..SiteSetting.max_post_length}
|
||||||
validate :raw_quality
|
validate :raw_quality
|
||||||
validate :max_mention_validator
|
validate :max_mention_validator
|
||||||
validate :max_images_validator
|
validate :max_images_validator
|
||||||
|
@ -57,10 +57,6 @@ class Post < ActiveRecord::Base
|
||||||
TopicUser.auto_track(self.user_id, self.topic_id, TopicUser::NotificationReasons::CREATED_POST)
|
TopicUser.auto_track(self.user_id, self.topic_id, TopicUser::NotificationReasons::CREATED_POST)
|
||||||
end
|
end
|
||||||
|
|
||||||
before_validation do
|
|
||||||
self.raw.strip! if self.raw.present?
|
|
||||||
end
|
|
||||||
|
|
||||||
def raw_quality
|
def raw_quality
|
||||||
|
|
||||||
sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy)
|
sentinel = TextSentinel.new(self.raw, min_entropy: SiteSetting.body_min_entropy)
|
||||||
|
@ -212,7 +208,7 @@ class Post < ActiveRecord::Base
|
||||||
# We only filter quotes when there is exactly 1
|
# We only filter quotes when there is exactly 1
|
||||||
return cooked unless (quote_count == 1)
|
return cooked unless (quote_count == 1)
|
||||||
|
|
||||||
parent_raw = parent_post.raw.sub(/\[quote.+\/quote\]/m, '').strip
|
parent_raw = parent_post.raw.sub(/\[quote.+\/quote\]/m, '')
|
||||||
|
|
||||||
if raw[parent_raw] or (parent_raw.size < SHORT_POST_CHARS)
|
if raw[parent_raw] or (parent_raw.size < SHORT_POST_CHARS)
|
||||||
return cooked.sub(/\<aside.+\<\/aside\>/m, '')
|
return cooked.sub(/\<aside.+\<\/aside\>/m, '')
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
# Validates the length of an attribute's value as measured after leading and
# trailing whitespace has been stripped. The value itself is never modified;
# only the measurement ignores the surrounding whitespace.
#
# Options:
#   :in      - Range of permitted stripped lengths. An exclusive range
#              (`a...b`) permits lengths up to `b - 1`.
#   :message - optional message used in place of the default I18n messages.
#
# A nil value is reported with the 'errors.messages.blank' message.
class StrippedLengthValidator < ActiveModel::EachValidator
  # Adds an error to record.errors[attribute] when the stripped length of
  # value falls outside options[:in], or when value is nil.
  def validate_each(record, attribute, value)
    if value.nil?
      record.errors.add attribute, (options[:message] || I18n.t('errors.messages.blank'))
      return
    end

    range = options[:in]
    minimum = range.begin
    # Honour exclusive ranges: `1...10` allows at most 9 characters, so the
    # upper bound (and the count shown in the message) is end - 1.
    maximum = range.exclude_end? ? range.end - 1 : range.end
    stripped_length = value.strip.length

    # The two checks are kept independent, as in the original implementation.
    if stripped_length < minimum
      record.errors.add attribute, (options[:message] || I18n.t('errors.messages.too_short', count: minimum))
    end

    if stripped_length > maximum
      record.errors.add attribute, (options[:message] || I18n.t('errors.messages.too_long', count: maximum))
    end
  end
end
|
|
@ -15,8 +15,8 @@ class TextSentinel
|
||||||
|
|
||||||
if text.present?
|
if text.present?
|
||||||
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
@text = text.encode('UTF-8', invalid: :replace, undef: :replace, replace: '')
|
||||||
@text.strip!
|
|
||||||
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
|
@text.gsub!(/ +/m, ' ') if @opts[:remove_interior_spaces]
|
||||||
|
@text.strip! if @opts[:strip]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -24,19 +24,20 @@ class TextSentinel
|
||||||
TextSentinel.new(text,
|
TextSentinel.new(text,
|
||||||
min_entropy: SiteSetting.title_min_entropy,
|
min_entropy: SiteSetting.title_min_entropy,
|
||||||
max_word_length: SiteSetting.max_word_length,
|
max_word_length: SiteSetting.max_word_length,
|
||||||
remove_interior_spaces: true)
|
remove_interior_spaces: true,
|
||||||
|
strip: true)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Entropy is a number of how many unique characters the string needs.
|
# Entropy is a number of how many unique characters the string needs.
|
||||||
def entropy
|
def entropy
|
||||||
return 0 if @text.blank?
|
return 0 if @text.blank?
|
||||||
@entropy ||= @text.each_char.to_a.uniq.size
|
@entropy ||= @text.strip.each_char.to_a.uniq.size
|
||||||
end
|
end
|
||||||
|
|
||||||
def valid?
|
def valid?
|
||||||
|
|
||||||
# Blank strings are not valid
|
# Blank strings are not valid
|
||||||
return false if @text.blank?
|
return false if @text.blank? || @text.strip.blank?
|
||||||
|
|
||||||
# Entropy check if required
|
# Entropy check if required
|
||||||
return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy])
|
return false if @opts[:min_entropy].present? and (entropy < @opts[:min_entropy])
|
||||||
|
|
|
@ -37,10 +37,6 @@ describe TextSentinel do
|
||||||
|
|
||||||
context "cleaning up" do
|
context "cleaning up" do
|
||||||
|
|
||||||
it "strips leading or trailing whitespace" do
|
|
||||||
TextSentinel.new(" \t test \t ").text.should == "test"
|
|
||||||
end
|
|
||||||
|
|
||||||
it "allows utf-8 chars" do
|
it "allows utf-8 chars" do
|
||||||
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
|
TextSentinel.new("йȝîûηыეமிᚉ⠛").text.should == "йȝîûηыეமிᚉ⠛"
|
||||||
end
|
end
|
||||||
|
@ -48,15 +44,37 @@ describe TextSentinel do
|
||||||
context "interior spaces" do
|
context "interior spaces" do
|
||||||
|
|
||||||
let(:spacey_string) { "hello there's weird spaces here." }
|
let(:spacey_string) { "hello there's weird spaces here." }
|
||||||
|
let(:unspacey_string) { "hello there's weird spaces here." }
|
||||||
|
|
||||||
it "ignores intra spaces by default" do
|
it "ignores intra spaces by default" do
|
||||||
TextSentinel.new(spacey_string).text.should == spacey_string
|
TextSentinel.new(spacey_string).text.should == spacey_string
|
||||||
end
|
end
|
||||||
|
|
||||||
it "fixes intra spaces when enabled" do
|
it "fixes intra spaces when enabled" do
|
||||||
TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == "hello there's weird spaces here."
|
TextSentinel.new(spacey_string, remove_interior_spaces: true).text.should == unspacey_string
|
||||||
end
|
end
|
||||||
|
|
||||||
|
it "fixes intra spaces in titles" do
|
||||||
|
TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
context "stripping whitespace" do
|
||||||
|
let(:spacey_string) { " \t test \t " }
|
||||||
|
let(:unspacey_string) { "test" }
|
||||||
|
|
||||||
|
it "does not strip leading and trailing whitespace by default" do
|
||||||
|
TextSentinel.new(spacey_string).text.should == spacey_string
|
||||||
|
end
|
||||||
|
|
||||||
|
it "strips leading and trailing whitespace when enabled" do
|
||||||
|
TextSentinel.new(spacey_string, strip: true).text.should == unspacey_string
|
||||||
|
end
|
||||||
|
|
||||||
|
it "strips leading and trailing whitespace in titles" do
|
||||||
|
TextSentinel.title_sentinel(spacey_string).text.should == unspacey_string
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue