Merge pull request #984 from Supermathie/moreentropy
More entropy for foreign titles
This commit is contained in:
commit
b2a6755a48
|
@ -590,8 +590,8 @@ en:
|
||||||
email_time_window_mins: "How many minutes we wait before sending a user mail, to give them a chance to see it first"
|
email_time_window_mins: "How many minutes we wait before sending a user mail, to give them a chance to see it first"
|
||||||
flush_timings_secs: "How frequently we flush timing data to the server, in seconds"
|
flush_timings_secs: "How frequently we flush timing data to the server, in seconds"
|
||||||
max_word_length: "The maximum allowed word length, in characters, in a topic title"
|
max_word_length: "The maximum allowed word length, in characters, in a topic title"
|
||||||
title_min_entropy: "The minimum allowed entropy (unique characters) required for a topic title"
|
title_min_entropy: "The minimum allowed entropy (unique characters, non-english count for more) required for a topic title"
|
||||||
body_min_entropy: "The minimum allowed entropy (unique characters) required for a post body"
|
body_min_entropy: "The minimum allowed entropy (unique characters, non-english count for more) required for a post body"
|
||||||
|
|
||||||
title_fancy_entities: "Convert common ASCII characters to fancy HTML entities in topic titles, ala SmartyPants http://daringfireball.net/projects/smartypants/"
|
title_fancy_entities: "Convert common ASCII characters to fancy HTML entities in topic titles, ala SmartyPants http://daringfireball.net/projects/smartypants/"
|
||||||
|
|
||||||
|
|
|
@ -21,8 +21,10 @@ class TextSentinel
|
||||||
end
|
end
|
||||||
|
|
||||||
# Entropy is a measure of how many unique characters the string contains.
|
# Entropy is a measure of how many unique characters the string contains.
|
||||||
|
# Non-ASCII characters are weighted more heavily since they contain more "information"
|
||||||
def entropy
|
def entropy
|
||||||
@entropy ||= @text.to_s.strip.split('').uniq.size
|
chars = @text.to_s.strip.split('')
|
||||||
|
@entropy ||= chars.pack('M*'*chars.size).gsub("\n",'').split('=').uniq.size
|
||||||
end
|
end
|
||||||
|
|
||||||
def valid?
|
def valid?
|
||||||
|
|
|
@ -32,7 +32,15 @@ describe TextSentinel do
|
||||||
end
|
end
|
||||||
|
|
||||||
it "Works on foreign characters" do
|
it "Works on foreign characters" do
|
||||||
TextSentinel.new("去年十社會警告").entropy.should == 7
|
TextSentinel.new("去年十社會警告").entropy.should == 19
|
||||||
|
end
|
||||||
|
|
||||||
|
it "generates enough entropy for short foreign strings" do
|
||||||
|
TextSentinel.new("又一个测").entropy.should == 11
|
||||||
|
end
|
||||||
|
|
||||||
|
it "handles repeated foreign characters" do
|
||||||
|
TextSentinel.new("又一个测试话题" * 3).entropy.should == 18
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in New Issue