Merge pull request #939 from novemberkilo/master
Reduce complexity of Post - introduce a PostAnalyzer class
This commit is contained in:
commit
b4544eb027
|
@ -35,6 +35,7 @@ config/discourse.pill
|
||||||
# Ignore all logfiles and tempfiles.
|
# Ignore all logfiles and tempfiles.
|
||||||
/log/*.log
|
/log/*.log
|
||||||
/tmp
|
/tmp
|
||||||
|
/logfile
|
||||||
|
|
||||||
# Ignore Eclipse .project file
|
# Ignore Eclipse .project file
|
||||||
/.project
|
/.project
|
||||||
|
|
|
@ -4,7 +4,7 @@ require_dependency 'rate_limiter'
|
||||||
require_dependency 'post_revisor'
|
require_dependency 'post_revisor'
|
||||||
require_dependency 'enum'
|
require_dependency 'enum'
|
||||||
require_dependency 'trashable'
|
require_dependency 'trashable'
|
||||||
require_dependency 'post_analyser'
|
require_dependency 'post_analyzer'
|
||||||
|
|
||||||
require 'archetype'
|
require 'archetype'
|
||||||
require 'digest/sha1'
|
require 'digest/sha1'
|
||||||
|
@ -12,7 +12,6 @@ require 'digest/sha1'
|
||||||
class Post < ActiveRecord::Base
|
class Post < ActiveRecord::Base
|
||||||
include RateLimiter::OnCreateRecord
|
include RateLimiter::OnCreateRecord
|
||||||
include Trashable
|
include Trashable
|
||||||
include PostAnalyser
|
|
||||||
|
|
||||||
versioned if: :raw_changed?
|
versioned if: :raw_changed?
|
||||||
|
|
||||||
|
@ -90,11 +89,6 @@ class Post < ActiveRecord::Base
|
||||||
Digest::SHA1.hexdigest(raw.gsub(/\s+/, "").downcase)
|
Digest::SHA1.hexdigest(raw.gsub(/\s+/, "").downcase)
|
||||||
end
|
end
|
||||||
|
|
||||||
def cooked_document
|
|
||||||
self.cooked ||= cook(raw, topic_id: topic_id)
|
|
||||||
@cooked_document ||= Nokogiri::HTML.fragment(cooked)
|
|
||||||
end
|
|
||||||
|
|
||||||
def reset_cooked
|
def reset_cooked
|
||||||
@cooked_document = nil
|
@cooked_document = nil
|
||||||
self.cooked = nil
|
self.cooked = nil
|
||||||
|
@ -104,16 +98,18 @@ class Post < ActiveRecord::Base
|
||||||
@white_listed_image_classes ||= ['avatar', 'favicon', 'thumbnail']
|
@white_listed_image_classes ||= ['avatar', 'favicon', 'thumbnail']
|
||||||
end
|
end
|
||||||
|
|
||||||
# How many images are present in the post
|
def post_analyzer
|
||||||
def image_count
|
@post_analyzer = PostAnalyzer.new(raw, topic_id)
|
||||||
return 0 unless raw.present?
|
end
|
||||||
|
|
||||||
cooked_document.search("img").reject do |t|
|
%w{raw_mentions linked_hosts image_count link_count raw_links}.each do |attr|
|
||||||
dom_class = t["class"]
|
define_method(attr) do
|
||||||
if dom_class
|
PostAnalyzer.new(raw, topic_id).send(attr)
|
||||||
(Post.white_listed_image_classes & dom_class.split(" ")).count > 0
|
end
|
||||||
end
|
end
|
||||||
end.count
|
|
||||||
|
def cook(*args)
|
||||||
|
PostAnalyzer.new(raw, topic_id).cook(*args)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
@ -236,20 +232,6 @@ class Post < ActiveRecord::Base
|
||||||
Post.excerpt(cooked, maxlength, options)
|
Post.excerpt(cooked, maxlength, options)
|
||||||
end
|
end
|
||||||
|
|
||||||
# What we use to cook posts
|
|
||||||
def cook(*args)
|
|
||||||
cooked = PrettyText.cook(*args)
|
|
||||||
|
|
||||||
# If we have any of the oneboxes in the cache, throw them in right away, don't
|
|
||||||
# wait for the post processor.
|
|
||||||
dirty = false
|
|
||||||
result = Oneboxer.apply(cooked) do |url, elem|
|
|
||||||
Oneboxer.render_from_cache(url)
|
|
||||||
end
|
|
||||||
|
|
||||||
cooked = result.to_html if result.changed?
|
|
||||||
cooked
|
|
||||||
end
|
|
||||||
|
|
||||||
# A list of versions including the initial version
|
# A list of versions including the initial version
|
||||||
def all_versions
|
def all_versions
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
# Derives facts about a post (links, linked hosts, image count, mentions) from
# its raw markup. Constructed with the raw text and the id of the topic the
# post belongs to; cooking is delegated to PrettyText and Oneboxer.
class PostAnalyzer

  attr_accessor :cooked, :raw

  # raw      - the raw (uncooked) text of the post
  # topic_id - passed through to the cooker as the :topic_id option
  def initialize(raw, topic_id)
    @raw = raw
    @topic_id = topic_id
  end

  # Cooks the raw text and parses the result into an HTML fragment.
  # The text is re-cooked on every call, so later changes to `raw` are honoured.
  def cooked_document
    @cooked = cook(@raw, topic_id: @topic_id)
    @cooked_document = Nokogiri::HTML.fragment(@cooked)
  end

  # What we use to cook posts
  def cook(*args)
    cooked = PrettyText.cook(*args)

    # If we have any of the oneboxes in the cache, throw them in right away, don't
    # wait for the post processor.
    result = Oneboxer.apply(cooked) do |url, _elem|
      Oneboxer.render_from_cache(url)
    end

    cooked = result.to_html if result.changed?
    cooked
  end

  # How many images are present in the post, ignoring images that carry one of
  # the classes in Post.white_listed_image_classes.
  def image_count
    return 0 unless @raw.present?

    cooked_document.search("img").reject { |img| white_listed_image?(img) }.count
  end

  # Returns the lowercased, de-duplicated usernames mentioned in the raw text,
  # without the leading "@". Mentions inside quotes, <pre> and <code> are ignored.
  def raw_mentions
    return [] if @raw.blank?
    # nil check (rather than present?) so an empty result is cached as well
    return @raw_mentions unless @raw_mentions.nil?

    # We don't count mentions in quotes
    raw_stripped = @raw.gsub(/\[quote=(.*)\]([^\[]*?)\[\/quote\]/im, '')

    # Strip pre and code tags
    doc = Nokogiri::HTML.fragment(raw_stripped)
    doc.search("pre").remove
    doc.search("code").remove

    results = doc.to_html.scan(PrettyText.mention_matcher)

    # Normalize first and de-duplicate afterwards, so that "@Jake" and "@jake"
    # collapse into one entry. `sub` is used instead of `gsub!` because the
    # bang variant returns nil when nothing was replaced.
    @raw_mentions = results.map { |un| un.first.downcase.sub(/^@/, '') }.uniq
  end

  # Returns a hash whose keys are the hosts linked from the post. Every value
  # is 1: a host is recorded once no matter how many links point at it.
  # Links that cannot be parsed, or that have no host (e.g. relative paths),
  # are skipped.
  def linked_hosts
    return {} if raw_links.blank?
    return @linked_hosts unless @linked_hosts.nil?

    @linked_hosts = {}
    raw_links.each do |u|
      begin
        host = URI.parse(u).host
      rescue URI::InvalidURIError
        # A malformed href must not abort the analysis of the whole post
        next
      end
      @linked_hosts[host] ||= 1 unless host.nil?
    end
    @linked_hosts
  end

  # Returns an array of all links in a post excluding mentions
  def raw_links
    return [] unless @raw.present?
    # nil check (rather than present?) so a link-free post is only cooked once
    return @raw_links unless @raw_links.nil?

    # Don't include @mentions in the link count
    anchors = cooked_document.search("a[href]").reject { |l| link_is_a_mention?(l) }
    @raw_links = anchors.map { |l| l.attributes['href'].to_s }
  end

  # How many links are present in the post
  def link_count
    raw_links.size
  end

  private

  # True when the image carries at least one white listed CSS class.
  def white_listed_image?(img)
    dom_class = img["class"]
    return false unless dom_class

    (Post.white_listed_image_classes & dom_class.split(" ")).count > 0
  end

  # Truthy when the anchor has class "mention" and points under /users/.
  def link_is_a_mention?(l)
    html_class = l.attributes['class']
    return false if html_class.nil?
    html_class.to_s == 'mention' && l.attributes['href'].to_s =~ /^\/users\//
  end

end
|
|
@ -0,0 +1,166 @@
|
||||||
|
require 'spec_helper'
|
||||||
|
|
||||||
|
describe PostAnalyzer do

  let(:topic) { Fabricate(:topic) }
  let(:default_topic_id) { topic.id }
  let(:post_args) do
    {user: topic.user, topic: topic}
  end

  # Builds the analyzer under test. The topic is only fabricated when the
  # default topic id is actually needed.
  def analyzer_for(raw, topic_id = default_topic_id)
    PostAnalyzer.new(raw, topic_id)
  end

  context "links" do
    let(:raw_no_links) { "hello world my name is evil trout" }
    let(:raw_one_link_md) { "[jlawr](http://www.imdb.com/name/nm2225369)" }
    let(:raw_two_links_html) { "<a href='http://disneyland.disney.go.com/'>disney</a> <a href='http://reddit.com'>reddit</a>"}
    let(:raw_three_links) { "http://discourse.org and http://discourse.org/another_url and http://www.imdb.com/name/nm2225369"}

    describe "raw_links" do
      it "returns a blank collection for a post with no links" do
        analyzer_for(raw_no_links).raw_links.should be_blank
      end

      it "finds a link within markdown" do
        expected = ["http://www.imdb.com/name/nm2225369"]
        analyzer_for(raw_one_link_md).raw_links.should eq(expected)
      end

      it "can find two links from html" do
        expected = ["http://disneyland.disney.go.com/", "http://reddit.com"]
        analyzer_for(raw_two_links_html).raw_links.should eq(expected)
      end

      it "can find three links without markup" do
        expected = ["http://discourse.org", "http://discourse.org/another_url", "http://www.imdb.com/name/nm2225369"]
        analyzer_for(raw_three_links).raw_links.should eq(expected)
      end
    end

    describe "linked_hosts" do
      it "returns blank with no links" do
        analyzer_for(raw_no_links).linked_hosts.should be_blank
      end

      it "returns the host and a count for links" do
        expected = {"disneyland.disney.go.com" => 1, "reddit.com" => 1}
        analyzer_for(raw_two_links_html).linked_hosts.should eq(expected)
      end

      it "it counts properly with more than one link on the same host" do
        expected = {"discourse.org" => 1, "www.imdb.com" => 1}
        analyzer_for(raw_three_links).linked_hosts.should eq(expected)
      end
    end
  end

  describe "image_count" do
    let(:raw_post_one_image_md) { "![sherlock](http://bbc.co.uk/sherlock.jpg)" }
    let(:raw_post_two_images_html) { "<img src='http://discourse.org/logo.png'> <img src='http://bbc.co.uk/sherlock.jpg'>" }
    let(:raw_post_with_avatars) { '<img alt="smiley" title=":smiley:" src="/assets/emoji/smiley.png" class="avatar"> <img alt="wink" title=":wink:" src="/assets/emoji/wink.png" class="avatar">' }
    let(:raw_post_with_favicon) { '<img src="/assets/favicons/wikipedia.png" class="favicon">' }
    let(:raw_post_with_thumbnail) { '<img src="/assets/emoji/smiley.png" class="thumbnail">' }
    let(:raw_post_with_two_classy_images) { "<img src='http://discourse.org/logo.png' class='classy'> <img src='http://bbc.co.uk/sherlock.jpg' class='classy'>" }

    it "returns 0 images for an empty post" do
      analyzer_for("Hello world", nil).image_count.should eq(0)
    end

    it "finds images from markdown" do
      analyzer_for(raw_post_one_image_md).image_count.should eq(1)
    end

    it "finds images from HTML" do
      analyzer_for(raw_post_two_images_html).image_count.should eq(2)
    end

    it "doesn't count avatars as images" do
      analyzer_for(raw_post_with_avatars).image_count.should eq(0)
    end

    it "doesn't count favicons as images" do
      analyzer_for(raw_post_with_favicon).image_count.should eq(0)
    end

    it "doesn't count thumbnails as images" do
      analyzer_for(raw_post_with_thumbnail).image_count.should eq(0)
    end

    it "doesn't count whitelisted images" do
      Post.stubs(:white_listed_image_classes).returns(["classy"])
      analyzer_for(raw_post_with_two_classy_images).image_count.should eq(0)
    end
  end

  describe "link_count" do
    let(:raw_post_one_link_md) { "[sherlock](http://www.bbc.co.uk/programmes/b018ttws)" }
    let(:raw_post_two_links_html) { "<a href='http://discourse.org'>discourse</a> <a href='http://twitter.com'>twitter</a>" }
    let(:raw_post_with_mentions) { "hello @novemberkilo how are you doing?" }

    it "returns 0 links for an empty post" do
      analyzer_for("Hello world", nil).link_count.should eq(0)
    end

    it "returns 0 links for a post with mentions" do
      analyzer_for(raw_post_with_mentions).link_count.should eq(0)
    end

    it "finds links from markdown" do
      analyzer_for(raw_post_one_link_md).link_count.should eq(1)
    end

    it "finds links from HTML" do
      analyzer_for(raw_post_two_links_html).link_count.should eq(2)
    end
  end

  describe "raw_mentions" do

    it "returns an empty array with no matches" do
      analyzer_for("Hello Jake and Finn!").raw_mentions.should eq([])
    end

    it "returns lowercase unique versions of the mentions" do
      analyzer_for("@Jake @Finn @Jake").raw_mentions.should eq(['jake', 'finn'])
    end

    it "ignores pre" do
      analyzer_for("<pre>@Jake</pre> @Finn").raw_mentions.should eq(['finn'])
    end

    it "catches content between pre tags" do
      analyzer_for("<pre>hello</pre> @Finn <pre></pre>").raw_mentions.should eq(['finn'])
    end

    it "ignores code" do
      analyzer_for("@Jake <code>@Finn</code>").raw_mentions.should eq(['jake'])
    end

    it "ignores quotes" do
      analyzer_for("[quote=\"Evil Trout\"]@Jake[/quote] @Finn").raw_mentions.should eq(['finn'])
    end

    it "handles underscore in username" do
      analyzer_for("@Jake @Finn @Jake_Old").raw_mentions.should eq(['jake', 'finn', 'jake_old'])
    end
  end
end
|
Loading…
Reference in New Issue