# discourse/app/models/post_analyzer.rb

require_dependency 'oneboxer'
# Inspects the cooked (HTML) form of a post's raw text and reports on the
# images, attachments, mentions and links it contains.
#
# The cooked HTML is parsed once per analyzer and shared by all query methods.
class PostAnalyzer
  # CSS selectors for content whose text must not be counted as a mention:
  # quotes, code blocks and oneboxes.
  MENTION_EXCLUDED_SELECTORS = ["aside.quote", "pre", "code", ".onebox"].freeze
  private_constant :MENTION_EXCLUDED_SELECTORS

  # raw      - the raw (uncooked) post text.
  # topic_id - id of the topic the post belongs to; forwarded when cooking.
  def initialize(raw, topic_id)
    @raw = raw
    @topic_id = topic_id
  end

  # What we use to cook posts.
  #
  # All arguments are forwarded to PrettyText.cook. When more than one
  # argument is given, the last one is treated as an options hash and its
  # :invalidate_oneboxes entry decides whether each onebox URL is invalidated
  # before the cached onebox is fetched.
  #
  # Returns the cooked HTML, with oneboxes applied when Oneboxer changed it.
  def cook(*args)
    cooked = PrettyText.cook(*args)

    # With a single argument `args.last` is the raw text itself, and indexing
    # a String with a Symbol raises; fall back to empty options in that case.
    options = (args.last if args.size > 1) || {}

    result = Oneboxer.apply(cooked, topic_id: @topic_id) do |url, _|
      Oneboxer.invalidate(url) if options[:invalidate_oneboxes]
      Oneboxer.cached_onebox(url)
    end

    cooked = result.to_html if result.changed?
    cooked
  end

  # How many images are present in the post, ignoring images that carry one
  # of Post.white_listed_image_classes.
  def image_count
    return 0 unless @raw.present?

    cooked_document.search("img").reject do |img|
      dom_class = img["class"]
      dom_class && (Post.white_listed_image_classes & dom_class.split(" ")).any?
    end.count
  end

  # How many attachments are present in the post.
  #
  # An attachment is an `a.attachment` element whose href starts with the
  # store's absolute base URL, or with its relative base URL when the store
  # is internal.
  def attachment_count
    return 0 unless @raw.present?

    store = Discourse.store
    attachments = cooked_document.css("a.attachment[href^=\"#{store.absolute_base_url}\"]")
    attachments += cooked_document.css("a.attachment[href^=\"#{store.relative_base_url}\"]") if store.internal?
    attachments.count
  end

  # Returns the unique, downcased names matched by PrettyText.mention_matcher
  # in the cooked post, ignoring quotes, code blocks and oneboxes.
  def raw_mentions
    return [] if @raw.blank?
    return @raw_mentions if @raw_mentions

    # Strip from a copy: the parsed document is memoized and shared with the
    # image/attachment/link queries, which must still see the removed nodes.
    stripped = cooked_document.dup
    MENTION_EXCLUDED_SELECTORS.each { |selector| stripped.css(selector).remove }

    @raw_mentions = stripped.to_html
                            .scan(PrettyText.mention_matcher)
                            .flatten
                            .map(&:downcase)
                            .uniq
  end

  # from rack ... compat with ruby 2.2
  #
  # Parses +uri+ with the RFC 2396 parser when it is available, otherwise
  # with URI itself. Raises URI::InvalidURIError for unparseable input.
  def self.parse_uri_rfc2396(uri)
    @parser ||= defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI
    @parser.parse(uri)
  end

  # Returns a Hash keyed by every host linked in the post.
  #
  # NOTE: each value is always 1 (the hash acts as a set of hosts); links
  # that cannot be parsed or that have no host are skipped.
  def linked_hosts
    return {} if raw_links.blank?
    return @linked_hosts if @linked_hosts

    @linked_hosts = raw_links.each_with_object({}) do |url, hosts|
      begin
        host = self.class.parse_uri_rfc2396(url).host
        hosts[host] ||= 1 unless host.nil?
      rescue URI::InvalidURIError
        # An invalid URI does not count as a raw link.
      end
    end
  end

  # Returns an array of all links in a post excluding mentions
  def raw_links
    return [] unless @raw.present?
    return @raw_links if @raw_links

    @raw_links = cooked_document.search("a").each_with_object([]) do |link, urls|
      href = link.attributes['href']
      # Don't include @mentions in the link count
      next if href.nil? || link_is_a_mention?(link)
      urls << href.to_s
    end
  end

  # How many links are present in the post
  def link_count
    raw_links.size
  end

  private

  # The cooked post parsed as an HTML fragment; cooked and parsed only once.
  def cooked_document
    @cooked_document ||= Nokogiri::HTML.fragment(cook(@raw, topic_id: @topic_id))
  end

  # True when the anchor's class is exactly "mention" and its href starts
  # with "/users/". The prefix is checked against the start of the whole
  # string so an href with an embedded newline cannot pass as a mention.
  def link_is_a_mention?(l)
    html_class = l.attributes['class']
    return false if html_class.nil?

    html_class.to_s == 'mention' && l.attributes['href'].to_s.start_with?('/users/')
  end
end