discourse/app/models/topic_embed.rb

require_dependency 'nokogiri'
require_dependency 'url_helper'

class TopicEmbed < ActiveRecord::Base
  include Trashable

  # An embed ties an external URL to a topic and a post.
  belongs_to :topic
  belongs_to :post

  # The embed URL is mandatory and may be used by only one record.
  validates_presence_of :embed_url
  validates_uniqueness_of :embed_url

  # When creating an embed for a URL that still exists as a trashed row
  # (deleted_at set), destroy that stale row first.
  # NOTE(review): presumably this keeps the uniqueness check / unique index on
  # embed_url from rejecting the new record -- confirm.
  before_validation(on: :create) do
    stale = TopicEmbed.with_deleted.where('deleted_at IS NOT NULL AND embed_url = ?', embed_url).first
    stale.destroy! unless stale.nil?
  end

  # Plain value holder for what find_remote extracts from a fetched page.
  class FetchResponse
    # Page title (after optional scrubbing).
    attr_accessor :title
    # Extracted HTML body.
    attr_accessor :body
    # User matched from the page's author meta tag, if any.
    attr_accessor :author
  end

  # Canonicalises an embed URL for storage and lookup.
  #
  # Surrounding whitespace is removed first, so a trailing slash is dropped
  # even when the input ends in spaces or a newline. The result is
  # lower-cased, loses one trailing slash, and has its first run of
  # consecutive hyphens collapsed to a single hyphen.
  #
  # url - the URL String to normalise
  #
  # Returns the normalised URL String.
  def self.normalize_url(url)
    url.strip.downcase.sub(/\/\z/, '').sub(/\-+/, '-')
  end

  # Builds the HTML footer appended to imported content: a horizontal rule
  # followed by the translated 'embed.imported_from' notice linking to +url+.
  def self.imported_from_html(url)
    source_link = "<a href='#{url}'>#{url}</a>"
    notice = I18n.t('embed.imported_from', link: source_link)
    "\n<hr>\n<small>#{notice}</small>\n"
  end

  # Import an article from a source (RSS/Atom/Other).
  #
  # For a URL that has no embed yet, creates the topic, its post and the
  # TopicEmbed inside one transaction. For a known URL, reassigns the post to
  # +user+ when the owner differs and revises the post when the content hash
  # changed.
  #
  # user     - the user who should own the post
  # url      - absolute http(s) URL of the article
  # title    - title for a newly created topic
  # contents - article HTML (may be nil); never mutated
  #
  # Returns the created or existing post, or nil when the URL is not http(s),
  # the post could not be created, or the existing embed has no post.
  def self.import(user, url, title, contents)
    # \A rather than ^: ^ matches at the start of any line, which would let a
    # multi-line string through as long as one later line begins with http(s)://.
    return unless url =~ /\Ahttps?\:\/\//

    if SiteSetting.embed_truncate
      contents = first_paragraph_from(contents)
    end
    contents ||= ''
    # Append the footer to a copy so the caller's object is left untouched.
    contents = contents.dup << imported_from_html(url)

    # Normalise only after the footer was built, so the footer links to the URL as given.
    url = normalize_url(url)

    embed = TopicEmbed.find_by("lower(embed_url) = ?", url)
    content_sha1 = Digest::SHA1.hexdigest(contents)
    post = nil

    # If there is no embed, create a topic, post and the embed.
    if embed.blank?
      Topic.transaction do
        eh = EmbeddableHost.record_for_url(url)

        cook_method = if SiteSetting.embed_support_markdown
          Post.cook_methods[:regular]
        else
          Post.cook_methods[:raw_html]
        end

        creator = PostCreator.new(user,
                                  title: title,
                                  raw: absolutize_urls(url, contents),
                                  skip_validations: true,
                                  cook_method: cook_method,
                                  category: eh.try(:category_id))
        post = creator.create
        if post.present?
          TopicEmbed.create!(topic_id: post.topic_id,
                             embed_url: url,
                             content_sha1: content_sha1,
                             post_id: post.id)
        end
      end
    else
      post = embed.post

      # Update the topic if it changed
      if post&.topic
        if post.user != user
          PostOwnerChanger.new(
            post_ids: [post.id],
            topic_id: post.topic_id,
            new_owner: user,
            acting_user: Discourse.system_user
          ).change_owner!

          # make sure the post returned has the right author
          post.reload
        end

        # Only revise when the imported content differs from what was stored.
        if content_sha1 != embed.content_sha1
          post.revise(
            user,
            { raw: absolutize_urls(url, contents) },
            skip_validations: true,
            bypass_rate_limiter: true
          )
          embed.update!(content_sha1: content_sha1)
        end
      end
    end

    post
  end

  # Fetches a remote page and extracts its title, readable body and author.
  #
  # The body is reduced by Readability to a fixed set of tags and attributes
  # (optionally narrowed by the whitelist/blacklist selector site settings).
  # Host-less img/script/a URLs get the page's scheme and host, and class
  # attributes are reduced to the names listed in embed_classname_whitelist.
  #
  # url - the page URL to fetch
  #
  # Returns a FetchResponse, or nil when the request fails with an HTTP error
  # or the connection times out.
  def self.find_remote(url)
    # Loaded lazily, only when a page is actually fetched.
    require 'ruby-readability'

    url = UrlHelper.escape_uri(url)
    original_uri = URI.parse(url)
    opts = {
      tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote],
      attributes: %w[href src class],
      remove_empty_nodes: false
    }

    opts[:whitelist] = SiteSetting.embed_whitelist_selector if SiteSetting.embed_whitelist_selector.present?
    opts[:blacklist] = SiteSetting.embed_blacklist_selector if SiteSetting.embed_blacklist_selector.present?
    embed_classname_whitelist = SiteSetting.embed_classname_whitelist if SiteSetting.embed_classname_whitelist.present?

    response = FetchResponse.new
    begin
      # NOTE(review): Kernel#open treats a string beginning with "|" as a
      # command to run; confirm escape_uri rules that out for every caller.
      html = open(url, allow_redirections: :safe).read
    rescue OpenURI::HTTPError, Net::OpenTimeout
      return
    end

    raw_doc = Nokogiri::HTML(html)
    auth_element = raw_doc.at('meta[@name="author"]')
    if auth_element.present?
      # to_s: a meta tag without a content attribute yields nil, which must not crash the import.
      response.author = User.where(username_lower: auth_element[:content].to_s.strip).first
    end

    read_doc = Readability::Document.new(html, opts)

    title = raw_doc.title || ''
    title.strip!

    if SiteSetting.embed_title_scrubber.present?
      title.sub!(Regexp.new(SiteSetting.embed_title_scrubber), '')
      title.strip!
    end
    response.title = title
    doc = Nokogiri::HTML(read_doc.content)

    # Give host-less URLs the scheme and host of the fetched page
    # (only those two components are copied).
    tags = { 'img' => 'src', 'script' => 'src', 'a' => 'href' }
    doc.search(tags.keys.join(',')).each do |node|
      url_param = tags[node.name]
      src = node[url_param]
      next if src.nil? || src.empty?

      begin
        uri = URI.parse(UrlHelper.escape_uri(src))
        unless uri.host
          uri.scheme = original_uri.scheme
          uri.host = original_uri.host
          node[url_param] = uri.to_s
        end
      rescue URI::Error
        # If there is a mistyped URL, just do nothing
      end
    end

    # only allow classes in the whitelist
    # Runs once over the whole document, independent of the link loop above,
    # so pages without any img/script/a element are filtered as well.
    allowed_classes = embed_classname_whitelist.blank? ? [] : embed_classname_whitelist.split(/[ ,]+/)
    doc.search('[class]:not([class=""])').each do |classnode|
      classes = classnode[:class].split(' ').select { |classname| allowed_classes.include?(classname) }
      if classes.empty?
        classnode.delete('class')
      else
        classnode[:class] = classes.join(' ')
      end
    end

    response.body = doc.to_html
    response
  end

  # Fetches +url+ with find_remote and hands the result to TopicEmbed.import.
  #
  # A present opts[:title] replaces the fetched title, and an author found on
  # the page replaces +import_user+.
  #
  # Returns whatever TopicEmbed.import returns, or nil when find_remote
  # returned nothing.
  def self.import_remote(import_user, url, opts = nil)
    fetched = find_remote(url)
    return if fetched.nil?

    title_override = (opts || {})[:title]
    fetched.title = title_override if title_override.present?

    owner = fetched.author.present? ? fetched.author : import_user
    TopicEmbed.import(owner, url, fetched.title, fetched.body)
  end

  # Convert any relative URLs to absolute. RSS is annoying for this.
  #
  # Rewrites the href of every <a> and the src of every <img> in +contents+:
  #   "/path"       becomes "<scheme>://<host>[:port]/path", using the host of +url+
  #   "//host/path" becomes "<scheme>://host/path" (protocol-relative: the
  #                 reference already names its host, so only the scheme is added)
  # Any other value is left as it is.
  #
  # url      - URL of the page the content came from
  # contents - HTML whose links should be rewritten
  #
  # Returns the rewritten HTML String.
  def self.absolutize_urls(url, contents)
    url = normalize_url(url)
    uri = URI(UrlHelper.escape_uri(url))
    prefix = "#{uri.scheme}://#{uri.host}"
    # Keep the port whenever it is not the default one for the URL's scheme.
    prefix << ":#{uri.port}" if uri.port != uri.default_port

    # Wrap in a div so the rewritten markup can be read back via inner_html.
    fragment = Nokogiri::HTML.fragment("<div>#{contents}</div>")
    { 'a' => 'href', 'img' => 'src' }.each do |tag, attribute|
      fragment.css(tag).each do |node|
        target = node[attribute]
        next unless target.present?

        if target =~ %r{\A//[^/]}
          node[attribute] = "#{uri.scheme}:#{target}"
        elsif target.start_with?('/')
          node[attribute] = "#{prefix}/#{target.sub(/\A\/+/, '')}"
        end
      end
    end
    fragment.at('div').inner_html
  end

  # Finds the topic id of the embed whose URL equals +embed_url+ after
  # normalisation, accepting either http or https as the stored scheme.
  # Returns nil when no embed matches.
  def self.topic_id_for_embed(embed_url)
    schemeless = normalize_url(embed_url).sub(/^https?\:\/\//, '')
    pattern = "^https?://#{Regexp.escape(schemeless)}$"
    TopicEmbed.where("embed_url ~* ?", pattern).pluck(:topic_id).first
  end

  # Extracts a short excerpt from +html+.
  #
  # The markup of non-empty <p> elements is accumulated until the excerpt is
  # at least 100 characters long. When the document has no non-empty
  # paragraph, the markup of the first <div> is used instead.
  #
  # Returns the excerpt as a String, or nil when there is neither a non-empty
  # paragraph nor a div.
  def self.first_paragraph_from(html)
    doc = Nokogiri::HTML(html)

    result = ""
    doc.css('p').each do |p|
      next unless p.text.present?

      result << p.to_s
      return result if result.size >= 100
    end
    return result unless result.blank?

    # If there is no first paragraph, return the first div (onebox).
    # to_s keeps the return type a String like the paragraph path above,
    # instead of handing a document node to callers.
    doc.css('div').first&.to_s
  end

  # Returns the full remote content for the embed behind +post+'s topic, with
  # the "imported from" footer appended. The value is cached under a
  # per-topic key with a 10 minute expiry.
  #
  # Returns nil when the topic has no embed URL or the remote page could not
  # be fetched (find_remote returned nil).
  # NOTE(review): the nil is whatever the cache block yields, so it may be
  # cached for the expiry window as well -- confirm that is acceptable.
  def self.expanded_for(post)
    Rails.cache.fetch("embed-topic:#{post.topic_id}", expires_in: 10.minutes) do
      url = TopicEmbed.where(topic_id: post.topic_id).pluck(:embed_url).first
      response = TopicEmbed.find_remote(url) if url.present?
      next if response.nil?

      body = response.body
      body << TopicEmbed.imported_from_html(url)
      body
    end
  end

end

# == Schema Information
#
# Table name: topic_embeds
#
#  id            :integer          not null, primary key
#  topic_id      :integer          not null
#  post_id       :integer          not null
#  embed_url     :string(1000)     not null
#  content_sha1  :string(40)
#  created_at    :datetime         not null
#  updated_at    :datetime         not null
#  deleted_at    :datetime
#  deleted_by_id :integer
#
# Indexes
#
#  index_topic_embeds_on_embed_url  (embed_url) UNIQUE
#
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`require_dependency 'nokogiri'`
Move escape_uri method to a more suitable place 2017-12-12 11:50:39 -05:00			`require_dependency 'url_helper'`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00
			`class TopicEmbed < ActiveRecord::Base`
FIX: make TopicEmbed trashable 2017-04-24 14:29:04 -04:00			`include Trashable`

FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`belongs_to :topic`
			`belongs_to :post`
			`validates_presence_of :embed_url`
FIX: `PG::UniqueViolation` when trying to use the same embed code Previously providing an embed code already in use would result in a logged server error. After this commit the error is gracefully bubbled up from the `PostCreator` 2015-06-15 12:08:55 -04:00			`validates_uniqueness_of :embed_url`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00
FIX: make TopicEmbed trashable 2017-04-24 14:29:04 -04:00			`before_validation(on: :create) do`
			`unless (topic_embed = TopicEmbed.with_deleted.where('deleted_at IS NOT NULL AND embed_url = ?', embed_url).first).nil?`
			`topic_embed.destroy!`
			`end`
			`end`

FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`class FetchResponse`
			`attr_accessor :title, :body, :author`
			`end`

Consider URLs the same even with a trailing slash 2014-03-19 16:33:21 -04:00			`def self.normalize_url(url)`
FIX: Strips spaces from the end of URLs 2014-04-03 15:35:31 -04:00			`url.downcase.sub(/\/$/, '').sub(/\-+/, '-').strip`
Consider URLs the same even with a trailing slash 2014-03-19 16:33:21 -04:00			`end`

Minor tweaks to importing first posts 2014-04-02 15:54:21 -04:00			`def self.imported_from_html(url)`
			`"\n<hr>\n<small>#{I18n.t('embed.imported_from', link: "<a href='#{url}'>#{url}</a>")}</small>\n"`
			`end`

FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`# Import an article from a source (RSS/Atom/Other)`
			`def self.import(user, url, title, contents)`
			`return unless url =~ /^https?\:\/\//`

Updated import for TypePad 2014-03-18 18:02:33 -04:00			`if SiteSetting.embed_truncate`
			`contents = first_paragraph_from(contents)`
			`end`
FIX: Sometimes `contents` is nil, don't break in that case. 2015-05-06 12:40:24 -04:00			`contents \|\|= ''`
Minor tweaks to importing first posts 2014-04-02 15:54:21 -04:00			`contents << imported_from_html(url)`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00
Normalize URL from Feed Entry after adding link to original in Topic Since a URL might be case sensitive, adding a link to the original Feed Entry with changed case to the Topic could end in 404. 2014-03-26 23:24:57 -04:00			`url = normalize_url(url)`

Perform the where(...).first to find_by(...) refactoring. This refactoring was automated using the command: bundle exec "ruby refactorings/where_dot_first_to_find_by/app.rb" 2014-05-06 09:41:59 -04:00			`embed = TopicEmbed.find_by("lower(embed_url) = ?", url)`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`content_sha1 = Digest::SHA1.hexdigest(contents)`
			`post = nil`

			`# If there is no embed, create a topic, post and the embed.`
			`if embed.blank?`
			`Topic.transaction do`
FEATURE: Support for a whitelist for embeddable host paths 2016-08-23 14:55:52 -04:00			`eh = EmbeddableHost.record_for_url(url)`
FEATURE: Can edit category/host relationships for embedding 2015-08-18 17:15:46 -04:00
FEATURE: support markdown rendering for embedded posts 2018-03-10 21:26:47 -05:00			`cook_method = if SiteSetting.embed_support_markdown`
			`Post.cook_methods[:regular]`
			`else`
			`Post.cook_methods[:raw_html]`
			`end`

Add site setting for category of topics created by embedding 2014-01-24 00:25:48 -05:00			`creator = PostCreator.new(user,`
			`title: title,`
			`raw: absolutize_urls(url, contents),`
			`skip_validations: true,`
FEATURE: support markdown rendering for embedded posts 2018-03-10 21:26:47 -05:00			`cook_method: cook_method,`
FEATURE: Can edit category/host relationships for embedding 2015-08-18 17:15:46 -04:00			`category: eh.try(:category_id))`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`post = creator.create`
			`if post.present?`
			`TopicEmbed.create!(topic_id: post.topic_id,`
			`embed_url: url,`
			`content_sha1: content_sha1,`
			`post_id: post.id)`
			`end`
			`end`
			`else`
Updated import for TypePad 2014-03-18 18:02:33 -04:00			`absolutize_urls(url, contents)`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`post = embed.post`
FIX: update TopicEmbed's title and user correctly 2018-08-21 06:19:03 -04:00
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`# Update the topic if it changed`
FIX: update TopicEmbed's title and user correctly 2018-08-21 06:19:03 -04:00			`if post&.topic`
			`if post.user != user`
			`PostOwnerChanger.new(`
			`post_ids: [post.id],`
			`topic_id: post.topic_id,`
			`new_owner: user,`
			`acting_user: Discourse.system_user`
			`).change_owner!`

			`# make sure the post returned has the right author`
			`post.reload`
			`end`

DEV: Update test case for `TopicEmbed`. 2018-08-23 21:41:54 -04:00			`if content_sha1 != embed.content_sha1`
FIX: drop title updates through RSS feeds can create an update loop 2018-08-28 02:25:04 -04:00			`post.revise(`
			`user,`
			`{ raw: absolutize_urls(url, contents) },`
DEV: Update test case for `TopicEmbed`. 2018-08-23 21:41:54 -04:00			`skip_validations: true,`
			`bypass_rate_limiter: true`
			`)`
FIX: drop title updates through RSS feeds can create an update loop 2018-08-28 02:25:04 -04:00			`embed.update!(content_sha1: content_sha1)`
DEV: Update test case for `TopicEmbed`. 2018-08-23 21:41:54 -04:00			`end`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`end`
			`end`

			`post`
			`end`

Remote fetching of blog contents 2014-04-01 18:16:56 -04:00			`def self.find_remote(url)`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`require 'ruby-readability'`

Move escape_uri method to a more suitable place 2017-12-12 11:50:39 -05:00			`url = UrlHelper.escape_uri(url)`
FIX: do not escape already escaped chars in URL 2017-09-22 11:36:44 -04:00			`original_uri = URI.parse(url)`
FEATURE: whitelist and blacklist selectors for embeds 2014-04-15 00:06:51 -04:00			`opts = {`
			`tags: %w[div p code pre h1 h2 h3 b em i strong a img ul li ol blockquote],`
FIX: allow emoji class when crawling embedded content, add rspc-html-matchers 2015-09-24 18:20:59 -04:00			`attributes: %w[href src class],`
FEATURE: whitelist and blacklist selectors for embeds 2014-04-15 00:06:51 -04:00			`remove_empty_nodes: false`
			`}`

			`opts[:whitelist] = SiteSetting.embed_whitelist_selector if SiteSetting.embed_whitelist_selector.present?`
			`opts[:blacklist] = SiteSetting.embed_blacklist_selector if SiteSetting.embed_blacklist_selector.present?`
FIX: allow emoji class when crawling embedded content, add rspc-html-matchers 2015-09-24 18:20:59 -04:00			`embed_classname_whitelist = SiteSetting.embed_classname_whitelist if SiteSetting.embed_classname_whitelist.present?`
FEATURE: whitelist and blacklist selectors for embeds 2014-04-15 00:06:51 -04:00
FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`response = FetchResponse.new`
FIX: If the error is 401 unauthorized or such, just do nothing 2017-05-25 15:42:05 -04:00			`begin`
FIX: do not escape already escaped chars in URL 2017-09-22 11:36:44 -04:00			`html = open(url, allow_redirections: :safe).read`
FIX: Don't raise an error if URL timesout. 2017-08-09 23:58:56 -04:00			`rescue OpenURI::HTTPError, Net::OpenTimeout`
FIX: If the error is 401 unauthorized or such, just do nothing 2017-05-25 15:42:05 -04:00			`return`
			`end`
Minor tweaks to importing first posts 2014-04-02 15:54:21 -04:00
FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`raw_doc = Nokogiri::HTML(html)`
			`auth_element = raw_doc.at('meta[@name="author"]')`
			`if auth_element.present?`
			`response.author = User.where(username_lower: auth_element[:content].strip).first`
			`end`

			`read_doc = Readability::Document.new(html, opts)`

			`title = raw_doc.title \|\| ''`
FEATURE: Ability to scrub titles when importing embeddable content 2016-08-22 12:43:02 -04:00			`title.strip!`

			`if SiteSetting.embed_title_scrubber.present?`
			`title.sub!(Regexp.new(SiteSetting.embed_title_scrubber), '')`
			`title.strip!`
			`end`
FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`response.title = title`
			`doc = Nokogiri::HTML(read_doc.content)`
FEATURE: Ability to scrub titles when importing embeddable content 2016-08-22 12:43:02 -04:00
Add rubocop to our build. (#5004) 2017-07-27 21:20:09 -04:00			`tags = { 'img' => 'src', 'script' => 'src', 'a' => 'href' }`
Minor tweaks to importing first posts 2014-04-02 15:54:21 -04:00			`doc.search(tags.keys.join(',')).each do \|node\|`
			`url_param = tags[node.name]`
			`src = node[url_param]`
Fix `NoMethodError` in TopicEmbed#find_remote Stop TopicEmbed#find_remote from generating `NoMethodError: undefined method `empty?' for nil:NilClass` exceptions 2015-04-22 19:52:02 -04:00			`unless (src.nil? \|\| src.empty?)`
FIX: If there is an invalid URI in the import, don't throw an error 2014-04-09 11:04:45 -04:00			`begin`
Move escape_uri method to a more suitable place 2017-12-12 11:50:39 -05:00			`uri = URI.parse(UrlHelper.escape_uri(src))`
FIX: If there is an invalid URI in the import, don't throw an error 2014-04-09 11:04:45 -04:00			`unless uri.host`
			`uri.scheme = original_uri.scheme`
			`uri.host = original_uri.host`
			`node[url_param] = uri.to_s`
			`end`
FIX: store the topic links using the cooked upload url 2018-08-14 06:23:32 -04:00			`rescue URI::Error`
FIX: If there is an invalid URI in the import, don't throw an error 2014-04-09 11:04:45 -04:00			`# If there is a mistyped URL, just do nothing`
Minor tweaks to importing first posts 2014-04-02 15:54:21 -04:00			`end`
			`end`
FIX: allow emoji class when crawling embedded content, add rspc-html-matchers 2015-09-24 18:20:59 -04:00			`# only allow classes in the whitelist`
			`allowed_classes = if embed_classname_whitelist.blank? then [] else embed_classname_whitelist.split(/[ ,]+/i) end`
FIX: Don't normalize URLs before fetching them, only for saving them 2015-11-06 16:25:11 -05:00			`doc.search('[class]:not([class=""])').each do \|classnode\|`
Add rubocop to our build. (#5004) 2017-07-27 21:20:09 -04:00			`classes = classnode[:class].split(' ').select { \|classname\| allowed_classes.include?(classname) }`
FIX: allow emoji class when crawling embedded content, add rspc-html-matchers 2015-09-24 18:20:59 -04:00			`if classes.length === 0`
FIX: Don't normalize URLs before fetching them, only for saving them 2015-11-06 16:25:11 -05:00			`classnode.delete('class')`
FIX: allow emoji class when crawling embedded content, add rspc-html-matchers 2015-09-24 18:20:59 -04:00			`else`
FIX: Don't normalize URLs before fetching them, only for saving them 2015-11-06 16:25:11 -05:00			`classnode[:class] = classes.join(' ')`
FIX: allow emoji class when crawling embedded content, add rspc-html-matchers 2015-09-24 18:20:59 -04:00			`end`
			`end`
Minor tweaks to importing first posts 2014-04-02 15:54:21 -04:00			`end`

FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`response.body = doc.to_html`
			`response`
Remote fetching of blog contents 2014-04-01 18:16:56 -04:00			`end`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00
Add rubocop to our build. (#5004) 2017-07-27 21:20:09 -04:00			`def self.import_remote(import_user, url, opts = nil)`
Remote fetching of blog contents 2014-04-01 18:16:56 -04:00			`opts = opts \|\| {}`
FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`response = find_remote(url)`
FIX: do not escape already escaped chars in URL 2017-09-22 11:36:44 -04:00			`return if response.nil?`

FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`response.title = opts[:title] if opts[:title].present?`
			`import_user = response.author if response.author.present?`

			`TopicEmbed.import(import_user, url, response.title, response.body)`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`end`

			`# Convert any relative URLs to absolute. RSS is annoying for this.`
			`def self.absolutize_urls(url, contents)`
Consider URLs the same even with a trailing slash 2014-03-19 16:33:21 -04:00			`url = normalize_url(url)`
Move escape_uri method to a more suitable place 2017-12-12 11:50:39 -05:00			`uri = URI(UrlHelper.escape_uri(url))`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`prefix = "#{uri.scheme}://#{uri.host}"`
			`prefix << ":#{uri.port}" if uri.port != 80 && uri.port != 443`

Updated import for TypePad 2014-03-18 18:02:33 -04:00			`fragment = Nokogiri::HTML.fragment("<div>#{contents}</div>")`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`fragment.css('a').each do \|a\|`
			`href = a['href']`
			`if href.present? && href.start_with?('/')`
			`a['href'] = "#{prefix}/#{href.sub(/^\/+/, '')}"`
			`end`
			`end`
			`fragment.css('img').each do \|a\|`
			`src = a['src']`
			`if src.present? && src.start_with?('/')`
			`a['src'] = "#{prefix}/#{src.sub(/^\/+/, '')}"`
			`end`
			`end`
Updated import for TypePad 2014-03-18 18:02:33 -04:00			`fragment.at('div').inner_html`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`end`

			`def self.topic_id_for_embed(embed_url)`
FIX: do not create duplicate topics https://meta.discourse.org/t/duplicate-http-https-topics-are-randomly-created/77190 2018-01-04 13:13:17 -05:00			`embed_url = normalize_url(embed_url).sub(/^https?\:\/\//, '')`
FIX: properly escape embed url 2019-01-06 17:12:02 -05:00			`TopicEmbed.where("embed_url ~* ?", "^https?://#{Regexp.escape(embed_url)}$").pluck(:topic_id).first`
FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`end`

Updated import for TypePad 2014-03-18 18:02:33 -04:00			`def self.first_paragraph_from(html)`
			`doc = Nokogiri::HTML(html)`

			`result = ""`
			`doc.css('p').each do \|p\|`
			`if p.text.present?`
			`result << p.to_s`
			`return result if result.size >= 100`
			`end`
			`end`
			`return result unless result.blank?`

			`# If there is no first paragaph, return the first div (onebox)`
			`doc.css('div').first`
			`end`
FIX: Allow expanding posts when anonymous, add specs 2014-04-03 11:30:43 -04:00
			`def self.expanded_for(post)`
			`Rails.cache.fetch("embed-topic:#{post.topic_id}", expires_in: 10.minutes) do`
			`url = TopicEmbed.where(topic_id: post.topic_id).pluck(:embed_url).first`
FEATURE: Support author meta tags for embedding 2016-08-30 12:01:04 -04:00			`response = TopicEmbed.find_remote(url)`

			`body = response.body`
FIX: Allow expanding posts when anonymous, add specs 2014-04-03 11:30:43 -04:00			`body << TopicEmbed.imported_from_html(url)`
			`body`
			`end`
			`end`

FEATURE: Embeddable Discourse comments, now with simple-rss instead of feedzirra 2013-12-31 14:37:43 -05:00			`end`
Update annotations 2014-02-06 19:07:36 -05:00
			`# == Schema Information`
			`#`
			`# Table name: topic_embeds`
			`#`
FIX: make TopicEmbed trashable 2017-04-24 14:29:04 -04:00			`# id :integer not null, primary key`
			`# topic_id :integer not null`
			`# post_id :integer not null`
			`# embed_url :string(1000) not null`
			`# content_sha1 :string(40)`
			`# created_at :datetime not null`
			`# updated_at :datetime not null`
			`# deleted_at :datetime`
			`# deleted_by_id :integer`
Update annotations 2014-02-06 19:07:36 -05:00			`#`
			`# Indexes`
			`#`
			`# index_topic_embeds_on_embed_url (embed_url) UNIQUE`
			`#`