# Post processing that we can do after a post has already been cooked.
# For example, inserting the onebox content, or image sizes/thumbnails.
require_dependency "oneboxer"
class CookedPostProcessor
  include ActionView::Helpers::NumberHelper

  # @param post [Post] the post whose cooked HTML will be processed
  # @param opts [Hash] :image_sizes => { url => { "width" => w, "height" => h } }
  #                    :invalidate_oneboxes => force a refresh of cached oneboxes
  def initialize(post, opts = {})
    @dirty = false
    @opts = opts
    @post = post
    @doc = Nokogiri::HTML::fragment(post.cooked)
    # memoizes FastImage lookups per url (see #get_size)
    @size_cache = {}
  end

  # Runs every post-processing step, in order.
  def post_process
    keep_reverse_index_up_to_date
    post_process_images
    post_process_oneboxes
    optimize_urls
    pull_hotlinked_images
  end

  # Rebuilds the post_uploads reverse index from the uploads referenced by
  # the links and images in the cooked HTML.
  def keep_reverse_index_up_to_date
    upload_ids = Set.new

    @doc.search("a").each do |a|
      href = a["href"].to_s
      if upload = Upload.get_from_url(href)
        upload_ids << upload.id
      end
    end

    @doc.search("img").each do |img|
      src = img["src"].to_s
      if upload = Upload.get_from_url(src)
        upload_ids << upload.id
      end
    end

    # both @post.id and the upload ids come straight from the database
    # (integer primary keys), so interpolating them into raw SQL is safe
    values = upload_ids.map { |u| "(#{@post.id},#{u})" }.join(",")

    PostUpload.transaction do
      PostUpload.delete_all(post_id: @post.id)
      if upload_ids.length > 0
        PostUpload.exec_sql("INSERT INTO post_uploads (post_id, upload_id) VALUES #{values}")
      end
    end
  end

  # Resizes images down to the site limits and wraps downsized ones in a
  # lightbox link; marks the document dirty when anything changed.
  def post_process_images
    images = extract_images
    return if images.blank?

    images.each do |img|
      src, width, height = img["src"], img["width"], img["height"]
      limit_size!(img)
      convert_to_link!(img)
      # dirty when either the src or the dimensions were altered
      @dirty |= (src != img["src"]) || (width.to_i != img["width"].to_i) || (height.to_i != img["height"].to_i)
    end

    update_topic_image(images)
  end

  # All images in the document, except those inside oneboxes or quotes.
  def extract_images
    # do not extract images inside oneboxes or quotes
    @doc.css("img") - @doc.css(".onebox-result img") - @doc.css(".quote img")
  end

  # Caps the image's width/height attributes to the site's thumbnail
  # limits, preserving the aspect ratio via ImageSizer.
  def limit_size!(img)
    w, h = get_size_from_image_sizes(img["src"], @opts[:image_sizes]) || get_size(img["src"])
    # limit the size of the thumbnail
    img["width"], img["height"] = ImageSizer.resize(w, h)
  end

  # Looks up the size the composer reported for +src+ in the :image_sizes
  # option. Returns [width, height], or nil when absent/not found.
  def get_size_from_image_sizes(src, image_sizes)
    return unless image_sizes.present?
    image_sizes.each do |image_size|
      url, size = image_size[0], image_size[1]
      return [size["width"], size["height"]] if url.include?(src)
    end
  end

  # Determines [width, height] of the image at +url+ with FastImage,
  # memoized in @size_cache. Returns nil for invalid or non-crawlable urls.
  def get_size(url)
    absolute_url = url
    absolute_url = Discourse.base_url_no_prefix + absolute_url if absolute_url =~ /^\/[^\/]/
    # FastImage fails when there's no scheme
    absolute_url = (SiteSetting.use_ssl? ? "https:" : "http:") + absolute_url if absolute_url.start_with?("//")
    return unless is_valid_image_url?(absolute_url)
    # we can *always* crawl our own images
    return unless SiteSetting.crawl_images? || Discourse.store.has_been_uploaded?(url)
    @size_cache[url] ||= FastImage.size(absolute_url)
  rescue Zlib::BufError # FastImage.size raises BufError for some gifs
  end

  # Whether +url+ parses as an http(s) url.
  def is_valid_image_url?(url)
    uri = URI.parse(url)
    %w(http https).include? uri.scheme
  rescue URI::InvalidURIError
    false
  end

  # Wraps the image in a lightbox link pointing at the original when the
  # image had to be scaled down — unless it's already inside a hyperlink.
  def convert_to_link!(img)
    src = img["src"]
    return unless src.present?

    width, height = img["width"].to_i, img["height"].to_i
    original_width, original_height = get_size(src)

    # nothing to do when the image wasn't downsized
    return if original_width.to_i <= width && original_height.to_i <= height
    return if original_width.to_i <= SiteSetting.max_image_width && original_height.to_i <= SiteSetting.max_image_height
    # don't nest a lightbox link inside an existing link
    return if is_a_hyperlink?(img)

    if upload = Upload.get_from_url(src)
      upload.create_thumbnail!(width, height)
      # TODO: optimize_image!(img)
    end

    add_lightbox!(img, original_width, original_height, upload)
    @dirty = true
  end

  # Whether the image has an <a> ancestor.
  # FIX: previously every path returned nil ("return if ..." and the
  # exhausted while loop both yield nil), so the "already linked" guard in
  # convert_to_link! never fired and linked images were lightboxed anyway.
  def is_a_hyperlink?(img)
    parent = img.parent
    while parent
      return true if parent.name == "a"
      break unless parent.respond_to? :parent
      parent = parent.parent
    end
    false
  end

  # Wraps +img+ in <div><a class="lightbox">img</a><div class="meta"/></div>,
  # swaps the src for a thumbnail when one exists, and adds the
  # filename/dimensions/expand overlay spans.
  def add_lightbox!(img, original_width, original_height, upload = nil)
    # first, create a div to hold our lightbox
    lightbox = Nokogiri::XML::Node.new("div", @doc)
    img.add_next_sibling(lightbox)
    lightbox.add_child(img)

    # then, the link to our larger image
    a = Nokogiri::XML::Node.new("a", @doc)
    img.add_next_sibling(a)
    a["href"] = img["src"]
    a["class"] = "lightbox"
    a.add_child(img)

    # replace the image by its thumbnail
    w, h = img["width"].to_i, img["height"].to_i
    img["src"] = upload.thumbnail(w, h).url if upload && upload.has_thumbnail?(w, h)

    # then, some overlay informations
    meta = Nokogiri::XML::Node.new("div", @doc)
    meta["class"] = "meta"
    img.add_next_sibling(meta)

    filename = get_filename(upload, img["src"])
    informations = "#{original_width}x#{original_height}"
    informations << " #{number_to_human_size(upload.filesize)}" if upload

    meta.add_child create_span_node("filename", filename)
    meta.add_child create_span_node("informations", informations)
    meta.add_child create_span_node("expand")
  end

  # Display name for the overlay: the upload's original filename, the src's
  # basename when there is no upload, or a localized placeholder when the
  # file was a pasted image ("blob" / "blob.png").
  def get_filename(upload, src)
    return File.basename(src) unless upload
    return upload.original_filename unless upload.original_filename =~ /^blob(\.png)?$/i
    return I18n.t("upload.pasted_image_filename")
  end

  # Builds a <span class="klass">content</span> node.
  def create_span_node(klass, content = nil)
    span = Nokogiri::XML::Node.new("span", @doc)
    span.content = content if content
    span["class"] = klass
    span
  end

  # The first image of the topic's first post becomes the topic image.
  def update_topic_image(images)
    if @post.post_number == 1
      img = images.first
      @post.topic.update_column(:image_url, img["src"]) if img["src"].present?
    end
  end

  # Replaces onebox placeholders in the document with their cooked content.
  def post_process_oneboxes
    args = {
      post_id: @post.id,
      invalidate_oneboxes: !!@opts[:invalidate_oneboxes],
    }

    result = Oneboxer.apply(@doc) do |url, element|
      Oneboxer.onebox(url, args)
    end

    @dirty |= result.changed?
  end

  # Rewrites uploaded asset urls to be absolute and protocol-relative.
  def optimize_urls
    @doc.search("a").each do |a|
      href = a["href"].to_s
      if Discourse.store.has_been_uploaded?(href)
        a["href"] = schemaless relative_to_absolute(href)
      end
    end

    @doc.search("img").each do |img|
      src = img["src"].to_s
      if Discourse.store.has_been_uploaded?(src)
        img["src"] = schemaless relative_to_absolute(src)
      end
    end
  end

  # Prefixes relative ("/...") urls with the asset host or the base url.
  def relative_to_absolute(url)
    url =~ /^\/[^\/]/ ? (Discourse.asset_host || Discourse.base_url_no_prefix) + url : url
  end

  # Strips the scheme so the url works on both http and https pages.
  def schemaless(url)
    url.gsub(/^https?:/, "")
  end

  # Schedules a background job to pull hotlinked images into local uploads,
  # debounced to the ninja-edit window.
  def pull_hotlinked_images
    # we don't want to run the job if we're not allowed to crawl images
    return unless SiteSetting.crawl_images?
    # we only want to run the job whenever it's changed by a user
    return if @post.updated_by == Discourse.system_user
    # make sure no other job is scheduled
    Jobs.cancel_scheduled_job(:pull_hotlinked_images, post_id: @post.id)
    # schedule the job
    delay = SiteSetting.ninja_edit_window + 1
    Jobs.enqueue_in(delay.seconds.to_i, :pull_hotlinked_images, post_id: @post.id)
  end

  # Whether any post-processing step modified the document.
  def dirty?
    @dirty
  end

  # The processed HTML fragment.
  def html
    @doc.try(:to_html)
  end

end