# frozen_string_literal: true

# Cooks a raw post body and inspects the resulting HTML to answer questions
# about it: how many links, linked hosts, mentions, embedded media and
# attachments it contains, and whether any onebox was rendered.
#
# The cooked and stripped document is computed lazily and cached, so the
# different counters share a single cooking pass.
class PostAnalyzer
  # raw      - the raw (uncooked) post body; may be nil or blank
  # topic_id - forwarded to the cooker as the :topic_id option
  def initialize(raw, topic_id)
    @raw = raw
    @topic_id = topic_id
    @onebox_urls = []
    @found_oneboxes = false
  end

  # True once a call to #cook has produced at least one cached onebox.
  def found_oneboxes?
    @found_oneboxes
  end

  # Cooks the post (if not done already) and reports whether a onebox was found.
  # Always false for a blank post.
  def has_oneboxes?
    return false unless @raw.present?

    cooked_stripped
    found_oneboxes?
  end

  # What we use to cook posts
  #
  # raw  - the text to cook
  # opts - cooking options; :cook_method selects the cooker and
  #        :invalidate_oneboxes forces onebox caches to be invalidated.
  #        The whole hash is also handed to the underlying cooker.
  #
  # Returns the cooked HTML, or `raw` untouched for the raw_html cook method.
  # As a side effect, records every onebox URL it handles in @onebox_urls and
  # flips @found_oneboxes when a cached onebox is present.
  def cook(raw, opts = {})
    cook_method = opts[:cook_method]
    return raw if cook_method == Post.cook_methods[:raw_html]

    cooked =
      if cook_method == Post.cook_methods[:email]
        EmailCook.new(raw).cook(opts)
      else
        PrettyText.cook(raw, opts)
      end

    # Remaining number of oneboxes this post is still allowed to render.
    limit = SiteSetting.max_oneboxes_per_post
    result =
      Oneboxer.apply(cooked, extra_paths: ".inline-onebox-loading") do |url, element|
        if opts[:invalidate_oneboxes]
          Oneboxer.invalidate(url)
          InlineOneboxer.invalidate(url)
        end
        # Only full oneboxes are counted and replaced here
        next if element["class"] != Oneboxer::ONEBOX_CSS_CLASS
        next if limit <= 0
        limit -= 1
        # The URL is recorded even when no cached onebox is available for it
        @onebox_urls << url
        onebox = Oneboxer.cached_onebox(url)
        @found_oneboxes = true if onebox.present?
        onebox
      end

    if result.changed?
      PrettyText.sanitize_hotlinked_media(result.doc)
      cooked = result.to_html
    end

    cooked
  end

  # How many media elements (img, video, audio) are present in the post,
  # ignoring elements that carry one of Post.allowed_image_classes.
  def embedded_media_count
    return 0 unless @raw.present?

    allowed_classes = Post.allowed_image_classes

    # TODO - do we need to look for tags other than img, video and audio?
    cooked_stripped
      .css("img", "video", "audio")
      .count { |node| (allowed_classes & node["class"].to_s.split).empty? }
  end

  # How many attachments are present in the post
  #
  # An attachment is an `a.attachment` link whose href starts with the store's
  # absolute base URL, or with its relative base URL when the store is internal.
  def attachment_count
    return 0 unless @raw.present?

    store = Discourse.store
    attachments = cooked_stripped.css(attachment_selector(store.absolute_base_url))
    if store.internal?
      attachments += cooked_stripped.css(attachment_selector(store.relative_base_url))
    end
    attachments.count
  end

  # Mentions extracted from the cooked post; [] for a blank post.
  # The result is cached, including when no mention was found.
  def raw_mentions
    return [] if @raw.blank?

    @raw_mentions ||= PrettyText.extract_mentions(cooked_stripped)
  end

  # Parses `uri` with the RFC 2396 parser when the running Ruby provides one,
  # falling back to URI.parse otherwise (approach borrowed from Rack).
  # Raises a URI::Error subclass when the string cannot be parsed.
  def self.parse_uri_rfc2396(uri)
    @parser ||= defined?(URI::RFC2396_Parser) ? URI::RFC2396_Parser.new : URI
    @parser.parse(uri)
  end

  # Hosts linked in the post, as a hash of host => 1 (each host appears once).
  #
  # Both regular links and onebox URLs are considered. Only a non-empty result
  # is cached: @onebox_urls can still grow if #cook is called again, so an
  # empty answer is recomputed on the next call.
  def linked_hosts
    all_links = raw_links + @onebox_urls

    return {} if all_links.blank?
    return @linked_hosts if @linked_hosts.present?

    @linked_hosts =
      all_links.each_with_object({}) do |link, hosts|
        begin
          host = self.class.parse_uri_rfc2396(link).host
          hosts[host] ||= 1 unless host.nil?
        rescue URI::Error
          # An invalid URI does not count as a host
          next
        end
      end
  end

  # Returns an array of all links in a post excluding mentions, heading
  # anchors and hashtags. A link without an href contributes an empty string.
  # The result is cached, including when the post has no links.
  def raw_links
    return [] unless @raw.present?

    @raw_links ||=
      cooked_stripped
        .css("a")
        .select { |link| countable_link?(link) }
        .map { |link| link["href"].to_s }
  end

  # How many links are present in the post
  def link_count
    raw_links.size + @onebox_urls.size
  end

  # The post cooked with the :topic_id option, parsed as an HTML5 fragment and
  # passed through PostStripper.strip. Computed once and then reused.
  def cooked_stripped
    @cooked_stripped ||=
      begin
        cooked = cook(@raw, topic_id: @topic_id)
        fragment = Nokogiri::HTML5.fragment(cooked)
        PostStripper.strip(fragment)
      end
  end

  private

  # CSS selector for attachment links whose href starts with `base_url`.
  def attachment_selector(base_url)
    "a.attachment[href^=\"#{base_url}\"]"
  end

  # A link counts unless it is a mention, a heading anchor or a hashtag.
  def countable_link?(link)
    !(link_is_a_mention?(link) || link_is_an_anchor?(link) || link_is_a_hashtag?(link))
  end

  # Mention links have "mention" in their class and point at a user path.
  def link_is_a_mention?(l)
    href = l["href"].to_s
    l["class"].to_s.include?("mention") &&
      (
        href.start_with?("#{Discourse.base_path}/u/") ||
          href.start_with?("#{Discourse.base_path}/users/")
      )
  end

  # Anchor links have "anchor" in their class and an in-page (#...) href.
  def link_is_an_anchor?(l)
    l["class"].to_s.include?("anchor") && l["href"].to_s.start_with?("#")
  end

  # Hashtag links have "hashtag" in their class and point at a category or tag path.
  def link_is_a_hashtag?(l)
    href = l["href"].to_s
    l["class"].to_s.include?("hashtag") &&
      (
        href.start_with?("#{Discourse.base_path}/c/") ||
          href.start_with?("#{Discourse.base_path}/tag/")
      )
  end
end