From 9bff0882c3df594850de68ade2697407c84dcd7b Mon Sep 17 00:00:00 2001
From: Krzysztof Kotlarek
Date: Tue, 5 May 2020 13:46:57 +1000
Subject: [PATCH] FEATURE: Nokogumbo (#9577)
* FEATURE: Nokogumbo
Use Nokogumbo HTML parser.
---
app/helpers/user_notifications_helper.rb | 4 +-
app/jobs/onceoff/grant_emoji.rb | 2 +-
app/jobs/onceoff/grant_onebox.rb | 2 +-
app/jobs/regular/pull_hotlinked_images.rb | 2 +-
app/jobs/regular/update_username.rb | 4 +-
app/models/category.rb | 2 +-
app/models/post.rb | 2 +-
app/models/post_analyzer.rb | 2 +-
app/models/quoted_post.rb | 2 +-
app/models/theme_field.rb | 2 +-
app/models/topic_embed.rb | 8 +-
app/services/inline_uploads.rb | 6 +-
app/services/search_indexer.rb | 2 +-
...3951_backfill_post_upload_reverse_index.rb | 2 +-
db/migrate/20140715055242_add_quoted_posts.rb | 2 +-
lib/content_security_policy/extension.rb | 3 +-
lib/cooked_post_processor.rb | 4 +-
lib/discourse_diff.rb | 2 +-
lib/email/receiver.rb | 2 +-
lib/email/styles.rb | 10 +-
.../engine/whitelisted_generic_onebox.rb | 2 +-
lib/oneboxer.rb | 6 +-
lib/post_revisor.rb | 2 +-
lib/pretty_text.rb | 26 +++--
lib/quote_comparer.rb | 2 +-
lib/retrieve_title.rb | 2 +-
lib/reviewable/conversation.rb | 2 +-
lib/tasks/emoji.rake | 2 +-
.../spec/components/pretty_text_spec.rb | 4 +-
.../lib/discourse_narrative_bot/actions.rb | 2 +-
.../advanced_user_narrative.rb | 6 +-
.../new_user_narrative.rb | 8 +-
plugins/poll/plugin.rb | 2 +-
plugins/poll/spec/lib/pretty_text_spec.rb | 2 +-
script/import_scripts/ipboard3.rb | 2 +-
script/import_scripts/jive.rb | 2 +-
script/import_scripts/jive_api.rb | 2 +-
script/import_scripts/lithium.rb | 2 +-
spec/components/cooked_post_processor_spec.rb | 107 ++++++++----------
spec/components/email/styles_spec.rb | 4 +-
spec/components/excerpt_parser_spec.rb | 2 +-
spec/components/pretty_text_spec.rb | 32 +++---
spec/lib/content_security_policy_spec.rb | 6 +-
spec/models/topic_embed_spec.rb | 4 +-
spec/requests/categories_controller_spec.rb | 2 +-
spec/requests/email_controller_spec.rb | 6 +-
spec/requests/embed_controller_spec.rb | 2 +-
.../requests/user_api_keys_controller_spec.rb | 2 +-
spec/services/username_changer_spec.rb | 34 +++---
spec/support/match_html_matcher.rb | 2 +-
50 files changed, 165 insertions(+), 179 deletions(-)
diff --git a/app/helpers/user_notifications_helper.rb b/app/helpers/user_notifications_helper.rb
index eb0293183b2..ffbf7352c2a 100644
--- a/app/helpers/user_notifications_helper.rb
+++ b/app/helpers/user_notifications_helper.rb
@@ -13,7 +13,7 @@ module UserNotificationsHelper
end
def correct_top_margin(html, desired)
- fragment = Nokogiri::HTML.fragment(html)
+ fragment = Nokogiri::HTML5.fragment(html)
if para = fragment.css("p:first").first
para["style"] = "margin-top: #{desired};"
end
@@ -32,7 +32,7 @@ module UserNotificationsHelper
end
def first_paragraphs_from(html)
- doc = Nokogiri::HTML(html)
+ doc = Nokogiri::HTML5(html)
result = +""
length = 0
diff --git a/app/jobs/onceoff/grant_emoji.rb b/app/jobs/onceoff/grant_emoji.rb
index 5f85b431b82..5abdb34d5d4 100644
--- a/app/jobs/onceoff/grant_emoji.rb
+++ b/app/jobs/onceoff/grant_emoji.rb
@@ -14,7 +14,7 @@ module Jobs
.where("cooked LIKE '%emoji%'")
.find_in_batches do |group|
group.each do |p|
- doc = Nokogiri::HTML::fragment(p.cooked)
+ doc = Nokogiri::HTML5::fragment(p.cooked)
if (doc.css("img.emoji") - doc.css(".quote img")).size > 0
to_award[p.user_id] ||= { post_id: p.id, created_at: p.created_at }
end
diff --git a/app/jobs/onceoff/grant_onebox.rb b/app/jobs/onceoff/grant_onebox.rb
index 59cf443f4e5..66d2cf26706 100644
--- a/app/jobs/onceoff/grant_onebox.rb
+++ b/app/jobs/onceoff/grant_onebox.rb
@@ -19,7 +19,7 @@ module Jobs
begin
# Note we can't use `p.cooked` here because oneboxes have been cooked out
cooked = PrettyText.cook(p.raw)
- doc = Nokogiri::HTML::fragment(cooked)
+ doc = Nokogiri::HTML5::fragment(cooked)
if doc.search('a.onebox').size > 0
to_award[p.user_id] ||= { post_id: p.id, created_at: p.created_at }
end
diff --git a/app/jobs/regular/pull_hotlinked_images.rb b/app/jobs/regular/pull_hotlinked_images.rb
index 25703659460..e7644e8ca84 100644
--- a/app/jobs/regular/pull_hotlinked_images.rb
+++ b/app/jobs/regular/pull_hotlinked_images.rb
@@ -157,7 +157,7 @@ module Jobs
end
def extract_images_from(html)
- doc = Nokogiri::HTML::fragment(html)
+ doc = Nokogiri::HTML5::fragment(html)
doc.css("img[src], a.lightbox[href]") -
doc.css("img.avatar") -
diff --git a/app/jobs/regular/update_username.rb b/app/jobs/regular/update_username.rb
index d43c119060d..7c9fcedb5a2 100644
--- a/app/jobs/regular/update_username.rb
+++ b/app/jobs/regular/update_username.rb
@@ -154,11 +154,11 @@ module Jobs
# and there is no reason to invalidate oneboxes, run the post analyzer etc.
# when only the username changes.
def update_cooked(cooked)
- doc = Nokogiri::HTML.fragment(cooked)
+ doc = Nokogiri::HTML5.fragment(cooked)
doc.css("a.mention").each do |a|
a.content = a.content.gsub(@cooked_mention_username_regex, "@#{@new_username}")
- a["href"] = a["href"].gsub(@cooked_mention_user_path_regex, "/u/#{@new_username}") if a["href"]
+ a["href"] = a["href"].gsub(@cooked_mention_user_path_regex, "/u/#{URI.escape(@new_username)}") if a["href"]
end
doc.css("aside.quote").each do |aside|
diff --git a/app/models/category.rb b/app/models/category.rb
index bbf1e9b98cb..ddab403c5d2 100644
--- a/app/models/category.rb
+++ b/app/models/category.rb
@@ -306,7 +306,7 @@ class Category < ActiveRecord::Base
@@cache_text ||= LruRedux::ThreadSafeCache.new(1000)
@@cache_text.getset(self.description) do
- text = Nokogiri::HTML.fragment(self.description).text.strip
+ text = Nokogiri::HTML5.fragment(self.description).text.strip
Rack::Utils.escape_html(text).html_safe
end
end
diff --git a/app/models/post.rb b/app/models/post.rb
index 5da5d163200..f2ee10543f3 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -953,7 +953,7 @@ class Post < ActiveRecord::Base
/\/uploads\/short-url\/[a-zA-Z0-9]+(\.[a-z0-9]+)?/
]
- fragments ||= Nokogiri::HTML::fragment(self.cooked)
+ fragments ||= Nokogiri::HTML5::fragment(self.cooked)
selectors = fragments.css("a/@href", "img/@src", "source/@src", "track/@src", "video/@poster")
links = selectors.map do |media|
diff --git a/app/models/post_analyzer.rb b/app/models/post_analyzer.rb
index 63fe9724b4d..bae36c39f86 100644
--- a/app/models/post_analyzer.rb
+++ b/app/models/post_analyzer.rb
@@ -131,7 +131,7 @@ class PostAnalyzer
def cooked_stripped
@cooked_stripped ||= begin
- doc = Nokogiri::HTML.fragment(cook(@raw, topic_id: @topic_id))
+ doc = Nokogiri::HTML5.fragment(cook(@raw, topic_id: @topic_id))
doc.css("pre .mention, aside.quote > .title, aside.quote .mention, aside.quote .mention-group, .onebox, .elided").remove
doc
end
diff --git a/app/models/quoted_post.rb b/app/models/quoted_post.rb
index 03b981e1fb3..9a6a96e9ebf 100644
--- a/app/models/quoted_post.rb
+++ b/app/models/quoted_post.rb
@@ -9,7 +9,7 @@ class QuotedPost < ActiveRecord::Base
# we are double parsing this fragment, this may be worth optimising later
def self.extract_from(post)
- doc = Nokogiri::HTML.fragment(post.cooked)
+ doc = Nokogiri::HTML5.fragment(post.cooked)
uniq = {}
diff --git a/app/models/theme_field.rb b/app/models/theme_field.rb
index 351c7b90f1a..a9f98ce4078 100644
--- a/app/models/theme_field.rb
+++ b/app/models/theme_field.rb
@@ -78,7 +78,7 @@ class ThemeField < ActiveRecord::Base
js_compiler = ThemeJavascriptCompiler.new(theme_id, self.theme.name)
- doc = Nokogiri::HTML.fragment(html)
+ doc = Nokogiri::HTML5.fragment(html)
doc.css('script[type="text/x-handlebars"]').each do |node|
name = node["name"] || node["data-template-name"] || "broken"
diff --git a/app/models/topic_embed.rb b/app/models/topic_embed.rb
index 43de234a159..a18e5b85dea 100644
--- a/app/models/topic_embed.rb
+++ b/app/models/topic_embed.rb
@@ -126,7 +126,7 @@ class TopicEmbed < ActiveRecord::Base
return
end
- raw_doc = Nokogiri::HTML(html)
+ raw_doc = Nokogiri::HTML5(html)
auth_element = raw_doc.at('meta[@name="author"]')
if auth_element.present?
response.author = User.where(username_lower: auth_element[:content].strip).first
@@ -142,7 +142,7 @@ class TopicEmbed < ActiveRecord::Base
title.strip!
end
response.title = title
- doc = Nokogiri::HTML(read_doc.content)
+ doc = Nokogiri::HTML5(read_doc.content)
tags = { 'img' => 'src', 'script' => 'src', 'a' => 'href' }
doc.search(tags.keys.join(',')).each do |node|
@@ -198,7 +198,7 @@ class TopicEmbed < ActiveRecord::Base
prefix = "#{uri.scheme}://#{uri.host}"
prefix += ":#{uri.port}" if uri.port != 80 && uri.port != 443
- fragment = Nokogiri::HTML.fragment("<div>#{contents}</div>")
+ fragment = Nokogiri::HTML5.fragment("<div>#{contents}</div>")
fragment.css('a').each do |a|
href = a['href']
if href.present? && href.start_with?('/')
@@ -220,7 +220,7 @@ class TopicEmbed < ActiveRecord::Base
end
def self.first_paragraph_from(html)
- doc = Nokogiri::HTML(html)
+ doc = Nokogiri::HTML5(html)
result = +""
doc.css('p').each do |p|
diff --git a/app/services/inline_uploads.rb b/app/services/inline_uploads.rb
index e75348bccee..0facc329fba 100644
--- a/app/services/inline_uploads.rb
+++ b/app/services/inline_uploads.rb
@@ -16,7 +16,7 @@ class InlineUploads
end
end
- cooked_fragment = Nokogiri::HTML::fragment(PrettyText.cook(markdown, disable_emojis: true))
+ cooked_fragment = Nokogiri::HTML5::fragment(PrettyText.cook(markdown, disable_emojis: true))
link_occurences = []
cooked_fragment.traverse do |node|
@@ -183,7 +183,7 @@ class InlineUploads
def self.match_anchor(markdown, external_href: false)
 markdown.scan(/((<a[^<]+>)([^<\a>]*?)<\/a>)/i) do |match|
- node = Nokogiri::HTML::fragment(match[0]).children[0]
+ node = Nokogiri::HTML5::fragment(match[0]).children[0]
href = node.attributes["href"]&.value
if href && (matched_uploads(href).present? || external_href)
@@ -199,7 +199,7 @@ class InlineUploads
def self.match_img(markdown, external_src: false)
 markdown.scan(/(<(?!img)[^<>]+\/?>)?(\s*)(<img [^>\n]+>)/i) do |match|
- node = Nokogiri::HTML::fragment(match[2].strip).children[0]
+ node = Nokogiri::HTML5::fragment(match[2].strip).children[0]
src = node.attributes["src"]&.value
if src && (matched_uploads(src).present? || external_src)
diff --git a/app/services/search_indexer.rb b/app/services/search_indexer.rb
index 17817c4c099..ceeaaf27f25 100644
--- a/app/services/search_indexer.rb
+++ b/app/services/search_indexer.rb
@@ -191,7 +191,7 @@ class SearchIndexer
def self.scrub(html, strip_diacritics: false)
return +"" if html.blank?
- document = Nokogiri::HTML("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
+ document = Nokogiri::HTML5("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
nodes = document.css(
"div.#{CookedPostProcessor::LIGHTBOX_WRAPPER_CSS_CLASS}"
diff --git a/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb b/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
index 793da0ce64a..0c5f4bb3e67 100644
--- a/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
+++ b/db/migrate/20131014203951_backfill_post_upload_reverse_index.rb
@@ -8,7 +8,7 @@ class BackfillPostUploadReverseIndex < ActiveRecord::Migration[4.2]
# fill the reverse index up
Post.select([:id, :cooked]).find_each do |post|
- doc = Nokogiri::HTML::fragment(post.cooked)
+ doc = Nokogiri::HTML5::fragment(post.cooked)
# images
doc.search("img").each { |img| add_to_reverse_index(img['src'], post.id) }
# thumbnails and/or attachments
diff --git a/db/migrate/20140715055242_add_quoted_posts.rb b/db/migrate/20140715055242_add_quoted_posts.rb
index 47550f92513..d3052b8a07c 100644
--- a/db/migrate/20140715055242_add_quoted_posts.rb
+++ b/db/migrate/20140715055242_add_quoted_posts.rb
@@ -30,7 +30,7 @@ SQL
results.each do |row|
post_id, max_id = row["id"].to_i
- doc = Nokogiri::HTML.fragment(row["cooked"])
+ doc = Nokogiri::HTML5.fragment(row["cooked"])
uniq = {}
diff --git a/lib/content_security_policy/extension.rb b/lib/content_security_policy/extension.rb
index 4c8231b60a1..93eab088e41 100644
--- a/lib/content_security_policy/extension.rb
+++ b/lib/content_security_policy/extension.rb
@@ -61,7 +61,8 @@ class ContentSecurityPolicy
auto_script_src_extension = { script_src: [] }
html_fields.each(&:ensure_baked!)
doc = html_fields.map(&:value_baked).join("\n")
- Nokogiri::HTML.fragment(doc).css('script[src]').each do |node|
+
+ Nokogiri::HTML5.fragment(doc).css('script[src]').each do |node|
src = node['src']
uri = URI(src)
diff --git a/lib/cooked_post_processor.rb b/lib/cooked_post_processor.rb
index a2d75bfdf9e..725258cc10d 100644
--- a/lib/cooked_post_processor.rb
+++ b/lib/cooked_post_processor.rb
@@ -24,7 +24,7 @@ class CookedPostProcessor
@cooking_options = @cooking_options.symbolize_keys
cooked = post.cook(post.raw, @cooking_options)
- @doc = Nokogiri::HTML::fragment(cooked)
+ @doc = Nokogiri::HTML5::fragment(cooked)
@has_oneboxes = post.post_analyzer.found_oneboxes?
@size_cache = {}
@@ -95,7 +95,7 @@ class CookedPostProcessor
return if previous.blank?
- previous_text = Nokogiri::HTML::fragment(previous).text.strip
+ previous_text = Nokogiri::HTML5::fragment(previous).text.strip
quoted_text = @doc.css("aside.quote:first-child blockquote").first&.text&.strip || ""
return if previous_text.gsub(/(\s){2,}/, '\1') != quoted_text.gsub(/(\s){2,}/, '\1')
diff --git a/lib/discourse_diff.rb b/lib/discourse_diff.rb
index c2b31716237..cdf37870201 100644
--- a/lib/discourse_diff.rb
+++ b/lib/discourse_diff.rb
@@ -168,7 +168,7 @@ class DiscourseDiff
end
def tokenize_html_blocks(html)
- Nokogiri::HTML.fragment(html).search("./*").map(&:to_html)
+ Nokogiri::HTML5.fragment(html).search("./*").map(&:to_html)
end
def tokenize_html(html)
diff --git a/lib/email/receiver.rb b/lib/email/receiver.rb
index 69f7ccf2704..3cb73a8976d 100644
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -338,7 +338,7 @@ module Email
markdown, elided_markdown = if html.present?
# use the first html extracter that matches
if html_extracter = HTML_EXTRACTERS.select { |_, r| html[r] }.min_by { |_, r| html =~ r }
- doc = Nokogiri::HTML.fragment(html)
+ doc = Nokogiri::HTML5.fragment(html)
self.public_send(:"extract_from_#{html_extracter[0]}", doc)
else
markdown = HtmlToMarkdown.new(html, keep_img_tags: true, keep_cid_imgs: true).to_markdown
diff --git a/lib/email/styles.rb b/lib/email/styles.rb
index 955398aad8c..69bfc7ec233 100644
--- a/lib/email/styles.rb
+++ b/lib/email/styles.rb
@@ -15,7 +15,7 @@ module Email
def initialize(html, opts = nil)
@html = html
@opts = opts || {}
- @fragment = Nokogiri::HTML.fragment(@html)
+ @fragment = Nokogiri::HTML5.parse(@html)
@custom_styles = nil
end
@@ -161,7 +161,7 @@ module Email
src_uri = i["data-original-href"].present? ? URI(i["data-original-href"]) : URI(i['src'])
# If an iframe is protocol relative, use SSL when displaying it
display_src = "#{src_uri.scheme || 'https'}://#{src_uri.host}#{src_uri.path}#{src_uri.query.nil? ? '' : '?' + src_uri.query}#{src_uri.fragment.nil? ? '' : '#' + src_uri.fragment}"
- i.replace "<p><a href='#{src_uri.to_s}'>#{CGI.escapeHTML(display_src)}</a><p>"
+ i.replace(Nokogiri::HTML5.fragment("<p><a href='#{src_uri.to_s}'>#{CGI.escapeHTML(display_src)}</a><p>"))
rescue URI::Error
# If the URL is weird, remove the iframe
i.remove
@@ -242,7 +242,11 @@ module Email
strip_classes_and_ids
replace_relative_urls
replace_secure_media_urls
- @fragment.to_html
+ include_body? ? @fragment.at("body").to_html : @fragment.at("body").children.to_html
+ end
+
+ def include_body?
+ @html =~ /<body>/i
end
def strip_avatars_and_emojis
diff --git a/lib/onebox/engine/whitelisted_generic_onebox.rb b/lib/onebox/engine/whitelisted_generic_onebox.rb
index a10f22e83b4..7a46a0d1e5d 100644
--- a/lib/onebox/engine/whitelisted_generic_onebox.rb
+++ b/lib/onebox/engine/whitelisted_generic_onebox.rb
@@ -24,7 +24,7 @@ module Onebox
return true if WhitelistedGenericOnebox.html_providers.include?(data[:provider_name])
if data[:html]["iframe"]
- fragment = Nokogiri::HTML::fragment(data[:html])
+ fragment = Nokogiri::HTML5::fragment(data[:html])
if iframe = fragment.at_css("iframe")
src = iframe["src"]
return src.present? && SiteSetting.allowed_iframes.split("|").any? { |url| src.start_with?(url) }
diff --git a/lib/oneboxer.rb b/lib/oneboxer.rb
index 4e3ab68cf8d..d3625f19d8b 100644
--- a/lib/oneboxer.rb
+++ b/lib/oneboxer.rb
@@ -78,7 +78,7 @@ module Oneboxer
# Parse URLs out of HTML, returning the document when finished.
def self.each_onebox_link(string_or_doc, extra_paths: [])
doc = string_or_doc
- doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
+ doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
onebox_links = doc.css("a.#{ONEBOX_CSS_CLASS}", *extra_paths)
if onebox_links.present?
@@ -94,14 +94,14 @@ module Oneboxer
def self.apply(string_or_doc, extra_paths: nil)
doc = string_or_doc
- doc = Nokogiri::HTML::fragment(doc) if doc.is_a?(String)
+ doc = Nokogiri::HTML5::fragment(doc) if doc.is_a?(String)
changed = false
each_onebox_link(doc, extra_paths: extra_paths) do |url, element|
onebox, _ = yield(url, element)
if onebox
- parsed_onebox = Nokogiri::HTML::fragment(onebox)
+ parsed_onebox = Nokogiri::HTML5::fragment(onebox)
next unless parsed_onebox.children.count > 0
if element&.parent&.node_name&.downcase == "p" &&
diff --git a/lib/post_revisor.rb b/lib/post_revisor.rb
index 9da306db091..fd85696cf06 100644
--- a/lib/post_revisor.rb
+++ b/lib/post_revisor.rb
@@ -579,7 +579,7 @@ class PostRevisor
def update_category_description
return unless category = Category.find_by(topic_id: @topic.id)
- doc = Nokogiri::HTML.fragment(@post.cooked)
+ doc = Nokogiri::HTML5.fragment(@post.cooked)
doc.css("img").remove
if html = doc.css("p").first&.inner_html&.strip
diff --git a/lib/pretty_text.rb b/lib/pretty_text.rb
index 73c18d0099c..9f3f65e3507 100644
--- a/lib/pretty_text.rb
+++ b/lib/pretty_text.rb
@@ -259,7 +259,7 @@ module PrettyText
sanitized = markdown(working_text, options)
- doc = Nokogiri::HTML.fragment(sanitized)
+ doc = Nokogiri::HTML5.fragment(sanitized)
if !options[:omit_nofollow] && SiteSetting.add_rel_nofollow_to_user_content
add_rel_nofollow_to_user_content(doc)
@@ -269,7 +269,11 @@ module PrettyText
add_mentions(doc, user_id: opts[:user_id])
end
- doc.to_html
+ scrubber = Loofah::Scrubber.new do |node|
+ node.remove if node.name == 'script'
+ end
+ loofah_fragment = Loofah.fragment(doc.to_html)
+ loofah_fragment.scrub!(scrubber).to_html
end
def self.add_rel_nofollow_to_user_content(doc)
@@ -282,7 +286,7 @@ module PrettyText
doc.css("a").each do |l|
href = l["href"].to_s
begin
- uri = URI(href)
+ uri = URI(URI.escape(href))
site_uri ||= URI(Discourse.base_url)
if !uri.host.present? ||
@@ -305,7 +309,7 @@ module PrettyText
def self.extract_links(html)
links = []
- doc = Nokogiri::HTML.fragment(html)
+ doc = Nokogiri::HTML5.fragment(html)
# remove href inside quotes & elided part
doc.css("aside.quote a, .elided a").each { |a| a["href"] = "" }
@@ -338,7 +342,7 @@ module PrettyText
def self.excerpt(html, max_length, options = {})
# TODO: properly fix this HACK in ExcerptParser without introducing XSS
- doc = Nokogiri::HTML.fragment(html)
+ doc = Nokogiri::HTML5.fragment(html)
DiscourseEvent.trigger(:reduce_excerpt, doc, options)
strip_image_wrapping(doc)
strip_oneboxed_media(doc)
@@ -350,7 +354,7 @@ module PrettyText
return string if string.blank?
# If the user is not basic, strip links from their bio
- fragment = Nokogiri::HTML.fragment(string)
+ fragment = Nokogiri::HTML5.fragment(string)
fragment.css('a').each { |a| a.replace(a.inner_html) }
fragment.to_html
end
@@ -395,14 +399,14 @@ module PrettyText
def self.strip_secure_media(doc)
doc.css("a[href]").each do |a|
if Upload.secure_media_url?(a["href"])
- target = %w(video audio).include?(a&.parent&.parent&.name) ? a.parent.parent : a
+ target = %w(video audio).include?(a&.parent&.name) ? a.parent : a
target.replace "#{I18n.t("emails.secure_media_placeholder")}
"
end
end
end
def self.format_for_email(html, post = nil)
- doc = Nokogiri::HTML.fragment(html)
+ doc = Nokogiri::HTML5.fragment(html)
DiscourseEvent.trigger(:reduce_cooked, doc, post)
strip_secure_media(doc) if post&.with_secure_media?
strip_image_wrapping(doc)
@@ -462,13 +466,13 @@ module PrettyText
case type
when USER_TYPE
- element['href'] = "#{Discourse::base_uri}/u/#{name}"
+ element['href'] = "#{Discourse::base_uri}/u/#{URI.escape(name)}"
when GROUP_MENTIONABLE_TYPE
element['class'] = 'mention-group notify'
- element['href'] = "#{Discourse::base_uri}/groups/#{name}"
+ element['href'] = "#{Discourse::base_uri}/groups/#{URI.escape(name)}"
when GROUP_TYPE
element['class'] = 'mention-group'
- element['href'] = "#{Discourse::base_uri}/groups/#{name}"
+ element['href'] = "#{Discourse::base_uri}/groups/#{URI.escape(name)}"
end
end
end
diff --git a/lib/quote_comparer.rb b/lib/quote_comparer.rb
index 5da2891a7e1..74f39f84a77 100644
--- a/lib/quote_comparer.rb
+++ b/lib/quote_comparer.rb
@@ -18,7 +18,7 @@ class QuoteComparer
def modified?
return true if @text.blank? || @parent_post.blank?
- parent_text = Nokogiri::HTML::fragment(@parent_post.cooked).text.delete(QuoteComparer.whitespace)
+ parent_text = Nokogiri::HTML5::fragment(@parent_post.cooked).text.delete(QuoteComparer.whitespace)
text = @text.delete(QuoteComparer.whitespace)
!parent_text.include?(text)
diff --git a/lib/retrieve_title.rb b/lib/retrieve_title.rb
index 3c55ec7aecf..227da9f0cbd 100644
--- a/lib/retrieve_title.rb
+++ b/lib/retrieve_title.rb
@@ -11,7 +11,7 @@ module RetrieveTitle
def self.extract_title(html)
title = nil
- if doc = Nokogiri::HTML(html)
+ if doc = Nokogiri::HTML5(html)
title = doc.at('title')&.inner_text
diff --git a/lib/reviewable/conversation.rb b/lib/reviewable/conversation.rb
index 53eba48ef1a..696959dd922 100644
--- a/lib/reviewable/conversation.rb
+++ b/lib/reviewable/conversation.rb
@@ -17,7 +17,7 @@ class Reviewable < ActiveRecord::Base
def self.excerpt(cooked)
excerpt = ::Post.excerpt(cooked, 250, keep_emoji_images: true)
# remove the first link if it's the first node
- fragment = Nokogiri::HTML.fragment(excerpt)
+ fragment = Nokogiri::HTML5.fragment(excerpt)
if fragment.children.first == fragment.css("a:first").first && fragment.children.first
fragment.children.first.remove
end
diff --git a/lib/tasks/emoji.rake b/lib/tasks/emoji.rake
index ad05c0d1b24..ace173d55c2 100644
--- a/lib/tasks/emoji.rake
+++ b/lib/tasks/emoji.rake
@@ -353,7 +353,7 @@ def generate_emoji_groups(keywords, sections)
puts "Generating groups..."
list = open(EMOJI_ORDERING_URL).read
- doc = Nokogiri::HTML(list)
+ doc = Nokogiri::HTML5(list)
table = doc.css("table")[0]
EMOJI_GROUPS.map do |group|
diff --git a/plugins/discourse-details/spec/components/pretty_text_spec.rb b/plugins/discourse-details/spec/components/pretty_text_spec.rb
index aa768305ecf..dd0cf14a722 100644
--- a/plugins/discourse-details/spec/components/pretty_text_spec.rb
+++ b/plugins/discourse-details/spec/components/pretty_text_spec.rb
@@ -8,7 +8,7 @@ describe PrettyText do
let(:post) { Fabricate(:post) }
it "supports details tag" do
- cooked_html = <<~HTML
+ cooked_html = <<~HTML.gsub("\n", "")
foo
@@ -17,7 +17,7 @@ describe PrettyText do
HTML
expect(cooked_html).to match_html(cooked_html)
- expect(PrettyText.cook("[details=foo]\nbar\n[/details]")).to match_html(cooked_html)
+ expect(PrettyText.cook("[details=foo]\nbar\n[/details]").gsub("\n", "")).to match_html(cooked_html)
end
it "deletes elided content" do
diff --git a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb
index 342e1491284..56afacf2af7 100644
--- a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb
+++ b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/actions.rb
@@ -68,7 +68,7 @@ module DiscourseNarrativeBot
end
def bot_mentioned?(post)
- doc = Nokogiri::HTML.fragment(post.cooked)
+ doc = Nokogiri::HTML5.fragment(post.cooked)
valid = false
diff --git a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb
index aa66cb1802a..52f1cdab65a 100644
--- a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb
+++ b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/advanced_user_narrative.rb
@@ -280,7 +280,7 @@ module DiscourseNarrativeBot
topic_id = @post.topic_id
return unless valid_topic?(topic_id)
- if Nokogiri::HTML.fragment(@post.cooked).css('.hashtag').size > 0
+ if Nokogiri::HTML5.fragment(@post.cooked).css('.hashtag').size > 0
raw = <<~RAW
#{I18n.t("#{I18N_KEY}.category_hashtag.reply", i18n_post_args)}
@@ -331,7 +331,7 @@ module DiscourseNarrativeBot
topic_id = @post.topic_id
return unless valid_topic?(topic_id)
- if Nokogiri::HTML.fragment(@post.cooked).css(".poll").size > 0
+ if Nokogiri::HTML5.fragment(@post.cooked).css(".poll").size > 0
raw = <<~RAW
#{I18n.t("#{I18N_KEY}.poll.reply", i18n_post_args)}
@@ -354,7 +354,7 @@ module DiscourseNarrativeBot
fake_delay
- if Nokogiri::HTML.fragment(@post.cooked).css("details").size > 0
+ if Nokogiri::HTML5.fragment(@post.cooked).css("details").size > 0
reply_to(@post, I18n.t("#{I18N_KEY}.details.reply", i18n_post_args))
else
reply_to(@post, I18n.t("#{I18N_KEY}.details.not_found", i18n_post_args)) unless @data[:attempted]
diff --git a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb
index 5e9a1f38a54..9898ba085e8 100644
--- a/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb
+++ b/plugins/discourse-narrative-bot/lib/discourse_narrative_bot/new_user_narrative.rb
@@ -326,7 +326,7 @@ module DiscourseNarrativeBot
cooked = @post.post_analyzer.cook(@post.raw, {})
- if Nokogiri::HTML.fragment(cooked).css("img").size > 0
+ if Nokogiri::HTML5.fragment(cooked).css("img").size > 0
set_state_data(:post_id, @post.id)
if get_state_data(:liked)
@@ -366,7 +366,7 @@ module DiscourseNarrativeBot
post_topic_id = @post.topic_id
return unless valid_topic?(post_topic_id)
- if Nokogiri::HTML.fragment(@post.cooked).css("b", "strong", "em", "i", ".bbcode-i", ".bbcode-b").size > 0
+ if Nokogiri::HTML5.fragment(@post.cooked).css("b", "strong", "em", "i", ".bbcode-i", ".bbcode-b").size > 0
raw = <<~RAW
#{I18n.t("#{I18N_KEY}.formatting.reply", i18n_post_args)}
@@ -390,7 +390,7 @@ module DiscourseNarrativeBot
post_topic_id = @post.topic_id
return unless valid_topic?(post_topic_id)
- doc = Nokogiri::HTML.fragment(@post.cooked)
+ doc = Nokogiri::HTML5.fragment(@post.cooked)
if doc.css(".quote").size > 0
raw = <<~RAW
@@ -416,7 +416,7 @@ module DiscourseNarrativeBot
post_topic_id = @post.topic_id
return unless valid_topic?(post_topic_id)
- doc = Nokogiri::HTML.fragment(@post.cooked)
+ doc = Nokogiri::HTML5.fragment(@post.cooked)
if doc.css(".emoji").size > 0
raw = <<~RAW
diff --git a/plugins/poll/plugin.rb b/plugins/poll/plugin.rb
index e0c1057e36d..78fa0ba548a 100644
--- a/plugins/poll/plugin.rb
+++ b/plugins/poll/plugin.rb
@@ -350,7 +350,7 @@ after_initialize do
# in the validators instead of cooking twice
cooked = PrettyText.cook(raw, topic_id: topic_id, user_id: user_id)
- Nokogiri::HTML(cooked).css("div.poll").map do |p|
+ Nokogiri::HTML5(cooked).css("div.poll").map do |p|
poll = { "options" => [], "name" => DiscoursePoll::DEFAULT_POLL_NAME }
# attributes
diff --git a/plugins/poll/spec/lib/pretty_text_spec.rb b/plugins/poll/spec/lib/pretty_text_spec.rb
index db8af253a8e..c4d09d2aacd 100644
--- a/plugins/poll/spec/lib/pretty_text_spec.rb
+++ b/plugins/poll/spec/lib/pretty_text_spec.rb
@@ -131,7 +131,7 @@ describe PrettyText do
MD
onebox = Oneboxer.onebox_raw(post.full_url, user_id: Fabricate(:user).id)
- doc = Nokogiri::HTML(onebox[:preview])
+ doc = Nokogiri::HTML5(onebox[:preview])
expect(onebox[:preview]).to include("A post with a poll")
expect(onebox[:preview]).to include("poll ")
diff --git a/script/import_scripts/ipboard3.rb b/script/import_scripts/ipboard3.rb
index 0791e2b3a00..4e7638633df 100644
--- a/script/import_scripts/ipboard3.rb
+++ b/script/import_scripts/ipboard3.rb
@@ -376,7 +376,7 @@ class ImportScripts::IPBoard3 < ImportScripts::Base
raw.gsub!(/<(.+)> <\/\1>/, "\n\n")
- doc = Nokogiri::HTML.fragment(raw)
+ doc = Nokogiri::HTML5.fragment(raw)
doc.css("blockquote.ipsBlockquote").each do |bq|
post_id = post_id_from_imported_post_id(bq["data-cid"])
diff --git a/script/import_scripts/jive.rb b/script/import_scripts/jive.rb
index 8d386a52c58..f380b2bfee2 100644
--- a/script/import_scripts/jive.rb
+++ b/script/import_scripts/jive.rb
@@ -218,7 +218,7 @@ class ImportScripts::Jive < ImportScripts::Base
raw = raw.dup
raw = raw[5..-6]
- doc = Nokogiri::HTML.fragment(raw)
+ doc = Nokogiri::HTML5.fragment(raw)
doc.css('img').each do |img|
img.remove if img['class'] == "jive-image"
end
diff --git a/script/import_scripts/jive_api.rb b/script/import_scripts/jive_api.rb
index cf6df4d5bef..cee4928227d 100644
--- a/script/import_scripts/jive_api.rb
+++ b/script/import_scripts/jive_api.rb
@@ -297,7 +297,7 @@ class ImportScripts::JiveApi < ImportScripts::Base
end
def process_raw(raw)
- doc = Nokogiri::HTML.fragment(raw)
+ doc = Nokogiri::HTML5.fragment(raw)
# convert emoticon
doc.css("span.emoticon-inline").each do |span|
diff --git a/script/import_scripts/lithium.rb b/script/import_scripts/lithium.rb
index ac18a3fa36f..161618a7c16 100644
--- a/script/import_scripts/lithium.rb
+++ b/script/import_scripts/lithium.rb
@@ -913,7 +913,7 @@ SQL
raw.sub!(match, content)
end
- doc = Nokogiri::HTML.fragment(raw)
+ doc = Nokogiri::HTML5.fragment(raw)
doc.css("a,img,li-image").each do |l|
upload_name, image, linked_upload = [nil] * 3
diff --git a/spec/components/cooked_post_processor_spec.rb b/spec/components/cooked_post_processor_spec.rb
index 47262f1342e..51e9eadf69f 100644
--- a/spec/components/cooked_post_processor_spec.rb
+++ b/spec/components/cooked_post_processor_spec.rb
@@ -453,10 +453,8 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
-
+ expect(cpp.html).to match_html <<~HTML.rstrip
+
HTML
expect(cpp).to be_dirty
@@ -475,7 +473,7 @@ describe CookedPostProcessor do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
HTML
end
@@ -491,7 +489,7 @@ describe CookedPostProcessor do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
HTML
end
@@ -619,10 +617,8 @@ describe CookedPostProcessor do
it "crops the image" do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
-
+ expect(cpp.html).to match_html <<~HTML.rstrip
+
HTML
expect(cpp).to be_dirty
@@ -652,10 +648,8 @@ describe CookedPostProcessor do
it "generates overlay information" do
cpp.post_process
- expect(cpp.html). to match_html <<~HTML
-
+ expect(cpp.html). to match_html <<~HTML.rstrip
+
HTML
expect(cpp).to be_dirty
@@ -665,10 +659,8 @@ describe CookedPostProcessor do
upload.update!(original_filename: "> .png")
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
-
+ expect(cpp.html).to match_html <<~HTML.rstrip
+
HTML
end
@@ -693,10 +685,8 @@ describe CookedPostProcessor do
it "generates overlay information using image title and ignores alt" do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
-
+ expect(cpp.html).to match_html <<~HTML.rstrip
+
HTML
expect(cpp).to be_dirty
@@ -723,10 +713,8 @@ describe CookedPostProcessor do
it "generates overlay information using image title" do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
-
+ expect(cpp.html).to match_html <<~HTML.rstrip
+
HTML
expect(cpp).to be_dirty
@@ -753,10 +741,8 @@ describe CookedPostProcessor do
it "generates overlay information using image alt" do
cpp.post_process
- expect(cpp.html).to match_html <<~HTML
-
+ expect(cpp.html).to match_html <<~HTML.rstrip
+
HTML
expect(cpp).to be_dirty
@@ -993,7 +979,7 @@ describe CookedPostProcessor do
cpp = CookedPostProcessor.new(post, disable_loading_image: true)
cpp.post_process
- doc = Nokogiri::HTML::fragment(cpp.html)
+ doc = Nokogiri::HTML5::fragment(cpp.html)
expect(doc.css('.lightbox-wrapper').size).to eq(1)
expect(doc.css('img').first['srcset']).to_not eq(nil)
end
@@ -1008,7 +994,7 @@ describe CookedPostProcessor do
cpp = CookedPostProcessor.new(post, disable_loading_image: true)
cpp.post_process
- doc = Nokogiri::HTML::fragment(cpp.html)
+ doc = Nokogiri::HTML5::fragment(cpp.html)
expect(doc.css('.lightbox-wrapper').size).to eq(0)
expect(doc.css('img').first['srcset']).to_not eq(nil)
end
@@ -1023,7 +1009,7 @@ describe CookedPostProcessor do
cpp = CookedPostProcessor.new(post, disable_loading_image: true)
cpp.post_process
- doc = Nokogiri::HTML::fragment(cpp.html)
+ doc = Nokogiri::HTML5::fragment(cpp.html)
expect(doc.css('.lightbox-wrapper').size).to eq(0)
expect(doc.css('img').first['srcset']).to_not eq(nil)
end
@@ -1227,7 +1213,7 @@ describe CookedPostProcessor do
it "uses schemaless url for uploads" do
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
Link
Google
@@ -1242,7 +1228,7 @@ describe CookedPostProcessor do
it "uses schemaless CDN url for http uploads" do
Rails.configuration.action_controller.stubs(:asset_host).returns("http://my.cdn.com")
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
Link
Google
@@ -1255,7 +1241,7 @@ describe CookedPostProcessor do
it "doesn't use schemaless CDN url for https uploads" do
Rails.configuration.action_controller.stubs(:asset_host).returns("https://my.cdn.com")
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
Link
Google
@@ -1269,7 +1255,7 @@ describe CookedPostProcessor do
SiteSetting.login_required = true
Rails.configuration.action_controller.stubs(:asset_host).returns("http://my.cdn.com")
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
Link
Google
@@ -1283,7 +1269,7 @@ describe CookedPostProcessor do
SiteSetting.prevent_anons_from_downloading_files = true
Rails.configuration.action_controller.stubs(:asset_host).returns("http://my.cdn.com")
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
Link
Google
@@ -1318,7 +1304,7 @@ describe CookedPostProcessor do
cpp = CookedPostProcessor.new(the_post)
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
This post has a local emoji and an external upload
HTML
@@ -1336,7 +1322,7 @@ describe CookedPostProcessor do
cpp = CookedPostProcessor.new(the_post)
cpp.optimize_urls
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
This post has a local emoji and an external upload
HTML
@@ -1357,18 +1343,20 @@ describe CookedPostProcessor do
the_post = Fabricate(:post, raw: "This post has an S3 video onebox:\n#{video_upload.url}")
- cpp = CookedPostProcessor.new(the_post)
+ cpp = CookedPostProcessor.new(the_post.reload)
+ cpp.post_process_oneboxes
+
+ cpp = CookedPostProcessor.new(the_post.reload)
cpp.post_process_oneboxes
expect(cpp.html).to match_html <<~HTML
- This post has an S3 video onebox:
-
+ This post has an S3 video onebox:
+
HTML
end
@@ -1384,13 +1372,12 @@ describe CookedPostProcessor do
secure_url = video_upload.url.sub(SiteSetting.s3_cdn_url, "#{Discourse.base_url}/secure-media-uploads")
- expect(cpp.html).to match_html <<~HTML
+ expect(cpp.html).to match_html <<~HTML.rstrip
This post has an S3 video onebox:
@@ -1416,7 +1403,7 @@ describe CookedPostProcessor do
stub_request(:head, audio_upload.url)
stub_request(:get, image_upload.url)
- raw = <<~RAW
+ raw = <<~RAW.rstrip
This post has a video upload.
#{video_upload.url}
@@ -1435,19 +1422,17 @@ describe CookedPostProcessor do
secure_video_url = video_upload.url.sub(SiteSetting.s3_cdn_url, "#{Discourse.base_url}/secure-media-uploads")
secure_audio_url = audio_upload.url.sub(SiteSetting.s3_cdn_url, "#{Discourse.base_url}/secure-media-uploads")
- expect(cpp.html).to match_html <<~HTML
- This post has a video upload.
+ expect(cpp.html).to match_html <<~HTML.rstrip
+ This post has a video upload.
-
- This post has an audio upload.
- #{secure_audio_url}
+ This post has an audio upload.
+ #{secure_audio_url}
And an image upload.
@@ -1616,7 +1601,7 @@ describe CookedPostProcessor do
let(:post) { build(:post) }
let(:cpp) { CookedPostProcessor.new(post) }
- let(:doc) { Nokogiri::HTML::fragment('') }
+ let(:doc) { Nokogiri::HTML5::fragment('') }
it "is true when the image is inside a link" do
img = doc.css("img#linked_image").first
diff --git a/spec/components/email/styles_spec.rb b/spec/components/email/styles_spec.rb
index bdcd27aa066..74addfe29dd 100644
--- a/spec/components/email/styles_spec.rb
+++ b/spec/components/email/styles_spec.rb
@@ -8,14 +8,14 @@ describe Email::Styles do
def basic_fragment(html)
styler = Email::Styles.new(html)
styler.format_basic
- Nokogiri::HTML.fragment(styler.to_html)
+ Nokogiri::HTML5.fragment(styler.to_html)
end
def html_fragment(html)
styler = Email::Styles.new(html)
styler.format_basic
styler.format_html
- Nokogiri::HTML.fragment(styler.to_html)
+ Nokogiri::HTML5.fragment(styler.to_html)
end
context "basic formatter" do
diff --git a/spec/components/excerpt_parser_spec.rb b/spec/components/excerpt_parser_spec.rb
index 8f0654bab5f..363dd049e0a 100644
--- a/spec/components/excerpt_parser_spec.rb
+++ b/spec/components/excerpt_parser_spec.rb
@@ -18,7 +18,7 @@ describe ExcerptParser do
HTML
- expect(ExcerptParser.get_excerpt(html, 50, {})).to match_html(<<~HTML)
+ expect(ExcerptParser.get_excerpt(html, 50, {})).to match_html(<<~HTML.rstrip)
FOO BAR
Lorem ipsum dolor sit amet, consectetur adi…
HTML
diff --git a/spec/components/pretty_text_spec.rb b/spec/components/pretty_text_spec.rb
index 784b607972a..8969a526bd5 100644
--- a/spec/components/pretty_text_spec.rb
+++ b/spec/components/pretty_text_spec.rb
@@ -184,7 +184,7 @@ describe PrettyText do
-
#{user.username}:
+ #{user.username}:
ddd
@@ -206,7 +206,7 @@ describe PrettyText do
-
#{user.username}:
+ #{user.username}:
ddd
@@ -227,7 +227,7 @@ describe PrettyText do
-
#{user.username}:
+ #{user.username}:
ddd
@@ -254,7 +254,7 @@ describe PrettyText do
ddd
@@ -828,7 +828,7 @@ describe PrettyText do
describe "strip_image_wrapping" do
def strip_image_wrapping(html)
- doc = Nokogiri::HTML.fragment(html)
+ doc = Nokogiri::HTML5.fragment(html)
described_class.strip_image_wrapping(doc)
doc.to_html
end
@@ -1122,7 +1122,7 @@ describe PrettyText do
it "can handle mixed lists" do
# known bug in old md engine
cooked = PrettyText.cook("* a\n\n1. b")
- expect(cooked).to match_html("\nb \n ")
+ expect(cooked).to match_html("\n\nb \n ")
end
it "can handle traditional vs non traditional newlines" do
@@ -1342,13 +1342,13 @@ HTML
it "supports img bbcode" do
cooked = PrettyText.cook "[img]http://www.image/test.png[/img]"
- html = "
"
+ html = "
"
expect(cooked).to eq(html)
end
it "provides safety for img bbcode" do
cooked = PrettyText.cook "[img]http://aaa.com[/img]"
- html = '
'
+ html = '
'
expect(cooked).to eq(html)
end
@@ -1433,10 +1433,10 @@ HTML
html = <<~HTML
-
-
+
+
-
+
HTML
expect(cooked).to eq(html.strip)
@@ -1452,11 +1452,11 @@ HTML
MD
html = <<~HTML
-
-
-
-
-
+
+
+
+
+
HTML
expect(cooked).to eq(html.strip)
diff --git a/spec/lib/content_security_policy_spec.rb b/spec/lib/content_security_policy_spec.rb
index 168887cd473..4196b29d669 100644
--- a/spec/lib/content_security_policy_spec.rb
+++ b/spec/lib/content_security_policy_spec.rb
@@ -217,9 +217,9 @@ describe ContentSecurityPolicy do
policy # call this first to make sure further actions clear the cache
theme.set_field(target: :common, name: "header", value: <<~SCRIPT)
-
-
-
+
+
+
SCRIPT
diff --git a/spec/models/topic_embed_spec.rb b/spec/models/topic_embed_spec.rb
index c45a2209e10..ddfbe115ecf 100644
--- a/spec/models/topic_embed_spec.rb
+++ b/spec/models/topic_embed_spec.rb
@@ -14,7 +14,7 @@ describe TopicEmbed do
fab!(:user) { Fabricate(:user) }
let(:title) { "How to turn a fish from good to evil in 30 seconds" }
let(:url) { 'http://eviltrout.com/123' }
- let(:contents) { "hello world new post hello " }
+ let(:contents) { "hello world new post hello
" }
fab!(:embeddable_host) { Fabricate(:embeddable_host) }
it "returns nil when the URL is malformed" do
@@ -46,7 +46,7 @@ describe TopicEmbed do
it "Supports updating the post content" do
expect do
- TopicEmbed.import(user, url, "New title received", "muhahaha new contents!")
+ TopicEmbed.import(user, url, "New title received", "muhahaha new contents!
")
end.to change { topic_embed.reload.content_sha1 }
expect(topic_embed.topic.title).to eq("New title received")
diff --git a/spec/requests/categories_controller_spec.rb b/spec/requests/categories_controller_spec.rb
index 97293742d35..e4b0cec35c6 100644
--- a/spec/requests/categories_controller_spec.rb
+++ b/spec/requests/categories_controller_spec.rb
@@ -11,7 +11,7 @@ describe CategoriesController do
it 'web crawler view has correct urls for subfolder install' do
set_subfolder "/forum"
get '/categories', headers: { 'HTTP_USER_AGENT' => 'Googlebot' }
- html = Nokogiri::HTML(response.body)
+ html = Nokogiri::HTML5(response.body)
expect(html.css('body.crawler')).to be_present
expect(html.css("a[href=\"/forum/c/#{category.slug}\"]")).to be_present
end
diff --git a/spec/requests/email_controller_spec.rb b/spec/requests/email_controller_spec.rb
index 10658907b63..ec60265f667 100644
--- a/spec/requests/email_controller_spec.rb
+++ b/spec/requests/email_controller_spec.rb
@@ -231,7 +231,7 @@ RSpec.describe EmailController do
navigate_to_unsubscribe
- source = Nokogiri::HTML::fragment(response.body)
+ source = Nokogiri::HTML5::fragment(response.body)
expect(source.css(".combobox option").map(&:inner_text)).to eq(slow_digest_frequencies)
end
@@ -242,7 +242,7 @@ RSpec.describe EmailController do
navigate_to_unsubscribe
- source = Nokogiri::HTML::fragment(response.body)
+ source = Nokogiri::HTML5::fragment(response.body)
expect(source.css(".combobox option[selected='selected']")[0]['value']).to eq(six_months_freq.to_s)
end
@@ -253,7 +253,7 @@ RSpec.describe EmailController do
navigate_to_unsubscribe
- source = Nokogiri::HTML::fragment(response.body)
+ source = Nokogiri::HTML5::fragment(response.body)
expect(source.css(".combobox option[selected='selected']")[0]['value']).to eq(never_frequency.to_s)
end
end
diff --git a/spec/requests/embed_controller_spec.rb b/spec/requests/embed_controller_spec.rb
index 71d6eef807d..ac0d6c3aa83 100644
--- a/spec/requests/embed_controller_spec.rb
+++ b/spec/requests/embed_controller_spec.rb
@@ -146,7 +146,7 @@ describe EmbedController do
get '/embed/comments', params: { embed_url: embed_url }, headers: headers
- html = Nokogiri::HTML.fragment(response.body)
+ html = Nokogiri::HTML5.fragment(response.body)
css_link = html.at("link[data-target=embedded_theme]").attribute("href").value
get css_link
diff --git a/spec/requests/user_api_keys_controller_spec.rb b/spec/requests/user_api_keys_controller_spec.rb
index 8148bef88a6..c612eb55a1d 100644
--- a/spec/requests/user_api_keys_controller_spec.rb
+++ b/spec/requests/user_api_keys_controller_spec.rb
@@ -238,7 +238,7 @@ describe UserApiKeysController do
SiteSetting.min_trust_level_for_user_api_key = 0
post "/user-api-key", params: args
expect(response.status).not_to eq(302)
- payload = Nokogiri::HTML(response.body).at('code').content
+ payload = Nokogiri::HTML5(response.body).at('code').content
encrypted = Base64.decode64(payload)
key = OpenSSL::PKey::RSA.new(private_key)
parsed = JSON.parse(key.private_decrypt(encrypted))
diff --git a/spec/services/username_changer_spec.rb b/spec/services/username_changer_spec.rb
index 0013c82a942..416c986ca9e 100644
--- a/spec/services/username_changer_spec.rb
+++ b/spec/services/username_changer_spec.rb
@@ -142,7 +142,7 @@ describe UsernameChanger do
post = create_post_and_change_username(raw: ".@foo -@foo %@foo _@foo ,@foo ;@foo @@foo")
expect(post.raw).to eq(".@bar -@bar %@bar _@bar ,@bar ;@bar @@bar")
- expect(post.cooked).to match_html(<<~HTML)
+ expect(post.cooked).to match_html(<<~HTML.rstrip)
.@bar
-@bar
%@bar
@@ -164,7 +164,7 @@ describe UsernameChanger do
post = create_post_and_change_username(raw: "**@foo** *@foo* _@foo_ ~~@foo~~")
expect(post.raw).to eq("**@bar** *@bar* _@bar_ ~~@bar~~")
- expect(post.cooked).to match_html(<<~HTML)
+ expect(post.cooked).to match_html(<<~HTML.rstrip)
@bar
@bar
@bar
@@ -176,7 +176,7 @@ describe UsernameChanger do
post = create_post_and_change_username(raw: "@foo. @foo, @foo: @foo; @foo_ @foo-")
expect(post.raw).to eq("@bar. @bar, @bar: @bar; @bar_ @bar-")
- expect(post.cooked).to match_html(<<~HTML)
+ expect(post.cooked).to match_html(<<~HTML.rstrip)
@bar .
@bar ,
@bar :
@@ -220,12 +220,8 @@ describe UsernameChanger do
post = create_post_and_change_username(raw: "@foo @foobar @foo-bar @foo_bar @foo1")
expect(post.raw).to eq("@bar @foobar @foo-bar @foo_bar @foo1")
- expect(post.cooked).to match_html(<<~HTML)
-
@bar
- @foobar
- @foo-bar
- @foo_bar
- @foo1
+ expect(post.cooked).to match_html(<<~HTML.rstrip)
+ @bar @foobar @foo-bar @foo_bar @foo1
HTML
end
@@ -311,12 +307,8 @@ describe UsernameChanger do
post = create_post_and_change_username(raw: "@թռչուն @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩", target_username: 'птица')
expect(post.raw).to eq("@птица @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩")
- expect(post.cooked).to match_html(<<~HTML)
- @птица
- @թռչուն鳥
- @թռչուն-鳥
- @թռչուն_鳥
- @թռչուն٩
+ expect(post.cooked).to match_html(<<~HTML.rstrip)
+ @птица @թռչուն鳥 @թռչուն-鳥 @թռչուն_鳥 @թռչուն٩
HTML
end
@@ -364,7 +356,7 @@ describe UsernameChanger do
dolor sit amet
RAW
- expect(post.cooked).to match_html(<<~HTML)
+ expect(post.cooked).to match_html(<<~HTML.rstrip)
Lorem ipsum