DEV: Update nokogiri to 1.18.1 (#30554)
Nokogiri/libxml is now stricter about the params it receives. It uses kwargs instead of an options object (I fixed an issue there in #30545), doesn't accept nil/blank HTML (fixed here), and, most importantly, handles encoding in a different way: it seems to require explicitly specifying UTF-8. * Build(deps): Bump nokogiri from 1.16.8 to 1.18.1 Bumps [nokogiri](https://github.com/sparklemotion/nokogiri) from 1.16.8 to 1.18.1. - [Release notes](https://github.com/sparklemotion/nokogiri/releases) - [Changelog](https://github.com/sparklemotion/nokogiri/blob/main/CHANGELOG.md) - [Commits](https://github.com/sparklemotion/nokogiri/compare/v1.16.8...v1.18.1) --- updated-dependencies: - dependency-name: nokogiri dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
parent
c1a46995a7
commit
affe26f0dd
|
@ -273,13 +273,13 @@ GEM
|
|||
net-smtp (0.5.0)
|
||||
net-protocol
|
||||
nio4r (2.7.4)
|
||||
nokogiri (1.16.8-aarch64-linux)
|
||||
nokogiri (1.18.1-aarch64-linux-gnu)
|
||||
racc (~> 1.4)
|
||||
nokogiri (1.16.8-arm64-darwin)
|
||||
nokogiri (1.18.1-arm64-darwin)
|
||||
racc (~> 1.4)
|
||||
nokogiri (1.16.8-x86_64-darwin)
|
||||
nokogiri (1.18.1-x86_64-darwin)
|
||||
racc (~> 1.4)
|
||||
nokogiri (1.16.8-x86_64-linux)
|
||||
nokogiri (1.18.1-x86_64-linux-gnu)
|
||||
racc (~> 1.4)
|
||||
oauth (1.1.0)
|
||||
oauth-tty (~> 1.0, >= 1.0.1)
|
||||
|
|
|
@ -365,7 +365,7 @@ class SearchIndexer
|
|||
return +"" if html.blank?
|
||||
|
||||
begin
|
||||
document = Nokogiri.HTML5("<div>#{html}</div>", nil, Encoding::UTF_8.to_s)
|
||||
document = Nokogiri.HTML5("<div>#{html}</div>", encoding: Encoding::UTF_8)
|
||||
rescue ArgumentError
|
||||
return +""
|
||||
end
|
||||
|
@ -401,7 +401,7 @@ class SearchIndexer
|
|||
end
|
||||
|
||||
html_scrubber = new
|
||||
Nokogiri::HTML::SAX::Parser.new(html_scrubber).parse(document.to_html)
|
||||
Nokogiri::HTML4::SAX::Parser.new(html_scrubber, Encoding::UTF_8).parse(document.to_html)
|
||||
html_scrubber.scrubbed.squish
|
||||
end
|
||||
|
||||
|
|
|
@ -277,7 +277,7 @@ class DiscourseDiff
|
|||
|
||||
def self.tokenize(html)
|
||||
me = new
|
||||
parser = Nokogiri::HTML::SAX::Parser.new(me)
|
||||
parser = Nokogiri::HTML4::SAX::Parser.new(me, Encoding::UTF_8)
|
||||
parser.parse("<html><body>#{html}</body></html>")
|
||||
me.tokens
|
||||
end
|
||||
|
|
|
@ -27,10 +27,11 @@ class ExcerptParser < Nokogiri::XML::SAX::Document
|
|||
end
|
||||
|
||||
def self.get_excerpt(html, length, options)
|
||||
html ||= ""
|
||||
return "" if html.blank?
|
||||
|
||||
length = html.length if html.include?("excerpt") && CUSTOM_EXCERPT_REGEX === html
|
||||
me = self.new(length, options)
|
||||
parser = Nokogiri::HTML::SAX::Parser.new(me)
|
||||
parser = Nokogiri::HTML4::SAX::Parser.new(me, Encoding::UTF_8)
|
||||
catch(:done) { parser.parse(html) }
|
||||
excerpt = me.excerpt.strip
|
||||
excerpt = excerpt.gsub(/\s*\n+\s*/, "\n\n") if options[:keep_onebox_source] ||
|
||||
|
|
|
@ -483,6 +483,8 @@ module PrettyText
|
|||
end
|
||||
|
||||
def self.excerpt(html, max_length, options = {})
|
||||
return "" if html.blank?
|
||||
|
||||
# TODO: properly fix this HACK in ExcerptParser without introducing XSS
|
||||
doc = Nokogiri::HTML5.fragment(html)
|
||||
DiscourseEvent.trigger(:reduce_excerpt, doc, options)
|
||||
|
|
|
@ -24,7 +24,7 @@ module RetrieveTitle
|
|||
|
||||
doc = nil
|
||||
begin
|
||||
doc = Nokogiri.HTML5(html, nil, encoding)
|
||||
doc = Nokogiri.HTML5(html, encoding:)
|
||||
rescue ArgumentError
|
||||
# invalid HTML (Eg: too many attributes, status tree too deep) - ignore
|
||||
# Error in nokogumbo is not specialized, uses generic ArgumentError
|
||||
|
|
|
@ -17,7 +17,7 @@ class ImportScripts::Disqus < ImportScripts::Base
|
|||
abort("Category #{IMPORT_CATEGORY} not found") if @category.blank?
|
||||
|
||||
@parser = DisqusSAX.new
|
||||
doc = Nokogiri::XML::SAX::Parser.new(@parser)
|
||||
doc = Nokogiri::XML::SAX::Parser.new(@parser, Encoding::UTF_8)
|
||||
doc.parse_file(IMPORT_FILE)
|
||||
@parser.normalize
|
||||
|
||||
|
|
Loading…
Reference in New Issue