Enhancements to link-checker
* Handle validation of non-HTML assets (zip files for samples)
* Cleaned the documentation

Signed-off-by: Miki <mehranb@amazon.com>
This commit is contained in:
parent
23e4562f54
commit
41c01358b9
@ -19,68 +19,53 @@ require "pathname"
|
|||||||
|
|
||||||
module Jekyll::LinkChecker
|
module Jekyll::LinkChecker
|
||||||
|
|
||||||
##
|
|
||||||
# The collection that will get stored as the output
|
# The collection that will get stored as the output
|
||||||
|
|
||||||
@urls = {}
|
@urls = {}
|
||||||
|
|
||||||
##
|
|
||||||
# Pattern to identify documents that should be excluded based on their URL
|
# Pattern to identify documents that should be excluded based on their URL
|
||||||
|
@excluded_paths = /(\.(css|js|json|map|xml|txt|yml)$|\/version-selector\.tpl$)/i.freeze
|
||||||
|
|
||||||
@excluded_paths = /(\.(css|js|json|map|xml|txt|yml)$)/i.freeze
|
|
||||||
|
|
||||||
##
|
|
||||||
# Pattern to extract the quoted href value from anchor (<a>) tags
|
# Pattern to extract the quoted href value from anchor (<a>) tags
|
||||||
|
|
||||||
@href_matcher = /<a[^>]+href=(['"])(.+?)\1/im.freeze
|
@href_matcher = /<a[^>]+href=(['"])(.+?)\1/im.freeze
|
||||||
|
|
||||||
##
|
|
||||||
# Pattern to check for external URLs
|
# Pattern to check for external URLs
|
||||||
|
|
||||||
@external_matcher = /^https?:\/\//.freeze
|
@external_matcher = /^https?:\/\//.freeze
|
||||||
|
|
||||||
##
|
|
||||||
# List of domains to ignore
|
# List of domains to ignore
|
||||||
@ignored_domains = %w[localhost]
|
@ignored_domains = %w[localhost]
|
||||||
|
|
||||||
##
|
|
||||||
# Pattern of local paths to ignore
|
# Pattern of local paths to ignore
|
||||||
@ignored_paths = /(^\/javadocs\/)/.freeze
|
@ignored_paths = /(^\/javadocs\/)/.freeze
|
||||||
|
|
||||||
##
|
# Pattern to exclude when adding the `index.html` suffix to paths
|
||||||
|
@need_no_suffix = /\.(?!html)[^\/]+$/.freeze
|
||||||
|
|
||||||
# Valid response codes for successful links
|
# Valid response codes for successful links
|
||||||
@success_codes = %w[200 302]
|
@success_codes = %w[200 302]
|
||||||
|
|
||||||
##
|
|
||||||
# Questionable response codes for successful links
|
# Questionable response codes for successful links
|
||||||
@questionable_codes = %w[301 403 429]
|
@questionable_codes = %w[301 403 429]
|
||||||
|
|
||||||
##
|
|
||||||
# Holds the list of failures
|
# Holds the list of failures
|
||||||
@failures = []
|
@failures = []
|
||||||
|
|
||||||
##
|
|
||||||
# Driven by environment variables, it indicates a need to check external links
|
# Driven by environment variables, it indicates a need to check external links
|
||||||
@check_external_links
|
@check_external_links
|
||||||
|
|
||||||
##
|
|
||||||
# Driven by environment variables, it indicates the need to fail the build for dead links
|
# Driven by environment variables, it indicates the need to fail the build for dead links
|
||||||
@should_build_fatally
|
@should_build_fatally
|
||||||
|
|
||||||
|
|
||||||
##
|
|
||||||
# Initializes the singleton by recording the site
|
# Initializes the singleton by recording the site
|
||||||
|
# @return [void]
|
||||||
def self.init(site)
|
def self.init(site)
|
||||||
@site = site
|
@site = site
|
||||||
@urls = {}
|
@urls = {}
|
||||||
@failures = []
|
@failures = []
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
|
||||||
# Processes a Document or Page and adds the links to a collection
|
# Processes a Document or Page and adds the links to a collection
|
||||||
# It also checks for anchors to parts of the same page/doc
|
# It also checks for anchors that link to parts of the same page/doc
|
||||||
|
# @return [void]
|
||||||
def self.process(page)
|
def self.process(page)
|
||||||
return if @excluded_paths.match(page.path)
|
return if @excluded_paths.match(page.path)
|
||||||
|
|
||||||
@ -98,9 +83,8 @@ module Jekyll::LinkChecker
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
# Verifies the validity of all the destinations gathered in @urls
|
||||||
# Saves the collection as a JSON file
|
# @return [void]
|
||||||
|
|
||||||
def self.verify(site)
|
def self.verify(site)
|
||||||
if ENV.key?('JEKYLL_CHECK_EXTERNAL_LINKS')
|
if ENV.key?('JEKYLL_CHECK_EXTERNAL_LINKS')
|
||||||
@check_external_links = true
|
@check_external_links = true
|
||||||
@ -132,9 +116,9 @@ module Jekyll::LinkChecker
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
# Check if an internal or external URL is accessible
|
||||||
# Check if URL is accessible
|
# @param url [String] the url to check
|
||||||
|
# @return [Boolean]
|
||||||
def self.check(url)
|
def self.check(url)
|
||||||
match = @base_url_matcher.match(url)
|
match = @base_url_matcher.match(url)
|
||||||
unless match.nil?
|
unless match.nil?
|
||||||
@ -149,9 +133,9 @@ module Jekyll::LinkChecker
|
|||||||
return self.check_internal(url)
|
return self.check_internal(url)
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
|
||||||
# Check if an external URL is accessible by making a HEAD call
|
# Check if an external URL is accessible by making a HEAD call
|
||||||
|
# @param url [String] the url to check
|
||||||
|
# @return [Boolean]
|
||||||
def self.check_external(url)
|
def self.check_external(url)
|
||||||
uri = URI(url)
|
uri = URI(url)
|
||||||
return true if @ignored_domains.include? uri.host
|
return true if @ignored_domains.include? uri.host
|
||||||
@ -172,14 +156,18 @@ module Jekyll::LinkChecker
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
##
|
|
||||||
# Check if an internal link is accessible
|
# Check if an internal link is accessible
|
||||||
|
# @param url [String] the url to check
|
||||||
|
# @return [Boolean]
|
||||||
def self.check_internal(url)
|
def self.check_internal(url)
|
||||||
return true if @ignored_paths =~ url
|
return true if @ignored_paths =~ url
|
||||||
|
|
||||||
path, hash = url.split('#')
|
path, hash = url.split('#')
|
||||||
|
|
||||||
|
if @need_no_suffix =~ path
|
||||||
|
filename = File.join(@site.config["destination"], path)
|
||||||
|
return File.file?(filename)
|
||||||
|
else
|
||||||
unless path.end_with? 'index.html'
|
unless path.end_with? 'index.html'
|
||||||
path << '/' unless path.end_with? '/'
|
path << '/' unless path.end_with? '/'
|
||||||
path << 'index.html' unless path.end_with? 'index.html'
|
path << 'index.html' unless path.end_with? 'index.html'
|
||||||
@ -205,28 +193,25 @@ module Jekyll::LinkChecker
|
|||||||
redirect << '#' + hash unless hash.nil? || hash.empty?
|
redirect << '#' + hash unless hash.nil? || hash.empty?
|
||||||
return self.check(redirect)
|
return self.check(redirect)
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
# Before any Document or Page is processed, initialize the LinkChecker
|
# Before any Document or Page is processed, initialize the LinkChecker
|
||||||
|
|
||||||
Jekyll::Hooks.register :site, :pre_render do |site|
|
Jekyll::Hooks.register :site, :pre_render do |site|
|
||||||
Jekyll::LinkChecker.init(site)
|
Jekyll::LinkChecker.init(site)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Process a Page as soon as its content is ready
|
# Process a Page as soon as its content is ready
|
||||||
|
|
||||||
Jekyll::Hooks.register :pages, :post_convert do |page|
|
Jekyll::Hooks.register :pages, :post_convert do |page|
|
||||||
Jekyll::LinkChecker.process(page)
|
Jekyll::LinkChecker.process(page)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Process a Document as soon as its content is ready
|
# Process a Document as soon as its content is ready
|
||||||
|
|
||||||
Jekyll::Hooks.register :documents, :post_convert do |document|
|
Jekyll::Hooks.register :documents, :post_convert do |document|
|
||||||
Jekyll::LinkChecker.process(document)
|
Jekyll::LinkChecker.process(document)
|
||||||
end
|
end
|
||||||
|
|
||||||
# Verify gathered links after Jekyll is done writing all its stuff
|
# Verify gathered links after Jekyll is done writing all its stuff
|
||||||
|
|
||||||
Jekyll::Hooks.register :site, :post_write do |site|
|
Jekyll::Hooks.register :site, :post_write do |site|
|
||||||
Jekyll::LinkChecker.verify(site)
|
Jekyll::LinkChecker.verify(site)
|
||||||
end
|
end
|
Loading…
x
Reference in New Issue
Block a user