class RobotsTxtController < ApplicationController
  layout false
  skip_before_action :preload_json, :check_xhr, :redirect_to_login_if_required

  # NOTE: order is important!
  DISALLOWED_PATHS ||= %w{
    /auth/cas
    /auth/facebook/callback
    /auth/twitter/callback
    /auth/google/callback
    /auth/yahoo/callback
    /auth/github/callback
    /auth/cas/callback
    /assets/browser-update*.js
    /users/
    /u/
    /my/
    /badges/
    /search
    /search/
    /tags
    /tags/
    /email/
    /session
    /session/
    /admin
    /admin/
    /user-api-key
    /user-api-key/
    /*?api_key*
    /*?*api_key*
    /groups
    /groups/
    /t/*/*.rss
    /tags/*.rss
    /c/*.rss
  }
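
  # For illustration only (an assumption about the unshown index template): with an
  # empty Discourse.base_uri and no crawler whitelist/blacklist configured, the paths
  # above are served to the catch-all agent roughly as:
  #
  #   User-agent: *
  #   Disallow: /auth/cas
  #   Disallow: /auth/facebook/callback
  #   ...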

  # Renders robots.txt; falls back to the no_index template when
  # allow_index_in_robots_txt is disabled.
  def index
    if SiteSetting.allow_index_in_robots_txt?
      @robots_info = fetch_robots_info
      render :index, content_type: 'text/plain'
    else
      render :no_index, content_type: 'text/plain'
    end
  end

  # If you are hosting Discourse in a subfolder, you will need to create your robots.txt
  # in the root of your web server with the appropriate paths. This method will return
  # JSON that can be used by a script to create a robots.txt that works well with your
  # existing site.
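  #
  # For illustration only, the JSON has roughly this shape (exact values depend on
  # the crawler site settings and Discourse.base_uri):
  #
  #   {
  #     "header": "# See http://www.robotstxt.org/robotstxt.html ...",
  #     "agents": [
  #       { "name": "*", "disallow": ["/auth/cas", "/auth/facebook/callback", "..."] }
  #     ]
  #   }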
  def builder
    render json: fetch_robots_info
  end

  protected

  def fetch_robots_info
    deny_paths = DISALLOWED_PATHS.map { |p| Discourse.base_uri + p }
    deny_all = [ "#{Discourse.base_uri}/" ]

    result = {
      header: "# See http://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file",
      agents: []
    }

    if SiteSetting.whitelisted_crawler_user_agents.present?
      # Whitelisted crawlers get the standard disallow list; every other agent is denied everything.
      SiteSetting.whitelisted_crawler_user_agents.split('|').each do |agent|
        result[:agents] << { name: agent, disallow: deny_paths }
      end

      result[:agents] << { name: '*', disallow: deny_all }
    elsif SiteSetting.blacklisted_crawler_user_agents.present?
      # Blacklisted crawlers are denied everything; every other agent gets the standard disallow list.
      result[:agents] << { name: '*', disallow: deny_paths }
      SiteSetting.blacklisted_crawler_user_agents.split('|').each do |agent|
        result[:agents] << { name: agent, disallow: deny_all }
      end
    else
      result[:agents] << { name: '*', disallow: deny_paths }
    end

    if SiteSetting.slow_down_crawler_user_agents.present?
      # Throttled crawlers also get a crawl delay alongside the standard disallow list.
      SiteSetting.slow_down_crawler_user_agents.split('|').each do |agent|
        result[:agents] << {
          name: agent,
          delay: SiteSetting.slow_down_crawler_rate,
          disallow: deny_paths
        }
      end
    end

    result
  end
end