FEATURE: provide extra signal about content age to crawlers

Adds a Last-Modified response header on crawler requests to help teach crawlers not to re-crawl old content
This commit is contained in:
Sam 2018-04-13 14:58:33 +10:00
parent 6179c0ce51
commit 3632b8d8d6
2 changed files with 23 additions and 0 deletions

View File

@ -117,6 +117,14 @@ class TopicsController < ApplicationController
canonical_url UrlHelper.absolute_without_cdn(@topic_view.canonical_path)
# Crawler-only hint (for now): report the most recent updated_at among the
# posts in the topic view as Last-Modified, giving crawlers more signal
# about the age of the data. The header is left unset when no timestamp
# is available.
if use_crawler_layout?
  newest_stamp = @topic_view.posts&.map(&:updated_at)&.max&.httpdate
  response.headers['Last-Modified'] = newest_stamp if newest_stamp
end
perform_show_response
rescue Discourse::InvalidAccess => ex

View File

@ -554,11 +554,21 @@ RSpec.describe TopicsController do
context "when a crawler" do
it "renders with the crawler layout, and handles proper pagination" do
page1_time = 3.months.ago
page2_time = 2.months.ago
page3_time = 1.month.ago
freeze_time page1_time
topic = Fabricate(:topic)
Fabricate(:post, topic_id: topic.id)
Fabricate(:post, topic_id: topic.id)
freeze_time page2_time
Fabricate(:post, topic_id: topic.id)
Fabricate(:post, topic_id: topic.id)
freeze_time page3_time
Fabricate(:post, topic_id: topic.id)
# ugly, but no interface to set this and we don't want to create
@ -575,15 +585,20 @@ RSpec.describe TopicsController do
expect(body).to_not have_tag(:meta, with: { name: 'fragment' })
expect(body).to include('<link rel="next" href="' + topic.relative_url + "?page=2")
expect(response.headers['Last-Modified']).to eq(page1_time.httpdate)
get topic.url + "?page=2", env: { "HTTP_USER_AGENT" => user_agent }
body = response.body
expect(response.headers['Last-Modified']).to eq(page2_time.httpdate)
expect(body).to include('<link rel="prev" href="' + topic.relative_url)
expect(body).to include('<link rel="next" href="' + topic.relative_url + "?page=3")
get topic.url + "?page=3", env: { "HTTP_USER_AGENT" => user_agent }
body = response.body
expect(response.headers['Last-Modified']).to eq(page3_time.httpdate)
expect(body).to include('<link rel="prev" href="' + topic.relative_url + "?page=2")
end
end