When writing the cache, remove elements from the user agents list. Also return a message and content type when blocking a crawler.

This commit is contained in:
Neil Lalonde 2018-03-27 13:44:14 -04:00
parent a84bb81ab5
commit 4d12ff2e8a
3 changed files with 5 additions and 5 deletions

View File

@ -30,9 +30,9 @@ class WebCrawlerRequest < ActiveRecord::Base
self.last_flush = Time.now.utc
date = date.to_date
ua_list_key = user_agent_list_key(date)
$redis.smembers(user_agent_list_key(date)).each do |user_agent, _|
while user_agent = $redis.spop(ua_list_key)
val = get_and_reset(redis_key(user_agent, date))
next if val == 0
@ -57,7 +57,7 @@ class WebCrawlerRequest < ActiveRecord::Base
$redis.del redis_key(user_agent, date)
end
$redis.del list_key
$redis.del(list_key)
end
protected

View File

@ -167,7 +167,7 @@ class Middleware::RequestTracker
if block_crawler(request)
log_request = false
result = [403, {}, []]
result = [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']]
return result
end

View File

@ -291,7 +291,7 @@ describe Middleware::RequestTracker do
def expect_blocked_response(status, _, response)
expect(status).to eq(403)
expect(response).to be_blank
expect(response).to eq(['Crawler is not allowed'])
end
it "applies whitelisted_crawler_user_agents correctly" do