When writing the cache, remove elements from the user agents list. Also return a message and content type when blocking a crawler.
This commit is contained in:
parent
a84bb81ab5
commit
4d12ff2e8a
|
@ -30,9 +30,9 @@ class WebCrawlerRequest < ActiveRecord::Base
|
|||
self.last_flush = Time.now.utc
|
||||
|
||||
date = date.to_date
|
||||
ua_list_key = user_agent_list_key(date)
|
||||
|
||||
$redis.smembers(user_agent_list_key(date)).each do |user_agent, _|
|
||||
|
||||
while user_agent = $redis.spop(ua_list_key)
|
||||
val = get_and_reset(redis_key(user_agent, date))
|
||||
|
||||
next if val == 0
|
||||
|
@ -57,7 +57,7 @@ class WebCrawlerRequest < ActiveRecord::Base
|
|||
$redis.del redis_key(user_agent, date)
|
||||
end
|
||||
|
||||
$redis.del list_key
|
||||
$redis.del(list_key)
|
||||
end
|
||||
|
||||
protected
|
||||
|
|
|
@ -167,7 +167,7 @@ class Middleware::RequestTracker
|
|||
|
||||
if block_crawler(request)
|
||||
log_request = false
|
||||
result = [403, {}, []]
|
||||
result = [403, { 'Content-Type' => 'text/plain' }, ['Crawler is not allowed']]
|
||||
return result
|
||||
end
|
||||
|
||||
|
|
|
@ -291,7 +291,7 @@ describe Middleware::RequestTracker do
|
|||
|
||||
def expect_blocked_response(status, _, response)
|
||||
expect(status).to eq(403)
|
||||
expect(response).to be_blank
|
||||
expect(response).to eq(['Crawler is not allowed'])
|
||||
end
|
||||
|
||||
it "applies whitelisted_crawler_user_agents correctly" do
|
||||
|
|
Loading…
Reference in New Issue