27 lines
736 B
Ruby
27 lines
736 B
Ruby
|
module Jobs
|
||
|
|
||
|
class CleanUpCrawlerStats < Jobs::Scheduled
|
||
|
every 1.day
|
||
|
|
||
|
def execute(args)
|
||
|
WebCrawlerRequest.where('date < ?', WebCrawlerRequest.max_record_age.ago).delete_all
|
||
|
|
||
|
# keep count of only the top user agents
|
||
|
WebCrawlerRequest.exec_sql <<~SQL
|
||
|
WITH ranked_requests AS (
|
||
|
SELECT row_number() OVER (ORDER BY count DESC) as row_number, id
|
||
|
FROM web_crawler_requests
|
||
|
WHERE date = '#{1.day.ago.strftime("%Y-%m-%d")}'
|
||
|
)
|
||
|
DELETE FROM web_crawler_requests
|
||
|
WHERE id IN (
|
||
|
SELECT ranked_requests.id
|
||
|
FROM ranked_requests
|
||
|
WHERE row_number > #{WebCrawlerRequest.max_records_per_day}
|
||
|
)
|
||
|
SQL
|
||
|
end
|
||
|
end
|
||
|
|
||
|
end
|