Improve crawler detection: add Twitterbot, Facebook, curl, Bing, and Baidu user agents.
This commit is contained in:
parent
5e93d60e9b
commit
e3702ecb30
|
@ -1,5 +1,5 @@
|
|||
# Detects well-known web crawlers / bots from an HTTP User-Agent string.
module CrawlerDetection
  # Substrings identifying known crawlers. curl is included because it is
  # commonly used for scripted/automated fetches.
  CRAWLER_PATTERN = /Googlebot|Mediapartners|AdsBot|curl|Twitterbot|facebookexternalhit|bingbot|Baiduspider/

  # Returns true when +user_agent+ matches a known crawler pattern.
  #
  # @param user_agent [String, nil] the raw User-Agent header value
  # @return [Boolean] true for crawlers; false otherwise (including nil input)
  def self.crawler?(user_agent)
    # match? avoids allocating a MatchData object, unlike !match(...).nil?
    CRAWLER_PATTERN.match?(user_agent)
  end
end
|
||||
|
|
|
@ -15,6 +15,10 @@ describe CrawlerDetection do
|
|||
described_class.crawler?("(compatible; Mediapartners-Google/2.1; +http://www.google.com/bot.html)").should == true
|
||||
described_class.crawler?("Mediapartners-Google").should == true
|
||||
described_class.crawler?("AdsBot-Google (+http://www.google.com/adsbot.html)").should == true
|
||||
described_class.crawler?("Twitterbot").should == true
|
||||
described_class.crawler?("facebookexternalhit/1.1 (+http(s)://www.facebook.com/externalhit_uatext.php)").should == true
|
||||
described_class.crawler?("Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)").should == true
|
||||
described_class.crawler?("Baiduspider+(+http://www.baidu.com/search/spider.htm)").should == true
|
||||
end
|
||||
|
||||
it "returns false for non-crawler user agents" do
|
||||
|
|
Loading…
Reference in New Issue