globalized wikipedia onebox

This commit is contained in:
Régis Hanol 2013-03-07 03:30:40 +01:00
parent a0949ca992
commit 621995f35a
2 changed files with 29 additions and 12 deletions

View File

@ -3,7 +3,7 @@ require_dependency 'oneboxer/handlebars_onebox'
module Oneboxer module Oneboxer
class WikipediaOnebox < HandlebarsOnebox class WikipediaOnebox < HandlebarsOnebox
matcher /^https?:\/\/.*wikipedia.(com|org)\/.*$/ matcher /^https?:\/\/.*wikipedia\.(com|org)\/.*$/
favicon 'wikipedia.png' favicon 'wikipedia.png'
def template def template
@ -11,11 +11,10 @@ module Oneboxer
end end
# Rewrites the Wikipedia article URL held in @url into the address of the
# same article on the matching `<lang>.m.wikipedia.org` (mobile) host.
#
# The language subdomain of the original URL is preserved; a missing
# subdomain, `www` or a bare `m` falls back to "en". An existing `.m.`
# segment is not duplicated.
#
# @return [String] the translated URL, or @url unchanged when it is not a
#   `/wiki/<title>` article address (e.g. `/w/index.php?title=...`).
def translate_url
  # \A instead of ^ so the pattern cannot match in the middle of a multi-line
  # string, and the subdomain is limited to a single host label so that text
  # from an unrelated host can never be copied into the generated URL.
  m = @url.match(%r{\Ahttps?://(?:(?<subdomain>[a-z0-9-]+)\.)?(?:m\.)?wikipedia\.(?:com|org)/wiki/(?<identifier>[^#/?]+)}i)

  # Accepted by the class matcher but not an article URL: leave it alone
  # rather than raising on a nil match.
  return @url unless m

  subdomain = (m[:subdomain] || "en").downcase
  subdomain = "en" if %w[www m].include?(subdomain)

  # '+' is a literal character in a URL path, but CGI.unescape would decode it
  # as a space; protect it first so titles such as "C++" survive.
  article_id = CGI.unescape(m[:identifier].gsub("+", "%2B"))

  # CGI.escape encodes reserved characters (&, +, =, ?) that would otherwise
  # be interpreted as query-string syntax instead of being part of the title.
  "http://#{subdomain}.m.wikipedia.org/w/index.php?title=#{CGI.escape(article_id)}"
end
def parse(data) def parse(data)
@ -25,7 +24,7 @@ module Oneboxer
result = {} result = {}
title = html_doc.at('title').inner_html title = html_doc.at('title').inner_html
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present? result[:title] = title.gsub!(/ - Wikipedia.*$/, '') if title.present?
# get the first image > 150 pix high # get the first image > 150 pix high
images = html_doc.search("img").select { |img| img['height'].to_i > 150 } images = html_doc.search("img").select { |img| img['height'].to_i > 150 }

View File

@ -5,14 +5,32 @@ require 'oneboxer'
require 'oneboxer/wikipedia_onebox' require 'oneboxer/wikipedia_onebox'
describe Oneboxer::WikipediaOnebox do describe Oneboxer::WikipediaOnebox do
before(:each) do
@o = Oneboxer::WikipediaOnebox.new("http://en.wikipedia.org/wiki/Ruby")
FakeWeb.register_uri(:get, @o.translate_url, :response => fixture_file('oneboxer/wikipedia.response'))
FakeWeb.register_uri(:get, 'http://en.m.wikipedia.org/wiki/Ruby', :response => fixture_file('oneboxer/wikipedia_redirected.response'))
end
it "generates the expected onebox for Wikipedia" do it "generates the expected onebox for Wikipedia" do
@o.onebox.should == expected_wikipedia_result o = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.org/wiki/Ruby')
FakeWeb.register_uri(:get, o.translate_url, :response => fixture_file('oneboxer/wikipedia.response'))
FakeWeb.register_uri(:get, 'http://en.m.wikipedia.org/wiki/Ruby', :response => fixture_file('oneboxer/wikipedia_redirected.response'))
o.onebox.should == expected_wikipedia_result
end
# A wikipedia.com article address is translated to the wikipedia.org `.m.` host.
it "accepts .com extention" do
  expected_url = 'http://en.m.wikipedia.org/w/index.php?title=Postgres'
  onebox = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.com/wiki/Postgres')
  onebox.translate_url.should == expected_url
end
# A non-ASCII article title is percent-encoded in the translated URL.
it "encodes identifier" do
  expected_url = 'http://en.m.wikipedia.org/w/index.php?title=Caf%C3%A9'
  onebox = Oneboxer::WikipediaOnebox.new('http://en.wikipedia.com/wiki/Café')
  onebox.translate_url.should == expected_url
end
# A URL without a language subdomain is translated to the "en" host.
it "defaults to en locale" do
  expected_url = 'http://en.m.wikipedia.org/w/index.php?title=Ruby_on_rails'
  onebox = Oneboxer::WikipediaOnebox.new('http://wikipedia.org/wiki/Ruby_on_rails')
  onebox.translate_url.should == expected_url
end
# The language subdomain of the source URL is carried over to the translated URL.
it "generates localized url" do
  expected_url = 'http://fr.m.wikipedia.org/w/index.php?title=Ruby'
  onebox = Oneboxer::WikipediaOnebox.new('http://fr.wikipedia.org/wiki/Ruby')
  onebox.translate_url.should == expected_url
end
private private