Replace Hpricot with Nokogiri

This commit is contained in:
Jaime Iniesta 2013-02-12 09:46:45 -05:00
parent 84a167725d
commit 6995e75d41
15 changed files with 36 additions and 34 deletions

View File

@ -20,7 +20,6 @@ gem 'fastimage'
gem 'fog', require: false
gem 'has_ip_address'
gem 'hiredis'
gem 'hpricot'
gem 'i18n-js'
gem 'jquery-rails'
gem 'multi_json'

View File

@ -192,7 +192,6 @@ GEM
highline (1.6.15)
hike (1.2.1)
hiredis (0.4.5)
hpricot (0.8.6)
httpauth (0.2.0)
i18n (0.6.1)
i18n-js (2.1.2)
@ -464,7 +463,6 @@ DEPENDENCIES
guard-spork
has_ip_address
hiredis
hpricot
i18n-js
image_optim
jasminerice

View File

@ -4,7 +4,6 @@ require_dependency 'rate_limiter'
require_dependency 'post_revisor'
require 'archetype'
require 'hpricot'
require 'digest/sha1'
class Post < ActiveRecord::Base

View File

@ -31,7 +31,7 @@ The following Ruby Gems are used in Discourse:
* [vestal_versions](https://rubygems.org/gems/vestal_versions)
* [coffee-rails](https://rubygems.org/gems/coffee-rails)
* [uglifier](https://rubygems.org/gems/uglifier)
* [hpricot](https://rubygems.org/gems/hpricot)
* [nokogiri](https://rubygems.org/gems/nokogiri)
* [uuidtools](https://rubygems.org/gems/uuidtools)
* [rinku](https://rubygems.org/gems/rinku)
* [ruby-openid](https://rubygems.org/gems/ruby-openid)

View File

@ -9,7 +9,7 @@ class CookedPostProcessor
@dirty = false
@opts = opts
@post = post
@doc = Hpricot(post.cooked)
@doc = Nokogiri::HTML(post.cooked)
end
def dirty?

View File

@ -34,7 +34,7 @@ module Oneboxer
if Whitelist.allowed?(url)
page_html = open(url).read
if page_html.present?
doc = Hpricot(page_html)
doc = Nokogiri::HTML(page_html)
# See if it has an oembed thing we can use
(doc/"link[@type='application/json+oembed']").each do |oembed|
@ -56,7 +56,7 @@ module Oneboxer
# Parse URLs out of HTML, returning the document when finished.
def self.each_onebox_link(string_or_doc)
doc = string_or_doc
doc = Hpricot(doc) if doc.is_a?(String)
doc = Nokogiri::HTML(doc) if doc.is_a?(String)
onebox_links = doc.search("a.onebox")
if onebox_links.present?

View File

@ -22,19 +22,19 @@ module Oneboxer
end
def parse(data)
hp = Hpricot(data)
html_doc = Nokogiri::HTML(data)
result = {}
result[:title] = hp.at("h1")
result[:title] = html_doc.at("h1")
result[:title] = result[:title].inner_html if result[:title].present?
image = hp.at(".main-image img")
image = html_doc.at(".main-image img")
result[:image] = image['src'] if image
result[:by_info] = hp.at("#by-line")
result[:by_info] = html_doc.at("#by-line")
result[:by_info] = BaseOnebox.remove_whitespace(result[:by_info].inner_html) if result[:by_info].present?
summary = hp.at("#description-and-details-content")
summary = html_doc.at("#description-and-details-content")
result[:text] = summary.inner_html if summary.present?
result

View File

@ -12,20 +12,20 @@ module Oneboxer
def parse(data)
hp = Hpricot(data)
html_doc = Nokogiri::HTML(data)
result = {}
m = hp.at("h1.doc-banner-title")
m = html_doc.at("h1.doc-banner-title")
result[:title] = m.inner_text if m
m = hp.at("div#doc-original-text")
m = html_doc.at("div#doc-original-text")
if m
result[:text] = BaseOnebox.replace_tags_with_spaces(m.inner_html)
result[:text] = result[:text][0..MAX_TEXT]
end
m = hp.at("div.doc-banner-icon img")
m = html_doc.at("div.doc-banner-icon img")
result[:image] = m['src'] if m
result

View File

@ -17,17 +17,17 @@ module Oneboxer
def parse(data)
hp = Hpricot(data)
html_doc = Nokogiri::HTML(data)
result = {}
m = hp.at("h1")
m = html_doc.at("h1")
result[:title] = m.inner_text if m
m = hp.at("h4 ~ p")
m = html_doc.at("h4 ~ p")
result[:text] = m.inner_text[0..MAX_TEXT] if m
m = hp.at(".product img.artwork")
m = html_doc.at(".product img.artwork")
result[:image] = m['src'] if m
result

View File

@ -9,7 +9,7 @@ module Oneboxer
page_html = open(@url).read
return nil if page_html.blank?
doc = Hpricot(page_html)
doc = Nokogiri::HTML(page_html)
# Flickr's oembed just stopped returning images for no reason. Let's use opengraph instead.
open_graph = Oneboxer.parse_open_graph(doc)

View File

@ -20,23 +20,23 @@ module Oneboxer
def parse(data)
hp = Hpricot(data)
html_doc = Nokogiri::HTML(data)
result = {}
title = hp.at('title').inner_html
title = html_doc.at('title').inner_html
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present?
# get the first image > 150 pix high
images = hp.search("img").select { |img| img['height'].to_i > 150 }
images = html_doc.search("img").select { |img| img['height'].to_i > 150 }
result[:image] = "http:#{images[0]["src"]}" unless images.empty?
# remove the table from mobile layout, as it can contain paras in some rare cases
hp.search("table").remove
html_doc.search("table").remove
# get all the paras
paras = hp.search("p")
paras = html_doc.search("p")
text = ""
unless paras.empty?

View File

@ -19,7 +19,10 @@ describe CookedPostProcessor do
end
it 'inserts the onebox' do
@cpp.html.should == "GANGNAM STYLE"
@cpp.html.should == <<EXPECTED
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html><body>GANGNAM STYLE</body></html>
EXPECTED
end
end

View File

@ -26,7 +26,7 @@ private
<h3><a href="http://www.amazon.com/Ruby-Programming-Language-David-Flanagan/dp/0596516177" target="_blank">The Ruby Programming Language (Paperback)</a></h3>
<h4>David Flanagan, Yukihiro Matsumoto</h4>
The Ruby Programming Language is the authoritative guide to Ruby&nbsp;...
The Ruby Programming Language is the authoritative guide to Ruby ...
</div>
<div class='clearfix'></div>

View File

@ -25,7 +25,10 @@ private
<img src="https://lh5.ggpht.com/wrYYVu74XNUu2WHk0aSZEqgdCDCNti9Fl0_dJnhgR6jY04ajQgVg5ABMatfcTDsB810=w124" class="thumbnail">
<h3><a href="https://play.google.com/store/apps/details?id=com.moosoft.parrot" target="_blank">Talking Parrot</a></h3>
Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording. Press the MENU button to access the settings where you can change the record time and repeat count. This app uses anonymous usage stats to understand and improve performance. Comments and feedback welcome.
Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording.
Press the MENU button to access the settings where you can change the record time and repeat count.
This app uses anonymous usage stats to understand and improve performance.
Comments and feedback welcome.
</div>
<div class='clearfix'></div>
</div>

View File

@ -145,16 +145,16 @@ describe Oneboxer do
it 'yields each url and element when given a string' do
result = Oneboxer.each_onebox_link(@html) do |url, element|
element.is_a?(Hpricot::Elem).should be_true
element.is_a?(Nokogiri::XML::Element).should be_true
url.should == 'http://discourse.org'
end
result.kind_of?(Hpricot::Doc).should be_true
result.kind_of?(Nokogiri::HTML::Document).should be_true
end
it 'yields each url and element when given a doc' do
doc = Hpricot(@html)
doc = Nokogiri::HTML(@html)
Oneboxer.each_onebox_link(doc) do |url, element|
element.is_a?(Hpricot::Elem).should be_true
element.is_a?(Nokogiri::XML::Element).should be_true
url.should == 'http://discourse.org'
end
end