Replace Hpricot with Nokogiri
This commit is contained in:
parent
84a167725d
commit
6995e75d41
1
Gemfile
1
Gemfile
|
@ -20,7 +20,6 @@ gem 'fastimage'
|
|||
gem 'fog', require: false
|
||||
gem 'has_ip_address'
|
||||
gem 'hiredis'
|
||||
gem 'hpricot'
|
||||
gem 'i18n-js'
|
||||
gem 'jquery-rails'
|
||||
gem 'multi_json'
|
||||
|
|
|
@ -192,7 +192,6 @@ GEM
|
|||
highline (1.6.15)
|
||||
hike (1.2.1)
|
||||
hiredis (0.4.5)
|
||||
hpricot (0.8.6)
|
||||
httpauth (0.2.0)
|
||||
i18n (0.6.1)
|
||||
i18n-js (2.1.2)
|
||||
|
@ -464,7 +463,6 @@ DEPENDENCIES
|
|||
guard-spork
|
||||
has_ip_address
|
||||
hiredis
|
||||
hpricot
|
||||
i18n-js
|
||||
image_optim
|
||||
jasminerice
|
||||
|
|
|
@ -4,7 +4,6 @@ require_dependency 'rate_limiter'
|
|||
require_dependency 'post_revisor'
|
||||
|
||||
require 'archetype'
|
||||
require 'hpricot'
|
||||
require 'digest/sha1'
|
||||
|
||||
class Post < ActiveRecord::Base
|
||||
|
|
|
@ -31,7 +31,7 @@ The following Ruby Gems are used in Discourse:
|
|||
* [vestal_versions](https://rubygems.org/gems/vestal_versions)
|
||||
* [coffee-rails](https://rubygems.org/gems/coffee-rails)
|
||||
* [uglifier](https://rubygems.org/gems/uglifier)
|
||||
* [hpricot](https://rubygems.org/gems/hpricot)
|
||||
* [nokogiri](https://rubygems.org/gems/nokogiri)
|
||||
* [uuidtools](https://rubygems.org/gems/uuidtools)
|
||||
* [rinku](https://rubygems.org/gems/rinku)
|
||||
* [ruby-openid](https://rubygems.org/gems/ruby-openid)
|
||||
|
|
|
@ -9,7 +9,7 @@ class CookedPostProcessor
|
|||
@dirty = false
|
||||
@opts = opts
|
||||
@post = post
|
||||
@doc = Hpricot(post.cooked)
|
||||
@doc = Nokogiri::HTML(post.cooked)
|
||||
end
|
||||
|
||||
def dirty?
|
||||
|
|
|
@ -34,7 +34,7 @@ module Oneboxer
|
|||
if Whitelist.allowed?(url)
|
||||
page_html = open(url).read
|
||||
if page_html.present?
|
||||
doc = Hpricot(page_html)
|
||||
doc = Nokogiri::HTML(page_html)
|
||||
|
||||
# See if it has an oembed thing we can use
|
||||
(doc/"link[@type='application/json+oembed']").each do |oembed|
|
||||
|
@ -56,7 +56,7 @@ module Oneboxer
|
|||
# Parse URLs out of HTML, returning the document when finished.
|
||||
def self.each_onebox_link(string_or_doc)
|
||||
doc = string_or_doc
|
||||
doc = Hpricot(doc) if doc.is_a?(String)
|
||||
doc = Nokogiri::HTML(doc) if doc.is_a?(String)
|
||||
|
||||
onebox_links = doc.search("a.onebox")
|
||||
if onebox_links.present?
|
||||
|
|
|
@ -22,19 +22,19 @@ module Oneboxer
|
|||
end
|
||||
|
||||
def parse(data)
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
result[:title] = hp.at("h1")
|
||||
result[:title] = html_doc.at("h1")
|
||||
result[:title] = result[:title].inner_html if result[:title].present?
|
||||
|
||||
image = hp.at(".main-image img")
|
||||
image = html_doc.at(".main-image img")
|
||||
result[:image] = image['src'] if image
|
||||
|
||||
result[:by_info] = hp.at("#by-line")
|
||||
result[:by_info] = html_doc.at("#by-line")
|
||||
result[:by_info] = BaseOnebox.remove_whitespace(result[:by_info].inner_html) if result[:by_info].present?
|
||||
|
||||
summary = hp.at("#description-and-details-content")
|
||||
summary = html_doc.at("#description-and-details-content")
|
||||
result[:text] = summary.inner_html if summary.present?
|
||||
|
||||
result
|
||||
|
|
|
@ -12,20 +12,20 @@ module Oneboxer
|
|||
|
||||
def parse(data)
|
||||
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
|
||||
m = hp.at("h1.doc-banner-title")
|
||||
m = html_doc.at("h1.doc-banner-title")
|
||||
result[:title] = m.inner_text if m
|
||||
|
||||
m = hp.at("div#doc-original-text")
|
||||
m = html_doc.at("div#doc-original-text")
|
||||
if m
|
||||
result[:text] = BaseOnebox.replace_tags_with_spaces(m.inner_html)
|
||||
result[:text] = result[:text][0..MAX_TEXT]
|
||||
end
|
||||
|
||||
m = hp.at("div.doc-banner-icon img")
|
||||
m = html_doc.at("div.doc-banner-icon img")
|
||||
result[:image] = m['src'] if m
|
||||
|
||||
result
|
||||
|
|
|
@ -17,17 +17,17 @@ module Oneboxer
|
|||
|
||||
def parse(data)
|
||||
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
|
||||
m = hp.at("h1")
|
||||
m = html_doc.at("h1")
|
||||
result[:title] = m.inner_text if m
|
||||
|
||||
m = hp.at("h4 ~ p")
|
||||
m = html_doc.at("h4 ~ p")
|
||||
result[:text] = m.inner_text[0..MAX_TEXT] if m
|
||||
|
||||
m = hp.at(".product img.artwork")
|
||||
m = html_doc.at(".product img.artwork")
|
||||
result[:image] = m['src'] if m
|
||||
|
||||
result
|
||||
|
|
|
@ -9,7 +9,7 @@ module Oneboxer
|
|||
|
||||
page_html = open(@url).read
|
||||
return nil if page_html.blank?
|
||||
doc = Hpricot(page_html)
|
||||
doc = Nokogiri::HTML(page_html)
|
||||
|
||||
# Flickr's oembed just stopped returning images for no reason. Let's use opengraph instead.
|
||||
open_graph = Oneboxer.parse_open_graph(doc)
|
||||
|
|
|
@ -20,23 +20,23 @@ module Oneboxer
|
|||
|
||||
def parse(data)
|
||||
|
||||
hp = Hpricot(data)
|
||||
html_doc = Nokogiri::HTML(data)
|
||||
|
||||
result = {}
|
||||
|
||||
title = hp.at('title').inner_html
|
||||
title = html_doc.at('title').inner_html
|
||||
result[:title] = title.gsub!(/ - Wikipedia, the free encyclopedia/, '') if title.present?
|
||||
|
||||
# get the first image > 150 pix high
|
||||
images = hp.search("img").select { |img| img['height'].to_i > 150 }
|
||||
images = html_doc.search("img").select { |img| img['height'].to_i > 150 }
|
||||
|
||||
result[:image] = "http:#{images[0]["src"]}" unless images.empty?
|
||||
|
||||
# remove the table from mobile layout, as it can contain paras in some rare cases
|
||||
hp.search("table").remove
|
||||
html_doc.search("table").remove
|
||||
|
||||
# get all the paras
|
||||
paras = hp.search("p")
|
||||
paras = html_doc.search("p")
|
||||
text = ""
|
||||
|
||||
unless paras.empty?
|
||||
|
|
|
@ -19,7 +19,10 @@ describe CookedPostProcessor do
|
|||
end
|
||||
|
||||
it 'inserts the onebox' do
|
||||
@cpp.html.should == "GANGNAM STYLE"
|
||||
@cpp.html.should == <<EXPECTED
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html><body>GANGNAM STYLE</body></html>
|
||||
EXPECTED
|
||||
end
|
||||
|
||||
end
|
||||
|
|
|
@ -26,7 +26,7 @@ private
|
|||
<h3><a href="http://www.amazon.com/Ruby-Programming-Language-David-Flanagan/dp/0596516177" target="_blank">The Ruby Programming Language (Paperback)</a></h3>
|
||||
<h4>David Flanagan, Yukihiro Matsumoto</h4>
|
||||
|
||||
The Ruby Programming Language is the authoritative guide to Ruby ...
|
||||
The Ruby Programming Language is the authoritative guide to Ruby ...
|
||||
|
||||
</div>
|
||||
<div class='clearfix'></div>
|
||||
|
|
|
@ -25,7 +25,10 @@ private
|
|||
<img src="https://lh5.ggpht.com/wrYYVu74XNUu2WHk0aSZEqgdCDCNti9Fl0_dJnhgR6jY04ajQgVg5ABMatfcTDsB810=w124" class="thumbnail">
|
||||
<h3><a href="https://play.google.com/store/apps/details?id=com.moosoft.parrot" target="_blank">Talking Parrot</a></h3>
|
||||
|
||||
Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording. Press the MENU button to access the settings where you can change the record time and repeat count. This app uses anonymous usage stats to understand and improve performance. Comments and feedback welcome.
|
||||
Listen to the parrot repeat what you say. A Fun application for all ages. Upgrade to Talking Parrot Pro to save sounds, set them as your ringtone and control recording.
|
||||
Press the MENU button to access the settings where you can change the record time and repeat count.
|
||||
This app uses anonymous usage stats to understand and improve performance.
|
||||
Comments and feedback welcome.
|
||||
</div>
|
||||
<div class='clearfix'></div>
|
||||
</div>
|
||||
|
|
|
@ -145,16 +145,16 @@ describe Oneboxer do
|
|||
|
||||
it 'yields each url and element when given a string' do
|
||||
result = Oneboxer.each_onebox_link(@html) do |url, element|
|
||||
element.is_a?(Hpricot::Elem).should be_true
|
||||
element.is_a?(Nokogiri::XML::Element).should be_true
|
||||
url.should == 'http://discourse.org'
|
||||
end
|
||||
result.kind_of?(Hpricot::Doc).should be_true
|
||||
result.kind_of?(Nokogiri::HTML::Document).should be_true
|
||||
end
|
||||
|
||||
it 'yields each url and element when given a doc' do
|
||||
doc = Hpricot(@html)
|
||||
doc = Nokogiri::HTML(@html)
|
||||
Oneboxer.each_onebox_link(doc) do |url, element|
|
||||
element.is_a?(Hpricot::Elem).should be_true
|
||||
element.is_a?(Nokogiri::XML::Element).should be_true
|
||||
url.should == 'http://discourse.org'
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue