Use rchardet instead of charlock_holmes gem
This commit is contained in:
parent
5d421fb946
commit
a115aae45f
2
Gemfile
2
Gemfile
|
@ -180,7 +180,7 @@ gem 'rqrcode'
|
|||
|
||||
gem 'sshkey', require: false
|
||||
|
||||
gem 'charlock_holmes', require: false
|
||||
gem 'rchardet', require: false
|
||||
|
||||
if ENV["IMPORT"] == "1"
|
||||
gem 'mysql2'
|
||||
|
|
|
@ -75,7 +75,6 @@ GEM
|
|||
uniform_notifier (~> 1.11.0)
|
||||
byebug (10.0.2)
|
||||
certified (1.0.0)
|
||||
charlock_holmes (0.7.6)
|
||||
chunky_png (1.3.10)
|
||||
claide (1.0.2)
|
||||
claide-plugins (0.9.2)
|
||||
|
@ -321,6 +320,7 @@ GEM
|
|||
ffi (>= 1.0.6)
|
||||
msgpack (>= 0.4.3)
|
||||
trollop (>= 1.16.2)
|
||||
rchardet (1.8.0)
|
||||
redis (4.0.1)
|
||||
redis-namespace (1.6.0)
|
||||
redis (>= 3.0.4)
|
||||
|
@ -457,7 +457,6 @@ DEPENDENCIES
|
|||
bullet
|
||||
byebug
|
||||
certified
|
||||
charlock_holmes
|
||||
cppjieba_rb
|
||||
danger
|
||||
discourse_image_optim
|
||||
|
@ -523,6 +522,7 @@ DEPENDENCIES
|
|||
rb-fsevent
|
||||
rb-inotify (~> 0.9)
|
||||
rbtrace
|
||||
rchardet
|
||||
redis
|
||||
redis-namespace
|
||||
rinku
|
||||
|
|
|
@ -90,7 +90,7 @@ module Jobs
|
|||
def parsed_feed
|
||||
raw_feed, encoding = fetch_rss
|
||||
encoded_feed = Encodings.try_utf8(raw_feed, encoding) if encoding
|
||||
encoded_feed = Encodings.to_utf8(raw_feed, encoding_hint: encoding) unless encoded_feed
|
||||
encoded_feed = Encodings.to_utf8(raw_feed) unless encoded_feed
|
||||
|
||||
return nil if encoded_feed.blank?
|
||||
|
||||
|
|
|
@ -1,20 +1,12 @@
|
|||
require 'charlock_holmes'
|
||||
require 'rchardet'
|
||||
|
||||
module Encodings
|
||||
BINARY_SCAN_LENGTH = 0
|
||||
def self.to_utf8(string)
|
||||
result = CharDet.detect(string)
|
||||
|
||||
def self.to_utf8(string, encoding_hint: nil, delete_bom: true)
|
||||
detector = CharlockHolmes::EncodingDetector.new(BINARY_SCAN_LENGTH)
|
||||
result = detector.detect(string, encoding_hint&.to_s)
|
||||
|
||||
if result && result[:encoding]
|
||||
string = CharlockHolmes::Converter.convert(string, result[:encoding], Encoding::UTF_8.name)
|
||||
else
|
||||
string = string.encode(Encoding::UTF_8, undef: :replace, invalid: :replace, replace: '')
|
||||
end
|
||||
|
||||
delete_bom!(string) if delete_bom
|
||||
string
|
||||
encoded_string = try_utf8(string, result['encoding']) if result && result['encoding']
|
||||
encoded_string = force_utf8(string) if encoded_string.nil?
|
||||
encoded_string
|
||||
end
|
||||
|
||||
def self.try_utf8(string, source_encoding)
|
||||
|
@ -26,6 +18,14 @@ module Encodings
|
|||
nil
|
||||
end
|
||||
|
||||
def self.force_utf8(string)
|
||||
encoded_string = string.encode(Encoding::UTF_8,
|
||||
undef: :replace,
|
||||
invalid: :replace,
|
||||
replace: '')
|
||||
delete_bom!(encoded_string)
|
||||
end
|
||||
|
||||
def self.delete_bom!(string)
|
||||
string.sub!(/\A\xEF\xBB\xBF/, '') unless string.blank?
|
||||
string
|
||||
|
|
|
@ -1,9 +1,9 @@
|
|||
require 'rails_helper'
|
||||
|
||||
describe Encodings do
|
||||
def to_utf8(filename, encoding_hint = nil)
|
||||
def to_utf8(filename)
|
||||
string = File.read("#{Rails.root}/spec/fixtures/encodings/#{filename}").chomp
|
||||
Encodings.to_utf8(string, encoding_hint: encoding_hint)
|
||||
Encodings.to_utf8(string)
|
||||
end
|
||||
|
||||
context "unicode" do
|
||||
|
|
Loading…
Reference in New Issue