FEATURE: remove dependency of Redcarpet
PERF: cache fancy_title in topics table New pure ruby implementation is far more flexible and easier to amend.
This commit is contained in:
parent
9507573d5e
commit
ffb8cb8cac
1
Gemfile
1
Gemfile
|
@ -37,7 +37,6 @@ gem 'babel-transpiler'
|
|||
gem 'message_bus'
|
||||
gem 'rails_multisite', path: 'vendor/gems/rails_multisite'
|
||||
|
||||
gem 'redcarpet', require: false
|
||||
gem 'fast_xs'
|
||||
|
||||
gem 'fast_xor'
|
||||
|
|
|
@ -320,7 +320,6 @@ GEM
|
|||
ffi (>= 1.0.6)
|
||||
msgpack (>= 0.4.3)
|
||||
trollop (>= 1.16.2)
|
||||
redcarpet (3.3.2)
|
||||
redis (3.2.1)
|
||||
redis-namespace (1.5.2)
|
||||
redis (~> 3.0, >= 3.0.4)
|
||||
|
@ -510,7 +509,6 @@ DEPENDENCIES
|
|||
rb-fsevent
|
||||
rb-inotify (~> 0.9)
|
||||
rbtrace
|
||||
redcarpet
|
||||
redis
|
||||
rest-client
|
||||
rinku
|
||||
|
|
|
@ -5,6 +5,7 @@ require_dependency 'rate_limiter'
|
|||
require_dependency 'text_sentinel'
|
||||
require_dependency 'text_cleaner'
|
||||
require_dependency 'archetype'
|
||||
require_dependency 'html_prettify'
|
||||
|
||||
class Topic < ActiveRecord::Base
|
||||
include ActionView::Helpers::SanitizeHelper
|
||||
|
@ -164,6 +165,9 @@ class Topic < ActiveRecord::Base
|
|||
cancel_auto_close_job
|
||||
ensure_topic_has_a_category
|
||||
end
|
||||
if title_changed?
|
||||
write_attribute :fancy_title, Topic.fancy_title(title)
|
||||
end
|
||||
end
|
||||
|
||||
after_save do
|
||||
|
@ -267,17 +271,28 @@ class Topic < ActiveRecord::Base
|
|||
apply_per_day_rate_limit_for("pms", :max_private_messages_per_day)
|
||||
end
|
||||
|
||||
def self.fancy_title(title)
|
||||
escaped = ERB::Util.html_escape(title)
|
||||
return unless escaped
|
||||
HtmlPrettify.render(escaped)
|
||||
end
|
||||
|
||||
def fancy_title
|
||||
sanitized_title = ERB::Util.html_escape(title)
|
||||
return ERB::Util.html_escape(title) unless SiteSetting.title_fancy_entities?
|
||||
|
||||
return unless sanitized_title
|
||||
return sanitized_title unless SiteSetting.title_fancy_entities?
|
||||
unless fancy_title = read_attribute(:fancy_title)
|
||||
|
||||
# We don't always have to require this, if fancy is disabled
|
||||
# see: http://meta.discourse.org/t/pattern-for-defer-loading-gems-and-profiling-with-perftools-rb/4629
|
||||
require 'redcarpet' unless defined? Redcarpet
|
||||
fancy_title = Topic.fancy_title(title)
|
||||
write_attribute(:fancy_title, fancy_title)
|
||||
|
||||
Redcarpet::Render::SmartyPants.render(sanitized_title)
|
||||
unless new_record?
|
||||
# make sure data is set in table, this also allows us to change algorithm
|
||||
# by simply nulling this column
|
||||
exec_sql("UPDATE topics SET fancy_title = :fancy_title where id = :id", id: self.id, fancy_title: fancy_title)
|
||||
end
|
||||
end
|
||||
|
||||
fancy_title
|
||||
end
|
||||
|
||||
def pending_posts_count
|
||||
|
@ -698,6 +713,7 @@ class Topic < ActiveRecord::Base
|
|||
def title=(t)
|
||||
slug = Slug.for(t.to_s)
|
||||
write_attribute(:slug, slug)
|
||||
write_attribute(:fancy_title, nil)
|
||||
write_attribute(:title,t)
|
||||
end
|
||||
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
class AddFancyTitleToTopic < ActiveRecord::Migration
|
||||
def change
|
||||
add_column :topics, :fancy_title, :string, limit: 400, null: true
|
||||
end
|
||||
end
|
|
@ -0,0 +1,407 @@
|
|||
# heavily based off
|
||||
# https://github.com/vmg/redcarpet/blob/master/ext/redcarpet/html_smartypants.c
|
||||
# and
|
||||
# https://github.com/jmcnevin/rubypants/blob/master/lib/rubypants/core.rb
|
||||
# 99% of the code here is by Jeremy McNevin
|
||||
#
|
||||
# This Source File is available under BSD/MIT license as well as standard GPL
|
||||
#
|
||||
|
||||
class HtmlPrettify < String
|
||||
def self.render(html)
|
||||
new(html).to_html
|
||||
end
|
||||
|
||||
# Create a new RubyPants instance with the text in +string+.
|
||||
#
|
||||
# Allowed elements in the options array:
|
||||
#
|
||||
# 0 :: do nothing
|
||||
# 1 :: enable all, using only em-dash shortcuts
|
||||
# 2 :: enable all, using old school en- and em-dash shortcuts (*default*)
|
||||
# 3 :: enable all, using inverted old school en and em-dash shortcuts
|
||||
# -1 :: stupefy (translate HTML entities to their ASCII-counterparts)
|
||||
#
|
||||
# If you don't like any of these defaults, you can pass symbols to change
|
||||
# RubyPants' behavior:
|
||||
#
|
||||
# <tt>:quotes</tt> :: quotes
|
||||
# <tt>:backticks</tt> :: backtick quotes (``double'' only)
|
||||
# <tt>:allbackticks</tt> :: backtick quotes (``double'' and `single')
|
||||
# <tt>:dashes</tt> :: dashes
|
||||
# <tt>:oldschool</tt> :: old school dashes
|
||||
# <tt>:inverted</tt> :: inverted old school dashes
|
||||
# <tt>:ellipses</tt> :: ellipses
|
||||
# <tt>:convertquotes</tt> :: convert <tt>"</tt> entities to
|
||||
# <tt>"</tt>
|
||||
# <tt>:stupefy</tt> :: translate RubyPants HTML entities
|
||||
# to their ASCII counterparts.
|
||||
#
|
||||
# In addition, you can customize the HTML entities that will be injected by
|
||||
# passing in a hash for the final argument. The defaults for these entities
|
||||
# are as follows:
|
||||
#
|
||||
# <tt>:single_left_quote</tt> :: <tt>‘</tt>
|
||||
# <tt>:double_left_quote</tt> :: <tt>“</tt>
|
||||
# <tt>:single_right_quote</tt> :: <tt>’</tt>
|
||||
# <tt>:double_right_quote</tt> :: <tt>”</tt>
|
||||
# <tt>:em_dash</tt> :: <tt>—</tt>
|
||||
# <tt>:en_dash</tt> :: <tt>–</tt>
|
||||
# <tt>:ellipsis</tt> :: <tt>…</tt>
|
||||
# <tt>:html_quote</tt> :: <tt>" </tt>
|
||||
#
|
||||
def initialize(string, options=[2], entities = {})
|
||||
super string
|
||||
|
||||
@options = [*options]
|
||||
@entities = default_entities.update(entities)
|
||||
end
|
||||
|
||||
# Apply SmartyPants transformations.
|
||||
def to_html
|
||||
do_quotes = do_backticks = do_dashes = do_ellipses = nil
|
||||
|
||||
if @options.include?(0)
|
||||
# Do nothing.
|
||||
return self
|
||||
elsif @options.include?(1)
|
||||
# Do everything, turn all options on.
|
||||
do_quotes = do_backticks = do_ellipses = true
|
||||
do_dashes = :normal
|
||||
elsif @options.include?(2)
|
||||
# Do everything, turn all options on, use old school dash shorthand.
|
||||
do_quotes = do_backticks = do_ellipses = true
|
||||
do_dashes = :oldschool
|
||||
elsif @options.include?(3)
|
||||
# Do everything, turn all options on, use inverted old school
|
||||
# dash shorthand.
|
||||
do_quotes = do_backticks = do_ellipses = true
|
||||
do_dashes = :inverted
|
||||
elsif @options.include?(-1)
|
||||
do_stupefy = true
|
||||
else
|
||||
do_quotes = @options.include?(:quotes)
|
||||
do_backticks = @options.include?(:backticks)
|
||||
do_backticks = :both if @options.include?(:allbackticks)
|
||||
do_dashes = :normal if @options.include?(:dashes)
|
||||
do_dashes = :oldschool if @options.include?(:oldschool)
|
||||
do_dashes = :inverted if @options.include?(:inverted)
|
||||
do_ellipses = @options.include?(:ellipses)
|
||||
do_stupefy = @options.include?(:stupefy)
|
||||
end
|
||||
|
||||
# Parse the HTML
|
||||
tokens = tokenize
|
||||
|
||||
# Keep track of when we're inside <pre> or <code> tags.
|
||||
in_pre = false
|
||||
|
||||
# Here is the result stored in.
|
||||
result = ""
|
||||
|
||||
# This is a cheat, used to get some context for one-character
|
||||
# tokens that consist of just a quote char. What we do is remember
|
||||
# the last character of the previous text token, to use as context
|
||||
# to curl single- character quote tokens correctly.
|
||||
prev_token_last_char = nil
|
||||
|
||||
tokens.each do |token|
|
||||
if token.first == :tag
|
||||
result << token[1]
|
||||
if token[1] =~ %r!<(/?)(?:pre|code|kbd|script|math)[\s>]!
|
||||
in_pre = ($1 != "/") # Opening or closing tag?
|
||||
end
|
||||
else
|
||||
t = token[1]
|
||||
|
||||
# Remember last char of this token before processing.
|
||||
last_char = t[-1].chr
|
||||
|
||||
unless in_pre
|
||||
|
||||
t.gsub!("'", "'")
|
||||
|
||||
t = process_escapes t
|
||||
|
||||
t.gsub!(""", '"')
|
||||
|
||||
if do_dashes
|
||||
t = educate_dashes t if do_dashes == :normal
|
||||
t = educate_dashes_oldschool t if do_dashes == :oldschool
|
||||
t = educate_dashes_inverted t if do_dashes == :inverted
|
||||
end
|
||||
|
||||
t = educate_ellipses t if do_ellipses
|
||||
|
||||
t = educate_fractions t
|
||||
|
||||
# Note: backticks need to be processed before quotes.
|
||||
if do_backticks
|
||||
t = educate_backticks t
|
||||
t = educate_single_backticks t if do_backticks == :both
|
||||
end
|
||||
|
||||
if do_quotes
|
||||
if t == "'"
|
||||
# Special case: single-character ' token
|
||||
if prev_token_last_char =~ /\S/
|
||||
t = entity(:single_right_quote)
|
||||
else
|
||||
t = entity(:single_left_quote)
|
||||
end
|
||||
elsif t == '"'
|
||||
# Special case: single-character " token
|
||||
if prev_token_last_char =~ /\S/
|
||||
t = entity(:double_right_quote)
|
||||
else
|
||||
t = entity(:double_left_quote)
|
||||
end
|
||||
else
|
||||
# Normal case:
|
||||
t = educate_quotes t
|
||||
end
|
||||
end
|
||||
|
||||
t = stupefy_entities t if do_stupefy
|
||||
end
|
||||
|
||||
prev_token_last_char = last_char
|
||||
result << t
|
||||
end
|
||||
end
|
||||
|
||||
# Done
|
||||
result
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
# Return the string, with after processing the following backslash
|
||||
# escape sequences. This is useful if you want to force a "dumb" quote
|
||||
# or other character to appear.
|
||||
#
|
||||
# Escaped are:
|
||||
# \\ \" \' \. \- \`
|
||||
#
|
||||
def process_escapes(str)
|
||||
str = str.gsub('\\\\', '\')
|
||||
str.gsub!('\"', '"')
|
||||
str.gsub!("\\\'", ''')
|
||||
str.gsub!('\.', '.')
|
||||
str.gsub!('\-', '-')
|
||||
str.gsub!('\`', '`')
|
||||
str
|
||||
end
|
||||
|
||||
# The string, with each instance of "<tt>--</tt>" translated to an
|
||||
# em-dash HTML entity.
|
||||
#
|
||||
def educate_dashes(str)
|
||||
str.
|
||||
gsub(/--/, entity(:em_dash))
|
||||
end
|
||||
|
||||
# The string, with each instance of "<tt>--</tt>" translated to an
|
||||
# en-dash HTML entity, and each "<tt>---</tt>" translated to an
|
||||
# em-dash HTML entity.
|
||||
#
|
||||
def educate_dashes_oldschool(str)
|
||||
str.
|
||||
gsub(/---/, entity(:em_dash)).
|
||||
gsub(/--/, entity(:en_dash))
|
||||
end
|
||||
|
||||
# Return the string, with each instance of "<tt>--</tt>" translated
|
||||
# to an em-dash HTML entity, and each "<tt>---</tt>" translated to
|
||||
# an en-dash HTML entity. Two reasons why: First, unlike the en- and
|
||||
# em-dash syntax supported by +educate_dashes_oldschool+, it's
|
||||
# compatible with existing entries written before SmartyPants 1.1,
|
||||
# back when "<tt>--</tt>" was only used for em-dashes. Second,
|
||||
# em-dashes are more common than en-dashes, and so it sort of makes
|
||||
# sense that the shortcut should be shorter to type. (Thanks to
|
||||
# Aaron Swartz for the idea.)
|
||||
#
|
||||
def educate_dashes_inverted(str)
|
||||
str.
|
||||
gsub(/---/, entity(:en_dash)).
|
||||
gsub(/--/, entity(:em_dash))
|
||||
end
|
||||
|
||||
# Return the string, with each instance of "<tt>...</tt>" translated
|
||||
# to an ellipsis HTML entity. Also converts the case where there are
|
||||
# spaces between the dots.
|
||||
#
|
||||
def educate_ellipses(str)
|
||||
str.
|
||||
gsub('...', entity(:ellipsis)).
|
||||
gsub('. . .', entity(:ellipsis))
|
||||
end
|
||||
|
||||
# Return the string, with "<tt>``backticks''</tt>"-style single quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
def educate_backticks(str)
|
||||
str.
|
||||
gsub("``", entity(:double_left_quote)).
|
||||
gsub("''", entity(:double_right_quote))
|
||||
end
|
||||
|
||||
# Return the string, with "<tt>`backticks'</tt>"-style single quotes
|
||||
# translated into HTML curly quote entities.
|
||||
#
|
||||
def educate_single_backticks(str)
|
||||
str.
|
||||
gsub("`", entity(:single_left_quote)).
|
||||
gsub("'", entity(:single_right_quote))
|
||||
end
|
||||
|
||||
def educate_fractions(str)
|
||||
str.gsub(/(\s+|^)(1\/4|1\/2|3\/4)([,.;\s]|$)/) do
|
||||
frac =
|
||||
if $2 == "1/2".freeze
|
||||
entity(:frac12)
|
||||
elsif $2 == "1/4".freeze
|
||||
entity(:frac14)
|
||||
elsif $2 == "3/4".freeze
|
||||
entity(:frac34)
|
||||
end
|
||||
"#{$1}#{frac}#{$3}"
|
||||
end
|
||||
end
|
||||
|
||||
# Return the string, with "educated" curly quote HTML entities.
|
||||
#
|
||||
def educate_quotes(str)
|
||||
punct_class = '[!"#\$\%\'()*+,\-.\/:;<=>?\@\[\\\\\]\^_`{|}~]'
|
||||
|
||||
# normalize html
|
||||
str = str.dup
|
||||
# Special case if the very first character is a quote followed by
|
||||
# punctuation at a non-word-break. Close the quotes by brute
|
||||
# force:
|
||||
str.gsub!(/^'(?=#{punct_class}\B)/,
|
||||
entity(:single_right_quote))
|
||||
str.gsub!(/^"(?=#{punct_class}\B)/,
|
||||
entity(:double_right_quote))
|
||||
|
||||
# Special case for double sets of quotes, e.g.:
|
||||
# <p>He said, "'Quoted' words in a larger quote."</p>
|
||||
str.gsub!(/"'(?=\w)/,
|
||||
"#{entity(:double_left_quote)}#{entity(:single_left_quote)}")
|
||||
str.gsub!(/'"(?=\w)/,
|
||||
"#{entity(:single_left_quote)}#{entity(:double_left_quote)}")
|
||||
|
||||
# Special case for decade abbreviations (the '80s):
|
||||
str.gsub!(/'(?=\d\ds)/,
|
||||
entity(:single_right_quote))
|
||||
|
||||
close_class = %![^\ \t\r\n\\[\{\(\-]!
|
||||
dec_dashes = "#{entity(:en_dash)}|#{entity(:em_dash)}"
|
||||
|
||||
# Get most opening single quotes:
|
||||
str.gsub!(/(\s| |=|--|&[mn]dash;|#{dec_dashes}|ȁ[34];)'(?=\w)/,
|
||||
'\1' + entity(:single_left_quote))
|
||||
|
||||
# Single closing quotes:
|
||||
str.gsub!(/(#{close_class})'/,
|
||||
'\1' + entity(:single_right_quote))
|
||||
str.gsub!(/'(\s|s\b|$)/,
|
||||
entity(:single_right_quote) + '\1')
|
||||
|
||||
# Any remaining single quotes should be opening ones:
|
||||
str.gsub!(/'/,
|
||||
entity(:single_left_quote))
|
||||
|
||||
# Get most opening double quotes:
|
||||
str.gsub!(/(\s| |=|--|&[mn]dash;|#{dec_dashes}|ȁ[34];)"(?=\w)/,
|
||||
'\1' + entity(:double_left_quote))
|
||||
|
||||
# Double closing quotes:
|
||||
str.gsub!(/(#{close_class})"/,
|
||||
'\1' + entity(:double_right_quote))
|
||||
str.gsub!(/"(\s|s\b|$)/,
|
||||
entity(:double_right_quote) + '\1')
|
||||
|
||||
# Any remaining quotes should be opening ones:
|
||||
str.gsub!(/"/,
|
||||
entity(:double_left_quote))
|
||||
|
||||
str
|
||||
end
|
||||
|
||||
# Return the string, with each RubyPants HTML entity translated to
|
||||
# its ASCII counterpart.
|
||||
#
|
||||
# Note: This is not reversible (but exactly the same as in SmartyPants)
|
||||
#
|
||||
def stupefy_entities(str)
|
||||
new_str = str.dup
|
||||
|
||||
{
|
||||
:en_dash => '-',
|
||||
:em_dash => '--',
|
||||
:single_left_quote => "'",
|
||||
:single_right_quote => "'",
|
||||
:double_left_quote => '"',
|
||||
:double_right_quote => '"',
|
||||
:ellipsis => '...'
|
||||
}.each do |k,v|
|
||||
new_str.gsub!(/#{entity(k)}/, v)
|
||||
end
|
||||
|
||||
new_str
|
||||
end
|
||||
|
||||
# Return an array of the tokens comprising the string. Each token is
|
||||
# either a tag (possibly with nested, tags contained therein, such
|
||||
# as <tt><a href="<MTFoo>"></tt>, or a run of text between
|
||||
# tags. Each element of the array is a two-element array; the first
|
||||
# is either :tag or :text; the second is the actual value.
|
||||
#
|
||||
# Based on the <tt>_tokenize()</tt> subroutine from Brad Choate's
|
||||
# MTRegex plugin. <http://www.bradchoate.com/past/mtregex.php>
|
||||
#
|
||||
# This is actually the easier variant using tag_soup, as used by
|
||||
# Chad Miller in the Python port of SmartyPants.
|
||||
#
|
||||
def tokenize
|
||||
tag_soup = /([^<]*)(<[^>]*>)/
|
||||
|
||||
tokens = []
|
||||
|
||||
prev_end = 0
|
||||
|
||||
scan(tag_soup) do
|
||||
tokens << [:text, $1] if $1 != ""
|
||||
tokens << [:tag, $2]
|
||||
prev_end = $~.end(0)
|
||||
end
|
||||
|
||||
if prev_end < size
|
||||
tokens << [:text, self[prev_end..-1]]
|
||||
end
|
||||
|
||||
tokens
|
||||
end
|
||||
|
||||
def default_entities
|
||||
{
|
||||
single_left_quote: "‘",
|
||||
double_left_quote: "“",
|
||||
single_right_quote: "’",
|
||||
double_right_quote: "”",
|
||||
em_dash: "—",
|
||||
en_dash: "–",
|
||||
ellipsis: "…",
|
||||
html_quote: """,
|
||||
frac12: "½",
|
||||
frac14: "¼",
|
||||
frac34: "¾",
|
||||
}
|
||||
end
|
||||
|
||||
def entity(key)
|
||||
@entities[key]
|
||||
end
|
||||
|
||||
end
|
|
@ -0,0 +1,30 @@
|
|||
require 'spec_helper'
|
||||
require 'html_prettify'
|
||||
|
||||
describe HtmlPrettify do
|
||||
|
||||
def t(source, expected)
|
||||
expect(HtmlPrettify.render(source)).to eq(expected)
|
||||
end
|
||||
|
||||
it 'correctly prettifies html' do
|
||||
t "<p>All's well!</p>", "<p>All’s well!</p>"
|
||||
t "<p>Eatin' Lunch'.</p>", "<p>Eatin’ Lunch’.</p>"
|
||||
t "<p>a 1/4. is a fraction but not 1/4/2000</p>", "<p>a ¼. is a fraction but not 1/4/2000</p>"
|
||||
t "<p>Well that'll be the day</p>", "<p>Well that’ll be the day</p>"
|
||||
t %(<p>"Quoted text"</p>), %(<p>“Quoted text”</p>)
|
||||
t "<p>I've been meaning to tell you ..</p>", "<p>I’ve been meaning to tell you ..</p>"
|
||||
t "<p>single `backticks` in HTML should be preserved</p>", "<p>single `backticks` in HTML should be preserved</p>"
|
||||
t "<p>double hyphen -- ndash --- mdash</p>", "<p>double hyphen – ndash — mdash</p>"
|
||||
t "a long time ago...", "a long time ago…"
|
||||
t "is 'this a mistake'?", "is ‘this a mistake’?"
|
||||
t ERB::Util.html_escape("'that went well'"), "‘that went well’"
|
||||
t '"that went well"', "“that went well”"
|
||||
t ERB::Util.html_escape('"that went well"'), "“that went well”"
|
||||
|
||||
t 'src="test.png"> yay', "src=“test.png”> yay"
|
||||
|
||||
t ERB::Util.html_escape('<img src="test.png"> yay'), "<img src=“test.png”> yay"
|
||||
end
|
||||
|
||||
end
|
|
@ -214,7 +214,7 @@ describe Topic do
|
|||
|
||||
context 'title_fancy_entities disabled' do
|
||||
before do
|
||||
SiteSetting.stubs(:title_fancy_entities).returns(false)
|
||||
SiteSetting.title_fancy_entities = false
|
||||
end
|
||||
|
||||
it "doesn't add entities to the title" do
|
||||
|
@ -224,11 +224,26 @@ describe Topic do
|
|||
|
||||
context 'title_fancy_entities enabled' do
|
||||
before do
|
||||
SiteSetting.stubs(:title_fancy_entities).returns(true)
|
||||
SiteSetting.title_fancy_entities = true
|
||||
end
|
||||
|
||||
it "converts the title to have fancy entities" do
|
||||
it "converts the title to have fancy entities and updates" do
|
||||
expect(topic.fancy_title).to eq("“this topic” – has “fancy stuff”")
|
||||
topic.title = "this is my test hello world... yay"
|
||||
topic.user.save!
|
||||
topic.save!
|
||||
topic.reload
|
||||
expect(topic.fancy_title).to eq("This is my test hello world… yay")
|
||||
|
||||
topic.title = "I made a change to the title"
|
||||
topic.save!
|
||||
|
||||
topic.reload
|
||||
expect(topic.fancy_title).to eq("I made a change to the title")
|
||||
|
||||
# another edge case
|
||||
topic.title = "this is another edge case"
|
||||
expect(topic.fancy_title).to eq("this is another edge case")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
Loading…
Reference in New Issue