From 08e6718722a4279588f34fec486bdaa0e7da6b4a Mon Sep 17 00:00:00 2001 From: David Taylor Date: Thu, 22 Apr 2021 18:49:21 +0100 Subject: [PATCH] FEATURE: Improve formatting for Slack transcript messages (#70) - Fix multi-line code blocks - Add strikethrough support - Fix HTML entities inside code blocks - Do not process formatting inside code blocks - Ensure links are never created with no URL - Replace - with _ in emoji names --- .../provider/slack/slack_message.rb | 43 ++++++- .../provider/slack/slack_transcript_spec.rb | 112 ++++++++++++++++++ 2 files changed, 152 insertions(+), 3 deletions(-) diff --git a/lib/discourse_chat/provider/slack/slack_message.rb b/lib/discourse_chat/provider/slack/slack_message.rb index e1b4828..7b0cbac 100644 --- a/lib/discourse_chat/provider/slack/slack_message.rb +++ b/lib/discourse_chat/provider/slack/slack_message.rb @@ -29,11 +29,27 @@ module DiscourseChat::Provider::SlackProvider def text text = @raw['text'].nil? ? "" : @raw['text'] + pre = {} + + # Extract code blocks and replace with placeholder + text = text.gsub(/```(.*?)```/m) do |match| + key = "pre:" + SecureRandom.alphanumeric(50) + pre[key] = HTMLEntities.new.decode $1 + "\n```\n#{key}\n```\n" + end + + # # Extract inline code and replace with placeholder + text = text.gsub(/(?/) do |match| group = $1 parts = group.split('|') - link = parts[0].start_with?('@', '#', '!') ? '' : parts[0] + link = parts[0].start_with?('@', '#', '!') ? nil : parts[0] text = parts.length > 1 ? parts[1] : parts[0] if parts[0].start_with?('@') @@ -46,14 +62,35 @@ module DiscourseChat::Provider::SlackProvider next "@#{user_name}" end - "[#{text}](#{link})" + if link.nil? + text + elsif link == text + "<#{link}>" + else + "[#{text}](#{link})" + end end # Add an extra * to each side for bold - text = text.gsub(/\*(.*?)\*/) do |match| + text = text.gsub(/\*.*?\*/) do |match| "*#{match}*" end + # Add an extra ~ to each side for strikethrough + text = text.gsub(/~.*?~/) do |match| + "~#{match}~" + end + + # Replace emoji - with _ + text = text.gsub(/:[a-z0-9_-]+:/) do |match| + match.gsub("-") { "_" } + end + + # Restore pre-formatted code block content + pre.each do |key, value| + text = text.gsub(key) { value } + end + text end diff --git a/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb b/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb index e8e1571..fb5f89f 100644 --- a/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb +++ b/spec/lib/discourse_chat/provider/slack/slack_transcript_spec.rb @@ -333,5 +333,117 @@ RSpec.describe DiscourseChat::Provider::SlackProvider::SlackTranscript do expect(first_ui[:text]).to eq(transcript.first_message.raw_text) end end + + describe "message formatting" do + it 'handles code block newlines' do + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "Here is some code```my code\nwith newline```", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq(<<~MD) + Here is some code + ``` + my code + with newline + ``` + MD + end + + it 'handles multiple code blocks' do + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "Here is some code```my code\nwith newline```and another```some more code```", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq(<<~MD) + Here is some code + ``` + my code + with newline + ``` + and another + ``` + some more code + ``` + MD + end + + it 'handles strikethrough' do + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "Some ~strikethrough~", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq("Some ~~strikethrough~~") + end + + it 'handles slack links' do + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "A link to , , , <#channel>, <@user>", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq("A link to [google](https://google.com), , , #channel, @user") + end + + it 'does not format things inside backticks' do + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "You can strikethrough like `~this~`, bold like `*this*` and link like `[https://example.com](https://example.com)`", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq("You can strikethrough like `~this~`, bold like `*this*` and link like `[https://example.com](https://example.com)`") + end + + it 'unescapes html in backticks' do + # Because Slack escapes HTML entities, even in backticks + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "The code is `<stuff>`", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq("The code is ``") + end + + it 'updates emoji dashes to underscores' do + # Discourse does not allow dashes in emoji names, so this helps communities have matching custom emojis + message = DiscourseChat::Provider::SlackProvider::SlackMessage.new( + { + "type" => "message", + "user" => "U5Z773QLS", + "text" => "This is :my-emoji:", + "ts" => "1501093331.439776" + }, + transcript + ) + expect(message.text).to eq("This is :my_emoji:") + end + end + end end