FIX: don't extract divs with a 'gmail_default' class

2018-05-03 12:29:21 +02:00 · 2018-05-03 12:29:21 +02:00 · 2d561a0422
parent 50a2508604
commit 2d561a0422
1 changed files with 10 additions and 10 deletions
--- a/lib/email/receiver.rb
+++ b/lib/email/receiver.rb
@@ -296,21 +296,21 @@ module Email
    end

    HTML_EXTRACTERS ||= [
-      [:gmail, / class="gmail_/],
-      [:outlook, / id="(divRplyFwdMsg|Signature)"/],
-      [:word, / class="WordSection1"/],
-      [:exchange, / name="message(Body|Reply)Section"/],
-      [:apple_mail, / id="AppleMailSignature"/],
-      [:mozilla, / class="moz-/],
-      [:protonmail, / class="protonmail_/],
-      [:zimbra, / data-marker="__/],
+      [:gmail, /class="gmail_(?!default)/],
+      [:outlook, /id="(divRplyFwdMsg|Signature)"/],
+      [:word, /class="WordSection1"/],
+      [:exchange, /name="message(Body|Reply)Section"/],
+      [:apple_mail, /id="AppleMailSignature"/],
+      [:mozilla, /class="moz-/],
+      [:protonmail, /class="protonmail_/],
+      [:zimbra, /data-marker="__/],
      [:newton, /(id|class)="cm_/],
    ]

    def extract_from_gmail(doc)
      # GMail adds a bunch of 'gmail_' prefixed classes like: gmail_signature, gmail_extra, gmail_quote
-      # Just elide them all
-      elided = doc.css("*[class^='gmail_']").remove
+      # Just elide them all except for 'gmail_default'
+      elided = doc.css("*[class^='gmail_']:not([class*='gmail_default'])").remove
      to_markdown(doc.to_html, elided.to_html)
    end