FIX: don't extract divs with a 'gmail_default' class
This commit is contained in:
parent 50a2508604
commit 2d561a0422
|
@ -296,7 +296,7 @@ module Email
|
||||||
end
|
end
|
||||||
|
|
||||||
HTML_EXTRACTERS ||= [
|
HTML_EXTRACTERS ||= [
|
||||||
[:gmail, / class="gmail_/],
|
[:gmail, /class="gmail_(?!default)/],
|
||||||
[:outlook, /id="(divRplyFwdMsg|Signature)"/],
|
[:outlook, /id="(divRplyFwdMsg|Signature)"/],
|
||||||
[:word, /class="WordSection1"/],
|
[:word, /class="WordSection1"/],
|
||||||
[:exchange, /name="message(Body|Reply)Section"/],
|
[:exchange, /name="message(Body|Reply)Section"/],
|
||||||
|
@ -309,8 +309,8 @@ module Email
|
||||||
|
|
||||||
def extract_from_gmail(doc)
|
def extract_from_gmail(doc)
|
||||||
# GMail adds a bunch of 'gmail_' prefixed classes like: gmail_signature, gmail_extra, gmail_quote
|
# GMail adds a bunch of 'gmail_' prefixed classes like: gmail_signature, gmail_extra, gmail_quote
|
||||||
# Just elide them all
|
# Just elide them all except for 'gmail_default'
|
||||||
elided = doc.css("*[class^='gmail_']").remove
|
elided = doc.css("*[class^='gmail_']:not([class*='gmail_default'])").remove
|
||||||
to_markdown(doc.to_html, elided.to_html)
|
to_markdown(doc.to_html, elided.to_html)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue