FEATURE: HTML to Markdown conversion using native JavaScript ES6 classes (#5425)
This commit is contained in:
parent
0a863dd031
commit
f0497ee9c4
|
@ -9,6 +9,7 @@ import { emojiUrlFor } from 'discourse/lib/text';
|
|||
import { getRegister } from 'discourse-common/lib/get-owner';
|
||||
import { findRawTemplate } from 'discourse/lib/raw-templates';
|
||||
import { determinePostReplaceSelection, clipboardData } from 'discourse/lib/utilities';
|
||||
import toMarkdown from 'discourse/lib/to-markdown';
|
||||
import { ajax } from 'discourse/lib/ajax';
|
||||
import { popupAjaxError } from 'discourse/lib/ajax-error';
|
||||
import deprecated from 'discourse-common/lib/deprecated';
|
||||
|
@ -647,7 +648,7 @@ export default Ember.Component.extend({
|
|||
|
||||
const { clipboard, types } = clipboardData(e);
|
||||
let plainText = clipboard.getData("text/plain");
|
||||
const html = clipboard.getData("text/html");
|
||||
let html = clipboard.getData("text/html");
|
||||
let handled = false;
|
||||
|
||||
if (plainText) {
|
||||
|
@ -657,30 +658,19 @@ export default Ember.Component.extend({
|
|||
this.appEvents.trigger('composer:insert-text', table);
|
||||
handled = true;
|
||||
}
|
||||
|
||||
if (html && html.includes("urn:schemas-microsoft-com:office:word")) {
|
||||
html = ""; // use plain text data for microsoft word
|
||||
}
|
||||
}
|
||||
|
||||
if (this.siteSettings.enable_rich_text_paste && html && !handled) {
|
||||
const placeholder = `${ plainText || I18n.t('pasting') }`;
|
||||
const self = this;
|
||||
const markdown = toMarkdown(html);
|
||||
|
||||
this.appEvents.trigger('composer:insert-text', placeholder);
|
||||
handled = true;
|
||||
|
||||
ajax('/composer/parse_html', {
|
||||
type: 'POST',
|
||||
data: { html }
|
||||
}).then(response => {
|
||||
if (response.markdown) {
|
||||
self.appEvents.trigger('composer:replace-text', placeholder, response.markdown);
|
||||
} else if (!plainText) {
|
||||
self.appEvents.trigger('composer:replace-text', placeholder, "");
|
||||
}
|
||||
}).catch(error => {
|
||||
if (!plainText) {
|
||||
self.appEvents.trigger('composer:replace-text', placeholder, "");
|
||||
popupAjaxError(error);
|
||||
}
|
||||
});
|
||||
if (!plainText || plainText.length < markdown.length) {
|
||||
this.appEvents.trigger('composer:insert-text', markdown);
|
||||
handled = true;
|
||||
}
|
||||
}
|
||||
|
||||
const uploadFiles = types.includes("Files") && !plainText && !handled;
|
||||
|
|
|
@ -5,4 +5,4 @@ export default function parseHTML(rawHtml) {
|
|||
|
||||
parser.parseComplete(rawHtml);
|
||||
return builder.dom;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,285 @@
|
|||
import parseHTML from 'discourse/helpers/parse-html';
|
||||
|
||||
// Removes every leading whitespace character (spaces, tabs, line breaks) from `text`.
const trimLeft = (text) => {
  const leadingWhitespace = /^\s+/;
  return text.replace(leadingWhitespace, "");
};
|
||||
// Removes every trailing whitespace character (spaces, tabs, line breaks) from `text`.
const trimRight = (text) => {
  const trailingWhitespace = /\s+$/;
  return text.replace(trailingWhitespace, "");
};
|
||||
|
||||
/**
 * Base converter for a single HTML tag name.
 *
 * An instance is bound to the node it converts through its `element`
 * property, which is assigned by `Element.tag()` after construction. The
 * static factory methods below return anonymous subclasses specialised for a
 * tag name; those subclasses take no constructor arguments.
 */
class Tag {
  /**
   * @param {string} name   HTML tag name handled by this converter.
   * @param {string} prefix Markdown emitted before the converted inner text.
   * @param {string} suffix Markdown emitted after the converted inner text.
   */
  constructor(name, prefix = "", suffix = "") {
    this.name = name;
    this.prefix = prefix;
    this.suffix = suffix;
  }

  /**
   * Wraps already-converted inner text in this tag's prefix and suffix.
   *
   * @param {string} text Markdown produced from the element's children.
   * @returns {string} The decorated text, or `text` itself when the tag has
   *   neither prefix nor suffix.
   */
  decorate(text) {
    if (this.prefix || this.suffix) {
      return [this.prefix, text, this.suffix].join("");
    }

    return text;
  }

  /**
   * Converts the bound element. Inner text that is empty or whitespace-only is
   * returned undecorated so that empty tags do not leave stray markers behind.
   *
   * @returns {string}
   */
  toMarkdown() {
    const text = this.element.innerMarkdown();

    if (text && text.trim()) {
      return this.decorate(text);
    }

    return text;
  }

  /** @returns {string[]} Tag names converted as blank-line separated blocks. */
  static blocks() {
    return ["address", "article", "aside", "blockquote", "dd", "div", "dl", "dt", "fieldset",
            "figcaption", "figure", "footer", "form", "header", "hgroup", "hr", "main", "nav",
            "ol", "p", "pre", "section", "table", "ul"];
  }

  /** @returns {string[]} Heading tag names, ordered by level (h1 first). */
  static headings() {
    return ["h1", "h2", "h3", "h4", "h5", "h6"];
  }

  /** @returns {Array<[string, string]>} Pairs of [tag name, Markdown marker]. */
  static emphases() {
    return [ ["b", "**"], ["strong", "**"], ["i", "_"], ["em", "_"], ["s", "~~"], ["strike", "~~"] ];
  }

  /** @returns {string[]} Tag names whose converted text is followed by a separator. */
  static slices() {
    return ["dt", "dd", "tr", "thead", "tbody", "tfoot"];
  }

  /**
   * @returns {string[]} Names of tags next to which a text node has its
   *   adjoining whitespace trimmed (see `Element#text`).
   */
  static trimmable() {
    return [...Tag.blocks(), ...Tag.headings(), ...Tag.slices(), "li", "td", "th", "br", "hr"];
  }

  /**
   * Builds a converter that surrounds its content with blank lines.
   *
   * @param {string} name
   * @param {string} [prefix] Placed directly before the text.
   * @param {string} [suffix] Placed directly after the text.
   * @returns {Function} A `Tag` subclass.
   */
  static block(name, prefix, suffix) {
    return class extends Tag {
      constructor() {
        // Omitted prefix/suffix arrive as `undefined` and fall back to "".
        super(name, prefix, suffix);
      }

      decorate(text) {
        return `\n\n${this.prefix}${text}${this.suffix}\n\n`;
      }
    };
  }

  /**
   * Builds a block converter prefixed with `i` hash characters and a space.
   *
   * @param {string} name Heading tag name.
   * @param {number} i    Heading level (number of `#` characters).
   * @returns {Function} A `Tag` subclass.
   */
  static heading(name, i) {
    const prefix = `${"#".repeat(i)} `;
    return Tag.block(name, prefix, "");
  }

  /**
   * Builds a converter that wraps trimmed text in `decorator` on both sides.
   * Text spanning several lines is wrapped in the literal HTML tag instead.
   *
   * @param {string} name
   * @param {string} decorator Markdown marker such as `**` or `_`.
   * @returns {Function} A `Tag` subclass.
   */
  static emphasis(name, decorator) {
    return class extends Tag {
      constructor() {
        super(name, decorator, decorator);
      }

      decorate(text) {
        const trimmed = text.trim();

        // Fall back to the HTML tag for multi-line content. The result is
        // computed locally so the instance's prefix/suffix are never
        // overwritten and later calls still use the Markdown marker.
        if (trimmed.includes("\n")) {
          return `<${this.name}>${trimmed}</${this.name}>`;
        }

        return super.decorate(trimmed);
      }
    };
  }

  /**
   * Builds a converter that ignores the element's content and always emits
   * the fixed string `text`.
   *
   * @param {string} name
   * @param {string} text
   * @returns {Function} A `Tag` subclass.
   */
  static replace(name, text) {
    return class extends Tag {
      constructor() {
        super(name, "", "");
        this.text = text;
      }

      toMarkdown() {
        return this.text;
      }
    };
  }

  /**
   * Builds the converter for `<a>`: emits `[text](href)`, or the bare text
   * when there is no href or the text already equals the href.
   *
   * @returns {Function} A `Tag` subclass.
   */
  static link() {
    return class extends Tag {
      constructor() {
        super("a");
      }

      decorate(text) {
        const attr = this.element.attributes;

        if (attr && attr.href && text !== attr.href) {
          return `[${text}](${attr.href})`;
        }

        return text;
      }
    };
  }

  /**
   * Builds the converter for `<img>`: emits `![alt](src)`. `src` and `alt`
   * are read from the element and, when missing there, from its parent's
   * attributes. Without a `src` the image is dropped.
   *
   * @returns {Function} A `Tag` subclass.
   */
  static image() {
    return class extends Tag {
      constructor() {
        super("img");
      }

      toMarkdown() {
        const e = this.element;
        const attr = e.attributes;
        const pAttr = e.parent && e.parent.attributes;
        const src = (attr && attr.src) || (pAttr && pAttr.src);

        if (src) {
          const alt = (attr && attr.alt) || (pAttr && pAttr.alt) || "";
          return `![${alt}](${src})`;
        }

        return "";
      }
    };
  }

  /**
   * Builds a converter that appends `suffix` after the text unless the
   * element is the last of its siblings. `prefix` is stored but not emitted.
   *
   * @param {string} name
   * @param {string} prefix
   * @param {string} suffix Separator placed between this element and the next.
   * @returns {Function} A `Tag` subclass.
   */
  static slice(name, prefix, suffix) {
    return class extends Tag {
      constructor() {
        super(name, prefix, suffix);
      }

      decorate(text) {
        // Decide per call instead of clearing this.suffix, so the instance
        // keeps its configured separator.
        const separator = this.element.next ? this.suffix : "";
        return `${text}${separator}`;
      }
    };
  }

  /**
   * Builds a slice converter for table cells, separated by a space.
   *
   * @param {string} name `td` or `th`.
   * @returns {Function} A `Tag` subclass.
   */
  static cell(name) {
    return Tag.slice(name, "", " ");
  }

  /**
   * Builds the converter for `<li>`: emits `* text`, indented once for every
   * enclosing `<ul>` beyond the first, with a newline between siblings.
   *
   * @returns {Function} A `Tag` subclass.
   */
  static li() {
    return class extends Tag.slice("li", "", "\n") {
      decorate(text) {
        const depth = this.element.filterParentNames("ul").length;
        // NOTE(review): the indent unit reads as a single space here; confirm
        // it against the repository in case whitespace was collapsed.
        const indent = " ".repeat(Math.max(depth - 1, 0));
        return super.decorate(`${indent}* ${trimLeft(text)}`);
      }
    };
  }

}
|
||||
|
||||
// Registry of converter classes, one per supported tag name. Element.tag()
// picks the FIRST entry whose name matches the element, so order matters
// whenever a name is registered more than once.
const tags = [
  // Fixed replacements come first. "hr" is also listed in Tag.blocks(), and the
  // generic block converter returns nothing for an element without content, so
  // it would otherwise shadow the "\n---\n" replacement and drop every <hr>.
  Tag.replace("br", "\n"), Tag.replace("hr", "\n---\n"), Tag.replace("head", ""),
  ...Tag.blocks().map((b) => Tag.block(b)),
  ...Tag.headings().map((h, i) => Tag.heading(h, i + 1)),
  // NOTE(review): "dt" and "dd" are in both Tag.blocks() and Tag.slices(); the
  // block converter above wins for them. Left as is -- confirm which is intended.
  ...Tag.slices().map((s) => Tag.slice(s, "", "\n")),
  ...Tag.emphases().map((e) => Tag.emphasis(e[0], e[1])),
  Tag.cell("td"), Tag.cell("th"),
  Tag.li(), Tag.link(), Tag.image(),

  // TODO: create converters for code, tbody, ins, del, blockquote, small, large
  // TODO: update converters for ol, pre, thead, th, td
];
|
||||
|
||||
/**
 * Wrapper around one node of the parsed HTML tree that knows its parent,
 * its ancestors' tag names and its immediate siblings, and can render itself
 * (and its subtree) as Markdown.
 */
class Element {
  /**
   * @param {Object} element   Parsed node; its `name`, `type`, `data`,
   *   `children` and `attributes` properties are copied onto the wrapper.
   * @param {?Element} parent  Wrapper of the enclosing node, if any.
   * @param {?Object} previous Preceding sibling node, or null for the first child.
   * @param {?Object} next     Following sibling node, or null for the last child.
   */
  constructor(element, parent, previous, next) {
    this.name = element.name;
    this.type = element.type;
    this.data = element.data;
    this.children = element.children;
    this.attributes = element.attributes;

    // Always an array, so ancestor lookups are safe on root-level nodes too.
    this.parentNames = [];

    if (parent) {
      this.parent = parent;
      // Ancestor tag names, outermost first, ending with the direct parent.
      this.parentNames = [...(parent.parentNames || []), parent.name];
    }

    this.previous = previous;
    this.next = next;
  }

  /**
   * Instantiates the converter registered for this element's tag name
   * (first match in `tags`), falling back to the plain `Tag`, and binds it
   * to this element.
   *
   * @returns {Tag}
   */
  tag() {
    // Registry entries are classes whose name is only known once constructed.
    const TagClass = tags.find(t => new t().name === this.name) || Tag;
    const tag = new TagClass();
    tag.element = this;
    return tag;
  }

  /** @returns {string} Markdown for this element's children, concatenated. */
  innerMarkdown() {
    return Element.parseChildren(this);
  }

  /** @returns {boolean} Truthy when the preceding sibling is a trimmable tag. */
  leftTrimmable() {
    return this.previous && Tag.trimmable().includes(this.previous.name);
  }

  /** @returns {boolean} Truthy when the following sibling is a trimmable tag. */
  rightTrimmable() {
    return this.next && Tag.trimmable().includes(this.next.name);
  }

  /**
   * Renders a text node: whitespace adjoining a trimmable sibling is removed
   * and every run of spaces/tabs is collapsed to a single space.
   *
   * @returns {string}
   */
  text() {
    let text = this.data || "";

    if (this.leftTrimmable()) {
      text = trimLeft(text);
    }

    if (this.rightTrimmable()) {
      text = trimRight(text);
    }

    return text.replace(/[ \t]+/g, " ");
  }

  /**
   * Renders this node as Markdown.
   *
   * @returns {string} Converted text for "text" and "tag" nodes; an empty
   *   string for any other node type.
   */
  toMarkdown() {
    switch (this.type) {
      case "text":
        return this.text();
      case "tag":
        return this.tag().toMarkdown();
      default:
        return "";
    }
  }

  /**
   * @param {string} name Tag name to look for among the ancestors.
   * @returns {string[]} One entry per ancestor with that tag name.
   */
  filterParentNames(name) {
    return this.parentNames.filter(p => p === name);
  }

  /** Wraps a parsed node together with its context and renders it. */
  static toMarkdown(element, parent, prev, next) {
    return new Element(element, parent, prev, next).toMarkdown();
  }

  /** Renders all children of `parent`, using `parent` as their context. */
  static parseChildren(parent) {
    return Element.parse(parent.children, parent);
  }

  /**
   * Renders a list of sibling nodes and concatenates the results.
   *
   * @param {?Object[]} elements Parsed sibling nodes; may be null/undefined.
   * @param {?Element} parent    Wrapper of the enclosing node, or null at the root.
   * @returns {string} Concatenated Markdown, "" when there are no elements.
   */
  static parse(elements, parent = null) {
    if (!elements) {
      return "";
    }

    const lastIndex = elements.length - 1;
    const result = [];

    for (let i = 0; i <= lastIndex; i++) {
      const prev = (i === 0) ? null : elements[i - 1];
      // The last sibling has no successor; compare with the last index,
      // not the length, so `next` is an explicit null.
      const next = (i === lastIndex) ? null : elements[i + 1];

      result.push(Element.toMarkdown(elements[i], parent, prev, next));
    }

    return result.join("");
  }
}
|
||||
|
||||
/**
 * Converts an HTML string to Markdown.
 *
 * Conversion is best-effort: if parsing or rendering throws, an empty string
 * is returned instead of propagating the error.
 *
 * @param {string} html HTML markup to convert.
 * @returns {string} The Markdown text, or "" on failure.
 */
export default function toMarkdown(html) {
  try {
    const dom = parseHTML(html);
    const rendered = Element.parse(dom).trim();

    // fix for google doc copy paste: drop a <b>...</b> pair wrapping the whole result
    const unwrapped = rendered
      .replace(/^<b>/, "")
      .replace(/<\/b>$/, "")
      .trim();

    // Strip carriage returns, then normalise blank lines: a line holding only
    // one space counts as empty, and 3+ consecutive newlines collapse to two.
    const withoutCarriageReturns = unwrapped.replace(/\r/g, "");
    const withCleanBlankLines = withoutCarriageReturns.replace(/\n \n/g, "\n\n");
    return withCleanBlankLines.replace(/\n{3,}/g, "\n\n");
  } catch(err) {
    return "";
  }
}
|
|
@ -37,3 +37,4 @@
|
|||
//= require virtual-dom
|
||||
//= require virtual-dom-amd
|
||||
//= require highlight.js
|
||||
//= require htmlparser.js
|
||||
|
|
|
@ -2,7 +2,7 @@ import createStore from 'helpers/create-store';
|
|||
|
||||
QUnit.module("lib:category-link");
|
||||
|
||||
import parseHTML from 'helpers/parse-html';
|
||||
import parseHTML from 'discourse/helpers/parse-html';
|
||||
import { categoryBadgeHTML } from "discourse/helpers/category-link";
|
||||
|
||||
QUnit.test("categoryBadge without a category", assert => {
|
||||
|
@ -44,4 +44,4 @@ QUnit.test("allowUncategorized", assert => {
|
|||
|
||||
assert.blank(categoryBadgeHTML(uncategorized), "it doesn't return HTML for uncategorized by default");
|
||||
assert.present(categoryBadgeHTML(uncategorized, {allowUncategorized: true}), "it returns HTML");
|
||||
});
|
||||
});
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
import toMarkdown from 'discourse/lib/to-markdown';
|
||||
|
||||
QUnit.module("lib:to-markdown");
|
||||
|
||||
QUnit.test("converts styles between normal words", assert => {
|
||||
const html = `Line with <s>styles</s> <b><i>between</i></b> words.`;
|
||||
const markdown = `Line with ~~styles~~ **_between_** words.`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("converts inline nested styles", assert => {
|
||||
let html = `<em>Italicised line with <strong>some random</strong> <b>bold</b> words.</em>`;
|
||||
let markdown = `_Italicised line with **some random** **bold** words._`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
|
||||
html = `<i class="fa">Italicised line
|
||||
with <b title="strong">some
|
||||
random</b> <s>bold</s> words.</i>`;
|
||||
markdown = `<i>Italicised line\n with <b>some\n random</b> ~~bold~~ words.</i>`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("converts a link", assert => {
|
||||
const html = `<a href="https://discourse.org">Discourse</a>`;
|
||||
const markdown = `[Discourse](https://discourse.org)`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("put raw URL instead of converting the link", assert => {
|
||||
let url = "https://discourse.org";
|
||||
const html = () => `<a href="${url}">${url}</a>`;
|
||||
|
||||
assert.equal(toMarkdown(html()), url);
|
||||
|
||||
url = "discourse.org/t/topic-slug/1";
|
||||
assert.equal(toMarkdown(html()), url);
|
||||
});
|
||||
|
||||
QUnit.test("skip empty link", assert => {
|
||||
assert.equal(toMarkdown(`<a href="https://example.com"></a>`), "");
|
||||
});
|
||||
|
||||
QUnit.test("converts heading tags", assert => {
|
||||
const html = `
|
||||
<h1>Heading 1</h1>
|
||||
<h2>Heading 2</h2>
|
||||
|
||||
\t <h3>Heading 3</h3>
|
||||
|
||||
|
||||
<h4>Heading 4</h4>
|
||||
|
||||
|
||||
|
||||
<h5>Heading 5</h5>
|
||||
|
||||
|
||||
|
||||
|
||||
<h6>Heading 6</h6>
|
||||
`;
|
||||
const markdown = `# Heading 1\n\n## Heading 2\n\n### Heading 3\n\n#### Heading 4\n\n##### Heading 5\n\n###### Heading 6`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("converts ul and ol list tags", assert => {
|
||||
const html = `
|
||||
<ul>
|
||||
<li>Item 1</li>
|
||||
<li>
|
||||
Item 2
|
||||
<ul>
|
||||
<li>Sub Item 1</li>
|
||||
<li>Sub Item 2</li>
|
||||
<ul><li>Sub <i>Sub</i> Item 1</li><li>Sub <b>Sub</b> Item 2</li></ul>
|
||||
</ul>
|
||||
</li>
|
||||
<li>Item 3</li>
|
||||
</ul>
|
||||
`;
|
||||
const markdown = `* Item 1\n* Item 2\n\n * Sub Item 1\n * Sub Item 2\n\n * Sub _Sub_ Item 1\n * Sub **Sub** Item 2\n\n* Item 3`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("stripes unwanted inline tags", assert => {
|
||||
const html = `
|
||||
<p>Lorem ipsum <span>dolor sit amet, consectetur</span> <strike>elit.</strike></p>
|
||||
<p>Ut minim veniam, <label>quis nostrud</label> laboris <nisi> ut aliquip ex ea</nisi> commodo.</p>
|
||||
`;
|
||||
const markdown = `Lorem ipsum dolor sit amet, consectetur ~~elit.~~\n\nUt minim veniam, quis nostrud laboris ut aliquip ex ea commodo.`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("converts table as readable", assert => {
|
||||
const html = `<address>Discourse Avenue</address><b>laboris</b>
|
||||
<table>
|
||||
<thead> <tr><th>Heading 1</th><th>Head 2</th></tr> </thead>
|
||||
<tbody>
|
||||
<tr><td>Lorem</td><td>ipsum</td></tr>
|
||||
<tr><td><b>dolor</b></td> <td><i>sit amet</i></td></tr></tbody>
|
||||
</table>
|
||||
`;
|
||||
const markdown = `Discourse Avenue\n\n**laboris**\n\nHeading 1 Head 2\n\nLorem ipsum\n**dolor** _sit amet_`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
QUnit.test("converts img tag", assert => {
|
||||
const url = "https://example.com/image.png";
|
||||
let html = `<img src="${url}">`;
|
||||
assert.equal(toMarkdown(html), `![](${url})`);
|
||||
|
||||
html = `<div><span><img src="${url}" alt="description" /></span></div>`;
|
||||
assert.equal(toMarkdown(html), `![description](${url})`);
|
||||
|
||||
html = `<a href="http://example.com"><img src="${url}" alt="description" /></a>`;
|
||||
assert.equal(toMarkdown(html), `[![description](${url})](http://example.com)`);
|
||||
|
||||
html = `<a href="http://example.com">description <img src="${url}" /></a>`;
|
||||
assert.equal(toMarkdown(html), `[description ![](${url})](http://example.com)`);
|
||||
|
||||
html = `<img alt="description" />`;
|
||||
assert.equal(toMarkdown(html), "");
|
||||
|
||||
html = `<a><img src="${url}" alt="description" /></a>`;
|
||||
assert.equal(toMarkdown(html), `![description](${url})`);
|
||||
});
|
Loading…
Reference in New Issue