Remove unwanted spaces between HTML tags and support Word documents
This commit is contained in:
parent
3bc53f2946
commit
4935ae4338
|
@ -662,8 +662,6 @@ export default Ember.Component.extend({
|
|||
if (table) {
|
||||
this.appEvents.trigger('composer:insert-text', table);
|
||||
handled = true;
|
||||
} else if (html && html.includes("urn:schemas-microsoft-com:office:word")) {
|
||||
html = ""; // use plain text data for microsoft word
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,15 +38,15 @@ class Tag {
|
|||
}
|
||||
|
||||
static emphases() {
|
||||
return [ ["b", "**"], ["strong", "**"], ["i", "_"], ["em", "_"], ["s", "~~"], ["strike", "~~"] ];
|
||||
return [ ["b", "**"], ["strong", "**"], ["i", "*"], ["em", "*"], ["s", "~~"], ["strike", "~~"] ];
|
||||
}
|
||||
|
||||
static slices() {
|
||||
return ["dt", "dd", "tr", "thead", "tbody", "tfoot"];
|
||||
return ["dt", "dd", "thead", "tbody", "tfoot"];
|
||||
}
|
||||
|
||||
static trimmable() {
|
||||
return [...Tag.blocks(), ...Tag.headings(), ...Tag.slices(), "li", "td", "th", "br", "hr", "blockquote", "table", "ol"];
|
||||
return [...Tag.blocks(), ...Tag.headings(), ...Tag.slices(), "li", "td", "th", "br", "hr", "blockquote", "table", "ol", "tr"];
|
||||
}
|
||||
|
||||
static block(name, prefix, suffix) {
|
||||
|
@ -73,14 +73,17 @@ class Tag {
|
|||
}
|
||||
|
||||
decorate(text) {
|
||||
text = text.trim();
|
||||
|
||||
if (text.includes("\n")) {
|
||||
this.prefix = `<${this.name}>`;
|
||||
this.suffix = `</${this.name}>`;
|
||||
}
|
||||
|
||||
return super.decorate(text);
|
||||
let space = text.match(/^\s/) || [""];
|
||||
this.prefix = space[0] + this.prefix;
|
||||
space = text.match(/\s$/) || [""];
|
||||
this.suffix = this.suffix + space[0];
|
||||
|
||||
return super.decorate(text.trim());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
@ -182,10 +185,6 @@ class Tag {
|
|||
throw "Unsupported format inside Markdown table cells";
|
||||
}
|
||||
|
||||
if (!this.element.next) {
|
||||
this.suffix = "|";
|
||||
}
|
||||
|
||||
return this.decorate(text);
|
||||
}
|
||||
};
|
||||
|
@ -268,6 +267,17 @@ class Tag {
|
|||
};
|
||||
}
|
||||
|
||||
static tr() {
|
||||
return class extends Tag.slice("tr", "|\n") {
|
||||
decorate(text) {
|
||||
if (!this.element.next) {
|
||||
this.suffix = "|";
|
||||
}
|
||||
return `${text}${this.suffix}`;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
const tags = [
|
||||
|
@ -278,7 +288,7 @@ const tags = [
|
|||
Tag.cell("td"), Tag.cell("th"),
|
||||
Tag.replace("br", "\n"), Tag.replace("hr", "\n---\n"), Tag.replace("head", ""),
|
||||
Tag.keep("ins"), Tag.keep("del"), Tag.keep("small"), Tag.keep("big"),
|
||||
Tag.li(), Tag.link(), Tag.image(), Tag.code(), Tag.blockquote(), Tag.table(),, Tag.ol(),
|
||||
Tag.li(), Tag.link(), Tag.image(), Tag.code(), Tag.blockquote(), Tag.table(), Tag.ol(), Tag.tr(),
|
||||
];
|
||||
|
||||
class Element {
|
||||
|
@ -375,6 +385,19 @@ class Element {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Normalises whitespace in an HTML string before it is parsed.
 *
 * Steps:
 *  1. If the string contains a `<body>…</body>` pair, only the content of
 *     the first such pair is kept; otherwise the whole string is used.
 *  2. Every carriage return, line feed and non-breaking space character is
 *     replaced by a plain space.
 *  3. Any run of two or more whitespace characters sitting directly between
 *     two tags is collapsed to a single space. Whitespace next to text
 *     content is left untouched.
 *
 * @param {string} html - raw HTML markup
 * @returns {string} the markup with unwanted whitespace removed
 */
function trimUnwantedSpaces(html) {
  // Tag names are case-insensitive in HTML, so `<BODY>` must match as well.
  const body = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  const content = body ? body[1] : html;

  // NOTE(review): the third alternative of the original pattern is an
  // invisible whitespace character in this copy of the source; it is spelled
  // out here as an explicit U+00A0 escape — confirm that is what was intended.
  const singleLine = content.replace(/[\r\n\u00A0]/g, " ");

  // One global pass instead of a match/replace loop. The look-ahead keeps the
  // following tag unconsumed so it can also be the left-hand tag of the next
  // gap. A replacer function is used on purpose: a replacement *string* would
  // interpret sequences such as `$&` or `$'` that occur in the markup, which
  // corrupts the output and can make a match/replace loop run forever.
  return singleLine.replace(
    /(<[^\s>]+[^>]*>)\s{2,}(?=<[^\s>]+[^>]*>)/g,
    (wholeMatch, leadingTag) => `${leadingTag} `
  );
}
|
||||
|
||||
function putPlaceholders(html) {
|
||||
const codeRegEx = /<code[^>]*>([\s\S]*?)<\/code>/gi;
|
||||
const origHtml = html;
|
||||
|
@ -390,7 +413,7 @@ function putPlaceholders(html) {
|
|||
match = codeRegEx.exec(origHtml);
|
||||
}
|
||||
|
||||
const elements = parseHTML(html);
|
||||
const elements = parseHTML(trimUnwantedSpaces(html));
|
||||
return { elements, placeholders };
|
||||
}
|
||||
|
||||
|
@ -406,7 +429,7 @@ export default function toMarkdown(html) {
|
|||
const { elements, placeholders } = putPlaceholders(html);
|
||||
let markdown = Element.parse(elements).trim();
|
||||
markdown = markdown.replace(/^<b>/, "").replace(/<\/b>$/, "").trim(); // fix for google doc copy paste
|
||||
markdown = markdown.replace(/\r/g, "").replace(/\n \n/g, "\n\n").replace(/\n{3,}/g, "\n\n");
|
||||
markdown = markdown.replace(/ +\n/g, "\n").replace(/\n \n/g, "\n\n").replace(/\n{3,}/g, "\n\n");
|
||||
return replacePlaceholders(markdown, placeholders);
|
||||
} catch(err) {
|
||||
return "";
|
||||
|
|
|
@ -4,19 +4,21 @@ QUnit.module("lib:to-markdown");
|
|||
|
||||
QUnit.test("converts styles between normal words", assert => {
|
||||
const html = `Line with <s>styles</s> <b><i>between</i></b> words.`;
|
||||
const markdown = `Line with ~~styles~~ **_between_** words.`;
|
||||
const markdown = `Line with ~~styles~~ ***between*** words.`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
|
||||
assert.equal(toMarkdown("A <b>bold </b>word"), "A **bold** word");
|
||||
});
|
||||
|
||||
QUnit.test("converts inline nested styles", assert => {
|
||||
let html = `<em>Italicised line with <strong>some random</strong> <b>bold</b> words.</em>`;
|
||||
let markdown = `_Italicised line with **some random** **bold** words._`;
|
||||
let markdown = `*Italicised line with **some random** **bold** words.*`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
|
||||
html = `<i class="fa">Italicised line
|
||||
with <b title="strong">some
|
||||
with <b title="strong">some<br>
|
||||
random</b> <s>bold</s> words.</i>`;
|
||||
markdown = `<i>Italicised line\n with <b>some\n random</b> ~~bold~~ words.</i>`;
|
||||
markdown = `<i>Italicised line with <b>some\nrandom</b> ~~bold~~ words.</i>`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
|
@ -26,7 +28,7 @@ QUnit.test("converts a link", assert => {
|
|||
assert.equal(toMarkdown(html), markdown);
|
||||
|
||||
html = `<a href="https://discourse.org">Disc\n\n\nour\n\nse</a>`;
|
||||
markdown = `[Disc\nour\nse](https://discourse.org)`;
|
||||
markdown = `[Disc our se](https://discourse.org)`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
|
@ -82,7 +84,7 @@ QUnit.test("converts ul list tag", assert => {
|
|||
<li>Item 3</li>
|
||||
</ul>
|
||||
`;
|
||||
const markdown = `* Item 1\n* Item 2\n\n * Sub Item 1\n * Sub Item 2\n\n * Sub _Sub_ Item 1\n * Sub **Sub** Item 2\n\n* Item 3`;
|
||||
const markdown = `* Item 1\n* Item 2\n\n * Sub Item 1\n * Sub Item 2\n\n * Sub *Sub* Item 1\n * Sub **Sub** Item 2\n\n* Item 3`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
|
@ -101,10 +103,12 @@ QUnit.test("converts table tags", assert => {
|
|||
<thead> <tr><th>Heading 1</th><th>Head 2</th></tr> </thead>
|
||||
<tbody>
|
||||
<tr><td>Lorem</td><td>ipsum</td></tr>
|
||||
<tr><td><b>dolor</b></td> <td><i>sit amet</i></td></tr></tbody>
|
||||
<tr><td><b>dolor</b></td> <td><i>sit amet</i></td> </tr>
|
||||
|
||||
</tbody>
|
||||
</table>
|
||||
`;
|
||||
const markdown = `Discourse Avenue\n\n**laboris**\n\n|Heading 1|Head 2|\n| --- | --- |\n|Lorem|ipsum|\n|**dolor**|_sit amet_|`;
|
||||
const markdown = `Discourse Avenue\n\n**laboris**\n\n|Heading 1|Head 2|\n| --- | --- |\n|Lorem|ipsum|\n|**dolor**|*sit amet*|`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
||||
|
@ -164,11 +168,11 @@ QUnit.test("supporting html tags by keeping them", assert => {
|
|||
output = `[Lorem <del>ipsum dolor</del> sit](http://example.com).`;
|
||||
assert.equal(toMarkdown(html), output);
|
||||
|
||||
html = `Lorem <del>ipsum \n\n dolor</del> sit.`;
|
||||
html = `Lorem <del>ipsum dolor</del> sit.`;
|
||||
assert.equal(toMarkdown(html), html);
|
||||
|
||||
html = `Lorem <a href="http://example.com"><del>ipsum \n\n\n dolor</del> sit.</a>`;
|
||||
output = `Lorem [<del>ipsum \n dolor</del> sit.](http://example.com)`;
|
||||
output = `Lorem [<del>ipsum dolor</del> sit.](http://example.com)`;
|
||||
assert.equal(toMarkdown(html), output);
|
||||
});
|
||||
|
||||
|
@ -223,6 +227,6 @@ QUnit.test("converts ol list tag", assert => {
|
|||
<li>Item 3</li>
|
||||
</ol>
|
||||
`;
|
||||
const markdown = `Testing\n\n1. Item 1\n2. Item 2\n\n 100. Sub Item 1\n 101. Sub Item 2\n\n * Sub _Sub_ Item 1\n * Sub **Sub** Item 2\n\n3. Item 3`;
|
||||
const markdown = `Testing\n\n1. Item 1\n2. Item 2\n\n 100. Sub Item 1\n 101. Sub Item 2\n\n * Sub *Sub* Item 1\n * Sub **Sub** Item 2\n\n3. Item 3`;
|
||||
assert.equal(toMarkdown(html), markdown);
|
||||
});
|
||||
|
|
Loading…
Reference in New Issue