Remove unwanted spaces between HTML tags and support Word documents
This commit is contained in:
parent
3bc53f2946
commit
4935ae4338
|
@ -662,8 +662,6 @@ export default Ember.Component.extend({
|
||||||
if (table) {
|
if (table) {
|
||||||
this.appEvents.trigger('composer:insert-text', table);
|
this.appEvents.trigger('composer:insert-text', table);
|
||||||
handled = true;
|
handled = true;
|
||||||
} else if (html && html.includes("urn:schemas-microsoft-com:office:word")) {
|
|
||||||
html = ""; // use plain text data for microsoft word
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,15 +38,15 @@ class Tag {
|
||||||
}
|
}
|
||||||
|
|
||||||
static emphases() {
|
static emphases() {
|
||||||
return [ ["b", "**"], ["strong", "**"], ["i", "_"], ["em", "_"], ["s", "~~"], ["strike", "~~"] ];
|
return [ ["b", "**"], ["strong", "**"], ["i", "*"], ["em", "*"], ["s", "~~"], ["strike", "~~"] ];
|
||||||
}
|
}
|
||||||
|
|
||||||
static slices() {
|
static slices() {
|
||||||
return ["dt", "dd", "tr", "thead", "tbody", "tfoot"];
|
return ["dt", "dd", "thead", "tbody", "tfoot"];
|
||||||
}
|
}
|
||||||
|
|
||||||
static trimmable() {
|
static trimmable() {
|
||||||
return [...Tag.blocks(), ...Tag.headings(), ...Tag.slices(), "li", "td", "th", "br", "hr", "blockquote", "table", "ol"];
|
return [...Tag.blocks(), ...Tag.headings(), ...Tag.slices(), "li", "td", "th", "br", "hr", "blockquote", "table", "ol", "tr"];
|
||||||
}
|
}
|
||||||
|
|
||||||
static block(name, prefix, suffix) {
|
static block(name, prefix, suffix) {
|
||||||
|
@ -73,14 +73,17 @@ class Tag {
|
||||||
}
|
}
|
||||||
|
|
||||||
decorate(text) {
|
decorate(text) {
|
||||||
text = text.trim();
|
|
||||||
|
|
||||||
if (text.includes("\n")) {
|
if (text.includes("\n")) {
|
||||||
this.prefix = `<${this.name}>`;
|
this.prefix = `<${this.name}>`;
|
||||||
this.suffix = `</${this.name}>`;
|
this.suffix = `</${this.name}>`;
|
||||||
}
|
}
|
||||||
|
|
||||||
return super.decorate(text);
|
let space = text.match(/^\s/) || [""];
|
||||||
|
this.prefix = space[0] + this.prefix;
|
||||||
|
space = text.match(/\s$/) || [""];
|
||||||
|
this.suffix = this.suffix + space[0];
|
||||||
|
|
||||||
|
return super.decorate(text.trim());
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
@ -182,10 +185,6 @@ class Tag {
|
||||||
throw "Unsupported format inside Markdown table cells";
|
throw "Unsupported format inside Markdown table cells";
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!this.element.next) {
|
|
||||||
this.suffix = "|";
|
|
||||||
}
|
|
||||||
|
|
||||||
return this.decorate(text);
|
return this.decorate(text);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -268,6 +267,17 @@ class Tag {
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static tr() {
|
||||||
|
return class extends Tag.slice("tr", "|\n") {
|
||||||
|
decorate(text) {
|
||||||
|
if (!this.element.next) {
|
||||||
|
this.suffix = "|";
|
||||||
|
}
|
||||||
|
return `${text}${this.suffix}`;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const tags = [
|
const tags = [
|
||||||
|
@ -278,7 +288,7 @@ const tags = [
|
||||||
Tag.cell("td"), Tag.cell("th"),
|
Tag.cell("td"), Tag.cell("th"),
|
||||||
Tag.replace("br", "\n"), Tag.replace("hr", "\n---\n"), Tag.replace("head", ""),
|
Tag.replace("br", "\n"), Tag.replace("hr", "\n---\n"), Tag.replace("head", ""),
|
||||||
Tag.keep("ins"), Tag.keep("del"), Tag.keep("small"), Tag.keep("big"),
|
Tag.keep("ins"), Tag.keep("del"), Tag.keep("small"), Tag.keep("big"),
|
||||||
Tag.li(), Tag.link(), Tag.image(), Tag.code(), Tag.blockquote(), Tag.table(),, Tag.ol(),
|
Tag.li(), Tag.link(), Tag.image(), Tag.code(), Tag.blockquote(), Tag.table(), Tag.ol(), Tag.tr(),
|
||||||
];
|
];
|
||||||
|
|
||||||
class Element {
|
class Element {
|
||||||
|
@ -375,6 +385,19 @@ class Element {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Normalises whitespace in pasted HTML before it is handed to the parser.
 *
 * - If the markup contains a `<body>` element, only its inner HTML is kept.
 * - Carriage returns, line feeds and non-breaking spaces (both the U+00A0
 *   character and the `&nbsp;` entity) are each replaced by a plain space.
 * - A run of two or more whitespace characters sitting directly between two
 *   tags is collapsed to a single space.
 *
 * @param {string} html - Raw HTML fragment or full document.
 * @returns {string} The HTML with the whitespace adjustments applied.
 */
function trimUnwantedSpaces(html) {
  // HTML tag names are case-insensitive, so accept <BODY> as well as <body>.
  const body = html.match(/<body[^>]*>([\s\S]*?)<\/body>/i);
  const content = body ? body[1] : html;

  const flattened = content.replace(/\r|\n|\u00A0|&nbsp;/g, " ");

  // One global pass instead of a match/replace loop. The replacer function
  // guarantees that `$` sequences inside a tag (e.g. title="$&") are copied
  // verbatim rather than interpreted as replacement patterns, which could
  // otherwise corrupt the output or keep a loop from terminating. The
  // lookahead leaves the second tag unconsumed so it can open the next match.
  return flattened.replace(
    /(<[^\s>]+[^>]*>)\s{2,}(?=<[^\s>]+[^>]*>)/g,
    (_match, tag) => `${tag} `
  );
}
|
||||||
|
|
||||||
function putPlaceholders(html) {
|
function putPlaceholders(html) {
|
||||||
const codeRegEx = /<code[^>]*>([\s\S]*?)<\/code>/gi;
|
const codeRegEx = /<code[^>]*>([\s\S]*?)<\/code>/gi;
|
||||||
const origHtml = html;
|
const origHtml = html;
|
||||||
|
@ -390,7 +413,7 @@ function putPlaceholders(html) {
|
||||||
match = codeRegEx.exec(origHtml);
|
match = codeRegEx.exec(origHtml);
|
||||||
}
|
}
|
||||||
|
|
||||||
const elements = parseHTML(html);
|
const elements = parseHTML(trimUnwantedSpaces(html));
|
||||||
return { elements, placeholders };
|
return { elements, placeholders };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -406,7 +429,7 @@ export default function toMarkdown(html) {
|
||||||
const { elements, placeholders } = putPlaceholders(html);
|
const { elements, placeholders } = putPlaceholders(html);
|
||||||
let markdown = Element.parse(elements).trim();
|
let markdown = Element.parse(elements).trim();
|
||||||
markdown = markdown.replace(/^<b>/, "").replace(/<\/b>$/, "").trim(); // fix for google doc copy paste
|
markdown = markdown.replace(/^<b>/, "").replace(/<\/b>$/, "").trim(); // fix for google doc copy paste
|
||||||
markdown = markdown.replace(/\r/g, "").replace(/\n \n/g, "\n\n").replace(/\n{3,}/g, "\n\n");
|
markdown = markdown.replace(/ +\n/g, "\n").replace(/\n \n/g, "\n\n").replace(/\n{3,}/g, "\n\n");
|
||||||
return replacePlaceholders(markdown, placeholders);
|
return replacePlaceholders(markdown, placeholders);
|
||||||
} catch(err) {
|
} catch(err) {
|
||||||
return "";
|
return "";
|
||||||
|
|
|
@ -4,19 +4,21 @@ QUnit.module("lib:to-markdown");
|
||||||
|
|
||||||
QUnit.test("converts styles between normal words", assert => {
|
QUnit.test("converts styles between normal words", assert => {
|
||||||
const html = `Line with <s>styles</s> <b><i>between</i></b> words.`;
|
const html = `Line with <s>styles</s> <b><i>between</i></b> words.`;
|
||||||
const markdown = `Line with ~~styles~~ **_between_** words.`;
|
const markdown = `Line with ~~styles~~ ***between*** words.`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
|
|
||||||
|
assert.equal(toMarkdown("A <b>bold </b>word"), "A **bold** word");
|
||||||
});
|
});
|
||||||
|
|
||||||
QUnit.test("converts inline nested styles", assert => {
|
QUnit.test("converts inline nested styles", assert => {
|
||||||
let html = `<em>Italicised line with <strong>some random</strong> <b>bold</b> words.</em>`;
|
let html = `<em>Italicised line with <strong>some random</strong> <b>bold</b> words.</em>`;
|
||||||
let markdown = `_Italicised line with **some random** **bold** words._`;
|
let markdown = `*Italicised line with **some random** **bold** words.*`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
|
|
||||||
html = `<i class="fa">Italicised line
|
html = `<i class="fa">Italicised line
|
||||||
with <b title="strong">some
|
with <b title="strong">some<br>
|
||||||
random</b> <s>bold</s> words.</i>`;
|
random</b> <s>bold</s> words.</i>`;
|
||||||
markdown = `<i>Italicised line\n with <b>some\n random</b> ~~bold~~ words.</i>`;
|
markdown = `<i>Italicised line with <b>some\nrandom</b> ~~bold~~ words.</i>`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -26,7 +28,7 @@ QUnit.test("converts a link", assert => {
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
|
|
||||||
html = `<a href="https://discourse.org">Disc\n\n\nour\n\nse</a>`;
|
html = `<a href="https://discourse.org">Disc\n\n\nour\n\nse</a>`;
|
||||||
markdown = `[Disc\nour\nse](https://discourse.org)`;
|
markdown = `[Disc our se](https://discourse.org)`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -82,7 +84,7 @@ QUnit.test("converts ul list tag", assert => {
|
||||||
<li>Item 3</li>
|
<li>Item 3</li>
|
||||||
</ul>
|
</ul>
|
||||||
`;
|
`;
|
||||||
const markdown = `* Item 1\n* Item 2\n\n * Sub Item 1\n * Sub Item 2\n\n * Sub _Sub_ Item 1\n * Sub **Sub** Item 2\n\n* Item 3`;
|
const markdown = `* Item 1\n* Item 2\n\n * Sub Item 1\n * Sub Item 2\n\n * Sub *Sub* Item 1\n * Sub **Sub** Item 2\n\n* Item 3`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -101,10 +103,12 @@ QUnit.test("converts table tags", assert => {
|
||||||
<thead> <tr><th>Heading 1</th><th>Head 2</th></tr> </thead>
|
<thead> <tr><th>Heading 1</th><th>Head 2</th></tr> </thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
<tr><td>Lorem</td><td>ipsum</td></tr>
|
<tr><td>Lorem</td><td>ipsum</td></tr>
|
||||||
<tr><td><b>dolor</b></td> <td><i>sit amet</i></td></tr></tbody>
|
<tr><td><b>dolor</b></td> <td><i>sit amet</i></td> </tr>
|
||||||
|
|
||||||
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
`;
|
`;
|
||||||
const markdown = `Discourse Avenue\n\n**laboris**\n\n|Heading 1|Head 2|\n| --- | --- |\n|Lorem|ipsum|\n|**dolor**|_sit amet_|`;
|
const markdown = `Discourse Avenue\n\n**laboris**\n\n|Heading 1|Head 2|\n| --- | --- |\n|Lorem|ipsum|\n|**dolor**|*sit amet*|`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -164,11 +168,11 @@ QUnit.test("supporting html tags by keeping them", assert => {
|
||||||
output = `[Lorem <del>ipsum dolor</del> sit](http://example.com).`;
|
output = `[Lorem <del>ipsum dolor</del> sit](http://example.com).`;
|
||||||
assert.equal(toMarkdown(html), output);
|
assert.equal(toMarkdown(html), output);
|
||||||
|
|
||||||
html = `Lorem <del>ipsum \n\n dolor</del> sit.`;
|
html = `Lorem <del>ipsum dolor</del> sit.`;
|
||||||
assert.equal(toMarkdown(html), html);
|
assert.equal(toMarkdown(html), html);
|
||||||
|
|
||||||
html = `Lorem <a href="http://example.com"><del>ipsum \n\n\n dolor</del> sit.</a>`;
|
html = `Lorem <a href="http://example.com"><del>ipsum \n\n\n dolor</del> sit.</a>`;
|
||||||
output = `Lorem [<del>ipsum \n dolor</del> sit.](http://example.com)`;
|
output = `Lorem [<del>ipsum dolor</del> sit.](http://example.com)`;
|
||||||
assert.equal(toMarkdown(html), output);
|
assert.equal(toMarkdown(html), output);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -223,6 +227,6 @@ QUnit.test("converts ol list tag", assert => {
|
||||||
<li>Item 3</li>
|
<li>Item 3</li>
|
||||||
</ol>
|
</ol>
|
||||||
`;
|
`;
|
||||||
const markdown = `Testing\n\n1. Item 1\n2. Item 2\n\n 100. Sub Item 1\n 101. Sub Item 2\n\n * Sub _Sub_ Item 1\n * Sub **Sub** Item 2\n\n3. Item 3`;
|
const markdown = `Testing\n\n1. Item 1\n2. Item 2\n\n 100. Sub Item 1\n 101. Sub Item 2\n\n * Sub *Sub* Item 1\n * Sub **Sub** Item 2\n\n3. Item 3`;
|
||||||
assert.equal(toMarkdown(html), markdown);
|
assert.equal(toMarkdown(html), markdown);
|
||||||
});
|
});
|
||||||
|
|
Loading…
Reference in New Issue