From ece3ac967d90cb2e9744a2ebd55dcbee90cdb980 Mon Sep 17 00:00:00 2001 From: MeiMei <30769358+mei23@users.noreply.github.com> Date: Sun, 26 Sep 2021 01:57:38 +0900 Subject: [PATCH] Tune mfmToHtml (#7841) * Tune mfmToHtml * typo * add --- src/mfm/from-html.ts | 124 ++++++++++++++++++++++++++++++++++++++----- test/mfm.ts | 24 +++++++++ 2 files changed, 136 insertions(+), 12 deletions(-) diff --git a/src/mfm/from-html.ts b/src/mfm/from-html.ts index 4c8e2dbec..14279f338 100644 --- a/src/mfm/from-html.ts +++ b/src/mfm/from-html.ts @@ -5,7 +5,9 @@ import { URL } from 'url'; const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; -export function fromHtml(html: string, hashtagNames?: string[]): string { +export function fromHtml(html: string, hashtagNames?: string[]): string | null { + if (html == null) return null; + const dom = parse5.parseFragment(html); let text = ''; @@ -19,6 +21,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { function getText(node: parse5.Node): string { if (treeAdapter.isTextNode(node)) return node.value; if (!treeAdapter.isElementNode(node)) return ''; + if (node.nodeName === 'br') return '\n'; if (node.childNodes) { return node.childNodes.map(n => getText(n)).join(''); @@ -27,6 +30,14 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { return ''; } + function appendChildren(childNodes: parse5.ChildNode[]): void { + if (childNodes) { + for (const n of childNodes) { + analyze(n); + } + } + } + function analyze(node: parse5.Node) { if (treeAdapter.isTextNode(node)) { text += node.value; @@ -42,6 +53,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { break; case 'a': + { const txt = getText(node); const rel = node.attrs.find(x => x.name === 'rel'); const href = node.attrs.find(x => x.name === 'href'); @@ -87,23 +99,111 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { 
text += generateLink(); } break; + } + + case 'h1': + { + text += '【'; + appendChildren(node.childNodes); + text += '】\n'; + break; + } + + case 'b': + case 'strong': + { + text += '**'; + appendChildren(node.childNodes); + text += '**'; + break; + } + + case 'small': + { + text += '<small>'; + appendChildren(node.childNodes); + text += '</small>'; + break; + } + + case 's': + case 'del': + { + text += '~~'; + appendChildren(node.childNodes); + text += '~~'; + break; + } + + case 'i': + case 'em': + { + text += '<i>'; + appendChildren(node.childNodes); + text += '</i>'; + break; + } + + // block code (<pre><code>)
+ case 'pre': {
+ if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
+ text += '```\n';
+ text += getText(node.childNodes[0]);
+ text += '\n```\n';
+ } else {
+ appendChildren(node.childNodes);
+ }
+ break;
+ }
+
+ // inline code (<code>)
+ case 'code': {
+ text += '`';
+ appendChildren(node.childNodes);
+ text += '`';
+ break;
+ }
+
+ case 'blockquote': {
+ const t = getText(node);
+ if (t) {
+ text += '> ';
+ text += t.split('\n').join(`\n> `);
+ }
+ break;
+ }
case 'p':
+ case 'h2':
+ case 'h3':
+ case 'h4':
+ case 'h5':
+ case 'h6':
+ {
text += '\n\n';
- if (node.childNodes) {
- for (const n of node.childNodes) {
- analyze(n);
- }
- }
+ appendChildren(node.childNodes);
break;
+ }
- default:
- if (node.childNodes) {
- for (const n of node.childNodes) {
- analyze(n);
- }
- }
+ // other block elements
+ case 'div':
+ case 'header':
+ case 'footer':
+ case 'article':
+ case 'li':
+ case 'dt':
+ case 'dd':
+ {
+ text += '\n';
+ appendChildren(node.childNodes);
break;
+ }
+
+ default: // includes inline elements
+ {
+ appendChildren(node.childNodes);
+ break;
+ }
}
}
}
diff --git a/test/mfm.ts b/test/mfm.ts
index d9b98cdac..ecf886ad6 100644
--- a/test/mfm.ts
+++ b/test/mfm.ts
@@ -19,6 +19,30 @@ describe('toHtml', () => {
});
describe('fromHtml', () => {
+ it('p', () => {
+ assert.deepStrictEqual(fromHtml('<p>a</p><p>b</p>'), 'a\n\nb');
+ });
+
+ it('block element', () => {
+ assert.deepStrictEqual(fromHtml('<div>a</div><div>b</div>'), 'a\nb');
+ });
+
+ it('inline element', () => {
+ assert.deepStrictEqual(fromHtml('<ul><li>a</li><li>b</li></ul>'), 'a\nb');
+ });
+
+ it('block code', () => {
+ assert.deepStrictEqual(fromHtml('<pre><code>a\nb</code></pre>'), '```\na\nb\n```');
+ });
+
+ it('inline code', () => {
+ assert.deepStrictEqual(fromHtml('<code>a</code>'), '`a`');
+ });
+
+ it('quote', () => {
+ assert.deepStrictEqual(fromHtml('<blockquote>a\nb</blockquote>'), '> a\n> b');
+ });
+
it('br', () => {
assert.deepStrictEqual(fromHtml('<p>abc<br><br/>d</p>'), 'abc\n\nd');
});