From ece3ac967d90cb2e9744a2ebd55dcbee90cdb980 Mon Sep 17 00:00:00 2001 From: MeiMei <30769358+mei23@users.noreply.github.com> Date: Sun, 26 Sep 2021 01:57:38 +0900 Subject: [PATCH] Tune mfmToHtml (#7841) * Tune mfmToHtml * typo * add --- src/mfm/from-html.ts | 124 ++++++++++++++++++++++++++++++++++++++----- test/mfm.ts | 24 +++++++++ 2 files changed, 136 insertions(+), 12 deletions(-) diff --git a/src/mfm/from-html.ts b/src/mfm/from-html.ts index 4c8e2dbec..14279f338 100644 --- a/src/mfm/from-html.ts +++ b/src/mfm/from-html.ts @@ -5,7 +5,9 @@ import { URL } from 'url'; const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/; const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/; -export function fromHtml(html: string, hashtagNames?: string[]): string { +export function fromHtml(html: string, hashtagNames?: string[]): string | null { + if (html == null) return null; + const dom = parse5.parseFragment(html); let text = ''; @@ -19,6 +21,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { function getText(node: parse5.Node): string { if (treeAdapter.isTextNode(node)) return node.value; if (!treeAdapter.isElementNode(node)) return ''; + if (node.nodeName === 'br') return '\n'; if (node.childNodes) { return node.childNodes.map(n => getText(n)).join(''); @@ -27,6 +30,14 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { return ''; } + function appendChildren(childNodes: parse5.ChildNode[]): void { + if (childNodes) { + for (const n of childNodes) { + analyze(n); + } + } + } + function analyze(node: parse5.Node) { if (treeAdapter.isTextNode(node)) { text += node.value; @@ -42,6 +53,7 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { break; case 'a': + { const txt = getText(node); const rel = node.attrs.find(x => x.name === 'rel'); const href = node.attrs.find(x => x.name === 'href'); @@ -87,23 +99,111 @@ export function fromHtml(html: string, hashtagNames?: string[]): string { 
 					text += generateLink();
 				}
 				break;
+			}
+
+			case 'h1':
+			{
+				text += '【';
+				appendChildren(node.childNodes);
+				text += '】\n';
+				break;
+			}
+
+			case 'b':
+			case 'strong':
+			{
+				text += '**';
+				appendChildren(node.childNodes);
+				text += '**';
+				break;
+			}
+
+			case 'small':
+			{
+				text += '<small>';
+				appendChildren(node.childNodes);
+				text += '</small>';
+				break;
+			}
+
+			case 's':
+			case 'del':
+			{
+				text += '~~';
+				appendChildren(node.childNodes);
+				text += '~~';
+				break;
+			}
+
+			case 'i':
+			case 'em':
+			{
+				text += '<i>';
+				appendChildren(node.childNodes);
+				text += '</i>';
+				break;
+			}
+
+			// block code (<pre><code>)
+			case 'pre': {
+				if (node.childNodes.length === 1 && node.childNodes[0].nodeName === 'code') {
+					text += '```\n';
+					text += getText(node.childNodes[0]);
+					text += '\n```\n';
+				} else {
+					appendChildren(node.childNodes);
+				}
+				break;
+			}
+
+			// inline code (<code>)
+			case 'code': {
+				text += '`';
+				appendChildren(node.childNodes);
+				text += '`';
+				break;
+			}
+
+			case 'blockquote': {
+				const t = getText(node);
+				if (t) {
+					text += '> ';
+					text += t.split('\n').join(`\n> `);
+				}
+				break;
+			}
 
 			case 'p':
+			case 'h2':
+			case 'h3':
+			case 'h4':
+			case 'h5':
+			case 'h6':
+			{
 				text += '\n\n';
-				if (node.childNodes) {
-					for (const n of node.childNodes) {
-						analyze(n);
-					}
-				}
+				appendChildren(node.childNodes);
 				break;
+			}
 
-			default:
-				if (node.childNodes) {
-					for (const n of node.childNodes) {
-						analyze(n);
-					}
-				}
+			// other block elements
+			case 'div':
+			case 'header':
+			case 'footer':
+			case 'article':
+			case 'li':
+			case 'dt':
+			case 'dd':
+			{
+				text += '\n';
+				appendChildren(node.childNodes);
 				break;
+			}
+
+			default:	// includes inline elements
+			{
+				appendChildren(node.childNodes);
+				break;
+			}
 		}
 	}
 }
diff --git a/test/mfm.ts b/test/mfm.ts
index d9b98cdac..ecf886ad6 100644
--- a/test/mfm.ts
+++ b/test/mfm.ts
@@ -19,6 +19,30 @@ describe('toHtml', () => {
 });
 
 describe('fromHtml', () => {
+	it('p', () => {
+		assert.deepStrictEqual(fromHtml('<p>a</p><p>b</p>'), 'a\n\nb');
+	});
+
+	it('block element', () => {
+		assert.deepStrictEqual(fromHtml('<div>a</div><div>b</div>'), 'a\nb');
+	});
+
+	it('inline element', () => {
+		assert.deepStrictEqual(fromHtml('<ul><li>a</li><li>b</li></ul>'), 'a\nb');
+	});
+
+	it('block code', () => {
+		assert.deepStrictEqual(fromHtml('<pre><code>a\nb</code></pre>'), '```\na\nb\n```');
+	});
+
+	it('inline code', () => {
+		assert.deepStrictEqual(fromHtml('<code>a</code>'), '`a`');
+	});
+
+	it('quote', () => {
+		assert.deepStrictEqual(fromHtml('<blockquote>a\nb</blockquote>'), '> a\n> b');
+	});
+
 	it('br', () => {
 		assert.deepStrictEqual(fromHtml('<p>abc<br><br/>d</p>'), 'abc\n\nd');
 	});