calckey/src/mfm/from-html.ts

110 lines
2.5 KiB
TypeScript
Raw Normal View History

import * as parse5 from 'parse5';
import treeAdapter = require('parse5/lib/tree-adapters/default');
import { URL } from 'url';
const urlRegex = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+/;
const urlRegexFull = /^https?:\/\/[\w\/:%#@$&?!()\[\]~.,=+\-]+$/;
export function fromHtml(html: string, hashtagNames?: string[]): string {
const dom = parse5.parseFragment(html);
let text = '';
for (const n of dom.childNodes) {
analyze(n);
}
return text.trim();
function getText(node: parse5.Node): string {
if (treeAdapter.isTextNode(node)) return node.value;
if (!treeAdapter.isElementNode(node)) return '';
if (node.childNodes) {
return node.childNodes.map(n => getText(n)).join('');
}
return '';
}
function analyze(node: parse5.Node) {
if (treeAdapter.isTextNode(node)) {
text += node.value;
return;
}
// Skip comment or document type node
if (!treeAdapter.isElementNode(node)) return;
switch (node.nodeName) {
case 'br':
text += '\n';
break;
case 'a':
const txt = getText(node);
const rel = node.attrs.find(x => x.name === 'rel');
const href = node.attrs.find(x => x.name === 'href');
// ハッシュタグ
if (hashtagNames && href && hashtagNames.map(x => x.toLowerCase()).includes(txt.toLowerCase())) {
text += txt;
// メンション
2018-12-12 02:47:07 +00:00
} else if (txt.startsWith('@') && !(rel && rel.value.match(/^me /))) {
const part = txt.split('@');
if (part.length === 2 && href) {
//#region ホスト名部分が省略されているので復元する
2018-09-01 14:12:51 +00:00
const acct = `${txt}@${(new URL(href.value)).hostname}`;
text += acct;
//#endregion
2020-04-03 23:46:54 +00:00
} else if (part.length === 3) {
text += txt;
}
// その他
} else {
const generateLink = () => {
if (!href && !txt) {
return '';
}
if (!href) {
return txt;
}
if (!txt || txt === href.value) { // #6383: Missing text node
if (href.value.match(urlRegexFull)) {
return href.value;
} else {
return `<${href.value}>`;
}
}
if (href.value.match(urlRegex) && !href.value.match(urlRegexFull)) {
return `[${txt}](<${href.value}>)`; // #6846
} else {
return `[${txt}](${href.value})`;
}
};
text += generateLink();
}
break;
case 'p':
text += '\n\n';
if (node.childNodes) {
for (const n of node.childNodes) {
analyze(n);
}
}
break;
default:
if (node.childNodes) {
for (const n of node.childNodes) {
analyze(n);
}
}
break;
}
}
}