/*
 * packages/client/src/scripts/katex-macro.ts
 *
 * KaTeX macro (small subset version) parser and expander.
 *
 * Introduced by commit da17e8978a9f7e7a2a24385d209fe72b06a4ac5c
 * (naskya, 2023-03-26): "Create a KaTeX macro (small subset version)
 * parser and expander".
 */

/** A user-defined command created with `\newcommand`. */
type KaTeXCommand = {
	/** Number of arguments the command takes. */
	args: number;
	/**
	 * Expansion template: strings are copied verbatim, numbers are 1-based
	 * references to the command's arguments.
	 */
	rule: (string | number)[];
};

/** Maps each supported open bracket to its close bracket. */
const CLOSE_BRACKET_OF = new Map<string, string>([
	["(", ")"],
	["{", "}"],
	["[", "]"],
]);

/** Runtime check that a value decoded from JSON has the shape of a `KaTeXCommand`. */
function isKaTeXCommand(value: unknown): value is KaTeXCommand {
	if (typeof value !== "object" || value === null) return false;

	const { args, rule } = value as { args?: unknown; rule?: unknown };
	return (
		typeof args === "number" &&
		Number.isInteger(args) &&
		args >= 0 &&
		Array.isArray(rule) &&
		rule.every(
			(block) =>
				typeof block === "string" ||
				(typeof block === "number" && Number.isInteger(block) && block >= 1),
		)
	);
}

/**
 * Parses one line of the form
 * `\newcommand{\name}[numberOfArgs]{rule}` (the name may also be wrapped in
 * `(...)` or `[...]`, and `[numberOfArgs]` is optional).
 *
 * @param src a single, already trimmed line
 * @returns `[key, command]`. For a command without arguments the key is the
 *   bare name; for a command with arguments it is the name followed by the
 *   open bracket used around the name in the definition (e.g. `"foo{"`).
 *   Invalid input yields `["", { args: 0, rule: [] }]`.
 */
function parseSingleKaTeXCommand(src: string): [string, KaTeXCommand] {
	const invalid: [string, KaTeXCommand] = ["", { args: 0, rule: [] }];
	const keyword = "\\newcommand";

	const skipSpaces = (pos: number): number => {
		while (src[pos] === " ") ++pos;
		return pos;
	};

	// the final "}" is taken to be the bracket that closes the rule
	if (!src.startsWith(keyword) || !src.endsWith("}")) return invalid;

	let currentPos = skipSpaces(keyword.length);

	// parse {\name}, (\name), or [\name]
	const openBracket = src.charAt(currentPos);
	const closeBracket = CLOSE_BRACKET_OF.get(openBracket);
	if (closeBracket === undefined) return invalid;

	currentPos = skipSpaces(currentPos + 1);
	if (src[currentPos] !== "\\") return invalid;

	const closeNameBracketPos = src.indexOf(closeBracket, currentPos);
	if (closeNameBracketPos === -1) return invalid;

	const name = src.slice(currentPos + 1, closeNameBracketPos).trim();
	if (!/^[a-zA-Z]+$/.test(name)) return invalid;

	currentPos = skipSpaces(closeNameBracketPos + 1);

	// parse [number of arguments] (optional)
	let args = 0;
	if (src[currentPos] === "[") {
		const closeArgsBracketPos = src.indexOf("]", currentPos);
		if (closeArgsBracketPos === -1) return invalid;

		// Digits only: Number() would also accept "1.5", "1e3", "0x10" or
		// "Infinity", none of which is a usable argument count.
		// An empty "[]" keeps meaning "no arguments".
		const argsText = src.slice(currentPos + 1, closeArgsBracketPos).trim();
		if (!/^\d*$/.test(argsText)) return invalid;

		args = Number(argsText);
		currentPos = skipSpaces(closeArgsBracketPos + 1);
	}

	// parse {rule}
	if (src[currentPos] !== "{") return invalid;
	currentPos = skipSpaces(currentPos + 1);

	const ruleEnd = src.length - 1; // position of the closing "}"
	const rule: (string | number)[] = [];

	while (currentPos < ruleEnd) {
		// look for the next "#" that is not escaped by a backslash
		let numbersignPos = -1;
		let isEscaped = false;

		for (let i = currentPos; i < ruleEnd; ++i) {
			const ch = src[i];
			if (ch === "\\") {
				isEscaped = !isEscaped;
				continue;
			}
			if (ch === "#" && !isEscaped) {
				numbersignPos = i;
				break;
			}
			// any other character (including an escaped "#") ends the escape
			isEscaped = false;
		}

		if (numbersignPos === -1) {
			rule.push(src.slice(currentPos, ruleEnd));
			break;
		}

		// read the digits that follow "#"
		let argIndexEndPos = numbersignPos + 1;
		while (argIndexEndPos < ruleEnd && /\d/.test(src.charAt(argIndexEndPos)))
			++argIndexEndPos;

		// "#" without digits gives Number("") === 0, which is rejected below
		const argIndex = Number(src.slice(numbersignPos + 1, argIndexEndPos));
		if (argIndex < 1 || args < argIndex) return invalid;

		if (currentPos !== numbersignPos)
			rule.push(src.slice(currentPos, numbersignPos));
		rule.push(argIndex);

		currentPos = argIndexEndPos;
	}

	const command: KaTeXCommand = { args, rule };
	return args === 0 ? [name, command] : [name + openBracket, command];
}

/**
 * Parses `\newcommand` definitions, one per line; lines that are not valid
 * definitions are ignored.
 *
 * @param src text containing the definitions
 * @returns a JSON string describing the parsed commands, suitable as the
 *   second argument of {@link expandKaTeXCommand}
 */
export function parseKaTeXCommands(src: string): string {
	const result: { [name: string]: KaTeXCommand } = {};

	for (const line of src.split("\n")) {
		const [name, command] = parseSingleKaTeXCommand(line.trim());
		if (name !== "") result[name] = command;
	}

	return JSON.stringify(result);
}

/**
 * Expands the user-defined commands that occur in `src`.
 *
 * A command with arguments is only expanded when its arguments directly
 * follow the command name (optionally separated by whitespace), use the
 * bracket kind the command was defined with, and are properly closed.
 * Occurrences that do not meet these conditions are left untouched.
 * Arguments are expanded recursively; text produced by a rule is not
 * expanded again.
 *
 * @param src text that may contain custom commands
 * @param commandsAsJsonString output of {@link parseKaTeXCommands}
 * @returns `src` with the custom commands replaced by their rules
 * @throws SyntaxError if `commandsAsJsonString` is not valid JSON
 */
export function expandKaTeXCommand(
	src: string,
	commandsAsJsonString: string,
): string {
	// A Map (instead of the decoded object itself) keeps user-chosen names
	// such as "hasOwnProperty" or "constructor" from clashing with
	// Object.prototype members. Entries with an unexpected shape are dropped.
	const commands = new Map<string, KaTeXCommand>();
	const parsed: unknown = JSON.parse(commandsAsJsonString);
	if (typeof parsed === "object" && parsed !== null) {
		for (const [key, value] of Object.entries(parsed)) {
			if (isKaTeXCommand(value)) commands.set(key, value);
		}
	}
	if (commands.size === 0) return src;

	// mapping from the position of each open bracket to the position of its
	// close bracket; "()", "{}" and "[]" are matched independently
	const bracketMapping = new Map<number, number>();
	{
		// one stack of pending open positions per bracket kind, keyed by the
		// close bracket
		const pendingOpens = new Map<string, number[]>();
		for (const close of CLOSE_BRACKET_OF.values()) pendingOpens.set(close, []);

		for (let i = 0; i < src.length; ++i) {
			const ch = src.charAt(i);

			if (ch === "\\") {
				// "\{", "\}" and "\\" are escapes: skip the escaped character
				const next = src.charAt(i + 1);
				if (next === "{" || next === "}" || next === "\\") ++i;
				continue;
			}

			const closer = CLOSE_BRACKET_OF.get(ch);
			if (closer !== undefined) {
				pendingOpens.get(closer)?.push(i);
				continue;
			}

			// undefined for non-brackets and for close brackets without a
			// pending open bracket; both are ignored
			const openPos = pendingOpens.get(ch)?.pop();
			if (openPos !== undefined) bracketMapping.set(openPos, i);
		}
	}

	const isAsciiLetter = (ch: string): boolean =>
		(ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
	const isWhitespace = (ch: string): boolean =>
		ch === " " || ch === "\t" || ch === "\n" || ch === "\r";

	/** Substitutes the (already expanded) arguments into a command's rule. */
	function expandSingleKaTeXCommand(
		expandedArgs: string[],
		command: KaTeXCommand,
	): string {
		let result = "";
		for (const block of command.rule)
			result += typeof block === "string" ? block : expandedArgs[block - 1] ?? "";
		return result;
	}

	/**
	 * Locates `count` consecutive arguments wrapped in `openBracket`, starting
	 * at `from` and lying entirely before `limit`.
	 * Returns the [begin, end) range of each argument's content, or
	 * `undefined` if not enough well-formed arguments are present.
	 */
	function findArgRanges(
		from: number,
		limit: number,
		openBracket: string,
		count: number,
	): [number, number][] | undefined {
		const ranges: [number, number][] = [];
		let pos = from;

		for (let i = 0; i < count; ++i) {
			while (pos < limit && isWhitespace(src.charAt(pos))) ++pos;
			if (pos >= limit || src.charAt(pos) !== openBracket) return undefined;

			const closePos = bracketMapping.get(pos);
			if (closePos === undefined || closePos >= limit) return undefined;

			ranges.push([pos + 1, closePos]);
			pos = closePos + 1;
		}

		return ranges;
	}

	// bounds the amount of work and the recursion depth
	let numberOfExpansions = 0;
	const maxNumberOfExpansions = 200;

	// only expand src.slice(beginPos, endPos)
	function expandKaTeXCommandImpl(beginPos: number, endPos: number): string {
		if (endPos <= beginPos) return "";

		const raw = src.slice(beginPos, endPos);

		if (maxNumberOfExpansions <= numberOfExpansions) return raw;
		++numberOfExpansions;

		let checkedPos = src.indexOf("\\", beginPos);

		while (checkedPos !== -1 && checkedPos < endPos) {
			let nameEndPos = checkedPos + 1;
			while (nameEndPos < endPos && isAsciiLetter(src.charAt(nameEndPos)))
				++nameEndPos;

			if (nameEndPos === checkedPos + 1) {
				// a backslash followed by a non-letter (e.g. "\\", "\{") is not
				// a command name; skip the escaped character as well
				checkedPos = src.indexOf("\\", checkedPos + 2);
				continue;
			}

			const name = src.slice(checkedPos + 1, nameEndPos);

			// is it a custom command without args?
			const plainCommand = commands.get(name);
			if (plainCommand !== undefined && plainCommand.args === 0) {
				return (
					src.slice(beginPos, checkedPos) +
					expandSingleKaTeXCommand([], plainCommand) +
					expandKaTeXCommandImpl(nameEndPos, endPos)
				);
			}

			// is it a custom command with args?
			let openBracketPos = nameEndPos;
			while (openBracketPos < endPos && isWhitespace(src.charAt(openBracketPos)))
				++openBracketPos;

			const openBracket = openBracketPos < endPos ? src.charAt(openBracketPos) : "";
			const command = CLOSE_BRACKET_OF.has(openBracket)
				? commands.get(name + openBracket)
				: undefined;

			if (command !== undefined) {
				const argRanges = findArgRanges(
					openBracketPos,
					endPos,
					openBracket,
					command.args,
				);

				if (argRanges !== undefined) {
					const expandedArgs = argRanges.map(([argBegin, argEnd]) =>
						expandKaTeXCommandImpl(argBegin, argEnd),
					);
					const lastRange = argRanges[argRanges.length - 1];
					const restBeginPos =
						lastRange !== undefined ? lastRange[1] + 1 : nameEndPos;

					return (
						src.slice(beginPos, checkedPos) +
						expandSingleKaTeXCommand(expandedArgs, command) +
						expandKaTeXCommandImpl(restBeginPos, endPos)
					);
				}
			}

			// not expandable here; keep it verbatim and look further
			checkedPos = src.indexOf("\\", checkedPos + 1);
		}

		// there is no command to expand
		return raw;
	}

	return expandKaTeXCommandImpl(0, src.length);
}