Create a KaTeX macro (small subset version) parser and expander
This commit is contained in:
parent
450f798f12
commit
da17e8978a
|
@ -0,0 +1,280 @@
|
||||||
|
/**
 * A parsed `\newcommand` definition.
 *
 * `args` is the number of arguments the command takes, and `rule` is the
 * replacement body split into pieces: a string piece is literal text, a
 * number piece `n` stands for the n-th argument (1-based, i.e. `#n`).
 */
type KaTeXCommand = {
  args: number;
  rule: Array<string | number>;
};
|
||||||
|
|
||||||
|
/**
 * Parses one `\newcommand` line into a lookup key and its definition.
 *
 * Accepted shape: `\newcommand{\name}[argCount]{rule}`, where the name may be
 * wrapped in `{}`, `()` or `[]` and `[argCount]` is optional (defaults to 0).
 * Spaces are allowed between the parts.
 *
 * @param src A single, already trimmed definition line.
 * @returns `[key, command]`. For a command without arguments the key is the
 *   bare name; otherwise it is the name followed by the opening bracket that
 *   wrapped the name in the definition (e.g. `"foo{"`). An unparsable line
 *   yields `["", { args: 0, rule: [] }]`.
 */
function parseSingleKaTeXCommand(src: string): [string, KaTeXCommand] {
  const invalid: [string, KaTeXCommand] = ["", { args: 0, rule: [] }];
  const prefix = "\\newcommand";
  const closerOf = new Map<string, string>([["{", "}"], ["(", ")"], ["[", "]"]]);

  const skipSpaces = (pos: number): number => {
    while (src.charAt(pos) === " ")
      ++pos;
    return pos;
  };
  const isDigit = (ch: string): boolean => ch >= "0" && ch <= "9";

  // The final "}" is taken to be the one closing the rule.
  if (!src.startsWith(prefix) || !src.endsWith("}"))
    return invalid;

  // --- {\name}, (\name) or [\name] ---
  let pos = skipSpaces(prefix.length);
  const openBracket = src.charAt(pos);
  const closeBracket = closerOf.get(openBracket);
  if (closeBracket === undefined)
    return invalid;

  pos = skipSpaces(pos + 1);
  if (src.charAt(pos) !== "\\")
    return invalid;

  const nameClosePos = src.indexOf(closeBracket, pos);
  if (nameClosePos === -1)
    return invalid;

  const name = src.slice(pos + 1, nameClosePos).trim();
  if (!/^[a-zA-Z]+$/.test(name))
    return invalid;

  pos = skipSpaces(nameClosePos + 1);

  // --- optional [number of arguments] ---
  let args = 0;
  if (src.charAt(pos) === "[") {
    const countClosePos = src.indexOf("]", pos);
    if (countClosePos === -1)
      return invalid;

    // Must be a finite, non-negative integer: the expander allocates an array
    // of this length, which throws for values such as 1.5.
    const count = Number(src.slice(pos + 1, countClosePos).trim());
    if (!Number.isInteger(count) || count < 0)
      return invalid;

    args = count;
    pos = skipSpaces(countClosePos + 1);
  }

  // --- {rule} ---
  if (src.charAt(pos) !== "{")
    return invalid;

  const rule: (string | number)[] = [];
  const ruleEnd = src.length - 1; // index of the closing "}"
  let literalStart = skipSpaces(pos + 1);
  let i = literalStart;

  while (i < ruleEnd) {
    const ch = src.charAt(i);

    if (ch === "\\") {
      // A backslash escapes exactly the next character (`\#`, `\\`, ...), so
      // that character can never start a parameter reference.
      i += 2;
      continue;
    }
    if (ch !== "#") {
      ++i;
      continue;
    }

    // `#` followed by a run of digits is a parameter reference.
    let digitsEnd = i + 1;
    while (digitsEnd < ruleEnd && isDigit(src.charAt(digitsEnd)))
      ++digitsEnd;

    // An empty digit run converts to 0 and is rejected by the range check.
    const argIndex = Number(src.slice(i + 1, digitsEnd));
    if (argIndex < 1 || args < argIndex)
      return invalid;

    if (literalStart !== i)
      rule.push(src.slice(literalStart, i));
    rule.push(argIndex);

    literalStart = digitsEnd;
    i = digitsEnd;
  }

  if (literalStart < ruleEnd)
    rule.push(src.slice(literalStart, ruleEnd));

  const command: KaTeXCommand = { args, rule };
  return args === 0 ? [name, command] : [name + openBracket, command];
}
|
||||||
|
|
||||||
|
/**
 * Parses a block of `\newcommand` definitions, one per line, and serialises
 * the resulting command table as JSON.
 *
 * Lines that are not valid definitions are skipped. When the same key is
 * defined more than once, the last definition wins.
 *
 * @param src Newline-separated definitions.
 * @returns A JSON string mapping command keys to their definitions.
 */
export function parseKaTeXCommands(src: string): string {
  const table: { [name: string]: KaTeXCommand } = {};

  src
    .split("\n")
    .map((line) => parseSingleKaTeXCommand(line.trim()))
    .filter(([key]) => key !== "")
    .forEach(([key, definition]) => {
      table[key] = definition;
    });

  return JSON.stringify(table);
}
|
||||||
|
|
||||||
|
/**
 * Expands user-defined KaTeX commands occurring in `src`.
 *
 * `commandsAsJsonString` is a JSON object mapping a command key to
 * `{ args, rule }`. The key of a command without arguments is its bare name
 * (`"R"`); the key of a command with arguments is the name followed by the
 * opening bracket that delimits its arguments (`"pair{"`). In `rule`, string
 * pieces are copied verbatim and number pieces `n` are replaced by the n-th
 * argument (1-based).
 *
 * Behaviour:
 * - A command name is the maximal run of ASCII letters after a backslash.
 * - A command with arguments must be followed (optionally after whitespace)
 *   by exactly `args` balanced bracket groups of its bracket kind; otherwise
 *   the text is left untouched.
 * - Arguments are expanded recursively; the text produced by a rule is not
 *   re-scanned.
 * - `\{`, `\}` and `\\` are escapes and never count as brackets or as the
 *   start of a command.
 * - At most 200 commands are expanded per call; the remaining text is
 *   returned verbatim.
 * - Malformed entries in the command table are ignored.
 *
 * @param src Text to expand.
 * @param commandsAsJsonString JSON command table (e.g. from `parseKaTeXCommands`).
 * @returns `src` with all recognised commands replaced.
 * @throws SyntaxError if `commandsAsJsonString` is not valid JSON.
 */
export function expandKaTeXCommand(src: string, commandsAsJsonString: string): string {
  type Command = { args: number; rule: (string | number)[] };

  const isCommand = (value: unknown): value is Command => {
    if (typeof value !== "object" || value === null)
      return false;
    const candidate = value as { args?: unknown; rule?: unknown };
    return typeof candidate.args === "number"
      && Number.isInteger(candidate.args)
      && candidate.args >= 0
      && Array.isArray(candidate.rule)
      && candidate.rule.every((piece: unknown) => typeof piece === "string" || typeof piece === "number");
  };

  // A Map avoids clashes with Object.prototype members such as "constructor"
  // or "hasOwnProperty", which are valid command names.
  const commands = new Map<string, Command>();
  const parsed: unknown = JSON.parse(commandsAsJsonString);
  if (typeof parsed === "object" && parsed !== null) {
    const table = parsed as { [name: string]: unknown };
    for (const key of Object.keys(table)) {
      const entry = table[key];
      if (isCommand(entry))
        commands.set(key, entry);
    }
  }
  if (commands.size === 0)
    return src;

  const isLetter = (ch: string): boolean =>
    (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z");
  const isSpace = (ch: string): boolean =>
    ch === " " || ch === "\t" || ch === "\n" || ch === "\r";
  const isOpenBracket = (ch: string): boolean =>
    ch === "(" || ch === "{" || ch === "[";

  // Position of every matched opening bracket -> position of its closing
  // bracket. The three bracket kinds are tracked independently of each other.
  const closeOf = new Map<number, number>();
  {
    const openerOf = new Map<string, string>([[")", "("], ["}", "{"], ["]", "["]]);
    const pending = new Map<string, number[]>([["(", []], ["{", []], ["[", []]]);

    for (let i = 0; i < src.length; ++i) {
      const ch = src.charAt(i);

      if (ch === "\\") {
        // Skip the escaped character of `\{`, `\}` and `\\`.
        const next = src.charAt(i + 1);
        if (next === "{" || next === "}" || next === "\\")
          ++i;
        continue;
      }

      const openStack = pending.get(ch);
      if (openStack !== undefined) {
        openStack.push(i);
        continue;
      }

      const opener = openerOf.get(ch);
      if (opener === undefined)
        continue;

      // An unmatched closing bracket is simply ignored.
      const openPos = pending.get(opener)?.pop();
      if (openPos !== undefined)
        closeOf.set(openPos, i);
    }
  }

  const skipSpaces = (pos: number, endPos: number): number => {
    while (pos < endPos && isSpace(src.charAt(pos)))
      ++pos;
    return pos;
  };

  // Instantiates a rule with already expanded arguments.
  const substitute = (command: Command, args: readonly string[]): string => {
    let text = "";
    for (const piece of command.rule) {
      if (typeof piece === "string") {
        text += piece;
      } else {
        const arg = args[piece - 1];
        if (arg !== undefined)
          text += arg;
      }
    }
    return text;
  };

  // Bounds both the total work and the recursion depth.
  const maxNumberOfExpansions = 200;
  let numberOfExpansions = 0;

  // Expands src.slice(beginPos, endPos); endPos is exclusive.
  const expandRange = (beginPos: number, endPos: number): string => {
    let output = "";
    let emittedUpTo = beginPos; // src before this index is already in `output`
    let scanPos = beginPos;

    while (scanPos < endPos && numberOfExpansions < maxNumberOfExpansions) {
      const backslashPos = src.indexOf("\\", scanPos);
      if (backslashPos === -1 || backslashPos >= endPos)
        break;

      let nameEndPos = backslashPos + 1;
      while (nameEndPos < endPos && isLetter(src.charAt(nameEndPos)))
        ++nameEndPos;

      if (nameEndPos === backslashPos + 1) {
        // Control symbol (`\\`, `\{`, `\,` ...): step over the escaped
        // character so it is not mistaken for the start of a command.
        scanPos = backslashPos + 2;
        continue;
      }

      const name = src.slice(backslashPos + 1, nameEndPos);

      // Command without arguments.
      const plain = commands.get(name);
      if (plain !== undefined) {
        ++numberOfExpansions;
        output += src.slice(emittedUpTo, backslashPos) + substitute(plain, []);
        emittedUpTo = nameEndPos;
        scanPos = nameEndPos;
        continue;
      }

      // Command with arguments: its key ends with the bracket that follows.
      const firstBracketPos = skipSpaces(nameEndPos, endPos);
      const openBracket = src.charAt(firstBracketPos);
      const command = firstBracketPos < endPos && isOpenBracket(openBracket)
        ? commands.get(name + openBracket)
        : undefined;
      if (command === undefined) {
        scanPos = nameEndPos;
        continue;
      }

      ++numberOfExpansions;
      const args: string[] = [];
      let cursor = nameEndPos;
      while (args.length < command.args) {
        const argOpenPos = skipSpaces(cursor, endPos);
        const argClosePos = argOpenPos < endPos && src.charAt(argOpenPos) === openBracket
          ? closeOf.get(argOpenPos)
          : undefined;
        if (argClosePos === undefined || argClosePos >= endPos)
          break;
        args.push(expandRange(argOpenPos + 1, argClosePos));
        cursor = argClosePos + 1;
      }

      if (args.length < command.args) {
        // Not enough (balanced) arguments: leave this command as written and
        // keep scanning after its name.
        scanPos = nameEndPos;
        continue;
      }

      output += src.slice(emittedUpTo, backslashPos) + substitute(command, args);
      emittedUpTo = cursor;
      scanPos = cursor;
    }

    return output + src.slice(emittedUpTo, endPos);
  };

  return expandRange(0, src.length);
}
|
Loading…
Reference in New Issue