241 lines
6.5 KiB
JavaScript
241 lines
6.5 KiB
JavaScript
/**
 * @fileoverview Utility functions to locate the source text of each code unit in the value of a string literal or template token.
 * @author Francesco Trotta
 */
|
|
|
|
"use strict";
|
|
|
|
/**
 * A code unit obtained by evaluating a JavaScript token such as a string literal
 * or a template token, together with the source text it was produced from.
 */
class CodeUnit {

    /**
     * @param {number} start Index at which the source text of this code unit begins.
     * @param {string} source The source text from which this code unit was produced.
     */
    constructor(start, source) {
        Object.assign(this, { start, source });
    }

    /**
     * Number of characters in the source text of this code unit.
     * @returns {number} The length of `source`.
     */
    get length() {
        const { source } = this;

        return source.length;
    }

    /**
     * Index just past the last character of the source text of this code unit.
     * @returns {number} The sum of `start` and `length`.
     */
    get end() {
        const { start, length } = this;

        return start + length;
    }
}
|
|
|
|
/**
 * A cursor over a source text that remembers the position from which the next characters are read.
 */
class TextReader {

    /**
     * @param {string} source The text to read from. Reading starts at index 0.
     */
    constructor(source) {
        this.source = source;
        this.pos = 0;
    }

    /**
     * Moves the reading position forward by the given number of characters.
     * @param {number} length Number of characters to advance.
     * @returns {void}
     */
    advance(length) {
        this.pos = this.pos + length;
    }

    /**
     * Returns characters from the source without moving the reading position.
     * @param {number} [offset=0] The offset where reading starts, relative to the current position.
     * @param {number} [length=1] Number of characters to read.
     * @returns {string} A substring of source characters; shorter than `length` when the source runs out.
     */
    read(offset = 0, length = 1) {
        const from = this.pos + offset;
        const to = from + length;

        return this.source.slice(from, to);
    }
}
|
|
|
|
/*
 * Single-character escape sequences, keyed by the character that follows the backslash
 * and mapped to the code unit they evaluate to. The object has no prototype, so looking
 * up an arbitrary character never finds an inherited `Object.prototype` member.
 */
const SIMPLE_ESCAPE_SEQUENCES = Object.assign(Object.create(null), {
    b: "\b",
    f: "\f",
    n: "\n",
    r: "\r",
    t: "\t",
    v: "\v"
});
|
|
|
|
/**
 * Reads the digits of a hexadecimal escape sequence and evaluates them.
 * @param {TextReader} reader A reader positioned on the first hexadecimal digit; it is advanced past the digits.
 * @param {number} length The number of hexadecimal digits to consume.
 * @returns {string} The code unit whose value is given by the digits.
 */
function readHexSequence(reader, length) {
    const hexDigits = reader.read(0, length);

    reader.advance(length);

    return String.fromCharCode(Number.parseInt(hexDigits, 16));
}
|
|
|
|
/**
 * Reads the part of a Unicode escape sequence that follows the "u" and evaluates it.
 * Both the braced form (`{` hexadecimal digits `}`) and the form with exactly four
 * hexadecimal digits are handled.
 * @param {TextReader} reader The reader should be positioned after the "u"; it is advanced past the sequence.
 * @returns {string} The evaluated text: one code unit, or two for a braced code point above U+FFFF.
 */
function readUnicodeSequence(reader) {

    // The sticky flag anchors the match at `lastIndex`, i.e. at the current reading position.
    const bracedCodePoint = /\{(?<hexDigits>[\dA-Fa-f]+)\}/uy;

    bracedCodePoint.lastIndex = reader.pos;
    const found = bracedCodePoint.exec(reader.source);

    if (found === null) {

        // No braces: the sequence consists of four hexadecimal digits.
        return readHexSequence(reader, 4);
    }

    // After a successful sticky match, `lastIndex` points just past the closing brace.
    reader.pos = bracedCodePoint.lastIndex;

    const { hexDigits } = found.groups;

    return String.fromCodePoint(Number.parseInt(hexDigits, 16));
}
|
|
|
|
/**
 * Reads the remaining digits of an octal escape sequence and evaluates the whole sequence.
 * @param {TextReader} reader The reader should be positioned after the first octal digit; it is advanced past any further digits.
 * @param {number} maxLength The maximum number of octal digits, the first one included.
 * @returns {string} The code unit whose value is given by the octal digits.
 */
function readOctalSequence(reader, maxLength) {

    // Look back one character so that the already consumed first digit is part of the match.
    const candidate = reader.read(-1, maxLength);
    const octalDigits = candidate.match(/^[0-7]+/u)[0];
    const charCode = Number.parseInt(octalDigits, 8);

    // The first digit was consumed by the caller, only the following ones are skipped here.
    reader.advance(octalDigits.length - 1);

    return String.fromCharCode(charCode);
}
|
|
|
|
/**
 * Reads an escape sequence or a line continuation and evaluates it.
 * @param {TextReader} reader The reader should be positioned on the backslash; it is advanced past the whole sequence.
 * @returns {string} A string of zero, one or two code units. Line continuations evaluate to the empty string.
 */
function readEscapeSequenceOrLineContinuation(reader) {
    const escaped = reader.read(1);

    // Step over the backslash and the character that follows it.
    reader.advance(2);

    const simpleUnit = SIMPLE_ESCAPE_SEQUENCES[escaped];

    if (simpleUnit) {
        return simpleUnit;
    }

    if (escaped === "x") {
        return readHexSequence(reader, 2);
    }

    if (escaped === "u") {
        return readUnicodeSequence(reader);
    }

    if (escaped === "\r") {

        // A backslash followed by CR LF is a single line continuation: consume the LF as well.
        if (reader.read() === "\n") {
            reader.advance(1);
        }
        return "";
    }

    if (escaped === "\n" || escaped === "\u2028" || escaped === "\u2029") {
        return "";
    }

    // Octal sequences that start with 0-3 may have up to three digits, the others up to two.
    if (/^[0-3]$/u.test(escaped)) {
        return readOctalSequence(reader, 3);
    }

    if (/^[4-7]$/u.test(escaped)) {
        return readOctalSequence(reader, 2);
    }

    // Any other escaped character stands for itself.
    return escaped;
}
|
|
|
|
/**
 * Reads an escape sequence or line continuation and produces one `CodeUnit` for each
 * code unit it evaluates to, two at most. All produced elements share the same start
 * index and the same source text, namely the text of the whole sequence.
 * @param {TextReader} reader The reader should be positioned on the backslash; it is advanced past the sequence.
 * @returns {Generator<CodeUnit>} Zero, one or two `CodeUnit` elements.
 */
function *mapEscapeSequenceOrLineContinuation(reader) {
    const start = reader.pos;
    const { length: evaluatedLength } = readEscapeSequenceOrLineContinuation(reader);
    const source = reader.source.slice(start, reader.pos);

    // A line continuation yields nothing, and a sequence never yields more than two elements.
    const unitCount = Math.min(evaluatedLength, 2);

    for (let index = 0; index < unitCount; index++) {
        yield new CodeUnit(start, source);
    }
}
|
|
|
|
/**
 * Parses a string literal.
 *
 * The first character of `source` is taken as the quote, and parsing stops at the next
 * unescaped occurrence of that character. If the end of `source` is reached first
 * (unterminated literal), parsing stops there and the code units found so far are returned.
 * @param {string} source The string literal to parse, including the delimiting quotes.
 * @returns {CodeUnit[]} A list of code units produced by the string literal.
 */
function parseStringLiteral(source) {
    const reader = new TextReader(source);
    const quote = reader.read();

    reader.advance(1);
    const codeUnits = [];

    for (;;) {
        const char = reader.read();

        /*
         * `read()` returns an empty string once the position is past the end of the source.
         * Without this check an unterminated literal would never leave the loop, because an
         * empty string is neither the quote nor a backslash.
         */
        if (char === quote || char === "") {
            break;
        }
        if (char === "\\") {
            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
        } else {
            codeUnits.push(new CodeUnit(reader.pos, char));
            reader.advance(1);
        }
    }
    return codeUnits;
}
|
|
|
|
/**
 * Parses a template token.
 *
 * The first character of `source` (the opening delimiter) is skipped, and parsing stops at
 * a closing `` ` `` or at `${`. If the end of `source` is reached first (unterminated token),
 * parsing stops there and the code units found so far are returned.
 * @param {string} source The template token to parse, including the delimiting sequences `` ` ``, `${` and `}`.
 * @returns {CodeUnit[]} A list of code units produced by the template token.
 */
function parseTemplateToken(source) {
    const reader = new TextReader(source);

    reader.advance(1);
    const codeUnits = [];

    for (;;) {
        const char = reader.read();

        /*
         * `read()` returns an empty string once the position is past the end of the source.
         * Without this check an unterminated token would never leave the loop.
         */
        if (char === "") {
            break;
        }
        if (char === "`" || (char === "$" && reader.read(1) === "{")) {
            break;
        }
        if (char === "\\") {
            codeUnits.push(...mapEscapeSequenceOrLineContinuation(reader));
        } else {

            // A CR LF pair in the source text of a template is mapped to a single code unit.
            const unitSource = char === "\r" && reader.read(1) === "\n" ? "\r\n" : char;

            codeUnits.push(new CodeUnit(reader.pos, unitSource));
            reader.advance(unitSource.length);
        }
    }
    return codeUnits;
}
|
|
|
|
module.exports = { parseStringLiteral, parseTemplateToken };
|