mirror of
https://github.com/nodejs/node.git
synced 2025-04-29 22:40:57 +00:00

PR-URL: https://github.com/nodejs/node/pull/45736 Reviewed-By: Yagiz Nizipli <yagiz@nizipli.com> Reviewed-By: Moshe Atlow <moshe@atlow.co.il>
541 lines
12 KiB
JavaScript
541 lines
12 KiB
JavaScript
'use strict';
|
|
|
|
const {
|
|
ArrayPrototypePop,
|
|
ArrayPrototypePush,
|
|
MathMax,
|
|
SafeSet,
|
|
StringPrototypeCodePointAt,
|
|
StringPrototypeTrim,
|
|
} = primordials;
|
|
const {
|
|
codes: { ERR_TAP_LEXER_ERROR },
|
|
} = require('internal/errors');
|
|
|
|
const { isZeroWidthCodePoint } = require('internal/util/inspect');
|
|
|
|
const kEOL = '';
|
|
const kEOF = '';
|
|
|
|
const TokenKind = {
|
|
EOF: 'EOF',
|
|
EOL: 'EOL',
|
|
NEWLINE: 'NewLine',
|
|
NUMERIC: 'Numeric',
|
|
LITERAL: 'Literal',
|
|
KEYWORD: 'Keyword',
|
|
WHITESPACE: 'Whitespace',
|
|
COMMENT: 'Comment',
|
|
DASH: 'Dash',
|
|
PLUS: 'Plus',
|
|
HASH: 'Hash',
|
|
ESCAPE: 'Escape',
|
|
UNKNOWN: 'Unknown',
|
|
|
|
// TAP tokens
|
|
TAP: 'TAPKeyword',
|
|
TAP_VERSION: 'VersionKeyword',
|
|
TAP_PLAN: 'PlanKeyword',
|
|
TAP_TEST_POINT: 'TestPointKeyword',
|
|
TAP_SUBTEST_POINT: 'SubTestPointKeyword',
|
|
TAP_TEST_OK: 'TestOkKeyword',
|
|
TAP_TEST_NOTOK: 'TestNotOkKeyword',
|
|
TAP_YAML_START: 'YamlStartKeyword',
|
|
TAP_YAML_END: 'YamlEndKeyword',
|
|
TAP_YAML_BLOCK: 'YamlKeyword',
|
|
TAP_PRAGMA: 'PragmaKeyword',
|
|
TAP_BAIL_OUT: 'BailOutKeyword',
|
|
};
|
|
|
|
class Token {
|
|
constructor({ kind, value, stream }) {
|
|
const valueLength = ('' + value).length;
|
|
this.kind = kind;
|
|
this.value = value;
|
|
this.location = {
|
|
line: stream.line,
|
|
column: MathMax(stream.column - valueLength + 1, 1), // 1 based
|
|
start: MathMax(stream.pos - valueLength, 0), // zero based
|
|
end: stream.pos - (value === '' ? 0 : 1), // zero based
|
|
};
|
|
|
|
// EOF is a special case
|
|
if (value === TokenKind.EOF) {
|
|
const eofPosition = stream.input.length + 1; // We consider EOF to be outside the stream
|
|
this.location.start = eofPosition;
|
|
this.location.end = eofPosition;
|
|
this.location.column = stream.column + 1; // 1 based
|
|
}
|
|
}
|
|
}
|
|
|
|
class InputStream {
|
|
constructor(input) {
|
|
this.input = input;
|
|
this.pos = 0;
|
|
this.column = 0;
|
|
this.line = 1;
|
|
}
|
|
|
|
eof() {
|
|
return this.peek() === undefined;
|
|
}
|
|
|
|
peek(offset = 0) {
|
|
return this.input[this.pos + offset];
|
|
}
|
|
|
|
next() {
|
|
const char = this.peek();
|
|
if (char === undefined) {
|
|
return undefined;
|
|
}
|
|
|
|
this.pos++;
|
|
this.column++;
|
|
if (char === '\n') {
|
|
this.line++;
|
|
this.column = 0;
|
|
}
|
|
|
|
return char;
|
|
}
|
|
}
|
|
|
|
class TapLexer {
|
|
static Keywords = new SafeSet([
|
|
'TAP',
|
|
'version',
|
|
'ok',
|
|
'not',
|
|
'...',
|
|
'---',
|
|
'..',
|
|
'pragma',
|
|
'-',
|
|
'+',
|
|
|
|
// NOTE: "Skip", "Todo" and "Bail out!" literals are deferred to the parser
|
|
]);
|
|
|
|
#isComment = false;
|
|
#source = null;
|
|
#line = 1;
|
|
#column = 0;
|
|
#escapeStack = [];
|
|
#lastScannedToken = null;
|
|
|
|
constructor(source) {
|
|
this.#source = new InputStream(source);
|
|
this.#lastScannedToken = new Token({
|
|
kind: TokenKind.EOL,
|
|
value: kEOL,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
scan() {
|
|
const tokens = [];
|
|
let chunk = [];
|
|
while (!this.eof()) {
|
|
const token = this.#scanToken();
|
|
|
|
// Remember the last scanned token (except for whitespace)
|
|
if (token.kind !== TokenKind.WHITESPACE) {
|
|
this.#lastScannedToken = token;
|
|
}
|
|
|
|
ArrayPrototypePush(chunk, token);
|
|
if (token.kind === TokenKind.NEWLINE) {
|
|
// Store the current chunk + NEWLINE token
|
|
ArrayPrototypePush(tokens, chunk);
|
|
chunk = [];
|
|
}
|
|
}
|
|
|
|
if (chunk.length > 0) {
|
|
ArrayPrototypePush(chunk, this.#scanEOL());
|
|
ArrayPrototypePush(tokens, chunk);
|
|
}
|
|
|
|
// send EOF as a separate chunk
|
|
ArrayPrototypePush(tokens, [this.#scanEOF()]);
|
|
|
|
return tokens;
|
|
}
|
|
|
|
next() {
|
|
return this.#source.next();
|
|
}
|
|
|
|
eof() {
|
|
return this.#source.eof();
|
|
}
|
|
|
|
error(message, token, expected = '') {
|
|
this.#source.error(message, token, expected);
|
|
}
|
|
|
|
#scanToken() {
|
|
const char = this.next();
|
|
|
|
if (this.#isEOFSymbol(char)) {
|
|
return this.#scanEOF();
|
|
} else if (this.#isNewLineSymbol(char)) {
|
|
return this.#scanNewLine(char);
|
|
} else if (this.#isNumericSymbol(char)) {
|
|
return this.#scanNumeric(char);
|
|
} else if (this.#isDashSymbol(char)) {
|
|
return this.#scanDash(char);
|
|
} else if (this.#isPlusSymbol(char)) {
|
|
return this.#scanPlus(char);
|
|
} else if (this.#isHashSymbol(char)) {
|
|
return this.#scanHash(char);
|
|
} else if (this.#isEscapeSymbol(char)) {
|
|
return this.#scanEscapeSymbol(char);
|
|
} else if (this.#isWhitespaceSymbol(char)) {
|
|
return this.#scanWhitespace(char);
|
|
} else if (this.#isLiteralSymbol(char)) {
|
|
return this.#scanLiteral(char);
|
|
}
|
|
|
|
throw new ERR_TAP_LEXER_ERROR(
|
|
`Unexpected character: ${char} at line ${this.#line}, column ${
|
|
this.#column
|
|
}`,
|
|
);
|
|
}
|
|
|
|
#scanNewLine(char) {
|
|
// In case of odd number of ESCAPE symbols, we need to clear the remaining
|
|
// escape chars from the stack and start fresh for the next line.
|
|
this.#escapeStack = [];
|
|
|
|
// We also need to reset the comment flag
|
|
this.#isComment = false;
|
|
|
|
return new Token({
|
|
kind: TokenKind.NEWLINE,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanEOL() {
|
|
return new Token({
|
|
kind: TokenKind.EOL,
|
|
value: kEOL,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanEOF() {
|
|
this.#isComment = false;
|
|
|
|
return new Token({
|
|
kind: TokenKind.EOF,
|
|
value: kEOF,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanEscapeSymbol(char) {
|
|
// If the escape symbol has been escaped (by previous symbol),
|
|
// or if the next symbol is a whitespace symbol,
|
|
// then consume it as a literal.
|
|
if (
|
|
this.#hasTheCurrentCharacterBeenEscaped() ||
|
|
this.#source.peek(1) === TokenKind.WHITESPACE
|
|
) {
|
|
ArrayPrototypePop(this.#escapeStack);
|
|
return new Token({
|
|
kind: TokenKind.LITERAL,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
// Otherwise, consume the escape symbol as an escape symbol that should be ignored by the parser
|
|
// we also need to push the escape symbol to the escape stack
|
|
// and consume the next character as a literal (done in the next turn)
|
|
ArrayPrototypePush(this.#escapeStack, char);
|
|
return new Token({
|
|
kind: TokenKind.ESCAPE,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanWhitespace(char) {
|
|
return new Token({
|
|
kind: TokenKind.WHITESPACE,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanDash(char) {
|
|
// Peek next 3 characters and check if it's a YAML start marker
|
|
const marker = char + this.#source.peek() + this.#source.peek(1);
|
|
|
|
if (this.#isYamlStartSymbol(marker)) {
|
|
this.next(); // consume second -
|
|
this.next(); // consume third -
|
|
|
|
return new Token({
|
|
kind: TokenKind.TAP_YAML_START,
|
|
value: marker,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
return new Token({
|
|
kind: TokenKind.DASH,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanPlus(char) {
|
|
return new Token({
|
|
kind: TokenKind.PLUS,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanHash(char) {
|
|
const lastCharacter = this.#source.peek(-2);
|
|
const nextToken = this.#source.peek();
|
|
|
|
// If we encounter a hash symbol at the beginning of a line,
|
|
// we consider it as a comment
|
|
if (!lastCharacter || this.#isNewLineSymbol(lastCharacter)) {
|
|
this.#isComment = true;
|
|
return new Token({
|
|
kind: TokenKind.COMMENT,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
// The only valid case where a hash symbol is considered as a hash token
|
|
// is when it's preceded by a whitespace symbol and followed by a non-hash symbol
|
|
if (
|
|
this.#isWhitespaceSymbol(lastCharacter) &&
|
|
!this.#isHashSymbol(nextToken)
|
|
) {
|
|
return new Token({
|
|
kind: TokenKind.HASH,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
const charHasBeenEscaped = this.#hasTheCurrentCharacterBeenEscaped();
|
|
if (this.#isComment || charHasBeenEscaped) {
|
|
if (charHasBeenEscaped) {
|
|
ArrayPrototypePop(this.#escapeStack);
|
|
}
|
|
|
|
return new Token({
|
|
kind: TokenKind.LITERAL,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
// As a fallback, we consume the hash symbol as a literal
|
|
return new Token({
|
|
kind: TokenKind.LITERAL,
|
|
value: char,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanLiteral(char) {
|
|
let word = char;
|
|
while (!this.#source.eof()) {
|
|
const nextChar = this.#source.peek();
|
|
if (this.#isLiteralSymbol(nextChar)) {
|
|
word += this.#source.next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
|
|
word = StringPrototypeTrim(word);
|
|
|
|
if (TapLexer.Keywords.has(word)) {
|
|
const token = this.#scanTAPKeyword(word);
|
|
if (token) {
|
|
return token;
|
|
}
|
|
}
|
|
|
|
if (this.#isYamlEndSymbol(word)) {
|
|
return new Token({
|
|
kind: TokenKind.TAP_YAML_END,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
return new Token({
|
|
kind: TokenKind.LITERAL,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#scanTAPKeyword(word) {
|
|
const isLastScannedTokenEOLorNewLine =
|
|
TokenKind.EOL === this.#lastScannedToken.kind ||
|
|
TokenKind.NEWLINE === this.#lastScannedToken.kind;
|
|
|
|
if (word === 'TAP' && isLastScannedTokenEOLorNewLine) {
|
|
return new Token({
|
|
kind: TokenKind.TAP,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
if (word === 'version' && this.#lastScannedToken.kind === TokenKind.TAP) {
|
|
return new Token({
|
|
kind: TokenKind.TAP_VERSION,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
if (word === '..' && this.#lastScannedToken.kind === TokenKind.NUMERIC) {
|
|
return new Token({
|
|
kind: TokenKind.TAP_PLAN,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
if (word === 'not' && isLastScannedTokenEOLorNewLine) {
|
|
return new Token({
|
|
kind: TokenKind.TAP_TEST_NOTOK,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
if (
|
|
word === 'ok' &&
|
|
(this.#lastScannedToken.kind === TokenKind.TAP_TEST_NOTOK ||
|
|
isLastScannedTokenEOLorNewLine)
|
|
) {
|
|
return new Token({
|
|
kind: TokenKind.TAP_TEST_OK,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
if (word === 'pragma' && isLastScannedTokenEOLorNewLine) {
|
|
return new Token({
|
|
kind: TokenKind.TAP_PRAGMA,
|
|
value: word,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
return null;
|
|
}
|
|
|
|
#scanNumeric(char) {
|
|
let number = char;
|
|
while (!this.#source.eof()) {
|
|
const nextChar = this.#source.peek();
|
|
if (this.#isNumericSymbol(nextChar)) {
|
|
number += nextChar;
|
|
this.#source.next();
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
return new Token({
|
|
kind: TokenKind.NUMERIC,
|
|
value: number,
|
|
stream: this.#source,
|
|
});
|
|
}
|
|
|
|
#hasTheCurrentCharacterBeenEscaped() {
|
|
// Use the escapeStack to keep track of the escape characters
|
|
return this.#escapeStack.length > 0;
|
|
}
|
|
|
|
#isNumericSymbol(char) {
|
|
return char >= '0' && char <= '9';
|
|
}
|
|
|
|
#isLiteralSymbol(char) {
|
|
if (typeof char !== 'string') return false;
|
|
const charCode = StringPrototypeCodePointAt(char);
|
|
|
|
if (isZeroWidthCodePoint(charCode)) return false;
|
|
if (this.#isWhitespaceSymbol(char)) return false;
|
|
const MAX_ASCII_CHAR_CODE = 0b111_1111; // ASCII is 7-bit long
|
|
// Allow all non-latin characters.
|
|
if (charCode > MAX_ASCII_CHAR_CODE) return true;
|
|
const ZERO = 48; // 0
|
|
const NINE = 58; // 9
|
|
// Disallow numeric values
|
|
if (charCode >= ZERO && char <= NINE) return false;
|
|
|
|
// Disallow characters with special meaning in TAP
|
|
const HASH = 35; // #
|
|
const BACKSLASH = 92; // \
|
|
const PLUS = 43; // +
|
|
const DASH = 45; // -
|
|
|
|
// Disallow characters with special meaning in TAP
|
|
return charCode !== HASH && charCode !== BACKSLASH &&
|
|
charCode !== PLUS && charCode !== DASH;
|
|
}
|
|
|
|
#isWhitespaceSymbol(char) {
|
|
return char === ' ' || char === '\t';
|
|
}
|
|
|
|
#isEOFSymbol(char) {
|
|
return char === undefined;
|
|
}
|
|
|
|
#isNewLineSymbol(char) {
|
|
return char === '\n' || char === '\r';
|
|
}
|
|
|
|
#isHashSymbol(char) {
|
|
return char === '#';
|
|
}
|
|
|
|
#isDashSymbol(char) {
|
|
return char === '-';
|
|
}
|
|
|
|
#isPlusSymbol(char) {
|
|
return char === '+';
|
|
}
|
|
|
|
#isEscapeSymbol(char) {
|
|
return char === '\\';
|
|
}
|
|
|
|
#isYamlStartSymbol(char) {
|
|
return char === '---';
|
|
}
|
|
|
|
#isYamlEndSymbol(char) {
|
|
return char === '...';
|
|
}
|
|
}
|
|
|
|
module.exports = { TapLexer, TokenKind };
|