node/lib/internal/readline/utils.js
Ruben Bridgewater 5baae143c7 readline: improve unicode support and tab completion
1. This reduces the number of write operations used during tab
   completion.
2. The tab completion calculated the string width using the length
   of the string instead of using the actual width. That is fixed.
3. The key decoder is now capable of handling characters composed
   out of two code points. That reduces the number of "keypress"
   events that are emitted which again lowers the amount of writes
   triggered.

PR-URL: https://github.com/nodejs/node/pull/31288
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
2020-01-11 14:06:33 -08:00

489 lines
15 KiB
JavaScript

'use strict';
const {
RegExp,
Symbol,
} = primordials;
// Regex used for ansi escape code splitting
// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
// License: MIT, authors: @sindresorhus, Qix-, arjunmehta and LitoMore
// Matches all ansi escape code sequences in a string
const ansiPattern = '[\\u001B\\u009B][[\\]()#;?]*' +
'(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)' +
'|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))';
const ansi = new RegExp(ansiPattern, 'g');
const kUTF16SurrogateThreshold = 0x10000; // 2 ** 16
const kEscape = '\x1b';
const kSubstringSearch = Symbol('kSubstringSearch');
let getStringWidth;
function CSI(strings, ...args) {
let ret = `${kEscape}[`;
for (let n = 0; n < strings.length; n++) {
ret += strings[n];
if (n < args.length)
ret += args[n];
}
return ret;
}
CSI.kEscape = kEscape;
CSI.kClearToLineBeginning = CSI`1K`;
CSI.kClearToLineEnd = CSI`0K`;
CSI.kClearLine = CSI`2K`;
CSI.kClearScreenDown = CSI`0J`;
// TODO(BridgeAR): Treat combined characters as single character, i.e,
// 'a\u0301' and '\u0301a' (both have the same visual output).
// Check Canonical_Combining_Class in
// http://userguide.icu-project.org/strings/properties
function charLengthLeft(str, i) {
if (i <= 0)
return 0;
if ((i > 1 && str.codePointAt(i - 2) >= kUTF16SurrogateThreshold) ||
str.codePointAt(i - 1) >= kUTF16SurrogateThreshold) {
return 2;
}
return 1;
}
function charLengthAt(str, i) {
if (str.length <= i) {
// Pretend to move to the right. This is necessary to autocomplete while
// moving to the right.
return 1;
}
return str.codePointAt(i) >= kUTF16SurrogateThreshold ? 2 : 1;
}
if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
// icu.getStringWidth(string, ambiguousAsFullWidth, expandEmojiSequence)
// Defaults: ambiguousAsFullWidth = false; expandEmojiSequence = true;
// TODO(BridgeAR): Expose the options to the user. That is probably the
// best thing possible at the moment, since it's difficult to know what
// the receiving end supports.
getStringWidth = function getStringWidth(str) {
let width = 0;
str = stripVTControlCharacters(str);
for (let i = 0; i < str.length; i++) {
// Try to avoid calling into C++ by first handling the ASCII portion of
// the string. If it is fully ASCII, we skip the C++ part.
const code = str.charCodeAt(i);
if (code >= 127) {
width += icu.getStringWidth(str.slice(i));
break;
}
width += code >= 32 ? 1 : 0;
}
return width;
};
} else {
/**
* Returns the number of columns required to display the given string.
*/
getStringWidth = function getStringWidth(str) {
let width = 0;
str = stripVTControlCharacters(str);
for (const char of str) {
const code = char.codePointAt(0);
if (isFullWidthCodePoint(code)) {
width += 2;
} else if (!isZeroWidthCodePoint(code)) {
width++;
}
}
return width;
};
/**
* Returns true if the character represented by a given
* Unicode code point is full-width. Otherwise returns false.
*/
const isFullWidthCodePoint = (code) => {
// Code points are partially derived from:
// http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
return code >= 0x1100 && (
code <= 0x115f || // Hangul Jamo
code === 0x2329 || // LEFT-POINTING ANGLE BRACKET
code === 0x232a || // RIGHT-POINTING ANGLE BRACKET
// CJK Radicals Supplement .. Enclosed CJK Letters and Months
(code >= 0x2e80 && code <= 0x3247 && code !== 0x303f) ||
// Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
(code >= 0x3250 && code <= 0x4dbf) ||
// CJK Unified Ideographs .. Yi Radicals
(code >= 0x4e00 && code <= 0xa4c6) ||
// Hangul Jamo Extended-A
(code >= 0xa960 && code <= 0xa97c) ||
// Hangul Syllables
(code >= 0xac00 && code <= 0xd7a3) ||
// CJK Compatibility Ideographs
(code >= 0xf900 && code <= 0xfaff) ||
// Vertical Forms
(code >= 0xfe10 && code <= 0xfe19) ||
// CJK Compatibility Forms .. Small Form Variants
(code >= 0xfe30 && code <= 0xfe6b) ||
// Halfwidth and Fullwidth Forms
(code >= 0xff01 && code <= 0xff60) ||
(code >= 0xffe0 && code <= 0xffe6) ||
// Kana Supplement
(code >= 0x1b000 && code <= 0x1b001) ||
// Enclosed Ideographic Supplement
(code >= 0x1f200 && code <= 0x1f251) ||
// Miscellaneous Symbols and Pictographs 0x1f300 - 0x1f5ff
// Emoticons 0x1f600 - 0x1f64f
(code >= 0x1f300 && code <= 0x1f64f) ||
// CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
(code >= 0x20000 && code <= 0x3fffd)
);
};
const isZeroWidthCodePoint = (code) => {
return code <= 0x1F || // C0 control codes
(code > 0x7F && code <= 0x9F) || // C1 control codes
(code >= 0x0300 && code <= 0x036F) || // Combining Diacritical Marks
(code >= 0x200B && code <= 0x200F) || // Modifying Invisible Characters
(code >= 0xFE00 && code <= 0xFE0F) || // Variation Selectors
(code >= 0xFE20 && code <= 0xFE2F) || // Combining Half Marks
(code >= 0xE0100 && code <= 0xE01EF); // Variation Selectors
};
}
/**
* Tries to remove all VT control characters. Use to estimate displayed
* string width. May be buggy due to not running a real state machine
*/
function stripVTControlCharacters(str) {
return str.replace(ansi, '');
}
/*
Some patterns seen in terminal key escape codes, derived from combos seen
at http://www.midnight-commander.org/browser/lib/tty/key.c
ESC letter
ESC [ letter
ESC [ modifier letter
ESC [ 1 ; modifier letter
ESC [ num char
ESC [ num ; modifier char
ESC O letter
ESC O modifier letter
ESC O 1 ; modifier letter
ESC N letter
ESC [ [ num ; modifier char
ESC [ [ 1 ; modifier letter
ESC ESC [ num char
ESC ESC O letter
- char is usually ~ but $ and ^ also happen with rxvt
- modifier is 1 +
(shift * 1) +
(left_alt * 2) +
(ctrl * 4) +
(right_alt * 8)
- two leading ESCs apparently mean the same as one leading ESC
*/
function* emitKeys(stream) {
while (true) {
let ch = yield;
let s = ch;
let escaped = false;
const key = {
sequence: null,
name: undefined,
ctrl: false,
meta: false,
shift: false
};
if (ch === kEscape) {
escaped = true;
s += (ch = yield);
if (ch === kEscape) {
s += (ch = yield);
}
}
if (escaped && (ch === 'O' || ch === '[')) {
// ANSI escape sequence
let code = ch;
let modifier = 0;
if (ch === 'O') {
// ESC O letter
// ESC O modifier letter
s += (ch = yield);
if (ch >= '0' && ch <= '9') {
modifier = (ch >> 0) - 1;
s += (ch = yield);
}
code += ch;
} else if (ch === '[') {
// ESC [ letter
// ESC [ modifier letter
// ESC [ [ modifier letter
// ESC [ [ num char
s += (ch = yield);
if (ch === '[') {
// \x1b[[A
// ^--- escape codes might have a second bracket
code += ch;
s += (ch = yield);
}
/*
* Here and later we try to buffer just enough data to get
* a complete ascii sequence.
*
* We have basically two classes of ascii characters to process:
*
*
* 1. `\x1b[24;5~` should be parsed as { code: '[24~', modifier: 5 }
*
* This particular example is featuring Ctrl+F12 in xterm.
*
* - `;5` part is optional, e.g. it could be `\x1b[24~`
* - first part can contain one or two digits
*
* So the generic regexp is like /^\d\d?(;\d)?[~^$]$/
*
*
* 2. `\x1b[1;5H` should be parsed as { code: '[H', modifier: 5 }
*
* This particular example is featuring Ctrl+Home in xterm.
*
* - `1;5` part is optional, e.g. it could be `\x1b[H`
* - `1;` part is optional, e.g. it could be `\x1b[5H`
*
* So the generic regexp is like /^((\d;)?\d)?[A-Za-z]$/
*
*/
const cmdStart = s.length - 1;
// Skip one or two leading digits
if (ch >= '0' && ch <= '9') {
s += (ch = yield);
if (ch >= '0' && ch <= '9') {
s += (ch = yield);
}
}
// skip modifier
if (ch === ';') {
s += (ch = yield);
if (ch >= '0' && ch <= '9') {
s += yield;
}
}
/*
* We buffered enough data, now trying to extract code
* and modifier from it
*/
const cmd = s.slice(cmdStart);
let match;
if ((match = cmd.match(/^(\d\d?)(;(\d))?([~^$])$/))) {
code += match[1] + match[4];
modifier = (match[3] || 1) - 1;
} else if ((match = cmd.match(/^((\d;)?(\d))?([A-Za-z])$/))) {
code += match[4];
modifier = (match[3] || 1) - 1;
} else {
code += cmd;
}
}
// Parse the key modifier
key.ctrl = !!(modifier & 4);
key.meta = !!(modifier & 10);
key.shift = !!(modifier & 1);
key.code = code;
// Parse the key itself
switch (code) {
/* xterm/gnome ESC O letter */
case 'OP': key.name = 'f1'; break;
case 'OQ': key.name = 'f2'; break;
case 'OR': key.name = 'f3'; break;
case 'OS': key.name = 'f4'; break;
/* xterm/rxvt ESC [ number ~ */
case '[11~': key.name = 'f1'; break;
case '[12~': key.name = 'f2'; break;
case '[13~': key.name = 'f3'; break;
case '[14~': key.name = 'f4'; break;
/* from Cygwin and used in libuv */
case '[[A': key.name = 'f1'; break;
case '[[B': key.name = 'f2'; break;
case '[[C': key.name = 'f3'; break;
case '[[D': key.name = 'f4'; break;
case '[[E': key.name = 'f5'; break;
/* common */
case '[15~': key.name = 'f5'; break;
case '[17~': key.name = 'f6'; break;
case '[18~': key.name = 'f7'; break;
case '[19~': key.name = 'f8'; break;
case '[20~': key.name = 'f9'; break;
case '[21~': key.name = 'f10'; break;
case '[23~': key.name = 'f11'; break;
case '[24~': key.name = 'f12'; break;
/* xterm ESC [ letter */
case '[A': key.name = 'up'; break;
case '[B': key.name = 'down'; break;
case '[C': key.name = 'right'; break;
case '[D': key.name = 'left'; break;
case '[E': key.name = 'clear'; break;
case '[F': key.name = 'end'; break;
case '[H': key.name = 'home'; break;
/* xterm/gnome ESC O letter */
case 'OA': key.name = 'up'; break;
case 'OB': key.name = 'down'; break;
case 'OC': key.name = 'right'; break;
case 'OD': key.name = 'left'; break;
case 'OE': key.name = 'clear'; break;
case 'OF': key.name = 'end'; break;
case 'OH': key.name = 'home'; break;
/* xterm/rxvt ESC [ number ~ */
case '[1~': key.name = 'home'; break;
case '[2~': key.name = 'insert'; break;
case '[3~': key.name = 'delete'; break;
case '[4~': key.name = 'end'; break;
case '[5~': key.name = 'pageup'; break;
case '[6~': key.name = 'pagedown'; break;
/* putty */
case '[[5~': key.name = 'pageup'; break;
case '[[6~': key.name = 'pagedown'; break;
/* rxvt */
case '[7~': key.name = 'home'; break;
case '[8~': key.name = 'end'; break;
/* rxvt keys with modifiers */
case '[a': key.name = 'up'; key.shift = true; break;
case '[b': key.name = 'down'; key.shift = true; break;
case '[c': key.name = 'right'; key.shift = true; break;
case '[d': key.name = 'left'; key.shift = true; break;
case '[e': key.name = 'clear'; key.shift = true; break;
case '[2$': key.name = 'insert'; key.shift = true; break;
case '[3$': key.name = 'delete'; key.shift = true; break;
case '[5$': key.name = 'pageup'; key.shift = true; break;
case '[6$': key.name = 'pagedown'; key.shift = true; break;
case '[7$': key.name = 'home'; key.shift = true; break;
case '[8$': key.name = 'end'; key.shift = true; break;
case 'Oa': key.name = 'up'; key.ctrl = true; break;
case 'Ob': key.name = 'down'; key.ctrl = true; break;
case 'Oc': key.name = 'right'; key.ctrl = true; break;
case 'Od': key.name = 'left'; key.ctrl = true; break;
case 'Oe': key.name = 'clear'; key.ctrl = true; break;
case '[2^': key.name = 'insert'; key.ctrl = true; break;
case '[3^': key.name = 'delete'; key.ctrl = true; break;
case '[5^': key.name = 'pageup'; key.ctrl = true; break;
case '[6^': key.name = 'pagedown'; key.ctrl = true; break;
case '[7^': key.name = 'home'; key.ctrl = true; break;
case '[8^': key.name = 'end'; key.ctrl = true; break;
/* misc. */
case '[Z': key.name = 'tab'; key.shift = true; break;
default: key.name = 'undefined'; break;
}
} else if (ch === '\r') {
// carriage return
key.name = 'return';
} else if (ch === '\n') {
// Enter, should have been called linefeed
key.name = 'enter';
} else if (ch === '\t') {
// tab
key.name = 'tab';
} else if (ch === '\b' || ch === '\x7f') {
// backspace or ctrl+h
key.name = 'backspace';
key.meta = escaped;
} else if (ch === kEscape) {
// escape key
key.name = 'escape';
key.meta = escaped;
} else if (ch === ' ') {
key.name = 'space';
key.meta = escaped;
} else if (!escaped && ch <= '\x1a') {
// ctrl+letter
key.name = String.fromCharCode(ch.charCodeAt(0) + 'a'.charCodeAt(0) - 1);
key.ctrl = true;
} else if (/^[0-9A-Za-z]$/.test(ch)) {
// Letter, number, shift+letter
key.name = ch.toLowerCase();
key.shift = /^[A-Z]$/.test(ch);
key.meta = escaped;
} else if (escaped) {
// Escape sequence timeout
key.name = ch.length ? undefined : 'escape';
key.meta = true;
}
key.sequence = s;
if (s.length !== 0 && (key.name !== undefined || escaped)) {
/* Named character or sequence */
stream.emit('keypress', escaped ? undefined : s, key);
} else if (charLengthAt(s, 0) === s.length) {
/* Single unnamed character, e.g. "." */
stream.emit('keypress', s, key);
}
/* Unrecognized or broken escape sequence, don't emit anything */
}
}
// This runs in O(n log n).
function commonPrefix(strings) {
if (!strings || strings.length === 0) {
return '';
}
if (strings.length === 1) {
return strings[0];
}
const sorted = strings.slice().sort();
const min = sorted[0];
const max = sorted[sorted.length - 1];
for (let i = 0; i < min.length; i++) {
if (min[i] !== max[i]) {
return min.slice(0, i);
}
}
return min;
}
module.exports = {
charLengthAt,
charLengthLeft,
commonPrefix,
emitKeys,
getStringWidth,
kSubstringSearch,
stripVTControlCharacters,
CSI
};