mirror of
https://github.com/nodejs/node.git
synced 2025-05-07 15:35:41 +00:00

A lot of strings that are going to be passed to `getStringWidth()` are ASCII
strings, for which the calculation is rather easy and calling into C++ can be
skipped.

                                                confidence improvement accuracy (*)    (**)   (***)
misc/getstringwidth.js n=100000 type='ascii'           ***    328.99 %      ±21.73% ±29.25% ±38.77%
misc/getstringwidth.js n=100000 type='emojiseq'                 2.94 %       ±7.66% ±10.19% ±13.26%
misc/getstringwidth.js n=100000 type='fullwidth'                4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: https://github.com/nodejs/node/pull/29301
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Trivikram Kamat <trivikr.dev@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Minwoo Jung <minwoo@nodesource.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
94 lines
3.9 KiB
JavaScript
94 lines
3.9 KiB
JavaScript
// Flags: --expose_internals
'use strict';

// Tests for the internal `getStringWidth()` helper exposed by
// `internal/readline/utils`. Each assertion pins the number of terminal
// columns reported for a code point (passed as a number) or a string.

const common = require('../common');

// The whole test is skipped when Node.js was built without Intl support.
if (!common.hasIntl)
  common.skip('missing Intl');

const assert = require('assert');
const readline = require('internal/readline/utils');

// Test column width

// Ll (Lowercase Letter): LATIN SMALL LETTER A
assert.strictEqual(readline.getStringWidth('a'), 1);
assert.strictEqual(readline.getStringWidth(0x0061), 1);
// Lo (Other Letter)
assert.strictEqual(readline.getStringWidth('丁'), 2);
assert.strictEqual(readline.getStringWidth(0x4E01), 2);
// Surrogate pairs
assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2);
assert.strictEqual(readline.getStringWidth('👅'), 2);
// Cs (Surrogate): High Surrogate
assert.strictEqual(readline.getStringWidth('\ud83d'), 1);
// Cs (Surrogate): Low Surrogate
assert.strictEqual(readline.getStringWidth('\udc78'), 1);
// Cc (Control): NULL
assert.strictEqual(readline.getStringWidth(0), 0);
// Cc (Control): BELL
assert.strictEqual(readline.getStringWidth(0x0007), 0);
// Cc (Control): LINE FEED
assert.strictEqual(readline.getStringWidth('\n'), 0);
// Cf (Format): SOFT HYPHEN
assert.strictEqual(readline.getStringWidth(0x00AD), 1);
// Cf (Format): LEFT-TO-RIGHT MARK
// Cf (Format): RIGHT-TO-LEFT MARK
assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1);
// Cn (Unassigned): Not a character
assert.strictEqual(readline.getStringWidth(0x10FFEF), 1);
// Cn (Unassigned): Not a character (but in a CJK range)
assert.strictEqual(readline.getStringWidth(0x3FFEF), 2);
// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
assert.strictEqual(readline.getStringWidth(0x0301), 0);
// Mc (Spacing Mark): BALINESE ADEG ADEG
// Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
// character.
assert.strictEqual(readline.getStringWidth(0x1B44), 1);
// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
assert.strictEqual(readline.getStringWidth(0x20DD), 0);

// The following is an emoji sequence. In some implementations, it is
// represented as a single glyph, in other implementations as a sequence
// of individual glyphs. By default, the algorithm will assume the single
// glyph interpretation and return a value of 2. By passing the
// expandEmojiSequence: true option, each component will be counted
// individually.
//
// The ZERO WIDTH JOINER (U+200D) characters that tie the four emoji together
// are written as explicit escapes: they are invisible in most editors and are
// easily lost when the file is copied, which would silently turn the sequence
// into four unrelated emoji.
const emojiSequence = '👩\u200D👩\u200D👧\u200D👧';
assert.strictEqual(readline.getStringWidth(emojiSequence), 2);
assert.strictEqual(
  readline.getStringWidth(emojiSequence, { expandEmojiSequence: true }), 8);

// Unicode characters whose width is considered ambiguous are treated as
// half-width by default, so getStringWidth returns 1 for them. In some
// contexts, however, it is more appropriate to consider them full width.
// By passing the ambiguousAsFullWidth: true option, ambiguous characters
// will be counted as 2 columns.
assert.strictEqual(readline.getStringWidth('\u01d4'), 1);
assert.strictEqual(
  readline.getStringWidth('\u01d4', { ambiguousAsFullWidth: true }), 2);

// Control chars and combining chars are zero
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);

// Test that the fast path for ASCII characters yields results consistent
// with the 'slow' path.
for (const ambiguousAsFullWidth of [ false, true ]) {
  for (let i = 0; i < 256; i++) {
    const char = String.fromCharCode(i);

    // The numeric (code point) form and the one-character string form must
    // report the same width.
    assert.strictEqual(
      readline.getStringWidth(i, { ambiguousAsFullWidth }),
      readline.getStringWidth(char, { ambiguousAsFullWidth }));

    // Appending a non-ASCII, two-column emoji must add exactly 2 columns to
    // the width reported for the character on its own.
    assert.strictEqual(
      readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }),
      readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2);

    if (i < 32 || (i >= 127 && i < 160)) { // Control character
      assert.strictEqual(
        readline.getStringWidth(i, { ambiguousAsFullWidth }), 0);
    } else if (i < 127) { // Regular ASCII character
      assert.strictEqual(
        readline.getStringWidth(i, { ambiguousAsFullWidth }), 1);
    }
    // Code points 160-255 are only checked for consistency above; no fixed
    // width is asserted for them here.
  }
}