diff --git a/normal/charset.c b/normal/charset.c index 7c455a357..00bbcb815 100644 --- a/normal/charset.c +++ b/normal/charset.c @@ -16,6 +16,25 @@ * along with GRUB. If not, see <http://www.gnu.org/licenses/>. */ +/* + Current problems with Unicode rendering: + - B and BN bidi type characters (ignored) + - Mc type characters with combining class 0 (treated as non-combining) + - Mn type characters with combining class 0 (treated as non-combining) + - Me type characters with combining class 0 (treated as non-combining) + - Cf type characters (ignored) + - Cc type characters (ignored) + - Line-breaking rules (e.g. Zs type characters) + - Indic languages + - Arabic shaping + - Zl and Zp characters + - Combining characters of types 7, 8, 9, 21, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 84, 91, 103, 107, 118, 122, 129, 130, 132, + 218, 224, 226, 233, 234 + - Private use surrogates + - Private use characters (not really a problem) + */ + /* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string. Return the number of characters converted. DEST must be able to hold diff --git a/util/import_unicode.py b/util/import_unicode.py index 0fc36c5c2..e0e53b655 100644 --- a/util/import_unicode.py +++ b/util/import_unicode.py @@ -45,12 +45,35 @@ for line in infile: print ("UnicodeData.txt uses combination type %d. Conflict." 
\ % curcombtype) raise + if sp[2] != "Lu" and sp[2] != "Ll" and sp[2] != "Lt" and sp[2] != "Lm" \ + and sp[2] != "Lo"\ + and sp[2] != "Me" and sp[2] != "Mc" and sp[2] != "Mn" \ + and sp[2] != "Nd" and sp[2] != "Nl" and sp[2] != "No" \ + and sp[2] != "Pc" and sp[2] != "Pd" and sp[2] != "Ps" \ + and sp[2] != "Pe" and sp[2] != "Pi" and sp[2] != "Pf" \ + and sp[2] != "Po" \ + and sp[2] != "Sm" and sp[2] != "Sc" and sp[2] != "Sk" \ + and sp[2] != "So"\ + and sp[2] != "Zs" and sp[2] != "Zl" and sp[2] != "Zp" \ + and sp[2] != "Cc" and sp[2] != "Cf" and sp[2] != "Cs" \ + and sp[2] != "Co": + print ("WARNING: Unknown type %s" % sp[2]) if curcombtype == 0 and sp[2] == "Me": curcombtype = 253 if curcombtype == 0 and sp[2] == "Mc": curcombtype = 254 if curcombtype == 0 and sp[2] == "Mn": curcombtype = 255 + if (curcombtype >= 2 and curcombtype <= 6) \ + or (curcombtype >= 37 and curcombtype != 84 and curcombtype != 91 and curcombtype != 103 and curcombtype != 107 and curcombtype != 118 and curcombtype != 122 and curcombtype != 129 and curcombtype != 130 and curcombtype != 132 and curcombtype != 202 and \ + curcombtype != 214 and curcombtype != 216 and \ + curcombtype != 218 and curcombtype != 220 and \ + curcombtype != 222 and curcombtype != 224 and curcombtype != 226 and curcombtype != 228 and \ + curcombtype != 230 and curcombtype != 232 and curcombtype != 233 and \ + curcombtype != 234 and \ + curcombtype != 240 and curcombtype != 253 and \ + curcombtype != 254 and curcombtype != 255): + print ("WARNING: Unknown combining type %d" % curcombtype) if lastcode + 1 != curcode or curbiditype != lastbiditype \ or curcombtype != lastcombtype or curmirrortype != lastmirrortype: if begincode != -2 and (lastbiditype != "L" or lastcombtype != 0 or \