From 907010013e87af5aafd049b0b2a1de9fc987ca20 Mon Sep 17 00:00:00 2001 From: rmkaplan <69548581+rmkaplan@users.noreply.github.com> Date: Mon, 3 Feb 2025 10:47:12 -0800 Subject: [PATCH] Add back character sets that had characters outside 16 bit plane (#1964) * Add back character sets that had characters outside 16 bit plane * Update XCCS-353=SYMBOLS3.TXT Update title line * Update UNICODE.TEDIT * Fix charset names * Reorganized the tables, added requested interfaces * Use a single hash * Top-level array branch beats a single hash * cleanup UNICODE.TRANSLATE macro * Fix slug in outcharfn * Remove a stray line * Another try, would work for raw * Remove duplicates, redo hashing * Getting complete maps in both directions * Initializing * Only the latest file versions * Add back gothic mappings --- library/UNICODE | 952 ++++---- library/UNICODE.LCOM | Bin 32423 -> 30651 bytes library/UNICODE.TEDIT | Bin 19010 -> 19461 bytes unicode/xerox/INVERTED-UNICODE-MAPPINGS.TXT | 2095 +++++++++-------- unicode/xerox/UNICODE-MAPPINGS.TXT | 460 ++-- unicode/xerox/XCCS-353=SYMBOLS3.TXT | 4 +- unicode/xerox/XCCS-51=RUNIC-GOTHIC.TXT | 58 +- ...KNOWN1.TXT => XCCS-56=DECORATED-RULES.TXT} | 4 +- ...OWN2.TXT => XCCS-57=VERTICAL-JAPANESE.TXT} | 4 +- 9 files changed, 1947 insertions(+), 1630 deletions(-) rename unicode/xerox/{XCCS-56=UNKNOWN1.TXT => XCCS-56=DECORATED-RULES.TXT} (96%) rename unicode/xerox/{XCCS-57=UNKNOWN2.TXT => XCCS-57=VERTICAL-JAPANESE.TXT} (98%) diff --git a/library/UNICODE b/library/UNICODE index 64b2d5d1..3c917000 100644 --- a/library/UNICODE +++ b/library/UNICODE @@ -1,19 +1,21 @@ (DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10) -(FILECREATED "26-Aug-2024 16:58:36" {WMEDLEY}UNICODE.;74 100982 +(FILECREATED "31-Jan-2025 17:47:03" {WMEDLEY}UNICODE.;128 98991 :EDIT-BY rmk - :CHANGES-TO (FNS UNICODE-EXTEND-TRANSLATION?) 
+ :CHANGES-TO (FNS READ-UNICODE-MAPPING MERGE-UNICODE-TRANSLATION-TABLES + MAKE-UNICODE-TRANSLATION-TABLES ALL-UNICODE-MAPPINGS) - :PREVIOUS-DATE "27-Mar-2024 23:07:42" {WMEDLEY}UNICODE.;73) + :PREVIOUS-DATE "27-Jan-2025 16:46:36" {WMEDLEY}UNICODE.;127) (PRETTYCOMPRINT UNICODECOMS) (RPAQQ UNICODECOMS ((COMS (* ; "External formats") - (FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCCODEFN) + (FNS UTF8.OUTCHARFN UTF8.SLUG.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN + \UTF8.BACKCCODEFN) (FNS UTF16BE.OUTCHARFN UTF16BE.INCCODEFN UTF16BE.PEEKCCODEFN \UTF16BE.BACKCCODEFN) (FNS UTF16LE.OUTCHARFN UTF16LE.INCCODEFN UTF16LE.PEEKCCODEFN \UTF16LE.BACKCCODEFN) (FNS READBOM WRITEBOM) @@ -21,11 +23,11 @@ (FNS MAKE-UNICODE-FORMATS) (P (MAKE-UNICODE-FORMATS EXTERNALEOL)) (ADDVARS (*DEFAULT-EXTERNALFORMATS* (UNIX :UTF-8))) - (FNS UNICODE.UNMAPPED UNICODE-EXTEND-TRANSLATION? UTF8.BINCODE \UTF8.FETCHCODE) + (FNS UTF8.BINCODE \UTF8.FETCHCODE) (FNS UTF8.VALIDATE UTF8-SIZE-FROM-BYTE1 NUTF8-BYTE1-BYTES NUTF8-CODE-BYTES NUTF8-STRING-BYTES) (DECLARE%: EVAL@COMPILE DONTCOPY (MACROS UNICODE.TRANSLATE \UTF8.GETBASEBYTE)) - (FNS XTOUCODE UTOXCODE)) + (FNS XTOUCODE UTOXCODE XTOUCODE? UTOXCODE?)) (* ;; "") @@ -35,23 +37,25 @@ (FNS READ-UNICODE-MAPPING-FILENAMES READ-UNICODE-MAPPING)) [COMS (* ;  "Make translation tables for UTF external formats") - (FNS MAKE-UNICODE-TRANSLATION-TABLES MERGE-UNICODE-TRANSLATION-TABLES - MERGE-UNICODE-TRANSLATION-TABLES1) - (FNS INVERT-ALL-UNICODE-MAPPINGS ALL-UNICODE-MAPPINGS) + (FNS MAKE-UNICODE-TRANSLATION-TABLES MERGE-UNICODE-TRANSLATION-TABLES UNICODE.UNMAPPED + UNICODE-EXTEND-TRANSLATION?) 
+ (FNS ALL-UNICODE-MAPPINGS) (INITVARS (*XCCSTOUNICODE*) (*UNICODETOXCCS*) - (*INVERTED-UNICODE-MAPPINGS*)) - (GLOBALVARS *XCCSTOUNICODE* *UNICODETOXCCS*) - [DECLARE%: DONTEVAL@LOAD DOCOPY (P (MAKE-UNICODE-TRANSLATION-TABLES 'DEFAULT] + (*XCCS-LOADED-CHARSETS*) + (*UNICODE-LOADED-CHARSETS*)) + (GLOBALVARS *XCCSTOUNICODE* *UNICODETOXCCS* *NEXT-PRIVATE-UNICODE* + *NEXT-PRIVATE-XCCSCODE* *XCCS-LOADED-CHARSETS* *UNICODE-LOADED-CHARSETS*) (DECLARE%: EVAL@COMPILE DONTCOPY - (* ;; "These control the layout of the translation tables. Since many of the upper panels are sparse, doing it per-panel (128) seems more space-efficient, and residual alists can be shorter") + (* ;; "There are 6400 private Unicodes in 25 256-code charsets. For XCCS we map to a contiguous region of unused/reserved--private isn't big enough.") - (CONSTANTS (TRANSLATION-SEGMENT-SIZE 128) - (MAX-ALIST-LENGTH 10) - (N-TRANSLATION-SEGMENTS (IQUOTIENT 65536 TRANSLATION-SEGMENT-SIZE)) - (TRANSLATION-SHIFT (INTEGERLENGTH (SUB1 TRANSLATION-SEGMENT-SIZE))) - (TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE] + (CONSTANTS (FIRST-PRIVATE-UNICODE (HEXNUM? "E000")) + (LAST-PRIVATE-UNICODE (HEXNUM? 
"F8FF")) + (FIRST-PRIVATE-XCCSCODE (CHARCODE "200,0")) + (LAST-PRIVATE-XCCSCODE (CHARCODE "230,377"))) + (MACROS TRUECODEP)) + (DECLARE%: DONTEVAL@LOAD DOCOPY (P (MAKE-UNICODE-TRANSLATION-TABLES 'ALL] (* ;; "") @@ -68,8 +72,9 @@ 16] (VARS UNICODE-MAPPING-HEADER)) (FNS UTF8HEXSTRING XTOUSTRING XCCSSTRING) - (FNS UNHEXSTRING) - (FNS SHOWCHARS) + (COMS (* ; "debugging") + (FNS SHOWCHARS) + (DECLARE%: DOEVAL@LOAD DONTCOPY (MACROS HEXCHAR OCTALCHAR))) (DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS) EXPORTS.ALL)) (PROP (FILETYPE) @@ -82,7 +87,8 @@ (DEFINEQ (UTF8.OUTCHARFN - [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 31-Jan-2024 00:32 by rmk") + [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 20-Jan-2025 20:45 by rmk") + (* ; "Edited 31-Jan-2024 00:32 by rmk") (* ; "Edited 8-Aug-2021 13:02 by rmk:") (* ; "Edited 17-Aug-2020 08:45 by rmk:") (* ; "Edited 30-Jan-2020 23:08 by rmk:") @@ -127,6 +133,18 @@ (LOADBYTE C 0 6))) ELSE (ERROR "CHARCODE too big for UTF8" C]) +(UTF8.SLUG.OUTCHARFN + [LAMBDA (STREAM CODE RAW) (* ; "Edited 21-Jan-2025 18:37 by rmk") + (* ; "Edited 14-Jan-2025 12:39 by rmk") + + (* ;; "Produces Unicode Representative FFFD as a slug for XCCS unmapped characters") + + (UTF8.OUTCHARFN STREAM (OR (CL:IF RAW + CODE + (XTOUCODE? CODE)) + (CONSTANT (HEXNUM? "FFFD"))) + T]) + (UTF8.INCCODEFN [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 2-Feb-2024 11:44 by rmk") (* ; "Edited 30-Jan-2024 22:56 by rmk") @@ -501,7 +519,8 @@ (DEFINEQ (READBOM - [LAMBDA (STREAM COUNTP) (* ; "Edited 11-Mar-2024 23:53 by rmk") + [LAMBDA (STREAM COUNTP) (* ; "Edited 17-Jan-2025 11:29 by rmk") + (* ; "Edited 11-Mar-2024 23:53 by rmk") (* ; "Edited 10-Mar-2024 13:01 by rmk") (* ;; "If COUNTP, this must be under a generic \INCCODE that binds *BYTECOUNTER*") @@ -512,29 +531,29 @@ (DECLARE (USEDFREE *BYTECOUNTER*)) (SELECTC (\PEEKBIN STREAM T) - ((UNHEXSTRING "EF") + ((HEXNUM? "EF") (BIN STREAM) - (if (EQ (CONSTANT (UNHEXSTRING "BB")) + (if (EQ (CONSTANT (HEXNUM? 
"BB")) (\PEEKBIN STREAM T)) then (BIN STREAM) - (if (EQ (CONSTANT (UNHEXSTRING "BF")) + (if (EQ (CONSTANT (HEXNUM? "BF")) (\PEEKBIN STREAM T)) then (BIN STREAM) (CL:WHEN COUNTP (add *BYTECOUNTER* 3)) :UTF-8 else (\BACKFILEPTR STREAM)) else (\BACKFILEPTR STREAM))) - ((UNHEXSTRING "FE") + ((HEXNUM? "FE") (BIN STREAM) - (if (EQ (CONSTANT (UNHEXSTRING "FF")) + (if (EQ (CONSTANT (HEXNUM? "FF")) (\PEEKBIN STREAM T)) then (BIN STREAM) (CL:WHEN COUNTP (add *BYTECOUNTER* 2)) :UTF-16BE else (\BACKFILEPTR STREAM))) - ((UNHEXSTRING "FF") + ((HEXNUM? "FF") (BIN STREAM) - (if (EQ (CONSTANT (UNHEXSTRING "FE")) + (if (EQ (CONSTANT (HEXNUM? "FE")) (\PEEKBIN STREAM T)) then (BIN STREAM) (CL:WHEN COUNTP (add *BYTECOUNTER* 2)) @@ -543,20 +562,21 @@ NIL]) (WRITEBOM - [LAMBDA (STREAM FORMAT) (* ; "Edited 16-Mar-2024 20:53 by rmk") + [LAMBDA (STREAM FORMAT) (* ; "Edited 17-Jan-2025 11:29 by rmk") + (* ; "Edited 16-Mar-2024 20:53 by rmk") (* ; "Edited 11-Mar-2024 23:53 by rmk") (* ; "Edited 10-Mar-2024 13:01 by rmk") (* ;; "Writes a BOM that represents FORMAT (:UTF-8, :UTF16-BE, :UTF16-LE") (SELECTQ FORMAT - (:UTF-8 (BOUT STREAM (CONSTANT (UNHEXSTRING "EF"))) - (BOUT STREAM (CONSTANT (UNHEXSTRING "BB"))) - (BOUT STREAM (CONSTANT (UNHEXSTRING "BF")))) - (:UTF-16BE (BOUT STREAM (CONSTANT (UNHEXSTRING "FE"))) - (BOUT STREAM (CONSTANT (UNHEXSTRING "FF")))) - (:UTF-16LE (BOUT STREAM (CONSTANT (UNHEXSTRING "FF"))) - (BOUT STREAM (UNHEXSTRING "FE"))) + (:UTF-8 (BOUT STREAM (CONSTANT (HEXNUM? "EF"))) + (BOUT STREAM (CONSTANT (HEXNUM? "BB"))) + (BOUT STREAM (CONSTANT (HEXNUM? "BF")))) + (:UTF-16BE (BOUT STREAM (CONSTANT (HEXNUM? "FE"))) + (BOUT STREAM (CONSTANT (HEXNUM? "FF")))) + (:UTF-16LE (BOUT STREAM (CONSTANT (HEXNUM? "FF"))) + (BOUT STREAM (HEXNUM? 
"FE"))) NIL]) ) @@ -564,7 +584,8 @@ (DEFINEQ (MAKE-UNICODE-FORMATS - [LAMBDA (EXTERNALEOL) (* ; "Edited 10-Mar-2024 11:55 by rmk") + [LAMBDA (EXTERNALEOL) (* ; "Edited 17-Jan-2025 18:38 by rmk") + (* ; "Edited 10-Mar-2024 11:55 by rmk") (* ; "Edited 8-Dec-2023 15:19 by rmk") (* ; "Edited 19-Jul-2022 15:36 by rmk") (* ; "Edited 6-Aug-2021 16:08 by rmk:") @@ -614,7 +635,10 @@ (\UTF16LE.BACKCCODEFN STREAM COUNTP T] [FUNCTION (LAMBDA (STREAM CHARCODE) (UTF16LE.OUTCHARFN STREAM CHARCODE T] - NIL EXTERNALEOL NIL NIL NIL (FUNCTION NILL]) + NIL EXTERNALEOL NIL NIL NIL (FUNCTION NILL)) + (\INSTALL.EXTERNALFORMAT (create EXTERNALFORMAT using (FIND-FORMAT :UTF-8) + NAME _ :UTF-8-SLUG OUTCHARFN _ + (FUNCTION UTF8.SLUG.OUTCHARFN]) ) (MAKE-UNICODE-FORMATS EXTERNALEOL) @@ -622,75 +646,6 @@ (ADDTOVAR *DEFAULT-EXTERNALFORMATS* (UNIX :UTF-8)) (DEFINEQ -(UNICODE.UNMAPPED - [LAMBDA (CODE TRANSLATION-TABLE ALREADYTRIED) (* ; "Edited 2-Feb-2024 23:52 by rmk") - (* ; "Edited 31-Jan-2024 10:07 by rmk") - (* ; "Edited 11-Aug-2020 20:23 by rmk:") - - (* ;; "This is the slow fall-out when UNICODE.TRANSLATE determines that CODED has no fast mapping in TRANSLATION-TABLE.") - - (* ;; "") - - (* ;; "If we have not already dummied it up, we try to extend the current table by finding and merging the character set that has a mapping for CODE.") - - (* ;; "") - - (* ;; "When a proper mapping is not available we gin up a distinct unused code and put it in the hash array. If CODE has not previously been seen, we allocate a new code in the forward unmapped hasharray and put the inverse in the backward array.") - - (* ;; "ALREADYTRIED suppresses the recursion through the extension attempt.") - - (* ;; - "ALREADYTRIED breaks the loop of finding that CODE's character set doesn't have a mapping for CODE.") - - (LET (INVERSE NEXTCODE (FORWARD (CL:SVREF TRANSLATION-TABLE N-TRANSLATION-SEGMENTS))) - (if (GETHASH CODE (CAR FORWARD)) - elseif (AND (NOT ALREADYTRIED) - (UNICODE-EXTEND-TRANSLATION? 
CODE TRANSLATION-TABLE)) - elseif (AND (ILEQ CODE (CADDR FORWARD)) - (IGEQ CODE (CADDDR FORWARD))) - then (ERROR "UNMAPPED CODE IS EITHER XCCS-UNUSED OR UNICODE-PRIVATE" CODE) - else (SETQ INVERSE (CL:SVREF TRANSLATION-TABLE (ADD1 N-TRANSLATION-SEGMENTS))) - (SETQ NEXTCODE (ADD (CADR INVERSE) - 1)) - (CL:WHEN (IGREATERP NEXTCODE (CADDR INVERSE)) - (ERROR "EXHAUSTED RANGE FOR UNMAPPED CODES" CODE)) - (PUTHASH CODE NEXTCODE (CAR FORWARD)) - (PUTHASH NEXTCODE CODE (CAR INVERSE)) - NEXTCODE]) - -(UNICODE-EXTEND-TRANSLATION? - [LAMBDA (CODE TRANSLATION-TABLE) (* ; "Edited 26-Aug-2024 16:49 by rmk") - (* ; "Edited 27-Mar-2024 23:02 by rmk") - (* ; "Edited 5-Feb-2024 13:48 by rmk") - (* ; "Edited 3-Feb-2024 12:40 by rmk") - - (* ;; "There is currently no mapping for CODE in TRANSLATION-TABLE, hopefully just because the relevant character-set mapping has not been installed. We infer from TRANSLATION-TABLE whether CODE is an XCCS or UNICODE code and look for the proper mapping table (forward or inverted) for its character set. 
") - - (LET (MAPPING FILE (INVERTED (EQ TRANSLATION-TABLE *UNICODETOXCCS*))) - (SETQ FILE (FINDFILE (CL:IF INVERTED - 'INVERTED-UNICODE-MAPPINGS.TXT - 'UNICODE-MAPPINGS.TXT) - T UNICODEDIRECTORIES)) - (CL:WHEN FILE - [SETQ MAPPING (CL:WITH-OPEN-FILE (STREAM FILE :INPUT) - (CL:WHEN (FFILEPOS (CONCAT "[" (LRSH CODE 8) - " ") - STREAM NIL NIL NIL T) - (READ STREAM] - (CL:WHEN MAPPING - - (* ;; - "Merge MAPPING into both tables, respecting the direction indicated by TRANSLATION-TABLE.") - - (if INVERTED - then (MERGE-UNICODE-TRANSLATION-TABLES MAPPING *UNICODETOXCCS* *XCCSTOUNICODE*) - else (MERGE-UNICODE-TRANSLATION-TABLES MAPPING *XCCSTOUNICODE* *UNICODETOXCCS*)) - - (* ;; - "Hopefully we have now installed and can retrieve the mapping for CODE in its translation table.") - - (UNICODE.TRANSLATE CODE TRANSLATION-TABLE T)))]) - (UTF8.BINCODE [LAMBDA (STREAM RAW) (* ; "Edited 4-Feb-2024 01:06 by rmk") (* ; "Edited 1-Feb-2024 11:21 by rmk") @@ -883,15 +838,20 @@ (DECLARE%: EVAL@COMPILE DONTCOPY (DECLARE%: EVAL@COMPILE -(PUTPROPS UNICODE.TRANSLATE MACRO [OPENLAMBDA (CODE TRANSLATION-TABLE ALREADYTRIED) - (LET [(X (CL:SVREF TRANSLATION-TABLE (LRSH CODE TRANSLATION-SHIFT - ] - (OR [COND - ((LISTP X) - (CDR (FASSOC (LOGAND CODE TRANSLATION-MASK) - X))) - (X (CL:SVREF X (LOGAND CODE TRANSLATION-MASK] - (UNICODE.UNMAPPED CODE TRANSLATION-TABLE ALREADYTRIED]) +(PUTPROPS UNICODE.TRANSLATE MACRO [OPENLAMBDA (CODE TRANSLATION-TABLE DONTFAKE RETURNALL) + + (* ;; "If RETURNALL and there are alternatives in the RANG, the list is returned. Othewise just the first one if the fake flag allows ") + + (LET [(RANGE (OR (GETHASH CODE TRANSLATION-TABLE) + (UNICODE.UNMAPPED CODE TRANSLATION-TABLE + DONTFAKE] + (CL:WHEN RANGE + (if (AND RETURNALL (CDR RANGE)) + then RANGE + else (SETQ RANGE (CAR RANGE)) + (CL:IF DONTFAKE + (TRUECODEP RANGE TRANSLATION-TABLE) + RANGE)))]) (PUTPROPS \UTF8.GETBASEBYTE MACRO ((BASE OFFSET ERROR?) 
(* ;  "Fetches the OFFSET'th byte from BASE, checking for UTF-8 validity if ERROR?") @@ -906,12 +866,38 @@ (DEFINEQ (XTOUCODE - [LAMBDA (XCCSCODE) (* ; "Edited 9-Aug-2020 09:04 by rmk:") + [LAMBDA (XCCSCODE) (* ; "Edited 9-Aug-2020 09:04 by rmk:") (UNICODE.TRANSLATE XCCSCODE *XCCSTOUNICODE*]) (UTOXCODE - [LAMBDA (UNICODE) (* ; "Edited 9-Aug-2020 09:04 by rmk:") - (UNICODE.TRANSLATE UNICODE *UNICODETOXCCS*]) + [LAMBDA (UNNICODE) (* ; "Edited 16-Jan-2025 23:46 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + (UNICODE.TRANSLATE UNNICODE *UNICODETOXCCS*]) + +(XTOUCODE? + [LAMBDA (XCCSCODE) (* ; "Edited 20-Jan-2025 20:38 by rmk") + (* ; "Edited 18-Jan-2025 11:44 by rmk") + (* ; "Edited 15-Jan-2025 19:51 by rmk") + (* ; "Edited 14-Jan-2025 13:14 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + + (* ;; "Returns the Unicode range-code(s) corresponding to XCCSCODE if there are true mappings, otherwise NIL. Alternative codes are returned in a list, the code itself is returned for a singleton.") + + (UNICODE.TRANSLATE XCCSCODE *XCCSTOUNICODE* T T]) + +(UTOXCODE? + [LAMBDA (UNICODE) (* ; "Edited 19-Jan-2025 21:14 by rmk") + (* ; "Edited 18-Jan-2025 11:46 by rmk") + (* ; "Edited 15-Jan-2025 19:51 by rmk") + (* ; "Edited 14-Jan-2025 13:14 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + + (* ;; "Returns the XCCS range-code(s) corresponding to UNICODE if there are true mappings, otherwise NIL. 
") + + (* ;; + " NOTE: Alternative codes are returned in a list, the code itself is returned for a singleton.") + + (UNICODE.TRANSLATE UNICODE *UNICODETOXCCS* T T]) ) @@ -936,6 +922,10 @@ (GREEK "46") (CYRILLIC "47") (FORMS "50") + (RUNIC-GOTHIC "51") + (MORE-CYRILLIC "52") + (UNKNOWN1 "56") + (UNKNOWN2 "57") (JIS "60-166") (ARABIC "340") (HEBREW "341") @@ -946,10 +936,16 @@ (BENGALI "346") (GURMUKHI "347") (THAI-LAO "350") + (SYMBOLS3 "353") + (EXTENDED-ITC-DINGBATS "354") + (ITC-DINGBATS1 "355") (SYMBOLS2 "356") (SYMBOLS1 "357") (LIGATURES "360") (ACCENTED-LATIN1 "361") + (ACCENTED-LATIN2 "362") + (ACCENTED-GREEK1 "363") + (ACCENTED-GREEK2 "364") (MORE-ARABIC "365") (GRAPHIC-VARIANTS "375") (DEFAULT LATIN ACCENTED-LATIN1 EXTENDED-LATIN SYMBOLS1 SYMBOLS2 FORMS JAPANESE-SYMBOLS1 @@ -958,46 +954,60 @@ (DEFINEQ (READ-UNICODE-MAPPING-FILENAMES - [LAMBDA (FILESPEC) (* ; "Edited 3-Feb-2024 11:00 by rmk") + [LAMBDA (FILESPEC) (* ; "Edited 27-Jan-2025 16:46 by rmk") + (* ; "Edited 21-Jan-2025 22:51 by rmk") + (* ; "Edited 19-Jan-2025 12:21 by rmk") + (* ; "Edited 3-Feb-2024 11:00 by rmk") (* ; "Edited 30-Jan-2024 08:45 by rmk") (* ; "Edited 26-Jan-2024 14:02 by mth") - (* ; "Edited 5-Aug-2020 15:59 by kaplan") + (* ; "Edited 5-Aug-2020 15:59 by kaplan") + (* ; "Edited 4-Aug-2020 17:31 by rmk:") - (* ;; "FILESPEC can be a file name, character-set name, the name of a collection of character sets, an XCCS character code, or list of those. Maps those into the names of files that contain the indicated Unicode mappings.") - (* ; "Edited 4-Aug-2020 17:31 by rmk:") - (DECLARE (USEDFREE UNICODEDIRECTORIES XCCS-CHARSETS)) - (FOR F X CSI INSIDE FILESPEC - JOIN - (* ;; "Last case hopes to pick up all the tables that are grouped together in a subdirectory (e.g. if F is JIS)") + (* ;; "FILESPEC can be a file name, character-set name, the name of a collection of character sets, an XCCS character code, or a list of those. 
Maps those into the names of files that contain the indicated Unicode mappings.") - [OR (CL:WHEN (CHARCODEP F) (* ; + (CL:REMOVE-DUPLICATES + [if (EQ FILESPEC 'ALL) + then + (* ;; + "Perhaps should figure out which files in the directories and subdirectories are relevant?") + + (READ-UNICODE-MAPPING-FILENAMES (for N in XCCS-CHARSETS collect (CAR N))) + else (FOR F X CSI INSIDE FILESPEC + JOIN + (* ;; "Last case hopes to pick up all the tables that are grouped together in a subdirectory (e.g. if F is JIS)") + + (OR (CL:WHEN (CHARCODEP F) (* ;  "An XCCS code can retrieve its character set") - (for D FN (FOCTAL _ (OCTALSTRING (LRSH F 8))) inside UNICODEDIRECTORIES - when [SETQ FN (FILDIR (PACKFILENAME 'DIRECTORY D 'BODY (CONCAT 'XCCS- FOCTAL - '=*) - 'EXTENSION - 'TXT] do (RETURN FN))) - (MKLIST (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION 'TXT) - T UNICODEDIRECTORIES)) - (for D inside UNICODEDIRECTORIES - when [SETQ $$VAL (OR (FILDIR (PACKFILENAME 'NAME (CONCAT "XCCS-*=" F) - 'EXTENSION - 'TXT - 'BODY D)) - (FILDIR (PACKFILENAME 'NAME (CONCAT "XCCS-" F "=*") - 'EXTENSION - 'TXT - 'BODY D] do (RETURN $$VAL)) - (AND (SETQ CSI (ASSOC F XCCS-CHARSETS)) - (READ-UNICODE-MAPPING-FILENAMES (CDR CSI) - UNICODEDIRECTORIES)) - (for D inside UNICODEDIRECTORIES when (DIRECTORYNAMEP (SETQ D - (CONCAT D ">" F ">"))) - join (FILDIR (CONCAT D ">*.TXT;*"] - finally (RETURN (CL:REMOVE-DUPLICATES $$VAL :TEST (FUNCTION STREQUAL]) + (for D FN (FOCTAL _ (OCTALSTRING (LRSH F 8))) inside UNICODEDIRECTORIES + when (SETQ FN (FILDIR (PACKFILENAME 'DIRECTORY D 'BODY + (CONCAT 'XCCS- FOCTAL '=*) + 'EXTENSION + 'TXT + 'VERSION ""))) do (RETURN FN))) + (MKLIST (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION 'TXT 'VERSION "") + T UNICODEDIRECTORIES)) + (for D inside UNICODEDIRECTORIES + when [SETQ $$VAL (OR (FILDIR (PACKFILENAME 'NAME (CONCAT "XCCS-*=" F) + 'EXTENSION + 'TXT + 'VERSION "" 'BODY D)) + (FILDIR (PACKFILENAME 'NAME (CONCAT "XCCS-" F "=*") + 'EXTENSION + 'TXT + 'VERSION "" 'BODY D] + do (RETURN 
$$VAL)) + (AND (SETQ CSI (ASSOC F XCCS-CHARSETS)) + (READ-UNICODE-MAPPING-FILENAMES (CDR CSI))) + (for D inside UNICODEDIRECTORIES + when (DIRECTORYNAMEP (SETQ D (CONCAT D ">" F ">"))) + join (FILDIR (CONCAT D ">*.TXT;"] + :TEST + (FUNCTION STRING.EQUAL]) (READ-UNICODE-MAPPING - [LAMBDA (FILESPEC NOPRINT NOERROR) (* ; "Edited 3-Feb-2024 00:21 by rmk") + [LAMBDA (FILESPEC PRINT NOERROR) (* ; "Edited 31-Jan-2025 17:43 by rmk") + (* ; "Edited 17-Jan-2025 16:41 by rmk") + (* ; "Edited 3-Feb-2024 00:21 by rmk") (* ; "Edited 5-Jan-2024 12:26 by rmk") (* ; "Edited 3-Jul-2021 13:37 by rmk:") @@ -1019,28 +1029,34 @@ (FOR FILE [SEPBITTABLE _ (MAKEBITTABLE (CHARCODE (TAB SPACE] IN (READ-UNICODE-MAPPING-FILENAMES FILESPEC) - JOIN (CL:WITH-OPEN-FILE (STREAM FILE :DIRECTION :INPUT :EXTERNAL-FORMAT :UTF-8-RAW) - (BIND LINE NAME CHARSET START - FIRST (CL:UNLESS (FILEPOS "Name:" STREAM NIL NIL NIL T) + JOIN + (* ;; "External format :THROUGH means read as bytes, so the Unicode UTF-8 comments cannot cause reading problems.") + + (CL:WITH-OPEN-FILE (STREAM FILE :DIRECTION :INPUT :EXTERNAL-FORMAT `(:THROUGH LF)) + (bind LINE NAME CHARSET START + first (CL:UNLESS (FILEPOS "Name:" STREAM NIL NIL NIL T) (ERROR "NOT A UNICODE MAPPING FILE" (FULLNAME STREAM))) (SETQ NAME (CL:STRING-TRIM " " (CL:READ-LINE STREAM NIL NIL))) (SETQ CHARSET (CL:IF (FILEPOS "XCCS charset:" STREAM NIL NIL NIL T) (CL:STRING-TRIM " " (CL:READ-LINE STREAM NIL NIL)) "")) - (CL:UNLESS NOPRINT (* ; "Strip off XCCS in front of name") + (CL:WHEN PRINT (* ; "Strip off XCCS in front of name") (PRINTOUT T T CHARSET " " [SUBSTRING NAME (CONSTANT (ADD1 (NCHARS "XCCS"] - T)) WHILE (SETQ LINE (CL:READ-LINE STREAM NIL NIL)) - WHEN (SETQ START (STRPOSL SEPBITTABLE LINE 1 T)) - UNLESS (EQ (CHARCODE %#) + T)) while (SETQ LINE (CL:READ-LINE STREAM NIL NIL)) + when (SETQ START (STRPOSL SEPBITTABLE LINE 1 T)) + unless (EQ (CHARCODE %#) (NTHCHARCODE LINE START)) - COLLECT (BIND END WHILE [SETQ END (OR (STRPOSL SEPBITTABLE LINE START) - 
(ADD1 (NCHARS LINE] - COLLECT [CHARCODE.DECODE (SUBSTRING LINE START (SUB1 END) + collect (bind END CODES while [SETQ END (OR (STRPOSL SEPBITTABLE LINE START) + (ADD1 (NCHARS LINE] + collect [CHARCODE.DECODE (SUBSTRING LINE START (SUB1 END) (CONSTANT (CONCAT] - REPEATWHILE (AND (SETQ START (STRPOSL SEPBITTABLE LINE END T)) + repeatwhile (AND (SETQ START (STRPOSL SEPBITTABLE LINE END T)) (NEQ (CHARCODE %#) - (NTHCHARCODE LINE START]) + (NTHCHARCODE LINE START))) + finally (CL:WHEN (CDDR $$VAL) + (* ; "Combiners go into a CADR list") + (RPLACD $$VAL (CONS (CDR $$VAL))))]) ) @@ -1050,336 +1066,268 @@ (DEFINEQ (MAKE-UNICODE-TRANSLATION-TABLES - [LAMBDA (MAPPING REINSTALL) (* ; "Edited 3-Feb-2024 00:24 by rmk") + [LAMBDA (MAPPING REINSTALL) (* ; "Edited 31-Jan-2025 17:46 by rmk") + (* ; "Edited 26-Jan-2025 19:36 by rmk") + (* ; "Edited 22-Jan-2025 14:22 by rmk") + (* ; "Edited 19-Jan-2025 15:08 by rmk") + (* ; "Edited 18-Jan-2025 11:52 by rmk") + (* ; "Edited 3-Feb-2024 00:24 by rmk") (* ; "Edited 30-Jan-2024 09:54 by rmk") (* ; "Edited 21-Aug-2021 13:12 by rmk:") (* ; "Edited 17-Aug-2020 08:46 by rmk:") (CL:UNLESS [AND (LISTP MAPPING) - (FOR PAIR IN MAPPING AS I TO 10 ALWAYS (AND (LISTP PAIR) - (CHARCODEP (CAR PAIR)) - (CHARCODEP (CADR PAIR] - (SETQ MAPPING (READ-UNICODE-MAPPING MAPPING T))) + (FOR PAIR R IN MAPPING AS I TO 10 + ALWAYS (AND (LISTP PAIR) + (CHARCODEP (CAR PAIR)) + [FIXP (SETQ R (CAR (MKLIST (CADR PAIR] + (CHARCODEP (IABS R] - (* ;; "MAPPING is the list of numeric code correspondence pairs constructed by applying READ-UNICODE-MAPPING to a Unicode mapping file.") + (* ;; "Seems like the argument is not already a list of mapping pairs (perhaps with a combiner), presumably a list of charsets to be read.") - (* ;; "This produces two recoding arrays, one maps left-side codes into right-side codes (e.g. XCCS or ISO8859-1 to Unicode), for printing, the other maps right-side (Unicode) codes to corresponding right-side codes (e.g. 
XCCS).") + (SETQ MAPPING (READ-UNICODE-MAPPING MAPPING))) + + (* ;; "MAPPING is the list of numeric code correspondence pairs constructed by applying READ-UNICODE-MAPPING to Unicode mapping files.") + + (* ;; "This updates or produces two recoding arrays, one maps left-side codes into right-side codes (e.g. XCCS or ISO8859-1 to Unicode), for printing, the other maps right-side (Unicode) codes to corresponding left-side codes (e.g. XCCS).") (* ;; "") - (* ;; "We assume that the left-to-right mapping into Unicode is functional, so that each left code maps to a unique right (Unicode) code, because Unicode is presumably the most refined coding scheme. But several Unicode codes may map to the same left code, for logically different codes that happen to have the same glyphs. In that case the heuristic is to map each %"from%" code to the lowest of the possible %"to%" codes. This means that round-trip reading/writing or writing/reading from one or both starting points may not always be lossless.") - - (* ;; " ") - - (* ;; " Each recoding array has 256 elements, one for each possible high-order byte of a character code. An array entry is either NIL, a 256-array of codes indexed by low-order bytes, or an alist of (lower-order-bytes . codes). The latter is used to save space for sparsely populated character sets.") + (* ;; "If REINSTALL is T, the new mapping vectors replace the current maps in the *XCCSTOUNICODE* and *UNICODETOXCCS* global variables. Otherwise MAPPING is merged into the existing global tables (which are created if they do not exist yet).") (* ;; "") - (* ;; "The element 256 of each array contains a hash table for characters that might be encountered in XCCS memory or Unicode files for which there is no mapping. 
Element 257 contains the corresponding inverse unmapped hash-array, so that UNICODE.TRANSLATE can update them consistently.") - - (* ;; "") - - (* ;; "UNICODE.TRANSLATE assigns an unmapped Unicode character to a %"not used%" XCCS code position (from 5,0 to 40,FF, leaving other low not-used sets for other internal uses (TEDIT?).") - - (* ;; "") - - (* ;; - "An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF") - - (* ;; "") - - (* ;; "If REINSTALL is T, the new mapping vectors replace the current maps in the *XCCSTOUNICODE* and *UNICODETOXCCS* global variables. Values are also installed if those variables are NIL.") - - (* ;; "") - - (LET ((LTORARRAY (CL:MAKE-ARRAY (IPLUS 2 N-TRANSLATION-SEGMENTS) - :INITIAL-ELEMENT NIL)) - (RTOLARRAY (CL:MAKE-ARRAY (IPLUS 2 N-TRANSLATION-SEGMENTS) - :INITIAL-ELEMENT NIL))) - - (* ;; "The left-to-right direction (into Unicode). We start by distributing the mappings into alists in arrays indexed by the higher-order (charaset set byte). The second loop converts long alists into arrays.") - - [FOR M LEFTC RBASE RCODES IN MAPPING EACHTIME (SETQ RCODES (CDR M)) - (SETQ RBASE (CAR RCODES)) - UNLESS (IGEQ RBASE MISSINGCODE) DO (SETQ LEFTC (CAR M)) - - (* ;; "(CDR RCODES) contains combiners on the base") - - (CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK) - (CL:IF (CDR RCODES) - RCODES - RBASE)) - (CL:SVREF LTORARRAY (LRSH LEFTC - TRANSLATION-SHIFT] - (FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS) - WHEN (IGREATERP (LENGTH (CL:SVREF LTORARRAY I)) - MAX-ALIST-LENGTH) DO - (* ;; "Leave it alone if the alist is short") - - (SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE - :INITIAL-ELEMENT NIL)) - (FOR P IN (CL:SVREF LTORARRAY I) - DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P) - TRANSLATION-MASK)) - (CDR P))) - (CL:SETF (CL:SVREF LTORARRAY I) - CSA)) - - (* ;; "") - - (* ;; "Now the right-to-left direction (from Unicode). 
Here we have to detect and compensate for ambiguity.") - - (FOR M LEFTC RBASE RCOMBINERS PREV IN MAPPING EACHTIME (SETQ RBASE (CADR M)) - (SETQ RCOMBINERS (CDDR M)) - UNLESS (OR (IGEQ RBASE MISSINGCODE) - RCOMBINERS) DO - (* ;; - "Have we already seen an explicit mapping from right to left?") - - (SETQ LEFTC (CAR M)) - [SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK) - (CL:SVREF RTOLARRAY (LRSH RBASE - TRANSLATION-SHIFT] - (IF (NULL PREV) - THEN (CL:PUSH (CONS (LOGAND RBASE TRANSLATION-MASK) - LEFTC) - (CL:SVREF RTOLARRAY (LRSH RBASE - TRANSLATION-SHIFT))) - ELSEIF (IGREATERP (CDR PREV) - LEFTC) - THEN (RPLACD PREV LEFTC))) - (FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS) - WHEN (IGREATERP (LENGTH (CL:SVREF RTOLARRAY I)) - MAX-ALIST-LENGTH) DO - (* ;; "Long list, make an array") - - (SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE - :INITIAL-ELEMENT NIL)) - (FOR P IN (CL:SVREF RTOLARRAY I) - DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P) - TRANSLATION-MASK)) - (CDR P))) - (CL:SETF (CL:SVREF RTOLARRAY I) - CSA)) - - (* ;; "") - - (* ;; "Allocate the hash arrays for future out-of-map codes. 
We we have to keep track of the next available and last possible codes, as well as the first available, for error checking.") - - (CL:SETF (CL:SVREF LTORARRAY N-TRANSLATION-SEGMENTS) - (LIST (HASHARRAY 10) - (CHARCODE.DECODE "5,0") - (CHARCODE.DECODE "40,0") - (CHARCODE.DECODE "5,0"))) - (CL:SETF (CL:SVREF RTOLARRAY N-TRANSLATION-SEGMENTS) - (LIST (HASHARRAY 10) - (CHARCODE.DECODE "U+E000") - (CHARCODE.DECODE "U+F8FF") - (CHARCODE.DECODE "U+E000"))) - - (* ;; "Now put in the inverse unmapped hash arrays") - - (CL:SETF (CL:SVREF LTORARRAY (ADD1 N-TRANSLATION-SEGMENTS)) - (CL:SVREF RTOLARRAY N-TRANSLATION-SEGMENTS)) - (CL:SETF (CL:SVREF RTOLARRAY (ADD1 N-TRANSLATION-SEGMENTS)) - (CL:SVREF LTORARRAY N-TRANSLATION-SEGMENTS)) - - (* ;; "") - - (CL:WHEN [OR REINSTALL (NULL (GETATOMVAL '*XCCSTOUNICODE*] - (SETQ *XCCSTOUNICODE* LTORARRAY) - (SETQ *UNICODETOXCCS* RTOLARRAY)) - (LIST LTORARRAY RTOLARRAY]) + (if REINSTALL + then (SETQ *XCCS-LOADED-CHARSETS* (SETQ *UNICODE-LOADED-CHARSETS* NIL)) + (SETQ *NEXT-PRIVATE-XCCSCODE* FIRST-PRIVATE-XCCSCODE) + (SETQ *NEXT-PRIVATE-UNICODE* FIRST-PRIVATE-UNICODE) + (LET [(TABLE (HASHARRAY (LENGTH MAPPING))) + (INVERSETABLE (HASHARRAY (LENGTH MAPPING] + (MERGE-UNICODE-TRANSLATION-TABLES NIL MAPPING TABLE INVERSETABLE) + (SETQ *XCCSTOUNICODE* TABLE) + (SETQ *UNICODETOXCCS* INVERSETABLE) + (LIST *XCCSTOUNICODE* *UNICODETOXCCS*)) + else (CL:UNLESS (BOUNDP '*NEXT-PRIVATE-XCCSCODE*) + (SETQ *NEXT-PRIVATE-XCCSCODE* FIRST-PRIVATE-XCCSCODE) + (SETQ *NEXT-PRIVATE-UNICODE* FIRST-PRIVATE-UNICODE)) + (MERGE-UNICODE-TRANSLATION-TABLES NIL MAPPING]) (MERGE-UNICODE-TRANSLATION-TABLES - [LAMBDA (ADDITION TABLE INVERSETABLE) (* ; "Edited 27-Mar-2024 12:10 by rmk") + [LAMBDA (INVERSE MAPPING TABLE INVERSETABLE) (* ; "Edited 31-Jan-2025 17:45 by rmk") + (* ; "Edited 26-Jan-2025 12:58 by rmk") + (* ; "Edited 22-Jan-2025 08:20 by rmk") + (* ; "Edited 19-Jan-2025 15:58 by rmk") + (* ; "Edited 18-Jan-2025 11:49 by rmk") + (* ; "Edited 27-Mar-2024 12:10 by 
rmk") (* ; "Edited 3-Feb-2024 12:46 by rmk") (* ; "Edited 31-Jan-2024 10:06 by rmk") - (* ;; "ADDITION is a pair containing a mapping array and its inverse, or a list that maps codes in the forward direction. ") + (* ;; "MAPPINGS is a list of pairs that map domain codes to range codes. TABLE and INVERSETABLE default to *XCCSTOUNICODE* *UNICODETOXCCS* respectively. ") - (* ;; "The forward ADDITION mappings are merged destructively into TABLE and its inverses are merged into INVERSETABLE. ") + (CL:UNLESS TABLE + [SETQ TABLE (OR *XCCSTOUNICODE* (SETQ *XCCSTOUNICODE* (HASHARRAY (LENGTH MAPPING]) + (CL:UNLESS INVERSETABLE + [SETQ INVERSETABLE (OR *UNICODETOXCCS* (SETQ *UNICODETOXCCS* (HASHARRAY (LENGTH MAPPING]) + (for M D R OLDR in MAPPING first (CL:IF INVERSE (swap TABLE INVERSETABLE)) + eachtime (SETQ D (CAR M)) + (SETQ R (CADR M)) - (CL:UNLESS (AND (LISTP ADDITION) - (CL:ARRAYP (CAR ADDITION)) - (CL:ARRAYP (CADR ADDITION))) + (* ;; "We don't do combiners and we don't go outside of SMALLP's") + unless (OR (LISTP D) + (LISTP R)) when (AND (SMALLP D) + (SMALLP R)) do - (* ;; "Make temporary mapping arrays when ADDTION is a list of corresponding code-pairs.") + (* ;; "The (CONS R OLDR) deals with alternatives: (U X1) (U X2) => (U (X1 X2)), lowest code first. Those are only possible in the U-to-X direction when the tables contain (X1 U) and (X2 U). 
There are no duplicates/alternative table entries in the X-to-U direction.") - (SETQ ADDITION (MAKE-UNICODE-TRANSLATION-TABLES ADDITION))) - (MERGE-UNICODE-TRANSLATION-TABLES1 (CAR ADDITION) - TABLE) - (MERGE-UNICODE-TRANSLATION-TABLES1 (CADR ADDITION) - INVERSETABLE) + (SETQ OLDR (GETHASH D TABLE)) + (CL:UNLESS (MEMB R OLDR) + (PUTHASH D (SORT (CONS R OLDR)) + TABLE)) + (swap D R) + (SETQ OLDR (GETHASH D INVERSETABLE)) + (CL:UNLESS (MEMB R OLDR) + (PUTHASH D (SORT (CONS R OLDR)) + INVERSETABLE))) (LIST TABLE INVERSETABLE]) -(MERGE-UNICODE-TRANSLATION-TABLES1 - [LAMBDA (ADDARRAY TARGETARRAY) (* ; "Edited 2-Feb-2024 13:18 by rmk") - (* ; "Edited 31-Jan-2024 00:22 by rmk") - (for I TELT AELT (A _ ADDARRAY) from 0 TO (SUB1 N-TRANSLATION-SEGMENTS) - when (SETQ AELT (CL:SVREF A I)) - do - (SETQ TELT (CL:SVREF TARGETARRAY I)) - (CL:WHEN (EQ I 97)) - (CL:SETF - (CL:SVREF TARGETARRAY I) - (if TELT - then - (* ;; "Have to resolve, union giving priority to AELT. Have to deal with ALIST vs array cases on both sides.") +(UNICODE.UNMAPPED + [LAMBDA (CODE TABLE DONTFAKE) (* ; "Edited 22-Jan-2025 08:19 by rmk") + (* ; "Edited 19-Jan-2025 22:02 by rmk") + (* ; "Edited 18-Jan-2025 12:02 by rmk") + (* ; "Edited 2-Feb-2024 23:52 by rmk") + (* ; "Edited 31-Jan-2024 10:07 by rmk") + (* ; "Edited 11-Aug-2020 20:23 by rmk:") - (if (LISTP TELT) - then (if (LISTP AELT) - then - (* ;; "2 alists") + (* ;; "This is the slow fall-out when UNICODE.TRANSLATE determines that CODE has no fast mapping in TABLE.") - (SETQ TELT (APPEND AELT (for TE in TELT - unless (ASSOC (CAR TE) - AELT) collect TE))) + (* ;; "") - (* ;; "Make an array if alist is too big") + (* ;; "If we have not already installed the mapping segment for that code, we try to retrieve it from the numeric file. If that segment mapping doesn't exist or doesn't have an entry for CODE, we fake up a mapping with a negative range in both directions. 
One way or the other, there will be an entry for that segment in both mapping vectors.") - (if (IGREATERP (LENGTH TELT) - MAX-ALIST-LENGTH) - then (for TE (TARRAY _ (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE - :INITIAL-ELEMENT NIL)) - in TELT do (CL:SETF (CL:SVREF TARRAY (CAR TE)) - (CDR TE)) finally (RETURN TARRAY)) - else TELT) - else - (* ;; - "Old Alist with new array. Copy the TELT's into empty array positions") + (* ;; "") - (for TE TINDEX in TELT eachtime (SETQ TINDEX (CAR TE)) - unless (CL:SVREF AELT TINDEX) - do (CL:SETF (CL:SVREF AELT TINDEX) - (CDR TE))) - AELT) - elseif (LISTP AELT) - then - (* ;; "Old array with new Alist") + (PROG ((INVERSE (EQ TABLE *UNICODETOXCCS*)) + RANGE HASH) - (for AE in AELT do (CL:SETF (CL:SVREF TELT (CAR AE)) - (CDR AE))) - TELT - else - (* ;; "2 arrays. Smash AE value into TELT") + (* ;; "If we already looked up CODE's character set in a file, then we have already filled in its information in the translation table. If it didn't have a code for a particular character, then we fake it here. Faked codes are negative, so we can detect them easily, and interpret them with IABS.") - (for J AE from 0 to (SUB1 TRANSLATION-SEGMENT-SIZE) - when (SETQ AE (CL:SVREF AELT J)) do (CL:SETF (CL:SVREF TELT J) - AE)) - TELT) - else AELT]) + (CL:WHEN (AND (UNICODE-EXTEND-TRANSLATION? CODE TABLE) + (SETQ RANGE (GETHASH CODE TABLE))) + + (* ;; "We might have gotten the segment that didn't have an entry for CODE.") + + (RETURN RANGE)) + + (* ;; "") + + (CL:UNLESS DONTFAKE + + (* ;; "Our attempt at extending the known tables did not provide a mapping for CODE. So we fake it up with the next unused private code in the code space. ") + + (* ;; "The number of possible faked mappings is determined by the number of private-use Unicodes, since the XCCS character space is pretty sparse. The codes don't have to come from the same part of the code space, and the NEXTCODEs are saved in global variables. 
The last available codes are constants.") + + (CL:WHEN (IEQP *NEXT-PRIVATE-XCCSCODE* LAST-PRIVATE-XCCSCODE) + (* ; + "Same number of available codes both ways") + (ERROR "EXHAUSTED RANGE FOR UNMAPPED CODES")) + (if INVERSE + then (SETQ RANGE *NEXT-PRIVATE-XCCSCODE*) + (add *NEXT-PRIVATE-XCCSCODE* 1) + else (SETQ RANGE *NEXT-PRIVATE-UNICODE*) + (add *NEXT-PRIVATE-UNICODE* 1)) + (MERGE-UNICODE-TRANSLATION-TABLES INVERSE (CONS (LIST CODE RANGE))) + + (* ;; "CONS because of LIST convention so we can eventually distinguish combiners.") + + (RETURN (CONS RANGE)))]) + +(UNICODE-EXTEND-TRANSLATION? + [LAMBDA (CODE TABLE) (* ; "Edited 26-Jan-2025 11:26 by rmk") + (* ; "Edited 21-Jan-2025 22:31 by rmk") + (* ; "Edited 18-Jan-2025 12:40 by rmk") + (* ; "Edited 13-Jan-2025 23:50 by rmk") + (* ; "Edited 26-Aug-2024 16:49 by rmk") + (* ; "Edited 27-Mar-2024 23:02 by rmk") + (* ; "Edited 5-Feb-2024 13:48 by rmk") + (* ; "Edited 3-Feb-2024 12:40 by rmk") + + (* ;; "There is currently no mapping for CODE in TABLE, hopefully just because the relevant character-set mapping has not been installed. We infer from TABLE whether CODE is an XCCS or UNICODE code and look for the proper mapping table (forward or inverted) for its character set. ") + + (* ;; "We record which character sets we have already expanded so we don't do them again.") + + (LET ((CHARSET (\CHARSET CODE)) + (INVERSE (EQ TABLE *UNICODETOXCCS*)) + MAPPING FILE) + + (* ;; "If we already looked for CHARSET in the file and found anything, it has already been merged. 
Otherwise, it would just fail again") + + (CL:UNLESS (MEMB CHARSET (CL:IF INVERSE + *UNICODE-LOADED-CHARSETS* + *XCCS-LOADED-CHARSETS*)) + + (* ;; "Don't try this charset again.") + + (CL:IF INVERSE + (push *UNICODE-LOADED-CHARSETS* CHARSET) + (push *XCCS-LOADED-CHARSETS* CHARSET)) + (SETQ FILE (FINDFILE (CL:IF INVERSE + 'INVERTED-UNICODE-MAPPINGS.TXT + 'UNICODE-MAPPINGS.TXT) + T UNICODEDIRECTORIES)) + + (* ;; "The mappings files are indexed by CHARSET.") + + (CL:WHEN [AND FILE (SETQ MAPPING (CL:WITH-OPEN-FILE (STREAM FILE :INPUT) + (CL:WHEN (FILEPOS (CONCAT "[" CHARSET " ") + STREAM NIL NIL NIL T) + (READ STREAM] + + (* ;; + "Merge MAPPING into both tables, respecting the direction indicated by TABLE. ") + + (MERGE-UNICODE-TRANSLATION-TABLES INVERSE MAPPING) + T))]) ) (DEFINEQ -(INVERT-ALL-UNICODE-MAPPINGS - [LAMBDA (FILE) (* ; "Edited 27-Mar-2024 14:50 by rmk") - (* ; "Edited 5-Feb-2024 13:14 by rmk") - (* ; "Edited 3-Feb-2024 09:16 by rmk") - - (* ;; "Reads all the XCCS-to-UNICODE mapping files that we know about, and produces a 2-level index that maps each UNICODE code back to the one or more XCCS corresponding XCCS codes.") - - (* ;; "The first index level groups all the unicode codes that have the same high-ordere byte. The index is sorted by the high-order bytes, the pairs within each group are sorted by their unicode. If a given unicode maps to multiple XCCS codes, the pair with the lowest XCCS code comes first.") - - (* ;; "GIven a UCODE, the lookup for the lowest XCCS is") - - (* ;; " (CADR (ASSOC UCODE (CADR (ASSOC (LRSH UCODE 8) INDEX)))).") - - (* ;; "If FILE is given, the resulting is written to that file.") - - (LET (INDEX) - [for M in (READ-UNICODE-MAPPING (for N in XCCS-CHARSETS collect (CAR N)) - T) - do (push [CDR (OR (ASSOC (LRSH (CADR M) - 8) - INDEX) - (CAR (push INDEX (CONS (LRSH (CADR M) - 8] - (LIST (CADR M) - (CAR M] - - (* ;; "Sort within groups to get the lowest XCCS code first. 
But also push the sublists down an extra CONS, so that a subsequent READ will get them all.") - - [for I in INDEX do (change (CDR I) - (CONS (SORT DATUM (FUNCTION (LAMBDA (M1 M2) - (OR (ILESSP (CAR M1) - (CAR M2)) - (AND (EQ (CAR M1) - (CAR M2)) - (ILESSP (CADR M1) - (CADR M2] - (SETQ INDEX (SORT INDEX T)) (* ; "Sort groups") - (if FILE - then (CL:WITH-OPEN-FILE (STREAM [PACKFILENAME 'DIRECTORY (CAR (MKLIST UNICODEDIRECTORIES - )) - 'BODY - (CL:IF (EQ FILE T) - 'INVERTED-UNICODE-MAPPINGS.TXT - (PACKFILENAME 'BODY FILE 'EXTENSION - 'TXT))] - :DIRECTION :OUTPUT :IF-EXISTS :NEW-VERSION) - - (* ;; "We can FFILEPOS for %"[nnn %" then READ. Or just READFILE") - - (for I in INDEX do (PRINTOUT STREAM "[" (CAR I) - " " - (CADR I) - "]" T)) - (FULLNAME STREAM)) - else INDEX]) - (ALL-UNICODE-MAPPINGS - [LAMBDA (FILE) (* ; "Edited 27-Mar-2024 14:48 by rmk") + [LAMBDA (INVERTED FILE) (* ; "Edited 31-Jan-2025 17:46 by rmk") + (* ; "Edited 26-Jan-2025 13:40 by rmk") + (* ; "Edited 22-Jan-2025 14:07 by rmk") + (* ; "Edited 19-Jan-2025 12:20 by rmk") + (* ; "Edited 17-Jan-2025 22:32 by rmk") + (* ; "Edited 15-Jan-2025 09:49 by rmk") + (* ; "Edited 27-Mar-2024 14:48 by rmk") (* ; "Edited 5-Feb-2024 13:14 by rmk") (* ; "Edited 3-Feb-2024 09:16 by rmk") - (* ;; "Reads all the XCCS-to-UNICODE mapping files that we know about, and iproduces a 2-level index that maps each XCCS code to the corresponding UNICODE.") + (* ;; "Reads all the XCCS-to-UNICODE mapping files that we know about, and produces a 2-level index that maps between XCCS codes and UNICODE codes, depending on INVERTED.") - (* ;; "The first index level groups all the XCCS codes in the same character set. The index is sorted by the high-order bytes, the pairs within each group are sorted by their XCCS code. ") + (* ;; "The first index level segments all the domain codes according to their character sets. The segments are sorted by character set, the pairs within each segment are sorted by their domain codes. 
") - (* ;; "GIven a XCCS code, the lookup for the corresonding Unicode is") + (* ;; + "E.g. if INVERTED=NIL and given a XCCS code, the lookup for the corresponding Unicode(s) is") - (* ;; " (CADR (ASSOC XCCSCODE (CADR (ASSOC (LRSH XCCSCODE 8) INDEX)))).") + (* ;; " (CADR (ASSOC XCCSCODE (\CHARSET XCCSCODE) INDEX)))).") - (* ;; "If FILE is given, the resulting is written to that file. If FILE is T, the file is UNICODE-MAPPINGS.TXT") + (* ;; "If FILE is not NIL, the result is written to a file. If FILE is T, the file is either UNICODE-MAPPINGS.TXT or INVERTED-UNICODED-MAPPINGS.TXT, depending on INVERTED.") (LET (INDEX) - [for M in (READ-UNICODE-MAPPING (for N in XCCS-CHARSETS collect (CAR N)) - T) - do (push [CDR (OR (ASSOC (LRSH (CAR M) - 8) - INDEX) - (CAR (push INDEX (CONS (LRSH (CAR M) - 8] - (LIST (CAR M) - (CADR M] + (for PAIR DOMAIN RANGE CHARSET in (READ-UNICODE-MAPPING 'ALL) eachtime (SETQ DOMAIN + (CAR PAIR)) + (SETQ RANGE (CADR PAIR)) + + (* ;; + "(LISTP RANGE) is a combiner, ignored for now.") + unless (LISTP RANGE) do (CL:WHEN INVERTED (SWAP DOMAIN RANGE)) - (* ;; "Push the sublists down an extra CONS, so that a subsequent READ will get them all.") + (* ;; + "One segment for each high-byte character set. This aligns with UNICODE-EXTEND.TRANSLATION?") - [for I in INDEX do (change (CDR I) - (CONS (SORT DATUM (FUNCTION (LAMBDA (M1 M2) - (OR (ILESSP (CAR M1) - (CAR M2)) - (AND (EQ (CAR M1) - (CAR M2)) - (ILESSP (CADR M1) - (CADR M2] - (SETQ INDEX (SORT INDEX T)) (* ; "Sort groups") + [SETQ CHARSET (OR (ASSOC (\CHARSET DOMAIN) + INDEX) + (CAR (push INDEX (CONS (\CHARSET DOMAIN] + + (* ;; "For alternative mappings (in the U-to-X direction) we end up with (D R1 R2 ...). 
(CADR is the first (and almost always) the only one.") + + (pushnew [CDR (OR (ASSOC DOMAIN (CDR CHARSET)) + (CAR (push (CDR CHARSET) + (CONS DOMAIN] + RANGE)) + + (* ;; "Push the charset mappings down an extra CONS, so that a subsequent READ will get them all after a FILEPOS search for super-paren [") + + [for CS in INDEX do (for M in (CDR CS) when (CDDR M) do + (* ;; + "Sort the range alternatives, if any") + + (change (CDR M) + (SORT DATUM))) + + (* ;; "Sort by domain codes and push down a level") + + (change (CDR CS) + (CONS (SORT DATUM T] + (SETQ INDEX (SORT INDEX T)) (* ; "Sort character sets") (if FILE - then (CL:WITH-OPEN-FILE (STREAM [PACKFILENAME 'DIRECTORY (CAR (MKLIST UNICODEDIRECTORIES - )) - 'BODY - (CL:IF (EQ FILE T) - 'UNICODE-MAPPINGS.TXT - (PACKFILENAME 'BODY FILE 'EXTENSION - 'TXT))] - :DIRECTION :OUTPUT :IF-EXISTS :NEW-VERSION) + then (SETQ FILE (PACKFILENAME 'BODY (if (NEQ FILE T) + then FILE + elseif INVERTED + then 'INVERTED-UNICODE-MAPPINGS + else 'UNICODE-MAPPINGS) + 'DIRECTORY + (CAR (MKLIST UNICODEDIRECTORIES)) + 'EXTENSION + 'TXT)) + (CL:WITH-OPEN-FILE (STREAM FILE :DIRECTION :OUTPUT :IF-EXISTS :NEW-VERSION) - (* ;; "We can FFILEPOS for %"[nnn %" then READ. Or just READFILE") + (* ;; + "We can FILEPOS for %"[nnn %" then READ for each segment. Or just READFILE to get them all.") (for I in INDEX do (PRINTOUT STREAM "[" (CAR I) " " (CADR I) - "]" T)) + "]" T T)) + (PRINTOUT STREAM "STOP" T) (FULLNAME STREAM)) else INDEX]) ) @@ -1388,35 +1336,49 @@ (RPAQ? *UNICODETOXCCS* ) -(RPAQ? *INVERTED-UNICODE-MAPPINGS* ) +(RPAQ? *XCCS-LOADED-CHARSETS* ) + +(RPAQ? 
*UNICODE-LOADED-CHARSETS* ) (DECLARE%: DOEVAL@COMPILE DONTCOPY -(GLOBALVARS *XCCSTOUNICODE* *UNICODETOXCCS*) -) -(DECLARE%: DONTEVAL@LOAD DOCOPY - -(MAKE-UNICODE-TRANSLATION-TABLES 'DEFAULT) +(GLOBALVARS *XCCSTOUNICODE* *UNICODETOXCCS* *NEXT-PRIVATE-UNICODE* *NEXT-PRIVATE-XCCSCODE* + *XCCS-LOADED-CHARSETS* *UNICODE-LOADED-CHARSETS*) ) (DECLARE%: EVAL@COMPILE DONTCOPY (DECLARE%: EVAL@COMPILE -(RPAQQ TRANSLATION-SEGMENT-SIZE 128) +(RPAQ FIRST-PRIVATE-UNICODE (HEXNUM? "E000")) -(RPAQQ MAX-ALIST-LENGTH 10) +(RPAQ LAST-PRIVATE-UNICODE (HEXNUM? "F8FF")) -(RPAQ N-TRANSLATION-SEGMENTS (IQUOTIENT 65536 TRANSLATION-SEGMENT-SIZE)) +(RPAQ FIRST-PRIVATE-XCCSCODE (CHARCODE "200,0")) -(RPAQ TRANSLATION-SHIFT (INTEGERLENGTH (SUB1 TRANSLATION-SEGMENT-SIZE))) - -(RPAQ TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE)) +(RPAQ LAST-PRIVATE-XCCSCODE (CHARCODE "230,377")) -(CONSTANTS (TRANSLATION-SEGMENT-SIZE 128) - (MAX-ALIST-LENGTH 10) - (N-TRANSLATION-SEGMENTS (IQUOTIENT 65536 TRANSLATION-SEGMENT-SIZE)) - (TRANSLATION-SHIFT (INTEGERLENGTH (SUB1 TRANSLATION-SEGMENT-SIZE))) - (TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE))) +(CONSTANTS (FIRST-PRIVATE-UNICODE (HEXNUM? "E000")) + (LAST-PRIVATE-UNICODE (HEXNUM? 
"F8FF")) + (FIRST-PRIVATE-XCCSCODE (CHARCODE "200,0")) + (LAST-PRIVATE-XCCSCODE (CHARCODE "230,377"))) ) + +(DECLARE%: EVAL@COMPILE + +(PUTPROPS TRUECODEP MACRO (OPENLAMBDA (RANGE TABLE) + + (* ;; "Return NIL if RANGE is a fake range in TABLE, otherwise RANGE.") + + (CL:UNLESS (CL:IF (EQ TABLE *XCCSTOUNICODE*) + (AND (IGEQ RANGE FIRST-PRIVATE-UNICODE) + (ILEQ RANGE LAST-PRIVATE-UNICODE)) + (AND (IGEQ RANGE FIRST-PRIVATE-XCCSCODE) + (ILEQ RANGE LAST-PRIVATE-XCCSCODE))) + RANGE))) +) +) +(DECLARE%: DONTEVAL@LOAD DOCOPY + +(MAKE-UNICODE-TRANSLATION-TABLES 'ALL) ) @@ -1806,25 +1768,11 @@ "," (OCTALSTRING (LOGAND CODE 255]) ) -(DEFINEQ -(UNHEXSTRING - [LAMBDA (HSTRING) (* ; "Edited 10-Mar-2024 12:56 by rmk") - (* ;; "Converts a hexstring to its number.") - (for I B (N _ 0) from 1 while (SETQ B (NTHCHARCODE HSTRING I)) - do [SETQ N (IPLUS (LLSH N 4) - (if (AND (IGEQ B (CHARCODE 0)) - (ILEQ B (CHARCODE 9))) - then (IDIFFERENCE B (CHARCODE 0)) - elseif (AND (IGEQ (SETQ B (UCASECODE B)) - (CHARCODE A)) - (ILEQ B (CHARCODE F))) - then (IPLUS 10 (IDIFFERENCE B (CHARCODE A))) - else (ERROR "INVALID HEX CHARACTER" (NTHCHARCODE HSTRING I] - finally (RETURN N]) -) +(* ; "debugging") + (DEFINEQ (SHOWCHARS @@ -1857,6 +1805,18 @@ (CHARACTER C) T]) ) +(DECLARE%: DOEVAL@LOAD DONTCOPY +(DECLARE%: EVAL@COMPILE + +(PUTPROPS HEXCHAR MACRO ((CODE) + (HEXSTRING CODE))) + +(PUTPROPS OCTALCHAR MACRO [(CODE) + (CONCAT (OCTALSTRING (\CHARSET CODE)) + "," + (OCTALSTRING (LOGAND CODE 255]) +) +) (DECLARE%: EVAL@COMPILE DONTCOPY (FILESLOAD (FROM LOADUPS) @@ -1865,23 +1825,23 @@ (PUTPROPS UNICODE FILETYPE :TCOMPL) (DECLARE%: DONTCOPY - (FILEMAP (NIL (4068 18214 (UTF8.OUTCHARFN 4078 . 6876) (UTF8.INCCODEFN 6878 . 12490) (UTF8.PEEKCCODEFN - 12492 . 17232) (\UTF8.BACKCCODEFN 17234 . 18212)) (18215 22469 (UTF16BE.OUTCHARFN 18225 . 19135) ( -UTF16BE.INCCODEFN 19137 . 20153) (UTF16BE.PEEKCCODEFN 20155 . 21386) (\UTF16BE.BACKCCODEFN 21388 . -22467)) (22470 26757 (UTF16LE.OUTCHARFN 22480 . 
23487) (UTF16LE.INCCODEFN 23489 . 24505) ( -UTF16LE.PEEKCCODEFN 24507 . 25674) (\UTF16LE.BACKCCODEFN 25676 . 26755)) (26758 29687 (READBOM 26768 - . 28772) (WRITEBOM 28774 . 29685)) (29717 32907 (MAKE-UNICODE-FORMATS 29727 . 32905)) (33004 41527 ( -UNICODE.UNMAPPED 33014 . 35088) (UNICODE-EXTEND-TRANSLATION? 35090 . 37150) (UTF8.BINCODE 37152 . -39731) (\UTF8.FETCHCODE 39733 . 41525)) (41528 47049 (UTF8.VALIDATE 41538 . 44135) ( -UTF8-SIZE-FROM-BYTE1 44137 . 44569) (NUTF8-BYTE1-BYTES 44571 . 45308) (NUTF8-CODE-BYTES 45310 . 46367) - (NUTF8-STRING-BYTES 46369 . 47047)) (48480 48829 (XTOUCODE 48490 . 48658) (UTOXCODE 48660 . 48827)) ( -49772 55818 (READ-UNICODE-MAPPING-FILENAMES 49782 . 52729) (READ-UNICODE-MAPPING 52731 . 55816)) ( -55885 69215 (MAKE-UNICODE-TRANSLATION-TABLES 55895 . 64967) (MERGE-UNICODE-TRANSLATION-TABLES 64969 . -66103) (MERGE-UNICODE-TRANSLATION-TABLES1 66105 . 69213)) (69216 76324 (INVERT-ALL-UNICODE-MAPPINGS -69226 . 72847) (ALL-UNICODE-MAPPINGS 72849 . 76322)) (77292 89723 (WRITE-UNICODE-MAPPING 77302 . 81052 -) (WRITE-UNICODE-INCLUDED 81054 . 85776) (WRITE-UNICODE-MAPPING-HEADER 85778 . 87026) ( -WRITE-UNICODE-MAPPING-FILENAME 87028 . 88558) (HEXSTRING 88560 . 89721)) (89724 90400 ( -XCCS-UTF8-AFTER-OPEN 89734 . 90398)) (92925 98427 (UTF8HEXSTRING 92935 . 95140) (XTOUSTRING 95142 . -98062) (XCCSSTRING 98064 . 98425)) (98428 99316 (UNHEXSTRING 98438 . 99314)) (99317 100827 (SHOWCHARS -99327 . 100825))))) + (FILEMAP (NIL (4369 19181 (UTF8.OUTCHARFN 4379 . 7286) (UTF8.SLUG.OUTCHARFN 7288 . 7843) ( +UTF8.INCCODEFN 7845 . 13457) (UTF8.PEEKCCODEFN 13459 . 18199) (\UTF8.BACKCCODEFN 18201 . 19179)) ( +19182 23436 (UTF16BE.OUTCHARFN 19192 . 20102) (UTF16BE.INCCODEFN 20104 . 21120) (UTF16BE.PEEKCCODEFN +21122 . 22353) (\UTF16BE.BACKCCODEFN 22355 . 23434)) (23437 27724 (UTF16LE.OUTCHARFN 23447 . 24454) ( +UTF16LE.INCCODEFN 24456 . 25472) (UTF16LE.PEEKCCODEFN 25474 . 26641) (\UTF16LE.BACKCCODEFN 26643 . +27722)) (27725 30772 (READBOM 27735 . 
29804) (WRITEBOM 29806 . 30770)) (30802 34367 ( +MAKE-UNICODE-FORMATS 30812 . 34365)) (34464 38849 (UTF8.BINCODE 34474 . 37053) (\UTF8.FETCHCODE 37055 + . 38847)) (38850 44371 (UTF8.VALIDATE 38860 . 41457) (UTF8-SIZE-FROM-BYTE1 41459 . 41891) ( +NUTF8-BYTE1-BYTES 41893 . 42630) (NUTF8-CODE-BYTES 42632 . 43689) (NUTF8-STRING-BYTES 43691 . 44369)) +(46099 48246 (XTOUCODE 46109 . 46281) (UTOXCODE 46283 . 46564) (XTOUCODE? 46566 . 47387) (UTOXCODE? +47389 . 48244)) (49483 56904 (READ-UNICODE-MAPPING-FILENAMES 49493 . 53181) (READ-UNICODE-MAPPING +53183 . 56902)) (56971 68830 (MAKE-UNICODE-TRANSLATION-TABLES 56981 . 60073) ( +MERGE-UNICODE-TRANSLATION-TABLES 60075 . 62788) (UNICODE.UNMAPPED 62790 . 66005) ( +UNICODE-EXTEND-TRANSLATION? 66007 . 68828)) (68831 74210 (ALL-UNICODE-MAPPINGS 68841 . 74208)) (75801 +88232 (WRITE-UNICODE-MAPPING 75811 . 79561) (WRITE-UNICODE-INCLUDED 79563 . 84285) ( +WRITE-UNICODE-MAPPING-HEADER 84287 . 85535) (WRITE-UNICODE-MAPPING-FILENAME 85537 . 87067) (HEXSTRING +87069 . 88230)) (88233 88909 (XCCS-UTF8-AFTER-OPEN 88243 . 88907)) (91434 96936 (UTF8HEXSTRING 91444 + . 93649) (XTOUSTRING 93651 . 96571) (XCCSSTRING 96573 . 96934)) (96963 98473 (SHOWCHARS 96973 . 
98471 +))))) STOP diff --git a/library/UNICODE.LCOM b/library/UNICODE.LCOM index 296dd873b197b7e9e461a775bdc91d583738fd8e..b081f62401872b32cfb5f0c223fd55eddf29051e 100644 GIT binary patch delta 9527 zcmb_ieQ*1X5l9Xs7+-+H@wownBr z?(@F8E9(O?nO;5P-FM&5XP@Wye6IfZ6Xxc}%;hn=z14EL< zl|~#2X_pyD&9{DK~{8`AQ+^#(o)|03FWyWS>qWR^&%$r}mQTTP{MwESH z{-Sd4rI13WSFU}yu-2s9Tiz8?{+a&GfB!>mB&5tA`e^>5{&eluBH0jT3L)hVnK}P~ zK2!`Tr+)F{+Q|(|pEH&Y!BSW|f46$Xsh>A2{M4P6RwQjJjl!iBx5>pzMG-|Z88w4= ze-J%B2<$?<&u6p*$!PJM-r|DBC?{3>{Gu@`1dJ9h=&hj91+zv|N?09=^pB>JaykvN z@ArApNF*1Pvr)w9yF}CkAjBu3zVUE4n^W~geMt1-jV&%SACR!r(o^&bi1!8qoR5d4 zKIHfL1ivo!Y+`hzQT7VrQAQp^3c&yn0Szz8<^Y6AH!^Jn0hvRCSLEN2*AZcr*DS1j zGE0GA1x9hG0ZyEEFB%8h$WDjcmXwmPxb(x7RG9P{n8ROgIfh^8w3(UjwU+*)<;zU+ z50h({@;doybb3S5`V=`$hhLIDRN?p5wf9Xf5g6ITHqje2*oNO}O>}Vq)Td9%sf265 zEU$?DZ9i!fwU@=;ckjbr+tj^<`JFZJrnYoTNT#Qb=ku>FF<&fweeEQ3C{as|_E4n; z*dPS?W?F=RWYALbS=KXK1j*k-jSvv?pO9pHKL(Qxc4QdIgCRGaK;YDf=Thy{QY&Z+bK~X(m$j zvQ?=r9||G;VIo8!tt{`2-d5f{I#}BEa2j7=@H*@Ey&!1KF8F=TEc9^(3$bkN+w2QA zi5u)NNSl>-{GwID)-J9HjBa3Mjs;d~lwD6~cd)lwV_A@&KXre)rJ_CFj=noFSlat= zs$XKP^7Xb;E7L6n;3t@}MzOC)VAXU(gMZ&Pu-Pv(5$*Hh1=m(=?&$iZ5h~9czzoSc zSapO_6C3m*djCxL91m}*vw{?#{n+1WCm6-Y+B=Ge*AP^KN>fxms-e%_04O>1c2fZMx-|0{;0 zw>1?1vKVf!As+z>Ua!4Rh8#ITx4$`C7ns-?6zj;Ddc zu#y_`7=z%HlX5)bSzUxr?f7C!0xL&ivj!*42YhG%20W67jKJOk`a+9&bul?HH<6C) z`T`u~)#swpPj>t#i!Zr4@wZ&P>{nU*7q0oEpAH<8ixa%fh~_wd719Krfw@3J2jVt}p9ind159N?l%15D<#z3%YWE2n>uozV?Od&C=b8f^A6~l9iSK%> zMRMS1N{gQUKM!npL0{u5gUfMS#&bnI(9xpkhPO{J4}NiW+$;T%81N=u1t3(cW0nZ!yTNaN3jZv{L#veQ zuMAvFe+I(6wS-gZ4dQ=VL;R04;@^EB@j&}XJ@L(wChyZA3-UemcLSPlm%G6bAxIxo1a@UeNFA^DDRAqIlG?tW|F15mpjOWO{#-V_(bjQ zpqlyS(#lcu>uUR?wNm)1+P;&4hT6_fwpZwZs&$ou6m4GJVSiL3V=WFKPso7@(p>?u{Qv2%C{l8Va^=0SLU;{bk#J7UOr+#w1h55BqA?2LfWAhA955}1 z;{Wr;~|gKPvtpCK{c`wCrPA2 zG3hbR%|ccv!7iyG(gP0-`@j)YSs(>eQX>eG7NP;fqNkA`9ufnYNK!o#@eGZo6ADlj z$wJUW&cM$~8;c?zgdlEc0*PD@9f@SJikgD9h;rk=EXdD@<4Vz90TL3*3BfOEYXpe= zj5UH!plbx6wq}jMYiszx>h^p<+aBC*v1Tq05%|GAaEgMa$%u!oC~dsRtqKiH#_CqY 
zy%9<|pnmmKjuW(nevYKZz?c5W;ZZOsK0g`&2IP<*K^OpDVG^*xBj;m%{jkdcClaLq zJdr!6-C=*H6T*}ravtm>4FNfc#r8lOJUbZ!plDrv2c3wAumCzaf!y2{?3EmNF1>Rt zh?vMEiw_G)h`_XjotP~FWh3c9C6|*2LA=l*qBKyPBTXNYy__Vv5tG}{Vq$EkTd8u4 zZ7t;UvzkFE+4ZhfZKzZyQBy-TL@V9s3wKjfwA}T8d6#J4TVvhJI4lZ}z(I76W3Mim zULJGA=Vqx&KDqqkc#7^jzw2)4A8qONV-EZs(YxWG+EH*!TEPxM$NSxp`}n5RZv}j~26EAiIyw?XiD5VXS4r6IM@ou-GNN&m_zZTZ*}Sa<=17v`F{3sUpLRf4i`S*dcmZ zxTxPh(C*qq%KMt8jcqN2k}1loBwd%H*Qdxn^a7#&qLq0w5&icPj+w7tjxltP>tBhn zOkVR1%rOC7CkFc5$@snR#GgAEe~-?2t+2E*1&$~&_s-HvhMHd5U$Ps-RTgyWDurrt z&hz+Q$oUB30LhTFrG~)uLZrHCD>xxo^8{9flqfkQ35Zm+ptFq5xmh z?GG5;ydO^`w-t%cPk}mm0Le$d;&}7`0F)anO8t-#fA#@s5USQKs~LMhX@~>OZHk_} zP(ms;#2cKKYnlj_q5iBYiC})}M^>qTOn@HfNvQHrWT+ZzkTO*@%JC$u4Wn-StMcxm zQMwh8ni$GxIE1Sf|8OV?*c9raOGl_RNI<3qE^QOWwH)H0~k+`{( zi9v-Z8HYdprvb3nY6i?Xh$k6I4uT03jARBZBfmOR@ar?dKx;F>S{fx-F<666QKabX0o3x| zntqtCfteK0L@e2NZ3f@d1e5?xUjE71U8L1rQhLwGsgVeXt||wR35Zcm<%S_oga@uF zHZQgMAg7aNC~RqU`Mb8AMt$ogqqJ2{43=V4hLAqIck=GD8$0+@$~ zjDY{IA6ZoqKPbXObXaKIZ+xAed-egT*3FEFckS-QKicy-e0fg`etoynqs8*z3VGrQ z1^Cco;!I`}L%DMpuivwen61Z?M-mD+b?~`R1|Ncyn?zlav9uW0zwqpy zAy$y6XSuy+&mi3xq$wIsndTWJ)*{MjXdRKeu60!MdYVCnjn!V)@#k9^6C1i?h}db4 zDlbquU7}Y_Q_d*giqc1iCdseYJt;3xYkrAdXOB=dIhMcvsFqb&@ZSeq_M_J=N{ntc zOT|o-g2=fC!XgNA69eIdnuRbCUff^{2U#1DHsR`i4h8*4O-E9Q#;|%+33UXbL;wd4 zz7Y(_gH#E<-~_aFts9^qHK3$GQBgp|E6Fa_FX5DHN0G>zNWa-6LKXNSBjRddy?I%k zC2cUIRsF0klk{wlc#j$j+_fR(wH#D`;pvz5bC z_c|dvysm+b@?7uMZ*)pwx>b)_2%oDw2i}oSiZoshN(jnL;0qCOkpy=f0^QQO@SjHx zt&@Dn315iPPRzvj^^aznd#NURJ=^WduBGxAmtEiVQ%)#4!ls!R4b z`2L_dHpqvx2H;g|Kw&w#tvw(ZYR5y80vkGtBKZh0?CWzhK}_ZRz3cF4A=&yKOCF!IzIY;aWP@57#TB zUHH41*3$iBZ<*oo)}(Kn#f2o2QaOjJ@QTV%asc z2k)HrGnUf$^j&s?HU-`rR+pp``_20s>af~}^VN@-v{svrMB@5aP5j)+gFsB>q#OSm zZM}jv;{$V>aC}a{Z*Sj>ug-19-<=CsEqaO8?RF#Aa4L(P^B;LuKbk@j7(P`(YIlf? z_n`v^NaBZFVKtd0&z1wIgidW|Y`R}v{b)>VYn!$w!sKTF&oA))2ij7YWHE|pRnuo! 
zK5{JnXrXVa!SPo8xaDCE`61@{p(o*2<{iMYFD=@OXJ*h8{QE_(7o9#cdjWnYfkqc* zrx)`xXUXp=Admtad&o1IE=-*{J9F-AKN?;Z!nB0^sq==V;bA(@QokdS&`~0AkTJ-Bww%1z zyYB(J8gWtGRKdGX2n1kjpdl%TGZ3a&a+y*1J_|J)H{P_^d-a*UJ4k(k{OOpo)PV{x zHT^4p%}g601I*=Tj-NkqVs_yK0UyBBg0fL{jD{)DF%$_WN(V5Btk5b-q~&(&LVx<~W2HYjJIYLE2(COMYA&jTk<-VK<19%v6Y+#6GD-qU zOwh}d-qBP%rH+BKbNc)>&!DkBczfiEDOu$7dp(?=4$B#NPzht}`EJ12zVor|@QWIe zj*N_ElB4mcLOSS5Fe&G^s$emo=tOp+CXOUzN-pdff+T5B1|-0!NJeWrff&IDKixsD z>_l=&hhSnVIlfN9~#Plf!H zr1}B6K*T>if5>L3j_60=0~d~1EKwyRkH{$*;g>H&eGj$*7FQ-MA3pT3I0ygFg?(m# LO{xEx&$s9?U;T8IY~c+M#hCZC7+l)D*+o zA=ZB1xujlpyI^f)_}=fH$9G=8?|JzA8_H+jR{nIfUJrH}51s4Oy}F-i!Ja_4#}|C` zeCLsnznwWen`*UikM8rZOH-3`7cR~ert)m+y%#TCzPpEsf9lgw{a{KOTszAeMBr`^#uXeOfl>8O#74zSIm$~lP-D+jiRFh{SqJFJT(MGh$<>K?Q%L*Gr#!7y#0yi zrvE^B&QzZ*T#YY(A`*8{b$R}^**k3U<;6phxW%^cwb!hgNW5_W>xHY8ulX;}6{!Q7 z`XcdHscqrq)s`22{)6?6H+B8sT&3p#dZKIL&9#m{-PG}ya4Oo$3oBuZmo>$Om3CX3 z&l_gZbegA`-tA+j`>>Zx4|P|mpkAf?fhwh~)_SXDS~y&()q+};@`tLFjlo|r&t&;X%IxO@(GKSKi%-?G*?nGJ{QIc`ny!bK?(Gg~0UZ$T zX2C$f7pxq*yQLM*G1w8%=k#Gv)8Lm43G65oNq|Uvi7G z7#B=burM0!^-OxIsaW-o2FN>reSgx>Hvf4c7m|> z6X*zGH#ecf7Z#u1-B;tM&3t+HG2yCr70+!^u%QuqgpK29(-C$Ouc3l^D#C6=*!8i# zl6Bm%wSBEg(7wj7n+e+21X0-6*t&gneaU!iXwng@m2wl23&37OMOB0*?6oG{JrG?5 z0Zkf^-VA|TiqMXA`^5|O_lVuK4K+Sm&YgRXPQ0gvu;Eo#v9Wj}!YUu*k?l78nTtnb zH_YoN`$|1>wiWF|Ahr+Ds^VD75Upy6sB24g7OE@|i_h2YKcH!W${M=0)s3(2yZCW! 
z$Ki0`mPJ4bY+4sBLh}ZM;SAU4w1VGu`ij2Il4m<_$MINme|pk-JDrL}EXIwGT(~*8 zwQPOfh+QA=D;<^VGS^N^jnleJtR_zDiW7BhX|E~US?j7bp5At0WpeA@X`E4a;MP_7 zT|Ilk-J6%d{M}-1?;i1S*S_0}L+t-=IyQ=f=&Wn(CG1OGsA+zSScm-T0sIo+cZp}~ z>Mcz|;D3FKvuK;yO6Z3=k?7A_=ub-Ye;ERhxGnxIy+WwB^vW;9|ISy${Xo1&@P>w88TGLn7Vdvz zsAqeB0qRS0d;de(F_MZ!$1>TpnL1XQ+}FEhtL?vSHk%@v4_+1gU>g5hwnCnCe?WYI z)r*7d{iPqW|5C*p_wFmEqGMUIzVNm*W~NR=(;1?@Y>=l%jr0IB;fomwxP1fT7y+)I ziH6ywoYXR{iKDe^Fv&h&k*q5+piBQaG~ zmP>mhv7!p##+` zBiWddi9x6NR%tj!7K{SH3ZVz3auNUBySJV}ksgc6UhR&QnSeqJi>G>9?_~im^Hc>| zE{~ZR7B#c6Xqu5p>l{gqWUwc2zADC|b2x3DfQu~tq4#_f?du@zpH~Znu`{~OZd2qc zj=w5Kj~>3)?`5ZxClY2R%X|Swn+(^D4N5@s3_K+an(8;Q)oE!O^WdMKc}`&fkNDov zeQF>qzTU5k!}s59j^X6|)@Y!7448--Nsu}tmLN1nW*I;M?X}LE-1*ICD#ZP5#V4VX zRLhrs6DJw_=sBRu_ON`;oRFJ;GXJ#wgA2B+d08kAdd8{N$S-N;B6mBY^@x>(7{Yle;HH zOU{!(lo^ipL*Su=Dr^VLbhJOqp*J&Bjvk~mDAdhFvl+xQ&+b%hO2lc@#FpwzDJFOz z(6n$}4O(OBm&~{nrNuq*S0ypp==9u?%9kcxOQW)1yxQAl+iAn!%xI%zlDB^*|LC-P zNkfn39o)6_d%WJYbSOURZaDqPggV-2sb^QU8uIM5{VUqY@@E%T-fe#}mD^pOek7&f zl~TVd2jaWe#!EidSs&Vkc*tgxbIa8(fy>JH!b;lySKO6rC{O<(cPV&tmzr~xWlyOF z;|_5=S+}1ztn0nxTUS3Pl+MQDhWcE+4t@S~- z$WCeIgEAPdxF^((`E;_8q|c#wpyr7;MeVRz>znC_kE_iPcMqCgs~xPt#&(EdD(xt& zu1;IiyT$P%2a60*K~=l4@TRFiG9{cXuYvhBMxZ2#2ci*CCVJA*BtH@D92gl+n3!=i z1Lfg^k?P|E+N6~oH;2kpvce0xLOF(Sdpo5hQN||Ll3X@g|@d^j)tW9J3a!Lg+TUT z1u&y+SZbJgj&_hPzPH1Jqx0bt?{LU^!x!-MQiZphdF1dOq{`4p(jC3xQU88%*gPZ} zv^W$+#l%XX&qRm&%&g@PNoh}Ucmt^{iM}+;S^y#+-%(>zoENuBe2z9u=kqfb`b+hd zqBU5SR~t!LmQeljYFWOu(sR**JTY5q_pogDl~H#|*md7M)w)wpEZ80$Z5%p3BW2@( z#UBi%B;UfJH%s4fMjWG!VnX-s?B=d%cdiy9kAgqf_jS3RGnX9kY4`Zp!pfC+E2Mjt zyIOO#<@s;MTyo0OlTBpaVUB%z=|A;vD~cw**6$NP^EaznKr{scYKn{FfkELtxJCSN zbh~&m@WRe8>wy{U40ooDQE@rw6;B8CVnFT}gmIV!eB{yi{6tDu$_II20gXywz(IOt z120H_w$bcjvP)JlslGsCDiD!Tf7a{PHrU^hes95rsLqRA> z=4C0(1?eIh$|r|HZVjXy3Il?aLt#LWawrT4QVvB50y0F5Fdj%b62^lihJ;Zujfz!* z#DE~flB5CQC{igc2hJ>!G#qA6Bn^jbaeu}j!Uqe57p9$#o*>H0AnQpu9&q>ZjX9_L z4C(koIug;rVwTdcT8NSJ2#K)x0K~)x(u5&U2~tiNeLzZ{12A;MY>l1dCYf2h;N)g` 
z!bqo$akTTZ1dL<#t1?R{Ia9bo2a5IsA)8H&9L=G-n5hQSNM$UsmP3k z?HOCSpdw;dfZarRJTN&6FXB@?`Ykuvhr@H>rURp-ESdn_rbb|^Y|Xltm=QU zs*A2h-o+22UhS?+IGt_`lJjFG66;d9ej$Yr*6nsG`+1P--2Y5nUDD}1K>d5(dH*69 z+{N$XVcr7<38!*U#zjtrB|#uh)Xe+%Q651h*ejk&?V*HK+rndffDclw@p-v7a<*Sc z>?J#0PFk--5>u^Jo*Rn2_1MDAFUIZU&)@uPT-i=uh)sTdGOpqif1jCNxcMhYpbwpY zapC6s;SH#{dIk1;cI75_EiZ=oZUyQfk!vjT+KQ`4Hoq2LBCl5;HLeiriGjPm7GUaa;;_VQPo_VoOSE+ z;wWz`Ww=MlwUl`)cqVzPHTU8x*BWviOPx5nu4>sUxzkoF-s0vb`Dt3n5&i&?*ysG&hWE5Z>=lo zY!33ST%n9+@8;9IfOU0YU0n-r=WT5v67@t+(in<%TGABq1|i3(W7!xqCESD!m40{} zl}aC+gf+|WgLz&3Dnd!=6c=at@Cg`E9oni|*)@QqJVF*7dhC`ma+NC1#A-XZz7&*O zlhY_X+th+C1vZw&qR>2>3sD9gzs4#E$7{pdU>7zP_%wJ!dx|jqA?PUtXJ*zk5}naR zG#Q0y4!~!*rO*e>y*WtoLH}+JTYS*Lo68Vm-onoxIDJbY`64!D*^Qx?~Tv5R9H>4?#*J2=fAx3?JD;ErdqWA^u!U4iL&F#nNkd}+M3Pm; z{%2*vB!msDm@61Us|-#n(gx8nw1ay~%`iSX(wt$6wUBfIV2MG|j4_>PA`8NY)?BKQ1vf2KqZvOgE(P`Yt8|+J7-e|>m zr&jguxgk=#s|w25CvBpadtCERD7m|oL_?__inXcC-AgXs1pm-Y%DzF(HO$ZRrq-Oh zJpY!|q@G+ynYSoVv%E#kwL&MhJ!9#_`QrT0!paEjhJv6g!RxP;8lbfsd40|+<3re7 z?uB}*RdclqD??brkwOvHdO!Db{jJ6JI&^O!OK@-~ceKpIO71>9)m#sZ;eAqDzBaUQ z^QQ^!zm^Lw?!o#!+@BjL^I)!zNBF=A@b*J>`sYiOFhq)Ng91TdR?&@6ji>2OFg2h< zSdk>ToeZ}-q`+l6#qq($V-;v8SQ`>;Iu0_awL{wWVM|rW6EdAkPI|xjv;$&gQ0lYR zMj1z$bzINOEq>B_Q1tfo>G()A3H zP+6gs%q#|_OBj%jA|(__DhR=p4nf>giQq`HQ^tcLA+IP7>?zuow0K=?}zPpT`24?tQc; zFU98fQd`^ts}jaiZM9O>k75?HvB=`@P?csIt*>N86xlQ~^XA7SF~8UPmQVSRanepo zR8-@Zp*v*7+6h#$A-qaHG81(Dj7)~KMGhIpfTrzZbND^6&>xQ6hB4lbmQM+ z4t7SzkbXw487YDu{C0qHml`7L9xKk4EfM}wpp7Kr(zb&$zc3C zVE)EChIy+6bBXdD$u*EGfcZL*f9IWp+%|gTR{(kcI|jM*FRLIAt^>Iyh6ncp+@UJC zVQ<%Gy0jp+c|!q|tOaiDJ%M};+&f{GSHUg#pc{A88dUTqOb0h&8uhD~M%8*u!|F;* z<3?yTgGJf#3Z{M49PXy{J)9P+#hozn>rk#^t^0uPI}`5G-N(wR8VZ!YZ~k1d6so9C z9HNr$#J)R}T_Th9;Q5|x7oN4p_K1r^+e%;0e#MT@;#gqcE?ht%`_$`U@FqQnap424 z-RWuHCB8NG(>r#-SnO)U$Bmi#(!Y(X>K0idUON?3c9r&@ep4-4O(>Ix-tD{AT@Y54 zIb>J*AC%#S-j+H$W@Q5*R+G5GQ_jQa;{Hx5(UXsxwdTwCD|q9}pN|*1m!FImyf4j% z3KQ|=r{ej-`|@^OO;T(U0E-!IWiUd{o*Q!&|yL z*K8N3&bYTZ*lD_B5&w9mQHV1KMd?ht;t=0G(<|JQEufB0`h=^wUOY0nA7778hQ!gr 
z_QrPBHkis(0kC>4&cqA7P2z`>uZtV`2K&9tQTlm)zXI=@-KkZ&tZBM>l_9x3STEjn zRu}ge`?hw#6KHdYZ=CHA-=1z2Ukx^k12fwk_~DZ=lE}8Vw~OI(Zc)rTiXH4;F!yAq zr!G&i$+_7pm*xuW%JdW~OuTRIp(||eto*uiX=3(rVdBcn+$_5?ai%bJxr?#P%PSH^0~jPz~1&dx;p)~1M4 z!I`b+k~A_y>~_62bRvLkr!ySAG;?K2tmNIL$(g4VTqqH$R&VRUoJ!W9TZMr5$a%lw zDERGz`2Otu&dn7L5u7{Wh?!~Qn2|Do;*ZV6 j2*EWSaDyq(BiuSyaqDPH4;#miGchsLP&#vQeB1v5h=~k? diff --git a/library/UNICODE.TEDIT b/library/UNICODE.TEDIT index 6168fe276f5f1f9e5d55d822774fc7dad42d2dfd..24b030a6d547ac811ccb2731074323d6d9ac268a 100644 GIT binary patch delta 4736 zcmai0TTfh76rN#v;nIp0ur0mq3T-I?+M>1;tS|yY(P6+cQ)?RA4sd`8bY?o|Oe?KA zsYze7HpXL&k0$;Bl^4^*#9QKnQDfpmA52X2*~_bmG45~8tdE%vjgWJ)&$qtyt;=5f zocZ$I;H~$AZzr}qn@s1ExfRoUwD-x6FMb}}IPF|@(tcY_zki`-(f_9AjoSW1Jkova zc+~%)rq{n68ZXof($_s5O^nrr&DCTsl}vjkGjECpb8Ru>I%YBvA3hyE6N#B@E;E;( zbzI{uIwp}$&1U8t<1XhHjI1J&$cUNGxF(r%OgiJ4IcGj~ zEbYvhR9eK~(d}irRn$w)EIV$G!C!scM)MYPjpc9o|zss6XB_;@z|LE)!t+NrClxl%k5hSjF&Mpj=AjQ79^Z~jEA3zj+$h8&XgPl z=(4${W*3oKGQaFuKR>$Xz=fRSW}R6tb=6s3aeB=7ygHebYvNtVzF0!3M!-qe&0n_B zY^>Q#+D*CA)DA5U#U3`=IUojp<0lC(jkpL~OhS)}4>!(>9c?;HCO#YRNI@;*$v&rFZILX7ZlNIbJ@OHnH)j z3;_4rTTFV~bj@c=aT?4#5?Xz$=a)MFi@qlR)xI5d&4Eo5Ho%#f|3Tl4V94KnvfbXp zW%oBvE?Fcy)I1yr)Y$((AZV{Z2sis!XYU)Z1h5$kYYrx`03s$h4B0CX@?-rw3+f8n zpuo6Uty+8Ege4qiuuv*2h$NPqSd?2d>{@0Jkf$5>a~lGMlIbBeF{J1~n4yevSjV&? 
z+Ko@j0pwW^00~^cqCC&70D_R3@WZEDI>0VscJaKu$VQv5E70J7G|=jQIqmRM{k`$y> zBLam*2NpKG&pxc_z?&7KH=@^xMTHkRnxKC>TsOwH52ypo2;K$&;x_G~lq@R3DYw)A z_f%`4D#CH~Wa`}9^|hdIP@Q&SfpD)WqT!CH1=eCWZPI5eyjd8tVn;c`oQFfI!?Yez zWP^X_>DI!OKC+VM6yYdJ@6qxtFd;QO$W8LZ?V=*o*{6t*!8xi3laWzjohWEtIW1Vg zi_uy5E#)v4TTzYB>qYe z&h+_BeYTajg|!J+88sEVT`sdjy&Fs3YiSaZStxfJQ726z$n-2?>qo}yx+cK^Z-YDb zOCms6jx7*2UA79yd3&*+>>2{d-Ck zL?EzN5os;Bj9GoO4=cGU+NKBHFeY1$cI<(xat#nB-GhV8!T_#E_$mC!T4J zHaYbAOudi?dSxxe7xI2sC<6z60=Rv43W}eLS3uTQR>(XC3uMZi*7?N1dJ|L?0q{5g z_*ONPyISZ42Ts+Sr&|>i&2nMRJhxTKMkt;2K$8mV4pD{Pc8BaM54hOiPA`7=`K_Xx zG#?&BK9pGg&2V$UU91cdmr8?wrd&ptS zbi;zw5fdV_E}i)jV%xU%#-cQ5fu9AHUK7%OOG&tqGGrg58O)hOrcT_O6Rbks^c6bn zJbQRom23SwBdyyR8nP(0*yDG`)_(Y-?uvr~p97E)uJr>$<_Q+|OJ3DLq$l7`*{gDT z*k#EB9C-obX*W+dfAUbM13$<&f_2DW8rs=I1E-Na`N@(bCOHS;EJMSuRIzAFo# z;9nfvQ;0!8R7!=)t4saXbAyBBBJXxA;mo;ZHJU~zC>3%IiU2KC#Fk-#9?x7gG6eVX zgm(de7OwwE_;saddQp}ulDOU1C3uv|?Z6V0;y#Ronl`LFHv9Q{7{&-J_i0URJ& AlK=n! delta 4235 zcmZ`*U2KzO6zVyuvi z3*&|Q8sn8-c;TIa?2g1}yfQH+CVIhz7bae7G{%^CGvDX5=b_&^oAk^3KIc5=Jm)4G%;LN$zfo9QZT@XXaC~B|*cS1&kEv+h{7CoSIAy zn+2KH0+@2a%*>nFLcUZkuFRI_=emu_44q9Uk*Dkg)*LIQoSnHgZ({Kmhli&!6Vv0V z;faxC9KqwYNoE4J_%TTV?p$Fho6AEHX4$#9Tsc?BcbU}q#pH#l@7wQr z_KUtKpFj3pt*AZdyb^G&2V|^3$Qhf};~@;1ocFXMVsc1eP!XgB`X~SleBqGrbfBWJ z^WCOn_-%E6BnZr4pc;HG5F{=)|JOxjK1eN*5ch2%4dUr?L(Lrw>obJ`!1oml>MgAR zU>UQ&q}pSUMd*3h)ERMaD9~aP0};F4-w|fB+CCVFxH_r=#%y+=-DU=MRuEdzE~T@W zWkuBG5dh${sAUUG5FwE;*aH)0BD7u~<4PDXgFIE5dyx{YG)yVS-NlrCgvgdsPv} z=~P6V@#s;6Vw!LYC6KPyf&%RrfW_=Ad{upzi>+ux$a+axMBEm02|WAh;J&C7QacR^ z^^{&gLrf6+$#Vys;Odc~?N{~Qg#o;&Mty0K2G*yN8w$V~eu3V zFpqB_VOo0&V5l5~ytxs00stg5^ivj9-X6V5YFsG%SLH(7(@<{&o<_gg+^}-EC|f|1nt9u_Q*E{OkA>vn{3`MZAE~` zg2)+=kc#^TYwg!3iwGZC2;@BM{=MR)%X!}+0`P4>$waPOuRpS%E5$?yz(v?Od*azh zg=|Z3;lXW3JxJy1KV7AZqeBq%2*O}M zw#wpu1b|3~?Vp4D=A^@z%JmJFvyujGU-nZK1UF6FXHfJ#&W~>h>IciMdfM1djWvuckY98KWxC6*0&qL>Vyjh zv`DTrru@!j(TCxUfMED(sz -# "353" UNKNOWN +# "353" SYMBOLS3 0xEB21 0x2119 # ℙ DOUBLE-STRUCK CAPITAL P 0xEB22 0x210B # ℋ SCRIPT 
CAPITAL H 0xEB23 0x2110 # ℐ SCRIPT CAPITAL I @@ -53,7 +53,7 @@ 0xEB28 0x203D # ‽ INTERROBANG 0xEB29 0x2318 # ⌘ PLACE OF INTEREST SIGN 0xEB2B 0x210C # ℌ BLACK-LETTER CAPITAL H -0xEB2D 0x1D53D # 𝔽 MATHEMATICAL DOUBLE-STRUCK CAPITAL F +0xEB2D 0x1D53D # MATHEMATICAL DOUBLE-STRUCK CAPITAL F 0xEB2E 0x21C5 # ⇅ UPWARDS ARROW LEFTWARDS OF DOWNWARDS ARROW 0xEB2F 0x21F5 # ⇵ DOWNWARDS ARROW LEFTWARDS OF UPWARDS ARROW 0xEB30 0x21E2 # ⇢ RIGHTWARDS DASHED ARROW diff --git a/unicode/xerox/XCCS-51=RUNIC-GOTHIC.TXT b/unicode/xerox/XCCS-51=RUNIC-GOTHIC.TXT index b39dfd2a..9ace6d0a 100644 --- a/unicode/xerox/XCCS-51=RUNIC-GOTHIC.TXT +++ b/unicode/xerox/XCCS-51=RUNIC-GOTHIC.TXT @@ -42,7 +42,7 @@ # Any comments or problems, contact -# "51" UNKNOWN +# "51" RUNIC-GOTHIC 0x2922 0x16A0 # ᚠ RUNIC LETTER FEHU FEOH FE F 0x2924 0x16A2 # ᚢ RUNIC LETTER URUZ UR U 0x2927 0x16A6 # ᚦ RUNIC LETTER THURISAZ THURS THORN @@ -87,31 +87,31 @@ 0x29B5 0x16A3 # ᚣ RUNIC LETTER YR 0x29B6 0x16E0 # ᛠ RUNIC LETTER EAR 0x29B8 0x16E1 # ᛡ RUNIC LETTER IOR -0x29E1 0x10330 # 𐌰 GOTHIC LETTER AHSA -0x29E2 0x10331 # 𐌱 GOTHIC LETTER BAIRKAN -0x29E3 0x10332 # 𐌲 GOTHIC LETTER GIBA -0x29E4 0x10333 # 𐌳 GOTHIC LETTER DAGS -0x29E5 0x10334 # 𐌴 GOTHIC LETTER AIHVUS -0x29E6 0x10335 # 𐌵 GOTHIC LETTER QAIRTHRA -0x29E7 0x10336 # 𐌶 GOTHIC LETTER IUJA -0x29E8 0x10337 # 𐌷 GOTHIC LETTER HAGL -0x29E9 0x10338 # 𐌸 GOTHIC LETTER THIUTH -0x29EA 0x10339 0x0308 # 𐌹̈ GOTHIC LETTER EIS; COMBINING DIAERESIS -0x29EB 0x10339 # 𐌹 GOTHIC LETTER EIS -0x29EC 0x1033A # 𐌺 GOTHIC LETTER KUSMA -0x29ED 0x1033B # 𐌻 GOTHIC LETTER LAGUS -0x29EE 0x1033C # 𐌼 GOTHIC LETTER MANNA -0x29EF 0x1033D # 𐌽 GOTHIC LETTER NAUTHS -0x29F0 0x1033E # 𐌾 GOTHIC LETTER JER -0x29F1 0x1033F # 𐌿 GOTHIC LETTER URUS -0x29F2 0x10340 # 𐍀 GOTHIC LETTER PAIRTHRA -0x29F3 0x10341 # 𐍁 GOTHIC LETTER NINETY -0x29F4 0x10342 # 𐍂 GOTHIC LETTER RAIDA -0x29F5 0x10343 # 𐍃 GOTHIC LETTER SAUIL -0x29F6 0x10344 # 𐍄 GOTHIC LETTER TEIWS -0x29F7 0x10345 # 𐍅 GOTHIC LETTER WINJA -0x29F8 0x10346 # 𐍆 GOTHIC 
LETTER FAIHU -0x29F9 0x10347 # 𐍇 GOTHIC LETTER IGGWS -0x29FA 0x10348 # 𐍈 GOTHIC LETTER HWAIR -0x29FB 0x10349 # 𐍉 GOTHIC LETTER OTHAL -0x29FC 0x1034A # 𐍊 GOTHIC LETTER NINE HUNDRED +0x29E1 0x10330 # GOTHIC LETTER AHSA +0x29E2 0x10331 # GOTHIC LETTER BAIRKAN +0x29E3 0x10332 # GOTHIC LETTER GIBA +0x29E4 0x10333 # GOTHIC LETTER DAGS +0x29E5 0x10334 # GOTHIC LETTER AIHVUS +0x29E6 0x10335 # GOTHIC LETTER QAIRTHRA +0x29E7 0x10336 # GOTHIC LETTER IUJA +0x29E8 0x10337 # GOTHIC LETTER HAGL +0x29E9 0x10338 # GOTHIC LETTER THIUTH +0x29EA 0x10339 0x0308 # GOTHIC LETTER EIS; COMBINING DIAERESIS +0x29EB 0x10339 # GOTHIC LETTER EIS +0x29EC 0x1033A # GOTHIC LETTER KUSMA +0x29ED 0x1033B # GOTHIC LETTER LAGUS +0x29EE 0x1033C # GOTHIC LETTER MANNA +0x29EF 0x1033D # GOTHIC LETTER NAUTHS +0x29F0 0x1033E # GOTHIC LETTER JER +0x29F1 0x1033F # GOTHIC LETTER URUS +0x29F2 0x10340 # GOTHIC LETTER PAIRTHRA +0x29F3 0x10341 # GOTHIC LETTER NINETY +0x29F4 0x10342 # GOTHIC LETTER RAIDA +0x29F5 0x10343 # GOTHIC LETTER SAUIL +0x29F6 0x10344 # GOTHIC LETTER TEIWS +0x29F7 0x10345 # GOTHIC LETTER WINJA +0x29F8 0x10346 # GOTHIC LETTER FAIHU +0x29F9 0x10347 # GOTHIC LETTER IGGWS +0x29FA 0x10348 # GOTHIC LETTER HWAIR +0x29FB 0x10349 # GOTHIC LETTER OTHAL +0x29FC 0x1034A # GOTHIC LETTER NINE HUNDRED diff --git a/unicode/xerox/XCCS-56=UNKNOWN1.TXT b/unicode/xerox/XCCS-56=DECORATED-RULES.TXT similarity index 96% rename from unicode/xerox/XCCS-56=UNKNOWN1.TXT rename to unicode/xerox/XCCS-56=DECORATED-RULES.TXT index 24764415..da8d029f 100644 --- a/unicode/xerox/XCCS-56=UNKNOWN1.TXT +++ b/unicode/xerox/XCCS-56=DECORATED-RULES.TXT @@ -1,7 +1,7 @@ # # Name: XCCS (XC-3-1-1-0) to Unicode # Unicode version: 3.0 -# XCCS charset: 56 UNKNOWN +# XCCS charset: 56 DECORATED-RULES # Table version: 0.1 # Table format: Format A # Date: 9-Aug-2021 @@ -42,7 +42,7 @@ # Any comments or problems, contact -# "56" UNKNOWN +# "56" DECORATED-RULES 0x2E21 0x2500 # ─ BOX DRAWINGS LIGHT HORIZONTAL 0x2E22 0x23AF # ⎯ HORIZONTAL LINE 
EXTENSION 0x2E23 0x2501 # ━ BOX DRAWINGS HEAVY HORIZONTAL diff --git a/unicode/xerox/XCCS-57=UNKNOWN2.TXT b/unicode/xerox/XCCS-57=VERTICAL-JAPANESE.TXT similarity index 98% rename from unicode/xerox/XCCS-57=UNKNOWN2.TXT rename to unicode/xerox/XCCS-57=VERTICAL-JAPANESE.TXT index fd40c26d..191b5eb2 100644 --- a/unicode/xerox/XCCS-57=UNKNOWN2.TXT +++ b/unicode/xerox/XCCS-57=VERTICAL-JAPANESE.TXT @@ -1,7 +1,7 @@ # # Name: XCCS (XC-3-1-1-0) to Unicode # Unicode version: 3.0 -# XCCS charset: 57 UNKNOWN +# XCCS charset: 57 VERTICAL-JAPANESE # Table version: 0.1 # Table format: Format A # Date: 9-Aug-2021 @@ -42,7 +42,7 @@ # Any comments or problems, contact -# "57" UNKNOWN +# "57" VERTICAL-JAPANESE 0x2F24 0xFE33 # ︳ PRESENTATION FORM FOR VERTICAL LOW LINE 0x2F26 0xFE31 # ︱ PRESENTATION FORM FOR VERTICAL EM DASH 0x2F2B 0x22EE # ⋮ VERTICAL ELLIPSIS