From 9e7a6b0657e9fdef8cc041defdd92f17104783ea Mon Sep 17 00:00:00 2001 From: rmkaplan Date: Thu, 11 Sep 2025 23:43:07 -0700 Subject: [PATCH] Unicode to MCCS See new Unicode documentation --- library/UNICODE | 723 +++++++++++------- library/UNICODE.LCOM | Bin 30651 -> 34301 bytes ...PINGS.TXT => MCCS-TO-UNICODE-MAPPINGS.TXT} | 24 +- ...PINGS.TXT => UNICODE-TO-MCCS-MAPPINGS.TXT} | 41 +- unicode/xerox/XCCS-0=LATIN.TXT | 21 +- 5 files changed, 484 insertions(+), 325 deletions(-) rename unicode/xerox/{UNICODE-MAPPINGS.TXT => MCCS-TO-UNICODE-MAPPINGS.TXT} (99%) rename unicode/xerox/{INVERTED-UNICODE-MAPPINGS.TXT => UNICODE-TO-MCCS-MAPPINGS.TXT} (98%) diff --git a/library/UNICODE b/library/UNICODE index 3c917000..ef8e809b 100644 --- a/library/UNICODE +++ b/library/UNICODE @@ -1,13 +1,13 @@ (DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10) -(FILECREATED "31-Jan-2025 17:47:03" {WMEDLEY}UNICODE.;128 98991 +(FILECREATED " 9-Sep-2025 08:59:44" {WMEDLEY}UNICODE.;171 111736 :EDIT-BY rmk - :CHANGES-TO (FNS READ-UNICODE-MAPPING MERGE-UNICODE-TRANSLATION-TABLES - MAKE-UNICODE-TRANSLATION-TABLES ALL-UNICODE-MAPPINGS) + :CHANGES-TO (FNS MTOUTF8STRING N-MCHARS UTF8TOMSTRING) + (VARS UNICODECOMS) - :PREVIOUS-DATE "27-Jan-2025 16:46:36" {WMEDLEY}UNICODE.;127) + :PREVIOUS-DATE " 7-Sep-2025 20:30:24" {WMEDLEY}UNICODE.;170) (PRETTYCOMPRINT UNICODECOMS) @@ -24,10 +24,11 @@ (P (MAKE-UNICODE-FORMATS EXTERNALEOL)) (ADDVARS (*DEFAULT-EXTERNALFORMATS* (UNIX :UTF-8))) (FNS UTF8.BINCODE \UTF8.FETCHCODE) - (FNS UTF8.VALIDATE UTF8-SIZE-FROM-BYTE1 NUTF8-BYTE1-BYTES NUTF8-CODE-BYTES - NUTF8-STRING-BYTES) + (FNS UTF8.VALIDATE NUTF8-BYTE1-BYTES NUTF8-CODE-BYTES NUTF8-STRING-BYTES N-MCHARS) (DECLARE%: EVAL@COMPILE DONTCOPY (MACROS UNICODE.TRANSLATE \UTF8.GETBASEBYTE)) - (FNS XTOUCODE UTOXCODE XTOUCODE? UTOXCODE?)) + (FNS MTOUCODE UTOMCODE MTOUCODE? UTOMCODE? MTOUSTRING UTOMSTRING MTOUTF8STRING + UTF8TOMSTRING) + (FNS XTOUCODE UTOXCODE XTOUCODE? UTOXCODE? XTOUSTRING UTOXSTRING XTOUTF8STRING)) (* ;; "") @@ -39,21 +40,21 @@  "Make translation tables for UTF external formats") (FNS MAKE-UNICODE-TRANSLATION-TABLES MERGE-UNICODE-TRANSLATION-TABLES UNICODE.UNMAPPED UNICODE-EXTEND-TRANSLATION?) - (FNS ALL-UNICODE-MAPPINGS) - (INITVARS (*XCCSTOUNICODE*) - (*UNICODETOXCCS*) - (*XCCS-LOADED-CHARSETS*) + (FNS ALL-UNICODE-MAPPINGS XCCSJAPANESECHARSETS) + (INITVARS (*MCCSTOUNICODE*) + (*UNICODETOMCCS*) + (*MCCS-LOADED-CHARSETS*) (*UNICODE-LOADED-CHARSETS*)) - (GLOBALVARS *XCCSTOUNICODE* *UNICODETOXCCS* *NEXT-PRIVATE-UNICODE* - *NEXT-PRIVATE-XCCSCODE* *XCCS-LOADED-CHARSETS* *UNICODE-LOADED-CHARSETS*) + (GLOBALVARS *MCCSTOUNICODE* *UNICODETOMCCS* *NEXT-PRIVATE-UNICODE* + *NEXT-PRIVATE-MCCSCODE* *MCCS-LOADED-CHARSETS* *UNICODE-LOADED-CHARSETS*) (DECLARE%: EVAL@COMPILE DONTCOPY (* ;; "There are 6400 private Unicodes in 25 256-code charsets. For XCCS we map to a contiguous region of unused/reserved--private isn't big enough.") (CONSTANTS (FIRST-PRIVATE-UNICODE (HEXNUM? "E000")) (LAST-PRIVATE-UNICODE (HEXNUM? "F8FF")) - (FIRST-PRIVATE-XCCSCODE (CHARCODE "200,0")) - (LAST-PRIVATE-XCCSCODE (CHARCODE "230,377"))) + (FIRST-PRIVATE-MCCSCODE (CHARCODE "200,0")) + (LAST-PRIVATE-MCCSCODE (CHARCODE "230,377"))) (MACROS TRUECODEP)) (DECLARE%: DONTEVAL@LOAD DOCOPY (P (MAKE-UNICODE-TRANSLATION-TABLES 'ALL] @@ -61,7 +62,7 @@ (COMS (* ; "Write Unicode mapping files") (FNS WRITE-UNICODE-MAPPING WRITE-UNICODE-INCLUDED WRITE-UNICODE-MAPPING-HEADER - WRITE-UNICODE-MAPPING-FILENAME HEXSTRING) + WRITE-UNICODE-MAPPING-FILENAME) (FNS XCCS-UTF8-AFTER-OPEN) (* ;; "Automate dumping of a documentation prefix") @@ -71,7 +72,7 @@ (UNDEFINEDCODE (CL:PARSE-INTEGER "FFFF" :RADIX 16] (VARS UNICODE-MAPPING-HEADER)) - (FNS UTF8HEXSTRING XTOUSTRING XCCSSTRING) + (FNS UTF8HEXSTRING) (COMS (* ; "debugging") (FNS SHOWCHARS) (DECLARE%: DOEVAL@LOAD DONTCOPY (MACROS HEXCHAR OCTALCHAR))) @@ -87,7 +88,8 @@ (DEFINEQ (UTF8.OUTCHARFN - [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 20-Jan-2025 20:45 by rmk") + [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 24-Apr-2025 15:43 by rmk") + (* ; "Edited 20-Jan-2025 20:45 by rmk") (* ; "Edited 31-Jan-2024 00:32 by rmk") (* ; "Edited 8-Aug-2021 13:02 by rmk:") (* ; "Edited 17-Aug-2020 08:45 by rmk:") @@ -95,7 +97,7 @@ (* ;; "Perhaps the translation table should already do the mapping for EOL to LF, but that seems to be a separate property of the stream. Also, CRLF=2 bytes.") - (* ;; "Print UTF8 sequence for CHARCODE. Do not do XCCS to Unicode translation if RAW.") + (* ;; "Print UTF8 sequence for CHARCODE. Do not do MCCS to Unicode translation if RAW.") (IF (EQ CHARCODE (CHARCODE EOL)) THEN (FREPLACE (STREAM CHARPOSITION) OF STREAM WITH 0) @@ -104,7 +106,7 @@ (IPLUS16 1 DATUM)) (FOR C INSIDE (CL:IF RAW CHARCODE - (UNICODE.TRANSLATE CHARCODE *XCCSTOUNICODE*)) + (UNICODE.TRANSLATE CHARCODE *MCCSTOUNICODE*)) DO (IF (ILESSP C 128) THEN (\BOUT STREAM C) ELSEIF (ILESSP C 2048) @@ -134,10 +136,11 @@ ELSE (ERROR "CHARCODE too big for UTF8" C]) (UTF8.SLUG.OUTCHARFN - [LAMBDA (STREAM CODE RAW) (* ; "Edited 21-Jan-2025 18:37 by rmk") + [LAMBDA (STREAM CODE RAW) (* ; "Edited 24-Apr-2025 15:43 by rmk") + (* ; "Edited 21-Jan-2025 18:37 by rmk") (* ; "Edited 14-Jan-2025 12:39 by rmk") - (* ;; "Produces Unicode Representative FFFD as a slug for XCCS unmapped characters") + (* ;; "Produces Unicode Representative FFFD as a slug for MCCS unmapped characters") (UTF8.OUTCHARFN STREAM (OR (CL:IF RAW CODE @@ -146,12 +149,13 @@ T]) (UTF8.INCCODEFN - [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 2-Feb-2024 11:44 by rmk") + [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 24-Apr-2025 15:44 by rmk") + (* ; "Edited 2-Feb-2024 11:44 by rmk") (* ; "Edited 30-Jan-2024 22:56 by rmk") (* ; "Edited 6-Aug-2021 16:02 by rmk:") (* ; "Edited 6-Aug-2020 17:13 by rmk:") - (* ;; "Do not do UNICODE to XCSS translation if RAW.") + (* ;; "Do not do UNICODE to MCSS translation if RAW.") (* ;; "Test for smallp because the stream's End-of-file operation may suppress the error") @@ -231,19 +235,20 @@ 6) (LOADBYTE BYTE4 0 6]) (CL:UNLESS (OR RAW (NOT (SMALLP CODE))) - (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))) + (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOMCCS*))) (CL:WHEN COUNTP (SETQ *BYTECOUNTER* COUNT)) CODE]) (UTF8.PEEKCCODEFN - [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 2-Feb-2024 11:48 by rmk") + [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 24-Apr-2025 15:44 by rmk") + (* ; "Edited 2-Feb-2024 11:48 by rmk") (* ; "Edited 14-Jun-2021 22:53 by rmk:") (* ;; "Modeled this after \EUCPEEK on LLREAD. In the multi-byte (non-ASCII) case, backs the file pointer to the beginning by the proper number of \BACKFILEPTRs, and returns a count of 0. Returns NIL if NOERROR and either invalid UTF8 or end of file.") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") - (* ;; "Do not do UNICODE to XCCS translation if RAW") + (* ;; "Do not do UNICODE to MCCS translation if RAW") (PROG (BYTE1 BYTE2 BYTE3 BYTE4 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) @@ -318,7 +323,7 @@ elseif NOERROR else (ERROR "INVALID UTF8 SEQUENCE" (LIST BYTE1 BYTE2 BYTE3 BYTE4] (CL:WHEN (AND CODE (NOT RAW)) - (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))) + (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOMCCS*))) (RETURN CODE]) (\UTF8.BACKCCODEFN @@ -340,11 +345,12 @@ (DEFINEQ (UTF16BE.OUTCHARFN - [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 31-Jan-2024 00:32 by rmk") + [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 24-Apr-2025 15:44 by rmk") + (* ; "Edited 31-Jan-2024 00:32 by rmk") (* ; "Edited 8-Aug-2021 13:09 by rmk:") (* ; "Edited 30-Jan-2020 23:08 by rmk:") - (* ;; "PRINT UTF16 sequence for CHARCODE. Do not do XCCS to UNICODE translation if RAW.") + (* ;; "PRINT UTF16 sequence for CHARCODE. Do not do MCCS to UNICODE translation if RAW.") (* ;; "Not sure about EOL conversion if truly %"raw%"") @@ -354,14 +360,15 @@ (IPLUS16 1 DATUM))) (FOR C INSIDE (CL:IF RAW CHARCODE - (UNICODE.TRANSLATE CHARCODE *XCCSTOUNICODE*)) DO (\WOUT STREAM C]) + (UNICODE.TRANSLATE CHARCODE *MCCSTOUNICODE*)) DO (\WOUT STREAM C]) (UTF16BE.INCCODEFN - [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 10-Mar-2024 12:00 by rmk") + [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 24-Apr-2025 15:45 by rmk") + (* ; "Edited 10-Mar-2024 12:00 by rmk") (* ; "Edited 6-Aug-2021 16:05 by rmk:") (* ;; - "Do not do UNICODE to XCCS translation if RAW. Test for SMALLPin case of funky EOF behavior") + "Do not do UNICODE to MCCS translation if RAW. Test for SMALLPin case of funky EOF behavior") (DECLARE (USEDFREE *BYTECOUNTER*)) (LET (CODE BYTE1 BYTE2 COUNT) @@ -372,18 +379,19 @@ HIBYTE _ (\BIN STREAM) LOBYTE _ (\BIN STREAM))) (CL:UNLESS RAW - (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))) + (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOMCCS*))) (CL:WHEN COUNTP (SETQ *BYTECOUNTER* COUNT)) CODE ELSE (ERROR "ODD NUMBER OF BYTES IN UTF16 FILE" STREAM]) (UTF16BE.PEEKCCODEFN - [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 10-Mar-2024 12:01 by rmk") + [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 24-Apr-2025 15:45 by rmk") + (* ; "Edited 10-Mar-2024 12:01 by rmk") (* ; "Edited 14-Jun-2021 22:58 by rmk:") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") - (* ;; "Do not do UNICODE to XCCS translation if RAW") + (* ;; "Do not do UNICODE to MCCS translation if RAW") (LET (BYTE1 BYTE2 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) @@ -397,7 +405,7 @@ LOBYTE _ BYTE2)) (CL:IF RAW CODE - (UNICODE.TRANSLATE CODE *UNICODETOXCCS*)) + (UNICODE.TRANSLATE CODE *UNICODETOMCCS*)) ELSEIF NOERROR THEN NIL) ELSEIF NOERROR @@ -405,7 +413,8 @@ ELSE (ERROR "INVALID UTF16 CHARACTER" (LIST BYTE1 BYTE2]) (\UTF16BE.BACKCCODEFN - [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 10-Mar-2024 12:02 by rmk") + [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 24-Apr-2025 15:28 by rmk") + (* ; "Edited 10-Mar-2024 12:02 by rmk") (* ; "Edited 19-Jul-2022 15:14 by rmk") (* ; "Edited 6-Aug-2021 16:07 by rmk:") @@ -421,7 +430,7 @@ LOBYTE _ BYTE2)) (CL:IF RAW CODE - (UNICODE.TRANSLATE CODE *UNICODETOXCCS*)) + (UNICODE.TRANSLATE CODE *UNICODETOMCCS*)) ELSEIF COUNTP THEN (SETQ *BYTECOUNTER* -1) NIL)))]) @@ -429,11 +438,12 @@ (DEFINEQ (UTF16LE.OUTCHARFN - [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 10-Mar-2024 11:58 by rmk") + [LAMBDA (STREAM CHARCODE RAW) (* ; "Edited 24-Apr-2025 15:45 by rmk") + (* ; "Edited 10-Mar-2024 11:58 by rmk") (* ; "Edited 8-Aug-2021 13:09 by rmk:") (* ; "Edited 30-Jan-2020 23:08 by rmk:") - (* ;; "PRINT UTF16 sequence for CHARCODE. Do not do XCCS to UNICODE translation if RAW.") + (* ;; "PRINT UTF16 sequence for CHARCODE. Do not do MCCS to UNICODE translation if RAW.") (* ;; "Not sure about EOL conversion if truly %"raw%"") @@ -443,16 +453,17 @@ (IPLUS16 1 DATUM))) (FOR C INSIDE (CL:IF RAW CHARCODE - (UNICODE.TRANSLATE CHARCODE *XCCSTOUNICODE*)) + (UNICODE.TRANSLATE CHARCODE *MCCSTOUNICODE*)) DO (BOUT STREAM (fetch LOBYTE of CHARCODE)) (BOUT STREAM (fetch HIBYTE of CHARCODE]) (UTF16LE.INCCODEFN - [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 10-Mar-2024 12:03 by rmk") + [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 24-Apr-2025 15:45 by rmk") + (* ; "Edited 10-Mar-2024 12:03 by rmk") (* ; "Edited 6-Aug-2021 16:05 by rmk:") (* ;; - "Do not do UNICODE to XCCS translation if RAW. Test for SMALLPin case of funky EOF behavior") + "Do not do UNICODE to MCCS translation if RAW. Test for SMALLPin case of funky EOF behavior") (DECLARE (USEDFREE *BYTECOUNTER*)) (LET (CODE BYTE1 BYTE2 COUNT) @@ -463,18 +474,19 @@ LOBYTE _ (\BIN STREAM) HIBYTE _ (\BIN STREAM))) (CL:UNLESS RAW - (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))) + (SETQ CODE (UNICODE.TRANSLATE CODE *UNICODETOMCCS*))) (CL:WHEN COUNTP (SETQ *BYTECOUNTER* COUNT)) CODE ELSE (ERROR "ODD NUMBER OF BYTES IN UTF16 FILE" STREAM]) (UTF16LE.PEEKCCODEFN - [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 10-Mar-2024 11:43 by rmk") + [LAMBDA (STREAM NOERROR RAW) (* ; "Edited 24-Apr-2025 15:46 by rmk") + (* ; "Edited 10-Mar-2024 11:43 by rmk") (* ; "Edited 14-Jun-2021 22:58 by rmk:") (* ;; "Could be that the caller takes care of backing up the file position if the number of binned-bytes is returned.") - (* ;; "Do not do UNICODE to XCCS translation if RAW") + (* ;; "Do not do UNICODE to MCCS translation if RAW") (LET (BYTE1 BYTE2 CODE) (SETQ BYTE1 (\PEEKBIN STREAM NOERROR)) @@ -487,7 +499,7 @@ BYTE1)) (CL:IF RAW CODE - (UNICODE.TRANSLATE CODE *UNICODETOXCCS*)) + (UNICODE.TRANSLATE CODE *UNICODETOMCCS*)) ELSEIF NOERROR THEN NIL) ELSEIF NOERROR @@ -495,7 +507,8 @@ ELSE (ERROR "INVALID UTF16 CHARACTER" (LIST BYTE1 BYTE2]) (\UTF16LE.BACKCCODEFN - [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 10-Mar-2024 12:04 by rmk") + [LAMBDA (STREAM COUNTP RAW) (* ; "Edited 24-Apr-2025 15:28 by rmk") + (* ; "Edited 10-Mar-2024 12:04 by rmk") (* ; "Edited 19-Jul-2022 15:14 by rmk") (* ; "Edited 6-Aug-2021 16:07 by rmk:") @@ -511,7 +524,7 @@ LOBYTE _ (\PEEKBIN STREAM))) (CL:IF RAW CODE - (UNICODE.TRANSLATE CODE *UNICODETOXCCS*)) + (UNICODE.TRANSLATE CODE *UNICODETOMCCS*)) ELSEIF COUNTP THEN (SETQ *BYTECOUNTER* -1) NIL)))]) @@ -647,7 +660,8 @@ (DEFINEQ (UTF8.BINCODE - [LAMBDA (STREAM RAW) (* ; "Edited 4-Feb-2024 01:06 by rmk") + [LAMBDA (STREAM RAW) (* ; "Edited 24-Apr-2025 15:28 by rmk") + (* ; "Edited 4-Feb-2024 01:06 by rmk") (* ; "Edited 1-Feb-2024 11:21 by rmk") (* ; "Edited 28-Dec-2023 13:32 by rmk") (* ; "Edited 6-Aug-2021 16:02 by rmk:") @@ -691,7 +705,7 @@ 0 6] (CL:IF RAW CODE - (UNICODE.TRANSLATE CODE *UNICODETOXCCS*))]) + (UNICODE.TRANSLATE CODE *UNICODETOMCCS*))]) (\UTF8.FETCHCODE [LAMBDA (CODESIZE BUFFER BYTEOFFSET) (* ; "Edited 28-Dec-2023 13:32 by rmk") @@ -775,19 +789,6 @@ (ILESSP BYTE 128))) 4)))]) -(UTF8-SIZE-FROM-BYTE1 - [LAMBDA (BYTE1) (* ; "Edited 2-Feb-2024 11:50 by rmk") - - (* ;; "Returns the number of bytes of a UTF-8 code, given that BYTE1 is the first (header) byte of the sequence.") - - (if (ILEQ BYTE1 127) - then 1 - elseif (ILEQ BYTE1 223) - then 2 - elseif (ILEQ BYTE1 239) - then 3 - else 4]) - (NUTF8-BYTE1-BYTES [LAMBDA (BYTE1) (* ; "Edited 3-Feb-2024 15:00 by rmk") (* ; "Edited 8-Jan-2024 10:57 by rmk") @@ -826,14 +827,23 @@ ELSE (ERROR "INVALID UTF-8 CODE"]) (NUTF8-STRING-BYTES - [LAMBDA (STRING RAW) (* ; "Edited 3-Feb-2024 21:32 by rmk") + [LAMBDA (STRING RAW) (* ; "Edited 2-Sep-2025 10:40 by rmk") + (* ; "Edited 24-Apr-2025 15:37 by rmk") + (* ; "Edited 3-Feb-2024 21:32 by rmk") (* ; "Edited 10-Aug-2020 09:06 by rmk:") - (* ;; "Returns the number of bytes it would take to represent STRING in UTF8, assuming it is an XCCS string unless RAWFLG. ") + (* ;; "Returns the number of bytes it would take to represent STRING in UTF8, assuming it is an MCCS string unless RAWFLG. ") - (FOR I C FROM 1 WHILE (SETQ C (NTHCHARCODE STRING I)) SUM (NUTF8-CODE-BYTES (CL:IF RAW + (for I C from 1 while (SETQ C (NTHCHARCODE STRING I)) sum (NUTF8-CODE-BYTES (CL:IF RAW C - (XTOUCODE C))]) + (MTOUCODE C))]) + +(N-MCHARS + [LAMBDA (UTF8STRING) (* ; "Edited 9-Sep-2025 08:35 by rmk") + + (* ;; "Returns the number of MCCS characters coded in UTF8STRING") + + (for I B from 1 while (SETQ B (NTHCHARCODE UTF8STRING I)) by (NUTF8-BYTE1-BYTES B) count T]) ) (DECLARE%: EVAL@COMPILE DONTCOPY (DECLARE%: EVAL@COMPILE @@ -865,17 +875,168 @@ ) (DEFINEQ +(MTOUCODE + [LAMBDA (MCODE) (* ; "Edited 4-Sep-2025 15:10 by rmk") + (* ; "Edited 24-Apr-2025 10:19 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + (UNICODE.TRANSLATE MCODE *MCCSTOUNICODE*]) + +(UTOMCODE + [LAMBDA (UNNICODE) (* ; "Edited 24-Apr-2025 10:17 by rmk") + (* ; "Edited 16-Jan-2025 23:46 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + (UNICODE.TRANSLATE UNNICODE *UNICODETOMCCS*]) + +(MTOUCODE? + [LAMBDA (MCODE) (* ; "Edited 4-Sep-2025 15:09 by rmk") + (* ; "Edited 24-Apr-2025 10:18 by rmk") + (* ; "Edited 20-Jan-2025 20:38 by rmk") + (* ; "Edited 18-Jan-2025 11:44 by rmk") + (* ; "Edited 15-Jan-2025 19:51 by rmk") + (* ; "Edited 14-Jan-2025 13:14 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + + (* ;; "Returns the Unix range-code(s) corresponding to MCODE if there are true mapppings, otherwise NIL. Alternative codes are returned in a list, the code itself is returned for a singleton.") + + (UNICODE.TRANSLATE MCODE *MCCSTOUNICODE* T T]) + +(UTOMCODE? + [LAMBDA (UNICODE) (* ; "Edited 24-Apr-2025 10:18 by rmk") + (* ; "Edited 19-Jan-2025 21:14 by rmk") + (* ; "Edited 18-Jan-2025 11:46 by rmk") + (* ; "Edited 15-Jan-2025 19:51 by rmk") + (* ; "Edited 14-Jan-2025 13:14 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + + (* ;; "Returns the MCCS range-code(s) corresponding to UNICODE if there are true mapppings, otherwise NIL. ") + + (* ;; + " NOTE: Alternative codes are returned in a list, the code itself is returned for a singleton.") + + (UNICODE.TRANSLATE UNICODE *UNICODETOMCCS* T T]) + +(MTOUSTRING + [LAMBDA (MSTRING DESTRUCTIVE) (* ; "Edited 2-Sep-2025 12:19 by rmk") + (* ; "Edited 29-Apr-2025 12:01 by rmk") + + (* ;; "Converts MCCS codes in MSTRING to Unicodes.") + + (for I MCODE (USTRING _ (CL:IF DESTRUCTIVE + MSTRING + (CONCAT MSTRING))) from 1 while (SETQ MCODE (NTHCHARCODE MSTRING I)) + do (RPLCHARCODE USTRING I (MTOUCODE MCODE)) finally (RETURN USTRING]) + +(UTOMSTRING + [LAMBDA (USTRING DESTRUCTIVE) (* ; "Edited 2-Sep-2025 12:18 by rmk") + (* ; "Edited 29-Apr-2025 12:00 by rmk") + + (* ;; "Converts Unicodes to MCCS codes in USTRING.") + + (for I UCODE (MSTRING _ (CL:IF DESTRUCTIVE + USTRING + (CONCAT USTRING))) from 1 while (SETQ UCODE (NTHCHARCODE USTRING I)) + do (RPLCHARCODE MSTRING I (UTOMCODE UCODE)) finally (RETURN MSTRING]) + +(MTOUTF8STRING + [LAMBDA (MSTRING) (* ; "Edited 9-Sep-2025 07:51 by rmk") + (* ; "Edited 4-Sep-2025 15:13 by rmk") + (* ; "Edited 2-Sep-2025 11:12 by rmk") + (* ; "Edited 24-Apr-2025 15:37 by rmk") + (* ; "Edited 3-Feb-2024 14:55 by rmk") + (* ; "Edited 10-Aug-2020 21:42 by rmk:") + + (* ;; + "Produces a string that contains the UTF8 bytes that represent the characters in MSTRING. ") + + (* ;; "The resulting string will not be directly interpretable inside Medley.") + + (if (if (STRINGP MSTRING) + then (OR (ffetch (STRINGP FATSTRINGP) of MSTRING) + (thereis C instring MSTRING suchthat (IGEQ C 128))) + elseif (LITATOM MSTRING) + then (OR (ffetch (LITATOM FATPNAMEP) of MSTRING) + (thereis C inatom MSTRING suchthat (IGEQ C 128))) + else T) + then (LET [(USTR (ALLOCSTRING (NUTF8-STRING-BYTES MSTRING] + (for I UCODE MCODE (SINDEX _ 0) from 1 while (SETQ MCODE (NTHCHARCODE MSTRING I)) + do (SETQ UCODE (MTOUCODE MCODE)) + (if (ILESSP UCODE 128) + then (RPLCHARCODE USTR (ADD SINDEX 1) + UCODE) + elseif (ILESSP UCODE 2048) + then (* ; "x800") + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 3 6) + (LRSH UCODE 6))) + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 2 6) + (LOADBYTE UCODE 0 6))) + elseif (ILESSP UCODE 65536) + then (* ; "x10000") + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 7 5) + (LRSH UCODE 12))) + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 2 6) + (LOADBYTE UCODE 6 6))) + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 2 6) + (LOADBYTE UCODE 0 6))) + elseif (ILESSP UCODE 2097152) + then (* ; "x200000") + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 15 4) + (LRSH UCODE 18))) + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 2 6) + (LOADBYTE UCODE 12 6))) + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 2 6) + (LOADBYTE UCODE 6 6))) + (RPLCHARCODE USTR (ADD SINDEX 1) + (LOGOR (LLSH 2 6) + (LOADBYTE UCODE 0 6))) + else (SHOULDNT))) + USTR) + else MSTRING]) + +(UTF8TOMSTRING + [LAMBDA (UTF8STRING) (* ; "Edited 9-Sep-2025 08:59 by rmk") + (CL:UNLESS (OR (STRINGP UTF8STRING) + (LITATOM UTF8STRING)) + (SETQ UTF8STRING (MKSTRING UTF8STRING))) + (CL:WHEN (ffetch (STRINGP FATSTRINGP) of UTF8STRING) + (\ILLEGAL.ARG UTF8STRING)) + (LET* ((NMCHARS (N-MCHARS UTF8STRING)) + (MSTRING (ALLOCSTRING NMCHARS))) + [for M NBYTES BYTE1 (BASE _ (ffetch (STRINGP BASE) of UTF8STRING)) from 1 to NMCHARS + as OFFSET from (fetch (STRINGP OFFST) of MSTRING) by NBYTES + do (SETQ BYTE1 (\GETBASEBYTE BASE OFFSET)) + (SETQ NBYTES (NUTF8-BYTE1-BYTES BYTE1)) + (RPLCHARCODE MSTRING M (UTOMCODE (\UTF8.FETCHCODE NBYTES BASE OFFSET] + MSTRING]) +) +(DEFINEQ + (XTOUCODE - [LAMBDA (XCCSCODE) (* ; "Edited 9-Aug-2020 09:04 by rmk:") - (UNICODE.TRANSLATE XCCSCODE *XCCSTOUNICODE*]) + [LAMBDA (XCODE) (* ; "Edited 4-Sep-2025 15:09 by rmk") + (* ; "Edited 24-May-2025 23:16 by rmk") + (* ; "Edited 24-Apr-2025 15:27 by rmk") + (* ; "Edited 9-Aug-2020 09:04 by rmk:") + (UNICODE.TRANSLATE (XTOMCODE XCODE) + *MCCSTOUNICODE*]) (UTOXCODE - [LAMBDA (UNNICODE) (* ; "Edited 16-Jan-2025 23:46 by rmk") + [LAMBDA (UNICODE) (* ; "Edited 24-May-2025 23:17 by rmk") + (* ; "Edited 24-Apr-2025 15:28 by rmk") + (* ; "Edited 16-Jan-2025 23:46 by rmk") (* ; "Edited 9-Aug-2020 09:04 by rmk:") - (UNICODE.TRANSLATE UNNICODE *UNICODETOXCCS*]) + (MTOXCODE (UNICODE.TRANSLATE UNICODE *UNICODETOMCCS*]) (XTOUCODE? - [LAMBDA (XCCSCODE) (* ; "Edited 20-Jan-2025 20:38 by rmk") + [LAMBDA (XCCSCODE) (* ; "Edited 24-May-2025 23:18 by rmk") + (* ; "Edited 24-Apr-2025 15:27 by rmk") + (* ; "Edited 20-Jan-2025 20:38 by rmk") (* ; "Edited 18-Jan-2025 11:44 by rmk") (* ; "Edited 15-Jan-2025 19:51 by rmk") (* ; "Edited 14-Jan-2025 13:14 by rmk") @@ -883,10 +1044,13 @@ (* ;; "Returns the Unix range-code(s) corresponding to XCCSCODE if there are true mapppings, otherwise NIL. Alternative codes are returned in a list, the code itself is returned for a singleton.") - (UNICODE.TRANSLATE XCCSCODE *XCCSTOUNICODE* T T]) + (UNICODE.TRANSLATE (XTOMCODE XCCSCODE) + *MCCSTOUNICODE* T T]) (UTOXCODE? - [LAMBDA (UNICODE) (* ; "Edited 19-Jan-2025 21:14 by rmk") + [LAMBDA (UNICODE) (* ; "Edited 24-May-2025 23:19 by rmk") + (* ; "Edited 24-Apr-2025 15:28 by rmk") + (* ; "Edited 19-Jan-2025 21:14 by rmk") (* ; "Edited 18-Jan-2025 11:46 by rmk") (* ; "Edited 15-Jan-2025 19:51 by rmk") (* ; "Edited 14-Jan-2025 13:14 by rmk") @@ -897,7 +1061,47 @@ (* ;;  " NOTE: Alternative codes are returned in a list, the code itself is returned for a singleton.") - (UNICODE.TRANSLATE UNICODE *UNICODETOXCCS* T T]) + (MTOXCODE (UNICODE.TRANSLATE UNICODE *UNICODETOMCCS* T T]) + +(XTOUSTRING + [LAMBDA (XSTRING DESTRUCTIVE) (* ; "Edited 2-Sep-2025 12:00 by rmk") + (* ; "Edited 29-Apr-2025 12:01 by rmk") + + (* ;; "Converts XCCS codes in XSTRING to Unicodes.") + + (for I UCODE XCODE (USTRING _ (CL:IF DESTRUCTIVE + XSTRING + (CONCAT XSTRING))) from 1 while (SETQ XCODE (NTHCHARCODE + XSTRING I)) + do (RPLCHARCODE USTRING I (XTOUCODE XCODE)) finally (RETURN USTRING]) + +(UTOXSTRING + [LAMBDA (USTRING DESTRUCTIVE) (* ; "Edited 2-Sep-2025 11:54 by rmk") + (* ; "Edited 29-Apr-2025 12:00 by rmk") + + (* ;; "Converts Unicodes in USTRING to XCCS codes.") + + (for I XCODE UCODE (XSTRING _ (CL:IF DESTRUCTIVE + USTRING + (CONCAT USTRING))) from 1 while (SETQ UCODE (NTHCHARCODE + USTRING I)) + unless (EQ UCODE (SETQ XCODE (UTOXCODE UCODE))) do (RPLCHARCODE XSTRING I XCODE) + finally (RETURN XSTRING]) + +(XTOUTF8STRING + [LAMBDA (XSTRING) (* ; "Edited 4-Sep-2025 18:37 by rmk") + (* ; "Edited 2-Sep-2025 11:37 by rmk") + (* ; "Edited 29-Apr-2025 12:53 by rmk") + (* ; "Edited 24-Apr-2025 15:42 by rmk") + (* ; "Edited 3-Feb-2024 14:55 by rmk") + (* ; "Edited 10-Aug-2020 21:42 by rmk:") + + (* ;; "Produces a string that contains the UTF8 bytes that represent the characters in XSTRING. Applies the ") + + (* ;; "The resulting string will not be interpretable inside Medley.") + + (for I C (MSTRING _ (CONCAT XSTRING)) from 1 while (SETQ C (NTHCHARCODE XSTRING I)) + do (RPLCHARCODE MSTRING I (XTOMCODE C)) finally (RETURN (MTOUTF8STRING MSTRING]) ) @@ -954,7 +1158,8 @@ (DEFINEQ (READ-UNICODE-MAPPING-FILENAMES - [LAMBDA (FILESPEC) (* ; "Edited 27-Jan-2025 16:46 by rmk") + [LAMBDA (FILESPEC) (* ; "Edited 4-Sep-2025 00:11 by rmk") + (* ; "Edited 27-Jan-2025 16:46 by rmk") (* ; "Edited 21-Jan-2025 22:51 by rmk") (* ; "Edited 19-Jan-2025 12:21 by rmk") (* ; "Edited 3-Feb-2024 11:00 by rmk") @@ -1005,7 +1210,9 @@ (FUNCTION STRING.EQUAL]) (READ-UNICODE-MAPPING - [LAMBDA (FILESPEC PRINT NOERROR) (* ; "Edited 31-Jan-2025 17:43 by rmk") + [LAMBDA (FILESPEC PRINT NOERROR) (* ; "Edited 4-Sep-2025 00:17 by rmk") + (* ; "Edited 24-Apr-2025 15:32 by rmk") + (* ; "Edited 31-Jan-2025 17:43 by rmk") (* ; "Edited 17-Jan-2025 16:41 by rmk") (* ; "Edited 3-Feb-2024 00:21 by rmk") (* ; "Edited 5-Jan-2024 12:26 by rmk") @@ -1013,7 +1220,7 @@ (* ;; "Combines the char-mapping tables from FILES coded in the Uncode-CDROM format. Comments prefixed by # and") - (* ;; " Column 1: Input hex code in the format 0xXXXX") + (* ;; " Column 1: XCCS input hex code in the format 0xXXXX") (* ;; " Column 2: Corresponding Unicode code-sequence in the format") @@ -1025,15 +1232,15 @@ (* ;; "") - (* ;; "Result is a list of (fromcode tocode1 ... tocoden) integer lists (almost always with only a single tocode") + (* ;; "Result is a list of (fromcode tocode1 ... tocoden) integer lists (almost always with only a single tocode, where fromcode is an MCCS code and the tocodes are corresponding Unicodes.") - (FOR FILE [SEPBITTABLE _ (MAKEBITTABLE (CHARCODE (TAB SPACE] IN (READ-UNICODE-MAPPING-FILENAMES + (for FILE [SEPBITTABLE _ (MAKEBITTABLE (CHARCODE (TAB SPACE] in (READ-UNICODE-MAPPING-FILENAMES FILESPEC) - JOIN + join (* ;; "External format :THROUGH means read as bytes, so the Unicode UTF-8 comments cannot cause reading problems.") (CL:WITH-OPEN-FILE (STREAM FILE :DIRECTION :INPUT :EXTERNAL-FORMAT `(:THROUGH LF)) - (bind LINE NAME CHARSET START + (bind LINE NAME CHARSET START MAP first (CL:UNLESS (FILEPOS "Name:" STREAM NIL NIL NIL T) (ERROR "NOT A UNICODE MAPPING FILE" (FULLNAME STREAM))) (SETQ NAME (CL:STRING-TRIM " " (CL:READ-LINE STREAM NIL NIL))) @@ -1047,16 +1254,22 @@ when (SETQ START (STRPOSL SEPBITTABLE LINE 1 T)) unless (EQ (CHARCODE %#) (NTHCHARCODE LINE START)) - collect (bind END CODES while [SETQ END (OR (STRPOSL SEPBITTABLE LINE START) - (ADD1 (NCHARS LINE] - collect [CHARCODE.DECODE (SUBSTRING LINE START (SUB1 END) - (CONSTANT (CONCAT] - repeatwhile (AND (SETQ START (STRPOSL SEPBITTABLE LINE END T)) - (NEQ (CHARCODE %#) - (NTHCHARCODE LINE START))) - finally (CL:WHEN (CDDR $$VAL) + collect [SETQ MAP (bind END CODES while [SETQ END (OR (STRPOSL SEPBITTABLE LINE + START) + (ADD1 (NCHARS LINE] + collect [CHARCODE.DECODE (SUBSTRING LINE START + (SUB1 END) + (CONSTANT (CONCAT] + repeatwhile (AND (SETQ START (STRPOSL SEPBITTABLE LINE END + T)) + (NEQ (CHARCODE %#) + (NTHCHARCODE LINE START))) + finally (CL:WHEN (CDDR $$VAL) (* ; "Combiners go into a CADR list") - (RPLACD $$VAL (CONS (CDR $$VAL))))]) + (RPLACD $$VAL (CONS (CDR $$VAL))))] + (change (CAR MAP) + (XTOMCODE DATUM)) + MAP]) ) @@ -1066,7 +1279,9 @@ (DEFINEQ (MAKE-UNICODE-TRANSLATION-TABLES - [LAMBDA (MAPPING REINSTALL) (* ; "Edited 31-Jan-2025 17:46 by rmk") + [LAMBDA (MAPPING REINSTALL) (* ; "Edited 4-Sep-2025 00:30 by rmk") + (* ; "Edited 24-Apr-2025 15:47 by rmk") + (* ; "Edited 31-Jan-2025 17:46 by rmk") (* ; "Edited 26-Jan-2025 19:36 by rmk") (* ; "Edited 22-Jan-2025 14:22 by rmk") (* ; "Edited 19-Jan-2025 15:08 by rmk") @@ -1097,22 +1312,23 @@ (* ;; "") (if REINSTALL - then (SETQ *XCCS-LOADED-CHARSETS* (SETQ *UNICODE-LOADED-CHARSETS* NIL)) - (SETQ *NEXT-PRIVATE-XCCSCODE* FIRST-PRIVATE-XCCSCODE) + then (SETQ *MCCS-LOADED-CHARSETS* (SETQ *UNICODE-LOADED-CHARSETS* NIL)) + (SETQ *NEXT-PRIVATE-MCCSCODE* FIRST-PRIVATE-MCCSCODE) (SETQ *NEXT-PRIVATE-UNICODE* FIRST-PRIVATE-UNICODE) (LET [(TABLE (HASHARRAY (LENGTH MAPPING))) (INVERSETABLE (HASHARRAY (LENGTH MAPPING] (MERGE-UNICODE-TRANSLATION-TABLES NIL MAPPING TABLE INVERSETABLE) - (SETQ *XCCSTOUNICODE* TABLE) - (SETQ *UNICODETOXCCS* INVERSETABLE) - (LIST *XCCSTOUNICODE* *UNICODETOXCCS*)) - else (CL:UNLESS (BOUNDP '*NEXT-PRIVATE-XCCSCODE*) - (SETQ *NEXT-PRIVATE-XCCSCODE* FIRST-PRIVATE-XCCSCODE) + (SETQ *MCCSTOUNICODE* TABLE) + (SETQ *UNICODETOMCCS* INVERSETABLE) + (LIST *MCCSTOUNICODE* *UNICODETOMCCS*)) + else (CL:UNLESS (BOUNDP '*NEXT-PRIVATE-MCCSCODE*) + (SETQ *NEXT-PRIVATE-MCCSCODE* FIRST-PRIVATE-MCCSCODE) (SETQ *NEXT-PRIVATE-UNICODE* FIRST-PRIVATE-UNICODE)) (MERGE-UNICODE-TRANSLATION-TABLES NIL MAPPING]) (MERGE-UNICODE-TRANSLATION-TABLES - [LAMBDA (INVERSE MAPPING TABLE INVERSETABLE) (* ; "Edited 31-Jan-2025 17:45 by rmk") + [LAMBDA (INVERSE MAPPING TABLE INVERSETABLE) (* ; "Edited 24-Apr-2025 15:28 by rmk") + (* ; "Edited 1-Feb-2025 21:42 by rmk") (* ; "Edited 26-Jan-2025 12:58 by rmk") (* ; "Edited 22-Jan-2025 08:20 by rmk") (* ; "Edited 19-Jan-2025 15:58 by rmk") @@ -1124,33 +1340,32 @@ (* ;; "MAPPINGS is a list of pairs that map domain codes to range codes. TABLE and INVERSETABLE default to *XCCSTOUNICODE* *UNICODETOXCCS* respectively. ") (CL:UNLESS TABLE - [SETQ TABLE (OR *XCCSTOUNICODE* (SETQ *XCCSTOUNICODE* (HASHARRAY (LENGTH MAPPING]) + [SETQ TABLE (OR *MCCSTOUNICODE* (SETQ *MCCSTOUNICODE* (HASHARRAY (LENGTH MAPPING]) (CL:UNLESS INVERSETABLE - [SETQ INVERSETABLE (OR *UNICODETOXCCS* (SETQ *UNICODETOXCCS* (HASHARRAY (LENGTH MAPPING]) + [SETQ INVERSETABLE (OR *UNICODETOMCCS* (SETQ *UNICODETOMCCS* (HASHARRAY (LENGTH MAPPING]) (for M D R OLDR in MAPPING first (CL:IF INVERSE (swap TABLE INVERSETABLE)) eachtime (SETQ D (CAR M)) (SETQ R (CADR M)) - (* ;; "We don't do combiners and we don't go outside of SMALLP's") + (* ;; "We don't do combiners, but we are allowing non-SMALLP's") unless (OR (LISTP D) - (LISTP R)) when (AND (SMALLP D) - (SMALLP R)) do + (LISTP R)) do + (* ;; "The (CONS R OLDR) deals with alternatives: (U X1) (U X2) => (U (X1 X2)), lowest code first. Those are only possible in the U-to-X direction when the tables contain (X1 U) and (X2 U). There are no duplicates/alternative table entries in the X-to-U direction.") - (* ;; "The (CONS R OLDR) deals with alternatives: (U X1) (U X2) => (U (X1 X2)), lowest code first. Those are only possible in the U-to-X direction when the tables contain (X1 U) and (X2 U). There are no duplicates/alternative table entries in the X-to-U direction.") - - (SETQ OLDR (GETHASH D TABLE)) - (CL:UNLESS (MEMB R OLDR) - (PUTHASH D (SORT (CONS R OLDR)) - TABLE)) - (swap D R) - (SETQ OLDR (GETHASH D INVERSETABLE)) - (CL:UNLESS (MEMB R OLDR) - (PUTHASH D (SORT (CONS R OLDR)) - INVERSETABLE))) + (SETQ OLDR (GETHASH D TABLE)) + (CL:UNLESS (MEMB R OLDR) + (PUTHASH D (SORT (CONS R OLDR)) + TABLE)) + (swap D R) + (SETQ OLDR (GETHASH D INVERSETABLE)) + (CL:UNLESS (MEMB R OLDR) + (PUTHASH D (SORT (CONS R OLDR)) + INVERSETABLE))) (LIST TABLE INVERSETABLE]) (UNICODE.UNMAPPED - [LAMBDA (CODE TABLE DONTFAKE) (* ; "Edited 22-Jan-2025 08:19 by rmk") + [LAMBDA (CODE TABLE DONTFAKE) (* ; "Edited 24-Apr-2025 15:48 by rmk") + (* ; "Edited 22-Jan-2025 08:19 by rmk") (* ; "Edited 19-Jan-2025 22:02 by rmk") (* ; "Edited 18-Jan-2025 12:02 by rmk") (* ; "Edited 2-Feb-2024 23:52 by rmk") @@ -1165,7 +1380,7 @@ (* ;; "") - (PROG ((INVERSE (EQ TABLE *UNICODETOXCCS*)) + (PROG ((INVERSE (EQ TABLE *UNICODETOMCCS*)) RANGE HASH) (* ;; "If we already looked up CODE's character set in a file, then we have already filled in its information in the translation table. If it didn't have a code for a particular character, then we fake it here. Faked codes are negative, so we can detect them easily, and interpret them with IABS.") @@ -1183,15 +1398,15 @@ (* ;; "Our attempt at extending the known tables did not provide a mapping for CODE. So we fake it up with the next unused private code in the code space. ") - (* ;; "The number of possible faked mappings is determined by the number of private-use Unicodes, since the XCCS character space is pretty sparse. The codes don't have to come from the same part of the code space, and the NEXTCODEs are saved in global variables. The last available codes are constants.") + (* ;; "The number of possible faked mappings is determined by the number of private-use Unicodes, since the MCCS character space is pretty sparse. The codes don't have to come from the same part of the code space, and the NEXTCODEs are saved in global variables. The last available codes are constants.") - (CL:WHEN (IEQP *NEXT-PRIVATE-XCCSCODE* LAST-PRIVATE-XCCSCODE) + (CL:WHEN (IEQP *NEXT-PRIVATE-MCCSCODE* LAST-PRIVATE-MCCSCODE) (* ;  "Same number of available codes both ways") (ERROR "EXHAUSTED RANGE FOR UNMAPPED CODES")) (if INVERSE - then (SETQ RANGE *NEXT-PRIVATE-XCCSCODE*) - (add *NEXT-PRIVATE-XCCSCODE* 1) + then (SETQ RANGE *NEXT-PRIVATE-MCCSCODE*) + (add *NEXT-PRIVATE-MCCSCODE* 1) else (SETQ RANGE *NEXT-PRIVATE-UNICODE*) (add *NEXT-PRIVATE-UNICODE* 1)) (MERGE-UNICODE-TRANSLATION-TABLES INVERSE (CONS (LIST CODE RANGE))) @@ -1201,7 +1416,10 @@ (RETURN (CONS RANGE)))]) (UNICODE-EXTEND-TRANSLATION? - [LAMBDA (CODE TABLE) (* ; "Edited 26-Jan-2025 11:26 by rmk") + [LAMBDA (CODE TABLE) (* ; "Edited 4-Sep-2025 00:34 by rmk") + (* ; "Edited 29-Jun-2025 16:44 by rmk") + (* ; "Edited 24-Apr-2025 15:49 by rmk") + (* ; "Edited 26-Jan-2025 11:26 by rmk") (* ; "Edited 21-Jan-2025 22:31 by rmk") (* ; "Edited 18-Jan-2025 12:40 by rmk") (* ; "Edited 13-Jan-2025 23:50 by rmk") @@ -1210,28 +1428,28 @@ (* ; "Edited 5-Feb-2024 13:48 by rmk") (* ; "Edited 3-Feb-2024 12:40 by rmk") - (* ;; "There is currently no mapping for CODE in TABLE, hopefully just because the relevant character-set mapping has not been installed. We infer from TABLE whether CODE is an XCCS or UNICODE code and look for the proper mapping table (forward or inverted) for its character set. ") + (* ;; "There is currently no mapping for CODE in TABLE, hopefully just because the relevant character-set mapping has not been installed. We infer from TABLE whether CODE is an MCCS or UNICODE code and look for the proper mapping table (forward or inverted) for its character set. ") (* ;; "We record which character sets we have already expanded so we don't do them again.") (LET ((CHARSET (\CHARSET CODE)) - (INVERSE (EQ TABLE *UNICODETOXCCS*)) + (INVERSE (EQ TABLE *UNICODETOMCCS*)) MAPPING FILE) (* ;; "If we already looked for CHARSET in the file and found anything, it has already been merged. Otherwise, it would just fail again") (CL:UNLESS (MEMB CHARSET (CL:IF INVERSE *UNICODE-LOADED-CHARSETS* - *XCCS-LOADED-CHARSETS*)) + *MCCS-LOADED-CHARSETS*)) (* ;; "Don't try this charset again.") (CL:IF INVERSE (push *UNICODE-LOADED-CHARSETS* CHARSET) - (push *XCCS-LOADED-CHARSETS* CHARSET)) + (push *MCCS-LOADED-CHARSETS* CHARSET)) (SETQ FILE (FINDFILE (CL:IF INVERSE - 'INVERTED-UNICODE-MAPPINGS.TXT - 'UNICODE-MAPPINGS.TXT) + 'UNICODE-TO-MCCS-MAPPINGS + MCCS-TO-UNICODE-MAPPINGS) T UNICODEDIRECTORIES)) (* ;; "The mappings files are indexed by CHARSET.") @@ -1250,7 +1468,8 @@ (DEFINEQ (ALL-UNICODE-MAPPINGS - [LAMBDA (INVERTED FILE) (* ; "Edited 31-Jan-2025 17:46 by rmk") + [LAMBDA (INVERTED FILE) (* ; "Edited 24-Apr-2025 15:51 by rmk") + (* ; "Edited 31-Jan-2025 17:46 by rmk") (* ; "Edited 26-Jan-2025 13:40 by rmk") (* ; "Edited 22-Jan-2025 14:07 by rmk") (* ; "Edited 19-Jan-2025 12:20 by rmk") @@ -1260,16 +1479,16 @@ (* ; "Edited 5-Feb-2024 13:14 by rmk") (* ; "Edited 3-Feb-2024 09:16 by rmk") - (* ;; "Reads all the XCCS-to-UNICODE mapping files that we know about, and produces a 2-level index that maps between XCCS codes and UNICODE codes, depending on INVERTED.") + (* ;; "Reads all the XCCS-to-UNICODE mapping files that we know about, and produces a 2-level index that maps between MCCS codes and UNICODE codes, depending on INVERTED.") (* ;; "The first index level segments all the domain codes according to their character sets. The segments are sorted by character set, the pairs within each segment are sorted by their domain codes. ") (* ;;  "E.g. if INVERTED=NIL and given a XCCS code, the lookup for the corresponding Unicode(s) is") - (* ;; " (CADR (ASSOC XCCSCODE (\CHARSET XCCSCODE) INDEX)))).") + (* ;; " (CADR (ASSOC MCCSCODE (\CHARSET MCCSCODE) INDEX)))).") - (* ;; "If FILE is not NIL, the result is written to a file. If FILE is T, the file is either UNICODE-MAPPINGS.TXT or INVERTED-UNICODED-MAPPINGS.TXT, depending on INVERTED.") + (* ;; "If FILE is not NIL, the result is written to a file. If FILE is T, the file is either MCCS-TO-UNICODE-MAPPINGS.TXT or UNICODE-TO-MCCS-MAPPINGS.TXT, depending on INVERTED.") (LET (INDEX) (for PAIR DOMAIN RANGE CHARSET in (READ-UNICODE-MAPPING 'ALL) eachtime (SETQ DOMAIN @@ -1287,7 +1506,7 @@ INDEX) (CAR (push INDEX (CONS (\CHARSET DOMAIN] - (* ;; "For alternative mappings (in the U-to-X direction) we end up with (D R1 R2 ...). (CADR is the first (and almost always) the only one.") + (* ;; "For alternative mappings (in the U-to-M direction) we end up with (D R1 R2 ...). (CADR is the first (and almost always) the only one.") (pushnew [CDR (OR (ASSOC DOMAIN (CDR CHARSET)) (CAR (push (CDR CHARSET) @@ -1312,8 +1531,8 @@ then (SETQ FILE (PACKFILENAME 'BODY (if (NEQ FILE T) then FILE elseif INVERTED - then 'INVERTED-UNICODE-MAPPINGS - else 'UNICODE-MAPPINGS) + then 'UNICODE-TO-MCCS-MAPPINGS + else 'MCCS-TO-UNICODE-MAPPINGS) 'DIRECTORY (CAR (MKLIST UNICODEDIRECTORIES)) 'EXTENSION @@ -1330,19 +1549,43 @@ (PRINTOUT STREAM "STOP" T) (FULLNAME STREAM)) else INDEX]) + +(XCCSJAPANESECHARSETS + [LAMBDA (OCTAL FILE) (* ; "Edited 11-Jun-2025 23:00 by rmk") + + (* ;; "Returns the list of numbers for the Japanese character sets.") + + (for F POS CS in (READ-UNICODE-MAPPING-FILENAMES "JIS") + when (SETQ POS (STRPOS "XCCS-" F 1 NIL NIL T)) + collect [SETQ CS (SUBSTRING F POS (SUB1 (STRPOS '=JIS F POS] + (CL:IF OCTAL + CS + (MKATOM (CONCAT CS "Q"))) + finally (SORT $$VAL) + (CL:WHEN FILE + (RETURN (CL:WITH-OPEN-FILE (STREAM (PACKFILENAME 'BODY (CL:IF (EQ FILE T) + "JAPANESECHARSETS" + FILE) + 'DIRECTORY + (CAR (MKLIST UNICODEDIRECTORIES)) + 'EXTENSION + 'TXT) + :DIRECTION :OUTPUT :IF-EXISTS :NEW-VERSION) + (PRINT $$VAL STREAM) + (FULLNAME STREAM))))]) ) -(RPAQ? *XCCSTOUNICODE* ) +(RPAQ? *MCCSTOUNICODE* ) -(RPAQ? *UNICODETOXCCS* ) +(RPAQ? *UNICODETOMCCS* ) -(RPAQ? *XCCS-LOADED-CHARSETS* ) +(RPAQ? *MCCS-LOADED-CHARSETS* ) (RPAQ? *UNICODE-LOADED-CHARSETS* ) (DECLARE%: DOEVAL@COMPILE DONTCOPY -(GLOBALVARS *XCCSTOUNICODE* *UNICODETOXCCS* *NEXT-PRIVATE-UNICODE* *NEXT-PRIVATE-XCCSCODE* - *XCCS-LOADED-CHARSETS* *UNICODE-LOADED-CHARSETS*) +(GLOBALVARS *MCCSTOUNICODE* *UNICODETOMCCS* *NEXT-PRIVATE-UNICODE* *NEXT-PRIVATE-MCCSCODE* + *MCCS-LOADED-CHARSETS* *UNICODE-LOADED-CHARSETS*) ) (DECLARE%: EVAL@COMPILE DONTCOPY (DECLARE%: EVAL@COMPILE @@ -1351,15 +1594,15 @@ (RPAQ LAST-PRIVATE-UNICODE (HEXNUM? "F8FF")) -(RPAQ FIRST-PRIVATE-XCCSCODE (CHARCODE "200,0")) +(RPAQ FIRST-PRIVATE-MCCSCODE (CHARCODE "200,0")) -(RPAQ LAST-PRIVATE-XCCSCODE (CHARCODE "230,377")) +(RPAQ LAST-PRIVATE-MCCSCODE (CHARCODE "230,377")) (CONSTANTS (FIRST-PRIVATE-UNICODE (HEXNUM? "E000")) (LAST-PRIVATE-UNICODE (HEXNUM? "F8FF")) - (FIRST-PRIVATE-XCCSCODE (CHARCODE "200,0")) - (LAST-PRIVATE-XCCSCODE (CHARCODE "230,377"))) + (FIRST-PRIVATE-MCCSCODE (CHARCODE "200,0")) + (LAST-PRIVATE-MCCSCODE (CHARCODE "230,377"))) ) (DECLARE%: EVAL@COMPILE @@ -1368,11 +1611,11 @@ (* ;; "Return NIL if RANGE is a fake range in TABLE, otherwise RANGE.") - (CL:UNLESS (CL:IF (EQ TABLE *XCCSTOUNICODE*) + (CL:UNLESS (CL:IF (EQ TABLE *MCCSTOUNICODE*) (AND (IGEQ RANGE FIRST-PRIVATE-UNICODE) (ILEQ RANGE LAST-PRIVATE-UNICODE)) - (AND (IGEQ RANGE FIRST-PRIVATE-XCCSCODE) - (ILEQ RANGE LAST-PRIVATE-XCCSCODE))) + (AND (IGEQ RANGE FIRST-PRIVATE-MCCSCODE) + (ILEQ RANGE LAST-PRIVATE-MCCSCODE))) RANGE))) ) ) @@ -1581,29 +1824,6 @@ (CAR UNICODEDIRECTORIES) 'EXTENSION 'TXT]) - -(HEXSTRING - [LAMBDA (N WIDTH) (* ; "Edited 23-Jul-2020 08:28 by rmk:") - (* ; "Edited 20-Dec-93 17:51 by rmk:") - - (* ;; - "Converts positive numbers to Hex strings, padding on the right with 0 up to WIDTH if given.") - - (CL:UNLESS (FIXP N) - (SETQ N (CHARCODE.DECODE N))) - (LET [CHAR (STR (ALLOCSTRING [IMAX (OR WIDTH 0) - (FOR I (LEFT _ N) FROM 0 UNTIL (EQ LEFT 0) - DO (SETQ LEFT (LRSH LEFT 4)) - FINALLY (RETURN (MAX I 1] - (CHARCODE 0] - (FOR I FROM -1 BY -1 UNTIL (EQ N 0) - DO (SETQ CHAR (LOGAND N 15)) - [RPLCHARCODE STR I (IF (ILESSP CHAR 10) - THEN (+ CHAR (CHARCODE 0)) - ELSE (+ (- CHAR 10) - (CHARCODE A] - (SETQ N (LRSH N 4))) - STR]) ) (DEFINEQ @@ -1705,68 +1925,6 @@ (LOGOR (LLSH 2 6) (LOADBYTE CHARCODE 0 6))) ELSE (ERROR "CHARCODE too big for UTF8" CHARCODE]) - -(XTOUSTRING - [LAMBDA (XCCSSTRING RAWFLG) (* ; "Edited 3-Feb-2024 14:55 by rmk") - (* ; "Edited 10-Aug-2020 21:42 by rmk:") - - (* ;; "Produces a string that contains the UTF8 bytes that represent the characters in XCCSSTRING. Applies the XCCSTOUNICODE translation unless RAWFLG. ") - - (* ;; "The resulting string will not be readable inside Medley.") - - (LET [(USTR (ALLOCSTRING (NUTF8-STRING-BYTES XCCSSTRING RAWFLG] - (FOR I CHARCODE (SINDEX _ 0) FROM 1 WHILE (SETQ CHARCODE (NTHCHARCODE XCCSSTRING I)) - DO (CL:UNLESS RAWFLG - (SETQ CHARCODE (XTOUCODE CHARCODE))) - (IF (ILESSP CHARCODE 128) - THEN (RPLCHARCODE USTR (ADD SINDEX 1) - CHARCODE) - ELSEIF (ILESSP CHARCODE 2048) - THEN (* ; "x800") - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 3 6) - (LRSH CHARCODE 6))) - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 2 6) - (LOADBYTE CHARCODE 0 6))) - ELSEIF (ILESSP CHARCODE 65536) - THEN (* ; "x10000") - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 7 5) - (LRSH CHARCODE 12))) - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 2 6) - (LOADBYTE CHARCODE 6 6))) - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 2 6) - (LOADBYTE CHARCODE 0 6))) - ELSEIF (ILESSP CHARCODE 2097152) - THEN (* ; "x200000") - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 15 4) - (LRSH CHARCODE 18))) - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 2 6) - (LOADBYTE CHARCODE 12 6))) - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 2 6) - (LOADBYTE CHARCODE 6 6))) - (RPLCHARCODE USTR (ADD SINDEX 1) - (LOGOR (LLSH 2 6) - (LOADBYTE CHARCODE 0 6))) - ELSE (SHOULDNT))) - USTR]) - -(XCCSSTRING - [LAMBDA (CODE) (* ; "Edited 13-Aug-2020 12:16 by rmk:") - - (* ;; "Returns XCCS character representation of string %"cset,char%"") - - (CL:UNLESS (FIXP CODE) - (SETQ CODE (CHCON1 CODE))) - (CONCAT (OCTALSTRING (LRSH CODE 8)) - "," - (OCTALSTRING (LOGAND CODE 255]) ) @@ -1776,40 +1934,39 @@ (DEFINEQ (SHOWCHARS - [LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 26-Jan-2024 14:18 by mth") + [LAMBDA (FONT FROMCHAR TOCHAR ONELINE) (* ; "Edited 7-Sep-2025 20:29 by rmk") + (* ; "Edited 2-Sep-2025 10:26 by rmk") + (* ; "Edited 24-Jul-2025 11:30 by rmk") + (* ; "Edited 8-Jun-2025 20:05 by rmk") + (* ; "Edited 26-Jan-2024 14:18 by mth") (* ; "Edited 1-Aug-2020 09:27 by rmk:") - (RESETFORM (DSPFONT (OR FONT '(CLASSIC 12)) - T) - (CL:WHEN (AND (SMALLP FROMCHAR) - (NOT TOCHAR)) - - (* ;; - "If a small number, assume it's an octal (in decimal) character set, no need for string quotes") - - (SETQ TOCHAR (CONCAT FROMCHAR "," 376)) - (SETQ FROMCHAR (CONCAT FROMCHAR "," 41))) - (CL:UNLESS (SMALLP FROMCHAR) - (SETQ FROMCHAR (CHARCODE.DECODE FROMCHAR))) - (CL:UNLESS (SMALLP TOCHAR) - (SETQ TOCHAR (CL:IF TOCHAR - (CHARCODE.DECODE TOCHAR) - FROMCHAR))) - (for C from FROMCHAR to TOCHAR unless (AND (IGEQ (LOGAND C 255) - 127) - (ILEQ (LOGAND C 255) - (PLUS 128 33))) - do (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH C 8)) - "," - (OCTALSTRING (LOGAND C 255))) - 10 - (CHARACTER C) - T]) + [SETQ FONT (FONTCREATE (OR FONT '(CLASSIC 12] + (RESETLST + [LET ((OLDFONT (DSPFONT NIL T)) + CHARS) + (CL:UNLESS (CHARCODEP FROMCHAR) + (SETQ FROMCHAR (OR (CHARCODE.DECODE FROMCHAR T) + FROMCHAR))) + (SETQ CHARS (if (CHARCODEP FROMCHAR) + then (CL:UNLESS (CHARCODEP TOCHAR) + (SETQ TOCHAR (OR (CHARCODE.DECODE FROMCHAR) + FROMCHAR))) + (for C from FROMCHAR to TOCHAR collect C) + else (CHCON FROMCHAR))) + [RESETSAVE OLDFONT '(PROGN (DSPFONT OLDVALUE] + (TERPRI) + (for C in CHARS do (PRINTOUT T .FONT OLDFONT (CONCAT (OCTALSTRING (\CHARSET C)) + "," + (OCTALSTRING (\CHAR8CODE C))) + 10 .FONT FONT (CHARACTER C)) + (CL:UNLESS ONELINE (PRINTOUT T T]) + (TERPRI]) ) (DECLARE%: DOEVAL@LOAD DONTCOPY (DECLARE%: EVAL@COMPILE (PUTPROPS HEXCHAR MACRO ((CODE) - (HEXSTRING CODE))) + (HEXSTRING CODE))) (PUTPROPS OCTALCHAR MACRO [(CODE) (CONCAT (OCTALSTRING (\CHARSET CODE)) @@ -1825,23 +1982,25 @@ (PUTPROPS UNICODE FILETYPE :TCOMPL) (DECLARE%: DONTCOPY - (FILEMAP (NIL (4369 19181 (UTF8.OUTCHARFN 4379 . 7286) (UTF8.SLUG.OUTCHARFN 7288 . 7843) ( -UTF8.INCCODEFN 7845 . 13457) (UTF8.PEEKCCODEFN 13459 . 18199) (\UTF8.BACKCCODEFN 18201 . 19179)) ( -19182 23436 (UTF16BE.OUTCHARFN 19192 . 20102) (UTF16BE.INCCODEFN 20104 . 21120) (UTF16BE.PEEKCCODEFN -21122 . 22353) (\UTF16BE.BACKCCODEFN 22355 . 23434)) (23437 27724 (UTF16LE.OUTCHARFN 23447 . 24454) ( -UTF16LE.INCCODEFN 24456 . 25472) (UTF16LE.PEEKCCODEFN 25474 . 26641) (\UTF16LE.BACKCCODEFN 26643 . -27722)) (27725 30772 (READBOM 27735 . 29804) (WRITEBOM 29806 . 30770)) (30802 34367 ( -MAKE-UNICODE-FORMATS 30812 . 34365)) (34464 38849 (UTF8.BINCODE 34474 . 37053) (\UTF8.FETCHCODE 37055 - . 38847)) (38850 44371 (UTF8.VALIDATE 38860 . 41457) (UTF8-SIZE-FROM-BYTE1 41459 . 41891) ( -NUTF8-BYTE1-BYTES 41893 . 42630) (NUTF8-CODE-BYTES 42632 . 43689) (NUTF8-STRING-BYTES 43691 . 44369)) -(46099 48246 (XTOUCODE 46109 . 46281) (UTOXCODE 46283 . 46564) (XTOUCODE? 46566 . 47387) (UTOXCODE? -47389 . 48244)) (49483 56904 (READ-UNICODE-MAPPING-FILENAMES 49493 . 53181) (READ-UNICODE-MAPPING -53183 . 56902)) (56971 68830 (MAKE-UNICODE-TRANSLATION-TABLES 56981 . 60073) ( -MERGE-UNICODE-TRANSLATION-TABLES 60075 . 62788) (UNICODE.UNMAPPED 62790 . 66005) ( -UNICODE-EXTEND-TRANSLATION? 66007 . 68828)) (68831 74210 (ALL-UNICODE-MAPPINGS 68841 . 74208)) (75801 -88232 (WRITE-UNICODE-MAPPING 75811 . 79561) (WRITE-UNICODE-INCLUDED 79563 . 84285) ( -WRITE-UNICODE-MAPPING-HEADER 84287 . 85535) (WRITE-UNICODE-MAPPING-FILENAME 85537 . 87067) (HEXSTRING -87069 . 88230)) (88233 88909 (XCCS-UTF8-AFTER-OPEN 88243 . 88907)) (91434 96936 (UTF8HEXSTRING 91444 - . 93649) (XTOUSTRING 93651 . 96571) (XCCSSTRING 96573 . 96934)) (96963 98473 (SHOWCHARS 96973 . 98471 -))))) + (FILEMAP (NIL (4433 19681 (UTF8.OUTCHARFN 4443 . 7459) (UTF8.SLUG.OUTCHARFN 7461 . 8125) ( +UTF8.INCCODEFN 8127 . 13848) (UTF8.PEEKCCODEFN 13850 . 18699) (\UTF8.BACKCCODEFN 18701 . 19679)) ( +19682 24372 (UTF16BE.OUTCHARFN 19692 . 20711) (UTF16BE.INCCODEFN 20713 . 21838) (UTF16BE.PEEKCCODEFN +21840 . 23180) (\UTF16BE.BACKCCODEFN 23182 . 24370)) (24373 29096 (UTF16LE.OUTCHARFN 24383 . 25499) ( +UTF16LE.INCCODEFN 25501 . 26626) (UTF16LE.PEEKCCODEFN 26628 . 27904) (\UTF16LE.BACKCCODEFN 27906 . +29094)) (29097 32144 (READBOM 29107 . 31176) (WRITEBOM 31178 . 32142)) (32174 35739 ( +MAKE-UNICODE-FORMATS 32184 . 35737)) (35836 40330 (UTF8.BINCODE 35846 . 38534) (\UTF8.FETCHCODE 38536 + . 40328)) (40331 45958 (UTF8.VALIDATE 40341 . 42938) (NUTF8-BYTE1-BYTES 42940 . 43677) ( +NUTF8-CODE-BYTES 43679 . 44736) (NUTF8-STRING-BYTES 44738 . 45634) (N-MCHARS 45636 . 45956)) (47686 +56555 (MTOUCODE 47696 . 48083) (UTOMCODE 48085 . 48475) (MTOUCODE? 48477 . 49510) (UTOMCODE? 49512 . +50476) (MTOUSTRING 50478 . 51063) (UTOMSTRING 51065 . 51650) (MTOUTF8STRING 51652 . 55658) ( +UTF8TOMSTRING 55660 . 56553)) (56556 62258 (XTOUCODE 56566 . 57084) (UTOXCODE 57086 . 57594) ( +XTOUCODE? 57596 . 58657) (UTOXCODE? 58659 . 59742) (XTOUSTRING 59744 . 60437) (UTOXSTRING 60439 . +61180) (XTOUTF8STRING 61182 . 62256)) (63495 71791 (READ-UNICODE-MAPPING-FILENAMES 63505 . 67302) ( +READ-UNICODE-MAPPING 67304 . 71789)) (71858 84200 (MAKE-UNICODE-TRANSLATION-TABLES 71868 . 75178) ( +MERGE-UNICODE-TRANSLATION-TABLES 75180 . 77724) (UNICODE.UNMAPPED 77726 . 81050) ( +UNICODE-EXTEND-TRANSLATION? 81052 . 84198)) (84201 91037 (ALL-UNICODE-MAPPINGS 84211 . 89700) ( +XCCSJAPANESECHARSETS 89702 . 91035)) (92628 103896 (WRITE-UNICODE-MAPPING 92638 . 96388) ( +WRITE-UNICODE-INCLUDED 96390 . 101112) (WRITE-UNICODE-MAPPING-HEADER 101114 . 102362) ( +WRITE-UNICODE-MAPPING-FILENAME 102364 . 103894)) (103897 104573 (XCCS-UTF8-AFTER-OPEN 103907 . 104571) +) (107098 109315 (UTF8HEXSTRING 107108 . 109313)) (109342 111222 (SHOWCHARS 109352 . 111220))))) STOP diff --git a/library/UNICODE.LCOM b/library/UNICODE.LCOM index b081f62401872b32cfb5f0c223fd55eddf29051e..c9729778b2089dba3498b0258dc44b0abc8b3723 100644 GIT binary patch delta 7463 zcmbtZdu$uWndefHZ85e?%CamoiZiAiM^sFiyL>Zn#1**`SKJ59U5Qd;S9MG$lquUm z?uzRJ?JDUD*Sl-tPEr&onxsWz1ZbPe;tAKpiP@yMYm%aAi{=mK+H0Ffuf_F%N@#Jp zUhd-iX7)i+k=p`w5NdZG-+c2ue&26qFaMVLzrS-oRPIBmkaqgs5EtfRD4a~kQt4>) zLT$uIutDSrpk%X`<{~Dw<*CXnX1ykuT7YKTW#zjfUmIPiG zaxiKWo$-UrK2RF5ESreLQCi4}dT4AG-FLQz(x93$A!s38MMIu(Wuumk@@?t{(a<#6 z(Uc-))tr!pZ3I!eSQ4g1bxI56Ko?LY(KHa}38y3BG#3pZ55B+*ZcBuN9wHJy)|;q* z$NgQVPtImFFpw#67)4`fSgt?X`RR_`qz4+pQ8a9IgWc-sZSUA!kwm1eYjV5b-Y74U z0(tneyKcj;^t??whfxI9Xp#8Z&Ph`ZSS;Swlkny2o>;iG2S3>Rn@v}$e|FbFdrBnH zI;H+EeIm2Z+*7+SvQ6+OO0t;u1?p=5fGweYT>T?^=5|?W_Terx&Ht|2at6yi7tAYEYTuLhBY6g^L(oPljp~J1im=O<7&GfmxIjplK?ri_-!%xRvf;?IU1qcg^50 zWImap=54Y2O225?{03t9M!p8Ne@pbbh9G@QMaecaim((^r~+KL)dz?TqpYfAdEN2G zR1<9+-3`+{M0p#DuCKhcJxBS!SobO2P)+OoN#)zI)6$=fBFFd<P}xdbV)(+SUAGCrzWP&$M+vV9I)~Fu7jeXULC8g&)4? zdO#|?@bu!6xc*gf^@fE{6>3-KmfxuMF3)8uk<8kPmU-U&atAuIwxZNBzOiok`E4(` z=6hatZJqZvWUH^bHs9NjRPTCyzuNnGwMPQEABR6LTv=RF*1xI{!BXw&@#UXYdzTiz za*3JDbQ>hT%p)|ePSC?zM=MmE&3am-u*!aax)f- zAT6&>NjXJFv1p<#Q)5uHTQfHn4wHAoN#$sweLytIwoQn#skZk>D%$=YOSZpngo<#x zJT4Ij>@X~R=Hk&d^x&dN#7mM2C7F^HE*i05-)yjOKb1na+`^Yi{A@^{EeauJN*)tR zC>ce^Cj@jNF5*71RZgl-P9&2hRpSf7K))LD8_|T2{y)| zNIZ_>VD-33CEf#X2}ElY&3c7AYVa6C)`~?GMPmk0r-&j*rl`=0fr`c2-cu(Z1?O8x z@f1zZCry;N%eAyAYJA@8ZkT_3j4N*?9FwmZZ8ILoyyOy1;1TJV2gDaUdT}CKCWV3phqcBV8%aX7x6qc2M9^_k#7B&A zLC@xC{b0cbn^bm)Q`^Os;gS)jg?cj{YYUYa5Cog?MpNjxC`rNuFOBe}2^5JsQ_Xle zYX%I|U%6m~+D4ZLJu_C#ZVo2?b;Ma$C z9Z4Yw%{;H=ArwF|Eg-d0Bav`;m3#j@%w_^ebftN1&)+s zsvN>UAG!rU80@B8XBgIMAsF~;!CUVvxtqHCm`!wFpt{diRt{I(7ep^JOSe~9`{KD@ zxWx7PZ!72i``TyDEs^Ip#dBY{wqE~f)!F9Qb+!o}*|^O=vhyR1jvpI7Fg8+g*N2&J zQ3dD^RU*G$@5_S{e+x?F*DvOY4*%GZ@3qa5r0m2o$9@AaapU4quGKJy%oBRJJ+5BqDjNdi zaRV48z=mnnmaS~Q6S8SQm3E(S7UPmN!>lE?{$YDZ#On0yVIgGm0CLfULGLDX7*qvF zYf}}xZzCKB%&<)v7Eo!Rc|;*Ru*r_=@4xNqT?3#fY>U(gV2y&3gNFo?`63=0*3KTPZ0i`>U18!j3I~ zPp*2O)kJ+jebR6SQD@M$XPq{fc!17a>3779F>rdssT(ZdyfYHc!{$hG7@;G>`1I`))*%*1 zSPSk`cmpE15d;Y=)_;2Y-!OwsRpW5apa?U12tsQb?o&c=LO$x>(;U!VQ64{fu>&|J z7Vmh<9pmt~vk`oF^j6w5JzAu4L%JHW%2x=&x(Jt08iZ^D8@b(`t_xe-_{wO2Igei- zJ=nE0y-@DKJ2Ij>lEV9v2L>UbvQj!Lslb<5u(jQtCk}yPDHPDi5vZ*4mgbYsG-4&w+c{$aO zKgJ&g_d^^05g&x7cleJ$bfoGZ8WTv!q#$H8+OQ}0Pwo>`-e$XJp+sGGUE=aFH8%?= z08|(@0ZarGo8*hUB4`3(?!a6%{GUgqa|aJj^AZ@7Bk9kD0nr9s9WLfr(M<0m%?tWd zJ=JN&J($F?Vg!-9KZTbCKqd+%&ar)yHSw3TH-T%zBrz(R^lNq@0TNHtp-5)o7d--- zM7XGuzfn+|oQTaWi10*xM`*8_)#@n?eKKiSOi=e8=iXO^9yr0671A z55XF~vat4uiD>k_+>1{L19(c_iNiN{;lCHUjcv4}X%w}GHmF{TNYp-ziGNb9YUAdK zm6#;caTfWly+(&$$y`t#RH$Cm+Enc2$<-Z&s{hWX^KO8kMB&gg%Wo52441h`HVQlx z3dE)oRuw=b5VWZ=t8%dkfOx$`?8C+q&LgnbW)6T=9q&N_*vt!jI1C`f!vAJwK$+mP z1+eZ;fqEJUH-(NAK6WHv5d1khDZ-LK$n0DJws1Yk97@GdS`o^jX`!Tvs)ABsNT>0T zFat=cp(OM|IVRbagpQn865t*RhQ(7zpMjK%B7he`p;M!&ab%8AOS1@$2PE*xT_l_0 zP~cbq@Gp)6M3qng&R{68F$Xe7{h31~X>L8K;;u3C>NxaLWx zhf9!)`nXz>;f@#zFw9j1NmOWIhFAtEkd6Pfwadj+7ifI=d`#IqNIg~VF`HI9%RQ%h z?zfs(j+V`Pk8h9$az#S>z4w>BOb);9KPn3|3f+S z-16I&kXah3CeQNI=VpX9%Dw(mz1v8#h|9+-jF0Kan~F3FZ_@CkV+)Jde%bB9@6PWv z%Kdgf?Y3UZeHRz#dt;ZoWAzEb{yC^Px=;cLfg`mEB`iF_Wgys(&q5dCSg<#}^(V%G z({vO8lL1dhDe^$X0xp~E2p$2}v%0}cc4|QMP*`x@;V!KyjD(4`JPWt$;2B*l5@=-M z`U1{f_8U|Ua_m$PoEC{@q&$z14++4K;e>BD0ToV>7lRG}Rv&Y^?9srunxo?=rxmHo zq2$0|D~w=u*oGNWG^ioJ1BW4~p+F54OX`GzhOCKX80b4C1cN~j+~$;taBaW_R3OJB z+%%nh;Q-XcMX~Q>7y)-F&g2P!5rMqh0SGRzqm{y{9mND%sp-dIurn2+G}6JiIy0*# zWu%V1%cc4DyYW9v?p>ha0v-nQSQZBUOEQ3JG^LcG>XOX0QHg_ITYn3QIJ&I)tqAPE z0jX`DPmGhOzj9J?<8bA`pvSxqFkNk3`PgTBT)fARjf&5(>gR}F;uTFIcJY{Z3)T$~ zxp)b}2P6C267{ij6+u`!X&H5HCq^lAx5BIC&bpn?(~n00;#wCyRN&Rh;e&)Z+6O{% zXt89pol5ZVf*-$AneKw`W{L*Ddo%vP>0iUtO)5JmEh?f;zjO@&1zEh?e*>Ws!_eV@ z_lNG@gO~WdVY0gkNgd-QI&>qu+eiRC*mrb~e*?J#vBV6+7S+Ltdj=T~4xQOw|G+(+ z4nEd`NqDVQ#xDYH-^_H7dhlp%61?cyL2AARPJ$Rkp8Dgpo=#Zvo9B_&+^}b;ROFA1 R673$kdw2c!=fAu4{{fP4a%cbm delta 4096 zcmZ`6ZERE5)z1zgGmtnYB-GGyV<6b3j^2mAVg&fbevY60{GNI5C2;_ewwNs;4zNI1 zB6XNao%*9c+V0W{iS-8+V`x*SBwp09s`IAp$NqF%rBvF~{-|hCr6B}S`=_0%?A&WR z*dfHqd-tAm&$;LOocH}})(;*qZ;W;coIm~i89&eRLBxflfpC-+-kj?1=ObQZcQ<0$ zNHh?P23R+mId$^v^ts96sRBB6@!ZV$y-^%Hunq6J5XVRKt`lhT400!j6$Let_xJa^ zQQ`RuK(sF3pxbNrBxOa4>yl_n3Dk6XT{Jhs515^8d;|?d1_n5Mmf6vgR1G^3fI(Qi z(2EiLgBTt!isp1_MAn84e*!onHy>`$H53hmqC%+3(8`C)TdXbCu6#UhnA)%^$F+py zLxCXj<;#0pUbSr_7yz&VJU!Oh){Md{pu$buH^395W>jpXigyM&CHolLxyJt0Ev(%~U)k z>ST+H{L|LBf$w{ATh~~h82=YSAHL@~Mo~W0WG#F{*k@H3%nIH3E$?{W8fq>ATAN_k#8ud>3#x%n8n<|?((PMD;pdfA_^HOhd35VY68D{F!4hk z874$@79|lUAVbOx%BCp}DiQ$%P)?UslPrTsWVuMdix_iri-qYt*Lv0hf1}$c3Wdon ztz=#9s)2q$c6Rcp)F7BUwAI^ibfJ5ddeiFRS19bm(VckuP@n@&Q`E6{=PlPpU3l)$ zQ>9I#u8c~di}nn?W$NGKT??^{s&!5Mhmvn5>?ms7S}h2j7LOpX=y-NSkqxu_^5KsdD;L4z#|3;zL^iNh`MUU}ZGt$mnyuUbB&RCn zO+Q$51XAALfa99tzv^I)e=I~ecAax91bZIemj*f>F{Gsd_g?D4a`Gy38{bL3&_0r} zmG$%nW+9t_2Tl!Tve@F`!eNvfHdCUJf)dLiL(@&fg*cRzvV-7w0S!qtY=D1sQV7*f z@`2h(4u3v0#1avpa5R+%3W5s0HsC>*e0enW62s*2KV=s@|0}zC9x2fj&v-}rri~LK z_;2ao(f4!5dHn0lKDz!mliOZPhJ>t3aZ}S}$?zh}l~x9iC~js3&nkP`UilFTkI51I zt|Hq&8&Z*Xa(o!Ym1tbi43ho9x=&%v;{Dm(&xVkelT;MpP*PSCqzNGKJTVXC7sqob zDh^Vku|b5D^sY3I1^_6lU{(|kpqgO*teDHm>X6ZI=1mleAXy!ebm%4i^$$QD%@Yr{ zq96ngUE*)Dhrvq3ttr)Kr?v(eymn|QF8sEdhqw!tmsN=&3rs9DmFuOz*K_Y%V{0!$ z94PffF`xhlXi!Uxp)e8^g%p$CQ$4tHVng}X=xwVF(B;C>Z{e7j36c$2zTyTsZyk?MiCu>tCi`y90l>Q&UTSn7cRS?6lg_h0OBxN~Yv2vNXRt zJ2yYzXN& zrQQfoC_s#$K?A0W4N#3pX7?u~^77hMSs}j2$IIEtoV7$cO0yM&RT@wnDlbHnl&8j! zsU^s9Y7AW*O)5jw8pOv{E>L^BTZmaisv9*7=r(n_(`k#z6n5--$|2sYmd4Tt#mkxH zcT(x8rK1_DkzY`P z&zlIQzx%3zrn1VVRA)0cUgvV8#s%0EA2u`jW*sI^;#_C^N*l@MiafP+dw#`g@$isS zMoJr2640~wQiyNzA0G%b?|Gql&o-auU|?$WKNk!(?}cW59NT=p6lp%chFplC)sce_ zHI5u)0%?piKvqW=8hCVWF>1jE`O6}n9SCzl9>_Qp3I>G`3BkBgYK%RSS2ReGWgpm# zi4_ENvzA(4&^Pr_thjQQ?r21i78e3CH7=SkEF&#$ib{l>I`I+gNdOT_@X8;c$K$yS}zhSb+rCma+v_n=skL7nB_l4tR9(b>DSydcUA z+RFI5;xi?g9-pN7Q<jJx??4TJq&FoU`ZY8{Xp@ln9IQYi5M7J|3Th-A5us5+ zI1oJe0mlED(Kv{)Ml*5jK+_()SE?77S{Xs$$+aLDm}?AyUv*VCGzUW(7#ayYeL?qv zo!1U&FiN->^#aU7wTVW3x@1UZQq!~0qh%;TNv#CALh5hT9kk@s-2}b~GZu@DHqF7s zjnpw!Lru6`6YzubLy^>K^ifU8>Ur8Y4kNB|;bl!_^`EKD&l z(k6^tQd3RnH3E#IYpEO1VCM3Tvx>EQy|EtI@xveQc0Qgrcz$+|D=Wrz%|P~AH|#fc zQ8g6u4Th&@p2Bx$_V#$a$m2r?4j{K1R&gzBki8vC=f_$-@I?np@xU(p*YjU>0A5yQ zbA%Y%18w>r7hJW*Ot-z4w%L%!jt^aO`Qhl0q78})J?tY{+VC~EW}3&}OWSwYpENMm z=7Fm(K8=4r+r!w)AHCR4#;DJJ;$!mVjW6wI`q63^hI@(3UvP@oqm zibpOUYqMu%1G>@>zJGBCUb>iQvm+0c6{Fex4qC8k`A>LZX?D+cJM|)UBJhG%sEE3> zH;VAjX45_P1o$$p<;Fk&7@NRAB(_lb&TM-N2)q1K