From a90b7ed73d3572ea286a9b2660636a3701f9f0de Mon Sep 17 00:00:00 2001 From: Matt Heffron Date: Fri, 26 Jan 2024 14:38:04 -0800 Subject: [PATCH] A few fixes to UNICODE that I stumbled across. READ-UNICODE-MAPPING-FILENAMES returned a bare string if FILESPEC matched 1 file (first clause of the (OR...) in join), this caused READ-UNICODE-MAPPING to fail. SHOWCHARS referenced variable CODE that should have been C Added FILETYPE property to UNICODE to specify TCOMPL compiler. (Other changes are formatting by pretty printer, not mine.) --- library/UNICODE | 178 ++++++++++++++++++++++--------------------- library/UNICODE.LCOM | Bin 23748 -> 23942 bytes 2 files changed, 90 insertions(+), 88 deletions(-) diff --git a/library/UNICODE b/library/UNICODE index 18f9ad7b..96e3268a 100644 --- a/library/UNICODE +++ b/library/UNICODE @@ -1,18 +1,20 @@ (DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10) -(FILECREATED " 8-Jan-2024 10:58:06" {WMEDLEY}UNICODE.;212 72240 +(FILECREATED "26-Jan-2024 14:19:50" {LIB}UNICODE.;4 72688 - :EDIT-BY rmk + :EDIT-BY "mth" - :CHANGES-TO (FNS NUTF8CODEBYTES) + :CHANGES-TO (FNS MAKE-UNICODE-FORMATS MAKE-UNICODE-TRANSLATION-TABLES SHOWCHARS + READ-UNICODE-MAPPING-FILENAMES) + (VARS UNICODECOMS) - :PREVIOUS-DATE " 5-Jan-2024 17:25:29" {WMEDLEY}UNICODE.;211) + :PREVIOUS-DATE " 8-Jan-2024 10:58:06" {LIB}UNICODE.;1) (PRETTYCOMPRINT UNICODECOMS) (RPAQQ UNICODECOMS - [(COMS + ((COMS (* ;; "External formats") (FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCCODEFN) @@ -61,7 +63,7 @@ XCCSSTRING) (FNS \UTF8.FETCHCODE) (FNS SHOWCHARS) - (DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS) + [DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS) EXPORTS.ALL) (* ;; "These control the layout of the translation tables. Since many of the upper panels are sparse, doing it per-panel (128) seems more space-efficient, and residual alists can be shorter") @@ -70,7 +72,9 @@ (MAX-ALIST-LENGTH 10) (N-TRANSLATION-SEGMENTS (IQUOTIENT 65536 TRANSLATION-SEGMENT-SIZE)) (TRANSLATION-SHIFT (INTEGERLENGTH (SUB1 TRANSLATION-SEGMENT-SIZE))) - (TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE]) + (TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE] + (PROP (FILETYPE) + UNICODE))) @@ -528,16 +532,19 @@ (DEFINEQ (READ-UNICODE-MAPPING-FILENAMES - [LAMBDA (FILESPEC DIRS) (* ; "Edited 5-Jan-2024 17:24 by rmk") + [LAMBDA (FILESPEC DIRS) (* ; "Edited 26-Jan-2024 14:02 by mth") + (* ; "Edited 5-Jan-2024 17:24 by rmk") (* ; "Edited 5-Aug-2020 15:59 by kaplan") (* ; "Edited 4-Aug-2020 17:31 by rmk:") + (DECLARE (USEDFREE UNICODEDIRECTORIES XCCS-SET-NAMES)) (CL:UNLESS DIRS (SETQ DIRS UNICODEDIRECTORIES)) (FOR F X CSI INSIDE FILESPEC JOIN (* ;;  "Last case hopes to pick up tables that are gruped together in a subdirectory (e.g. JIS)") - (OR (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION 'TXT) - T DIRS) + (OR (MKLIST (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION + 'TXT) + T DIRS)) (for D inside DIRS when (SETQ D (FILDIR (PACKFILENAME 'NAME (CONCAT "XCCS-*=" F) @@ -876,7 +883,7 @@ (DEFINEQ (MAKE-UNICODE-TRANSLATION-TABLES - [LAMBDA (MAPPING LTORVAR RTOLVAR) (* ; "Edited 21-Aug-2021 13:12 by rmk:") + [LAMBDA (MAPPING LTORVAR RTOLVAR) (* ; "Edited 21-Aug-2021 13:12 by rmk:") (* ; "Edited 17-Aug-2020 08:46 by rmk:") (* ;; "MAPPING is the list of numeric code correspondence pairs constructed by applying READ-UNICODE-MAPPING to a Unicode mapping file.") @@ -902,7 +909,7 @@ (* ;; "") (* ;; - "An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF") + "An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF") (* ;; "") @@ -918,75 +925,67 @@ (* ;; "The left-to-right direction (into Unicode). We start by distributing the mappings into alists in arrays indexed by the higher-order (charaset set byte). The second loop converts long alists into arrays.") [FOR M LEFTC RBASE RCODES IN MAPPING EACHTIME (SETQ RCODES (CDR M)) - (SETQ RBASE (CAR RCODES)) + (SETQ RBASE (CAR RCODES)) UNLESS (IGEQ RBASE MISSINGCODE) DO (SETQ LEFTC (CAR M)) - (* ;; - "(CDR RCODES) contains combiners on the base") + (* ;; "(CDR RCODES) contains combiners on the base") - (CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK) - (CL:IF (CDR RCODES) - RCODES - RBASE)) - (CL:SVREF LTORARRAY (LRSH LEFTC - TRANSLATION-SHIFT - ] + (CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK) + (CL:IF (CDR RCODES) + RCODES + RBASE)) + (CL:SVREF LTORARRAY (LRSH LEFTC + TRANSLATION-SHIFT] (FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS) WHEN (IGREATERP (LENGTH (CL:SVREF LTORARRAY I)) - MAX-ALIST-LENGTH) - DO + MAX-ALIST-LENGTH) DO + (* ;; "Leave it alone if the alist is short") - (* ;; "Leave it alone if the alist is short") - - (SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE :INITIAL-ELEMENT NIL)) - (FOR P IN (CL:SVREF LTORARRAY I) - DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P) - TRANSLATION-MASK)) - (CDR P))) - (CL:SETF (CL:SVREF LTORARRAY I) - CSA)) + (SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE + :INITIAL-ELEMENT NIL)) + (FOR P IN (CL:SVREF LTORARRAY I) + DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P) + TRANSLATION-MASK)) + (CDR P))) + (CL:SETF (CL:SVREF LTORARRAY I) + CSA)) (* ;; "") (* ;; "Now the right-to-left direction (from Unicode). Here we have to detect and compensate for ambiguity.") (FOR M LEFTC RBASE RCOMBINERS PREV IN MAPPING EACHTIME (SETQ RBASE (CADR M)) - (SETQ RCOMBINERS (CDDR M)) + (SETQ RCOMBINERS (CDDR M)) UNLESS (OR (IGEQ RBASE MISSINGCODE) - RCOMBINERS) DO + RCOMBINERS) DO + (* ;; + "Have we already seen an explicit mapping from right to left?") - (* ;; - "Have we already seen an explicit mapping from right to left?") - - (SETQ LEFTC (CAR M)) - [SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK) - (CL:SVREF RTOLARRAY (LRSH RBASE - TRANSLATION-SHIFT - ] - (IF (NULL PREV) - THEN (CL:PUSH (CONS (LOGAND RBASE - TRANSLATION-MASK) - LEFTC) - (CL:SVREF RTOLARRAY (LRSH RBASE - TRANSLATION-SHIFT - ))) - ELSEIF (IGREATERP (CDR PREV) - LEFTC) - THEN (RPLACD PREV LEFTC))) + (SETQ LEFTC (CAR M)) + [SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK) + (CL:SVREF RTOLARRAY (LRSH RBASE + TRANSLATION-SHIFT] + (IF (NULL PREV) + THEN (CL:PUSH (CONS (LOGAND RBASE TRANSLATION-MASK) + LEFTC) + (CL:SVREF RTOLARRAY (LRSH RBASE + TRANSLATION-SHIFT))) + ELSEIF (IGREATERP (CDR PREV) + LEFTC) + THEN (RPLACD PREV LEFTC))) (FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS) WHEN (IGREATERP (LENGTH (CL:SVREF RTOLARRAY I)) - MAX-ALIST-LENGTH) - DO + MAX-ALIST-LENGTH) DO + (* ;; "Long list, make an array") - (* ;; "Long list, make an array") - - (SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE :INITIAL-ELEMENT NIL)) - (FOR P IN (CL:SVREF RTOLARRAY I) - DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P) - TRANSLATION-MASK)) - (CDR P))) - (CL:SETF (CL:SVREF RTOLARRAY I) - CSA)) + (SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE + :INITIAL-ELEMENT NIL)) + (FOR P IN (CL:SVREF RTOLARRAY I) + DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P) + TRANSLATION-MASK)) + (CDR P))) + (CL:SETF (CL:SVREF RTOLARRAY I) + CSA)) (* ;; "") @@ -1285,14 +1284,15 @@ (DEFINEQ (SHOWCHARS - [LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 1-Aug-2020 09:27 by rmk:") + [LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 26-Jan-2024 14:18 by mth") + (* ; "Edited 1-Aug-2020 09:27 by rmk:") (RESETFORM (DSPFONT (OR FONT '(CLASSIC 12)) T) (CL:WHEN (AND (SMALLP FROMCHAR) (NOT TOCHAR)) (* ;; - "If a small number, assume it's an octal (in decimal) character set, no need for string quotes") + "If a small number, assume it's an octal (in decimal) character set, no need for string quotes") (SETQ TOCHAR (CONCAT FROMCHAR "," 376)) (SETQ FROMCHAR (CONCAT FROMCHAR "," 41))) @@ -1302,16 +1302,16 @@ (SETQ TOCHAR (CL:IF TOCHAR (CHARCODE.DECODE TOCHAR) FROMCHAR))) - (FOR C FROM FROMCHAR TO TOCHAR UNLESS (AND (IGEQ (LOGAND C 255) - 127) - (ILEQ (LOGAND C 255) - (PLUS 128 33))) - DO (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH CODE 8)) - "," - (OCTALSTRING (LOGAND CODE 255))) - 10 - (CHARACTER C) - T]) + (for C from FROMCHAR to TOCHAR unless (AND (IGEQ (LOGAND C 255) + 127) + (ILEQ (LOGAND C 255) + (PLUS 128 33))) + do (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH C 8)) + "," + (OCTALSTRING (LOGAND C 255))) + 10 + (CHARACTER C) + T]) ) (DECLARE%: EVAL@COMPILE DONTCOPY @@ -1338,17 +1338,19 @@ (TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE))) ) ) + +(PUTPROPS UNICODE FILETYPE :TCOMPL) (DECLARE%: DONTCOPY - (FILEMAP (NIL (3950 18041 (UTF8.OUTCHARFN 3960 . 6791) (UTF8.INCCODEFN 6793 . 12283) (UTF8.PEEKCCODEFN - 12285 . 17059) (\UTF8.BACKCCODEFN 17061 . 18039)) (18042 21823 (UTF16BE.OUTCHARFN 18052 . 18876) ( -UTF16BE.INCCODEFN 18878 . 19777) (UTF16BE.PEEKCCODEFN 19779 . 20850) (\UTF16BE.BACKCCODEFN 20852 . -21821)) (21853 24134 (MAKE-UNICODE-FORMATS 21863 . 24132)) (24231 25537 (UNICODE.UNMAPPED 24241 . -25535)) (25538 26214 (XCCS-UTF8-AFTER-OPEN 25548 . 26212)) (27670 28019 (XTOUCODE 27680 . 27848) ( -UTOXCODE 27850 . 28017)) (28059 44757 (READ-UNICODE-MAPPING-FILENAMES 28069 . 30519) ( -READ-UNICODE-MAPPING 30521 . 33497) (WRITE-UNICODE-MAPPING 33499 . 37249) (WRITE-UNICODE-INCLUDED -37251 . 41973) (WRITE-UNICODE-MAPPING-HEADER 41975 . 43223) (WRITE-UNICODE-MAPPING-FILENAME 43225 . -44755)) (48071 56550 (MAKE-UNICODE-TRANSLATION-TABLES 48081 . 56548)) (57055 68253 (UTF-8.VALIDATE -57065 . 60067) (HEXSTRING 60069 . 61230) (UTF8HEXSTRING 61232 . 63437) (NUTF8CODEBYTES 63439 . 64392) -(NUTF8STRINGBYTES 64394 . 64875) (XTOUSTRING 64877 . 67888) (XCCSSTRING 67890 . 68251)) (68254 70058 ( -\UTF8.FETCHCODE 68264 . 70056)) (70059 71528 (SHOWCHARS 70069 . 71526))))) + (FILEMAP (NIL (4111 18202 (UTF8.OUTCHARFN 4121 . 6952) (UTF8.INCCODEFN 6954 . 12444) (UTF8.PEEKCCODEFN + 12446 . 17220) (\UTF8.BACKCCODEFN 17222 . 18200)) (18203 21984 (UTF16BE.OUTCHARFN 18213 . 19037) ( +UTF16BE.INCCODEFN 19039 . 19938) (UTF16BE.PEEKCCODEFN 19940 . 21011) (\UTF16BE.BACKCCODEFN 21013 . +21982)) (22014 24295 (MAKE-UNICODE-FORMATS 22024 . 24293)) (24392 25698 (UNICODE.UNMAPPED 24402 . +25696)) (25699 26375 (XCCS-UTF8-AFTER-OPEN 25709 . 26373)) (27831 28180 (XTOUCODE 27841 . 28009) ( +UTOXCODE 28011 . 28178)) (28220 45174 (READ-UNICODE-MAPPING-FILENAMES 28230 . 30936) ( +READ-UNICODE-MAPPING 30938 . 33914) (WRITE-UNICODE-MAPPING 33916 . 37666) (WRITE-UNICODE-INCLUDED +37668 . 42390) (WRITE-UNICODE-MAPPING-HEADER 42392 . 43640) (WRITE-UNICODE-MAPPING-FILENAME 43642 . +45172)) (48488 56912 (MAKE-UNICODE-TRANSLATION-TABLES 48498 . 56910)) (57417 68615 (UTF-8.VALIDATE +57427 . 60429) (HEXSTRING 60431 . 61592) (UTF8HEXSTRING 61594 . 63799) (NUTF8CODEBYTES 63801 . 64754) +(NUTF8STRINGBYTES 64756 . 65237) (XTOUSTRING 65239 . 68250) (XCCSSTRING 68252 . 68613)) (68616 70420 ( +\UTF8.FETCHCODE 68626 . 70418)) (70421 71931 (SHOWCHARS 70431 . 71929))))) STOP diff --git a/library/UNICODE.LCOM b/library/UNICODE.LCOM index 226963f071acc4adc4f6b42b29a284a4377f95db..c644aa690f9ede51eeadb8c4f92049766b61d1bb 100644 GIT binary patch delta 1206 zcmZuwO-~b16s4H>QPY(WgG#wh{FouMnfKmII}D;JGgBBTGh=58)UatNLK6Wa?2PHo zg)4RbfQbuNCMJywb>W7!ap6DEy^&wwy-z9-?4s|?efOSo-?{JYeat-joO!X-=B2{j z)>eV*+yG|SY|=J`0;;mQz4@r~aIFp7TcE&`nm_Zj5%^B%dc`S21BLU$I-S|Nu!X6> z9YdLyEDLNe@A^q$rU}aC?uG(3HWJKwaUlsotp+iq zzlVl1veC2sK2tUi*ECQUspaf}2~M~WYhXupZ_y7Malyr2(y`1)?wC=%uNkR1xElm~fch~rg^TYYlNO!xCQBf(%M zbelM!5+utBt_-~8#0z3S41nt-)4-U3WygsNz;r9^jNP~@N}${x zFCyR6xPm8Q_=zi$(aM?Ldq4K=Vwa&25uh;@of({SmTXh~=gEK5lA*cK(ACxBU%E@EmxNGWs%p_JmpFaAnVFER@kDaENJ%s?iS z__2;4k>VCXl0r8L5DTkIsz@!{nf>6pC zBK>3qIfmR3NL60y4NpGEoz`IB-QZyQyxPh6Kh6!{Z=3%7SF_*q*}$)5Rh*Xr$^_1X z%Z1@oCh*Or`?Fs}Rz(Y9-?r;s>?Mt03GWgZ;Kork3=({pz&NN*trEw+gILqDy`OV? wXGcJd>S2UW3@={Nj64kv-cj%Ey^-^(8Z{D1i2Gr>*fJ691fN<|(`0vl0Ac?i?*IS* delta 1103 zcmaJo6r`pXOF)9;$nQPdvD2nfYQLn0)QMt;HWEmrAJB+?pcIf0 zLRkz|6f*N`VT<3-5nU>mpky9PJ81*}}* zPQ_+AtPMAhgx0tEyAL<_uT>!GCUuv(7ki0}VSozitr%z|9>A}o?rk@)5k0Z;1 zyOH0Dd=5f*fU$zGgU|vh))ElR0W3*BX?1#0ZCSQKnBL@+i}Iw03l8m?f15T#Jm9}* z%yExwm?f|!3Wx`JCY(RJ7-tjlvYU=(Z?y4B7F$6NIP)M`P4I1jTuo#Yhph;h=Yq0h zf~jN1o!^g{fGRN4fmPp+4MZ&cF?;r@4@UIRW#njsr<0^c)G_qY2 z+F&_QGi{dWS0OL^bo3%I& WeZ9Z%XC-)N{;R_-F