1
0
mirror of synced 2026-02-27 01:19:42 +00:00

A few fixes to UNICODE that I stumbled across.

READ-UNICODE-MAPPING-FILENAMES returned a bare string if FILESPEC matched 1 file (first clause of the (OR...) in join), this caused READ-UNICODE-MAPPING to fail.
SHOWCHARS referenced variable CODE that should have been C
Added FILETYPE property to UNICODE to specify TCOMPL compiler.
(Other changes are formatting by pretty printer, not mine.)
This commit is contained in:
Matt Heffron
2024-01-26 14:38:04 -08:00
parent 54b2607070
commit a90b7ed73d
2 changed files with 90 additions and 88 deletions

View File

@@ -1,18 +1,20 @@
(DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10)
(FILECREATED " 8-Jan-2024 10:58:06" {WMEDLEY}<library>UNICODE.;212 72240
(FILECREATED "26-Jan-2024 14:19:50" {LIB}UNICODE.;4 72688
:EDIT-BY rmk
:EDIT-BY "mth"
:CHANGES-TO (FNS NUTF8CODEBYTES)
:CHANGES-TO (FNS MAKE-UNICODE-FORMATS MAKE-UNICODE-TRANSLATION-TABLES SHOWCHARS
READ-UNICODE-MAPPING-FILENAMES)
(VARS UNICODECOMS)
:PREVIOUS-DATE " 5-Jan-2024 17:25:29" {WMEDLEY}<library>UNICODE.;211)
:PREVIOUS-DATE " 8-Jan-2024 10:58:06" {LIB}UNICODE.;1)
(PRETTYCOMPRINT UNICODECOMS)
(RPAQQ UNICODECOMS
[(COMS
((COMS
(* ;; "External formats")
(FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCCODEFN)
@@ -61,7 +63,7 @@
XCCSSTRING)
(FNS \UTF8.FETCHCODE)
(FNS SHOWCHARS)
(DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS)
[DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS)
EXPORTS.ALL)
(* ;; "These control the layout of the translation tables. Since many of the upper panels are sparse, doing it per-panel (128) seems more space-efficient, and residual alists can be shorter")
@@ -70,7 +72,9 @@
(MAX-ALIST-LENGTH 10)
(N-TRANSLATION-SEGMENTS (IQUOTIENT 65536 TRANSLATION-SEGMENT-SIZE))
(TRANSLATION-SHIFT (INTEGERLENGTH (SUB1 TRANSLATION-SEGMENT-SIZE)))
(TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE])
(TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE]
(PROP (FILETYPE)
UNICODE)))
@@ -528,16 +532,19 @@
(DEFINEQ
(READ-UNICODE-MAPPING-FILENAMES
[LAMBDA (FILESPEC DIRS) (* ; "Edited 5-Jan-2024 17:24 by rmk")
[LAMBDA (FILESPEC DIRS) (* ; "Edited 26-Jan-2024 14:02 by mth")
(* ; "Edited 5-Jan-2024 17:24 by rmk")
(* ; "Edited 5-Aug-2020 15:59 by kaplan")
(* ; "Edited 4-Aug-2020 17:31 by rmk:")
(DECLARE (USEDFREE UNICODEDIRECTORIES XCCS-SET-NAMES))
(CL:UNLESS DIRS (SETQ DIRS UNICODEDIRECTORIES))
(FOR F X CSI INSIDE FILESPEC JOIN
(* ;;
 "Last case hopes to pick up tables that are gruped together in a subdirectory (e.g. JIS)")
(OR (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION 'TXT)
T DIRS)
(OR (MKLIST (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION
'TXT)
T DIRS))
(for D inside DIRS
when (SETQ D (FILDIR (PACKFILENAME 'NAME
(CONCAT "XCCS-*=" F)
@@ -876,7 +883,7 @@
(DEFINEQ
(MAKE-UNICODE-TRANSLATION-TABLES
[LAMBDA (MAPPING LTORVAR RTOLVAR) (* ; "Edited 21-Aug-2021 13:12 by rmk:")
[LAMBDA (MAPPING LTORVAR RTOLVAR) (* ; "Edited 21-Aug-2021 13:12 by rmk:")
(* ; "Edited 17-Aug-2020 08:46 by rmk:")
(* ;; "MAPPING is the list of numeric code correspondence pairs constructed by applying READ-UNICODE-MAPPING to a Unicode mapping file.")
@@ -902,7 +909,7 @@
(* ;; "")
(* ;;
 "An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF")
 "An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF")
(* ;; "")
@@ -918,75 +925,67 @@
(* ;; "The left-to-right direction (into Unicode). We start by distributing the mappings into alists in arrays indexed by the higher-order (charaset set byte). The second loop converts long alists into arrays.")
[FOR M LEFTC RBASE RCODES IN MAPPING EACHTIME (SETQ RCODES (CDR M))
(SETQ RBASE (CAR RCODES))
(SETQ RBASE (CAR RCODES))
UNLESS (IGEQ RBASE MISSINGCODE) DO (SETQ LEFTC (CAR M))
(* ;;
 "(CDR RCODES) contains combiners on the base")
(* ;; "(CDR RCODES) contains combiners on the base")
(CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK)
(CL:IF (CDR RCODES)
RCODES
RBASE))
(CL:SVREF LTORARRAY (LRSH LEFTC
TRANSLATION-SHIFT
]
(CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK)
(CL:IF (CDR RCODES)
RCODES
RBASE))
(CL:SVREF LTORARRAY (LRSH LEFTC
TRANSLATION-SHIFT]
(FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS)
WHEN (IGREATERP (LENGTH (CL:SVREF LTORARRAY I))
MAX-ALIST-LENGTH)
DO
MAX-ALIST-LENGTH) DO
(* ;; "Leave it alone if the alist is short")
(* ;; "Leave it alone if the alist is short")
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE :INITIAL-ELEMENT NIL))
(FOR P IN (CL:SVREF LTORARRAY I)
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
TRANSLATION-MASK))
(CDR P)))
(CL:SETF (CL:SVREF LTORARRAY I)
CSA))
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE
:INITIAL-ELEMENT NIL))
(FOR P IN (CL:SVREF LTORARRAY I)
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
TRANSLATION-MASK))
(CDR P)))
(CL:SETF (CL:SVREF LTORARRAY I)
CSA))
(* ;; "")
(* ;; "Now the right-to-left direction (from Unicode). Here we have to detect and compensate for ambiguity.")
(FOR M LEFTC RBASE RCOMBINERS PREV IN MAPPING EACHTIME (SETQ RBASE (CADR M))
(SETQ RCOMBINERS (CDDR M))
(SETQ RCOMBINERS (CDDR M))
UNLESS (OR (IGEQ RBASE MISSINGCODE)
RCOMBINERS) DO
RCOMBINERS) DO
(* ;;
 "Have we already seen an explicit mapping from right to left?")
(* ;;
 "Have we already seen an explicit mapping from right to left?")
(SETQ LEFTC (CAR M))
[SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK)
(CL:SVREF RTOLARRAY (LRSH RBASE
TRANSLATION-SHIFT
]
(IF (NULL PREV)
THEN (CL:PUSH (CONS (LOGAND RBASE
TRANSLATION-MASK)
LEFTC)
(CL:SVREF RTOLARRAY (LRSH RBASE
TRANSLATION-SHIFT
)))
ELSEIF (IGREATERP (CDR PREV)
LEFTC)
THEN (RPLACD PREV LEFTC)))
(SETQ LEFTC (CAR M))
[SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK)
(CL:SVREF RTOLARRAY (LRSH RBASE
TRANSLATION-SHIFT]
(IF (NULL PREV)
THEN (CL:PUSH (CONS (LOGAND RBASE TRANSLATION-MASK)
LEFTC)
(CL:SVREF RTOLARRAY (LRSH RBASE
TRANSLATION-SHIFT)))
ELSEIF (IGREATERP (CDR PREV)
LEFTC)
THEN (RPLACD PREV LEFTC)))
(FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS)
WHEN (IGREATERP (LENGTH (CL:SVREF RTOLARRAY I))
MAX-ALIST-LENGTH)
DO
MAX-ALIST-LENGTH) DO
(* ;; "Long list, make an array")
(* ;; "Long list, make an array")
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE :INITIAL-ELEMENT NIL))
(FOR P IN (CL:SVREF RTOLARRAY I)
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
TRANSLATION-MASK))
(CDR P)))
(CL:SETF (CL:SVREF RTOLARRAY I)
CSA))
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE
:INITIAL-ELEMENT NIL))
(FOR P IN (CL:SVREF RTOLARRAY I)
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
TRANSLATION-MASK))
(CDR P)))
(CL:SETF (CL:SVREF RTOLARRAY I)
CSA))
(* ;; "")
@@ -1285,14 +1284,15 @@
(DEFINEQ
(SHOWCHARS
[LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 1-Aug-2020 09:27 by rmk:")
[LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 26-Jan-2024 14:18 by mth")
(* ; "Edited 1-Aug-2020 09:27 by rmk:")
(RESETFORM (DSPFONT (OR FONT '(CLASSIC 12))
T)
(CL:WHEN (AND (SMALLP FROMCHAR)
(NOT TOCHAR))
(* ;;
 "If a small number, assume it's an octal (in decimal) character set, no need for string quotes")
 "If a small number, assume it's an octal (in decimal) character set, no need for string quotes")
(SETQ TOCHAR (CONCAT FROMCHAR "," 376))
(SETQ FROMCHAR (CONCAT FROMCHAR "," 41)))
@@ -1302,16 +1302,16 @@
(SETQ TOCHAR (CL:IF TOCHAR
(CHARCODE.DECODE TOCHAR)
FROMCHAR)))
(FOR C FROM FROMCHAR TO TOCHAR UNLESS (AND (IGEQ (LOGAND C 255)
127)
(ILEQ (LOGAND C 255)
(PLUS 128 33)))
DO (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH CODE 8))
","
(OCTALSTRING (LOGAND CODE 255)))
10
(CHARACTER C)
T])
(for C from FROMCHAR to TOCHAR unless (AND (IGEQ (LOGAND C 255)
127)
(ILEQ (LOGAND C 255)
(PLUS 128 33)))
do (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH C 8))
","
(OCTALSTRING (LOGAND C 255)))
10
(CHARACTER C)
T])
)
(DECLARE%: EVAL@COMPILE DONTCOPY
@@ -1338,17 +1338,19 @@
(TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE)))
)
)
(PUTPROPS UNICODE FILETYPE :TCOMPL)
(DECLARE%: DONTCOPY
(FILEMAP (NIL (3950 18041 (UTF8.OUTCHARFN 3960 . 6791) (UTF8.INCCODEFN 6793 . 12283) (UTF8.PEEKCCODEFN
12285 . 17059) (\UTF8.BACKCCODEFN 17061 . 18039)) (18042 21823 (UTF16BE.OUTCHARFN 18052 . 18876) (
UTF16BE.INCCODEFN 18878 . 19777) (UTF16BE.PEEKCCODEFN 19779 . 20850) (\UTF16BE.BACKCCODEFN 20852 .
21821)) (21853 24134 (MAKE-UNICODE-FORMATS 21863 . 24132)) (24231 25537 (UNICODE.UNMAPPED 24241 .
25535)) (25538 26214 (XCCS-UTF8-AFTER-OPEN 25548 . 26212)) (27670 28019 (XTOUCODE 27680 . 27848) (
UTOXCODE 27850 . 28017)) (28059 44757 (READ-UNICODE-MAPPING-FILENAMES 28069 . 30519) (
READ-UNICODE-MAPPING 30521 . 33497) (WRITE-UNICODE-MAPPING 33499 . 37249) (WRITE-UNICODE-INCLUDED
37251 . 41973) (WRITE-UNICODE-MAPPING-HEADER 41975 . 43223) (WRITE-UNICODE-MAPPING-FILENAME 43225 .
44755)) (48071 56550 (MAKE-UNICODE-TRANSLATION-TABLES 48081 . 56548)) (57055 68253 (UTF-8.VALIDATE
57065 . 60067) (HEXSTRING 60069 . 61230) (UTF8HEXSTRING 61232 . 63437) (NUTF8CODEBYTES 63439 . 64392)
(NUTF8STRINGBYTES 64394 . 64875) (XTOUSTRING 64877 . 67888) (XCCSSTRING 67890 . 68251)) (68254 70058 (
\UTF8.FETCHCODE 68264 . 70056)) (70059 71528 (SHOWCHARS 70069 . 71526)))))
(FILEMAP (NIL (4111 18202 (UTF8.OUTCHARFN 4121 . 6952) (UTF8.INCCODEFN 6954 . 12444) (UTF8.PEEKCCODEFN
12446 . 17220) (\UTF8.BACKCCODEFN 17222 . 18200)) (18203 21984 (UTF16BE.OUTCHARFN 18213 . 19037) (
UTF16BE.INCCODEFN 19039 . 19938) (UTF16BE.PEEKCCODEFN 19940 . 21011) (\UTF16BE.BACKCCODEFN 21013 .
21982)) (22014 24295 (MAKE-UNICODE-FORMATS 22024 . 24293)) (24392 25698 (UNICODE.UNMAPPED 24402 .
25696)) (25699 26375 (XCCS-UTF8-AFTER-OPEN 25709 . 26373)) (27831 28180 (XTOUCODE 27841 . 28009) (
UTOXCODE 28011 . 28178)) (28220 45174 (READ-UNICODE-MAPPING-FILENAMES 28230 . 30936) (
READ-UNICODE-MAPPING 30938 . 33914) (WRITE-UNICODE-MAPPING 33916 . 37666) (WRITE-UNICODE-INCLUDED
37668 . 42390) (WRITE-UNICODE-MAPPING-HEADER 42392 . 43640) (WRITE-UNICODE-MAPPING-FILENAME 43642 .
45172)) (48488 56912 (MAKE-UNICODE-TRANSLATION-TABLES 48498 . 56910)) (57417 68615 (UTF-8.VALIDATE
57427 . 60429) (HEXSTRING 60431 . 61592) (UTF8HEXSTRING 61594 . 63799) (NUTF8CODEBYTES 63801 . 64754)
(NUTF8STRINGBYTES 64756 . 65237) (XTOUSTRING 65239 . 68250) (XCCSSTRING 68252 . 68613)) (68616 70420 (
\UTF8.FETCHCODE 68626 . 70418)) (70421 71931 (SHOWCHARS 70431 . 71929)))))
STOP

Binary file not shown.