Merge pull request #1518 from Interlisp/mth1--a-few-UNICODE-cleanups
A few fixes to UNICODE that I stumbled across.
This commit is contained in:
178
library/UNICODE
178
library/UNICODE
@@ -1,18 +1,20 @@
|
||||
(DEFINE-FILE-INFO PACKAGE "INTERLISP" READTABLE "INTERLISP" BASE 10)
|
||||
|
||||
(FILECREATED " 8-Jan-2024 10:58:06" {WMEDLEY}<library>UNICODE.;212 72240
|
||||
(FILECREATED "26-Jan-2024 14:19:50" {LIB}UNICODE.;4 72688
|
||||
|
||||
:EDIT-BY rmk
|
||||
:EDIT-BY "mth"
|
||||
|
||||
:CHANGES-TO (FNS NUTF8CODEBYTES)
|
||||
:CHANGES-TO (FNS MAKE-UNICODE-FORMATS MAKE-UNICODE-TRANSLATION-TABLES SHOWCHARS
|
||||
READ-UNICODE-MAPPING-FILENAMES)
|
||||
(VARS UNICODECOMS)
|
||||
|
||||
:PREVIOUS-DATE " 5-Jan-2024 17:25:29" {WMEDLEY}<library>UNICODE.;211)
|
||||
:PREVIOUS-DATE " 8-Jan-2024 10:58:06" {LIB}UNICODE.;1)
|
||||
|
||||
|
||||
(PRETTYCOMPRINT UNICODECOMS)
|
||||
|
||||
(RPAQQ UNICODECOMS
|
||||
[(COMS
|
||||
((COMS
|
||||
(* ;; "External formats")
|
||||
|
||||
(FNS UTF8.OUTCHARFN UTF8.INCCODEFN UTF8.PEEKCCODEFN \UTF8.BACKCCODEFN)
|
||||
@@ -61,7 +63,7 @@
|
||||
XCCSSTRING)
|
||||
(FNS \UTF8.FETCHCODE)
|
||||
(FNS SHOWCHARS)
|
||||
(DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS)
|
||||
[DECLARE%: EVAL@COMPILE DONTCOPY (FILES (FROM LOADUPS)
|
||||
EXPORTS.ALL)
|
||||
|
||||
(* ;; "These control the layout of the translation tables. Since many of the upper panels are sparse, doing it per-panel (128) seems more space-efficient, and residual alists can be shorter")
|
||||
@@ -70,7 +72,9 @@
|
||||
(MAX-ALIST-LENGTH 10)
|
||||
(N-TRANSLATION-SEGMENTS (IQUOTIENT 65536 TRANSLATION-SEGMENT-SIZE))
|
||||
(TRANSLATION-SHIFT (INTEGERLENGTH (SUB1 TRANSLATION-SEGMENT-SIZE)))
|
||||
(TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE])
|
||||
(TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE]
|
||||
(PROP (FILETYPE)
|
||||
UNICODE)))
|
||||
|
||||
|
||||
|
||||
@@ -528,16 +532,19 @@
|
||||
(DEFINEQ
|
||||
|
||||
(READ-UNICODE-MAPPING-FILENAMES
|
||||
[LAMBDA (FILESPEC DIRS) (* ; "Edited 5-Jan-2024 17:24 by rmk")
|
||||
[LAMBDA (FILESPEC DIRS) (* ; "Edited 26-Jan-2024 14:02 by mth")
|
||||
(* ; "Edited 5-Jan-2024 17:24 by rmk")
|
||||
(* ; "Edited 5-Aug-2020 15:59 by kaplan")
|
||||
(* ; "Edited 4-Aug-2020 17:31 by rmk:")
|
||||
(DECLARE (USEDFREE UNICODEDIRECTORIES XCCS-SET-NAMES))
|
||||
(CL:UNLESS DIRS (SETQ DIRS UNICODEDIRECTORIES))
|
||||
(FOR F X CSI INSIDE FILESPEC JOIN
|
||||
(* ;;
|
||||
"Last case hopes to pick up tables that are gruped together in a subdirectory (e.g. JIS)")
|
||||
|
||||
(OR (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION 'TXT)
|
||||
T DIRS)
|
||||
(OR (MKLIST (FINDFILE (PACKFILENAME 'BODY F 'EXTENSION
|
||||
'TXT)
|
||||
T DIRS))
|
||||
(for D inside DIRS
|
||||
when (SETQ D (FILDIR (PACKFILENAME 'NAME
|
||||
(CONCAT "XCCS-*=" F)
|
||||
@@ -876,7 +883,7 @@
|
||||
(DEFINEQ
|
||||
|
||||
(MAKE-UNICODE-TRANSLATION-TABLES
|
||||
[LAMBDA (MAPPING LTORVAR RTOLVAR) (* ; "Edited 21-Aug-2021 13:12 by rmk:")
|
||||
[LAMBDA (MAPPING LTORVAR RTOLVAR) (* ; "Edited 21-Aug-2021 13:12 by rmk:")
|
||||
(* ; "Edited 17-Aug-2020 08:46 by rmk:")
|
||||
|
||||
(* ;; "MAPPING is the list of numeric code correspondence pairs constructed by applying READ-UNICODE-MAPPING to a Unicode mapping file.")
|
||||
@@ -902,7 +909,7 @@
|
||||
(* ;; "")
|
||||
|
||||
(* ;;
|
||||
"An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF")
|
||||
"An unmapped XCCS character is assigned a code in the %"private use%" code blocks 0xE000-F8FF")
|
||||
|
||||
(* ;; "")
|
||||
|
||||
@@ -918,75 +925,67 @@
|
||||
(* ;; "The left-to-right direction (into Unicode). We start by distributing the mappings into alists in arrays indexed by the higher-order (charaset set byte). The second loop converts long alists into arrays.")
|
||||
|
||||
[FOR M LEFTC RBASE RCODES IN MAPPING EACHTIME (SETQ RCODES (CDR M))
|
||||
(SETQ RBASE (CAR RCODES))
|
||||
(SETQ RBASE (CAR RCODES))
|
||||
UNLESS (IGEQ RBASE MISSINGCODE) DO (SETQ LEFTC (CAR M))
|
||||
|
||||
(* ;;
|
||||
"(CDR RCODES) contains combiners on the base")
|
||||
(* ;; "(CDR RCODES) contains combiners on the base")
|
||||
|
||||
(CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK)
|
||||
(CL:IF (CDR RCODES)
|
||||
RCODES
|
||||
RBASE))
|
||||
(CL:SVREF LTORARRAY (LRSH LEFTC
|
||||
TRANSLATION-SHIFT
|
||||
]
|
||||
(CL:PUSH (CONS (LOGAND LEFTC TRANSLATION-MASK)
|
||||
(CL:IF (CDR RCODES)
|
||||
RCODES
|
||||
RBASE))
|
||||
(CL:SVREF LTORARRAY (LRSH LEFTC
|
||||
TRANSLATION-SHIFT]
|
||||
(FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS)
|
||||
WHEN (IGREATERP (LENGTH (CL:SVREF LTORARRAY I))
|
||||
MAX-ALIST-LENGTH)
|
||||
DO
|
||||
MAX-ALIST-LENGTH) DO
|
||||
(* ;; "Leave it alone if the alist is short")
|
||||
|
||||
(* ;; "Leave it alone if the alist is short")
|
||||
|
||||
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE :INITIAL-ELEMENT NIL))
|
||||
(FOR P IN (CL:SVREF LTORARRAY I)
|
||||
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
|
||||
TRANSLATION-MASK))
|
||||
(CDR P)))
|
||||
(CL:SETF (CL:SVREF LTORARRAY I)
|
||||
CSA))
|
||||
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE
|
||||
:INITIAL-ELEMENT NIL))
|
||||
(FOR P IN (CL:SVREF LTORARRAY I)
|
||||
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
|
||||
TRANSLATION-MASK))
|
||||
(CDR P)))
|
||||
(CL:SETF (CL:SVREF LTORARRAY I)
|
||||
CSA))
|
||||
|
||||
(* ;; "")
|
||||
|
||||
(* ;; "Now the right-to-left direction (from Unicode). Here we have to detect and compensate for ambiguity.")
|
||||
|
||||
(FOR M LEFTC RBASE RCOMBINERS PREV IN MAPPING EACHTIME (SETQ RBASE (CADR M))
|
||||
(SETQ RCOMBINERS (CDDR M))
|
||||
(SETQ RCOMBINERS (CDDR M))
|
||||
UNLESS (OR (IGEQ RBASE MISSINGCODE)
|
||||
RCOMBINERS) DO
|
||||
RCOMBINERS) DO
|
||||
(* ;;
|
||||
"Have we already seen an explicit mapping from right to left?")
|
||||
|
||||
(* ;;
|
||||
"Have we already seen an explicit mapping from right to left?")
|
||||
|
||||
(SETQ LEFTC (CAR M))
|
||||
[SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK)
|
||||
(CL:SVREF RTOLARRAY (LRSH RBASE
|
||||
TRANSLATION-SHIFT
|
||||
]
|
||||
(IF (NULL PREV)
|
||||
THEN (CL:PUSH (CONS (LOGAND RBASE
|
||||
TRANSLATION-MASK)
|
||||
LEFTC)
|
||||
(CL:SVREF RTOLARRAY (LRSH RBASE
|
||||
TRANSLATION-SHIFT
|
||||
)))
|
||||
ELSEIF (IGREATERP (CDR PREV)
|
||||
LEFTC)
|
||||
THEN (RPLACD PREV LEFTC)))
|
||||
(SETQ LEFTC (CAR M))
|
||||
[SETQ PREV (ASSOC (LOGAND RBASE TRANSLATION-MASK)
|
||||
(CL:SVREF RTOLARRAY (LRSH RBASE
|
||||
TRANSLATION-SHIFT]
|
||||
(IF (NULL PREV)
|
||||
THEN (CL:PUSH (CONS (LOGAND RBASE TRANSLATION-MASK)
|
||||
LEFTC)
|
||||
(CL:SVREF RTOLARRAY (LRSH RBASE
|
||||
TRANSLATION-SHIFT)))
|
||||
ELSEIF (IGREATERP (CDR PREV)
|
||||
LEFTC)
|
||||
THEN (RPLACD PREV LEFTC)))
|
||||
(FOR I CSA FROM 0 TO (SUB1 N-TRANSLATION-SEGMENTS)
|
||||
WHEN (IGREATERP (LENGTH (CL:SVREF RTOLARRAY I))
|
||||
MAX-ALIST-LENGTH)
|
||||
DO
|
||||
MAX-ALIST-LENGTH) DO
|
||||
(* ;; "Long list, make an array")
|
||||
|
||||
(* ;; "Long list, make an array")
|
||||
|
||||
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE :INITIAL-ELEMENT NIL))
|
||||
(FOR P IN (CL:SVREF RTOLARRAY I)
|
||||
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
|
||||
TRANSLATION-MASK))
|
||||
(CDR P)))
|
||||
(CL:SETF (CL:SVREF RTOLARRAY I)
|
||||
CSA))
|
||||
(SETQ CSA (CL:MAKE-ARRAY TRANSLATION-SEGMENT-SIZE
|
||||
:INITIAL-ELEMENT NIL))
|
||||
(FOR P IN (CL:SVREF RTOLARRAY I)
|
||||
DO (CL:SETF (CL:SVREF CSA (LOGAND (CAR P)
|
||||
TRANSLATION-MASK))
|
||||
(CDR P)))
|
||||
(CL:SETF (CL:SVREF RTOLARRAY I)
|
||||
CSA))
|
||||
|
||||
(* ;; "")
|
||||
|
||||
@@ -1285,14 +1284,15 @@
|
||||
(DEFINEQ
|
||||
|
||||
(SHOWCHARS
|
||||
[LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 1-Aug-2020 09:27 by rmk:")
|
||||
[LAMBDA (FROMCHAR TOCHAR FONT) (* ; "Edited 26-Jan-2024 14:18 by mth")
|
||||
(* ; "Edited 1-Aug-2020 09:27 by rmk:")
|
||||
(RESETFORM (DSPFONT (OR FONT '(CLASSIC 12))
|
||||
T)
|
||||
(CL:WHEN (AND (SMALLP FROMCHAR)
|
||||
(NOT TOCHAR))
|
||||
|
||||
(* ;;
|
||||
"If a small number, assume it's an octal (in decimal) character set, no need for string quotes")
|
||||
"If a small number, assume it's an octal (in decimal) character set, no need for string quotes")
|
||||
|
||||
(SETQ TOCHAR (CONCAT FROMCHAR "," 376))
|
||||
(SETQ FROMCHAR (CONCAT FROMCHAR "," 41)))
|
||||
@@ -1302,16 +1302,16 @@
|
||||
(SETQ TOCHAR (CL:IF TOCHAR
|
||||
(CHARCODE.DECODE TOCHAR)
|
||||
FROMCHAR)))
|
||||
(FOR C FROM FROMCHAR TO TOCHAR UNLESS (AND (IGEQ (LOGAND C 255)
|
||||
127)
|
||||
(ILEQ (LOGAND C 255)
|
||||
(PLUS 128 33)))
|
||||
DO (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH CODE 8))
|
||||
","
|
||||
(OCTALSTRING (LOGAND CODE 255)))
|
||||
10
|
||||
(CHARACTER C)
|
||||
T])
|
||||
(for C from FROMCHAR to TOCHAR unless (AND (IGEQ (LOGAND C 255)
|
||||
127)
|
||||
(ILEQ (LOGAND C 255)
|
||||
(PLUS 128 33)))
|
||||
do (PRINTOUT T .P2 (CONCAT (OCTALSTRING (LRSH C 8))
|
||||
","
|
||||
(OCTALSTRING (LOGAND C 255)))
|
||||
10
|
||||
(CHARACTER C)
|
||||
T])
|
||||
)
|
||||
(DECLARE%: EVAL@COMPILE DONTCOPY
|
||||
|
||||
@@ -1338,17 +1338,19 @@
|
||||
(TRANSLATION-MASK (SUB1 TRANSLATION-SEGMENT-SIZE)))
|
||||
)
|
||||
)
|
||||
|
||||
(PUTPROPS UNICODE FILETYPE :TCOMPL)
|
||||
(DECLARE%: DONTCOPY
|
||||
(FILEMAP (NIL (3950 18041 (UTF8.OUTCHARFN 3960 . 6791) (UTF8.INCCODEFN 6793 . 12283) (UTF8.PEEKCCODEFN
|
||||
12285 . 17059) (\UTF8.BACKCCODEFN 17061 . 18039)) (18042 21823 (UTF16BE.OUTCHARFN 18052 . 18876) (
|
||||
UTF16BE.INCCODEFN 18878 . 19777) (UTF16BE.PEEKCCODEFN 19779 . 20850) (\UTF16BE.BACKCCODEFN 20852 .
|
||||
21821)) (21853 24134 (MAKE-UNICODE-FORMATS 21863 . 24132)) (24231 25537 (UNICODE.UNMAPPED 24241 .
|
||||
25535)) (25538 26214 (XCCS-UTF8-AFTER-OPEN 25548 . 26212)) (27670 28019 (XTOUCODE 27680 . 27848) (
|
||||
UTOXCODE 27850 . 28017)) (28059 44757 (READ-UNICODE-MAPPING-FILENAMES 28069 . 30519) (
|
||||
READ-UNICODE-MAPPING 30521 . 33497) (WRITE-UNICODE-MAPPING 33499 . 37249) (WRITE-UNICODE-INCLUDED
|
||||
37251 . 41973) (WRITE-UNICODE-MAPPING-HEADER 41975 . 43223) (WRITE-UNICODE-MAPPING-FILENAME 43225 .
|
||||
44755)) (48071 56550 (MAKE-UNICODE-TRANSLATION-TABLES 48081 . 56548)) (57055 68253 (UTF-8.VALIDATE
|
||||
57065 . 60067) (HEXSTRING 60069 . 61230) (UTF8HEXSTRING 61232 . 63437) (NUTF8CODEBYTES 63439 . 64392)
|
||||
(NUTF8STRINGBYTES 64394 . 64875) (XTOUSTRING 64877 . 67888) (XCCSSTRING 67890 . 68251)) (68254 70058 (
|
||||
\UTF8.FETCHCODE 68264 . 70056)) (70059 71528 (SHOWCHARS 70069 . 71526)))))
|
||||
(FILEMAP (NIL (4111 18202 (UTF8.OUTCHARFN 4121 . 6952) (UTF8.INCCODEFN 6954 . 12444) (UTF8.PEEKCCODEFN
|
||||
12446 . 17220) (\UTF8.BACKCCODEFN 17222 . 18200)) (18203 21984 (UTF16BE.OUTCHARFN 18213 . 19037) (
|
||||
UTF16BE.INCCODEFN 19039 . 19938) (UTF16BE.PEEKCCODEFN 19940 . 21011) (\UTF16BE.BACKCCODEFN 21013 .
|
||||
21982)) (22014 24295 (MAKE-UNICODE-FORMATS 22024 . 24293)) (24392 25698 (UNICODE.UNMAPPED 24402 .
|
||||
25696)) (25699 26375 (XCCS-UTF8-AFTER-OPEN 25709 . 26373)) (27831 28180 (XTOUCODE 27841 . 28009) (
|
||||
UTOXCODE 28011 . 28178)) (28220 45174 (READ-UNICODE-MAPPING-FILENAMES 28230 . 30936) (
|
||||
READ-UNICODE-MAPPING 30938 . 33914) (WRITE-UNICODE-MAPPING 33916 . 37666) (WRITE-UNICODE-INCLUDED
|
||||
37668 . 42390) (WRITE-UNICODE-MAPPING-HEADER 42392 . 43640) (WRITE-UNICODE-MAPPING-FILENAME 43642 .
|
||||
45172)) (48488 56912 (MAKE-UNICODE-TRANSLATION-TABLES 48498 . 56910)) (57417 68615 (UTF-8.VALIDATE
|
||||
57427 . 60429) (HEXSTRING 60431 . 61592) (UTF8HEXSTRING 61594 . 63799) (NUTF8CODEBYTES 63801 . 64754)
|
||||
(NUTF8STRINGBYTES 64756 . 65237) (XTOUSTRING 65239 . 68250) (XCCSSTRING 68252 . 68613)) (68616 70420 (
|
||||
\UTF8.FETCHCODE 68626 . 70418)) (70421 71931 (SHOWCHARS 70431 . 71929)))))
|
||||
STOP
|
||||
|
||||
Binary file not shown.
Reference in New Issue
Block a user