mirror of
https://github.com/PDP-10/its.git
synced 2026-01-16 16:28:40 +00:00
227 lines
6.1 KiB
Common Lisp
227 lines
6.1 KiB
Common Lisp
;;; -*- LISP -*-
|
||
|
||
;;; This is KMP's Word Parsing Package.
|
||
;;;
|
||
;;; The only user function is PARSE$MAKE-WORDS(Char-List) which
|
||
;;; will take a list of ascii characters and convert them into
|
||
;;; a list of words (each punctuating object being treated as a
|
||
;;; word.
|
||
;;;
|
||
;;; Supporting functions defined are:
|
||
;;;
|
||
;;; Definition Predicate Other
|
||
;;;
|
||
;;; PARSE$PUNCTUATION PARSE$PUNCTUATION?
|
||
;;; PARSE$DELIMITER PARSE$DELIMITER?
|
||
;;; PARSE$SPECIAL-CHAR PARSE$SPECIAL-CHAR?
|
||
;;; PARSE$STRAY-CHAR?
|
||
;;; PARSE$QUOTE PARSE$QUOTE?
|
||
;;; PARSE$CAPS
|
||
;;; PARSE$ALPHABETIC PARSE$ALPHABETIC?
|
||
;;; PARSE$NUMERIC?
|
||
|
||
;;; Turn of load messages
|
||
|
||
(SSTATUS FEATURE NOLDMSG)
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;; Standard Predicates ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;;; PARSE$CAPS
|
||
;;; Returns ascii->ascii or numeric->numeric capitalized character.
|
||
|
||
(DEFUN PARSE$CAPS (X)
|
||
(COND ((NUMBERP X)
|
||
(COND ((AND (> X 96.) (< X 123.)) (- X 32.))
|
||
(T X)))
|
||
(T
|
||
(ASCII (PARSE$CAPS (GETCHARN X 1.))))))
|
||
|
||
;;; PARSE$ALPHABETIC?
|
||
;;; Predicate returns T if arg represents an alpha character. Accepts
|
||
;;; ascii or numeric arg.
|
||
|
||
(DEFUN PARSE$ALPHABETIC? (C)
|
||
(COND ((NUMBERP C) (AND (> C 64.) (< C 91.))) ; A <= C <= Z
|
||
(T (PARSE$ALPHABETIC? (GETCHARN C 1.)))))
|
||
|
||
;;; PARSE$DIGIT?
|
||
;;; Predicate returns T if arg represents a digital character. Accepts
|
||
;;; ascii or numeric arg.
|
||
|
||
(DEFUN PARSE$DIGIT? (N)
|
||
(COND ((NUMBERP N) (AND (> N 47.) (< N 58.))) ; 0 <= N <= 9
|
||
(T (PARSE$DIGIT? (GETCHARN N 1.)))))
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Punctuation ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;;; PARSE$PUNCTUATION?
|
||
;;; Is [ascii] character punctuation?
|
||
|
||
(DEFUN PARSE$PUNCTUATION? (X) (GET X 'PARSE$PUNCTUATION))
|
||
|
||
;;; PARSE$PUNCTUATION
|
||
;;; Make [ascii] character into punctuation.
|
||
|
||
(DEFUN PARSE$PUNCTUATION (X) (PUTPROP X T 'PARSE$PUNCTUATION))
|
||
|
||
;;; PARSE$QUOTE?
|
||
;;; Is [ascii] character a quotation designator?
|
||
|
||
(DEFUN PARSE$QUOTE? (X) (GET X 'PARSE$QUOTE))
|
||
|
||
;;; PARSE$QUOTE
|
||
;;; Make [ascii] character into quotation designator.
|
||
|
||
(DEFUN PARSE$QUOTE (X) (PUTPROP X T 'PARSE$QUOTE))
|
||
|
||
;;; Make these chars into delimiters (single character objects)
|
||
|
||
(MAPC 'PARSE$PUNCTUATION
|
||
(LIST (ASCII 33.) ; <Exclamation Mark>
|
||
'|''| ; <Quote Marks> (Pseudo-punctuation generated below)
|
||
(ASCII 40.) ; <Left-Parenthesis>
|
||
(ASCII 41.) ; <Right-Parenthesis>
|
||
(ASCII 44.) ; <Comma>
|
||
'-- ; <Dash> (Pseudo-punctuation generated below)
|
||
(ASCII 46.) ; <Period>
|
||
(ASCII 58.) ; <Colon>
|
||
(ASCII 59.) ; <Semi-colon>
|
||
(ASCII 63.) ; <Question-mark>
|
||
(ASCII 91.) ; <Right-Bracket>
|
||
(ASCII 93.))); <Left-Bracket>
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;; Other Delimiters ;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;;; PARSE$DELIMITER?
|
||
;;; Is [ascii] character a delimiter?
|
||
|
||
(DEFUN PARSE$DELIMITER? (X)
|
||
(OR (PARSE$PUNCTUATION? X) (GET X 'PARSE$DELIMITER)))
|
||
|
||
;;; PARSE$DELIMITER
|
||
;;; Make [ascii] character a delimiter.
|
||
|
||
(DEFUN PARSE$DELIMITER (X) (PUTPROP X T 'PARSE$DELIMITER))
|
||
|
||
;;; Make these characters into delimiters (white space)
|
||
|
||
(MAPC 'PARSE$DELIMITER
|
||
(LIST (ASCII 9.) ; <Tab>
|
||
(ASCII 10.) ; <Linefeed>
|
||
(ASCII 13.) ; <Carriage Return>
|
||
(ASCII 32.))); <Space>
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;; Special Characters ;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;;; PARSE$SPECIAL-CHAR?
|
||
;;; Is [ascii] character a special (legal) character?
|
||
|
||
(DEFUN PARSE$SPECIAL-CHAR? (X) (GET X 'PARSE$SPECIAL-CHAR))
|
||
|
||
;;; PARSE$SPECIAL-CHAR
|
||
;;; Make [ascii] character into a special character.
|
||
|
||
(DEFUN PARSE$SPECIAL-CHAR (X) (PUTPROP X T 'PARSE$SPECIAL-CHAR))
|
||
|
||
;;; Make <Single Quote> and <Hyphen> a speical char (to be treated like
|
||
;;; an alphabetic character).
|
||
|
||
(PARSE$SPECIAL-CHAR '/')
|
||
(PARSE$SPECIAL-CHAR '-)
|
||
|
||
;;; PARSE$FUNNY-CHAR?
|
||
;;; A printing ascii char, but not a commonly seen one.
|
||
|
||
(DEFUN PARSE$FUNNY-CHAR? (X) (GET X 'PARSE$FUNNY-CHAR))
|
||
|
||
;;; Set up FUNNY-CHAR definitions
|
||
|
||
(DO ((I 33. (1+ I)))
|
||
((> I 126.))
|
||
(LET ((X (ASCII I)))
|
||
(AND (NOT (GET X 'PARSE$FUNNY-CHAR))
|
||
(NOT (PARSE$PUNCTUATION? X))
|
||
(NOT (PARSE$DELIMITER? X))
|
||
(NOT (PARSE$ALPHABETIC? X))
|
||
(NOT (PARSE$DIGIT? X))
|
||
(NOT (PARSE$SPECIAL-CHAR? X))
|
||
(PUTPROP X T 'PARSE$FUNNY-CHAR))))
|
||
|
||
;;; PARSE$STRAY-CHAR?
|
||
;;; Is [ascii] character a random character of unknown type?
|
||
|
||
(DEFUN PARSE$STRAY-CHAR? (X)
|
||
(NOT (OR (PARSE$DELIMITER? X)
|
||
(PARSE$PUNCTUATION? X)
|
||
(PARSE$FUNNY-CHAR? X)
|
||
(PARSE$ALPHABETIC? X)
|
||
(PARSE$SPECIAL-CHAR? X)
|
||
(PARSE$DIGIT? X))))
|
||
|
||
|
||
|
||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Main Word Parser ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||
|
||
;;; PARSE$MAKE-WORDS
|
||
;;; Take a list of [ascii] characters and return a list of atoms that can
|
||
;;; be made from those characters.
|
||
|
||
(DEFUN PARSE$MAKE-WORDS (CHAR-LIST)
|
||
(DO ((C CHAR-LIST (CDR C))
|
||
(CHAR)
|
||
(WORD NIL)
|
||
(SENT NIL))
|
||
((NULL C)
|
||
(COND (WORD (NREVERSE (CONS (IMPLODE (NREVERSE WORD)) SENT)))
|
||
(T (NREVERSE SENT))))
|
||
(SETQ CHAR (PARSE$CAPS (CAR C)))
|
||
(COND ((AND (EQ CHAR '-)
|
||
(EQ (CADR C) '-))
|
||
(SETQ C (CDR C)) ; Gobble second -
|
||
(SETQ CHAR '--)) ; Join hyphens to a dash
|
||
((AND (NOT WORD)
|
||
(PARSE$QUOTE? CHAR))
|
||
(DO () ((NOT (PARSE$QUOTE? (CAR C)))) (POP C)) ; Strip quotes
|
||
(DO () ((OR (NULL C) (PARSE$QUOTE? (CAR C))))
|
||
(PUSH (CAR C) WORD)
|
||
(POP C))
|
||
(DO () ((NOT (PARSE$QUOTE? (CADR C)))) (POP C)); Strip quotes
|
||
(PUSH '|``| SENT)
|
||
(PUSH (NCONS (IMPLODE
|
||
(SUBST '| | (ASCII 13.) (NREVERSE WORD))))
|
||
SENT)
|
||
(PUSH '|''| SENT)
|
||
(SETQ WORD NIL)
|
||
(SETQ CHAR '*DUMMY*))) ; This won't get cons'd in
|
||
(COND ((PARSE$STRAY-CHAR? CHAR)
|
||
(COMMENT IGNORE IT))
|
||
((AND (EQ CHAR '-)
|
||
(EQ (CADR C) (ASCII 13.)))
|
||
(COMMENT IGNORE HYPHEN -- GOBBLE <CR>)
|
||
(SETQ C (CDR C)))
|
||
((OR (PARSE$PUNCTUATION? CHAR)
|
||
(PARSE$FUNNY-CHAR? CHAR))
|
||
(COND (WORD
|
||
(SETQ SENT
|
||
(CONS CHAR
|
||
(CONS (IMPLODE (NREVERSE WORD))
|
||
SENT)))
|
||
(SETQ WORD NIL))
|
||
(T
|
||
(SETQ SENT (CONS CHAR SENT)))))
|
||
((PARSE$DELIMITER? CHAR)
|
||
(COND
|
||
(WORD (SETQ SENT (CONS (IMPLODE (NREVERSE WORD)) SENT))
|
||
(SETQ WORD NIL))))
|
||
((OR (PARSE$ALPHABETIC? CHAR)
|
||
(PARSE$SPECIAL-CHAR? CHAR)
|
||
(PARSE$DIGIT? CHAR))
|
||
(SETQ WORD (CONS CHAR WORD))))))
|
||
|
||
|