;;; -*- LISP -*- ;;; This is KMP's Word Parsing Package. ;;; ;;; The only user function is PARSE$MAKE-WORDS(Char-List) which ;;; will take a list of ascii characters and convert them into ;;; a list of words (each punctuating object being treated as a ;;; word. ;;; ;;; Supporting functions defined are: ;;; ;;; Definition Predicate Other ;;; ;;; PARSE$PUNCTUATION PARSE$PUNCTUATION? ;;; PARSE$DELIMITER PARSE$DELIMITER? ;;; PARSE$SPECIAL-CHAR PARSE$SPECIAL-CHAR? ;;; PARSE$STRAY-CHAR? ;;; PARSE$QUOTE PARSE$QUOTE? ;;; PARSE$CAPS ;;; PARSE$ALPHABETIC PARSE$ALPHABETIC? ;;; PARSE$NUMERIC? ;;; Turn of load messages (SSTATUS FEATURE NOLDMSG) ;;;;;;;;;;;;;;;;;;;;;;;;;;; Standard Predicates ;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; PARSE$CAPS ;;; Returns ascii->ascii or numeric->numeric capitalized character. (DEFUN PARSE$CAPS (X) (COND ((NUMBERP X) (COND ((AND (> X 96.) (< X 123.)) (- X 32.)) (T X))) (T (ASCII (PARSE$CAPS (GETCHARN X 1.)))))) ;;; PARSE$ALPHABETIC? ;;; Predicate returns T if arg represents an alpha character. Accepts ;;; ascii or numeric arg. (DEFUN PARSE$ALPHABETIC? (C) (COND ((NUMBERP C) (AND (> C 64.) (< C 91.))) ; A <= C <= Z (T (PARSE$ALPHABETIC? (GETCHARN C 1.))))) ;;; PARSE$DIGIT? ;;; Predicate returns T if arg represents a digital character. Accepts ;;; ascii or numeric arg. (DEFUN PARSE$DIGIT? (N) (COND ((NUMBERP N) (AND (> N 47.) (< N 58.))) ; 0 <= N <= 9 (T (PARSE$DIGIT? (GETCHARN N 1.))))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Punctuation ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; PARSE$PUNCTUATION? ;;; Is [ascii] character punctuation? (DEFUN PARSE$PUNCTUATION? (X) (GET X 'PARSE$PUNCTUATION)) ;;; PARSE$PUNCTUATION ;;; Make [ascii] character into punctuation. (DEFUN PARSE$PUNCTUATION (X) (PUTPROP X T 'PARSE$PUNCTUATION)) ;;; PARSE$QUOTE? ;;; Is [ascii] character a quotation designator? (DEFUN PARSE$QUOTE? (X) (GET X 'PARSE$QUOTE)) ;;; PARSE$QUOTE ;;; Make [ascii] character into quotation designator. (DEFUN PARSE$QUOTE (X) (PUTPROP X T 'PARSE$QUOTE)) ;;; Make these chars into delimiters (single character objects) (MAPC 'PARSE$PUNCTUATION (LIST (ASCII 33.) ; '|''| ; (Pseudo-punctuation generated below) (ASCII 40.) ; (ASCII 41.) ; (ASCII 44.) ; '-- ; (Pseudo-punctuation generated below) (ASCII 46.) ; (ASCII 58.) ; (ASCII 59.) ; (ASCII 63.) ; (ASCII 91.) ; (ASCII 93.))); ;;;;;;;;;;;;;;;;;;;;;;;;;;; Other Delimiters ;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; PARSE$DELIMITER? ;;; Is [ascii] character a delimiter? (DEFUN PARSE$DELIMITER? (X) (OR (PARSE$PUNCTUATION? X) (GET X 'PARSE$DELIMITER))) ;;; PARSE$DELIMITER ;;; Make [ascii] character a delimiter. (DEFUN PARSE$DELIMITER (X) (PUTPROP X T 'PARSE$DELIMITER)) ;;; Make these characters into delimiters (white space) (MAPC 'PARSE$DELIMITER (LIST (ASCII 9.) ; (ASCII 10.) ; (ASCII 13.) ; (ASCII 32.))); ;;;;;;;;;;;;;;;;;;;;;;;;;;; Special Characters ;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; PARSE$SPECIAL-CHAR? ;;; Is [ascii] character a special (legal) character? (DEFUN PARSE$SPECIAL-CHAR? (X) (GET X 'PARSE$SPECIAL-CHAR)) ;;; PARSE$SPECIAL-CHAR ;;; Make [ascii] character into a special character. (DEFUN PARSE$SPECIAL-CHAR (X) (PUTPROP X T 'PARSE$SPECIAL-CHAR)) ;;; Make and a speical char (to be treated like ;;; an alphabetic character). (PARSE$SPECIAL-CHAR '/') (PARSE$SPECIAL-CHAR '-) ;;; PARSE$FUNNY-CHAR? ;;; A printing ascii char, but not a commonly seen one. (DEFUN PARSE$FUNNY-CHAR? (X) (GET X 'PARSE$FUNNY-CHAR)) ;;; Set up FUNNY-CHAR definitions (DO ((I 33. (1+ I))) ((> I 126.)) (LET ((X (ASCII I))) (AND (NOT (GET X 'PARSE$FUNNY-CHAR)) (NOT (PARSE$PUNCTUATION? X)) (NOT (PARSE$DELIMITER? X)) (NOT (PARSE$ALPHABETIC? X)) (NOT (PARSE$DIGIT? X)) (NOT (PARSE$SPECIAL-CHAR? X)) (PUTPROP X T 'PARSE$FUNNY-CHAR)))) ;;; PARSE$STRAY-CHAR? ;;; Is [ascii] character a random character of unknown type? (DEFUN PARSE$STRAY-CHAR? (X) (NOT (OR (PARSE$DELIMITER? X) (PARSE$PUNCTUATION? X) (PARSE$FUNNY-CHAR? X) (PARSE$ALPHABETIC? X) (PARSE$SPECIAL-CHAR? X) (PARSE$DIGIT? X)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Main Word Parser ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; PARSE$MAKE-WORDS ;;; Take a list of [ascii] characters and return a list of atoms that can ;;; be made from those characters. (DEFUN PARSE$MAKE-WORDS (CHAR-LIST) (DO ((C CHAR-LIST (CDR C)) (CHAR) (WORD NIL) (SENT NIL)) ((NULL C) (COND (WORD (NREVERSE (CONS (IMPLODE (NREVERSE WORD)) SENT))) (T (NREVERSE SENT)))) (SETQ CHAR (PARSE$CAPS (CAR C))) (COND ((AND (EQ CHAR '-) (EQ (CADR C) '-)) (SETQ C (CDR C)) ; Gobble second - (SETQ CHAR '--)) ; Join hyphens to a dash ((AND (NOT WORD) (PARSE$QUOTE? CHAR)) (DO () ((NOT (PARSE$QUOTE? (CAR C)))) (POP C)) ; Strip quotes (DO () ((OR (NULL C) (PARSE$QUOTE? (CAR C)))) (PUSH (CAR C) WORD) (POP C)) (DO () ((NOT (PARSE$QUOTE? (CADR C)))) (POP C)); Strip quotes (PUSH '|``| SENT) (PUSH (NCONS (IMPLODE (SUBST '| | (ASCII 13.) (NREVERSE WORD)))) SENT) (PUSH '|''| SENT) (SETQ WORD NIL) (SETQ CHAR '*DUMMY*))) ; This won't get cons'd in (COND ((PARSE$STRAY-CHAR? CHAR) (COMMENT IGNORE IT)) ((AND (EQ CHAR '-) (EQ (CADR C) (ASCII 13.))) (COMMENT IGNORE HYPHEN -- GOBBLE ) (SETQ C (CDR C))) ((OR (PARSE$PUNCTUATION? CHAR) (PARSE$FUNNY-CHAR? CHAR)) (COND (WORD (SETQ SENT (CONS CHAR (CONS (IMPLODE (NREVERSE WORD)) SENT))) (SETQ WORD NIL)) (T (SETQ SENT (CONS CHAR SENT))))) ((PARSE$DELIMITER? CHAR) (COND (WORD (SETQ SENT (CONS (IMPLODE (NREVERSE WORD)) SENT)) (SETQ WORD NIL)))) ((OR (PARSE$ALPHABETIC? CHAR) (PARSE$SPECIAL-CHAR? CHAR) (PARSE$DIGIT? CHAR)) (SETQ WORD (CONS CHAR WORD))))))