Improve parsing of symbols.

In particular, don't confuse `4..` with a symbol.
This commit is contained in:
Olaf Seibert 2022-06-04 21:07:10 +02:00
parent 8d7414d7c3
commit ff183c0e36
5 changed files with 118 additions and 16 deletions

4
TODO
View File

@ -4,10 +4,6 @@ listing format errors: ignore whitespace of input
documentation: print supported directives
register symbols: %3+1 is the same as %4 (pdf page 3-9 aka 35),
but where precisely register symbols can be used, and how,
isn't specified.
---------------------------------------
I was not able to locate a Macro-11 language reference manual any more
recent than for RT11 version *3*, so I used that plus my recollection

23
parse.c
View File

@ -743,23 +743,23 @@ char *get_symbol(
{
int len;
char *symcp;
int digits = 0;
int start_digit = 0;
int not_digits = 0;
cp = skipwhite(cp); /* Skip leading whitespace */
if (!issym((unsigned char)*cp))
return NULL;
digits = 0;
if (isdigit((unsigned char)*cp))
digits = 2; /* Think about digit count */
start_digit = 1;
for (symcp = cp + 1; issym((unsigned char)*symcp); symcp++) {
if (!isdigit((unsigned char)*symcp)) /* Not a digit? */
digits--; /* Make a note. */
not_digits++; /* Make a note. */
}
if (digits == 2)
if (start_digit && not_digits == 0)
return NULL; /* Not a symbol, it's a digit string */
if (endp)
@ -780,9 +780,9 @@ char *get_symbol(
if (islocal) {
*islocal = 0;
/* Turn to local label format */
if (digits == 1) {
if (symcp[len - 1] == '$') {
/* Check if local label format */
if (start_digit) {
if (not_digits == 1 && symcp[len - 1] == '$') {
char *newsym = memcheck(malloc(32)); /* Overkill */
sprintf(newsym, "%ld$%d", strtol(symcp, NULL, 10), lsb);
@ -792,8 +792,7 @@ char *get_symbol(
}
free(symcp);
symcp = newsym;
if (islocal)
*islocal = SYMBOLFLAG_LOCAL;
*islocal = SYMBOLFLAG_LOCAL;
lsb_used++;
} else {
free(symcp);
@ -801,8 +800,8 @@ char *get_symbol(
}
}
} else {
/* disallow local label format */
if (isdigit((unsigned char)*symcp)) {
/* Disallow local label format */
if (start_digit) {
free(symcp);
return NULL;
}

View File

@ -33,6 +33,7 @@ TESTS="test-asciz \
test-reg \
test-reloc \
test-rept \
test-syntax \
test-ua-pl \
test-undef \
test-word-comma"

64
tests/test-syntax.lst.ok Normal file
View File

@ -0,0 +1,64 @@
1 ;;;;
2 ;
3 ; Test some aspects of syntax.
4 ;
5 ; Reference MACRO11 does very weird things with these errors.
6 ; Fortunately I don't plan to produce exactly the same results in case of errors.
7 ; It seems to recognise an operand where there is none (or none yet)...
8 ;
9 ; AQ 37 000022 012767 000004 177772 mov #4..,r0
10 ; AQ 38 000030 012767 000011 177772 mov #9..,r0
11 ; AQ 39 000036 012767 000004 000000G mov #4.$,r0
12 ; AQU 40 000044 012767 000000 177772 mov #4$.,r0
13 ; 41
14 ; A 42 000052 012767 000004 177772 mov #4..
15 ; A 43 000060 012767 000004 000000G mov #4.$
16 ; AU 44 000066 012767 000000 177772 mov #4$.
17
test-syntax.mac:18: ***ERROR Invalid syntax (comma expected)
18 mov #4..,r0 ; bad syntax
test-syntax.mac:19: ***ERROR Invalid syntax (comma expected)
19 mov #4$.,r0 ; bad syntax
test-syntax.mac:20: ***ERROR Invalid syntax (comma expected)
20 mov #4.$,r0 ; bad syntax
test-syntax.mac:21: ***ERROR Invalid syntax (comma expected)
21 mov #4$$,r0 ; bad syntax
22
test-syntax.mac:23: ***ERROR Invalid syntax (comma expected)
23 mov #4.. ; bad syntax
test-syntax.mac:24: ***ERROR Invalid syntax (comma expected)
24 mov #4$. ; bad syntax
test-syntax.mac:25: ***ERROR Invalid syntax (comma expected)
25 mov #4.$ ; bad syntax
test-syntax.mac:26: ***ERROR Invalid syntax (comma expected)
26 mov #4$$ ; bad syntax
27
28 ;; page 2-4:
29 ;; Multiple expressions used in the operand field of a MACRO-11 statement
30 ;; must be separated by a comma;
31 ;; multiple symbols similarly must be delimited by a valid separator
32 ;; (a comma, tab, and/or space).
33 ;; When the operator field contains an op code, associated operands are
34 ;; always expressions, ...
35
36 000001 a=1
test-syntax.mac:37: ***ERROR Invalid syntax (comma expected)
37 mov #4 r0
test-syntax.mac:38: ***ERROR Invalid syntax (comma expected)
38 mov a r0
39
40 ;; page 2-3:
41 ;; An operator is terminated by a space, tab, or any non-Radix-50 character,
42 000000 017700 000001' mov@a,r0
42
Symbol table
. 000004R 001 A = 000001
Program sections:
. ABS. 000000 000 (RW,I,GBL,ABS,OVR,NOSAV)
000004 001 (RW,I,LCL,REL,CON,NOSAV)

42
tests/test-syntax.mac Normal file
View File

@ -0,0 +1,42 @@
;;;;
;
; Test some aspects of syntax.
;
; Reference MACRO11 does very weird things with these errors.
; Fortunately I don't plan to produce exactly the same results in case of errors.
; It seems to recognise an operand where there is none (or none yet)...
;
; AQ 37 000022 012767 000004 177772 mov #4..,r0
; AQ 38 000030 012767 000011 177772 mov #9..,r0
; AQ 39 000036 012767 000004 000000G mov #4.$,r0
; AQU 40 000044 012767 000000 177772 mov #4$.,r0
; 41
; A 42 000052 012767 000004 177772 mov #4..
; A 43 000060 012767 000004 000000G mov #4.$
; AU 44 000066 012767 000000 177772 mov #4$.
mov #4..,r0 ; bad syntax
mov #4$.,r0 ; bad syntax
mov #4.$,r0 ; bad syntax
mov #4$$,r0 ; bad syntax
mov #4.. ; bad syntax
mov #4$. ; bad syntax
mov #4.$ ; bad syntax
mov #4$$ ; bad syntax
;; page 2-4:
;; Multiple expressions used in the operand field of a MACRO-11 statement
;; must be separated by a comma;
;; multiple symbols similarly must be delimited by a valid separator
;; (a comma, tab, and/or space).
;; When the operator field contains an op code, associated operands are
;; always expressions, ...
a=1
mov #4 r0
mov a r0
;; page 2-3:
;; An operator is terminated by a space, tab, or any non-Radix-50 character,
mov@a,r0