From 397824a957d9b8b5030925d5a35e7595e5918ed7 Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Sat, 9 May 2026 23:23:30 -0700 Subject: implement more stuff in interpret.e it depended on the execution-model stuff, for the "next" macro. in general, any word implemented in assembly will depend on that. so, the execution stuff is moved into its own file, leaving evoke.e with only the job of bringing everything together. that's a little disappointing because talking about the execution model seems like a good introduction to the whole topic, but perhaps the problem can be solved down the line with literate programming... Force-Push: yes Change-Id: Ic2fe22dcc39980ef75763ae293e41024abc8ba38 --- interpret.e | 236 +++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 202 insertions(+), 34 deletions(-) (limited to 'interpret.e') diff --git a/interpret.e b/interpret.e index 235c98b..4517b25 100644 --- a/interpret.e +++ b/interpret.e @@ -4,8 +4,8 @@ ~ ~ The code in this file defines the basic syntax and semantics of Forth as ~ a text-based language. It's written in terms of the underlying executor, -~ which is implemented and explained in evoke.e. The execution model gives us -~ the concept of "words"; the control and value stacks; and the ability to +~ which is implemented and explained in execution.e. The execution model gives +~ us the concept of "words"; the control and value stacks; and the ability to ~ call things. It has nothing to say about text, only about the binary form of ~ the language. ~ @@ -59,8 +59,8 @@ ~ all set up to start appending contents to the new word by calling ",". ~ ~ There's a handy diagram of the entry header format under "quick -~ reference", in the description of the exeuction model in evoke.e. Create is -~ responsible for everything up to the codeword, not including it. +~ reference", in the description of the exeuction model in exeuction.e. Create +~ is responsible for everything up to the codeword, not including it. ~ ~ When a word is created in interpret mode using s" to provide a string ~ literal, the temporary space that s" uses is in the same place as the @@ -127,10 +127,34 @@ ~ Finally, we write our updated value of "here" back into the variable. here ! ; -~ , 0000001000018080 -~ self-codeword 00000010000180d0 -~ variable 0000001000018128 -~ allocate 00000010000181c8 +~ (value to append to current word-in-progress --) +: , here @ swap pack64 here ! ; + +: self-codeword here @ 8 + , ; + +~ (address for new variable word to create, name string --) +: variable + create + self-codeword + here @ + swap :rax mov-reg64-imm64 + :rax push-reg64 + next + 8 packalign + here ! ; + +~ Allocates bytes on the heap by incrementing the global "here" pointer. The +~ "here" pointer is kept aligned to an 8-byte boundary, regardless of the size +~ requested. +~ +~ This does not create dictionary entries, it's just a raw memory interface. +~ It's suitable for allocating data or scratch space. +: allocate + here @ dup + ~ (size, here value, here value) + 3roll + 8 packalign here ! ; + +~ TODO ~ buffer-physical-start 0000001000018240 ~ buffer-physical-length 0000001000018270 ~ buffer-logical-start 00000010000182c0 @@ -157,31 +181,174 @@ ~ accumulate-string 0000001000018fc8 ~ word 00000010000194a0 ~ find 00000010000195f0 -~ is-alphanumeric 0000001000019628 -~ generalized-digit-value 0000001000019850 -~ decode-generalized-digit 0000001000019970 -~ read-base-unsigned 0000001000019a58 -~ read-integer-unsigned 0000001000019cb8 -~ read-integer 0000001000019eb0 + +~ (character -- 1 for true or 0 for false) +: is-alphanumeric + ~ We don't have a character-literal syntax; the hex constants here are + ~ ASCII codes. + dup 0x30 stackhex > { drop 0 exit } if ~ Less than "0". + dup 0x39 >= { drop 1 exit } if ~ Less than or equal to "9". + dup 0x41 > { drop 0 exit } if ~ Less than "A". + dup 0x5a >= { drop 1 exit } if ~ Less than or equal to "Z". + dup 0x61 > { drop 0 exit } if ~ Less than "a". + dup 0x7a >= { drop 1 exit } if ~ Less than or equal to "z". + drop 0 ; ~ Greater than "z". + + +~ (character -- value) +: generalized-digit-value + ~ We don't have a character-literal syntax; the hex constants here are + ~ ASCII codes. + dup 0x61 <= { 0x61 - 10 + exit } if ~ lowercase "a" + dup 0x41 <= { 0x41 - 10 + exit } if ~ uppercase "a" + 0x30 - ; ~ digit "0" + + +~ (character, base +~ -- value (if successful), +~ error indicator (zero equals success)) +: decode-generalized-digit + swap dup is-alphanumeric { + ~ It's alphanumeric. + ~ (base, character) + generalized-digit-value + ~ (base, value) + dup 3roll + ~ (value, value, base) + > { + ~ It's in range. + ~ (value) + 0 exit } if + ~ It's out of range. + ~ (value) + drop 1 exit } if + ~ It's not alphanumeric. + drop drop 1 ; + + +~ (string pointer, base +~ -- result (if successful), +~ error indicator (zero equals success)) +: read-base-unsigned + swap + + ~ If the first byte is null, this is an error + unpack8 + ~ (numeric base, current point in string, character) + dup 0 = { drop drop drop 1 exit } if + + ~ Decode the first byte as a generalized digit in the base. + ~ (numeric base, current point in string, character) + ~ If the first byte is less than "0", this is an error. + 3roll dup 4 unroll + ~ (numeric base, current point in string, character, numeric base) + decode-generalized-digit { + ~ (numeric base, current point in string) + drop drop 1 exit } if + + ~ The first byte is a valid generalized digit in the appropriate base, so + ~ let's get started. + ~ (numeric base, current point in string, initial value) + swap + + { + ~ (numeric base, result so far, current point in string) + unpack8 dup 0 = { + ~ A null after the first character is valid, and indicates we're done. + drop drop swap drop 0 exit } if + + ~ Decode the latest byte as a generalized digit in the base. + ~ (numeric base, result so far, current point in string, latest byte) + 4 roll dup 5 unroll + ~ (numeric base, result so far, current point in string, character + ~ numeric base) + decode-generalized-digit { + ~ If the latest character is not a valid digit, that's an error. + ~ (numeric base, result so far, current point in string) + drop drop drop 1 exit } if + + ~ The latest character is valid, so incorporate it and loop. + ~ (numeric base, result so far, current point in string, latest value) + 3roll 4 roll dup 5 unroll * + swap + } forever ; + ~ (string pointer ~ -- result (if successful), ~ error indicator (zero equals success)) -: read-decimal - dup unpack8 lit 0 != 0branch [ 6 8 * , ] ~ TODO character literal minus - ~ This is the case where it's non-negative. +: read-integer-unsigned + ~ We don't have a character-literal syntax; the hex constants here are + ~ ASCII codes. + dup unpack8 0x30 != { ~ digit "0" + ~ This is the case where the leading digit is not a zero. + ~ (original string pointer, advanced string pointer) + drop 10 read-base-unsigned exit } if + + ~ This is the case where the leading digit is a zero. ~ (original string pointer, advanced string pointer) - drop 10 read-base-unsigned exit + unpack8 dup 0x78 = { ~ lowercase "x" + ~ (original string pointer, doubly advanced string pointer, character) + drop swap drop 16 read-base-unsigned exit } if + + dup 0x6f = { ~ lowercase "o" + ~ (original string pointer, doubly advanced string pointer, character) + drop swap drop 8 read-base-unsigned exit } if + + dup 0x62 = { ~ lowercase "b" + ~ (original string pointer, doubly advanced string pointer, character) + swap drop swap 2 read-base-unsigned exit } if + + ~ This is the case where the second character is something else. + ~ (original string pointer, doubly advanced string pointer, character) + drop drop 10 read-base-unsigned ; + + +~ (string pointer +~ -- result (if successful), +~ error indicator (zero equals success)) +: read-integer + ~ We don't have a character-literal syntax; this is ASCII for a hyphen. + dup unpack8 0x2d != { + ~ This is the case where it's non-negative. + ~ (original string pointer, advanced string pointer) + drop read-integer-unsigned exit + } if ~ This is the case where it's negative. ~ (original string pointer, advanced string pointer) - swap drop 10 read-base-unsigned + swap drop read-integer-unsigned ~ (result maybe, exit code) - dup 0branch [ 2 8 * , ] + dup { + ~ Failure + ~ (non-zero exit code) + exit + } if + + ~ Success + ~ (result, zero exit code) + swap -1 * swap ; + + +~ (string pointer +~ -- result (if successful), +~ error indicator (zero equals success)) +: read-decimal + ~ We don't have a character-literal syntax; this is ASCII for a hyphen. + dup unpack8 0x2d != { + ~ This is the case where it's non-negative. + ~ (original string pointer, advanced string pointer) + drop 10 read-base-unsigned exit + } if - ~ Failure - ~ (non-zero exit code) - exit + ~ This is the case where it's negative. + ~ (original string pointer, advanced string pointer) + swap drop 10 read-base-unsigned + ~ (result maybe, exit code) + dup { + ~ Failure + ~ (non-zero exit code) + exit + } if ~ Success ~ (result, zero exit code) @@ -198,10 +365,10 @@ ~ we give it a name. ~ TODO this is the "create" / "here" conflict thing -~ describe-compilation -~ ' interpreter-flags-storage describe -~ ' interpreter-flags describe -~ newline +describe-compilation +' interpreter-flags-storage describe +' interpreter-flags describe +newline ~ here @ hexdump ~ s" interpreter-flags-storage" stackhex create stackhex ~ make-immediate 0 , ~ ~ latest @ dup unhide-entry s" interpreter-flags" variable @@ -337,12 +504,13 @@ : quit { interpret } forever ; ~ quit -~ 4 5 + . : za 13 12 - . ; za -~ : ' word value@ find dropstring-with-result -~ interpreter-flags @ 1 & { literal } if ; make-immediate -~ ' za . newline -~ : piz ' za . newline ; piz -~ ~ ' interpret forget quit 2 3 * . -~ ' ' describe ' za describe ' piz describe +-0x10 newline . newline +4 5 + . : za 13 12 - . ; za +: ' word value@ find dropstring-with-result + interpreter-flags @ 1 & { literal } if ; make-immediate +' za . newline +: piz ' za . newline ; piz +~ ' interpret forget quit 2 3 * . +' ' describe ' za describe ' piz describe bye -- cgit 1.4.1