diff options
Diffstat (limited to 'interpret.e')
| -rw-r--r-- | interpret.e | 236 |
1 files changed, 202 insertions, 34 deletions
diff --git a/interpret.e b/interpret.e index 235c98b..4517b25 100644 --- a/interpret.e +++ b/interpret.e @@ -4,8 +4,8 @@ ~ ~ The code in this file defines the basic syntax and semantics of Forth as ~ a text-based language. It's written in terms of the underlying executor, -~ which is implemented and explained in evoke.e. The execution model gives us -~ the concept of "words"; the control and value stacks; and the ability to +~ which is implemented and explained in execution.e. The execution model gives +~ us the concept of "words"; the control and value stacks; and the ability to ~ call things. It has nothing to say about text, only about the binary form of ~ the language. ~ @@ -59,8 +59,8 @@ ~ all set up to start appending contents to the new word by calling ",". ~ ~ There's a handy diagram of the entry header format under "quick -~ reference", in the description of the exeuction model in evoke.e. Create is -~ responsible for everything up to the codeword, not including it. +~ reference", in the description of the exeuction model in exeuction.e. Create +~ is responsible for everything up to the codeword, not including it. ~ ~ When a word is created in interpret mode using s" to provide a string ~ literal, the temporary space that s" uses is in the same place as the @@ -127,10 +127,34 @@ ~ Finally, we write our updated value of "here" back into the variable. here ! ; -~ , 0000001000018080 -~ self-codeword 00000010000180d0 -~ variable 0000001000018128 -~ allocate 00000010000181c8 +~ (value to append to current word-in-progress --) +: , here @ swap pack64 here ! ; + +: self-codeword here @ 8 + , ; + +~ (address for new variable word to create, name string --) +: variable + create + self-codeword + here @ + swap :rax mov-reg64-imm64 + :rax push-reg64 + next + 8 packalign + here ! ; + +~ Allocates bytes on the heap by incrementing the global "here" pointer. The +~ "here" pointer is kept aligned to an 8-byte boundary, regardless of the size +~ requested. +~ +~ This does not create dictionary entries, it's just a raw memory interface. +~ It's suitable for allocating data or scratch space. +: allocate + here @ dup + ~ (size, here value, here value) + 3roll + 8 packalign here ! ; + +~ TODO ~ buffer-physical-start 0000001000018240 ~ buffer-physical-length 0000001000018270 ~ buffer-logical-start 00000010000182c0 @@ -157,31 +181,174 @@ ~ accumulate-string 0000001000018fc8 ~ word 00000010000194a0 ~ find 00000010000195f0 -~ is-alphanumeric 0000001000019628 -~ generalized-digit-value 0000001000019850 -~ decode-generalized-digit 0000001000019970 -~ read-base-unsigned 0000001000019a58 -~ read-integer-unsigned 0000001000019cb8 -~ read-integer 0000001000019eb0 + +~ (character -- 1 for true or 0 for false) +: is-alphanumeric + ~ We don't have a character-literal syntax; the hex constants here are + ~ ASCII codes. + dup 0x30 stackhex > { drop 0 exit } if ~ Less than "0". + dup 0x39 >= { drop 1 exit } if ~ Less than or equal to "9". + dup 0x41 > { drop 0 exit } if ~ Less than "A". + dup 0x5a >= { drop 1 exit } if ~ Less than or equal to "Z". + dup 0x61 > { drop 0 exit } if ~ Less than "a". + dup 0x7a >= { drop 1 exit } if ~ Less than or equal to "z". + drop 0 ; ~ Greater than "z". + + +~ (character -- value) +: generalized-digit-value + ~ We don't have a character-literal syntax; the hex constants here are + ~ ASCII codes. + dup 0x61 <= { 0x61 - 10 + exit } if ~ lowercase "a" + dup 0x41 <= { 0x41 - 10 + exit } if ~ uppercase "a" + 0x30 - ; ~ digit "0" + + +~ (character, base +~ -- value (if successful), +~ error indicator (zero equals success)) +: decode-generalized-digit + swap dup is-alphanumeric { + ~ It's alphanumeric. + ~ (base, character) + generalized-digit-value + ~ (base, value) + dup 3roll + ~ (value, value, base) + > { + ~ It's in range. + ~ (value) + 0 exit } if + ~ It's out of range. + ~ (value) + drop 1 exit } if + ~ It's not alphanumeric. + drop drop 1 ; + + +~ (string pointer, base +~ -- result (if successful), +~ error indicator (zero equals success)) +: read-base-unsigned + swap + + ~ If the first byte is null, this is an error + unpack8 + ~ (numeric base, current point in string, character) + dup 0 = { drop drop drop 1 exit } if + + ~ Decode the first byte as a generalized digit in the base. + ~ (numeric base, current point in string, character) + ~ If the first byte is less than "0", this is an error. + 3roll dup 4 unroll + ~ (numeric base, current point in string, character, numeric base) + decode-generalized-digit { + ~ (numeric base, current point in string) + drop drop 1 exit } if + + ~ The first byte is a valid generalized digit in the appropriate base, so + ~ let's get started. + ~ (numeric base, current point in string, initial value) + swap + + { + ~ (numeric base, result so far, current point in string) + unpack8 dup 0 = { + ~ A null after the first character is valid, and indicates we're done. + drop drop swap drop 0 exit } if + + ~ Decode the latest byte as a generalized digit in the base. + ~ (numeric base, result so far, current point in string, latest byte) + 4 roll dup 5 unroll + ~ (numeric base, result so far, current point in string, character + ~ numeric base) + decode-generalized-digit { + ~ If the latest character is not a valid digit, that's an error. + ~ (numeric base, result so far, current point in string) + drop drop drop 1 exit } if + + ~ The latest character is valid, so incorporate it and loop. + ~ (numeric base, result so far, current point in string, latest value) + 3roll 4 roll dup 5 unroll * + swap + } forever ; + ~ (string pointer ~ -- result (if successful), ~ error indicator (zero equals success)) -: read-decimal - dup unpack8 lit 0 != 0branch [ 6 8 * , ] ~ TODO character literal minus - ~ This is the case where it's non-negative. +: read-integer-unsigned + ~ We don't have a character-literal syntax; the hex constants here are + ~ ASCII codes. + dup unpack8 0x30 != { ~ digit "0" + ~ This is the case where the leading digit is not a zero. + ~ (original string pointer, advanced string pointer) + drop 10 read-base-unsigned exit } if + + ~ This is the case where the leading digit is a zero. ~ (original string pointer, advanced string pointer) - drop 10 read-base-unsigned exit + unpack8 dup 0x78 = { ~ lowercase "x" + ~ (original string pointer, doubly advanced string pointer, character) + drop swap drop 16 read-base-unsigned exit } if + + dup 0x6f = { ~ lowercase "o" + ~ (original string pointer, doubly advanced string pointer, character) + drop swap drop 8 read-base-unsigned exit } if + + dup 0x62 = { ~ lowercase "b" + ~ (original string pointer, doubly advanced string pointer, character) + swap drop swap 2 read-base-unsigned exit } if + + ~ This is the case where the second character is something else. + ~ (original string pointer, doubly advanced string pointer, character) + drop drop 10 read-base-unsigned ; + + +~ (string pointer +~ -- result (if successful), +~ error indicator (zero equals success)) +: read-integer + ~ We don't have a character-literal syntax; this is ASCII for a hyphen. + dup unpack8 0x2d != { + ~ This is the case where it's non-negative. + ~ (original string pointer, advanced string pointer) + drop read-integer-unsigned exit + } if ~ This is the case where it's negative. ~ (original string pointer, advanced string pointer) - swap drop 10 read-base-unsigned + swap drop read-integer-unsigned ~ (result maybe, exit code) - dup 0branch [ 2 8 * , ] + dup { + ~ Failure + ~ (non-zero exit code) + exit + } if + + ~ Success + ~ (result, zero exit code) + swap -1 * swap ; + + +~ (string pointer +~ -- result (if successful), +~ error indicator (zero equals success)) +: read-decimal + ~ We don't have a character-literal syntax; this is ASCII for a hyphen. + dup unpack8 0x2d != { + ~ This is the case where it's non-negative. + ~ (original string pointer, advanced string pointer) + drop 10 read-base-unsigned exit + } if - ~ Failure - ~ (non-zero exit code) - exit + ~ This is the case where it's negative. + ~ (original string pointer, advanced string pointer) + swap drop 10 read-base-unsigned + ~ (result maybe, exit code) + dup { + ~ Failure + ~ (non-zero exit code) + exit + } if ~ Success ~ (result, zero exit code) @@ -198,10 +365,10 @@ ~ we give it a name. ~ TODO this is the "create" / "here" conflict thing -~ describe-compilation -~ ' interpreter-flags-storage describe -~ ' interpreter-flags describe -~ newline +describe-compilation +' interpreter-flags-storage describe +' interpreter-flags describe +newline ~ here @ hexdump ~ s" interpreter-flags-storage" stackhex create stackhex ~ make-immediate 0 , ~ ~ latest @ dup unhide-entry s" interpreter-flags" variable @@ -337,12 +504,13 @@ : quit { interpret } forever ; ~ quit -~ 4 5 + . : za 13 12 - . ; za -~ : ' word value@ find dropstring-with-result -~ interpreter-flags @ 1 & { literal } if ; make-immediate -~ ' za . newline -~ : piz ' za . newline ; piz -~ ~ ' interpret forget quit 2 3 * . -~ ' ' describe ' za describe ' piz describe +-0x10 newline . newline +4 5 + . : za 13 12 - . ; za +: ' word value@ find dropstring-with-result + interpreter-flags @ 1 & { literal } if ; make-immediate +' za . newline +: piz ' za . newline ; piz +~ ' interpret forget quit 2 3 * . +' ' describe ' za describe ' piz describe bye |