1 files changed, 202 insertions, 34 deletions
diff --git a/interpret.e b/interpret.e
index 235c98b..4517b25 100644
--- a/interpret.e
+++ b/interpret.e
@@ -4,8 +4,8 @@
 ~
 ~   The code in this file defines the basic syntax and semantics of Forth as
 ~ a text-based language. It's written in terms of the underlying executor,
-~ which is implemented and explained in evoke.e. The execution model gives us
-~ the concept of "words"; the control and value stacks; and the ability to
+~ which is implemented and explained in execution.e. The execution model gives
+~ us the concept of "words"; the control and value stacks; and the ability to
 ~ call things. It has nothing to say about text, only about the binary form of
 ~ the language.
 ~
@@ -59,8 +59,8 @@
 ~ all set up to start appending contents to the new word by calling ",".
 ~
 ~   There's a handy diagram of the entry header format under "quick
-~ reference", in the description of the exeuction model in evoke.e. Create is
-~ responsible for everything up to the codeword, not including it.
+~ reference", in the description of the exeuction model in exeuction.e. Create
+~ is responsible for everything up to the codeword, not including it.
 ~
 ~   When a word is created in interpret mode using s" to provide a string
 ~ literal, the temporary space that s" uses is in the same place as the
@@ -127,10 +127,34 @@
   ~   Finally, we write our updated value of "here" back into the variable.
   here ! ;
 
-~ ,                                                     0000001000018080
-~ self-codeword                                         00000010000180d0
-~ variable                                              0000001000018128
-~ allocate                                              00000010000181c8
+~ (value to append to current word-in-progress --)
+: , here @ swap pack64 here ! ;
+
+: self-codeword here @ 8 + , ;
+
+~ (address for new variable word to create, name string --)
+: variable
+  create
+  self-codeword
+  here @
+  swap :rax mov-reg64-imm64
+  :rax push-reg64
+  next
+  8 packalign
+  here ! ;
+
+~   Allocates bytes on the heap by incrementing the global "here" pointer. The
+~ "here" pointer is kept aligned to an 8-byte boundary, regardless of the size
+~ requested.
+~
+~   This does not create dictionary entries, it's just a raw memory interface.
+~ It's suitable for allocating data or scratch space.
+: allocate
+  here @ dup
+  ~ (size, here value, here value)
+  3roll + 8 packalign here ! ;
+
+~ TODO
 ~ buffer-physical-start                                 0000001000018240
 ~ buffer-physical-length                                0000001000018270
 ~ buffer-logical-start                                  00000010000182c0
@@ -157,31 +181,174 @@
 ~ accumulate-string                                     0000001000018fc8
 ~ word                                                  00000010000194a0
 ~ find                                                  00000010000195f0
-~ is-alphanumeric                                       0000001000019628
-~ generalized-digit-value                               0000001000019850
-~ decode-generalized-digit                              0000001000019970
-~ read-base-unsigned                                    0000001000019a58
-~ read-integer-unsigned                                 0000001000019cb8
-~ read-integer                                          0000001000019eb0
+
+~ (character -- 1 for true or 0 for false)
+: is-alphanumeric
+  ~ We don't have a character-literal syntax; the hex constants here are
+  ~ ASCII codes.
+  dup 0x30 stackhex > { drop 0 exit } if ~ Less than "0".
+  dup 0x39 >= { drop 1 exit } if ~ Less than or equal to "9".
+  dup 0x41 > { drop 0 exit } if ~ Less than "A".
+  dup 0x5a >= { drop 1 exit } if ~ Less than or equal to "Z".
+  dup 0x61 > { drop 0 exit } if ~ Less than "a".
+  dup 0x7a >= { drop 1 exit } if ~ Less than or equal to "z".
+  drop 0 ; ~ Greater than "z".
+
+
+~ (character -- value)
+: generalized-digit-value
+  ~ We don't have a character-literal syntax; the hex constants here are
+  ~ ASCII codes.
+  dup 0x61 <= { 0x61 - 10 + exit } if ~ lowercase "a"
+  dup 0x41 <= { 0x41 - 10 + exit } if ~ uppercase "a"
+  0x30 - ; ~ digit "0"
+
+
+~ (character, base
+~  -- value (if successful),
+~     error indicator (zero equals success))
+: decode-generalized-digit
+  swap dup is-alphanumeric {
+    ~ It's alphanumeric.
+    ~ (base, character)
+    generalized-digit-value
+    ~ (base, value)
+    dup 3roll
+    ~ (value, value, base)
+    > {
+      ~ It's in range.
+      ~ (value)
+      0 exit } if
+    ~ It's out of range.
+    ~ (value)
+    drop 1 exit } if
+  ~ It's not alphanumeric.
+  drop drop 1 ;
+
+
+~ (string pointer, base
+~  -- result (if successful),
+~     error indicator (zero equals success))
+: read-base-unsigned
+  swap
+
+  ~ If the first byte is null, this is an error
+  unpack8
+  ~ (numeric base, current point in string, character)
+  dup 0 = { drop drop drop 1 exit } if
+
+  ~ Decode the first byte as a generalized digit in the base.
+  ~ (numeric base, current point in string, character)
+  ~ If the first byte is less than "0", this is an error.
+  3roll dup 4 unroll
+  ~ (numeric base, current point in string, character, numeric base)
+  decode-generalized-digit {
+    ~ (numeric base, current point in string)
+    drop drop 1 exit } if
+
+  ~ The first byte is a valid generalized digit in the appropriate base, so
+  ~ let's get started.
+  ~ (numeric base, current point in string, initial value)
+  swap
+
+  {
+    ~ (numeric base, result so far, current point in string)
+    unpack8 dup 0 = {
+      ~ A null after the first character is valid, and indicates we're done.
+      drop drop swap drop 0 exit } if
+
+    ~ Decode the latest byte as a generalized digit in the base.
+    ~ (numeric base, result so far, current point in string, latest byte)
+    4 roll dup 5 unroll
+    ~ (numeric base, result so far, current point in string, character
+    ~  numeric base)
+    decode-generalized-digit {
+      ~ If the latest character is not a valid digit, that's an error.
+      ~ (numeric base, result so far, current point in string)
+      drop drop drop 1 exit } if
+
+    ~ The latest character is valid, so incorporate it and loop.
+    ~ (numeric base, result so far, current point in string, latest value)
+    3roll 4 roll dup 5 unroll * + swap
+  } forever ;
+
 
 ~ (string pointer
 ~  -- result (if successful),
 ~     error indicator (zero equals success))
-: read-decimal
-  dup unpack8 lit 0 != 0branch [ 6 8 * , ] ~ TODO character literal minus
-  ~ This is the case where it's non-negative.
+: read-integer-unsigned
+  ~ We don't have a character-literal syntax; the hex constants here are
+  ~ ASCII codes.
+  dup unpack8 0x30 != { ~ digit "0"
+    ~ This is the case where the leading digit is not a zero.
+    ~ (original string pointer, advanced string pointer)
+    drop 10 read-base-unsigned exit } if
+
+  ~ This is the case where the leading digit is a zero.
   ~ (original string pointer, advanced string pointer)
-  drop 10 read-base-unsigned exit
+  unpack8 dup 0x78 = { ~ lowercase "x"
+    ~ (original string pointer, doubly advanced string pointer, character)
+    drop swap drop 16 read-base-unsigned exit } if
+
+  dup 0x6f = { ~ lowercase "o"
+    ~ (original string pointer, doubly advanced string pointer, character)
+    drop swap drop 8 read-base-unsigned exit } if
+
+  dup 0x62 = { ~ lowercase "b"
+    ~ (original string pointer, doubly advanced string pointer, character)
+    swap drop swap 2 read-base-unsigned exit } if
+
+  ~ This is the case where the second character is something else.
+  ~ (original string pointer, doubly advanced string pointer, character)
+  drop drop 10 read-base-unsigned ;
+
+
+~ (string pointer
+~  -- result (if successful),
+~     error indicator (zero equals success))
+: read-integer
+  ~ We don't have a character-literal syntax; this is ASCII for a hyphen.
+  dup unpack8 0x2d != {
+    ~ This is the case where it's non-negative.
+    ~ (original string pointer, advanced string pointer)
+    drop read-integer-unsigned exit
+  } if
 
   ~ This is the case where it's negative.
   ~ (original string pointer, advanced string pointer)
-  swap drop 10 read-base-unsigned
+  swap drop read-integer-unsigned
   ~ (result maybe, exit code)
-  dup 0branch [ 2 8 * , ]
+  dup {
+    ~ Failure
+    ~ (non-zero exit code)
+    exit
+  } if
+
+  ~ Success
+  ~ (result, zero exit code)
+  swap -1 * swap ;
+
+
+~ (string pointer
+~  -- result (if successful),
+~     error indicator (zero equals success))
+: read-decimal
+  ~ We don't have a character-literal syntax; this is ASCII for a hyphen.
+  dup unpack8 0x2d != {
+    ~ This is the case where it's non-negative.
+    ~ (original string pointer, advanced string pointer)
+    drop 10 read-base-unsigned exit
+  } if
 
-  ~ Failure
-  ~ (non-zero exit code)
-  exit
+  ~ This is the case where it's negative.
+  ~ (original string pointer, advanced string pointer)
+  swap drop 10 read-base-unsigned
+  ~ (result maybe, exit code)
+  dup {
+    ~ Failure
+    ~ (non-zero exit code)
+    exit
+  } if
 
   ~ Success
   ~ (result, zero exit code)
@@ -198,10 +365,10 @@
 ~ we give it a name.
 
 ~ TODO this is the "create" / "here" conflict thing
-~ describe-compilation
-~ ' interpreter-flags-storage describe
-~ ' interpreter-flags describe
-~ newline
+describe-compilation
+' interpreter-flags-storage describe
+' interpreter-flags describe
+newline
 ~ here @ hexdump
 ~ s" interpreter-flags-storage" stackhex create stackhex ~ make-immediate 0 ,
 ~ ~ latest @ dup unhide-entry s" interpreter-flags" variable
@@ -337,12 +504,13 @@
 : quit { interpret } forever ;
 
 ~ quit
-~ 4 5 + . : za 13 12 - . ; za
-~ : ' word value@ find dropstring-with-result
-~   interpreter-flags @ 1 & { literal } if ; make-immediate
-~ ' za . newline
-~ : piz ' za . newline ; piz
-~ ~ ' interpret forget quit 2 3 * .
-~ ' ' describe ' za describe ' piz describe
+-0x10 newline . newline
+4 5 + . : za 13 12 - . ; za
+: ' word value@ find dropstring-with-result
+  interpreter-flags @ 1 & { literal } if ; make-immediate
+' za . newline
+: piz ' za . newline ; piz
+~ ' interpret forget quit 2 3 * .
+' ' describe ' za describe ' piz describe
 bye