1 files changed, 160 insertions, 129 deletions
diff --git a/dynamic.e b/dynamic.e
index 7c7ff13..0adc5c6 100644
--- a/dynamic.e
+++ b/dynamic.e
@@ -192,13 +192,26 @@
   { dup describe next-newer-entry } while drop ;
 
 
+: describe-compilation
+  ~   Debug aid: prints "here", "latest", and the newest entry's name. The
+  ~ header is always true ;) and just sets this apart from other debug output.
+  ." compilation in progress" newline
+  latest @ hexdump       ~ Presumably dumps from the newest entry; TODO confirm.
+  newline
+  ."   here " here @ .hex64 newline
+  ."   latest " latest @ .hex64 newline
+  ."   name of latest: " latest @ entry-to-name emitstring newline
+  newline ;
+
+
+
 ~ Log manipulation
 ~ ~~~~~~~~~~~~~~~~
 
 ~   In general, we're going to want to be able to go on little excursions
 ~ where we define utility words that are only useful for one task, then
 ~ deallocate that stuff after we're done with it. We implement "forget",
-~ which removes both dictionary entries and heap allocations for the entry
+~ which removes both dictionary entries and log allocations for the entry
 ~ pointer it's given and everything that came after.
 ~
 ~   The implementation strategy is the same as Jonesforth's version, but
@@ -224,147 +237,165 @@
 ~ begun).
 : recurse latest @ entry-to-execution-token , ; make-immediate
 
-~   The word "'", often pronounced "tick", quotes the following word, looking
-~ it up and treating it as a constant. In immediate mode, the constant winds
-~ up on the stack; in compile mode it gets compiled.
-~
-~   There are a few possible implementation strategies here. Running as an
-~ immediate word means there's a clear and unambiguous concept of "the
-~ following word", so that's what we do; otherwise we'd have to get clever
-~ about somehow finding out where we were called from. That means we take on
-~ what would otherwise be the interpreter's responsibility, of checking what
-~ mode we're in. Happily, that's easy to do.
+
+~   The implementation of find-in is in log-load.e, for now.
 ~
-~   Though it might be nice to have high-level flow control for this, our
-~ implementation of "if" below relies on "'" several times, whereas "'" only
-~ branches once. So we bootstrap "'" first.
-~ : ' word value@ find dropstring-with-result
-~   interpreter-flags @ 1 & 0branch [ 2 8 * , ] literal
-~   ; make-immediate
+~ (string pointer -- entry pointer or 0)  TODO confirm latest needs no "@"
+: find latest swap find-in ;
 
 
-~ High-level flow-control
-~ ~~~~~~~~~~~~~~~~~~~~~~~
-~
-~   We use a novel suffix-based approach to flow control. We define words
-~ { and } which describe the boundaries of blocks of code, leaving a
-~ description on the value stack, while still compiling the contents
-~ normally.
-~
-~   Then follow-up words such as "if" can use that information to slide
-~ the blocks around and insert any needed branches and other logic.
+~   Allocates bytes on the log by incrementing the global "here" pointer. The
+~ "here" pointer is kept aligned to an 8-byte boundary, regardless of the size
+~ requested.
 ~
-~   These words get their own file because they of course have very high
-~ importance to bootstrapping, and it's useful to be able to see where they
-~ fall in the list of files.
+~   This does not create dictionary entries, it's just a raw memory interface.
+~ It's suitable for allocating data or scratch space.
 ~
-~   Both the label transform and the log-load transform go out of their way
-~ to make sure these words work.
+~ (size -- pointer)
+: allocate
+  here @ dup          ~ One copy is the result; the other gets advanced.
+  ~ (size, old here, old here)
+  3roll + 8 packalign here ! ;   ~ Leaves (old here) as the returned pointer.
 
 
-~ ~ (-- start pointer)
-~ : { here @ ; make-immediate
-~
-~ ~ (start pointer -- start pointer, length)
-~ : } dup here @ swap - ; make-immediate
-~
+~   Allocate space by incrementing "here", and output a word entry header in
+~ it. Also add it to the "latest" linked list. Use zero as the flag values;
+~ accept a string pointer on the stack and use its contents as the name.
 ~
-~ ~ (start pointer, length --)
-~ : if 2dup swap dup 5 8 * + 3unroll swap
+~   This is the first step of creating a new word. Its responsibility includes
+~ everything up to the codeword, not including the codeword; it leaves things
+~ all set up to start appending contents to the new word by calling ",".
 ~
-~ ~ (start pointer, length, adjusted start pointer, start pointer, length)
-~   memmove
-~ ~ (start pointer, length)
-~   swap here @ swap here ! swap
-~ ~ (old here, length)
-~   ' lit entry-to-execution-token , 0 ,
-~   ' != entry-to-execution-token ,
-~ ~   The branch length needs to be one word longer than the block length,
-~ ~ because the length field itself is part of the scope of the branch.
-~   ' 0branch entry-to-execution-token , dup 8 + ,
-~ ~ (old here, length)
-~   drop 5 8 * + here ! ; make-immediate
+~   There's a handy diagram of the entry header format under "quick
+~ reference", in the description of the execution model in execution.e. Create
+~ is responsible for everything up to the codeword, not including it.
 ~
+~   When a word is created in interpret mode using s" to provide a string
+~ literal, the temporary space that s" uses is in the same place as the
+~ entry header we're going to write out. It really is very useful to have
+~ that work. Fortunately, it does! We're able to avoid needing a special case
+~ by doing things in a very careful way, as described below.
 ~
-~ ~ (start pointer, length --)
-~ : unless 2dup swap dup 5 8 * + 3unroll swap
-~ ~ (start pointer, length, start pointer, adjusted start pointer, length)
-~   memmove
-~ ~ (start pointer, length)
-~   swap here @ swap here ! swap
-~ ~ (old here, length)
-~   ' lit entry-to-execution-token , 0 ,
-~   ' = entry-to-execution-token ,
-~ ~   The branch length needs to be one word longer than the block length,
-~ ~ because the length field itself is part of the scope of the branch.
-~   ' 0branch entry-to-execution-token , dup 8 + ,
-~ ~ (old here, length)
-~   drop 5 8 * + here ! ; make-immediate
+~ (string pointer --)
+: create
+  ~   We add one to the string length in order to include the trailing null
+  ~ terminator. This will be the length of our name field; we save an extra
+  ~ copy of it to help with packing later.
+  dup stringlen 1 + dup 3unroll
+  ~ (name field length, string pointer, name field length)
+
+  ~   We use memmove to put the string in its final position, because it works
+  ~ correctly when the destination overlaps with the source. Notice that we
+  ~ do this before writing anything else in the entry header, to avoid
+  ~ stepping on it. The name always starts ten bytes into the header (an 8-byte
+  ~ link, a flags byte, and a null byte), so we can use a fixed offset.
+  here @ 10 + 3unroll memmove
+  ~ (name field length)
+
+  ~   Now we can get back to the fields that belong at the start of the entry
+  ~ header. We take the value of "here" and keep a working copy of it on the
+  ~ stack, which we'll advance every time we write more bytes.
+  here @
+  ~ (name field length, working copy of "here")
+
+  ~   Pack the old value of "latest" as the first field of the header, linking
+  ~ from the newly-defined word to the next-newest word.
+  ~
+  ~   All the entries form a linked list, from newest to oldest. Since the
+  ~ link is the first field in the entry header, you can get from each entry
+  ~ to the one before it just by dereferencing the entry pointer.
+  latest @ pack64
+
+  ~   This is the flags byte. It starts at zero; our caller can change it if
+  ~ desired.
+  0 pack8
+
+  ~   This is the "other" null terminator, used when traversing the name
+  ~ string backwards for execution-token-to-entry. Yes, the name is
+  ~ null-terminated at both ends.
+  0 pack8
+
+  + ~ The name field is already populated, so just skip past it.
+  ~ (updated "here" pointer)
+
+  ~   The codeword is aligned to a machine-word boundary, and the padding for
+  ~ it is create's responsibility.
+  ~
+  ~   By adding the null terminator before adding alignment padding, we've
+  ~ made sure there's always at least one null byte. Otherwise we'd be missing
+  ~ the terminator if by chance the name were exactly the wrong length.
+  8 packalign
+  ~ (updated "here" pointer)
+
+  ~   Retrieve the value of "here", which still doesn't reflect our additions,
+  ~ and store it at the address of "latest". It's the start of our
+  ~ newly-defined word, which makes it the latest word.
+  here @ latest !
+
+  ~   Finally, we write our updated value of "here" back into the variable.
+  here ! ;
+
+~ (--) Appends a codeword that points at the cell right after itself.
+: self-codeword here @ 8 + , ;
+
+
+~   A variable is simply a word that returns a specific address, always the
+~ same one, at which a value can be stored. This word "variable" takes an
+~ address and a word name, and defines the word. Allocating space is its
+~ caller's responsibility.
 ~
+~ TODO the address is constant but the contents vary, confusing, write it up
 ~
-~ ~ (true start, true length, false start, false length --)
-~ : if-else
-~   dup 4 roll dup 5 unroll +
-~ ~
-~ ~   First we slide the false-block forward, then the true-block. We slide
-~ ~ them both directly into their final positions, leaving space at the start
-~ ~ for a test and branch, and space in between for an unconditional branch.
-~ ~ Those spaces will take five words, and two words, respectively. So the
-~ ~ false-block gets moved by seven words, and the true-block gets moved by
-~ ~ five words.
-~   2dup swap dup 7 8 * + swap 3roll memmove
-~   4 roll dup 5 unroll 4 roll dup 5 unroll
-~   swap dup 5 8 * + swap 3roll memmove
-~ ~ (true start, true length, false start, false length)
-~ ~
-~ ~   Now we write out the initial test-and-branch.
-~   4 roll dup 5 unroll here @ 6 unroll here !
-~ ~ (old here, true start, true length, false start, false length)
-~   ' lit entry-to-execution-token , 0 ,
-~   ' != entry-to-execution-token ,
-~ ~   Branch past the length field, the true-block, and the unconditional
-~ ~ branch in the middle.
-~   ' 0branch entry-to-execution-token ,
-~   3roll dup 4 unroll 3 8 * + ,
-~ ~
-~ ~  Next, write out the unconditional branch in the middle.
-~   swap dup 3unroll 5 8 * + here !
-~   ' branch entry-to-execution-token ,
-~ ~  Branch past the length field and the false-block.
-~   dup 8 + ,
-~ ~
-~ ~  Set "here" to point to the true end.
-~   drop drop drop drop 7 8 * + here !
-~   ; make-immediate
+~ (address for new variable word to point to, string pointer --)
+: variable
+  create                      ~ (address) Entry header, up to the codeword.
+  self-codeword               ~ Codeword points at the code emitted below.
+  here @                      ~ (address, output point)
+  swap :rax mov-reg64-imm64   ~ (output point) The address is the immediate.
+  :rax push-reg64             ~ The new word returns the address by pushing it.
+  pack-next                   ~ The usual ending for an assembly word.
+  8 packalign
+  here ! ;                    ~ Commit the output point back to "here".
+
+
+~   A keyword is a word that evaluates to its own address, which makes it
+~ suitable for use as a constant. By convention, all our keywords have names
+~ starting with a colon, which imitates the way they work in Common Lisp.
 ~
+~   Specifically, it returns its own execution token. Thus, executing its
+~ result repeatedly will keep giving the same value. We aren't in the habit of
+~ doing quote-exec kinds of things in Evocation, but it seems as good as any
+~ other unique value, so we might as well.
 ~
-~ ~ (start, length --)
-~ : forever
-~   ' branch entry-to-execution-token , 8 + -1 * , drop
-~   ; make-immediate
-~
-~
-~ ~   This slides the body forward, leaving the test where it is. It puts a
-~ ~ conditional branch in-between them, then appends an unconditional branch
-~ ~ at the end.
-~ ~
-~ ~ (test start, test length, body start, body length --)
-~ : while
-~ ~   The conditional branch needs five words.
-~   2dup swap dup 5 8 * + swap 3roll memmove
-~   here @ 5 unroll swap dup 3unroll here !
-~ ~ (old here, test start, test length, body start, body length)
-~   ' lit entry-to-execution-token , 0 ,
-~   ' != entry-to-execution-token ,
-~ ~ Branch past the length field, the body, and the unconditional branch.
-~   ' 0branch entry-to-execution-token ,
-~   dup 3 8 * + ,
-~ ~ Set "here" to the new end.
-~   5 8 * 6 roll + here !
-~ ~ (test start, test length, body start, body length)
-~ ~   Unconditionally branch backwards past the branch word, the body, the
-~ ~ conditional branch, and the test.
-~   ' branch entry-to-execution-token ,
-~   6 8 * + swap drop + swap drop -1 * ,
-~   ; make-immediate
+~   Unlike CL, we don't currently have the lexer automatically create keywords
+~ for us; we create them explicitly. That's likely to be added at some point,
+~ but at the moment the feature is lying fallow to see whether it winds up
+~ seeing a lot of use.
 ~
+~ (string pointer --)
+: keyword
+  create
+
+  ~   Before outputting our codeword, save a copy of the address where it's
+  ~ going to be. That will be the execution token we return.
+  here @ dup
+  ~ (self execution token, output point)
+
+  ~   Now add a codeword. This is an assembly word, so it's a self-codeword,
+  ~ meaning it points to the word right after itself.
+  dup 8 + pack64
+  ~ (self execution token, output point)
+
+  ~ Consume the token as the immediate. Like "variable", output point is first.
+  swap :rax mov-reg64-imm64
+  ~ (output point)
+
+  ~ To return it, we push it to the stack.
+  :rax push-reg64
+
+  ~ Now just the normal stuff every assembly word ends with.
+  pack-next
+  8 packalign
+
+  here ! ;
+