summary refs log tree commit diff
path: root/dynamic.e
diff options
context:
space:
mode:
Diffstat (limited to 'dynamic.e')
-rw-r--r--dynamic.e370
1 files changed, 370 insertions, 0 deletions
diff --git a/dynamic.e b/dynamic.e
new file mode 100644
index 0000000..7c7ff13
--- /dev/null
+++ b/dynamic.e
@@ -0,0 +1,370 @@
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~
+~ ~~ Dynamic definitions ~~
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   This file provides additional facilities which are fundamental parts of
+~ Evocation as a language, but which it's not possible to define until global
+~ variables are available. Therefore it is incompatible with the label
+~ transform, but compatible with the log-load transform and written to obey
+~ its constraints; see transform.e for more details on that.
+~
+~   The code here relies on the words "log", "s0", "r0", "latest", and "here".
+~ These five global variables are the root of all our other data structures.
+~ They are defined specially by warm-start in execution.e, since there is no
+~ way to create regular definitions for them. Thus, they come to us already
+~ set up.
+~
+~   It may not be obvious, but when a regular docol-based Evocation word is
+~ compiled, it hardcodes pointers to all the words it references, which will
+~ be part of it forever after. Thus, these words can reference "here" and so
+~ on, and they'll just know where to find it, no runtime mechanism for looking
+~ it up is needed. That's important, because there are no good ways to build
+~ such a mechanism! It would have to dedicate a register or something of that
+~ nature, and registers are far too precious for such a use.
+~
+~   In this file, we define a bunch of more sophisticated ways to work with
+~ the log, then we use it to define a high-level flow control facility which
+~ saves us from having to compute branch offsets by hand. Pleasantly, we get
+~ to use this facility before it's actually defined, since the log-load
+~ transform also provides it. That being the case, we might as well start with
+~ whatever's most urgent - which is, of course, the debugging tools.
+
+
+~ Debugging tools for real
+~ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+: stack
+  s0 @ 8 - { dup value@ 8 + != }
+  { dup s0 @ 8 - != { space } if dup @ . 8 - }
+  while drop newline ;
+
+: stackhex
+  s0 @ 8 - { dup value@ 8 + != }
+  { dup s0 @ 8 - != { space } if dup @ .hex64 8 - }
+  while drop newline ;
+
+~ (pointer -- boolean)
+: is-in-log dup log @ <= swap here @ > && ;
+
+~ (-- entry pointer or 0)
+: oldest-entry
+  latest @ { dup @ } { @ } while ;
+
+~ (entry pointer -- entry pointer or 0)
+: next-newer-entry
+  latest @
+  2dup = { 2drop 0 exit } if
+  { dup { 2dup @ != } if }
+  { @ } while swap drop ;
+
+~ (entry pointer -- pointer)
+: guess-entry-end
+  dup entry-flags@ 64 & 64 = { exit } if
+  dup next-newer-entry dup
+    { drop dup is-in-log { drop here @ } { drop } if-else
+      exit } unless
+  swap drop ;
+
+~ (pointer -- entry pointer or 0)
+: containing-entry
+  dup is-in-log { drop 0 exit } unless
+  latest @ { dup { 2dup > } { 0 } if-else }
+    { @ } while swap drop ;
+
+~ (entry pointer -- boolean)
+: is-assembly-word
+  entry-to-execution-token dup 8 + swap @ = ;
+
+~ (entry pointer -- boolean)
+: is-docol-itself
+  entry-to-name s" docol" stringcmp 0 = ;
+
+~   The word named "docol" has the job of returning the value that gets used
+~ as the actual codeword. We make the assumption that the codeword will
+~ point somewhere near the entry header; we allow for the possibility that
+~ it might be before or after.
+~
+~   Generally, it's possible for there to be several copies of docol due to
+~ alternate logs and things like that, so the goal is to recognize any of
+~ them.
+~
+~ (entry pointer -- boolean)
+: is-docol-codeword
+  dup is-in-log { drop 0 exit } unless
+  containing-entry dup
+    { dup is-docol-itself
+       { drop 1 }
+       { next-newer-entry dup { is-docol-itself } if } if-else
+    } if ;
+
+
+~ TODO this only works on log words
+~
+~ (entry pointer -- boolean)
+: is-docol-interpreted-word
+  dup is-assembly-word { drop 0 exit } if
+  entry-to-execution-token @ is-docol-codeword ;
+
+
+~ (pointer -- boolean)
+: is-codeword-pointer
+  dup is-in-log { drop 0 exit } unless
+  dup containing-entry dup { 2drop 0 exit } unless
+  entry-to-execution-token = ;
+
+
+~ (width --)
+: indent { dup } { space 1- } while drop ;
+
+~ (entry pointer --)
+: word-heading
+  dup entry-to-name dup emitstring space
+  stringlen 1+ 54 swap - 0 max indent dup .hex64
+  dup entry-flags@ dup
+    { space
+      dup 128 & { s" H" emitstring } if
+      dup 64 & { s" M" emitstring } if
+      dup 1 & { s" I" emitstring } if
+    } if drop
+  dup is-assembly-word { s"  asm" emitstring }
+    { dup is-docol-interpreted-word { s"  raw" emitstring } unless
+    } if-else drop
+  newline ;
+
+
+: list-dictionary
+  oldest-entry { dup }
+  { dup word-heading next-newer-entry } while drop ;
+
+
+~ (content end, content start, label start --)
+: hexdump-row
+  2 indent dup .hex32 dup 4 unroll
+  0 { dup 16 > }
+  { dup 7 & 0 = { space } if space
+    2dup + dup 4 pick <= swap 5 pick > &&
+      { 2dup + 8@ .hex8 } { space space } if-else
+    1+ } while
+  newline 5 ndrop ;
+
+
+~ (end, start --)
+: hexdump-between
+  dup 16 1- invert &
+  { dup 3 pick > }
+  { 3dup hexdump-row 16 + } while 3 ndrop ;
+
+
+~ (start, length --)
+: hexdump-from swap dup 3unroll + swap hexdump-between ;
+
+
+~ (start --)
+: hexdump 64 hexdump-from ;
+
+
+~ (entry pointer --)
+: describe-hex
+  dup word-heading
+  dup guess-entry-end swap entry-to-execution-token
+  hexdump-between ;
+
+
+~ (entry pointer --)
+: describe-docol
+  dup word-heading
+  dup guess-entry-end swap entry-to-execution-token 8 +
+  { 2dup < }
+  { space dup @ dup is-codeword-pointer
+    { execution-token-to-entry entry-to-name emitstring }
+    { . } if-else
+    8 + } while newline ;
+
+
+~ (entry pointer --)
+: describe
+  dup is-docol-interpreted-word
+  { describe-docol } { describe-hex } if-else ;
+
+
+: describe-all
+  oldest-entry { dup }
+  { dup describe next-newer-entry } while drop ;
+
+
+~ Log manipulation
+~ ~~~~~~~~~~~~~~~~
+
+~   In general, we're going to want to be able to go on little excursions
+~ where we define utility words that are only useful for one task, then
+~ deallocate that stuff after we're done with it. We implement "forget",
+~ which removes both dictionary entries and heap allocations for the entry
+~ pointer it's given and everything that came after.
+~
+~   The implementation strategy is the same as Jonesforth's version, but
+~ Jonesforth runs in immediate mode and reads a word to operate on, whereas
+~ ours takes an entry pointer and runs in either compiled or immediate
+~ modes.
+~
+~ (entry pointer --)
+: forget dup @ latest ! here ! ;
+
+~ (value --)
+: , here @ swap pack64 here ! ;
+
+~   We'll be defining a lot of immediate words, so we should set up a terse
+~ way to do that.
+: make-immediate latest @ dup entry-flags@ 0x01 | entry-flags! ;
+: make-hidden latest @ dup entry-flags@ 0x80 | entry-flags! ;
+: make-visible latest @ dup entry-flags@ 0x80 invert & entry-flags! ;
+
+~   Sooner or later we'll want to define recursive words; this one lets us
+~ do that. It compiles into a call to the word that's currently being
+~ defined (strictly speaking, the one whose definition was most recently
+~ begun).
+: recurse latest @ entry-to-execution-token , ; make-immediate
+
+~   The word "'", often pronounced "tick", quotes the following word, looking
+~ it up and treating it as a constant. In immediate mode, the constant winds
+~ up on the stack; in compile mode it gets compiled.
+~
+~   There are a few possible implementation strategies here. Running as an
+~ immediate word means there's a clear and unambiguous concept of "the
+~ following word", so that's what we do; otherwise we'd have to get clever
+~ about somehow finding out where we were called from. That means we take on
+~ what would otherwise be the interpreter's responsibility, of checking what
+~ mode we're in. Happily, that's easy to do.
+~
+~   Though it might be nice to have high-level flow control for this, our
+~ implementation of "if" below relies on "'" several times, whereas "'" only
+~ branches once. So we bootstrap "'" first.
+~ : ' word value@ find dropstring-with-result
+~   interpreter-flags @ 1 & 0branch [ 2 8 * , ] literal
+~   ; make-immediate
+
+
+~ High-level flow-control
+~ ~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   We use a novel suffix-based approach to flow control. We define words
+~ { and } which describe the boundaries of blocks of code, leaving a
+~ description on the value stack, while still compiling the contents
+~ normally.
+~
+~   Then follow-up words such as "if" can use that information to slide
+~ the blocks around and insert any needed branches and other logic.
+~
+~   These words get their own file because they of course have very high
+~ importance to bootstrapping, and it's useful to be able to see where they
+~ fall in the list of files.
+~
+~   Both the label transform and the log-load transform go out of their way
+~ to make sure these words work.
+
+
+~ ~ (-- start pointer)
+~ : { here @ ; make-immediate
+~
+~ ~ (start pointer -- start pointer, length)
+~ : } dup here @ swap - ; make-immediate
+~
+~
+~ ~ (start pointer, length --)
+~ : if 2dup swap dup 5 8 * + 3unroll swap
+~
+~ ~ (start pointer, length, adjusted start pointer, start pointer, length)
+~   memmove
+~ ~ (start pointer, length)
+~   swap here @ swap here ! swap
+~ ~ (old here, length)
+~   ' lit entry-to-execution-token , 0 ,
+~   ' != entry-to-execution-token ,
+~ ~   The branch length needs to be one word longer than the block length,
+~ ~ because the length field itself is part of the scope of the branch.
+~   ' 0branch entry-to-execution-token , dup 8 + ,
+~ ~ (old here, length)
+~   drop 5 8 * + here ! ; make-immediate
+~
+~
+~ ~ (start pointer, length --)
+~ : unless 2dup swap dup 5 8 * + 3unroll swap
+~ ~ (start pointer, length, start pointer, adjusted start pointer, length)
+~   memmove
+~ ~ (start pointer, length)
+~   swap here @ swap here ! swap
+~ ~ (old here, length)
+~   ' lit entry-to-execution-token , 0 ,
+~   ' = entry-to-execution-token ,
+~ ~   The branch length needs to be one word longer than the block length,
+~ ~ because the length field itself is part of the scope of the branch.
+~   ' 0branch entry-to-execution-token , dup 8 + ,
+~ ~ (old here, length)
+~   drop 5 8 * + here ! ; make-immediate
+~
+~
+~ ~ (true start, true length, false start, false length --)
+~ : if-else
+~   dup 4 roll dup 5 unroll +
+~ ~
+~ ~   First we slide the false-block forward, then the true-block. We slide
+~ ~ them both directly into their final positions, leaving space at the start
+~ ~ for a test and branch, and space in between for an unconditional branch.
+~ ~ Those spaces will take five words, and two words, respectively. So the
+~ ~ false-block gets moved by seven words, and the true-block gets moved by
+~ ~ five words.
+~   2dup swap dup 7 8 * + swap 3roll memmove
+~   4 roll dup 5 unroll 4 roll dup 5 unroll
+~   swap dup 5 8 * + swap 3roll memmove
+~ ~ (true start, true length, false start, false length)
+~ ~
+~ ~   Now we write out the initial test-and-branch.
+~   4 roll dup 5 unroll here @ 6 unroll here !
+~ ~ (old here, true start, true length, false start, false length)
+~   ' lit entry-to-execution-token , 0 ,
+~   ' != entry-to-execution-token ,
+~ ~   Branch past the length field, the true-block, and the unconditional
+~ ~ branch in the middle.
+~   ' 0branch entry-to-execution-token ,
+~   3roll dup 4 unroll 3 8 * + ,
+~ ~
+~ ~  Next, write out the unconditional branch in the middle.
+~   swap dup 3unroll 5 8 * + here !
+~   ' branch entry-to-execution-token ,
+~ ~  Branch past the length field and the false-block.
+~   dup 8 + ,
+~ ~
+~ ~  Set "here" to point to the true end.
+~   drop drop drop drop 7 8 * + here !
+~   ; make-immediate
+~
+~
+~ ~ (start, length --)
+~ : forever
+~   ' branch entry-to-execution-token , 8 + -1 * , drop
+~   ; make-immediate
+~
+~
+~ ~   This slides the body forward, leaving the test where it is. It puts a
+~ ~ conditional branch in-between them, then appends an unconditional branch
+~ ~ at the end.
+~ ~
+~ ~ (test start, test length, body start, body length --)
+~ : while
+~ ~   The conditional branch needs five words.
+~   2dup swap dup 5 8 * + swap 3roll memmove
+~   here @ 5 unroll swap dup 3unroll here !
+~ ~ (old here, test start, test length, body start, body length)
+~   ' lit entry-to-execution-token , 0 ,
+~   ' != entry-to-execution-token ,
+~ ~ Branch past the length field, the body, and the unconditional branch.
+~   ' 0branch entry-to-execution-token ,
+~   dup 3 8 * + ,
+~ ~ Set "here" to the new end.
+~   5 8 * 6 roll + here !
+~ ~ (test start, test length, body start, body length)
+~ ~   Unconditionally branch backwards past the branch word, the body, the
+~ ~ conditional branch, and the test.
+~   ' branch entry-to-execution-token ,
+~   6 8 * + swap drop + swap drop -1 * ,
+~   ; make-immediate
+~