6 files changed, 1235 insertions, 114 deletions
diff --git a/amd64.e b/amd64.e
index 4ffc64f..e098505 100644
--- a/amd64.e
+++ b/amd64.e
@@ -851,11 +851,21 @@ s" :cc-greater" keyword
 ~ Control flow instructions
 ~ ~~~~~~~~~~~~~~~~~~~~~~~~~
 
+~   Pretend to subtract right from left, and set the flags the same way as if
+~ we actually had.
+~
 ~ (output point, left register, right register -- output point)
 : cmp-reg64-reg64
   3roll rex-w 0x3B pack8 3unroll
   reg64 swap addressing-reg64 ;
 
+~   Pretend to xor left with right, and set the flags the same way as if we
+~ actually had.
+~
+~   The names of the condition codes can be a little confusing when using them
+~ after "test", because they're really premised on the idea that you did
+~ "cmp".
+~
 ~ (output point, left register, right register -- output point)
 : test-reg64-reg64
   3roll rex-w 0x85 pack8 3unroll
diff --git a/core.e b/core.e
index 8c14948..1094e45 100644
--- a/core.e
+++ b/core.e
@@ -384,6 +384,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack greater than the second item in the stack, when
+~ both are treated as signed?
 : >
   [ here @
     :rax pop-reg64
@@ -394,7 +396,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
-~ Is the top of the stack less than the second item in the stack?
+~   Is the top of the stack less than the second item in the stack, when both
+~ are treated as signed?
 : <
   [ here @
     :rax pop-reg64
@@ -405,6 +408,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack greater than or equal to the second item in the
+~ stack, when both are treated as signed?
 : >=
   [ here @
     :rax pop-reg64
@@ -415,6 +420,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack less than or equal to the second item in the
+~ stack, when both are treated as signed?
 : <=
   [ here @
     :rax pop-reg64
@@ -425,6 +432,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack greater than the second item in the stack, when
+~ both are treated as unsigned?
 : >unsigned
   [ here @
     :rax pop-reg64
@@ -435,6 +444,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack less than the second item in the stack, when
+~ both are treated as unsigned?
 : <unsigned
   [ here @
     :rax pop-reg64
@@ -445,6 +456,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack greater than or equal to the second item in the
+~ stack, when both are treated as unsigned?
 : >=unsigned
   [ here @
     :rax pop-reg64
@@ -455,6 +468,8 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+~   Is the top of the stack less than or equal to the second item in the
+~ stack, when both are treated as unsigned?
 : <=unsigned
   [ here @
     :rax pop-reg64
@@ -465,6 +480,7 @@ here !
     :rax push-reg64
     here ! ] ;asm
 
+
 ~ Bitwise routines
 ~ ~~~~~~~~~~~~~~~~
 ~
diff --git a/evoke.e b/evoke.e
index 9bc6979..4252e91 100644
--- a/evoke.e
+++ b/evoke.e
@@ -1,6 +1,7 @@
 ~ (cat labels.e elf.e transform.e execution.e \
 ~  echo 131072 read-to-buffer; \
-~  cat core.e core-plus.e linux.e amd64.e execution-support.e log-load.e; \
+~  cat core.e core-plus.e linux.e output.e \
+~      amd64.e execution-support.e log-load.e; \
 ~  echo pyrzqxgl; \
 ~  cat evoke.e) \
 ~     | ./quine > evoke && chmod 755 evoke && ./evoke
@@ -9,6 +10,8 @@ s" source-to-precompile" variable
 
 ~ : fooze 4 . ; fooze
 1024 read-to-buffer
+~   We begin by outputting the actual docol routine, the one that codewords
+~ should point to. Note that this is before we've done any word header.
 here @
 dup L!' docol-codeword-value
 :rsi pack-pushcontrol
@@ -18,20 +21,37 @@ pack-next
 8 packalign
 here !
 
-: docol
-  [ here @
-    L@' docol-codeword-value :rax mov-reg64-imm64
-    :rax push-reg64
-    here ! ] ;asm
+~   We can't use colon to create docol, not even the part that's a word,
+~ because colon tries to dynamically invoke docol to fill in the codeword.
+s" docol" create
+here @
+dup 8 + pack64
+L@' docol-codeword-value :rax mov-reg64-imm64
+:rax push-reg64
+pack-next
+8 packalign
+here !
+
 
-: exit
-  [ here @
-    :rsi pack-popcontrol
-    here ! ] ;asm
+s" exit" create
+here @
+dup 8 + pack64
+:rsi pack-popcontrol
+here !
+
+: foo [ crash 1 2 + ;
 
-: foo 1 2 + ;
 0 sys-exit
+
+~   Now that we have docol, colon will work and we can define exit the normal
+~ way.
+~ : exit
+~   [ here @
+~     :rsi pack-popcontrol
+~     here ! ] ;asm
+
 pyrzqxgl
+~ 0 sys-exit
 s" source-to-copy-to-log" variable
 
 ~ (output memory start, current output point
diff --git a/execution-support.e b/execution-support.e
index b668157..67f9162 100644
--- a/execution-support.e
+++ b/execution-support.e
@@ -95,11 +95,13 @@
 ~
 ~ * rbp points to the top of the control stack.
 ~
+~ TODO can this be correct? it seems backwards?
 ~ (source register, base address -- new base address)
 : pack-pushcontrol
   swap :rbp -8 :rbp lea-reg64-disp8-reg64
   swap :rbp 0 mov-disp8-reg64-reg64 ;
 
+~ TODO can this be correct? it seems backwards?
 ~ (target register, base address -- new base address)
 : pack-popcontrol
   :rbp 0 3roll mov-reg64-disp8-reg64
diff --git a/output.e b/output.e
new file mode 100644
index 0000000..29789b6
--- /dev/null
+++ b/output.e
@@ -0,0 +1,218 @@
+~ Output
+~ ~~~~~~
+~
+~   It is convenient to be able to output text. This depends on the
+~ OS-specific stuff in linux.e, so it's in its own file.
+~
+~   The most interesting word we define here is ".", pronounced "dot"; the
+~ math stuff is needed to implement it.
+~
+~   Unlike Jonesforth, we do not have a global "base" variable, and this word
+~ does not change its behavior depending on that value. It's always base ten.
+~
+~ TODO surely we can find some way to get high-level flow-control
+
+
+~   Strings are comparatively easy.
+~
+~ (string pointer --)
+: emitstring dup stringlen swap sys-write ;
+
+: space 32 value@ emitstring drop ;
+: newline 10 value@ emitstring drop ;
+
+
+~ (base, exponent -- base to the power of exponent)
+: pow
+  1 swap
+
+  ~ If the count of remaining powers is NOT equal to zero, the comparison will
+  ~ return 0, which will cause the zbranch to skip the rest of the line.
+  dup 0 = 0branch [ 5 8 * , ] drop swap drop exit
+  ~ (base, result so far, count of remaining powers)
+
+  1 - 3unroll
+  ~ (updated count of remaining powers, base, result so far)
+
+  swap dup 4 unroll
+  ~ (base, updated count of remaining powers, result so far, base)
+
+  * swap
+  ~ (base, updated result so far, updated count of remaining powers)
+
+  branch [ -22 8 * , ] ;
+
+
+~ (base, value -- floor of logarithm of value in base base)
+: logfloor
+  ~ Start with a product equal to the base, and a count of 0.
+  swap dup 3unroll 0
+
+  ~ This is the start of the loop body.
+  ~ (base, value, product so far, count of powers included so far)
+  3unroll 2dup
+  ~ (base, count so far, value, product so far)
+  >unsigned 0branch [ 6 8 * , ]
+
+  ~ If we get here, we're done.
+  ~ (base, count so far, value, product so far)
+  drop drop swap drop exit
+
+  ~ If we're here, we need to do another loop.
+  ~ (base, count so far, value, product so far)
+  4 roll dup 5 unroll * 3roll 1 +
+  ~ (base, value, updated product so far, updated count so far)
+
+  ~ If the product is less than the base, we overflowed. In that case, the
+  ~ product-so-far is the maximum, so just return it.
+  4 roll dup 5 unroll 3roll dup 4 unroll
+  < 0branch [ 8 8 * , ]
+  4 unroll drop drop drop exit
+
+  ~ Nothing else weird going on, so loop.
+  branch [ -45 8 * , ] ;
+
+
+~ (base, value -- ceiling of logarithm of value in base base)
+: logceil
+  ~ Start with a product of 1 and a count of 0.
+  1 0
+
+  ~ This is the start of the loop body.
+  ~ (base, value, product so far, count of powers included so far)
+  3unroll 2dup
+  ~ (base, count so far, value, product so far)
+  >=unsigned 0branch [ 6 8 * , ]
+
+  ~ If we get here, we're done.
+  ~ (base, count so far, value, product so far)
+  drop drop swap drop exit
+
+  ~ If we're here, we need to do another loop.
+  ~ (base, count so far, value, product so far)
+  4 roll dup 5 unroll * 3roll 1 +
+  ~ (base, value, updated product so far, updated count so far)
+
+  ~ If the product is less than the base, we overflowed. In that case, the
+  ~ product-so-far is the maximum, so just return it.
+  4 roll dup 5 unroll 3roll dup 4 unroll
+  < 0branch [ 8 8 * , ]
+  4 unroll drop drop drop exit
+
+  ~ Nothing else weird going on, so loop.
+  branch [ -45 8 * , ] ;
+
+
+~   This is an extremely inefficient implementation, but on the plus side,
+~ doing that avoids having to think about any sort of memory management or
+~ recursion, and lets us stick entirely with the trivial control-flow
+~ constructs we already have.
+~
+~ (integer to print, base to print in, width to zero-pad to --)
+: .base-unsigned
+  ~ (input, base, width)
+  ~ Compute how many digits we need to display. Because we use logfloor, the
+  ~ logic of always printing at least one digit is already handled for us.
+  3unroll swap 2dup logfloor 1 +
+  ~ (width, base, input, number of digits if no padding)
+  4 roll 2dup > 0branch [ 5 8 * , ]
+  ~ (base, input, number of digits if no padding, width)
+  ~ If we're here, we should use the padded width
+  swap drop branch [ 2 8 * , ]
+  ~ (base, input, number of digits if no padding, width)
+  ~ If we're here, we should use the unpadded width
+  drop
+
+  ~ (base, input, number of digits remaining)
+  ~ This is the start of the loop.
+  2dup 1 -
+  ~ (base, input, number of digits remaining, input, intermediate value)
+  5 roll dup 6 unroll swap
+  ~ (base, input, number of digits remaining, input, base, intermediate value)
+  pow /% swap drop
+  ~ (base, input, number of digits remaining,
+  ~  input divided by base^x appropriately)
+  4 roll dup 5 unroll
+  ~ (base, input, number of digits remaining,
+  ~  input divided by base^x appropriately, base)
+  /% drop
+  ~ (base, input, number of digits remaining, current digit)
+
+  ~   We construct a one-character string on the stack, then use a pointer to
+  ~ it. It will always contain its own null-termination without us having to
+  ~ do anything special to that end.
+  dup 10 > 0branch [ 5 8 * , ]
+  0x30 branch [ 6 8 * , ]                        ~ ASCII "0"
+  10 - 0x61                                      ~ ASCII "a"
+  +
+  value@ emitstring
+
+  ~ We deallocate the string by dropping it.
+  ~
+  ~ Saying it like that makes it sound obvious; contemplate it until it feels
+  ~ surprising.
+  drop
+
+  1 -
+  dup 0branch [ 3 8 * , ] branch [ -51 8 * , ]
+  drop drop drop ;
+
+
+~ (integer to print, base to print in --)
+: .base
+  swap
+  ~ (base, input)
+
+  ~ Deal with negative numbers.
+  dup 0 > 0branch [ 7 8 * , ]
+  -1 *
+  s" -" emitstring
+
+  swap 0 .base-unsigned ;
+
+
+~   Although this could notionally be called emitinteger for symmetry, it's
+~ well-known under the name dot and as a single period character, and that
+~ name participates in conventions for names of other things. So, we go with
+~ it.
+~
+~ (integer to print --)
+: . 10 .base ;
+: .hex 16 0 .base-unsigned ;
+: .hex8 16 2 .base-unsigned ;
+: .hex16 16 4 .base-unsigned ;
+: .hex32 16 8 .base-unsigned ;
+: .hex64 16 16 .base-unsigned ;
+
+~ (integer to print, width --)
+: .hexn 16 swap .base-unsigned ;
+
+
+~ Debugging tools
+~ ~~~~~~~~~~~~~~~
+
+~ TODO this is a horrible, horrible hack
+: s0 0x1000010008 ;
+
+~ TODO replace these with the implementations that use proper flow-control
+: stack
+  s0 @ 8 -
+
+  dup value@ 8 + !=
+  0branch [ 19 8 * , ]
+  dup s0 @ 8 - != 0branch [ 2 8 * , ] space dup @ . 8 -
+  branch [ -25 8 * , ]
+
+  drop newline ;
+
+
+: stackhex
+  s0 @ 8 -
+
+  dup value@ 8 + !=
+  0branch [ 19 8 * , ]
+  dup s0 @ 8 - != 0branch [ 2 8 * , ] space dup @ .hex64 8 -
+  branch [ -25 8 * , ]
+
+  drop newline ;
+
diff --git a/transform.e b/transform.e
index 2516e53..581cb0b 100644
--- a/transform.e
+++ b/transform.e
@@ -245,11 +245,13 @@
 : transform-state-saved-here ;
 : transform-state-saved-latest 8 + ;
 : transform-state-output-buffer-start 2 8 * + ;
+: transform-state-user-stack-depth 3 8 * + ;
 : allocate-transform-state
-  3 8 * allocate
+  4 8 * allocate
   dup transform-state-saved-here 0 swap !
   dup transform-state-saved-latest 0 swap !
-  dup transform-state-output-buffer-start 0 swap ! ;
+  dup transform-state-output-buffer-start 0 swap !
+  dup transform-state-user-stack-depth 0 swap ! ;
 allocate-transform-state s" transform-state" variable
 
 
@@ -310,14 +312,410 @@ allocate-transform-state s" transform-state" variable
 
 
 : describe-transformation
-  ."    active here " here @ .hex64 space ." latest " latest @ .hex64 newline
-  ."     saved here " transform-state transform-state-saved-here
+  ."        active here " here @ .hex64 space
+  ." latest " latest @ .hex64 newline
+  ."         saved here " transform-state transform-state-saved-here
   @ .hex64 space
   ." latest " transform-state transform-state-saved-latest @ .hex64 newline
-  ."   output start " transform-state transform-state-output-buffer-start
+  ."       output start " transform-state transform-state-output-buffer-start
+  @ .hex64 newline
+  ."   user stack depth " transform-state transform-state-user-stack-depth
   @ .hex64 newline ;
 
 
+~ User stack depth tracking helper
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   The transformation facility offers a feature which specific transforms can
+~ use, or not, as they prefer, called user stack depth tracking. The log-load
+~ transform uses it; the label transform does not. The most difficult part of
+~ the tracking is the heler word below; it also relies on the user-stack-depth
+~ field of the transform-state variable.
+~
+~   What does user stack depth mean? Well, for every transform there's some
+~ notion of running a word "immediately"; for the log-load transform, it means
+~ that the word runs at the time the generated log-load routine executes. Each
+~ time a word runs, it interacts with the value stack in some way. Some words
+~ are neutral; others result in a net increase or decrease in its depth, which
+~ we call the delta. The running total of these deltas is what we call the
+~ user stack depth. By "user" we mean that these are values which were created
+~ by and for the code being transformed, which the code doing the transforming
+~ must be careful not to interfere with.
+~
+~   Why do we need to do this? Recall that the log-load routine is given the
+~ log address as the only value on the stack when it begins, and any time it
+~ does something that requires looking at the log, it needs to be able to find
+~ that address. The log-load routine runs as part of the warm-start routine
+~ (see execution.e), whose overall job is to get Evocation into a state where
+~ it can freely work with dynamically-allocated data on the log whose address
+~ isn't known at compile time. Therefore, necessarily, the log-load routine
+~ itself cannot assume access to any scratch space; the value stack is the
+~ only place it can store anything it needs, most crucially including the log
+~ address.
+~
+~   Fortunately, it is possible for the transform's own generated code to
+~ cooperate with the transformed code so that they can both share the stack,
+~ as long as the log transform knows what the transformed code is doing with
+~ it. Unfortunately, there's no general way to know that. We achieve it by
+~ hard-coding a delta value for every single possible word, and looking up
+~ that delta at transform-time, based on the word's name. Yes, that is quite a
+~ thing to have to do, but it does mean we don't have to think about this
+~ while writing the code we're transforming.
+~
+~   It's worth noticing that the delta only applies to things that happen
+~ immediately. It does not apply to compiled words because they run later, and
+~ it does not apply to alternates because they run too soon. Feel free to
+~ contemplate how confusing it is that transformation-time is sooner than
+~ "immediate".
+~
+~ (name pointer -- delta value)
+: transform-get-stack-delta
+  ~ From core.e.
+  dup s" docol" stringcmp 0 = { drop 1 exit } if
+  dup s" exit" stringcmp 0 = { drop 0 exit } if
+  dup s" swap" stringcmp 0 = { drop 0 exit } if
+  dup s" drop" stringcmp 0 = { drop -1 exit } if
+  dup s" 2drop" stringcmp 0 = { drop -2 exit } if
+  ~ The following are deliberate omissions: roll, unroll.
+  dup s" 3roll" stringcmp 0 = { drop 0 exit } if
+  dup s" 3unroll" stringcmp 0 = { drop 0 exit } if
+  dup s" dup" stringcmp 0 = { drop 1 exit } if
+  dup s" 2dup" stringcmp 0 = { drop 2 exit } if
+  dup s" +" stringcmp 0 = { drop -1 exit } if
+  dup s" -" stringcmp 0 = { drop -1 exit } if
+  dup s" *" stringcmp 0 = { drop -1 exit } if
+  dup s" /%" stringcmp 0 = { drop 0 exit } if
+  dup s" =" stringcmp 0 = { drop -1 exit } if
+  dup s" !=" stringcmp 0 = { drop -1 exit } if
+  dup s" >" stringcmp 0 = { drop -1 exit } if
+  dup s" <" stringcmp 0 = { drop -1 exit } if
+  dup s" >=" stringcmp 0 = { drop -1 exit } if
+  dup s" <=" stringcmp 0 = { drop -1 exit } if
+  dup s" >unsigned" stringcmp 0 = { drop -1 exit } if
+  dup s" <unsigned" stringcmp 0 = { drop -1 exit } if
+  dup s" >=unsigned" stringcmp 0 = { drop -1 exit } if
+  dup s" <=unsigned" stringcmp 0 = { drop -1 exit } if
+  dup s" &" stringcmp 0 = { drop -1 exit } if
+  dup s" |" stringcmp 0 = { drop -1 exit } if
+  dup s" xor" stringcmp 0 = { drop -1 exit } if
+  dup s" invert" stringcmp 0 = { drop 0 exit } if
+  dup s" lit" stringcmp 0 = { drop 1 exit } if
+  dup s" litstring" stringcmp 0 = { drop 1 exit } if
+  dup s" !" stringcmp 0 = { drop -2 exit } if
+  dup s" @" stringcmp 0 = { drop 0 exit } if
+  dup s" +!" stringcmp 0 = { drop -2 exit } if
+  dup s" -!" stringcmp 0 = { drop -2 exit } if
+  dup s" 8!" stringcmp 0 = { drop -2 exit } if
+  dup s" 8@" stringcmp 0 = { drop 0 exit } if
+  dup s" 16!" stringcmp 0 = { drop -2 exit } if
+  dup s" 16@" stringcmp 0 = { drop 0 exit } if
+  dup s" 32!" stringcmp 0 = { drop -2 exit } if
+  dup s" 32@" stringcmp 0 = { drop 0 exit } if
+  dup s" control!" stringcmp 0 = { drop -1 exit } if
+  dup s" control@" stringcmp 0 = { drop 1 exit } if
+  dup s" value!" stringcmp 0 = { drop -1 exit } if
+  dup s" value@" stringcmp 0 = { drop 1 exit } if
+  dup s" memcopy" stringcmp 0 = { drop -3 exit } if
+  dup s" memmove" stringcmp 0 = { drop -3 exit } if
+  dup s" stringlen" stringcmp 0 = { drop 0 exit } if
+  dup s" reverse-stringlen" stringcmp 0 = { drop 0 exit } if
+  dup s" reverse-padding-len" stringcmp 0 = { drop 0 exit } if
+  dup s" stringcmp" stringcmp 0 = { drop -1 exit } if
+  dup s" branch" stringcmp 0 = { drop 0 exit } if
+  dup s" 0branch" stringcmp 0 = { drop 0 exit } if
+  ~ This is kind of a big assumption.
+  dup s" execute" stringcmp 0 = { drop -1 exit } if
+  dup s" entry-to-execution-token" stringcmp 0 = { drop 0 exit } if
+  dup s" execution-token-to-entry" stringcmp 0 = { drop 0 exit } if
+  dup s" entry-flags@" stringcmp 0 = { drop 0 exit } if
+  dup s" entry-flags!" stringcmp 0 = { drop -2 exit } if
+  dup s" entry-to-name" stringcmp 0 = { drop 0 exit } if
+  dup s" pack64" stringcmp 0 = { drop -1 exit } if
+  dup s" pack32" stringcmp 0 = { drop -1 exit } if
+  dup s" pack16" stringcmp 0 = { drop -1 exit } if
+  dup s" pack8" stringcmp 0 = { drop -1 exit } if
+  dup s" packstring" stringcmp 0 = { drop -1 exit } if
+  dup s" packalign" stringcmp 0 = { drop -1 exit } if
+  dup s" unpack64" stringcmp 0 = { drop 1 exit } if
+  dup s" unpack32" stringcmp 0 = { drop 1 exit } if
+  dup s" unpack16" stringcmp 0 = { drop 1 exit } if
+  dup s" unpack8" stringcmp 0 = { drop 1 exit } if
+  dup s" align-size" stringcmp 0 = { drop -1 exit } if
+  dup s" unpackalign" stringcmp 0 = { drop -1 exit } if
+  dup s" crash" stringcmp 0 = { drop 0 exit } if
+
+  ~ From core-plus.e.
+  dup s" 1-" stringcmp 0 = { drop 0 exit } if
+  dup s" 1+" stringcmp 0 = { drop 0 exit } if
+  dup s" max" stringcmp 0 = { drop -1 exit } if
+  dup s" min" stringcmp 0 = { drop -1 exit } if
+  dup s" over" stringcmp 0 = { drop 1 exit } if
+  dup s" pick" stringcmp 0 = { drop 0 exit } if
+  ~ The following are deliberate omissions: ndrop, ndup.
+  dup s" 3drop" stringcmp 0 = { drop -3 exit } if
+  dup s" 3dup" stringcmp 0 = { drop 3 exit } if
+  dup s" &&" stringcmp 0 = { drop -1 exit } if
+  dup s" ||" stringcmp 0 = { drop -1 exit } if
+  dup s" not" stringcmp 0 = { drop 0 exit } if
+  dup s" negate" stringcmp 0 = { drop 0 exit } if
+  dup s" align-floor" stringcmp 0 = { drop -1 exit } if
+
+  ~ From linux.e.
+  dup s" sys-exit" stringcmp 0 = { drop -1 exit } if
+  dup s" sys-write" stringcmp 0 = { drop -2 exit } if
+  dup s" sys-read" stringcmp 0 = { drop -1 exit } if
+
+  ~ From output.e.
+  dup s" emitstring" stringcmp 0 = { drop -1 exit } if
+  dup s" space" stringcmp 0 = { drop 0 exit } if
+  dup s" newline" stringcmp 0 = { drop 0 exit } if
+  dup s" pow" stringcmp 0 = { drop 2 exit } if
+  dup s" logfloor" stringcmp 0 = { drop 2 exit } if
+  dup s" logceil" stringcmp 0 = { drop 2 exit } if
+  dup s" .base-unsigned" stringcmp 0 = { drop -3 exit } if
+  dup s" .base" stringcmp 0 = { drop -2 exit } if
+  dup s" ." stringcmp 0 = { drop -1 exit } if
+  dup s" .hex" stringcmp 0 = { drop -1 exit } if
+  dup s" .hex8" stringcmp 0 = { drop -1 exit } if
+  dup s" .hex16" stringcmp 0 = { drop -1 exit } if
+  dup s" .hex32" stringcmp 0 = { drop -1 exit } if
+  dup s" .hex64" stringcmp 0 = { drop -1 exit } if
+  dup s" .hexn" stringcmp 0 = { drop -2 exit } if
+  ~ The following is a deliberate omission: s0.
+  dup s" stack" stringcmp 0 = { drop 0 exit } if
+  dup s" stackhex" stringcmp 0 = { drop 0 exit } if
+
+  ~ From amd64.e.
+  dup s" :rax" stringcmp 0 = { drop 1 exit } if
+  dup s" :rcx" stringcmp 0 = { drop 1 exit } if
+  dup s" :rdx" stringcmp 0 = { drop 1 exit } if
+  dup s" :rbx" stringcmp 0 = { drop 1 exit } if
+  dup s" :rsp" stringcmp 0 = { drop 1 exit } if
+  dup s" :rbp" stringcmp 0 = { drop 1 exit } if
+  dup s" :rsi" stringcmp 0 = { drop 1 exit } if
+  dup s" :rdi" stringcmp 0 = { drop 1 exit } if
+  dup s" :r8" stringcmp 0 = { drop 1 exit } if
+  dup s" :r9" stringcmp 0 = { drop 1 exit } if
+  dup s" :r10" stringcmp 0 = { drop 1 exit } if
+  dup s" :r11" stringcmp 0 = { drop 1 exit } if
+  dup s" :r12" stringcmp 0 = { drop 1 exit } if
+  dup s" :r13" stringcmp 0 = { drop 1 exit } if
+  dup s" :r14" stringcmp 0 = { drop 1 exit } if
+  dup s" :r15" stringcmp 0 = { drop 1 exit } if
+  dup s" :eax" stringcmp 0 = { drop 1 exit } if
+  dup s" :ecx" stringcmp 0 = { drop 1 exit } if
+  dup s" :edx" stringcmp 0 = { drop 1 exit } if
+  dup s" :ebx" stringcmp 0 = { drop 1 exit } if
+  dup s" :esp" stringcmp 0 = { drop 1 exit } if
+  dup s" :ebp" stringcmp 0 = { drop 1 exit } if
+  dup s" :esi" stringcmp 0 = { drop 1 exit } if
+  dup s" :edi" stringcmp 0 = { drop 1 exit } if
+  dup s" :ax" stringcmp 0 = { drop 1 exit } if
+  dup s" :cx" stringcmp 0 = { drop 1 exit } if
+  dup s" :dx" stringcmp 0 = { drop 1 exit } if
+  dup s" :bx" stringcmp 0 = { drop 1 exit } if
+  dup s" :sp" stringcmp 0 = { drop 1 exit } if
+  dup s" :bp" stringcmp 0 = { drop 1 exit } if
+  dup s" :si" stringcmp 0 = { drop 1 exit } if
+  dup s" :di" stringcmp 0 = { drop 1 exit } if
+  dup s" :al" stringcmp 0 = { drop 1 exit } if
+  dup s" :cl" stringcmp 0 = { drop 1 exit } if
+  dup s" :dl" stringcmp 0 = { drop 1 exit } if
+  dup s" :bl" stringcmp 0 = { drop 1 exit } if
+  dup s" :ah" stringcmp 0 = { drop 1 exit } if
+  dup s" :ch" stringcmp 0 = { drop 1 exit } if
+  dup s" :dh" stringcmp 0 = { drop 1 exit } if
+  dup s" :bh" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-overflow" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-no-overflow" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-below" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-above-equal" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-equal" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-not-equal" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-below-equal" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-above" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-sign" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-not-sign" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-even" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-odd" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-less" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-greater-equal" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-less-equal" stringcmp 0 = { drop 1 exit } if
+  dup s" :cc-greater" stringcmp 0 = { drop 1 exit } if
+  dup s" reg64" stringcmp 0 = { drop 0 exit } if
+  dup s" extrareg64" stringcmp 0 = { drop 0 exit } if
+  dup s" reg32" stringcmp 0 = { drop 0 exit } if
+  dup s" reg16" stringcmp 0 = { drop 0 exit } if
+  dup s" reg8" stringcmp 0 = { drop 0 exit } if
+  dup s" scalefield" stringcmp 0 = { drop 0 exit } if
+  dup s" condition-code" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-0" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-w" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-r" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-x" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-b" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wr" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wx" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wb" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-rx" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-rb" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-xb" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wrx" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wrb" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wxb" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-rxb" stringcmp 0 = { drop 0 exit } if
+  dup s" rex-wrxb" stringcmp 0 = { drop 0 exit } if
+  dup s" opcodereg" stringcmp 0 = { drop -2 exit } if
+  dup s" opcodecc" stringcmp 0 = { drop -2 exit } if
+  dup s" modrm" stringcmp 0 = { drop -3 exit } if
+  dup s" sib" stringcmp 0 = { drop -3 exit } if
+  dup s" addressing-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" addressing-reg8" stringcmp 0 = { drop -2 exit } if
+  dup s" addressing-indirect-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" addressing-disp8-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" addressing-disp32-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" addressing-indexed-reg64" stringcmp 0 = { drop -4 exit } if
+  dup s" addressing-disp8-indexed-reg64" stringcmp 0 = { drop -5 exit } if
+  dup s" cld" stringcmp 0 = { drop 0 exit } if
+  dup s" std" stringcmp 0 = { drop 0 exit } if
+  dup s" syscall" stringcmp 0 = { drop 0 exit } if
+  dup s" hlt" stringcmp 0 = { drop 0 exit } if
+  dup s" push-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" pop-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" push-imm32-extended64" stringcmp 0 = { drop -1 exit } if
+  dup s" lea-reg64-disp8-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" lea-reg64-disp32-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" lea-reg64-indexed-reg64" stringcmp 0 = { drop -4 exit } if
+  dup s" lea-reg64-disp8-indexed-reg64" stringcmp 0 = { drop -5 exit } if
+  dup s" mov-reg64-imm32" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-reg64-imm64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-extrareg64-imm64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-indirect-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-disp8-reg64-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg64-indirect-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-reg64-disp8-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg64-disp32-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg64-indexed-reg64" stringcmp 0 = { drop -4 exit } if
+  dup s" mov-indexed-reg64-reg64" stringcmp 0 = { drop -4 exit } if
+  dup s" mov-indirect-reg64-reg32" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-disp8-reg64-reg32" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg32-indirect-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-reg32-disp8-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-indirect-reg64-reg16" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-disp8-reg64-reg16" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg16-indirect-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-reg16-disp8-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-indirect-reg64-reg8" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-disp8-reg64-reg8" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg8-indirect-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" mov-reg8-disp8-reg64" stringcmp 0 = { drop -3 exit } if
+  dup s" mov-reg8-reg8" stringcmp 0 = { drop -2 exit } if
+  dup s" movs8" stringcmp 0 = { drop 0 exit } if
+  dup s" movs16" stringcmp 0 = { drop 0 exit } if
+  dup s" movs32" stringcmp 0 = { drop 0 exit } if
+  dup s" movs64" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-movs8" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-movs16" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-movs32" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-movs64" stringcmp 0 = { drop 0 exit } if
+  dup s" lods8" stringcmp 0 = { drop 0 exit } if
+  dup s" lods16" stringcmp 0 = { drop 0 exit } if
+  dup s" lods32" stringcmp 0 = { drop 0 exit } if
+  dup s" lods64" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-lods8" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-lods16" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-lods32" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-lods64" stringcmp 0 = { drop 0 exit } if
+  dup s" stos8" stringcmp 0 = { drop 0 exit } if
+  dup s" stos16" stringcmp 0 = { drop 0 exit } if
+  dup s" stos32" stringcmp 0 = { drop 0 exit } if
+  dup s" stos64" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-stos8" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-stos16" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-stos32" stringcmp 0 = { drop 0 exit } if
+  dup s" rep-stos64" stringcmp 0 = { drop 0 exit } if
+  dup s" cmps8" stringcmp 0 = { drop 0 exit } if
+  dup s" cmps16" stringcmp 0 = { drop 0 exit } if
+  dup s" cmps32" stringcmp 0 = { drop 0 exit } if
+  dup s" cmps64" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-cmps8" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-cmps16" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-cmps32" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-cmps64" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-cmps8" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-cmps16" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-cmps32" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-cmps64" stringcmp 0 = { drop 0 exit } if
+  dup s" scas8" stringcmp 0 = { drop 0 exit } if
+  dup s" scas16" stringcmp 0 = { drop 0 exit } if
+  dup s" scas32" stringcmp 0 = { drop 0 exit } if
+  dup s" scas64" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-scas8" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-scas16" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-scas32" stringcmp 0 = { drop 0 exit } if
+  dup s" repz-scas64" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-scas8" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-scas16" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-scas32" stringcmp 0 = { drop 0 exit } if
+  dup s" repnz-scas64" stringcmp 0 = { drop 0 exit } if
+  dup s" add-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" add-indirect-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" add-reg64-indirect-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" add-reg64-imm8" stringcmp 0 = { drop -2 exit } if
+  dup s" sub-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" sub-indirect-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" sub-reg64-imm8" stringcmp 0 = { drop -2 exit } if
+  dup s" sbb-reg64-imm8" stringcmp 0 = { drop -2 exit } if
+  dup s" mul-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" divmod-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" idivmod-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" inc-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" dec-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" and-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" and-reg68-imm8" stringcmp 0 = { drop -2 exit } if
+  dup s" or-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" or-reg64-imm8" stringcmp 0 = { drop -2 exit } if
+  dup s" xor-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" not-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" cmp-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" test-reg64-reg64" stringcmp 0 = { drop -2 exit } if
+  dup s" set-reg8-cc" stringcmp 0 = { drop -2 exit } if
+  dup s" jmp-cc-rel-imm8" stringcmp 0 = { drop -2 exit } if
+  dup s" jmp-cc-rel-imm32" stringcmp 0 = { drop -2 exit } if
+  dup s" jmp-abs-indirect-reg64" stringcmp 0 = { drop -1 exit } if
+  dup s" jmp-rel-imm8" stringcmp 0 = { drop -1 exit } if
+  dup s" jmp-rel-imm32" stringcmp 0 = { drop -1 exit } if
+
+  ~ From execution-support.e.
+  dup s" pack-next" stringcmp 0 = { drop 0 exit } if
+  dup s" pack-beforenext" stringcmp 0 = { drop -1 exit } if
+  dup s" pack-pushcontrol" stringcmp 0 = { drop -1 exit } if
+  dup s" pack-popcontrol" stringcmp 0 = { drop -1 exit } if
+
+  ~ Word not provided statically, but used during the log-load routine anyway.
+  dup s" here" stringcmp 0 = { drop 1 exit } if
+  dup s" [" stringcmp 0 = { drop 0 exit } if
+  dup s" ]" stringcmp 0 = { drop 0 exit } if
+  dup s" :" stringcmp 0 = { drop 0 exit } if
+  dup s" ;" stringcmp 0 = { drop 0 exit } if
+  dup s" ;asm" stringcmp 0 = { drop 0 exit } if
+  dup s" L@'" stringcmp 0 = { drop 1 exit } if
+  dup s" L!'" stringcmp 0 = { drop -1 exit } if
+
+  ~   If we get here, that's a problem. Emit an error message to make sure
+  ~ it's easy to diagnose. We also return a comically large negative value,
+  ~ to make sure things fail as quickly as possible afterwards.
+  ." No known stack delta: " emitstring newline
+  -256 ;
+
+
+~ (delta --)
+: transform-apply-stack-delta
+  transform-state transform-state-user-stack-depth @ +
+  transform-state transform-state-user-stack-depth ! ;
+
+
 ~ Label transform implementation
 ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ~
@@ -469,6 +867,137 @@ allocate-transform-state s" transform-state" variable
   ; make-immediate
 
 
+~   We implement alternates of all the high-level flow-control words for the
+~ label transform: if, unless, if-else, forever, and while. We leave { and }
+~ as their usual implementations. The values { and } leave on the stack will
+~ be in the host address space, which is convenient anyway. Those values don't
+~ make their way into the output, since everything is relative, and the
+~ relative offsets are correct regardless. What we do need to change, though,
+~ is the addresses of branch and 0branch. We need to resolve these via label.
+~
+~ (start pointer, length --)
+: label-if-alternate
+  2dup swap dup 5 8 * + 3unroll swap
+  ~ (start pointer, length, start pointer, adjusted start pointer, length)
+  memmove
+  ~ (start pointer, length)
+  swap here @ swap here ! swap
+  ~ (old here, length)
+  swap-transform-variables
+  L@' 0branch
+  L@' !=
+  L@' lit
+  swap-transform-variables
+  offset-to-target-address-space , 0 ,           ~ lit
+  offset-to-target-address-space ,               ~ !=
+  ~   The branch length needs to be one word longer than the block length,
+  ~ because the length field itself is part of the scope of the branch.
+  offset-to-target-address-space , dup 8 + ,     ~ 0branch
+  ~ (old here, length)
+  drop 5 8 * + here !
+  ; make-immediate
+
+~ (start pointer, length)
+: label-unless-alternate
+  2dup swap dup 5 8 * + 3unroll swap
+  ~ (start pointer, length, start pointer, adjusted start pointer, length)
+  memmove
+  ~ (start pointer, length)
+  swap here @ swap here ! swap
+  ~ (old here, length)
+  swap-transform-variables
+  L@' 0branch
+  L@' =
+  L@' lit
+  swap-transform-variables
+  offset-to-target-address-space , 0 ,           ~ lit
+  offset-to-target-address-space ,               ~ =
+  ~   The branch length needs to be one word longer than the block length,
+  ~ because the length field itself is part of the scope of the branch.
+  offset-to-target-address-space , dup 8 + ,     ~ 0branch
+  ~ (old here, length)
+  drop 5 8 * + here !
+  ; make-immediate
+
+~ (true start, true length, false start, false length)
+: label-if-else-alternate
+  ~   First we slide the false-block forward, then the true-block. We slide
+  ~ them both directly into their final positions, leaving space at the start
+  ~ for a test and branch, and space in between for an unconditional branch.
+  ~ Those spaces will take five words, and two words, respectively. So the
+  ~ false-block gets moved by seven words, and the true-block gets moved by
+  ~ five words.
+  2dup swap dup 7 8 * + swap 3roll memmove
+  4 roll dup 5 unroll 4 roll dup 5 unroll
+  swap dup 5 8 * + swap 3roll memmove
+  ~ (true start, true length, false start, false length)
+
+  ~   Now we write out the initial test-and-branch.
+  4 roll dup 5 unroll here @ 6 unroll here !
+  ~ (old here, true start, true length, false start, false length)
+  swap-transform-variables
+  L@' 0branch
+  L@' !=
+  L@' lit
+  swap-transform-variables
+  offset-to-target-address-space , 0 ,           ~ lit
+  offset-to-target-address-space ,               ~ !=
+  ~   Branch past the length field, the true-block, and the unconditional
+  ~ branch in the middle.
+  offset-to-target-address-space ,               ~ 0branch
+  3roll dup 4 unroll 3 8 * + ,
+
+  ~  Next, write out the unconditional branch in the middle.
+  swap dup 3unroll 5 8 * + here !
+  swap-transform-variables
+  L@' branch
+  swap-transform-variables
+  offset-to-target-address-space ,               ~ branch
+  ~  Branch past the length field and the false-block.
+  dup 8 + ,
+
+  ~  Set "here" to point to the true end.
+  drop drop drop drop 7 8 * + here !
+  ; make-immediate
+
+~ (start, length --)
+: label-forever-alternate
+  swap-transform-variables
+  L@' branch
+  swap-transform-variables
+  offset-to-target-address-space ,               ~ branch
+  8 + -1 * , drop
+  ; make-immediate
+
+~ (test start, test length, body start, body length --)
+: label-while-alternate
+  ~ The conditional branch needs five words.
+  2dup swap dup 5 8 * + swap 3roll memmove
+  here @ 5 unroll swap dup 3unroll here !
+  ~ (old here, test start, test length, body start, body length)
+  swap-transform-variables
+  L@' 0branch
+  L@' !=
+  L@' lit
+  swap-transform-variables
+  offset-to-target-address-space , 0 ,           ~ lit
+  offset-to-target-address-space ,               ~ !=
+  ~ Branch past the length field, the body, and the unconditional branch.
+  offset-to-target-address-space ,               ~ 0branch
+  dup 3 8 * + ,
+  ~ Set "here" to the new end.
+  5 8 * 6 roll + here !
+  ~ (test start, test length, body start, body length)
+  ~   Unconditionally branch backwards past the branch word, the body, the
+  ~ conditional branch, and the test.
+  swap-transform-variables
+  L@' branch
+  swap-transform-variables
+  offset-to-target-address-space ,
+  6 8 * + swap drop + swap drop -1 * ,
+  ; make-immediate
+
+
 ~   This implements the label transform for a single word. It is directly
 ~ analogous to "interpret", and reading interpret.e may help in understanding
 ~ it, though it's meant to still make sense on its own.
@@ -503,6 +1032,13 @@ allocate-transform-state s" transform-state" variable
   dup s" L!'" stringcmp 0 = { swap drop ' label-L!'-alternate swap } if
   dup s" keyword" stringcmp 0 = {
     swap drop ' label-keyword-alternate swap } if
+  dup s" if" stringcmp 0 = { swap drop ' label-if-alternate swap } if
+  dup s" unless" stringcmp 0 = { swap drop ' label-unless-alternate swap } if
+  dup s" if-else" stringcmp 0 = {
+    swap drop ' label-if-else-alternate swap } if
+  dup s" forever" stringcmp 0 = {
+    swap drop ' label-forever-alternate swap } if
+  dup s" while" stringcmp 0 = { swap drop ' label-while-alternate swap } if
   drop swap
   ~ (name as stack string, 0 or alternate entry pointer, name pointer)
 
@@ -713,26 +1249,266 @@ allocate-transform-state s" transform-state" variable
 ~ For conceptual overview, see the top of this file.
 
 
+~   This outputs code that performs a roll of a given size, for use by the
+~ words that rely on the tracked user stack depth.
+~
+~ (amount to roll by --)
+: log-load-roll
+  ~ A roll of size 1 is an nop, and a roll of size 0 crashes.
+  dup 2 > { drop exit } if
+
+  ~   A roll of size 2 is equivalent to a swap, and we compile it as one for
+  ~ clarity when reading the hexdump.
+  ~
+  ~   Yes, clarity when reading the hexdump.
+  dup 2 = {
+    drop
+
+    swap-transform-variables
+    L@' swap
+    swap-transform-variables
+
+    offset-to-target-address-space ,     ~ swap
+
+    exit
+  } if
+
+  swap-transform-variables
+  L@' roll
+  L@' lit
+  swap-transform-variables
+
+  offset-to-target-address-space ,     ~ lit
+  swap ,
+  offset-to-target-address-space ,     ~ roll
+  ;
+
+
+~   This outputs code that perfoms an unroll of a given size, for use by the
+~ words that rely on the tracked user stack depth.
+
+~ (amount to unroll by --)
+: log-load-unroll
+  ~ An unroll of size 1 is an nop, and an unroll of size 0 crashes.
+  dup 2 > { drop exit } if
+
+  ~ Yes, clarity when reading the hexdump! What's so weird about that? :)
+  dup 2 = {
+    drop
+
+    swap-transform-variables
+    L@' swap
+    swap-transform-variables
+
+    offset-to-target-address-space ,     ~ swap
+
+    exit
+  } if
+
+  swap-transform-variables
+  L@' unroll
+  L@' lit
+  swap-transform-variables
+
+  offset-to-target-address-space ,     ~ lit
+  swap ,
+  offset-to-target-address-space ,     ~ unroll
+  ;
+
+
+~   This checks the tracked user stack depth and outputs an appropriate roll
+~ to put the log address on top, as preparation for calling something that
+~ relies on it.
+: log-load-roll-log-address
+  transform-state transform-state-user-stack-depth @
+  ~   The amount we're rolling includes the log address, which is not a user
+  ~ value, so we add 1.
+  1 +
+  log-load-roll ;
+
+
+~   This checks the tracked user stack depth and outputs an appropriate unroll
+~ to put the log address back at the bottom, as cleanup after calling
+~ something that relies on it.
+: log-load-unroll-log-address
+  transform-state transform-state-user-stack-depth @
+  ~   The amount we're rolling includes the log address, which is not a user
+  ~ value, so we add 1.
+  1 +
+  log-load-unroll ;
+
+
+~   This checks the tracked user stack depth and outputs an appropriate roll
+~ to put the saved label value on top, as preparation for calling something
+~ that consumes it.
+: log-load-roll-saved-label
+  transform-state transform-state-user-stack-depth @
+  ~   The amount we're rolling includes both the log address and the label
+  ~ value, neither of which are user values, so we add 2.
+  2 +
+  log-load-roll ;
+
+
+~   This checks the tracked user stack depth and outputs an appropriate unroll
+~ to put a saved label value on the bottom, as a way of safely storing it away
+~ when it's newly generated.
+: log-load-unroll-saved-label
+  transform-state transform-state-user-stack-depth @
+  ~   The amount we're rolling includes both the log address and the label
+  ~ value, neither of which are user values, so we add 2.
+  2 +
+  log-load-unroll ;
+
+
+~ (name pointer --)
+: log-load-compile-dynamic-word
+  log-load-roll-log-address
+
+  swap-transform-variables
+  ~   Looking these up in reverse order saves us some stack juggling. Does it
+  ~ help readability, or hurt it? Who can say...
+  L@' log-load-comma
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
+  ~ (name pointer, log-load-comma, log-load-find-execution-token, litstring)
+
+  ~ The overall stack delta of this sequence is 0.
+  offset-to-target-address-space ,     ~ litstring
+  3roll here @ swap packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ log-load-comma
+
+  log-load-unroll-log-address ;
+
+
+~   When we want the log-load routine to run a word that wasn't statically
+~ compiled-in to the target executable, we output code that looks up the word
+~ by name on the log, then calls it.
+~
+~   We update the user stack depth as we go, to account for our internal
+~ needs. The last thing our generated code does is execute the word we were
+~ asked to, and we have no way of knowing what its stack delta will be, so
+~ that final delta is our caller's responsibility. The caller should only
+~ consider the delta of the word it requested itself; we handle everything
+~ pertaining to retrieving, and then consuming, its execution token.
+~
+~ (name pointer --)
+: log-load-execute-dynamic-word
+  log-load-roll-log-address
+
+  swap-transform-variables
+  ~ This is reverse order again.
+  L@' swap
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
+  ~ (name pointer, swap, log-load-find-execution-token, litstring)
+
+  offset-to-target-address-space ,     ~ litstring
+  3roll here @ swap packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ swap
+
+  ~   Now the execution token is on the stack, immediately below the log
+  ~ address, so we apply a delta for it.
+  1 transform-apply-stack-delta
+
+  log-load-unroll-log-address
+
+  swap-transform-variables
+  L@' execute
+  swap-transform-variables
+
+  offset-to-target-address-space ,     ~ execute
+
+  ~   Invoking execute consumes the execution token, which is a delta of -1.
+  ~ Any additional delta is our caller's responsibility, per above.
+  -1 transform-apply-stack-delta ;
+
+
+~   We generate code that looks up "docol" by name, runs it to get the
+~ codeword pointer, then finally appends it to the entry.
+~
+~   This one's unusual in that it first executes the word, then compiles the
+~ result. To avoid rolling and unrolling repeatedly, we implement it as a
+~ special case. For clarity's sake we put the code here, with the other
+~ dynamic word stuff.
+~
+: log-load-compile-docol
+  log-load-roll-log-address
+
+  swap-transform-variables
+  ~ As usual, we do these in reverse.
+  L@' log-load-comma
+  L@' execute
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
+
+  ~ The overall stack delta of this sequence is 0.
+  offset-to-target-address-space ,     ~ litstring
+  here @ s" docol" packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ execute
+  offset-to-target-address-space ,     ~ log-load-comma
+
+  log-load-unroll-log-address ;
+
+
+~   There's one more compilation case, where we wish to compile an integer
+~ literal. Again, for clarity, we do it here.
+~
+~ (integer value --)
+: log-load-compile-literal
+  log-load-roll-log-address
+
+  swap-transform-variables
+  ~ As usual, we do these in reverse.
+  L@' log-load-comma
+  L@' log-load-comma
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
+
+  ~ The overall stack delta of this sequence is 0.
+  offset-to-target-address-space ,     ~ litstring
+  here @ s" lit" packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ log-load-comma
+  swap ,                               ~ the value
+  offset-to-target-address-space ,     ~ log-load-comma
+
+  log-load-unroll-log-address ;
+
+
 ~   This is the alternate version of "create" for use with the log-load
 ~ transform. This one is quite unlike the regular "create"; rather than
 ~ creating an entry on the log directly, its job is to output words which,
 ~ when they're later executed, will do create's job.
 ~
-~   The implementations of log-load-find-execution-token and log-load-create
-~ are in log-load.e.
+~   In practice that means outputting a codeword pointer to run a
+~ statically-compiled word that does the work. The implementation of
+~ log-load-create is in log-load.e.
 ~
-~ (string pointer --)
+~   It's worth keeping in mind that this alternate only gets called for
+~ manual invocations of "create". It isn't called from the colon alternate.
 : log-load-create-alternate
+  log-load-roll-log-address
+
   swap-transform-variables
-  ~   Looking these up in reverse order saves us some stack juggling. Does it
-  ~ help readability, or hurt it? Who can say...
   L@' log-load-create
-  L@' litstring
+  L@' swap
   swap-transform-variables
-  offset-to-target-address-space ,     ~ litstring
-  swap here @ swap packstring 8 packalign here !
+
+  ~ The overall stack delta of this sequence is 0.
+  offset-to-target-address-space ,     ~ swap
   offset-to-target-address-space ,     ~ log-load-create
-  ;
+
+  ~   We've consumed a string pointer from the stack, so that's a delta of -1.
+  -1 transform-apply-stack-delta
+
+  log-load-unroll-log-address ;
 
 
 ~   This is the alternate version of ":" for use with the log-load transform.
@@ -740,24 +1516,29 @@ allocate-transform-state s" transform-state" variable
 ~ to be extremely useful to read and understand ":" in interpret.e before
 ~ attempting to understand "log-load-colon-alternate".
 : log-load-colon-alternate
-  ~ This calls "log-load-create" instead of "create".
-  word value@ log-load-create-alternate dropstring
+  word value@
+
+  ~   Calling log-load-create-alternate would result in some redundant rolling
+  ~ and unrolling, so we do it together like this instead.
+  log-load-roll-log-address
 
-  ~   We generate code that looks up "docol" by name, runs it to get the
-  ~ codeword pointer, then finally appends it to the entry.
   swap-transform-variables
-  ~ As usual, we do these in reverse.
-  L@' log-load-comma
-  L@' execute
-  L@' log-load-find-execution-token
+  L@' log-load-create
   L@' litstring
   swap-transform-variables
 
+  ~ The overall stack delta of this sequence is 0.
   offset-to-target-address-space ,     ~ litstring
-  here @ s" docol" packstring 8 packalign here !
-  offset-to-target-address-space ,     ~ log-load-find-execution-token
-  offset-to-target-address-space ,     ~ execute
-  offset-to-target-address-space ,     ~ log-load-comma
+  swap here @ swap packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-create
+
+  log-load-unroll-log-address
+
+  dropstring
+
+  ~   We generate code that looks up "docol" by name, runs it to get the
+  ~ codeword pointer, then finally appends it to the entry.
+  log-load-compile-docol
 
   ~   This is where we would mark the entry hidden, but we don't do that. It
   ~ won't shadow anything and it won't be called until the entire log-load
@@ -776,6 +1557,8 @@ allocate-transform-state s" transform-state" variable
 : log-load-semicolon-alternate
   ~   We generate code that looks up "exit" by name and appends it to the
   ~ entry.
+  log-load-roll-log-address
+
   swap-transform-variables
   ~ As usual, we do these in reverse.
   L@' log-load-comma
@@ -783,11 +1566,14 @@ allocate-transform-state s" transform-state" variable
   L@' litstring
   swap-transform-variables
 
+  ~ The overall stack delta of this sequence is 0.
   offset-to-target-address-space ,     ~ litstring
   here @ s" exit" packstring 8 packalign here !
   offset-to-target-address-space ,     ~ log-load-find-execution-token
   offset-to-target-address-space ,     ~ log-load-comma
 
+  log-load-unroll-log-address
+
   ~ This is where we would unhide the entry, but again, we don't do that.
 
   ~   Since [ is an immediate word, we have to go to extra trouble to compile
@@ -801,6 +1587,7 @@ allocate-transform-state s" transform-state" variable
 ~ below. It is likely to be extremely useful to read and understand ";asm" in
 ~ interpret.e before attempting to understand "log-load;asm".
 : log-load-semicolon-assembly-alternate
+  ~ TODO this is just unimplemented, which seems bad :D
   ~ here @ pack-next 8 packalign here !
   ~ latest @ dup unhide-entry entry-to-execution-token
   ~ ~ The codeword needs to be transformed to the target address space.
@@ -813,6 +1600,65 @@ allocate-transform-state s" transform-state" variable
   ; make-immediate
 
 
+~   This just does the same thing [ always does, but having it as an alternate
+~ means it happens at transformation time, which is sooner than "immediate"
+~ time. The log load transform is weird like that, it has three different
+~ times things can happen, rather than the usual two.
+: log-load-left-square-brace-alternate
+  ~   Since [ is an immediate word, we have to go to extra trouble to compile
+  ~ it as part of the alternate.
+  [ ' [ entry-to-execution-token , ]
+  ; make-immediate
+
+~ We need this one, too. It's not even an immediate word normally!
+: log-load-right-square-brace-alternate ] ; make-immediate
+
+~ Yeah comments have to go.
+: log-load-tilde-alternate
+  ' ~ entry-to-execution-token execute
+  ; make-immediate
+
+~ Strings are important and must happen now, now, now.
+: log-load-string-alternate
+  ~   Something subtle here: s" is state-dependent. That is, it does different
+  ~ things depending on the interpreter flags. We would really rather know
+  ~ which version we're getting, and also it would be best if it didn't
+  ~ scribble on the output buffer. Fortunately we can achieve both of these,
+  ~ by coercing things into a known state while calling it.
+  ~
+  ~   We could choose either version of s", but the interpreted one is more
+  ~ convenient because it doesn't mess with a spurious litstring invocation,
+  ~ just scribbled into scratch space after "here". Of course, that raises the
+  ~ additional concern that we have the wrong "here", but we can just swap
+  ~ that around, too.
+  ~
+  ~   This is all worth it to avoid reimplementing s". If we had two
+  ~ implementations, they'd have to be kept in sync, and it's an important
+  ~ user-facing word with semantics that are likely to improve over time.
+  interpreter-flags @
+  ' s" entry-to-execution-token
+  swap-transform-variables
+  ~   Since [ is an immediate word, we have to go to extra trouble to compile
+  ~ it as part of the alternate.
+  [ ' [ entry-to-execution-token , ]
+  execute
+  swap-transform-variables
+  swap interpreter-flags !
+  ~   Whew. What a mouthful.
+  ~ (string pointer)
+
+  swap-transform-variables
+  L@' litstring
+  swap-transform-variables
+
+  offset-to-target-address-space ,     ~ litstring
+  here @ swap packstring 8 packalign here !
+
+  ~   Now the string pointer is on the stack, so we apply a delta for it.
+  1 transform-apply-stack-delta
+  ; make-immediate
+
+
 ~   Because docol requires it, we provide a special mini-version of the label
 ~ system. We only do L@' and L!', because that's all we need. Unlike the
 ~ version of this feature for the label transform, for the log-load transform,
@@ -822,17 +1668,25 @@ allocate-transform-state s" transform-state" variable
 ~ the value on the stack when the generated log-load routine runs. So, each
 ~ instance of L@' must be closely followed by a matching instance of L!'. Each
 ~ label can only ever be used exactly once, and it must be a backward
-~ reference. Furthermore, there is a very tight restriction on what can be
-~ on the stack. The easiest way to explain it is by showing the interface of
-~ these words from the transformed code's perspective:
+~ reference. Furthermore, the stack is used in a very specific way, which the
+~ transformed code must be compatible with. The easiest way to explain it is
+~ by showing the interface of these words from the transformed code's
+~ perspective:
 ~
-~   L!' is (preserved value, value of label
-~           -- value of label, preserved value)
-~   L@' is (value of label, preserved value
-~           -- preserved value, value of label)
+~   L!' is (preserved values, ..., ..., value of label
+~           -- value of label, preserved values, ..., ...)
+~   L@' is (value of label, preserved values, ..., ...)
+~           -- preserved values, ..., ..., value of label)
 ~
-~   The preserved value is simply another item on the stack, which the label
-~ takes pains not to interfere with.
+~   The preserved values are simply more items on the stack, which the
+~ alternates take pains not to interfere with. The alternates output a roll or
+~ unroll of an appropriate size. Hopefully, in reading this, you have the
+~ question: How can the alternates possibly know what size is appropriate?
+~ The answer is that the log-load transform carefully tracks how many items
+~ the transformed code is expected to have on the stack, based on its
+~ hardcoded understanding of each word that's expected to be relevant, looked
+~ up by the word's name. The number of items is stored in the user-stack-depth
+~ field of transform-state.
 ~
 ~   There is no adjustment done on the saved value, since it's created in the
 ~ target address space and then also used in the target address space. It
@@ -841,31 +1695,25 @@ allocate-transform-state s" transform-state" variable
 ~ depending on which transform it's running under, and there'd have to be a
 ~ mechanism for that.
 ~
-~   If that sounds super complex: All we actually do is read a label name,
-~ ignore it, and output a call to swap.
-~
 ~   This is sufficient to implement docol, and that's probably the only thing
 ~ it should be used for.
 : log-load-L@'-alternate
   word dropstring
-
-  swap-transform-variables
-  L@' swap
-  swap-transform-variables
-
-  offset-to-target-address-space ,     ~ swap
+  log-load-roll-saved-label
+  ~   We now begin thinking of the label value as user data, so we need to
+  ~ notate that appropriately.
+  1 transform-apply-stack-delta
   ; make-immediate
 
 : log-load-L!'-alternate
   word dropstring
-
-  swap-transform-variables
-  L@' swap
-  swap-transform-variables
-
-  offset-to-target-address-space ,     ~ swap
+  ~   We cease to think of the label value as user data, so we need to notate
+  ~ that appropriately.
+  -1 transform-apply-stack-delta
+  log-load-unroll-saved-label
   ; make-immediate
 
+
 ~   This implements the log-load transform for a single word. It is directly
 ~ analogous to "interpret", and reading interpret.e may help in understanding
 ~ it, though it's meant to still make sense on its own.
@@ -899,10 +1747,18 @@ allocate-transform-state s" transform-state" variable
     swap drop ' log-load-semicolon-alternate swap } if
   dup s" ;asm" stringcmp 0 = {
     swap drop ' log-load-semicolon-assembly-alternate swap } if
+  dup s" [" stringcmp 0 = {
+    swap drop ' log-load-left-square-brace-alternate swap } if
+  dup s" ]" stringcmp 0 = {
+    swap drop ' log-load-right-square-brace-alternate swap } if
+  dup s" ~" stringcmp 0 = {
+    swap drop ' log-load-tilde-alternate swap } if
+  ~ It is nontrivial to construct a string with a double-quote in it.
+  dup ' s" entry-to-name stringcmp 0 = {
+    swap drop ' log-load-string-alternate swap } if
   dup s" L@'" stringcmp 0 = { swap drop ' log-load-L@'-alternate swap } if
   dup s" L!'" stringcmp 0 = { swap drop ' log-load-L!'-alternate swap } if
-  drop
-  ~ (name as stack string, 0 or alternate entry pointer)
+  ~ (name as stack string, 0 or alternate entry pointer, name pointer)
 
   ~   If we have an alternate, we want to run that now, regardless of what
   ~ mode we're in. They're all flagged as immediate, but we don't even bother
@@ -910,11 +1766,12 @@ allocate-transform-state s" transform-state" variable
   ~ this transform there's three potential times at which we might execute
   ~ things, not two. The alternates are more immediate than immediate; they
   ~ run NOW, during the transformation.
-  dup {
-    dropstring-with-result entry-to-execution-token execute
+  over {
+    drop dropstring-with-result
+    entry-to-execution-token execute
     0 exit
   } if
-  drop
+  drop drop
   ~ (name as stack string)
 
   ~   Now we might have a compiled word, an immediate word, or an integer
@@ -929,32 +1786,26 @@ allocate-transform-state s" transform-state" variable
   ~ shadow an integer literal with a word definition. Oh, so limiting.
   value@ read-integer 0 = {
     ~ It's a number.
+    ~
+    ~ (name as stack string, integer value)
     dropstring-with-result
+    ~ (integer value)
 
     interpreter-flags @ 0x01 & {
       ~ We're in compile mode, so we want to generate code which will compile
       ~ the number.
-
-      swap-transform-variables
-      ~ Just like in log-load-create-alternate, we do these in reverse.
-      L@' log-load-comma
-      L@' log-load-comma
-      L@' litstring
-      swap-transform-variables
-
-      offset-to-target-address-space ,     ~ litstring
-      here @ s" lit" packstring 8 packalign here !
-      offset-to-target-address-space ,     ~ log-load-comma
-      swap ,                               ~ the value
-      offset-to-target-address-space ,     ~ log-load-comma
-
+      log-load-compile-literal
       0 exit
     } if
 
-    ~ We're in interpret mode, so we want to generate code which will push the
-    ~ number to the stack.
+    ~   We're in interpret mode, so we want to generate code which will push
+    ~ the number to the stack.
+    ~
+    ~   This is an immediate effect, so once we've done it, we update the user
+    ~ stack depth. An integer literal is a stack-depth delta of 1.
     swap-transform-variables L@' lit swap-transform-variables
     offset-to-target-address-space , ,
+    1 transform-apply-stack-delta
     0 exit
   } if
   ~ (name as stack string)
@@ -964,23 +1815,24 @@ allocate-transform-state s" transform-state" variable
   ~ means immediate words don't work with this transform. We still treat it
   ~ differently based on whether we're in compile mode.
   interpreter-flags @ 0x01 & {
-    ~ We're in compile mode. We compile code that compiles the word.
-    value@
-    swap-transform-variables
-    ~ Just like in log-load-create-alternate, we do these in reverse.
-    L@' log-load-comma
-    L@' log-load-find-execution-token
-    L@' litstring
-    swap-transform-variables
-
-    offset-to-target-address-space ,     ~ litstring
-    3roll here @ swap packstring 8 packalign here !
-    offset-to-target-address-space ,     ~ log-load-find-execution-token
-    offset-to-target-address-space ,     ~ log-load-comma
+    ~   We're in compile mode. We compile code that compiles the word.
+    ~
+    ~   Since that's not an immediate effect, we don't need to update the
+    ~ user stack depth. Instead, this is a key spot where we rely on that
+    ~ knowledge, to roll and unroll the log address.
+    value@ log-load-compile-dynamic-word
     dropstring 0 exit
   } if
   ~ (name as stack string)
 
+  ~   At this point we know we're in immediate mode and have a regular word.
+  ~ While we still have the name pointer handy, we check what delta this word
+  ~ will cause for the user stack depth, when run. See
+  ~ transform-get-stack-delta for more explanation of this. We keep track of
+  ~ the proposed delta until we actually process the word, down below.
+  value@ dup transform-get-stack-delta swap
+  ~ (name as stack string, proposed stack depth delta, name pointer)
+
   ~   We're in immediate mode. We compile code that runs the word immediately.
   ~ We check whether there's a label for the word; if there is, we'll output
   ~ that. Otherwise we'll output code that looks it up in the log and runs it.
@@ -999,44 +1851,42 @@ allocate-transform-state s" transform-state" variable
   ~   The first pass will never accidentally think it succeeded, because even
   ~ the reference to L' cold-start from the ELF header is a forward reference
   ~ and won't exist on the first pass.
-  value@
+  dup
   swap-transform-variables
   find-label
   swap-transform-variables
   {
     ~   Again just like in label-transform, we declare our use of the label
     ~ and get a value for it.
-    value@
     swap-transform-variables
     intern-label use-label
     swap-transform-variables
+    ~ (name as stack string, proposed stack depth delta, label value)
 
     ~   Like in label-transform, this is a codeword pointer, so we just output
     ~ it directly. Also as before, because we don't have to examine it, we
     ~ don't have to do anything special in the case where it's zero due to the
     ~ way the label loop works.
-    offset-to-target-address-space , dropstring 0 exit
+    ~
+    ~   This is an immediate effect, so once we've done that, we update the
+    ~ user stack depth.
+    offset-to-target-address-space ,
+    transform-apply-stack-delta
     dropstring 0 exit
   } if
+  ~ (name as stack string, proposed stack depth delta, name pointer)
 
   ~   There's no label for the word; that means it wasn't statically
   ~ compiled-in to the target executable. So we output code that looks up the
   ~ word by name on the log, then calls it.
-  value@
-  swap-transform-variables
-  ~ This is reverse order again.
-  L@' execute
-  L@' log-load-find-execution-token
-  L@' litstring
-  swap-transform-variables
-
-  offset-to-target-address-space ,     ~ litstring
-  3roll here @ swap packstring 8 packalign here !
-  offset-to-target-address-space ,     ~ log-load-find-execution-token
-  offset-to-target-address-space ,     ~ execute
-
-  ~ There's no such thing as not finding the word, with this transform. So
-  ~ we just exit.
+  ~
+  ~   This is also an immediate effect, despite being distinct from the above
+  ~ case, so once we've done that, we update the user stack depth.
+  ~
+  ~   There's no such thing as not finding the word, with this transform. So
+  ~ once we're done, we just exit.
+  log-load-execute-dynamic-word
+  transform-apply-stack-delta
   dropstring 0 ;
 
 
@@ -1056,9 +1906,13 @@ allocate-transform-state s" transform-state" variable
   ~ since client code will make its own updates to them and then rely on those
   ~ updates having taken effect. So we do the swap just once, here outside the
   ~ loop, and set it back when the loop ends.
+  ~
+  ~   We also take this opportunity to initialize the output-buffer-start and
+  ~ user-stack-depth fields of transform-state.
   here @ transform-state transform-state-saved-here !
   latest @ transform-state transform-state-saved-latest !
   over transform-state transform-state-output-buffer-start !
+  0 transform-state transform-state-user-stack-depth !
   here !
   0 latest !
   ~ Now the stack has nothing of ours on it, so client code can do its thing.
@@ -1082,6 +1936,7 @@ allocate-transform-state s" transform-state" variable
       0 transform-state transform-state-saved-here !
       0 transform-state transform-state-saved-latest !
       0 transform-state transform-state-output-buffer-start !
+      0 transform-state transform-state-user-stack-depth !
 
       ~  Also put the input source back how it was.
       main-input-buffer pop-input-buffer