1 files changed, 246 insertions, 37 deletions
diff --git a/transform.e b/transform.e
index 35ccc9b..7f3d9ef 100644
--- a/transform.e
+++ b/transform.e
@@ -96,6 +96,7 @@
 ~ their runtime addresses, though it is otherwise allowed to modify and rely
 ~ on them in all the usual ways. The alternate versions are defined in this
 ~ file as their own words, "Lcreate", "L:", "L;", and "L;asm".
+~ TODO note L@' and L!'
 ~
 ~   Note that these alternates are applied via a purely lexical
 ~ transformation: when a word would be looked up in the dictionary to
@@ -240,6 +241,7 @@
       { drop zero-input-buffer-metadata } if-else ;
 
 
+~ TODO rename this to transformation-state
 : transform-state-saved-here ;
 : transform-state-saved-latest 8 + ;
 : transform-state-output-buffer-start 2 8 * + ;
@@ -307,6 +309,15 @@ allocate-transform-state s" transform-state" variable
   target-address-space-to-offset offset-to-host-address-space ;
 
 
+: describe-transformation     ~ Prints the values labelled in the strings.
+  ."    active here " here @ .hex64 space ." latest " latest @ .hex64 newline
+  ."     saved here " transform-state transform-state-saved-here
+  @ .hex64 space               ~ saved "here" field of transform-state
+  ." latest " transform-state transform-state-saved-latest @ .hex64 newline
+  ."   output start " transform-state transform-state-output-buffer-start
+  @ .hex64 newline ;           ~ output-buffer-start field of transform-state
+
+
 ~ Label transform implementation
 ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ~
@@ -355,7 +366,7 @@ allocate-transform-state s" transform-state" variable
 
   ~ This looks up "docol" by label.
   swap-transform-variables
-  L@' docol
+  L@' docol-codeword-value
   L@' origin
   swap-transform-variables
   + ,
@@ -399,6 +410,47 @@ allocate-transform-state s" transform-state" variable
   ; make-immediate
 
 
+~   Because docol requires it, we provide a special mini-version of the label
+~ system. We only do L@' and L!', because that's all we need. These are real
+~ labels; there can be arbitrarily many of them, and they can have forward
+~ references.
+~
+~   The value that's accepted is in the host address space; the label is set
+~ to an offset; and the value that's returned is in the target address space.
+~
+~ (-- value)
+: label-L@'-alternate
+  word value@                     ~ label name, as a stack-string pointer
+
+  swap-transform-variables        ~ label words run between these swaps
+  intern-label
+  use-label                       ~ declare the use and get the label's value
+  swap-transform-variables
+
+  dropstring-with-result          ~ discard the name, keeping the value
+
+  offset-to-target-address-space  ~ labels hold offsets; return an address
+  ; make-immediate
+
+
+~ (value --)
+: label-L!'-alternate
+  host-address-space-to-offset    ~ labels are set to offsets, not addresses
+
+  word value@                     ~ label name, as a stack-string pointer
+
+  swap-transform-variables        ~ label words run between these swaps
+  intern-label
+  swap-transform-variables
+
+  dropstring-with-result          ~ discard the name, keeping intern's result
+
+  swap-transform-variables
+  set-label                       ~ presumably stores the offset in the label
+  swap-transform-variables
+  ; make-immediate
+
+
 ~   This implements the label transform for a single word. It is directly
 ~ analogous to "interpret", and reading interpret.e may help in understanding
 ~ it, though it's meant to still make sense on its own.
@@ -428,6 +480,8 @@ allocate-transform-state s" transform-state" variable
   dup s" :" stringcmp 0 = { swap drop ' L: swap } if
   dup s" ;" stringcmp 0 = { swap drop ' L; swap } if
   dup s" ;asm" stringcmp 0 = { swap drop ' L;asm swap } if
+  dup s" L@'" stringcmp 0 = { swap drop ' label-L@'-alternate swap } if
+  dup s" L!'" stringcmp 0 = { swap drop ' label-L!'-alternate swap } if
   drop swap
   ~ (name as stack string, 0 or alternate entry pointer, name pointer)
 
@@ -495,25 +549,61 @@ allocate-transform-state s" transform-state" variable
         ~ and clean up.
         offset-to-target-address-space , drop dropstring 0 exit
       } if
-    } if
-  } if
-  ~ (name as stack string, immediate entry pointer, name pointer)
 
-  ~   If we got here, one of three things is true: We're in interpret mode;
-  ~ the word is immediate; or no word was found. If the immediate entry
-  ~ pointer is non-zero, run it.
-  over {
-    drop dropstring-with-result entry-to-execution-token execute
-    0 exit
-  } if
+      ~   If we got here, we're in compile mode, no label was found, and even
+      ~ if there was a candidate for an immediate word it wasn't flagged as
+      ~ immediate. There are two possibilities: It's genuinely missing, or it's
+      ~ an integer literal. We decline to run the candidate immediate entry,
+      ~ even if it exists, because that's not the correct semantics.
+      ~
+      ~   If the word is genuinely missing, we want to make sure we make it
+      ~ all the way to the not-found error-handling code at the end, because
+      ~ that will be way easier to debug than doing the wrong thing will. Way,
+      ~ way easier. Far less staring at numbers.
+      ~
+      ~   Anyway, we no longer need the immediate entry pointer, so we drop
+      ~ it.
+      drop drop
+    } {
+      ~   If we get here, we're in compile mode, but there was a candidate
+      ~ entry for an immediate word, and it was indeed flagged as immediate.
+      ~ So, we run it and exit.
+      drop dropstring-with-result entry-to-execution-token execute
+      0 exit
+    } if-else
+
+    ~   This is the end of the compile-mode branch. As you can see by tracing
+    ~ through all the above cases, if we got here, the two possibilities are
+    ~ that the word is genuinely missing, or it's an integer literal.
+    ~
+    ~   Please notice that these are the same two possibilities remaining at
+    ~ the end of the interpret-mode branch, below.
+  } {
+    ~   If we got here, we're in interpret mode. There are three
+    ~ possibilities: there's an immediate word which we should run; it's an
+    ~ integer literal; or the word is genuinely missing.
+    ~
+    ~   If the immediate entry pointer is non-zero, run it and exit.
+    over {
+      drop dropstring-with-result entry-to-execution-token execute
+      0 exit
+    } if
 
-  ~   If we're still here, it wasn't in the dictionary. Also, we don't need
-  ~ the immediate entry pointer, either.
-  drop drop
+    ~   There was no immediate word, so either it's an integer literal or
+    ~ the word is genuinely missing. Please notice that these are the same two
+    ~ possibilities remaining at the end of the compile-mode branch, above.
+    ~
+    ~   We no longer need the immediate entry pointer, so drop it.
+    drop drop
+  } if-else
   ~ (name as stack string)
 
-  ~   If it's not in the dictionary, check whether it's an integer literal. As
-  ~ before, we get the stack address and use it as a string pointer.
+  ~   If we got here, one of two things is true: the word is an integer
+  ~ literal, or it's genuinely missing. We know this because both the mode
+  ~ cases above end with these as the only two remaining possibilities.
+  ~
+  ~   Check whether it's an integer literal. As before, we get the stack
+  ~ address and use it as a string pointer.
   value@ read-integer 0 = {
     ~ It's a number.
     interpreter-flags @ 0x01 & {
@@ -536,8 +626,13 @@ allocate-transform-state s" transform-state" variable
     0 exit
   } if
 
-  ~ If it's neither in the dictionary nor a number, just print an error.
-  s" No such word: " emitstring value@ emitstring dropstring 0 ;
+  ~   If it's neither in the dictionary nor a number, just print an error.
+  ~
+  ~   It's really important, when maintaining this code, to make sure that all
+  ~ the possible ways the word can fail to exist, end up here. Doing anything
+  ~ else is going to result in many hours of trying to untangle the
+  ~ consequences of incorrect behavior, after-the-fact.
+  s" No such word: " emitstring value@ emitstring newline dropstring 0 ;
 
 
 ~   This implements the label transform for all words in a region given as an
@@ -623,17 +718,31 @@ allocate-transform-state s" transform-state" variable
 ~ to be extremely useful to read and understand ":" in interpret.e before
 ~ attempting to understand "log-load-colon-alternate".
 : log-load-colon-alternate
-  ~ ~ This calls "log-load-create" instead of "create".
+  ~ This calls "log-load-create-alternate" instead of "create".
   word value@ log-load-create-alternate dropstring
 
-  ~ This looks up "docol" by label.
-  ~ swap-transform-variables
-  ~ L@' docol
-  ~ L@' origin
-  ~ swap-transform-variables
-  ~ + ,
+  ~   We generate code that looks up "docol" by name, runs it to get the
+  ~ codeword pointer, then finally appends it to the entry.
+  swap-transform-variables
+  ~ As usual, these are in reverse: the last one pushed is emitted first.
+  L@' log-load-comma
+  L@' execute
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
 
-  ~ TODO note no hiding the entry
+  offset-to-target-address-space ,     ~ litstring
+  here @ s" docol" packstring 8 packalign here !   ~ the name to look up
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ execute
+  offset-to-target-address-space ,     ~ log-load-comma
+
+  ~   This is where we would mark the entry hidden, but we don't do that. It
+  ~ won't shadow anything and it won't be called until the entire log-load
+  ~ routine has finished.
+
+  ~   Switching between immediate and compile mode is one of the very few
+  ~ things that happens NOW, while the log-load transform is actually running.
   ]
   ;
 
@@ -643,16 +752,24 @@ allocate-transform-state s" transform-state" variable
 ~ likely to be extremely useful to read and understand ";" in interpret.e
 ~ before attempting to understand "log-load-semicolon-alternate".
 : log-load-semicolon-alternate
-  ~ ~ This looks up "exit" by label.
-  ~ swap-transform-variables
-  ~ L@' exit
-  ~ swap-transform-variables
-  ~ offset-to-target-address-space ,
+  ~   We generate code that looks up "exit" by name and appends it to the
+  ~ entry.
+  swap-transform-variables
+  ~ As usual, these are in reverse: the last one pushed is emitted first.
+  L@' log-load-comma
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
 
-  ~ latest @ unhide-entry
+  offset-to-target-address-space ,     ~ litstring
+  here @ s" exit" packstring 8 packalign here !   ~ the name to look up
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ log-load-comma
 
-  ~ ~   Since [ is an immediate word, we have to go to extra trouble to compile
-  ~ ~ it as part of ;.
+  ~ This is where we would unhide the entry, but again, we don't do that.
+
+  ~   Since [ is an immediate word, we have to go to extra trouble to compile
+  ~ it as part of ;.
   [ ' [ entry-to-execution-token , ]
   ; make-immediate
 
@@ -673,6 +790,60 @@ allocate-transform-state s" transform-state" variable
   ~ [ ' [ entry-to-execution-token , ]
   ; make-immediate
 
+
+~   Because docol requires it, we provide a special mini-version of the label
+~ system. We only do L@' and L!', because that's all we need. Unlike the
+~ version of this feature for the label transform, for the log-load transform,
+~ we heavily restrict the use-case.
+~
+~   The implementation strategy is that we ignore the label name, and store
+~ the value on the stack when the generated log-load routine runs. So, each
+~ instance of L!' must be closely followed by a matching instance of L@'. Each
+~ label can only ever be used exactly once, and it must be a backward
+~ reference. Furthermore, there is a very tight restriction on what can be
+~ on the stack. The easiest way to explain it is by showing the interface of
+~ these words from the transformed code's perspective:
+~
+~   L!' is (preserved value, value of label
+~           -- value of label, preserved value)
+~   L@' is (value of label, preserved value
+~           -- preserved value, value of label)
+~
+~   The preserved value is simply another item on the stack, which the label
+~ takes pains not to interfere with.
+~
+~   There is no adjustment done on the saved value, since it's created in the
+~ target address space and then also used in the target address space. It
+~ wouldn't actually be necessary to use this at all, since checking "here"
+~ would be sufficient, but then the code would have to do something different
+~ depending on which transform it's running under, and there'd have to be a
+~ mechanism for that.
+~
+~   If that sounds super complex: All we actually do is read a label name,
+~ ignore it, and output a call to swap.
+~
+~   This is sufficient to implement docol, and that's probably the only thing
+~ it should be used for.
+: log-load-L@'-alternate
+  word dropstring                      ~ read the label name and ignore it
+
+  swap-transform-variables
+  L@' swap                             ~ look up "swap" by label
+  swap-transform-variables
+
+  offset-to-target-address-space ,     ~ swap
+  ; make-immediate
+
+: log-load-L!'-alternate
+  word dropstring                      ~ read the label name and ignore it
+
+  swap-transform-variables
+  L@' swap                             ~ look up "swap" by label
+  swap-transform-variables
+
+  offset-to-target-address-space ,     ~ swap
+  ; make-immediate
+
 ~   This implements the log-load transform for a single word. It is directly
 ~ analogous to "interpret", and reading interpret.e may help in understanding
 ~ it, though it's meant to still make sense on its own.
@@ -706,6 +877,8 @@ allocate-transform-state s" transform-state" variable
     swap drop ' log-load-semicolon-alternate swap } if
   dup s" ;asm" stringcmp 0 = {
     swap drop ' log-load-semicolon-assembly-alternate swap } if
+  dup s" L@'" stringcmp 0 = { swap drop ' log-load-L@'-alternate swap } if
+  dup s" L!'" stringcmp 0 = { swap drop ' log-load-L!'-alternate swap } if
   drop
   ~ (name as stack string, 0 or alternate entry pointer)
 
@@ -787,10 +960,46 @@ allocate-transform-state s" transform-state" variable
   ~ (name as stack string)
 
   ~   We're in immediate mode. We compile code that runs the word immediately.
-  ~ We check whether there's a label for the word; if there is, we output
-  ~ that. Otherwise we output code that looks it up and runs it.
-  ~ TODO
+  ~ We check whether there's a label for the word; if there is, we'll output
+  ~ that. Otherwise we'll output code that looks it up in the log and runs it.
+  ~
+  ~   Just like in label-transform, we use find-label to check whether a label
+  ~ exists without declaring a dependency on it, then if it does, we do
+  ~ use-label to ask for its value.
+  ~
+  ~   There's one additional wrinkle to remember here: We're running inside
+  ~ the label loop, and warm-start appears before all the normal words in the
+  ~ executable. So all the labels we'll be checking are forward references,
+  ~ and on the very first pass they definitely won't be defined. That's fine
+  ~ though, they will exist on all subsequent passes, so things will
+  ~ definitely still converge.
+  ~
+  ~   The first pass will never accidentally think it succeeded, because even
+  ~ the reference to L' cold-start from the ELF header is a forward reference
+  ~ and won't exist on the first pass.
+  value@
+  swap-transform-variables
+  find-label
+  swap-transform-variables
+  {
+    ~   Again just like in label-transform, we declare our use of the label
+    ~ and get a value for it.
+    value@
+    swap-transform-variables
+    intern-label use-label
+    swap-transform-variables
+
+    ~   Like in label-transform, this is a codeword pointer, so we just output
+    ~ it directly. Also as before, because we don't have to examine it, we
+    ~ don't have to do anything special in the case where it's zero due to the
+    ~ way the label loop works.
+    offset-to-target-address-space , dropstring 0 exit
+    dropstring 0 exit
+  } if
 
+  ~   There's no label for the word; that means it wasn't statically
+  ~ compiled-in to the target executable. So we output code that looks up the
+  ~ word by name on the log, then calls it.
   value@
   swap-transform-variables
   ~ This is reverse order again.