summary refs log tree commit diff
path: root/transform.e
diff options
context:
space:
mode:
authorIrene Knapp <ireneista@irenes.space>2026-05-18 16:36:08 -0700
committerIrene Knapp <ireneista@irenes.space>2026-05-18 16:36:08 -0700
commiteec336dea3d86e176c4bd86c435e6be35fec64e2 (patch)
tree9d72ffb1ab138e693ef8de7cbfec58c10d04d412 /transform.e
parent5820c29532ba877ac50b331658a4b4dd7f138ded (diff)
okay there's a strat for making the here/latest variables now
it doesn't work yet, but it's gonna

the reason it doesn't work is that the new helper log-load-variable relies on the assembly-definition words being statically available, and they aren't yet

that's fine though, this is still a huge change, worth checking in. why?  well, it represents like 16 hours of debugging which culminated in some very minor changes to the semantics of the label transform, in order to make missing words easier to notice and debug. see comments for details.

woooooo :D

Force-Push: yes
Change-Id: Id8334819d165ba9e3156ef2bf32008af748eac29
Diffstat (limited to 'transform.e')
-rw-r--r--transform.e283
1 files changed, 246 insertions, 37 deletions
diff --git a/transform.e b/transform.e
index 35ccc9b..7f3d9ef 100644
--- a/transform.e
+++ b/transform.e
@@ -96,6 +96,7 @@
 ~ their runtime addresses, though it is otherwise allowed to modify and rely
 ~ on them in all the usual ways. The alternate versions are defined in this
 ~ file as their own words, "Lcreate", "L:", "L;", and "L;asm".
+~ TODO note L@' and L!'
 ~
 ~   Note that these alternates are applied via a purely lexical
 ~ transformation: when a word would be looked up in the dictionary to
@@ -240,6 +241,7 @@
       { drop zero-input-buffer-metadata } if-else ;
 
 
+~ TODO rename this to transformation-state
 : transform-state-saved-here ;
 : transform-state-saved-latest 8 + ;
 : transform-state-output-buffer-start 2 8 * + ;
@@ -307,6 +309,15 @@ allocate-transform-state s" transform-state" variable
   target-address-space-to-offset offset-to-host-address-space ;
 
 
+~   Prints three labelled lines describing the transformation. The first
+~ line shows the current contents of "here" and "latest"; the second shows
+~ the copies saved in transform-state; the third shows the output buffer
+~ start recorded in transform-state. Each value is printed with .hex64.
+~
+~ (--) presumably; this assumes .hex64 consumes the value it prints.
+: describe-transformation
+  ~   The leading spaces inside the string literals pad the three labels to
+  ~ the same width, so that the printed columns line up.
+  ."    active here " here @ .hex64 space ." latest " latest @ .hex64 newline
+  ."     saved here " transform-state transform-state-saved-here
+  @ .hex64 space
+  ." latest " transform-state transform-state-saved-latest @ .hex64 newline
+  ."   output start " transform-state transform-state-output-buffer-start
+  @ .hex64 newline ;
+
+
 ~ Label transform implementation
 ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ~
@@ -355,7 +366,7 @@ allocate-transform-state s" transform-state" variable
 
   ~ This looks up "docol" by label.
   swap-transform-variables
-  L@' docol
+  L@' docol-codeword-value
   L@' origin
   swap-transform-variables
   + ,
@@ -399,6 +410,47 @@ allocate-transform-state s" transform-state" variable
   ; make-immediate
 
 
+~   Because docol requires it, we provide a special mini-version of the label
+~ system. We only do L@' and L!', because that's all we need. These are real
+~ labels; there can be arbitrarily many of them, and they can have forward
+~ references.
+~
+~   The value that's accepted is in the host address space; the label is set
+~ to an offset; and the value that's returned is in the target address space.
+~
+~ (-- value)
+: label-L@'-alternate
+  ~   Read the label's name from the input. As elsewhere in this file, value@
+  ~ gets the stack address, which serves as a string pointer.
+  word value@

+  ~   Declare our use of the label and get a value for it. The label words
+  ~ are bracketed by swap-transform-variables, as they are at every other
+  ~ call site in this file.
+  swap-transform-variables
+  intern-label
+  use-label
+  swap-transform-variables

+  ~ Discard the name string; presumably this keeps the label's value on top.
+  dropstring-with-result

+  ~ The label holds an offset; what we return is a target-space address.
+  offset-to-target-address-space
+  ; make-immediate
+
+
+~ (value --)
+: label-L!'-alternate
+  ~ The incoming value is a host-space address, but labels are set to offsets.
+  host-address-space-to-offset

+  ~   Read the label's name from the input. As elsewhere in this file, value@
+  ~ gets the stack address, which serves as a string pointer.
+  word value@

+  ~   Intern the name, bracketed by swap-transform-variables like every other
+  ~ label operation in this file.
+  swap-transform-variables
+  intern-label
+  swap-transform-variables

+  ~   Discard the name string; presumably this keeps the result of
+  ~ intern-label on top, with the offset computed earlier just beneath it.
+  dropstring-with-result

+  ~ Record the offset as the label's value.
+  swap-transform-variables
+  set-label
+  swap-transform-variables
+  ; make-immediate
+
+
 ~   This implements the label transform for a single word. It is directly
 ~ analogous to "interpret", and reading interpret.e may help in understanding
 ~ it, though it's meant to still make sense on its own.
@@ -428,6 +480,8 @@ allocate-transform-state s" transform-state" variable
   dup s" :" stringcmp 0 = { swap drop ' L: swap } if
   dup s" ;" stringcmp 0 = { swap drop ' L; swap } if
   dup s" ;asm" stringcmp 0 = { swap drop ' L;asm swap } if
+  dup s" L@'" stringcmp 0 = { swap drop ' label-L@'-alternate swap } if
+  dup s" L!'" stringcmp 0 = { swap drop ' label-L!'-alternate swap } if
   drop swap
   ~ (name as stack string, 0 or alternate entry pointer, name pointer)
 
@@ -495,25 +549,61 @@ allocate-transform-state s" transform-state" variable
         ~ and clean up.
         offset-to-target-address-space , drop dropstring 0 exit
       } if
-    } if
-  } if
-  ~ (name as stack string, immediate entry pointer, name pointer)
 
-  ~   If we got here, one of three things is true: We're in interpret mode;
-  ~ the word is immediate; or no word was found. If the immediate entry
-  ~ pointer is non-zero, run it.
-  over {
-    drop dropstring-with-result entry-to-execution-token execute
-    0 exit
-  } if
+      ~   If we got here, we're in compile mode, no label was found, and even
+      ~ if there was a candidate for an immediate word it wasn't flagged as
+      ~ immediate. There are two possibilities: It's genuinely missing, or it's
+      ~ an integer literal. We decline to run the candidate immediate entry,
+      ~ even if it exists, because that's not the correct semantics.
+      ~
+      ~   If the word is genuinely missing, we want to make sure we make it
+      ~ all the way to the not-found error-handling code at the end, because
+      ~ that will be way easier to debug than doing the wrong thing will. Way,
+      ~ way easier. Far less staring at numbers.
+      ~
+      ~   Anyway, we no longer need the immediate entry pointer, so we drop
+      ~ it.
+      drop drop
+    } {
+      ~   If we get here, we're in compile mode, but there was a candidate
+      ~ entry for an immediate word, and it was indeed flagged as immediate.
+      ~ So, we run it and exit.
+      drop dropstring-with-result entry-to-execution-token execute
+      0 exit
+    } if-else
+
+    ~   This is the end of the compile-mode branch. As you can see by tracing
+    ~ through all the above cases, if we got here, the two possibilities are
+    ~ that the word is genuinely missing, or it's an integer literal.
+    ~
+    ~   Please notice that these are the same two possibilities remaining at
+    ~ the end of the immediate-mode branch, below.
+  } {
+    ~   If we got here, we're in interpret mode. There are three
+    ~ possibilities: there's an immediate word which we should run; it's an
+    ~ integer literal; or the word is genuinely missing.
+    ~
+    ~   If the immediate entry pointer is non-zero, run it and exit.
+    over {
+      drop dropstring-with-result entry-to-execution-token execute
+      0 exit
+    } if
 
-  ~   If we're still here, it wasn't in the dictionary. Also, we don't need
-  ~ the immediate entry pointer, either.
-  drop drop
+    ~   There was no immediate word, so either it's an integer literal or
+    ~ the word is genuinely missing. Please notice that these are the same two
+    ~ possibilities remaining at the end of the compile-mode branch, above.
+    ~
+    ~   We no longer need the immediate entry pointer, so drop it.
+    drop drop
+  } if-else
   ~ (name as stack string)
 
-  ~   If it's not in the dictionary, check whether it's an integer literal. As
-  ~ before, we get the stack address and use it as a string pointer.
+  ~   If we got here, one of two things is true: the word is an integer
+  ~ literal, or it's genuinely missing. We know this because both the mode
+  ~ cases above end with these as the only two remaining possibilities.
+  ~
+  ~   Check whether it's an integer literal. As before, we get the stack
+  ~ address and use it as a string pointer.
   value@ read-integer 0 = {
     ~ It's a number.
     interpreter-flags @ 0x01 & {
@@ -536,8 +626,13 @@ allocate-transform-state s" transform-state" variable
     0 exit
   } if
 
-  ~ If it's neither in the dictionary nor a number, just print an error.
-  s" No such word: " emitstring value@ emitstring dropstring 0 ;
+  ~   If it's neither in the dictionary nor a number, just print an error.
+  ~
+  ~   It's really important, when maintaining this code, to make sure that all
+  ~ the possible ways the word can fail to exist, end up here. Doing anything
+  ~ else is going to result in many hours of trying to untangle the
+  ~ consequences of incorrect behavior, after-the-fact.
+  s" No such word: " emitstring value@ emitstring newline dropstring 0 ;
 
 
 ~   This implements the label transform for all words in a region given as an
@@ -623,17 +718,31 @@ allocate-transform-state s" transform-state" variable
 ~ to be extremely useful to read and understand ":" in interpret.e before
 ~ attempting to understand "log-load-colon-alternate".
 : log-load-colon-alternate
-  ~ ~ This calls "log-load-create" instead of "create".
+  ~ This calls "log-load-create" instead of "create".
   word value@ log-load-create-alternate dropstring
 
-  ~ This looks up "docol" by label.
-  ~ swap-transform-variables
-  ~ L@' docol
-  ~ L@' origin
-  ~ swap-transform-variables
-  ~ + ,
+  ~   We generate code that looks up "docol" by name, runs it to get the
+  ~ codeword pointer, then finally appends it to the entry.
+  swap-transform-variables
+  ~ As usual, we do these in reverse.
+  L@' log-load-comma
+  L@' execute
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
 
-  ~ TODO note no hiding the entry
+  ~   Each label value is converted to the target address space and appended
+  ~ with ",". The values come off the stack in the opposite order from how
+  ~ they were fetched, so the output reads: litstring, then the name "docol"
+  ~ packed in place at "here", then the remaining three words in the order
+  ~ they will run.
+  offset-to-target-address-space ,     ~ litstring
+  here @ s" docol" packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ execute
+  offset-to-target-address-space ,     ~ log-load-comma
+
+  ~   This is where we would mark the entry hidden, but we don't do that. It
+  ~ won't shadow anything and it won't be called until the entire log-load
+  ~ routine has finished.
+
+  ~   Switching between immediate and compile mode is one of the very few
+  ~ things that happens NOW, while the log-load transform is actually running.
   ]
   ;
 
@@ -643,16 +752,24 @@ allocate-transform-state s" transform-state" variable
 ~ likely to be extremely useful to read and understand ";" in interpret.e
 ~ before attempting to understand "log-load-semicolon-alternate".
 : log-load-semicolon-alternate
-  ~ ~ This looks up "exit" by label.
-  ~ swap-transform-variables
-  ~ L@' exit
-  ~ swap-transform-variables
-  ~ offset-to-target-address-space ,
+  ~   We generate code that looks up "exit" by name and appends it to the
+  ~ entry.
+  swap-transform-variables
+  ~ As usual, we do these in reverse.
+  L@' log-load-comma
+  L@' log-load-find-execution-token
+  L@' litstring
+  swap-transform-variables
 
-  ~ latest @ unhide-entry
+  ~   Unlike the colon alternate, no "execute" is output here: the result of
+  ~ the lookup is handed straight to log-load-comma. The name "exit" is
+  ~ packed in place at "here", directly after the litstring reference.
+  offset-to-target-address-space ,     ~ litstring
+  here @ s" exit" packstring 8 packalign here !
+  offset-to-target-address-space ,     ~ log-load-find-execution-token
+  offset-to-target-address-space ,     ~ log-load-comma
 
-  ~ ~   Since [ is an immediate word, we have to go to extra trouble to compile
-  ~ ~ it as part of ;.
+  ~ This is where we would unhide the entry, but again, we don't do that.
+
+  ~   Since [ is an immediate word, we have to go to extra trouble to compile
+  ~ it as part of ;.
   [ ' [ entry-to-execution-token , ]
   ; make-immediate
 
@@ -673,6 +790,60 @@ allocate-transform-state s" transform-state" variable
   ~ [ ' [ entry-to-execution-token , ]
   ; make-immediate
 
+
+~   Because docol requires it, we provide a special mini-version of the label
+~ system. We only do L@' and L!', because that's all we need. Unlike the
+~ version of this feature for the label transform, for the log-load transform,
+~ we heavily restrict the use-case.
+~
+~   The implementation strategy is that we ignore the label name, and store
+~ the value on the stack when the generated log-load routine runs. So, each
+~ instance of L!' must be closely followed by a matching instance of L@'. Each
+~ label can only ever be used exactly once, and it must be a backward
+~ reference. Furthermore, there is a very tight restriction on what can be
+~ on the stack. The easiest way to explain it is by showing the interface of
+~ these words from the transformed code's perspective:
+~
+~   L!' is (preserved value, value of label
+~           -- value of label, preserved value)
+~   L@' is (value of label, preserved value
+~           -- preserved value, value of label)
+~
+~   The preserved value is simply another item on the stack, which the label
+~ takes pains not to interfere with.
+~
+~   There is no adjustment done on the saved value, since it's created in the
+~ target address space and then also used in the target address space. It
+~ wouldn't actually be necessary to use this at all, since checking "here"
+~ would be sufficient, but then the code would have to do something different
+~ depending on which transform it's running under, and there'd have to be a
+~ mechanism for that.
+~
+~   If that sounds super complex: All we actually do is read a label name,
+~ ignore it, and output a call to swap.
+~
+~   This is sufficient to implement docol, and that's probably the only thing
+~ it should be used for.
+: log-load-L@'-alternate
+  ~ Read the label name and throw it away; it is deliberately ignored.
+  word dropstring

+  ~   Fetch the value of the label for "swap", bracketed by
+  ~ swap-transform-variables like every other label lookup in this file.
+  swap-transform-variables
+  L@' swap
+  swap-transform-variables

+  ~   Output it, converted to the target address space. The net effect is a
+  ~ call to swap in the generated code.
+  offset-to-target-address-space ,     ~ swap
+  ; make-immediate
+
+~   The log-load alternate for L!'. Its body is identical to that of
+~ log-load-L@'-alternate: it reads a label name, ignores it, and outputs a
+~ call to swap.
+: log-load-L!'-alternate
+  ~ Read the label name and throw it away; it is deliberately ignored.
+  word dropstring

+  ~   Fetch the value of the label for "swap", bracketed by
+  ~ swap-transform-variables like every other label lookup in this file.
+  swap-transform-variables
+  L@' swap
+  swap-transform-variables

+  ~ Output it, converted to the target address space.
+  offset-to-target-address-space ,     ~ swap
+  ; make-immediate
+
 ~   This implements the log-load transform for a single word. It is directly
 ~ analogous to "interpret", and reading interpret.e may help in understanding
 ~ it, though it's meant to still make sense on its own.
@@ -706,6 +877,8 @@ allocate-transform-state s" transform-state" variable
     swap drop ' log-load-semicolon-alternate swap } if
   dup s" ;asm" stringcmp 0 = {
     swap drop ' log-load-semicolon-assembly-alternate swap } if
+  dup s" L@'" stringcmp 0 = { swap drop ' log-load-L@'-alternate swap } if
+  dup s" L!'" stringcmp 0 = { swap drop ' log-load-L!'-alternate swap } if
   drop
   ~ (name as stack string, 0 or alternate entry pointer)
 
@@ -787,10 +960,46 @@ allocate-transform-state s" transform-state" variable
   ~ (name as stack string)
 
   ~   We're in immediate mode. We compile code that runs the word immediately.
-  ~ We check whether there's a label for the word; if there is, we output
-  ~ that. Otherwise we output code that looks it up and runs it.
-  ~ TODO
+  ~ We check whether there's a label for the word; if there is, we'll output
+  ~ that. Otherwise we'll output code that looks it up in the log and runs it.
+  ~
+  ~   Just like in label-transform, we use find-label to check whether a label
+  ~ exists without declaring a dependency on it, then if it does, we do
+  ~ use-label to ask for its value.
+  ~
+  ~   There's one additional wrinkle to remember here: We're running inside
+  ~ the label loop, and warm-start appears before all the normal words in the
+  ~ executable. So all the labels we'll be checking are forward references,
+  ~ and on the very first pass they definitely won't be defined. That's fine
+  ~ though, they will exist on all subsequent passes, so things will
+  ~ definitely still converge.
+  ~
+  ~   The first pass will never accidentally think it succeeded, because even
+  ~ the reference to L' cold-start from the ELF header is a forward reference
+  ~ and won't exist on the first pass.
+  value@
+  swap-transform-variables
+  find-label
+  swap-transform-variables
+  {
+    ~   Again just like in label-transform, we declare our use of the label
+    ~ and get a value for it.
+    value@
+    swap-transform-variables
+    intern-label use-label
+    swap-transform-variables
+
+    ~   Like in label-transform, this is a codeword pointer, so we just output
+    ~ it directly. Also as before, because we don't have to examine it, we
+    ~ don't have to do anything special in the case where it's zero due to the
+    ~ way the label loop works.
+    offset-to-target-address-space , dropstring 0 exit
+    dropstring 0 exit
+  } if
 
+  ~   There's no label for the word; that means it wasn't statically
+  ~ compiled-in to the target executable. So we output code that looks up the
+  ~ word by name on the log, then calls it.
   value@
   swap-transform-variables
   ~ This is reverse order again.