summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- evoke.e 38
-rw-r--r-- execution.e 7
-rw-r--r-- labels.e 20
-rw-r--r-- transform.e 118
4 files changed, 130 insertions, 53 deletions
diff --git a/evoke.e b/evoke.e
index 68645cb..86e79c8 100644
--- a/evoke.e
+++ b/evoke.e
@@ -2,9 +2,43 @@
 ~     | ./quine > evoke && chmod 755 evoke && ./evoke
 
 1024 read-to-buffer
-: foo ;asm
+~   Any word that ends with a normal semicolon will require this, so its
+~ definition has to be near the start.
+: exit
+  [ here @
+    :rsi pack-popcontrol
+    here ! ] ;asm
+
+: lit
+  [ here @
+    lods64
+    :rax push-reg64
+    here ! ] ;asm
+
+: sys-exit
+  [ here @
+    60 :rax mov-reg64-imm64
+    :rdi pop-reg64
+    syscall
+    hlt
+    here ! ] ;asm
+
+: happy-path 42 sys-exit ;
+
+~  : lods64 ;
+~  : :rax ;
+~  : push-reg64 ;
+~  : mov-reg64-imm64 ;
+~  : :rdi ;
+~  : pop-reg64 ;
+~  : syscall ;
+~  : hlt ;
+~  : here ;
+~  : @ ;
+~  : ! ;
+~  : [ ;
+~  : ] ;
 
-: bar ;asm
 pyrzqxgl
 s" source-to-precompile" variable
 
diff --git a/execution.e b/execution.e
index 48761de..8410678 100644
--- a/execution.e
+++ b/execution.e
@@ -235,12 +235,12 @@
 ~ * rbp points to the top of the control stack.
 ~
 ~ (source register, base address -- new base address)
-: pushcontrol
+: pack-pushcontrol
   swap :rbp -8 :rbp lea-reg64-disp8-reg64
   swap :rbp 0 mov-disp8-reg64-reg64 ;
 
 ~ (target register, base address -- new base address)
-: popcontrol
+: pack-popcontrol
   :rbp 0 3roll mov-reg64-disp8-reg64
   :rbp 8 :rbp lea-reg64-disp8-reg64 ;
 
@@ -480,6 +480,9 @@
   8 packalign
   current-offset L!' warm-start
 
+  ~ TODO this is tied to the specific example in evoke
+  over L@' happy-path swap - L@' origin + pack64
+
   ~   Before handing off to us, cold-start pushed a single value onto the
   ~ stack, a pointer to the beginning of the heap. Now, we load our entire
   ~ Forth implementation onto that heap, beginning with the minimal set of
diff --git a/labels.e b/labels.e
index c437003..4e315f7 100644
--- a/labels.e
+++ b/labels.e
@@ -106,9 +106,20 @@
   labels oldest-entry-in { dup }
   { dup label-heading labels next-newer-entry-in } while drop ;
 
+~   This looks up a label by name. If the label exists, the return value is
+~ an entry pointer; if it doesn't, the return value is 0.
+~
+~   Keep in mind that this is a pointer to the entry in the label dictionary,
+~ not the label's value. It's common for a label's value to itself be a
+~ pointer to a dictionary entry, in the program being compiled, so it's easy
+~ to confuse that.
+~
+~ (name string pointer -- label entry pointer, or 0)
+: find-label labels swap find-in ;
+
 ~   This creates a new label given a name for it, initializing its value and
-~ status to zero and adding it to the dictionary. This is responsible for the
-~ initial guess of zero on the first pass.
+~ status to zero and adding it to the label dictionary. This is responsible
+~ for the initial guess of zero on the first pass.
 ~
 ~ (name string pointer -- )
 : new-label labels create-in 0 , 0 , ;
@@ -122,9 +133,9 @@
 ~ Either way, it returns an entry pointer. It's named after the function
 ~ "intern" that many Lisp dialects have.
 ~
-~ (name string pointer -- entry pointer)
+~ (name string pointer -- label entry pointer)
 : intern-label
-  dup labels swap find-in
+  dup find-label
   dup { swap drop }
       { drop dup new-label labels swap find-in } if-else ;
 
@@ -279,6 +290,7 @@
   ~ (iteration count, execution token, output start, output point)
   { 3 pick 100 > }
   { 2 pick execute 4 roll 1+ 4 unroll
+
     check-labels-converged
     { 4 roll drop
       3 roll drop
diff --git a/transform.e b/transform.e
index d727d53..e913d42 100644
--- a/transform.e
+++ b/transform.e
@@ -18,9 +18,12 @@
 ~ literals, are looked up via the label facility.
 ~
 ~   Since the label facility is able to resolve forward references, there is
-~ no hard requirement that words be topologically sorted, but forward
-~ references should still be kept to a minimum, since that's a significant
-~ difference from un-transformed code that could easily become confusing.
+~ no hard requirement that everything in the file be topologically sorted.
+~ However, the transform will refuse to create forward references to compiled
+~ words. If you want them, you can create them by hand by calling use-label
+~ yourself. This restriction is in place because allowing forward references
+~ would be a significant difference from un-transformed code that could easily
+~ become confusing, and because it simplifies the implementation a bit.
 ~
 ~   Compilation words do make extensive reference to the global variables
 ~ "here" and "latest". In particular, flow-control words such as if-else
@@ -40,15 +43,7 @@
 ~ for not doing anything else that would rely on "here" and "latest" matching
 ~ their runtime addresses, though it is otherwise allowed to modify and rely
 ~ on them in all the usual ways. The alternate versions are defined in this
-~ file as their own words, "Lcreate", "L:", "L;", and "L;asm". The alternates
-~ rely on various labels, all of which must be defined elsewhere:
-~
-~   * origin
-~   * docol
-~   * exit
-~   * :
-~   * ;
-~   * ;asm
+~ file as their own words, "Lcreate", "L:", "L;", and "L;asm".
 ~
 ~   Note that these alternates are applied via a purely lexical
 ~ transformation: when a word would be looked up in the dictionary to
@@ -60,6 +55,10 @@
 ~ the rest of Evocation. There's no need to keep it separate like there is
 ~ with the other variables. This makes it easy to change modes.
 ~
+~   The transformation and the alternates rely on various labels, all of which
+~ must be defined elsewhere, lest the label loop fail to converge: "lit",
+~ "origin", "docol", "exit", ":", ";", and ";asm".
+~
 ~   All of these limitations result in the compiled code being, in effect,
 ~ written in a dialect which is like Evocation, but more restricted. This is
 ~ acceptable, because the label transform is intended for compiling code that
@@ -180,6 +179,7 @@ allocate-transform-state s" transform-state" variable
   dup stringlen 1 + dup 3unroll
   here @ 10 + 3unroll memmove
   here @
+
   ~   This value of "latest" is going into the generated output, so call
   ~ transform-offset on it first.
   latest @ transform-offset pack64
@@ -188,6 +188,14 @@ allocate-transform-state s" transform-state" variable
   +
   8 packalign
   here @ latest !
+
+  ~   Now we're immediately after the word header, which is where the codeword
+  ~ will be. This is the value the label should take on, so we set it.
+  dup here @ 10 +
+  swap-transform-variables
+  intern-label set-label
+  swap-transform-variables
+
   here ! ;
 
 
@@ -282,23 +290,6 @@ allocate-transform-state s" transform-state" variable
          3roll drop swap } unless
   ~ (name as stack string, immediate entry pointer, name pointer)
 
-  ~   For compile mode, we need to look the word up in the output buffer. We
-  ~ can't easily traverse the next-entry pointers in the output buffer's
-  ~ dictionary, so we use the label.
-  ~
-  ~   Labels point to codewords (because that's what "Lcreate" does), so we
-  ~ have to convert it to get the entry pointer. Since we don't know the
-  ~ word's name statically, this is a rare scenario where we can't use the
-  ~ abbreviated label syntax, but that's easy enough.
-  ~
-  ~   We do have to be careful of one thing: On the first run, the label may
-  ~ be zero!
-  swap-transform-variables
-  intern-label use-label
-  swap-transform-variables
-  dup { execution-token-to-entry } if
-  ~ (name as stack string, immediate entry pointer, compiled entry pointer)
-
   ~   In regular "interpret", we would check whether we found the word before
   ~ checking the mode. However, we have three different places words could
   ~ come from, so that's not a simple notion. So, we check the mode first.
@@ -309,30 +300,67 @@ allocate-transform-state s" transform-state" variable
     ~ override an immediate word with a non-immediate word of the same name.
     over dup { entry-flags@ 0x01 & not } if
 
-    ~   Either there was no immediate entry, or the immediate entry wasn't
-    ~ flagged as an immediate word. So we treat this as a compilation, which
-    ~ means we append a word to the heap. Specificaly, of course, we use the
-    ~ compiled entry to do that.
-    { swap drop dropstring-with-result
-      entry-to-execution-token ,
-      0 exit } if
+    {
+      ~   Either there was no immediate entry, or the immediate entry wasn't
+      ~ flagged as an immediate word. So we check whether this could be a
+      ~ compilation.
+      ~
+      ~   To do this, we need to look the word up in the output buffer. We
+      ~ can't easily traverse the next-entry pointers in the output buffer's
+      ~ dictionary, so we check the label. Since we don't know the word's name
+      ~ statically, this is a rare scenario where we can't use the abbreviated
+      ~ label syntax, but that's easy enough.
+      ~
+      ~   Even though we've ruled out the possibility that the word is only
+      ~ ever used immediately, it is still possible that there's some reason
+      ~ the word doesn't exist. In particular, it could be an integer literal.
+      ~ If we were to call use-label first, that would count as a requirement
+      ~ that the label must eventually be set. We don't want to require that
+      ~ quite yet, so we call find-label.
+      ~
+      ~   This check is the means by which forward references are disallowed:
+      ~ On the very first pass, a forward-referenced label won't exist yet, so
+      ~ transform will give a "no such word" error, which in an ideal world
+      ~ would prevent there from being a subsequent pass, but at the very
+      ~ least it will ensure the output isn't a valid ELF.
+      dup
+      swap-transform-variables
+      find-label
+      swap-transform-variables
+      {
+        ~   It exists, so we declare our use of it (that's also the only way to
+        ~ get a value for it).
+        swap-transform-variables
+        intern-label use-label
+        swap-transform-variables
+
+        ~   Labels point to codewords (because that's what "Lcreate" does),
+        ~ which is already what we want to output.
+        ~
+        ~   An important caveat: Though it would require something weird to be
+        ~ happening, such as a forced forward reference, the label may be zero!
+        ~ We need to allow for that possibility by not examining the contents of
+        ~ a nonexistent entry.
+        ~
+        ~   Fortunately we don't have to look at it, just append it to the heap
+        ~ and clean up.
+        drop , dropstring 0 exit
+      } if
+    } if
   } if
+  ~ (name as stack string, immediate entry pointer, name pointer)
 
   ~   If we got here, one of three things is true: We're in interpret mode;
-  ~ the word is immediate; or no word was found.  Regardless, we don't need
-  ~ the compiled entry pointer anymore, so drop it.
-  drop
-  ~ (name as stack string, immediate entry pointer)
-
-  ~   If the immediate entry pointer is non-zero, run it.
-  dup {
-    dropstring-with-result entry-to-execution-token execute
+  ~ the word is immediate; or no word was found. If the immediate entry
+  ~ pointer is non-zero, run it.
+  over {
+    drop dropstring-with-result entry-to-execution-token execute
     0 exit
   } if
 
   ~   If we're still here, it wasn't in the dictionary. Also, we don't need
   ~ the immediate entry pointer, either.
-  drop
+  drop drop
   ~ (name as stack string)
 
   ~   If it's not in the dictionary, check whether it's an integer literal. As