summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--README.txt8
-rw-r--r--amd64.e16
-rw-r--r--dynamic.e30
-rw-r--r--hex.e2
-rw-r--r--linux-dynamic.e2
-rw-r--r--premortem.txt107
-rw-r--r--quine.asm15
-rw-r--r--transform.e621
8 files changed, 779 insertions, 22 deletions
diff --git a/README.txt b/README.txt
index 229ef70..bf11f9c 100644
--- a/README.txt
+++ b/README.txt
@@ -86,3 +86,11 @@ program.
 
   TODO output.e interpret.e dynamic.e flow-control.e execution-support.e
 
+  If you want examples of programs that are smaller than Evocation itself,
+quine.e is a tiny program written in proper Evocation that outputs its own
+source code; hello.e is a hello-world written in Evocation-assembly, and hex.e
+is another small Evocation-assembly program that might make a good example of
+how to do slightly more complex things that way. All three of these are
+self-contained, consisting of just that one file plus calls to Evocation's
+built-in library.
+
diff --git a/amd64.e b/amd64.e
index 31354e2..95cdb2a 100644
--- a/amd64.e
+++ b/amd64.e
@@ -168,13 +168,13 @@ s" :cc-greater" keyword
 ~ (register -- 3-bit encoded value for register)
 : reg32
   dup :eax = { drop 0 exit } if
-  dup :ecx = { drop 0 exit } if
-  dup :edx = { drop 0 exit } if
-  dup :ebx = { drop 0 exit } if
-  dup :esp = { drop 0 exit } if
-  dup :ebp = { drop 0 exit } if
-  dup :esi = { drop 0 exit } if
-  dup :edi = { drop 0 exit } if
+  dup :ecx = { drop 1 exit } if
+  dup :edx = { drop 2 exit } if
+  dup :ebx = { drop 3 exit } if
+  dup :esp = { drop 4 exit } if
+  dup :ebp = { drop 5 exit } if
+  dup :esi = { drop 6 exit } if
+  dup :edi = { drop 7 exit } if
   ." Parameter to reg32 is not a reg32." 1 sys-exit ;
 
 ~ (register -- 3-bit encoded value for register)
@@ -213,7 +213,7 @@ s" :cc-greater" keyword
 : scalefield
   dup 1 = { drop 0 exit } if
   dup 2 = { drop 1 exit } if
-  dup 5 = { drop 2 exit } if
+  dup 4 = { drop 2 exit } if
   dup 8 = { drop 3 exit } if
   ." Parameter to scalefield is not 1, 2, 4, or 8." 1 sys-exit ;
 
diff --git a/dynamic.e b/dynamic.e
index 79944b2..a02eec0 100644
--- a/dynamic.e
+++ b/dynamic.e
@@ -135,9 +135,9 @@
   stringlen 1+ 54 swap - 0 max indent dup .hex64
   dup entry-flags@ dup
     { space
-      dup 128 & { s" H" emitstring } if
-      dup 64 & { s" M" emitstring } if
-      dup 1 & { s" I" emitstring } if
+      dup 0x80 & { s" H" emitstring } if
+      dup 0x40 & { s" M" emitstring } if
+      dup 0x01 & { s" I" emitstring } if
     } if drop
   dup is-assembly-word { s"  asm" emitstring }
     { dup is-docol-interpreted-word { s"  raw" emitstring } unless
@@ -150,14 +150,33 @@
   { dup word-heading next-newer-entry } while drop ;
 
 
+~   True when the byte is printable ASCII, that is, 0x20 <= byte < 0x7F, so
+~ space counts as printable and DEL (0x7F) does not. Mind the operand order:
+~ the comparison words read as "constant OP value" here, the same way the
+~ "dup 16 >" loop bound in hexdump-row has to read for that loop to run.
+~ (byte -- boolean)
+: is-printable
+  dup 0x20 <= swap 0x7F > && ;
+
+
 ~ (content end, content start, label start --)
 : hexdump-row
   2 indent dup .hex32 dup 4 unroll
-  0 { dup 16 > }
-  { dup 7 & 0 = { space } if space
+  ~ (label start, content end, content start, label start)
+  0 { dup 16 > } {
+    ~ (label start, content end, content start, label start, offset within row)
+    dup 7 & 0 = { space } if space
     2dup + dup 4 pick <= swap 5 pick > &&
       { 2dup + 8@ .hex8 } { space space } if-else
     1+ } while
+  drop
+  2 indent s" |" emitstring
+  0 { dup 16 > } {
+    ~ (label start, content end, content start, label start, offset within row)
+    2dup + dup 4 pick <= swap 5 pick > &&
+      { 2dup + 8@
+        dup is-printable
+        { value@ emitstring } { s" ." emitstring } if-else
+        drop }
+      { space } if-else
+    1+ } while
+  s" |" emitstring
   newline 5 ndrop ;
 
 
@@ -296,6 +315,7 @@
 : make-immediate latest @ dup entry-flags@ 0x01 | swap entry-flags! ;
 : make-hidden latest @ dup entry-flags@ 0x80 | swap entry-flags! ;
 : make-visible latest @ dup entry-flags@ 0x80 invert & swap entry-flags! ;
+: make-metadata latest @ dup entry-flags@ 0x40 | swap entry-flags! ;
 
 ~   Sooner or later we'll want to define recursive words; this one lets us
 ~ do that. It compiles into a call to the word that's currently being
diff --git a/hex.e b/hex.e
index 51e7703..963b82a 100644
--- a/hex.e
+++ b/hex.e
@@ -228,8 +228,8 @@
   elf-file-header
   elf-program-header-writable
   output-start-routine
-  output-read-byte
   output-exit
+  output-read-byte
   output-decode-nibble
   output-error-handlers
   output-messages
diff --git a/linux-dynamic.e b/linux-dynamic.e
index 7890043..ee34e0e 100644
--- a/linux-dynamic.e
+++ b/linux-dynamic.e
@@ -340,7 +340,7 @@
   sys-sigaction drop ;
 
 
-: handle-crash list-callers 1 sys-exit ;
+: handle-crash ." CRASH" newline list-callers 1 sys-exit ;
 
 : install-crash-handler
   ' handle-crash entry-to-execution-token wrap-signal-handler
diff --git a/premortem.txt b/premortem.txt
new file mode 100644
index 0000000..d3809b1
--- /dev/null
+++ b/premortem.txt
@@ -0,0 +1,107 @@
+ Programming Language Checklist
+by Colin McMillen, Jason Reed, and Elly Fong-Jones, 2011-10-10.
+filled out for Evocation by Irenes, 2026-06-05.
+
+You appear to be advocating a new:
+[ ] functional  [X] imperative  [ ] object-oriented  [ ] procedural [X] stack-based
+[ ] "multi-paradigm"  [ ] lazy  [X] eager  [ ] statically-typed  [ ] dynamically-typed
+[ ] pure  [X] impure  [X] non-hygienic  [ ] visual  [ ] beginner-friendly
+[X] non-programmer-friendly  [X] completely incomprehensible
+programming language.  Your language will not work.  Here is why it will not work.
+
+You appear to believe that:
+[X] Syntax is what makes programming difficult
+[ ] Garbage collection is free                [ ] Computers have infinite memory
+[X] Nobody really needs:
+    [X] concurrency  [ ] a REPL  [X] debugger support  [X] IDE support  [X] I/O
+    [ ] to interact with code not written in your language
+[ ] The entire world speaks 7-bit ASCII
+[X] Scaling up to large software projects will be easy
+[X] Convincing programmers to adopt a new language will be easy
+[ ] Convincing programmers to adopt a language-specific IDE will be easy
+[ ] Programmers love writing lots of boilerplate
+[X] Specifying behaviors as "undefined" means that programmers won't rely on them
+[X] "Spooky action at a distance" makes programming more fun
+
+Unfortunately, your language (has/lacks):
+[X] comprehensible syntax  [X] semicolons  [ ] significant whitespace  [ ] macros
+[ ] implicit type conversion  [ ] explicit casting  [ ] type inference
+[ ] goto  [ ] exceptions  [ ] closures  [ ] tail recursion  [ ] coroutines
+[ ] reflection  [ ] subtyping  [ ] multiple inheritance  [ ] operator overloading
+[ ] algebraic datatypes  [ ] recursive types  [ ] polymorphic types
+[ ] covariant array typing  [ ] monads  [ ] dependent types
+[ ] infix operators  [ ] nested comments  [ ] multi-line strings  [ ] regexes
+[ ] call-by-value  [ ] call-by-name  [ ] call-by-reference  [ ] call-cc
+
+The following philosophical objections apply:
+[ ] Programmers should not need to understand category theory to write "Hello, World!"
+[ ] Programmers should not develop RSI from writing "Hello, World!"
+[X] The most significant program written in your language is its own compiler
+[ ] The most significant program written in your language isn't even its own compiler
+[X] No language spec
+[ ] "The implementation is the spec"
+   [ ] The implementation is closed-source  [ ] covered by patents  [ ] not owned by you
+[ ] Your type system is unsound  [X] Your language cannot be unambiguously parsed
+   [ ] a proof of same is attached
+   [ ] invoking this proof crashes the compiler
+[ ] The name of your language makes it impossible to find on Google
+[X] Interpreted languages will never be as fast as C
+[X] Compiled languages will never be "extensible"
+[X] Writing a compiler that understands English is AI-complete
+[ ] Your language relies on an optimization which has never been shown possible
+[ ] There are less than 100 programmers on Earth smart enough to use your language
+[ ] ____________________________ takes exponential time
+[ ] ____________________________ is known to be undecidable
+
+Your implementation has the following flaws:
+[ ] CPUs do not work that way
+[X] RAM does not work that way
+[ ] VMs do not work that way
+[ ] Compilers do not work that way
+[ ] Compilers cannot work that way
+[ ] Shift-reduce conflicts in parsing seem to be resolved using rand()
+[X] You require the compiler to be present at runtime
+[X] You require the language runtime to be present at compile-time
+[ ] Your compiler errors are completely inscrutable
+[ ] Dangerous behavior is only a warning
+[ ] The compiler crashes if you look at it funny
+[ ] The VM crashes if you look at it funny
+[ ] You don't seem to understand basic optimization techniques
+[ ] You don't seem to understand basic systems programming
+[ ] You don't seem to understand pointers
+[ ] You don't seem to understand functions
+
+Additionally, your marketing has the following problems:
+[ ] Unsupported claims of increased productivity
+[ ] Unsupported claims of greater "ease of use"
+[ ] Obviously rigged benchmarks
+   [ ] Graphics, simulation, or crypto benchmarks where your code just calls
+       handwritten assembly through your FFI
+   [ ] String-processing benchmarks where you just call PCRE
+   [ ] Matrix-math benchmarks where you just call BLAS
+[X] Noone really believes that your language is faster than:
+    [X] assembly  [X] C  [ ] FORTRAN  [ ] Java  [ ] Ruby  [ ] Prolog
+[X] Rejection of orthodox programming-language theory without justification
+[ ] Rejection of orthodox systems programming without justification
+[ ] Rejection of orthodox algorithmic theory without justification
+[ ] Rejection of basic computer science without justification
+
+Taking the wider ecosystem into account, I would like to note that:
+[ ] Your complex sample code would be one line in: _______________________
+[X] We already have an unsafe imperative language
+[ ] We already have a safe imperative OO language
+[ ] We already have a safe statically-typed eager functional language
+[X] You have reinvented Lisp but worse
+[ ] You have reinvented Javascript but worse
+[ ] You have reinvented Java but worse
+[ ] You have reinvented C++ but worse
+[ ] You have reinvented PHP but worse
+[ ] You have reinvented PHP better, but that's still no justification
+[X] You have reinvented Brainfuck but non-ironically
+
+In conclusion, this is what I think of you:
+[ ] You have some interesting ideas, but this won't fly.
+[ ] This is a bad language, and you should feel bad for inventing it.
+[X] Programming in this language is an adequate punishment for inventing it.
+
+
diff --git a/quine.asm b/quine.asm
index 2df2d3a..f11b435 100644
--- a/quine.asm
+++ b/quine.asm
@@ -1883,7 +1883,7 @@ _start:
   ;;;   These will be the permanent homes of these values, though we have
   ;;; copies of them elsewhere while we're still in this routine.
   ;;;
-  mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi    ; heap
+  mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi    ; log
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp    ; s0
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp    ; r0
   mov.qreg.qimm rax, final_word_name
@@ -1891,9 +1891,14 @@ _start:
   lea.qreg.disp32.qreg rax, control_stack_size + 0x28, rdi
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x20, rax    ; here
   ;;;
-  ;;; * "heap" is the physical bottom of the heap
-  ;;;     The heap grows upwards in memory, so this is also the logical
+  ;;; * "log" is the physical bottom of the log
+  ;;;     The log grows upwards in memory, so this is also the logical
   ;;;   bottom. This comes from the address mmap() just returned to us.
+  ;;;     The rest of quine.asm refers to the log as the heap. It's not a
+  ;;;   heap, but it used to be called that. The self-hosted version of
+  ;;;   Evocation has the fully revised and reconciled copy of all these
+  ;;;   comments; it just felt like unnecessary tedium to do that here as
+  ;;;   well.
   ;;; * "s0" is the logical bottom of the value stack
   ;;;     The value stack grows downwards in memory, so this is the physical
   ;;;   top of it. This comes from the stack pointer the kernel initialized us
@@ -2018,7 +2023,7 @@ cold_start:
   ;;;   This is the only hardcoding we need to do; by building on top of it,
   ;;; we will soon reach a point where the rest of the system can be defined
   ;;; within itself.
-  dq early_heap, litstring, "heap", early_variable
+  dq early_heap, litstring, "log", early_variable
   dq early_s0, litstring, "s0", early_variable
   dq early_r0, litstring, "r0", early_variable
   dq early_latest, litstring, "latest", early_variable
@@ -11966,7 +11971,7 @@ defword boot_source, 0x40
 
   ;   This use of bitwise and is okay because they're both either 0 or 1.
   ; We'll have logical and real soon now, be patient... :)
-  dq ": is-in-heap dup heap @ <= swap here @ > & ;                    "
+  dq ": is-in-heap dup log @ <= swap here @ > & ;                     "
 
 ;  dq ": unlink-pre-heap-words                                         "
 ;  dq "  latest @                                                      "
diff --git a/transform.e b/transform.e
index 76a192e..ebf493f 100644
--- a/transform.e
+++ b/transform.e
@@ -155,6 +155,45 @@
 ~
 ~   The log-load transformation and its alternates rely on the following
 ~ labels, all of which must be defined elsewhere: TODO
+~
+~
+~ About the hex transform
+~ ~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   The hex transform's role is a bit different. Whereas the label and
+~ log-load transforms are used as part of generating an executable binary, the
+~ hex transform produces a commented hex dump that, when later processed by
+~ the "hex" tool, produces that same binary. It achieves this by modifying
+~ various words which are core parts of Evocation to keep track of appropriate
+~ comments and other metadata, and modifying the output facility to output a
+~ hex dump with any attached comments, rather than raw bytes.
+~
+~   By its nature, the hex transform needs to be able to cope with other
+~ transforms running inside itself; that is, the transformation facility needs
+~ to be reentrant. The other transforms don't have to cope with that, since
+~ the code they run is mostly just about creating word definitions, but the
+~ code given to the hex transform will normally include a call to label-loop,
+~ all the memory manipulation that happens as part of binary generation, and
+~ various output words.
+~
+~   While the label and log-load transforms allow Evocation to be a
+~ self-hosting compiler, the hex transform attains the even higher bar of
+~ making Evocation a "self-bootstrapping" compiler, able to create a running
+~ copy of itself without having any sort of pre-existing compiler at all, only
+~ a hex converter or similar small tool. The idea is that the hex converter is
+~ small and simple enough that it's easy to perform a BINARY audit on, and
+~ that there need be no other binary artifacts in the chain of trust.
+~
+~   This is not a new concept, but as far as Irenes are aware this name for it
+~ is a new coinage. The approach of using a hex-to-binary conversion tool as
+~ the initial bootstrapping stage is due to MesCC, which is quite inspiring in
+~ that regard.
+~
+~   The key insight is that, in some sense, the difference between source
+~ code and binary is the ability to have comments.
+~
+~   The hex transform DOES NOT WORK yet. It's still in development.
+~ TODO update this note when it does work
 
 
 ~ Buffer- and address-management helpers
@@ -245,12 +284,14 @@
 : transformation-state-saved-latest 8 + ;
 : transformation-state-output-buffer-start 2 8 * + ;
 : transformation-state-user-stack-depth 3 8 * + ;
+: transformation-state-label-scratch 4 8 * + ;
 : allocate-transformation-state
-  4 8 * allocate
+  5 8 * allocate
   dup transformation-state-saved-here 0 swap !
   dup transformation-state-saved-latest 0 swap !
   dup transformation-state-output-buffer-start 0 swap !
-  dup transformation-state-user-stack-depth 0 swap ! ;
+  dup transformation-state-user-stack-depth 0 swap !
+  dup transformation-state-label-scratch 0 swap ! ;
 allocate-transformation-state s" transformation-state" variable
 
 
@@ -2547,3 +2588,579 @@ allocate-transformation-state s" transformation-state" variable
 
       exit } if } forever ;
 
+
+~ Hex transform implementation
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   The following code is all part of implementing the hex transform. For a
+~ conceptual overview, see the top of this file.
+~
+~   The hex transform DOES NOT WORK yet. It's still in development.
+~ TODO update this note when it does work
+
+~   Alternate for the comment word, selected by name in hex-transform-one.
+~ The bracketed phrase runs at definition time and compiles a call to the
+~ comment word into this definition, so invoking the alternate invokes the
+~ ordinary comment word. It is marked immediate so that it also runs while
+~ the transformed code is in compile mode.
+: hex-tilde-alternate [ ' ~ entry-to-execution-token , ]
+  ; make-immediate
+
+~   Alternate for "self-codeword", selected by name in hex-transform-one. It
+~ simply calls self-codeword as resolved when this file is compiled.
+: hex-self-codeword-alternate self-codeword ;
+
+~   Alternate for the string-literal word, selected by name in
+~ hex-transform-one and also called from the stub that hex-colon-alternate
+~ installs when the code under transformation defines its own version. It
+~ parses the literal using the outer implementation, then either leaves the
+~ string pointer on the stack (immediate mode) or compiles a litstring
+~ invocation followed by the packed string (compile mode, which is bit 0x01
+~ of interpreter-flags).
+: hex-string-alternate
+  ~   See label-string-alternate for detailed notes on how we wrap s".
+  ~ Essentially, we call the immediate version of it, in the outer context,
+  ~ which uses scratch space in the outer, "real" log.
+  ~
+  ~   The current interpreter-flags are saved on the stack first and stored
+  ~ back once the outer word has run, so the mode the caller was in survives
+  ~ the detour.
+  interpreter-flags @
+  ' s" entry-to-execution-token
+  swap-transform-variables
+  [ ' [ entry-to-execution-token , ]
+  execute
+  swap-transform-variables
+  swap interpreter-flags !
+
+  ~   Now we have a string pointer on the stack at transform time. If we're in
+  ~ immediate mode, that's sufficient. If we're in compile mode, output a
+  ~ litstring invocation. Notice also that these are essentially the same
+  ~ responsibilities as we'd have in the label transform.
+  interpreter-flags @ 0x01 & {
+    ~   We look up the inner version of litstring to reference here. This is
+    ~ similar to what the label transform does, except we don't use a label
+    ~ for it.
+    s" litstring" find entry-to-execution-token ,
+    here @ swap packstring 8 packalign here !
+  } if
+  ; make-immediate
+
+~   Alternate for the print-string-literal word, selected by name in
+~ hex-transform-one and also called from the stub that hex-colon-alternate
+~ installs when the code under transformation defines its own version. It
+~ reuses hex-string-alternate to handle the literal itself, then either
+~ compiles a call to the inner emitstring (compile mode) or emits the string
+~ right away (immediate mode).
+: hex-dot-string-alternate
+  ' hex-string-alternate entry-to-execution-token execute
+
+  interpreter-flags @ 0x01 & {
+    ~ We look up the inner version of emitstring, too.
+    s" emitstring" find entry-to-execution-token ,
+  } { emitstring } if-else
+  ; make-immediate
+
+~   Alternate for "create", selected by name in hex-transform-one. It simply
+~ calls create as resolved when this file is compiled.
+: hex-create-alternate create ;
+
+~   This is a helper called from the patched version of "variable", described
+~ in more detail in hex-colon-alternate, below. It expects to be called after
+~ outputting the entry header for "variable", during the body of the
+~ definition, so that it can output compiled code which will run as part of
+~ the transformed "variable".
+~
+~   The helper accepts a string pointer giving a variable name. The code it
+~ produces checks the name of the variable being defined and, if the two names
+~ match, alters the resulting inner variable to point to the same backing
+~ store as the outer variable of the same name.
+~
+~   In many ways the hex transform is the trippiest one. To wit, there's two
+~ layers of compilation happening here... so don't get confused. When the
+~ helper is called, we're compiling the inner "variable", but "variable"
+~ itself is a word-defining word which also has the task of compilation...
+~ which we're modifying.
+~
+~ (name pointer --)
+: hex-variable-use-outer
+  ~   The actual payload here is generated code that compares the name of
+  ~ the variable being defined against the name we were given and, on a
+  ~ match, makes it reference the outer backing store instead of the inner
+  ~ one. The helper is generic; hex-colon-alternate calls it once per shared
+  ~ variable.
+  ~
+  ~   We want to do this all as references to inner words, which fortunately
+  ~ have been defined by now, but we have to do that a bit indirectly...
+  s" dup" find entry-to-execution-token ,
+  s" litstring" find entry-to-execution-token ,
+  here @ over packstring 8 packalign here !
+  s" stringcmp" find entry-to-execution-token ,
+  s" lit" find entry-to-execution-token , 0 ,
+  s" =" find entry-to-execution-token ,
+
+  ~   Also, we don't have high-level flow control yet, and even if we did,
+  ~ it would be awkward to use it here. So we count the branch by hand. Ah
+  ~ well. As always, remember that with forward branches, the offset to
+  ~ branch by is the first word to be skipped, and is included in the count.
+  ~ The six cells counted here are the offset itself, swap, drop, lit, the
+  ~ literal value, and the final swap.
+  s" 0branch" find entry-to-execution-token , 6 8 * ,
+
+  ~ If control reaches here in the generated code, the string matched.
+  s" swap" find entry-to-execution-token ,
+  s" drop" find entry-to-execution-token ,
+
+  ~   To get the value of the outer variable, we just call it. Of course,
+  ~ looking up an outer entry is a pain, but at least it's a pain in a way
+  ~ that should be familiar by now.
+  s" lit" find entry-to-execution-token ,
+  swap-transform-variables
+  find
+  swap-transform-variables
+  entry-to-execution-token execute ,
+
+  s" swap" find entry-to-execution-token , ;
+
+~   This "replacement" is a little different from an alternate: When the code
+~ under transformation attempts to compile its own version of sys-write, it
+~ gets a stub that calls this word instead. It's swapped out by
+~ hex-colon-alternate, whereas the regular alternates are swapped out by
+~ hex-transform-one.
+~ (length to write, base address --)
+: hex-sys-write-replacement
+  ~   Walk the buffer one byte at a time, printing each byte with .hex8
+  ~ followed by a space, until the remaining length reaches zero.
+  { over } {
+    dup 8@ .hex8 space
+    1+ swap 1- swap
+  } while
+  ~   The loop leaves the exhausted length (zero) and the end address behind;
+  ~ discard both so the net stack effect matches the stack comment above.
+  drop drop ;
+
+~   By overriding colon, we can special-case the definitions of particular
+~ words. It's very metacircular.
+~
+~   The special-cased names are "variable", the two string-literal words,
+~ "relink-main-input-buffer-to-stdin", and "sys-write". Every case, including
+~ the fallback, performs colon's usual steps by hand: create the entry, lay
+~ down the docol codeword, hide the entry, and finish by switching to compile
+~ mode. The special cases additionally compile a payload before that switch.
+: hex-colon-alternate
+  word value@
+
+  ~   The word "variable" is itself a word-defining word, and we will
+  ~ special-case its definition to special-case the definitions of particular
+  ~ variables. It's very very metacircular.
+  dup s" variable" stringcmp 0 = {
+    ~   Don't lose track of the layering happening here. The word "variable"
+    ~ is a regular docol word; it's defining a word that's implemented in
+    ~ assembly, but it can use whatever Forthy logic it wants to do so. In
+    ~ this case we're going to have it run a little extra logic, then continue
+    ~ with the rest of its usual definition.
+
+    ~   Before we get to the extra logic, we do want an entry header for
+    ~ "variable" itself, so we do that... This takes care of all of colon's
+    ~ responsibilities except switching to compile mode; we'll do that part
+    ~ after we've output our payload.
+    create dropstring
+    s" docol" find entry-to-execution-token execute ,
+    make-hidden
+
+    ~   There's two variables that we want to point to the outer backing
+    ~ stores, rather than the inner ones. The code for that is a bit
+    ~ repetitive, so we have a helper for it; see above.
+    ~
+    ~   It is worth stopping to contemplate the meaning of sharing these two
+    ~ variables in particular between the inner and outer contexts.
+    ~ Essentially this says that they're both reading the same input stream,
+    ~ and the two copies of the interpreter both share the same state. Thus,
+    ~ trading off responsibility for lexing between inner and outer contexts
+    ~ works just like trading off responsibility between two interpreters
+    ~ when there's no transformation involved, or between a regular
+    ~ interpreter and a transform.
+    ~
+    ~   If we didn't do this, we'd still have to invent some way to control
+    ~ what input the inner context sees, and the concept of "the next
+    ~ character" would become more complex during the transform and require
+    ~ care and attention. Sharing this stuff keeps it simple.
+    s" interpreter-flags" hex-variable-use-outer
+    s" main-input-buffer" hex-variable-use-outer
+    ~   After this, we can return control to the regularly-scheduled
+    ~ "variable", which will do the "create" and all that. That stuff isn't
+    ~ colon's responsibility, so it isn't our responsibility, it'll happen
+    ~ regardless.
+
+    ~   Now we close out colon's responsibilities by switching to compile
+    ~ mode. We return from colon after this. The hex transform will continue
+    ~ by processing the source words that form the regular body of "variable",
+    ~ eventually hitting the matching semicolon. Our friendly tampering is
+    ~ now complete!
+    ' ] entry-to-execution-token execute
+    exit
+  } if
+
+  ~   Now we want to override s". As usual, that's the single most annoying
+  ~ string to quote, so we cheat.
+  dup ' s" entry-to-name stringcmp 0 = {
+    ~  Create the word header. It's a normal docol word, so that much is
+    ~ simple.
+    create dropstring
+    s" docol" find entry-to-execution-token execute ,
+    make-hidden
+
+    ~   This time around we would really rather just always use the alternate,
+    ~ which already untangles the layered nonsense. So we have the payload
+    ~ call the alternate directly, then exit. We could come up with a way to
+    ~ then skip forward in the code under transformation, but that would be
+    ~ complex, and it's unnecessary: We let it keep running, outputting the
+    ~ usual body of s", which we know will never be reached.
+    ' hex-string-alternate entry-to-execution-token ,
+    s" exit" find entry-to-execution-token ,
+
+    ~   As before, finish up colon's responsibilities, then return control to
+    ~ the code under transformation.
+    ' ] entry-to-execution-token execute
+    exit
+  } if
+
+  ~ Same deal for .". Hey, we're getting good at this!
+  dup ' ." entry-to-name stringcmp 0 = {
+    create dropstring
+    s" docol" find entry-to-execution-token execute ,
+    make-hidden
+
+    ' hex-dot-string-alternate entry-to-execution-token ,
+    s" exit" find entry-to-execution-token ,
+
+    ' ] entry-to-execution-token execute
+    exit
+  } if
+
+  ~   We want to suppress the behavior of relink-main-input-buffer-to-stdin
+  ~ entirely. Happily, that's easy. We need to do this because otherwise the
+  ~ transformed code will mess with our outer interpreter!
+  dup s" relink-main-input-buffer-to-stdin" stringcmp 0 = {
+    create dropstring
+    s" docol" find entry-to-execution-token execute ,
+    make-hidden
+
+    ~ Return before doing anything.
+    s" exit" find entry-to-execution-token ,
+
+    ' ] entry-to-execution-token execute
+    exit
+  } if
+
+  ~   We are entirely replacing sys-write with our own version. It's an
+  ~ assembly word, and we're replacing it with a Forth word, so there's some
+  ~ matching code in the ;asm alternate that makes sure to not mess that up.
+  dup s" sys-write" stringcmp 0 = {
+    create dropstring
+    s" docol" find entry-to-execution-token execute ,
+    make-hidden
+
+    ' hex-sys-write-replacement entry-to-execution-token ,
+    s" exit" find entry-to-execution-token ,
+
+    ' ] entry-to-execution-token execute
+    exit
+  } if
+
+  ~   If no special case matches, we fall back to just being a regular colon.
+  ~ We already read the word name above, so we have to do the rest of the
+  ~ steps ourselves as well.
+  create dropstring
+  s" docol" find entry-to-execution-token execute ,
+  make-hidden
+  ' ] entry-to-execution-token execute
+  ;
+
+~   Alternate for semicolon, selected by name in hex-transform-one. The
+~ bracketed phrase runs at definition time and compiles a call to the
+~ ordinary semicolon into this word, so invoking the alternate invokes it.
+~ Marked immediate so that it runs while the transformed code is in compile
+~ mode.
+: hex-semicolon-alternate [ ' ; entry-to-execution-token , ]
+  ; make-immediate
+
+~   Alternate for ";asm", selected by name in hex-transform-one. It behaves
+~ like the regular ;asm except when the entry being finished is named
+~ "sys-write"; in that case it aligns "here" to 8 bytes and finishes the
+~ definition with the ordinary semicolon instead.
+: hex-semicolon-assembly-alternate
+  latest @ entry-to-name
+  dup s" sys-write" stringcmp 0 = {
+    ~   As detailed in hex-colon-alternate, above, sys-write is implemented in
+    ~ assembly but we replace it with Forth. This logic here collaborates with
+    ~ the logic there to make that work.
+    ~
+    ~   The transformed code will still have compiled some assembly, though it
+    ~ won't be reached and would crash, so we need to fix alignment before we
+    ~ call semicolon.
+    drop
+    here @ 8 packalign here !
+    [ ' ; entry-to-execution-token , ]
+    exit
+  } if
+  drop
+
+  ~ If no special case matches, we fall back to the regular behavior.
+  [ ' ;asm entry-to-execution-token , ]
+  ; make-immediate
+
+~   Because docol requires it, we provide a special mini-version of the label
+~ system. We only do L@' and L!', because that's all we need. Unlike with the
+~ label transform, these are NOT real labels; they're restricted similarly to
+~ how they are for the log-load transform.
+~
+~   Per the precedence rules in hex-transform-one, this version will only
+~ apply for immediate execution; compilation will use the inner label system
+~ instead.
+~
+~   The read side first asks find for a label-read word defined by the code
+~ under transformation and, if there is one, runs that and returns. Otherwise
+~ it reads and discards the label name from the input and pushes the contents
+~ of the transformation state's label-scratch cell.
+: hex-L@'-alternate
+  s" L@'" find dup {
+    entry-to-execution-token execute
+    exit
+  } { drop } if-else
+
+  word dropstring
+  transformation-state transformation-state-label-scratch @
+  ; make-immediate
+
+~   Write-side counterpart of the label-read alternate just above. It first
+~ asks find for a label-write word defined by the code under transformation
+~ and, if there is one, runs that and returns. Otherwise it reads and
+~ discards the label name from the input and stores the value on top of the
+~ stack into the transformation state's label-scratch cell.
+: hex-L!'-alternate
+  s" L!'" find dup {
+    entry-to-execution-token execute
+    exit
+  } { drop } if-else
+
+  word dropstring
+  transformation-state transformation-state-label-scratch !
+  ; make-immediate
+
+~   We have to provide alternates for the globals that are bootstrapped by
+~ warm-start in normal execution, because code under the hex transform never
+~ gets its own copy of warm-start. They don't have to do anything special,
+~ though, so we can just use the outer ones. Most of these work unmodified;
+~ for "latest" and "here" we invoke the outer copy of the word, but the actual
+~ value it sees is the wrapped, inner one, since we swapped that out
+~ elsewhere.
+: hex-log-alternate log ;
+: hex-s0-alternate s0 ;
+: hex-r0-alternate r0 ;
+: hex-latest-alternate latest ;
+: hex-here-alternate here ;
+
+~   This implements the hex transform for a single word. It is directly
+~ analogous to "interpret", and reading interpret.e may help in understanding
+~ it, though it's meant to still make sense on its own.
+~
+~   The hex transform runs code immediately. Whereas most transforms alter
+~ what the transformed code compiles into, the hex transform alters what it
+~ outputs. It's assumed that the output is a binary file of some sort; the
+~ binary is output as hexadecimal, interspersed with comments describing the
+~ code that produced it, including descriptions of what was executed, along
+~ with any comments from the original code.
+~
+~   The hex transform's alternates take priority over words defined under
+~ the transform when running immediately, but for compilation, words defined
+~ under the transform take precedence. In the event that neither an alternate
+~ nor an entry in the inner dictionary is found, the outer dictionary is
+~ checked; otherwise it's irrelevant.
+~
+~   The point of this precedence rule is that string literals, which require
+~ special treatment, will be using the inner implementation by the time it
+~ gets to actually generating an executable image. It's important that there
+~ not be any alternates altering the generated code, only the program actually
+~ being compiled. Other literal syntaxes, such as tick, don't have alternates
+~ at all (the hex transform manages to be more parsimonious with alternates
+~ than other transforms), and will always be using inner versions.
+~
+~   The precedence rule does fail to have the desired effect for label
+~ references. For these, the alternates take the unusual step of manually
+~ calling the inner versions when they exist.
+~ TODO think through whether the precedence rule is actually doing anything...
+~
+~ It expects to be called from "hex-transform", below, which loops.
+~
+~ (-- done)
+: hex-transform-one
+  ~   Summary: reads one word from the input and dispatches on it. Every exit
+  ~ below leaves 0 as the "done" result except the magic-word case, which
+  ~ leaves 1. Words that get executed may have stack effects of their own.
+  word
+
+  ~ If no word was returned, exit.
+  ~
+  ~ NOTE(review): this leaves 0, meaning "not done", so a looping caller will
+  ~ call us again. If "word" keeps returning 0 once the input is exhausted,
+  ~ input that lacks the magic word would never terminate - confirm.
+  dup 0 = { drop 0 exit } if
+
+  ~ The string is on the top of the stack, so to get a pointer to it we get
+  ~ the stack address.
+  ~ (string)
+  value@
+
+  ~ If it's the magic word, end the transformation.
+  dup s" pyrzqxgl" stringcmp 0 = { drop dropstring 1 exit } if
+
+  ~   Check whether it's one of the words we have alternates for, and look up
+  ~ the alternate if so.
+  ~
+  ~   The 0 is a placeholder meaning "no alternate". Each test below compares
+  ~ the name pointer against one name and, on a match, replaces the
+  ~ placeholder with that alternate's entry pointer, leaving the name pointer
+  ~ on top again. The names are all distinct, so at most one test can match.
+  0 swap
+  ~ (name as stack string, placeholder, name pointer)
+  dup s" ~" stringcmp 0 = { swap drop ' hex-tilde-alternate swap } if
+  dup s" self-codeword" stringcmp 0 = {
+    swap drop ' hex-self-codeword-alternate swap } if
+  ~ It is nontrivial to construct a string with a double-quote in it.
+  ~ So, for the two string words, the name to compare against is fetched from
+  ~ the word's own entry with entry-to-name rather than written as a literal.
+  dup ' s" entry-to-name stringcmp 0 = {
+    swap drop ' hex-string-alternate swap } if
+  dup ' ." entry-to-name stringcmp 0 = {
+    swap drop ' hex-dot-string-alternate swap } if
+  dup s" create" stringcmp 0 = { swap drop ' hex-create-alternate swap } if
+  dup s" :" stringcmp 0 = { swap drop ' hex-colon-alternate swap } if
+  dup s" ;" stringcmp 0 = { swap drop ' hex-semicolon-alternate swap } if
+  dup s" ;asm" stringcmp 0 = {
+    swap drop ' hex-semicolon-assembly-alternate swap } if
+  dup s" L@'" stringcmp 0 = { swap drop ' hex-L@'-alternate swap } if
+  dup s" L!'" stringcmp 0 = { swap drop ' hex-L!'-alternate swap } if
+  dup s" log" stringcmp 0 = { swap drop ' hex-log-alternate swap } if
+  dup s" s0" stringcmp 0 = { swap drop ' hex-s0-alternate swap } if
+  dup s" r0" stringcmp 0 = { swap drop ' hex-r0-alternate swap } if
+  dup s" latest" stringcmp 0 = { swap drop ' hex-latest-alternate swap } if
+  dup s" here" stringcmp 0 = { swap drop ' hex-here-alternate swap } if
+  ~ (name as stack string, 0 or alternate entry pointer, name pointer)
+
+  ~   This lookup consumes the name pointer. From here on, a 0 in either of
+  ~ the top two cells means "not found".
+  find
+  ~ (stack string, 0 or alternate entry pointer, 0 or inner entry pointer)
+
+  ~   Bit 0x01 of interpreter-flags distinguishes compile mode (set) from
+  ~ immediate mode (clear). Bit 0x01 of an entry's flags marks that entry as
+  ~ immediate. In every branch that picks a winner, the losing cell is dropped
+  ~ first, then dropstring-with-result removes the stack string from
+  ~ underneath the winning entry pointer.
+  interpreter-flags @ 0x01 & {
+    ~ We're in compile mode. An alternate immediate entry has the highest
+    ~ precedence; an inner entry of any kind has second-highest.
+    over {
+      ~ An alternate entry exists; check its flags.
+      over entry-flags@ 0x01 & {
+        ~   It's an immediate entry, so it has precedence, regardless of
+        ~ what's up with the inner entry. Execute it.
+        drop dropstring-with-result
+        entry-to-execution-token execute
+        0 exit
+      } {
+        ~ The alternate is not immediate, so check if there's an inner entry.
+        dup {
+          ~ There is also an inner entry. Check its flags.
+          dup entry-flags@ 0x01 & {
+            ~ The inner entry is immediate, so it has precedence. Execute it.
+            swap drop dropstring-with-result
+            entry-to-execution-token execute
+            0 exit
+          } {
+            ~   The inner entry is not immediate, so the alternate has
+            ~ precedence. Compile it.
+            ~
+            ~   Watch this space closely for correctness issues, it's a rare
+            ~ codepath.
+            drop dropstring-with-result
+            entry-to-execution-token ,
+            0 exit
+          } if-else
+        } {
+          ~ There's no inner entry. Compile the alternate.
+          ~
+          ~   This path, too, is rare and should get close scrutiny for
+          ~ correctness.
+          drop dropstring-with-result
+          entry-to-execution-token ,
+          0 exit
+        } if-else
+      } if-else
+    } {
+      ~ There is no alternate entry; check for an inner entry.
+      dup {
+        ~ An inner entry exists; check its flags.
+        dup entry-flags@ 0x01 & {
+          ~ It's an immediate entry. Execute it.
+          swap drop dropstring-with-result
+          entry-to-execution-token execute
+          0 exit
+        }  {
+          ~ It's not an immediate entry. Compile it.
+          swap drop dropstring-with-result
+          entry-to-execution-token ,
+          0 exit
+        } if-else
+      } if
+      ~ If we got here, there's no inner or alternate entry; fall through.
+    } if-else
+  } {
+    ~ We're in immediate mode. An alternate entry of any kind has precedence.
+    over {
+      ~ There's an alternate entry. Execute it.
+      drop dropstring-with-result
+      entry-to-execution-token execute
+      0 exit
+    } {
+      ~ There's no alternate entry. Check for an inner entry.
+      dup {
+        ~ An inner entry exists. Execute it.
+        swap drop dropstring-with-result
+        entry-to-execution-token execute
+        0 exit
+      } if
+      ~ If we got here, there's no inner or alternate; fall through.
+    } if-else
+  } if-else
+  ~   Every path that found an alternate or an inner entry has exited by now,
+  ~ so the two cells discarded here are both 0.
+  drop drop
+  ~ (stack string)
+
+  ~   As a final fallback, we also check the outer dictionary, for immediate
+  ~ use only. This will allow things like assembly words to work.
+  ~
+  ~   The lookup is bracketed by a pair of swap-transform-variables calls,
+  ~ presumably so that "find" sees the outer dictionary and the inner state is
+  ~ restored straight afterwards - confirm against its definition.
+  value@
+  swap-transform-variables
+  find
+  swap-transform-variables
+  dup {
+    ~ There's an outer entry; check the mode.
+    interpreter-flags @ 0x01 & {
+      ~ We're in compile mode; check the outer entry's flags.
+      dup entry-flags@ 0x01 & {
+        ~ It's an immediate word; execute it.
+        dropstring-with-result
+        entry-to-execution-token
+        execute
+        0 exit
+      } {
+        ~   It's not an immediate word. Pretend it doesn't exist, and fall
+        ~ through.
+        drop
+      } if-else
+    } {
+      ~ We're in immediate mode. Execute the outer entry.
+      dropstring-with-result
+      entry-to-execution-token
+      execute
+      0 exit
+    } if-else
+  } {
+    ~ There's no outer entry. Fall through.
+    drop
+  } if-else
+  ~ (stack string)
+
+  ~ Check whether it's a number literal.
+  ~
+  ~   A status of 0 from read-integer is treated as success.
+  ~ NOTE(review): the "(stack string)" comment after this block implies that
+  ~ read-integer leaves nothing but its status on failure - confirm.
+  value@ read-integer 0 = {
+    ~ It's a number.
+    ~
+    ~ (name as stack string, integer value)
+    dropstring-with-result
+    ~ (integer value)
+
+    ~   In compile mode, compile a reference to "lit" followed by the value
+    ~ itself.
+    interpreter-flags @ 0x01 & {
+      ~ TODO this probably needs a lit alternate (!)
+      ' lit entry-to-execution-token , ,
+      0 exit
+    } if
+
+    ~   In immediate mode, the value simply stays on the stack, underneath
+    ~ our result.
+    0 exit
+  } if
+  ~ (stack string)
+
+  ~   Nothing matched. Report the unknown word, discard the string, and
+  ~ return 0 so that the caller carries on with the next word.
+  ." No such word: " value@ emitstring newline dropstring 0 ;
+
+
+~   This implements the hex transform for all words in a region given as
+~ an input string. It is directly analogous to "quit", in interpret.e, but is
+~ more complex.
+~
+~   It attaches the input string to the input buffer, switches "here" and
+~ "latest" over to internal values, and then calls "hex-transform-one"
+~ repeatedly until that reports it is done, at which point everything is put
+~ back and the final output point is returned.
+~
+~ (output buffer start, output point, input string pointer
+~  -- output buffer start, output point)
+: hex-transform
+  main-input-buffer dup push-input-buffer
+  ~ TODO the arguments for this seem to be backwards from the documentation
+  swap attach-string-to-input-buffer
+  ~ (output buffer start, output point)
+
+  ~   Save the old values of "here" and "latest", and set the initial values
+  ~ of the internal ones. These values need to persist across iterations,
+  ~ since client code will make its own updates to them and then rely on those
+  ~ updates having taken effect. So we do the swap just once, here outside the
+  ~ loop, and set it back when the loop ends.
+  ~
+  ~   We also take this opportunity to initialize the output-buffer-start and
+  ~ user-stack-depth fields of transformation-state.
+  here @ transformation-state transformation-state-saved-here !
+  latest @ transformation-state transformation-state-saved-latest !
+  over transformation-state transformation-state-output-buffer-start !
+  0 transformation-state transformation-state-user-stack-depth !
+  ~   The output point argument becomes the internal "here"; the internal
+  ~ "latest" starts out as 0.
+  here !
+  0 latest !
+  ~ Now the stack has nothing of ours on it, so client code can do its thing.
+  ~
+  ~ NOTE(review): going by the stack comment at the top, the output buffer
+  ~ start appears to still be on the stack at this point, beneath whatever
+  ~ client code pushes. "Nothing of ours" seems to mean nothing above that
+  ~ one cell - confirm.
+
+  ~   It's important that the stack has nothing of ours on it that persists
+  ~ across iterations, so that client code can add and remove stuff there as
+  ~ it sees fit.
+  ~
+  ~   The inner block below runs only when hex-transform-one reports done, and
+  ~ the "exit" at its end is the only way out of the "forever" loop.
+  { hex-transform-one
+    ~ (done)
+
+    ~   When the loop is done, get the real values of "here" and "latest"
+    ~ back. The internal "here" is also the output point, and will become our
+    ~ return value. The internal "latest" is discarded.
+    { here @
+      transformation-state transformation-state-saved-here @ here !
+      transformation-state transformation-state-saved-latest @ latest !
+      ~ (output point)
+
+      ~   Though we don't actually use transformation-state outside of this
+      ~ invocation, for tidiness we zero it out.
+      0 transformation-state transformation-state-saved-here !
+      0 transformation-state transformation-state-saved-latest !
+      0 transformation-state transformation-state-output-buffer-start !
+      0 transformation-state transformation-state-user-stack-depth !
+
+      ~   Also put the input source back how it was.
+      main-input-buffer pop-input-buffer
+
+      exit } if } forever ;
+