summary refs log tree commit diff
diff options
context:
space:
mode:
author Irene Knapp <ireneista@irenes.space> 2026-04-08 19:45:06 -0700
committer Irene Knapp <ireneista@irenes.space> 2026-04-08 20:15:17 -0700
commit 2c9a8635792f6c4bdad0a18cadaa3c1a7fcf9502 (patch)
tree 0db0d919886963d9a016b53f0290d9f18bbb537d
parent 3eb34e894de079234ca7c8b764c390d6471c4248 (diff)
reorder things to better separate "early" stuff
Change-Id: Ieba5e3fb7eca5be769cbf798eac0b91e31a1fe75
Force-Push: yes
-rw-r--r-- quine.asm 440
1 files changed, 230 insertions, 210 deletions
diff --git a/quine.asm b/quine.asm
index e16471e..ed1d277 100644
--- a/quine.asm
+++ b/quine.asm
@@ -2750,50 +2750,22 @@ cold_start:
   ;;;   jmp_abs_indirect_reg64, jmp_rel_imm8, jmp_rel_imm32,
   ;;;   syscall, hlt
   ;;;     basics plus assembly helpers
-  ;;; TODO this next stuff is way more interleaved than it needs to be
-  ;;; Forth, not needed on heap:
-  ;;;   early_heap, early_s0, early_r0, early_latest, early_here
-  ;;;   early_find
-  ;;;     find_in
-  ;;; Forth, needed on heap:
-  ;;;   find_in
-  ;;; Forth, not needed on heap:
-  ;;;   early_next_newer_entry
-  ;;;     next_newer_entry_in
-  ;;; Forth, needed on heap:
-  ;;;   next_newer_entry_in,
-  ;;;   entry_to_execution_token,
-  ;;;   fetch_entry_flags,
-  ;;;   store_entry_flags,
+  ;;;   pack_next, pack_beforenext, pack_pushcontrol, pack_popcontrol,
+  ;;;     basics, assembly stuff
+  ;;;   entry_to_execution_token, execution_token_to_entry,
+  ;;;   fetch_entry_flags, store_entry_flags,
   ;;;   entry_to_name,
+  ;;;   find_in, next_newer_entry_in, guess_entry_end_in,
+  ;;;   show_hex_between, show_source_between, show_source_or_hex_between,
+  ;;;     some interdependencies here but it's all topologically sorted
   ;;; Forth, not needed on heap:
-  ;;;   early_guess_entry_end
-  ;;; Forth, needed on heap:
-  ;;;   guess_entry_end_in
-  ;;;   execution_token_to_entry
-  ;;; Forth, not needed on heap:
-  ;;;   early_show_source
-  ;;; Forth, needed on heap:
-  ;;;   show_source_between
-  ;;; Forth, not needed on heap:
-  ;;;   early_show_hex
-  ;;; Forth, needed on heap:
-  ;;;   show_hex_between
-  ;;; Forth, not needed on heap:
-  ;;;   early_show_source_or_hex
-  ;;; Forth, needed on heap:
-  ;;;   show_source_or_hex_between
-  ;;; Forth, not needed on heap:
-  ;;;   early_describe
-  ;;;   early_describe_all
-  ;;; Forth, needed on heap:
-  ;;; Forth, not needed on heap:
+  ;;;   early_heap, early_s0, early_r0, early_latest, early_here
+  ;;;   early_find, early_next_newer_entry, early_guess_entry_end,
+  ;;;   early_show_hex, early_show_source, early_show_source_or_hex,
+  ;;;   early_describe, early_describe_all,
+  ;;;     TODO there should really be non-early versions of these two
   ;;;   early_create, early_comma, early_self_codeword, early_docol_codeword,
-  ;;;   early_here_store,
-  ;;; Forth, needed on heap:
-  ;;;   pack_next, pack_beforenext, pack_pushcontrol, pack_popcontrol,
-  ;;; Forth, not needed on heap:
-  ;;;   early_variable
+  ;;;   early_here_store, early_variable
   ;;;
   ;;; It's likely that nothing past this point is required for the heap copy,
   ;;; but it's here for completeness.
@@ -5226,80 +5198,64 @@ defword hlt, 0
 ;;; to minimize code duplication. Since they do have slightly different
 ;;; interfaces, we prefix the names of the early ones with "early_".
 
+;   First, we have some words that follow the pack* idioms and are used to
+; build specific assembly-based constructs needed in word implementations.
+; You can think of them as macros. They are in a sense reimplementations of
+; their flatassembler equivalents, which are far, far above, and have more
+; documentation.
+;
 ; Stack in:
-;   heap address
-; Stack out:
-;   heap address
-;   requested variable address
-defword early_heap, 0
-  dq docol, dup, lit, control_stack_size, add, exit
-defword early_s0, 0
-  dq docol, early_heap, lit, 8, add, exit
-defword early_r0, 0
-  dq docol, early_heap, lit, 16, add, exit
-defword early_latest, 0
-  dq docol, early_heap, lit, 24, add, exit
-defword early_here, 0
-  dq docol, early_heap, lit, 32, add, exit
-
-; Stack in:
-;   heap address
-;   name string to find
+;   base address
 ; Stack out:
-;   heap address
-;   execution token or 0
-defword early_find, 0
-  dq docol, swap, early_latest, fetch, swap, unroll3, swap, find_in, exit
+;   new base address
+defword pack_next, 0
+  dq docol, lods64, rax, jmp_abs_indirect_reg64, exit
 
+;   This is another helper "macro" that we'll use in defining assembly words
+; from Forth. As before, see the flatassembler version for more explanation.
+;
 ; Stack in:
-;   dictionary to search within
-;   name string to find
+;   base address
+;   destination address (absolute)
 ; Stack out:
-;   execution token or 0
-defword find_in, 0
-  dq docol
-  ; It will be more convenient to have the dictionary pointer on top.
-  dq swap
-  ; If the dictionary pointer is null, exit.
-  dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit
-  ; Test whether this entry is a match.
-  dq dup2, lit, 10, add, stringcmp, zbranch, 4*8
-  ; If we're here, it's not a match; traverse the pointer and repeat.
-  dq fetch, branch, -18*8
-  ; If we're here, it's a match. Clean up our working state and exit.
-  dq swap, drop, exit
+;   new base address
+defword pack_beforenext, 0
+  dq docol, rax, mov_reg64_imm64, rax, jmp_abs_indirect_reg64, exit
 
+;   This is another helper "macro" that we'll use in defining assembly words
+; from Forth. In particular, this one is used in docol. As before, see the
+; flatassembler version for more explanation.
+;
 ; Stack in:
-;   heap address
-;   entry address
+;   base address
+;   source register keyword
 ; Stack out:
-;   heap address
-;   entry address or 0
-defword early_next_newer_entry, 0
+;   new base address
+defword pack_pushcontrol, 0
   dq docol
-  dq swap, early_latest, fetch, swap, unroll3, swap, next_newer_entry_in
+  dq swap, rbp, lit, -8, rbp, lea_reg64_disp8_reg64, swap
+  dq rbp, lit, 0, mov_disp8_reg64_reg64
   dq exit
 
-; This returns zero if the entry isn't found at all.
+;   This is another helper "macro" that we'll use in defining assembly words
+; from Forth. In particular, this one is used in "exit". See the flatassembler
+; version for more explanation.
 ;
 ; Stack in:
-;   dictionary to search within
-;   entry address
+;   base address
+;   target register keyword
 ; Stack out:
-;   entry address or 0
-defword next_newer_entry_in, 0
+;   new base address
+defword pack_popcontrol, 0
   dq docol
-  ; Dictionary pointer on top
-  dq swap
-  ; Exit if null
-  dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit
-  ; Test if it's a match
-  dq dup2, fetch, ne, zbranch, 4*8
-  ; Non-match case; loop
-  dq fetch, branch, -16*8
-  ; Match case; return
-  dq swap, drop, exit
+  dq rbp, lit, 0, roll3, mov_reg64_disp8_reg64
+  dq rbp, lit, 8, rbp, lea_reg64_disp8_reg64
+  dq exit
 
+;   Now, we have a bunch of words that are used for traversing the Forth
+; core data structures that describe words. First, we have a couple that
+; relate to individual words and their pieces...
+;
 ;   Jonesforth calls this "TCFA" and ">CFA"; its author speculates that the
 ; original meaning is "code field address".
 defword entry_to_execution_token, 0
@@ -5314,6 +5270,17 @@ defword entry_to_execution_token, 0
   dq lit, 7, invert, and
   dq exit
 
+;   Jonesforth calls this "CFA>". Jonesforth's implementation searches the
+; entire dictionary, since its word header format isn't designed to be
+; traversed in reverse, but ours is, so it should be fast.
+defword execution_token_to_entry, 0
+  dq docol
+  dq lit, 1, sub
+  dq dup, reverse_padding_len, sub
+  dq dup, reverse_stringlen, sub
+  dq lit, 9, sub
+  dq exit
+
 ; Stack in:
 ;   entry address
 ; Stack out:
@@ -5340,23 +5307,57 @@ defword entry_to_name, 0
   dq lit, 10, add
   dq exit
 
-; This doesn't work on the entry at the end of the ELF .text segment. It does
-; work on everything else.
+;   Now, having finished with individual words, we have a bunch of stuff that
+; traverses the overall dictionary structure, which is formed by a linked list
+; using the pointer that each word starts with.
+;
+;   This one is the backend for early_find; it will eventually also be the
+; backend for a non-early version.
+;
 ; Stack in:
-;   heap address
+;   dictionary to search within
+;   name string to find
+; Stack out:
+;   execution token or 0
+defword find_in, 0
+  dq docol
+  ; It will be more convenient to have the dictionary pointer on top.
+  dq swap
+  ; If the dictionary pointer is null, exit.
+  dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit
+  ; Test whether this entry is a match.
+  dq dup2, lit, 10, add, stringcmp, zbranch, 4*8
+  ; If we're here, it's not a match; traverse the pointer and repeat.
+  dq fetch, branch, -18*8
+  ; If we're here, it's a match. Clean up our working state and exit.
+  dq swap, drop, exit
+
+;   This is the backend for early_next_newer_entry; it will eventually also
+; be the backend for a non-early version.
+;
+;   This returns zero if the entry isn't found at all.
+;
+; Stack in:
+;   dictionary to search within
 ;   entry address
 ; Stack out:
-;   heap address
-;   guessed entry end address (first byte that's not part of it)
-defword early_guess_entry_end, 0
+;   entry address or 0
+defword next_newer_entry_in, 0
   dq docol
-  dq swap, early_here, fetch, swap, early_latest, fetch, swap
-  ; (entry, here, latest, heap)
-  dq lit, 4, unroll, roll3
-  ; (heap, here, latest, entry)
-  dq guess_entry_end_in
-  dq exit
+  ; Dictionary pointer on top
+  dq swap
+  ; Exit if null
+  dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit
+  ; Test if it's a match
+  dq dup2, fetch, ne, zbranch, 4*8
+  ; Non-match case; loop
+  dq fetch, branch, -16*8
+  ; Match case; return
+  dq swap, drop, exit
 
+;   This is the backend for early_guess_entry_end; it will eventually also be
+; the backend for a non-early version.
+;
 ; Stack in:
 ;   "here" value
 ;   dictionary to search within
@@ -5389,29 +5390,32 @@ defword guess_entry_end_in, 0
   ; This is the branch where we found it. Return the next entry address.
   dq swap, drop, swap, drop, exit
 
-;   Jonesforth calls this "CFA>". Jonesforth's implementation searches the
-; entire dictionary, since its word header format isn't designed to be
-; traversed in reverse, but ours is, so it should be fast.
-defword execution_token_to_entry, 0
-  dq docol
-  dq lit, 1, sub
-  dq dup, reverse_padding_len, sub
-  dq dup, reverse_stringlen, sub
-  dq lit, 9, sub
-  dq exit
-
+;   That's it for the code that traverses the dictionary; now we have some
+; code meant for use in debugging, which prints out the contents of individual
+; words' bodies.
+;
+;   This one is the backend for early_show_hex; it will eventually also be the
+; backend for a non-early version.
+;
 ; Stack in:
-;   heap address
 ;   entry address
-; Stack out:
-;   heap address
-defword early_show_source, 0
+;   end address
+defword show_hex_between, 0
   dq docol
-  dq dup, unroll3, early_guess_entry_end, swap, unroll3
-  ; (heap address, entry address, end address)
-  dq show_source_between
-  dq exit
+  dq swap, entry_to_execution_token, lit, 8, add
+  ; (end address, current address)
+  dq dup2, ge, zbranch, 4*8, drop, drop, exit
+  dq dup, fetch, dothex64, litstring, " ", emitstring
+  dq lit, 8, add
+  dq branch, -17*8
 
+;   This one "decompiles" a Forth word, printing the names of the codewords it
+; consists of. It also understands all the built-in literal-related words, and
+; serves as an example of how to do that.
+;
+;   This is the backend for early_show_source; it will eventually also be the
+; backend for a non-early version.
+;
 ; Stack in:
 ;   entry address
 ;   end address
@@ -5470,6 +5474,94 @@ defword show_source_between, 0
 
   dq branch, -120*8
 
+;   This is the backend for early_show_source_or_hex; it will eventually also
+; be the backend for a non-early version.
+;
+; Stack in:
+;   entry address
+;   end address
+defword show_source_or_hex_between, 0
+  dq docol
+
+  ; If it's zero-length, don't try to print its contents.
+  dq dup2, eq, zbranch, 2*8, exit
+
+  dq swap, dup, unroll3, swap, roll3
+  ; (entry address, end address, entry address)
+  dq entry_to_execution_token, fetch, lit, docol, eq, zbranch, 3*8
+  dq show_source_between, exit
+  dq show_hex_between, exit
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Heap bootstrapping ;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;;   Now, we have a little more work to do before we can bootstrap the heap.
+;;; We have all these early_* words which rely on being told explicitly where
+;;; the heap is, most of them pertaining to defining words at runtime. Once
+;;; the heap exists, these won't be needed, so we never actually copy them to
+;;; the heap. To make sure that's easy to keep track of, they're all together
+;;; at the end here.
+;;;
+;;;   The goal is that none of these do any particularly interesting work,
+;;; instead deferring to implementations defined in other sections that will
+;;; be re-used by the later, fully-bootstrapped stuff. At the moment, some of
+;;; them still do interesting things; it's a work in progress.
+
+; Stack in:
+;   heap address
+; Stack out:
+;   heap address
+;   requested variable address
+defword early_heap, 0
+  dq docol, dup, lit, control_stack_size, add, exit
+defword early_s0, 0
+  dq docol, early_heap, lit, 8, add, exit
+defword early_r0, 0
+  dq docol, early_heap, lit, 16, add, exit
+defword early_latest, 0
+  dq docol, early_heap, lit, 24, add, exit
+defword early_here, 0
+  dq docol, early_heap, lit, 32, add, exit
+
+; Stack in:
+;   heap address
+;   name string to find
+; Stack out:
+;   heap address
+;   execution token or 0
+defword early_find, 0
+  dq docol, swap, early_latest, fetch, swap, unroll3, swap, find_in, exit
+
+; Stack in:
+;   heap address
+;   entry address
+; Stack out:
+;   heap address
+;   entry address or 0
+defword early_next_newer_entry, 0
+  dq docol
+  dq swap, early_latest, fetch, swap, unroll3, swap, next_newer_entry_in
+  dq exit
+
+; This doesn't work on the entry at the end of the ELF .text segment. It does
+; work on everything else.
+; Stack in:
+;   heap address
+;   entry address
+; Stack out:
+;   heap address
+;   guessed entry end address (first byte that's not part of it)
+defword early_guess_entry_end, 0
+  dq docol
+  dq swap, early_here, fetch, swap, early_latest, fetch, swap
+  ; (entry, here, latest, heap)
+  dq lit, 4, unroll, roll3
+  ; (heap, here, latest, entry)
+  dq guess_entry_end_in
+  dq exit
+
 ; Stack in:
 ;   heap address
 ;   entry address
@@ -5483,16 +5575,16 @@ defword early_show_hex, 0
   dq exit
 
 ; Stack in:
+;   heap address
 ;   entry address
-;   end address
-defword show_hex_between, 0
+; Stack out:
+;   heap address
+defword early_show_source, 0
   dq docol
-  dq swap, entry_to_execution_token, lit, 8, add
-  ; (end address, current address)
-  dq dup2, ge, zbranch, 4*8, drop, drop, exit
-  dq dup, fetch, dothex64, litstring, " ", emitstring
-  dq lit, 8, add
-  dq branch, -17*8
+  dq dup, unroll3, early_guess_entry_end, swap, unroll3
+  ; (heap address, entry address, end address)
+  dq show_source_between
+  dq exit
 
 ; Stack in:
 ;   heap address
@@ -5507,21 +5599,6 @@ defword early_show_source_or_hex, 0
   dq exit
 
 ; Stack in:
-;   entry address
-;   end address
-defword show_source_or_hex_between, 0
-  dq docol
-
-  ; If it's zero-length, don't try to print its contents.
-  dq dup2, eq, zbranch, 2*8, exit
-
-  dq swap, dup, unroll3, swap, roll3
-  ; (entry address, end address, entry address)
-  dq entry_to_execution_token, fetch, lit, docol, eq, zbranch, 3*8
-  dq show_source_between, exit
-  dq show_hex_between, exit
-
-; Stack in:
 ;   heap address
 ;   entry address
 ; Stack out:
@@ -5550,7 +5627,6 @@ defword early_describe_all, 0
   dq branch, -13*8
   dq exit
 
-
 ;   Allocate space by incrementing "here", and output a word header in it.
 ; Also add it to the "latest" linked list. Use zero as the flag values;
 ; callers that want something else can do that themselves.
@@ -5631,62 +5707,6 @@ defword early_docol_codeword, 0
 defword early_here_store, 0
   dq docol, swap, early_here, swap, unroll3, store, exit
 
-;   Notice that we've switched over to stuff that follows the pack* idioms.
-;
-;   This is a helper "macro" that we'll use in defining assembly words from
-; Forth. This is in a sense a redefinition of it; the flatassembler version of
-; it is far, far above, and has more documentation.
-;
-; Stack in:
-;   base address
-; Stack out:
-;   new base address
-defword pack_next, 0
-  dq docol, lods64, rax, jmp_abs_indirect_reg64, exit
-
-;   This is another helper "macro" that we'll use in defining assembly words
-; from Forth. As before, see the flatassembler version for more explanation.
-;
-; Stack in:
-;   base address
-;   destination address (absolute)
-; Stack out:
-;   new base address
-defword pack_beforenext, 0
-  dq docol, rax, mov_reg64_imm64, rax, jmp_abs_indirect_reg64, exit
-
-;   This is another helper "macro" that we'll use in defining assembly words
-; from Forth. In particular, this one is used in docol. As before, see the
-; flatassembler version for more explanation.
-;
-; Stack in:
-;   base address
-;   source register keyword
-; Stack out:
-;   new base address
-defword pack_pushcontrol, 0
-  dq docol
-  dq swap, rbp, lit, -8, rbp, lea_reg64_disp8_reg64, swap
-  dq rbp, lit, 0, mov_disp8_reg64_reg64
-  dq exit
-
-;   This is another helper "macro" that we'll use in defining assembly words
-; from Forth. In particular, this one is used in "exit". See the flatassembler
-; version for more explanation.
-;
-; Stack in:
-;   base address
-;   target register keyword
-; Stack out:
-;   new base address
-defword pack_popcontrol, 0
-  dq docol
-  dq rbp, lit, 0, roll3, mov_reg64_disp8_reg64
-  dq rbp, lit, 8, rbp, lea_reg64_disp8_reg64
-  dq exit
-
-;   Now we're back to heap idioms again.
-;
 ; Stack in:
 ;   heap address
 ;   address for new variable word to return