diff options
| author | Irene Knapp <ireneista@irenes.space> | 2026-04-08 19:45:06 -0700 |
|---|---|---|
| committer | Irene Knapp <ireneista@irenes.space> | 2026-04-08 20:15:17 -0700 |
| commit | 2c9a8635792f6c4bdad0a18cadaa3c1a7fcf9502 (patch) | |
| tree | 0db0d919886963d9a016b53f0290d9f18bbb537d | |
| parent | 3eb34e894de079234ca7c8b764c390d6471c4248 (diff) | |
reorder things to better separate "early" stuff
Change-Id: Ieba5e3fb7eca5be769cbf798eac0b91e31a1fe75 Force-Push: yes
| -rw-r--r-- | quine.asm | 440 |
1 files changed, 230 insertions, 210 deletions
diff --git a/quine.asm b/quine.asm index e16471e..ed1d277 100644 --- a/quine.asm +++ b/quine.asm @@ -2750,50 +2750,22 @@ cold_start: ;;; jmp_abs_indirect_reg64, jmp_rel_imm8, jmp_rel_imm32, ;;; syscall, hlt ;;; basics plus assembly helpers - ;;; TODO this next stuff is way more interleaved than it needs to be - ;;; Forth, not needed on heap: - ;;; early_heap, early_s0, early_r0, early_latest, early_here - ;;; early_find - ;;; find_in - ;;; Forth, needed on heap: - ;;; find_in - ;;; Forth, not needed on heap: - ;;; early_next_newer_entry - ;;; next_newer_entry_in - ;;; Forth, needed on heap: - ;;; next_newer_entry_in, - ;;; entry_to_execution_token, - ;;; fetch_entry_flags, - ;;; store_entry_flags, + ;;; pack_next, pack_beforenext, pack_pushcontrol, pack_popcontrol, + ;;; basics, assembly stuff + ;;; entry_to_execution_token, execution_token_to_entry, + ;;; fetch_entry_flags, store_entry_flags, ;;; entry_to_name, + ;;; find_in, next_newer_entry_in, guess_entry_end_in, + ;;; show_hex_between, show_source_between, show_source_or_hex_between, + ;;; some interdependencies here but it's all topologically sorted ;;; Forth, not needed on heap: - ;;; early_guess_entry_end - ;;; Forth, needed on heap: - ;;; guess_entry_end_in - ;;; execution_token_to_entry - ;;; Forth, not needed on heap: - ;;; early_show_source - ;;; Forth, needed on heap: - ;;; show_source_between - ;;; Forth, not needed on heap: - ;;; early_show_hex - ;;; Forth, needed on heap: - ;;; show_hex_between - ;;; Forth, not needed on heap: - ;;; early_show_source_or_hex - ;;; Forth, needed on heap: - ;;; show_source_or_hex_between - ;;; Forth, not needed on heap: - ;;; early_describe - ;;; early_describe_all - ;;; Forth, needed on heap: - ;;; Forth, not needed on heap: + ;;; early_heap, early_s0, early_r0, early_latest, early_here + ;;; early_find, early_next_newer_entry, early_guess_entry_end, + ;;; early_show_hex, early_show_source, early_show_source_or_hex + ;;; early_describe, early_describe_all, + ;;; TODO 
there should really be non-early versions of these two ;;; early_create, early_comma, early_self_codeword, early_docol_codeword, - ;;; early_here_store, - ;;; Forth, needed on heap: - ;;; pack_next, pack_beforenext, pack_pushcontrol, pack_popcontrol, - ;;; Forth, not needed on heap: - ;;; early_variable + ;;; early_here_store, early_variable ;;; ;;; It's likely that nothing past this point is required for the heap copy, ;;; but it's here for completeness. @@ -5226,80 +5198,64 @@ defword hlt, 0 ;;; to minimize code duplication. Since they do have slightly different ;;; interfaces, we prefix the names of the early ones with "early_". +; First, we have some words that follow the pack* idioms and are used to +; build specific assembly-based constructs needed in word implementations. +; You can think of them as macros. They are in a sense reimplementations of +; their flatassembler equivalents, which are far, far above, and have more +; documentation. +; ; Stack in: -; heap address -; Stack out: -; heap address -; requested variable address -defword early_heap, 0 - dq docol, dup, lit, control_stack_size, add, exit -defword early_s0, 0 - dq docol, early_heap, lit, 8, add, exit -defword early_r0, 0 - dq docol, early_heap, lit, 16, add, exit -defword early_latest, 0 - dq docol, early_heap, lit, 24, add, exit -defword early_here, 0 - dq docol, early_heap, lit, 32, add, exit - -; Stack in: -; heap address -; name string to find +; base address ; Stack out: -; heap address -; execution token or 0 -defword early_find, 0 - dq docol, swap, early_latest, fetch, swap, unroll3, swap, find_in, exit +; new base address +defword pack_next, 0 + dq docol, lods64, rax, jmp_abs_indirect_reg64, exit +; This is another helper "macro" that we'll use in defining assembly words +; from Forth. As before, see the flatassembler version for more explanation. 
+; ; Stack in: -; dictionary to search within -; name string to find +; base address +; destination address (absolute) ; Stack out: -; execution token or 0 -defword find_in, 0 - dq docol - ; It will be more convenient to have the dictionary pointer on top. - dq swap - ; If the dictionary pointer is null, exit. - dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit - ; Test whether this entry is a match. - dq dup2, lit, 10, add, stringcmp, zbranch, 4*8 - ; If we're here, it's not a match; traverse the pointer and repeat. - dq fetch, branch, -18*8 - ; If we're here, it's a match. Clean up our working state and exit. - dq swap, drop, exit +; new base address +defword pack_beforenext, 0 + dq docol, rax, mov_reg64_imm64, rax, jmp_abs_indirect_reg64, exit +; This is another helper "macro" that we'll use in defining assembly words +; from Forth. In particular, this one is used in docol. As before, see the +; flatassembler version for more explanation. +; ; Stack in: -; heap address -; entry address +; base address +; source register keyword ; Stack out: -; heap address -; entry address or 0 -defword early_next_newer_entry, 0 +; new base address +defword pack_pushcontrol, 0 dq docol - dq swap, early_latest, fetch, swap, unroll3, swap, next_newer_entry_in + dq swap, rbp, lit, -8, rbp, lea_reg64_disp8_reg64, swap + dq rbp, lit, 0, mov_disp8_reg64_reg64 dq exit -; This returns zero if the entry isn't found at all. +; This is another helper "macro" that we'll use in defining assembly words +; from Forth. In particular, this one is used in "exit". See the flatassembler +; version for more explanation. 
; ; Stack in: -; dictionary to search within -; entry address +; base address +; target register keyword ; Stack out: -; entry address or 0 -defword next_newer_entry_in, 0 +; new base address +defword pack_popcontrol, 0 dq docol - ; Dictionary pointer on top - dq swap - ; Exit if null - dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit - ; Test if it's a match - dq dup2, fetch, ne, zbranch, 4*8 - ; Non-match case; loop - dq fetch, branch, -16*8 - ; Match case; return - dq swap, drop, exit + dq rbp, lit, 0, roll3, mov_reg64_disp8_reg64 + dq rbp, lit, 8, rbp, lea_reg64_disp8_reg64 + dq exit +; Now, we have a bunch of words that are used for traversing the Forth +; core data structures that describe words. First, we have a couple that +; relate to individual words and their pieces... +; ; Jonesforth calls this "TCFA" and ">CFA"; its author speculates that the ; original meaning is "code field address". defword entry_to_execution_token, 0 @@ -5314,6 +5270,17 @@ defword entry_to_execution_token, 0 dq lit, 7, invert, and dq exit +; Jonesforth calls this "CFA>". Jonesforth's implementation searches the +; entire dictionary, since its word header format isn't designed to be +; traversed in reverse, but ours is, so it should be fast. +defword execution_token_to_entry, 0 + dq docol + dq lit, 1, sub + dq dup, reverse_padding_len, sub + dq dup, reverse_stringlen, sub + dq lit, 9, sub + dq exit + ; Stack in: ; entry address ; Stack out: @@ -5340,23 +5307,57 @@ defword entry_to_name, 0 dq lit, 10, add dq exit -; This doesn't work on the entry at the end of the ELF .text segment. It does -; work on everything else. +; Now, having finished with individual words, we have a bunch of stuff that +; traverses the overall dictionary structure, which is formed by a linked list +; using the pointer that each word starts with. +; +; This one is the backend for early_find; it will eventually also be the +; backend for a non-early version. 
+; ; Stack in: -; heap address +; dictionary to search within +; name string to find +; Stack out: +; execution token or 0 +defword find_in, 0 + dq docol + ; It will be more convenient to have the dictionary pointer on top. + dq swap + ; If the dictionary pointer is null, exit. + dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit + ; Test whether this entry is a match. + dq dup2, lit, 10, add, stringcmp, zbranch, 4*8 + ; If we're here, it's not a match; traverse the pointer and repeat. + dq fetch, branch, -18*8 + ; If we're here, it's a match. Clean up our working state and exit. + dq swap, drop, exit + +; This is the backend for early_next_newer_entry; it will eventually also +; be the backend for a non-early version. +; +; This returns zero if the entry isn't found at all. +; +; Stack in: +; dictionary to search within ; entry address ; Stack out: -; heap address -; guessed entry end address (first byte that's not part of it) -defword early_guess_entry_end, 0 +; entry address or 0 +defword next_newer_entry_in, 0 dq docol - dq swap, early_here, fetch, swap, early_latest, fetch, swap - ; (entry, here, latest, heap) - dq lit, 4, unroll, roll3 - ; (heap, here, latest, entry) - dq guess_entry_end_in - dq exit + ; Dictionary pointer on top + dq swap + ; Exit if null + dq dup, lit, 0, eq, zbranch, 4*8, swap, drop, exit + ; Test if it's a match + dq dup2, fetch, ne, zbranch, 4*8 + ; Non-match case; loop + dq fetch, branch, -16*8 + ; Match case; return + dq swap, drop, exit +; This is the backend for early_guess_entry_end; it will eventually also be +; the backend for a non-early version. +; ; Stack in: ; "here" value ; dictionary to search within @@ -5389,29 +5390,32 @@ defword guess_entry_end_in, 0 ; This is the branch where we found it. Return the next entry address. dq swap, drop, swap, drop, exit -; Jonesforth calls this "CFA>". 
Jonesforth's implementation searches the -; entire dictionary, since its word header format isn't designed to be -; traversed in reverse, but ours is, so it should be fast. -defword execution_token_to_entry, 0 - dq docol - dq lit, 1, sub - dq dup, reverse_padding_len, sub - dq dup, reverse_stringlen, sub - dq lit, 9, sub - dq exit - +; That's it for the code that traverses the dictionary; now we have some +; code meant for use in debugging, which prints out the contents of individual +; words' bodies. +; +; This one is the backend for early_show_hex; it will eventually also be the +; backend for a non-early version. +; ; Stack in: -; heap address ; entry address -; Stack out: -; heap address -defword early_show_source, 0 +; end address +defword show_hex_between, 0 dq docol - dq dup, unroll3, early_guess_entry_end, swap, unroll3 - ; (heap address, entry address, end address) - dq show_source_between - dq exit + dq swap, entry_to_execution_token, lit, 8, add + ; (end address, current address) + dq dup2, ge, zbranch, 4*8, drop, drop, exit + dq dup, fetch, dothex64, litstring, " ", emitstring + dq lit, 8, add + dq branch, -17*8 +; This one "decompiles" a Forth word, printing the names of the codewords it +; consists of. It also understands all the built-in literal-related words, and +; serves as an example of how to do that. +; +; This is the backend for early_show_source; it will eventually also be the +; backend for a non-early version. +; ; Stack in: ; entry address ; end address @@ -5470,6 +5474,94 @@ defword show_source_between, 0 dq branch, -120*8 +; This is the backend for early_show_source_or_hex; it will eventually also +; be the backend for a non-early version. +; +; Stack in: +; entry address +; end address +defword show_source_or_hex_between, 0 + dq docol + + ; If it's zero-length, don't try to print its contents. 
+ dq dup2, eq, zbranch, 2*8, exit + + dq swap, dup, unroll3, swap, roll3 + ; (entry address, end address, entry address) + dq entry_to_execution_token, fetch, lit, docol, eq, zbranch, 3*8 + dq show_source_between, exit + dq show_hex_between, exit + + +;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Heap bootstrapping ;;; +;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Now, we have a little more work to do before we can bootstrap the heap. +;;; We have all these early_* words which rely on being told explicitly where +;;; the heap is, most of them pertaining to defining words at runtime. Once +;;; the heap exists, these won't be needed, so we never actually copy them to +;;; the heap. To make sure that's easy to keep track of, they're all together +;;; at the end here. +;;; +;;; The goal is that none of these do any particularly interesting work, +;;; instead deferring to implementations defined in other sections that will +;;; be re-used by the later, fully-bootstrapped stuff. At the moment, some of +;;; them still do interesting things; it's a work in progress. + +; Stack in: +; heap address +; Stack out: +; heap address +; requested variable address +defword early_heap, 0 + dq docol, dup, lit, control_stack_size, add, exit +defword early_s0, 0 + dq docol, early_heap, lit, 8, add, exit +defword early_r0, 0 + dq docol, early_heap, lit, 16, add, exit +defword early_latest, 0 + dq docol, early_heap, lit, 24, add, exit +defword early_here, 0 + dq docol, early_heap, lit, 32, add, exit + +; Stack in: +; heap address +; name string to find +; Stack out: +; heap address +; execution token or 0 +defword early_find, 0 + dq docol, swap, early_latest, fetch, swap, unroll3, swap, find_in, exit + +; Stack in: +; heap address +; entry address +; Stack out: +; heap address +; entry address or 0 +defword early_next_newer_entry, 0 + dq docol + dq swap, early_latest, fetch, swap, unroll3, swap, next_newer_entry_in + dq exit + +; This doesn't work on the entry at the end of the ELF .text segment. 
It does +; work on everything else. +; Stack in: +; heap address +; entry address +; Stack out: +; heap address +; guessed entry end address (first byte that's not part of it) +defword early_guess_entry_end, 0 + dq docol + dq swap, early_here, fetch, swap, early_latest, fetch, swap + ; (entry, here, latest, heap) + dq lit, 4, unroll, roll3 + ; (heap, here, latest, entry) + dq guess_entry_end_in + dq exit + ; Stack in: ; heap address ; entry address @@ -5483,16 +5575,16 @@ defword early_show_hex, 0 dq exit ; Stack in: +; heap address ; entry address -; end address -defword show_hex_between, 0 +; Stack out: +; heap address +defword early_show_source, 0 dq docol - dq swap, entry_to_execution_token, lit, 8, add - ; (end address, current address) - dq dup2, ge, zbranch, 4*8, drop, drop, exit - dq dup, fetch, dothex64, litstring, " ", emitstring - dq lit, 8, add - dq branch, -17*8 + dq dup, unroll3, early_guess_entry_end, swap, unroll3 + ; (heap address, entry address, end address) + dq show_source_between + dq exit ; Stack in: ; heap address @@ -5507,21 +5599,6 @@ defword early_show_source_or_hex, 0 dq exit ; Stack in: -; entry address -; end address -defword show_source_or_hex_between, 0 - dq docol - - ; If it's zero-length, don't try to print its contents. - dq dup2, eq, zbranch, 2*8, exit - - dq swap, dup, unroll3, swap, roll3 - ; (entry address, end address, entry address) - dq entry_to_execution_token, fetch, lit, docol, eq, zbranch, 3*8 - dq show_source_between, exit - dq show_hex_between, exit - -; Stack in: ; heap address ; entry address ; Stack out: @@ -5550,7 +5627,6 @@ defword early_describe_all, 0 dq branch, -13*8 dq exit - ; Allocate space by incrementing "here", and output a word header in it. ; Also add it to the "latest" linked list. Use zero as the flag values; ; callers that want something else can do that themselves. 
@@ -5631,62 +5707,6 @@ defword early_docol_codeword, 0 defword early_here_store, 0 dq docol, swap, early_here, swap, unroll3, store, exit -; Notice that we've switched over to stuff that follows the pack* idioms. -; -; This is a helper "macro" that we'll use in defining assembly words from -; Forth. This is in a sense a redefinition of it; the flatassembler version of -; it is far, far above, and has more documentation. -; -; Stack in: -; base address -; Stack out: -; new base address -defword pack_next, 0 - dq docol, lods64, rax, jmp_abs_indirect_reg64, exit - -; This is another helper "macro" that we'll use in defining assembly words -; from Forth. As before, see the flatassembler version for more explanation. -; -; Stack in: -; base address -; destination address (absolute) -; Stack out: -; new base address -defword pack_beforenext, 0 - dq docol, rax, mov_reg64_imm64, rax, jmp_abs_indirect_reg64, exit - -; This is another helper "macro" that we'll use in defining assembly words -; from Forth. In particular, this one is used in docol. As before, see the -; flatassembler version for more explanation. -; -; Stack in: -; base address -; source register keyword -; Stack out: -; new base address -defword pack_pushcontrol, 0 - dq docol - dq swap, rbp, lit, -8, rbp, lea_reg64_disp8_reg64, swap - dq rbp, lit, 0, mov_disp8_reg64_reg64 - dq exit - -; This is another helper "macro" that we'll use in defining assembly words -; from Forth. In particular, this one is used in "exit". See the flatassembler -; version for more explanation. -; -; Stack in: -; base address -; target register keyword -; Stack out: -; new base address -defword pack_popcontrol, 0 - dq docol - dq rbp, lit, 0, roll3, mov_reg64_disp8_reg64 - dq rbp, lit, 8, rbp, lea_reg64_disp8_reg64 - dq exit - -; Now we're back to heap idioms again. -; ; Stack in: ; heap address ; address for new variable word to return |