diff options
| author | Irene Knapp <ireneista@irenes.space> | 2026-04-25 01:15:46 -0700 |
|---|---|---|
| committer | Irene Knapp <ireneista@irenes.space> | 2026-04-25 03:07:25 -0700 |
| commit | fb64087b6097038430e1f26816b4b6d5a8f4dd87 (patch) | |
| tree | 2e947930d34f6b34d8667f514b95d330b4022bd8 | |
| parent | c55d4666c870a54655080b5e78e9fb37f4b81d9f (diff) | |
implement "quit", the top level word
This is a real milestone, wow. It required changing a few things in "interpret" and its helpers:

First and simplest, "interpret" wasn't properly dropping the word string when processing a number in compile mode.

Second, "word" wasn't properly skipping whitespace - the whitespace was still becoming part of the returned word, which resulted in lookup failures.

Third and most interesting, "interpret" no longer loops; it relies on "quit" to do that. That means that instead of cold_start handing off to "interpret" as it previously did, cold_start now hands off to a trampoline it creates, which calls "interpret" just enough times to create and invoke "quit".

Force-Push: yes
Change-Id: Iff826c6ef7e58c1e014688ae7508ab13e89c3c28
| -rw-r--r-- | quine.asm | 171 |
1 files changed, 141 insertions, 30 deletions
diff --git a/quine.asm b/quine.asm index b1447cf..55af67d 100644 --- a/quine.asm +++ b/quine.asm @@ -1890,9 +1890,6 @@ _start: mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax ; latest lea.qreg.disp32.qreg rax, control_stack_size + 0x28, rdi mov.qreg.disp32.qreg rdi, control_stack_size + 0x20, rax ; here - ; TODO also consider STATE - ; strictly speaking, r0 could be a constant... but it isn't known until - ; runtime, so we might as well make it a variable ;;; ;;; * "heap" is the physical bottom of the heap ;;; The heap grows upwards in memory, so this is also the logical @@ -1921,8 +1918,13 @@ _start: ;;; and convince yourself that it only ever writes things just below the rbp ;;; address it receives, never right on top of it. ;;; - ;;; Notice that "here" points immediately after itself. This is just a - ;;; convenience, making it the last one like that so that the concern is + ;;; As an aside, by the way, please notice that strictly speaking, r0 + ;;; could be a constant... but it isn't known until runtime, so we might as + ;;; well make it a variable. That will play nicely with any astonishing + ;;; memory shenanigans someone might wish to do in the future. + ;;; + ;;; Notice also that "here" points immediately after itself. This is just + ;;; a convenience, making it the last one like that so that the concern is ;;; dealt with in a single place and is easy to keep up-to-date with code ;;; changes. 
;;; @@ -6857,9 +6859,11 @@ cold_start: dq litstring, "dup", early_find, entry_to_execution_token, early_comma dq litstring, "is-space", 0, early_find, entry_to_execution_token dq early_comma - dq litstring, "invert", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, -5*8, early_comma + dq lit, 4*8, early_comma + dq litstring, "drop", early_find, entry_to_execution_token, early_comma + dq litstring, "branch", early_find, entry_to_execution_token, early_comma + dq lit, -7*8, early_comma ; Early exit if it's a zero byte. dq litstring, "dup", early_find, entry_to_execution_token, early_comma @@ -7193,7 +7197,6 @@ cold_start: dq litstring, "interpret", early_create, early_docol_codeword - ; Start of the loop. dq litstring, "word", early_find, entry_to_execution_token, early_comma ; If no word was returned, exit. @@ -7218,7 +7221,7 @@ cold_start: dq lit, 0, early_comma dq litstring, "!=", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 27*8, early_comma + dq lit, 25*8, early_comma ; If the word is in the dictionary, check what mode we're in, then... dq litstring, "dropstring-with-result", early_find, entry_to_execution_token @@ -7231,7 +7234,7 @@ cold_start: dq lit, 0x01, early_comma dq litstring, "and", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 15*8, early_comma + dq lit, 14*8, early_comma ; ... if we're in compile mode, there's still a chance it's an immediate ; word, in which case we branch over to interpret mode... @@ -7244,22 +7247,18 @@ cold_start: dq lit, 0, early_comma dq litstring, "=", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 5*8, early_comma + dq lit, 4*8, early_comma ; ... 
but it's a regular word, so append it to the heap. dq litstring, "entry-to-execution-token", 0, early_find dq entry_to_execution_token, early_comma dq litstring, ",", early_find, entry_to_execution_token, early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -38*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma ; ... if we're in interpret mode, or the word is immediate, run it. dq litstring, "entry-to-execution-token", 0, early_find dq entry_to_execution_token, early_comma dq litstring, "execute", early_find, entry_to_execution_token, early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -42*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma ; If it's not in the dictionary, check whether it's a decimal number. dq litstring, "drop", early_find, entry_to_execution_token, early_comma @@ -7272,7 +7271,7 @@ cold_start: dq lit, 0, early_comma dq litstring, "=", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 17*8, early_comma + dq lit, 16*8, early_comma ; It's a number. dq litstring, "interpreter-flags", early_find, entry_to_execution_token @@ -7288,24 +7287,22 @@ cold_start: ; The version of "lit" we use is the one that's current when we ourselves ; are compiled, hardcoded; doing a dynamic lookup would require dealing with ; what happens if it's not found. 
+ dq litstring, "dropstring-with-result", early_find, entry_to_execution_token + dq early_comma dq litstring, "lit", early_find, entry_to_execution_token, early_comma dq litstring, "lit", early_find, entry_to_execution_token, early_comma dq litstring, ",", early_find, entry_to_execution_token, early_comma dq litstring, ",", early_find, entry_to_execution_token, early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -63*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma ; We're in interpret mode; push the number to the stack. Or at least, that's ; what the code we're interpreting will see. Really it's already on the ; stack, just clean everything else up and leave it there. dq litstring, "dropstring-with-result", early_find, entry_to_execution_token dq early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -66*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma - ; If it's neither in the dictionary nor a number, exit. + ; If it's neither in the dictionary nor a number, just print an error. dq litstring, "dropstring", early_find, entry_to_execution_token dq early_comma dq litstring, "litstring", early_find, entry_to_execution_token, early_comma @@ -7316,14 +7313,93 @@ cold_start: dq early_here, fetch, lit, 8, packalign, early_here_store - ; The next layer is built now, so let's move on to it. + ; The next layer is built now, so let's move on to it. + ; + ; We've chosen to defer creating "quit" until we're in interpreted mode, + ; so we can't call "quit" here, we need to call "interpret". Since + ; "interpret" doesn't loop, we need to call it a few times in a row - just + ; enough times for the code in boot_source, which it'll be interpreting, to + ; create and invoke "quit". 
+ ; + ; We could allocate a real word that does that, but then it would stick + ; around after it's no longer needed. Instead, we create a trampoline, which + ; has a "docol" codeword, some calls to "interpret", but nothing else, then + ; we call it the same way we would call an ordinary word given its execution + ; token. + ; + ; To avoid needing to explicitly clean up the trampoline, we never + ; formally allocate it. Since we know exactly what code it'll be running, we + ; also know how much heap space that code will use. We put the trampoline + ; immediately after the end of where "quit" will be, so that the very next + ; heap allocations after creating "quit" will overwrite the trampoline. + ; + ; For simplicity's sake, we use the flatassembler copies of docol, lit, + ; and sys_exit. It would be possible to use the heap copies, but we have + ; these ones for just a few moments longer, so we might as well use them. dq litstring, "interpret", early_find, entry_to_execution_token + dq swap, early_here, fetch + ; (interpret, heap pointer, here pointer) + + ; Consult the definition of "quit" in boot_source and count how many words + ; the compiled implementation of it will take up. + ; + ; That will include one for the next-entry pointer; one for the name and + ; flags (it's short enough to fit, even with the null terminators); one for + ; the codeword pointer to docol; one for each word that's explicitly added + ; as part of its body in the source; and one for a trailing "exit" that's + ; added by ";" and never reached. + ; + ; Whatever total you get, that's how many words to add to "here", on this + ; line, to obtain the trampoline's start location. + ; + ; Notionally, less might appear to work, but you'd be racing to execute a + ; word before overwriting it, so it isn't a great idea. 
+ dq lit, 10*8, add + ; (interpret, heap pointer, start of trampoline) + + dq dup, lit, 4, roll, swap + ; (heap pointer, start of trampoline, interpret, current point in trampoline) + dq lit, docol, pack64 + + ; In boot_source, count how many interpreted words there are from the + ; initial ":" to the invocation of "quit", inclusive. Don't forget that + ; ": quit" is only a single interpreted word, because ":" reads a word to + ; use as a name, and that one is never seen by "interpret". + ; + ; Whatever total you get, adjust the number of repeated lines just below, + ; to put that exact number of copies of the execution token for "interpret" + ; into the trampoline. + ; + ; (heap pointer, start of trampoline, interpret, current point in trampoline) + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + + dq swap, drop + ; (heap pointer, start of trampoline, current point in trampoline) + + ; The trampoline will never get past the above calls to "interpret", but if + ; it does, complain. + dq lit, lit, pack64, lit, 1, pack64, lit, sys_exit, pack64 + dq drop + ; (heap pointer, start of trampoline) ; Get rid of that heap pointer on the stack, we're finally done with it! dq swap, drop dq execute - ; We won't be coming back, but if we do, complain. + ; We won't be coming back, but if we do, complain about that, too. dq lit, 1, sys_exit @@ -10884,10 +10960,45 @@ defword self_raw, 0 ; word both to delimit the segment end, and to hold the boot source. Cute, ; right? 
defword boot_source, 0x40 - ; Keep in mind that these words don't exist in memory, so branching won't + ; Keep in mind that these words don't exist in memory, so branching won't ; have the intended effect. Any logic that requires branching needs to be - ; written to the heap instead, and branch while running there. - dq ": fribble 5 . ; fribble 0 sys-exit" + ; written to the heap instead, and branch while running there. In fact, + ; let's start out with an example of that, which is also our top-level + ; routine! + dq ": quit r0 @ control! interpret branch [ -2 8 * , ] ; quit " + ; o/~ Like a whirlpool and it never ends. o/~ + ; + ; This implementation of quit does the same thing it does in Jonesforth: + ; First, it wipes the control stack, completely emptying it so that whatever + ; calls brought us here are forgotten and no longer relevant to what happens + ; going forward. Second, it calls interpret repeatedly, forever. + ; + ; Interpret will return control to quit after every iteration. The control + ; stack doesn't get wiped on those iterations, but user code that wants to + ; halt its own execution can always call quit to do that, which will once + ; again wipe the control stack. Isn't that trippy? Instead of unwinding the + ; stack it just overwrites it! + ; + ; Notice that we immediately call quit, before doing anything else. That's + ; load-bearing. Keep it that way. To get this far, we used a trampoline, + ; created in cold_start, that calls interpret in an unrolled loop, just + ; barely enough times to successfully reach that invocation. If you make + ; quit longer, make sure to also make the trampoline longer and adjust its + ; starting location. It's been defined with no extra length, in order to + ; make sure things will crash in a noticeable way if anyone modifies it + ; without that follow-through. 
It would be important to have a close look at + ; the trampoline setup regardless, because it's allocated in space that this + ; code could easily overwrite by accident, so it's better to have it fail in + ; a predictable way than an unpredictable one. + + ; From here, we can do more or less whatever we want; nearly all the + ; bootstrapping concerns have been dealt with. We do have to make sure every + ; line in boot_source that outputs a string outputs one that's an exact + ; multiple of eight bytes long; any accidental null-padding that + ; flatassembler inserts will be treated as a string terminator by + ; attach-string-to-input-buffer. + dq "0 sys-exit " + dq " " dq 0 |