diff options
Diffstat (limited to 'quine.asm')
| -rw-r--r-- | quine.asm | 171 |
1 files changed, 141 insertions, 30 deletions
diff --git a/quine.asm b/quine.asm index b1447cf..55af67d 100644 --- a/quine.asm +++ b/quine.asm @@ -1890,9 +1890,6 @@ _start: mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax ; latest lea.qreg.disp32.qreg rax, control_stack_size + 0x28, rdi mov.qreg.disp32.qreg rdi, control_stack_size + 0x20, rax ; here - ; TODO also consider STATE - ; strictly speaking, r0 could be a constant... but it isn't known until - ; runtime, so we might as well make it a variable ;;; ;;; * "heap" is the physical bottom of the heap ;;; The heap grows upwards in memory, so this is also the logical @@ -1921,8 +1918,13 @@ _start: ;;; and convince yourself that it only ever writes things just below the rbp ;;; address it receives, never right on top of it. ;;; - ;;; Notice that "here" points immediately after itself. This is just a - ;;; convenience, making it the last one like that so that the concern is + ;;; As an aside, by the way, please notice that strictly speaking, r0 + ;;; could be a constant... but it isn't known until runtime, so we might as + ;;; well make it a variable. That will play nicely with any astonishing + ;;; memory shenanigans someone might wish to do in the future. + ;;; + ;;; Notice also that "here" points immediately after itself. This is just + ;;; a convenience, making it the last one like that so that the concern is ;;; dealt with in a single place and is easy to keep up-to-date with code ;;; changes. ;;; @@ -6857,9 +6859,11 @@ cold_start: dq litstring, "dup", early_find, entry_to_execution_token, early_comma dq litstring, "is-space", 0, early_find, entry_to_execution_token dq early_comma - dq litstring, "invert", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, -5*8, early_comma + dq lit, 4*8, early_comma + dq litstring, "drop", early_find, entry_to_execution_token, early_comma + dq litstring, "branch", early_find, entry_to_execution_token, early_comma + dq lit, -7*8, early_comma ; Early exit if it's a zero byte. dq litstring, "dup", early_find, entry_to_execution_token, early_comma @@ -7193,7 +7197,6 @@ cold_start: dq litstring, "interpret", early_create, early_docol_codeword - ; Start of the loop. dq litstring, "word", early_find, entry_to_execution_token, early_comma ; If no word was returned, exit. @@ -7218,7 +7221,7 @@ cold_start: dq lit, 0, early_comma dq litstring, "!=", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 27*8, early_comma + dq lit, 25*8, early_comma ; If the word is in the dictionary, check what mode we're in, then... dq litstring, "dropstring-with-result", early_find, entry_to_execution_token @@ -7231,7 +7234,7 @@ cold_start: dq lit, 0x01, early_comma dq litstring, "and", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 15*8, early_comma + dq lit, 14*8, early_comma ; ... if we're in compile mode, there's still a chance it's an immediate ; word, in which case we branch over to interpret mode... @@ -7244,22 +7247,18 @@ cold_start: dq lit, 0, early_comma dq litstring, "=", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 5*8, early_comma + dq lit, 4*8, early_comma ; ... but it's a regular word, so append it to the heap. dq litstring, "entry-to-execution-token", 0, early_find dq entry_to_execution_token, early_comma dq litstring, ",", early_find, entry_to_execution_token, early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -38*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma ; ... if we're in interpret mode, or the word is immediate, run it. dq litstring, "entry-to-execution-token", 0, early_find dq entry_to_execution_token, early_comma dq litstring, "execute", early_find, entry_to_execution_token, early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -42*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma ; If it's not in the dictionary, check whether it's a decimal number. dq litstring, "drop", early_find, entry_to_execution_token, early_comma @@ -7272,7 +7271,7 @@ cold_start: dq lit, 0, early_comma dq litstring, "=", early_find, entry_to_execution_token, early_comma dq litstring, "0branch", early_find, entry_to_execution_token, early_comma - dq lit, 17*8, early_comma + dq lit, 16*8, early_comma ; It's a number. dq litstring, "interpreter-flags", early_find, entry_to_execution_token @@ -7288,24 +7287,22 @@ cold_start: ; The version of "lit" we use is the one that's current when we ourselves ; are compiled, hardcoded; doing a dynamic lookup would require dealing with ; what happens if it's not found. + dq litstring, "dropstring-with-result", early_find, entry_to_execution_token + dq early_comma dq litstring, "lit", early_find, entry_to_execution_token, early_comma dq litstring, "lit", early_find, entry_to_execution_token, early_comma dq litstring, ",", early_find, entry_to_execution_token, early_comma dq litstring, ",", early_find, entry_to_execution_token, early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -63*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma ; We're in interpret mode; push the number to the stack. Or at least, that's ; what the code we're interpreting will see. Really it's already on the ; stack, just clean everything else up and leave it there. dq litstring, "dropstring-with-result", early_find, entry_to_execution_token dq early_comma - ; o/~ Like a whirlpool and it never ends. o/~ - dq litstring, "branch", early_find, entry_to_execution_token, early_comma - dq lit, -66*8, early_comma + dq litstring, "exit", early_find, entry_to_execution_token, early_comma - ; If it's neither in the dictionary nor a number, exit. + ; If it's neither in the dictionary nor a number, just print an error. dq litstring, "dropstring", early_find, entry_to_execution_token dq early_comma dq litstring, "litstring", early_find, entry_to_execution_token, early_comma @@ -7316,14 +7313,93 @@ cold_start: dq early_here, fetch, lit, 8, packalign, early_here_store - ; The next layer is built now, so let's move on to it. + ; The next layer is built now, so let's move on to it. + ; + ; We've chosen to defer creating "quit" until we're in interpreted mode, + ; so we can't call "quit" here, we need to call "interpret". Since + ; "interpret" doesn't loop, we need to call it a few times in a row - just + ; enough times for the code in boot_source, which it'll be interpreting, to + ; create and invoke "quit". + ; + ; We could allocate a real word that does that, but then it would stick + ; around after it's no longer needed. Instead, we create a trampoline, which + ; has a "docol" codeword, some calls to "interpret", but nothing else, then + ; we call it the same way we would call an ordinary word given its execution + ; token. + ; + ; To avoid needing to explicitly clean up the trampoline, we never + ; formally allocate it. Since we know exactly what code it'll be running, we + ; also know how much heap space that code will use. We put the trampoline + ; immediately after the end of where "quit" will be, so that the very next + ; heap allocations after creating "quit" will overwrite the trampoline. + ; + ; For simplicity's sake, we use the flatassembler copies of docol, lit, + ; and sys_exit. It would be possible to use the heap copies, but we have + ; these ones for just a few moments longer, so we might as well use them. dq litstring, "interpret", early_find, entry_to_execution_token + dq swap, early_here, fetch + ; (interpret, heap pointer, here pointer) + + ; Consult the definition of "quit" in boot_source and count how many words + ; the compiled implementation of it will take up. + ; + ; That will include one for the next-entry pointer; one for the name and + ; flags (it's short enough to fit, even with the null terminators); one for + ; the codeword pointer to docol; one for each word that's explicitly added + ; as part of its body in the source; and one for a trailing "exit" that's + ; added by ";" and never reached. + ; + ; Whatever total you get, that's how many words to add to "here", on this + ; line, to obtain the trampoline's start location. + ; + ; Notionally, less might appear to work, but you'd be racing to execute a + ; word before overwriting it, so it isn't a great idea. + dq lit, 10*8, add + ; (interpret, heap pointer, start of trampoline) + + dq dup, lit, 4, roll, swap + ; (heap pointer, start of trampoline, interpret, current point in trampoline) + dq lit, docol, pack64 + + ; In boot_source, count how many interpreted words there are from the + ; initial ":" to the invocation of "quit", inclusive. Don't forget that + ; ": quit" is only a single interpreted word, because ":" reads a word to + ; use as a name, and that one is never seen by "interpret". + ; + ; Whatever total you get, adjust the number of repeated lines just below, + ; to put that exact number of copies of the execution token for "interpret" + ; into the trampoline. + ; + ; (heap pointer, start of trampoline, interpret, current point in trampoline) + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + dq swap, dup, unroll3, pack64 + + dq swap, drop + ; (heap pointer, start of trampoline, current point in trampoline) + + ; The trampoline will never get past the above calls to "interpret", but if + ; it does, complain. + dq lit, lit, pack64, lit, 1, pack64, lit, sys_exit, pack64 + dq drop + ; (heap pointer, start of trampoline) ; Get rid of that heap pointer on the stack, we're finally done with it! dq swap, drop dq execute - ; We won't be coming back, but if we do, complain. + ; We won't be coming back, but if we do, complain about that, too. dq lit, 1, sys_exit @@ -10884,10 +10960,45 @@ defword self_raw, 0 ; word both to delimit the segment end, and to hold the boot source. Cute, ; right? defword boot_source, 0x40 - ; Keep in mind that these words don't exist in memory, so branching won't + ; Keep in mind that these words don't exist in memory, so branching won't ; have the intended effect. Any logic that requires branching needs to be - ; written to the heap instead, and branch while running there. - dq ": fribble 5 . ; fribble 0 sys-exit" + ; written to the heap instead, and branch while running there. In fact, + ; let's start out with an example of that, which is also our top-level + ; routine! + dq ": quit r0 @ control! interpret branch [ -2 8 * , ] ; quit " + ; o/~ Like a whirlpool and it never ends. o/~ + ; + ; This implementation of quit does the same thing it does in Jonesforth: + ; First, it wipes the control stack, completely emptying it so that whatever + ; calls brought us here are forgotten and no longer relevant to what happens + ; going forward. Second, it calls interpret repeatedly, forever. + ; + ; Interpret will return control to quit after every iteration. The control + ; stack doesn't get wiped on those iterations, but user code that wants to + ; halt its own execution can always call quit to do that, which will once + ; again wipe the control stack. Isn't that trippy? Instead of unwinding the + ; stack it just overwrites it! + ; + ; Notice that we immediately call quit, before doing anything else. That's + ; load-bearing. Keep it that way. To get this far, we used a trampoline, + ; created in cold_start, that calls interpret in an unrolled loop, just + ; barely enough times to successfully reach that invocation. If you make + ; quit longer, make sure to also make the trampoline longer and adjust its + ; starting location. It's been defined with no extra length, in order to + ; make sure things will crash in a noticeable way if anyone modifies it + ; without that follow-through. It would be important to have a close look at + ; the trampoline setup regardless, because it's allocated in space that this + ; code could easily overwrite by accident, so it's better to have it fail in + ; a predictable way than an unpredictable one. + + ; From here, we can do more or less whatever we want; nearly all the + ; bootstrapping concerns have been dealt with. We do have to make sure every + ; line in boot_source that outputs a string outputs one that's an exact + ; multiple of eight bytes long; any accidental null-padding that + ; flatassembler inserts will be treated as a string terminator by + ; attach-string-to-input-buffer. + dq "0 sys-exit " + dq " " dq 0 |