From 65be4b0cdc118b5ff9533f9f8372af70ac2e6387 Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Sun, 19 Oct 2025 21:49:40 -0700 Subject: much closer to being able to move to threaded execution not quite, but very very near Force-Push: yes Change-Id: I79ae9f2970e4d7263db5c511e3a5398c22c4771c --- flake.nix | 1 + quine.asm | 224 +++++++++++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 188 insertions(+), 37 deletions(-) diff --git a/flake.nix b/flake.nix index 7b997aa..f38cbf5 100644 --- a/flake.nix +++ b/flake.nix @@ -26,6 +26,7 @@ fasm fasmg gcc + gdb nasm zydis ]; diff --git a/quine.asm b/quine.asm index 1139cc8..9e33bbc 100644 --- a/quine.asm +++ b/quine.asm @@ -476,6 +476,33 @@ macro mov.qreg.disp8.qreg target, offset, source end match end macro +; Move from a 64-bit register, to a 64-bit location relative to a 64-bit +; register, with a 32-bit displacement and no indexing. +; +; This uses opcode 0x89 with REX.W, so that gives us the reg field as the +; 64-bit source and the R/M field as the 64-bit destination. +; +; We need to treat a target of rsp specially because it's the SIB case per +; table 2-2. +macro mov.qreg.disp32.qreg target, offset, source + qwordreg sreg, source + qwordreg treg, target + match =rsp, target + rex.w + db 0x89 + modrm 2, sreg, treg + ; treg is rsp by assumption, and R/M = rsp is the SIB case + sib 0, 4, 4 + ; no scaling, no indexing, rsp as base + dd offset + else + rex.w + db 0x89 + modrm 2, sreg, treg + dd offset + end match +end macro + ; Move from a 32-bit immediate value, to a 64-bit location relative to a ; 64-bit register, with an 8-bit displacement and no indexing. ; @@ -525,6 +552,34 @@ macro lodsq db 0xAD end macro +; Push a 64-bit value from a register onto the stack (the one pointed to by +; rsp). Decrement rsp, then write the value at the new location. +; +; In the corner case where rsp is also the value being pushed, the old value +; is the one used. 
+; +; There's an alternate encoding of this that uses a ModRM byte, but doing it +; without is more compact, so we do without. +macro push.qreg source + qwordreg sreg, source + opcodereg 0x50, sreg +end macro + +; Pop a 64-bit value into a register from the stack (the one pointed to by +; rsp). Read the value from the old location, then increment rsp. +; +; In the corner case where rsp is also the destination being written to, the +; read happens from the old location, then the write causes the increment to +; be irrelevant. +; +; There's an alternate encoding of this that uses a ModRM byte, but doing it +; without is more compact, so we do without. +macro pop.qreg target + qwordreg treg, target + opcodereg 0x58, treg +end macro + + ; Do an absolute indirect jump with a 64-bit register operand. That is: given ; a register which holds a pointer, read another address from the pointed-to ; memory and jump to it. @@ -533,11 +588,21 @@ end macro ; jumps and segments don't exist. They are still a thing in 64-bit mode, we ; just don't use them. macro jmp.abs.indirect.qreg location - db 0xFF qwordreg lreg, location + db 0xFF modrm 0, lreg, 4 end macro +; There is no 64-bit immediate "near" jump, so we use 32-bit. It's relative, +; so that's honestly plenty. +; +; The location is relative to the start of the instruction immediately +; following the jmp. +macro jmp.rel.dimm location + db 0xE9 + dd location +end macro + ; Invoke a system call provided by the kernel. On Linux, the System V ABI ; describes the semantics of such calls (at least, on x86). macro syscall @@ -610,8 +675,7 @@ program_header: dd 0x05 ; *read+execute permission dq 0 ; *offset in file dq $$ ; *virtual address - ; required, but can be anything, subject to - ; alignment + ; required, but can be anything, subject to alignment dq 0 ; physical address (ignored) dq file_size ; *size in file dq file_size ; *size in memory @@ -620,6 +684,10 @@ program_header: ; Save the size of this chunk, as well.
program_header_entry_size = $ - program_header +; Everything after this point is code or data, not headers, so save the start +; of it for use in size calculations later. +code_start: + ;;;;;;;;;;;;;;;;;;;;;;; ;;; Execution model ;;; @@ -830,12 +898,13 @@ DOCOL: NEXT ;;; -;;; Routine _start -;;; -------------- +;;; Routine start +;;; ------------- ;;; ;;; This is the entry point of the whole program, the very first code we -;;; actually execute. We go with the name linkers generally use for it, though -;;; it's not very Forth-y. The ELF header points to it. +;;; actually execute. Linkers traditionally call this _start, and on balance +;;; I think it's probably best to keep that name, though I've honestly never +;;; liked it... Anyway, the ELF header points to it and exec() jumps to it. ;;; ;;; The kernel gives us most registers zeroed, and rsp pointing to the ;;; command-line stuff (argc, argv, envp), which is at an ASLR'd address with @@ -849,22 +918,42 @@ DOCOL: ;;; ;;; Registers in: ;;; -;;; * rsp points to the top (low end) of the value stack +;;; * rsp points to the logical top of the value stack ;;; The kernel sets this up for us, and we need to save it somewhere so ;;; Forth can use it. ;;; ;;; Registers out: ;;; -;;; * rsp points to the top of the control stack ;;; * rsi points within QUIT ;;; QUIT is the word that's Forth's closest equivalent to main(). +;;; * rsp points to the top of the value stack +;;; +;;; Notably, rbp is still uninitialized after _start. +;;; +;;; Stack in: +;;; +;;; * argc, argv, envp in the usual Unix way +;;; We ignore them, though. +;;; +;;; Stack out: +;;; +;;; * The value of HEAP, as a pointer +;;; The meaning of this will be explained below. ;;; ;;; Registers within: ;;; -;;; * rdi points to the base the heap was allocated at, once it is -;;; This is the same value that S0 will hold, once we reach a point -;;; where we can rely on Forth variable-words.
+;;; * rdi points to the base the heap was allocated at, once it exists +;;; This is the same value that HEAP will hold, once we reach a point +;;; where we have variables. Of course, variables are stored on the heap, +;;; hence this temporary measure. +;;; +;;; We also take this opportunity to define some memory layout parameters +;;; that this routine will be responsible for doing something with: ;;; +heap_requested_address = 0x0000001000000000 ; (very arbitrary) +heap_size = 0x0000000001000000 ; 16 MiB +control_stack_size = 0x10000 ; 64 KiB + _start: cld ; clear the DF flag @@ -881,13 +970,13 @@ _start: ;;; and the kernel would trust us, but this gives us more options for ;;; interoperating with other runtimes. ;;; - mov.b rax, 9 ; mmap() - mov.qreg.qimm rdi, 0x0000001000000000 ; address (very arbitrary) - mov.qreg.qimm rsi, 0x0000000001000000 ; size (one meg) - mov.qreg.qimm rdx, 0x03 ; protection (read+write) - mov.oreg.qimm r10, 0x22 ; flags (private+anonymous) - mov.oreg.qimm r8, 0 ; file descriptor (ignored) - mov.oreg.qimm r9, 0 ; offset (ignored) + mov.b rax, 9 ; mmap() + mov.qreg.qimm rdi, heap_requested_address ; address (very arbitrary) + mov.qreg.qimm rsi, heap_size ; size (16 MiB) + mov.qreg.qimm rdx, 0x03 ; protection (read+write) + mov.oreg.qimm r10, 0x22 ; flags (private+anonymous) + mov.oreg.qimm r8, 0 ; file descriptor (ignored) + mov.oreg.qimm r9, 0 ; offset (ignored) syscall ;;; @@ -938,9 +1027,9 @@ _start: ;;; These will be the permanent homes of these values, though we have ;;; copies of them elsewhere while we're still in this routine.
;;; - mov.qreg.disp8.qreg rdi, 0x00, rdi ; HEAP - mov.qreg.disp8.qreg rdi, 0x08, rsp ; S0 - ; TODO this isn't done yet, need to reserve space and explain it more + mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi ; HEAP + mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp ; S0 + mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp ; R0 - NOTE(review): rbp is described as still uninitialized in this routine, yet R0 is described as the address these variables start at (rdi + control_stack_size); confirm the value stored here ;;; ;;; * HEAP is the physical bottom of the heap ;;; The heap grows upwards in memory, so this is also the logical @@ -954,16 +1043,70 @@ _start: ;;; as well. We allocate this dedicated space within the heap right here, ;;; in this routine, through our choice of where to put things. ;;; - ;;; S0 and R0 are mostly used when we want to initialize or reinitialize - ;;; their respective stacks - that is, discard all their contents at once. + ;;; S0 and R0 are mostly used when we want to initialize or reinitialize + ;;; their respective stacks - that is, discard all their contents at once. + ;;; + ;;; The value of R0 is the same address these variables start at, so + ;;; you'll want to do a close read of the implementation of PUSHCONTROL and + ;;; convince yourself that it only ever writes things just below the rbp + ;;; address it receives, never right on top of it. + ;;; + ;;; A little more detail about why we offset everything by + ;;; control_stack_size: We're carving out some space at the bottom of the + ;;; heap - which grows low-to-high - to be the control stack - which grows + ;;; high-to-low. So the control stack is allocated out of the heap as a + ;;; fixed-size, one-time thing, and then the variables come immediately + ;;; after that. We do need to use 32-bit displacement indexing to access + ;;; them this way, but that's no big deal. + ;;; + ;;; This is perhaps questionable, they should maybe be separate segments + ;;; created with separate calls to mmap(), but for now we're not worried + ;;; about overflow so we use the same allocation for both.
+ ;;; + ;;; We'll come back to these variables a bit later and generate the word + ;;; headers that point at them, but now we're almost ready to switch to + ;;; proper threaded-execution, so we finish that setup first... + ;;; + + ;;; + ;;; Push the value of HEAP onto the value stack so that it can be the + ;;; breadcrumb the threaded code needs to find... the backing store of HEAP. + ;;; Yes, self-reference can be weird like that sometimes. There's nothing + ;;; stopping QUIT from reading rdi, it just violates the abstraction... + ;;; + push.qreg rdi - ;;; TODO we don't do this yet - ;;; Now we're going to create a word in the heap, to hold the value of - ;;; HERE long-term. + ;;; + ;;; Notice that, although we are about to set up rsi, and rsp came to us + ;;; already valid, rbp is still uninitialized. This is because we're about + ;;; to hand off to QUIT, which will do that for us. + ;;; + mov.qreg.qimm rsi, cold_start ;;; - ;;; We would like very much to get out of the bootstrap code and into a - ;;; proper threaded-execution setup. + ;;; That's all that NEXT needs, so, take it away! + ;;; + jmp.rel.dimm old_code - skip_from_here ; TODO placeholder: jumps over the NEXT below, straight to old_code +skip_from_here: + NEXT + +;;; +;;; This isn't really a routine so much as it's an array of words (exactly +;;; one of them), which is what NEXT wants rsi to point to. It's only ever +;;; used this one time, so we just put it right here. +;;; +cold_start: + dq QUIT + +;;; +;;; One of the most charming naming traditions in Forth is that the +;;; top-level word that stays running forever is called "quit". +;;; +QUIT: + dq DOCOL ; codeword + ;dq R0, CONTROL! ; overwrite rbp to reset the control stack + ;dq INTERPRET ; run the repl + ;dq BRANCH, QUIT - $ ; if the repl ever exits, start again ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -988,6 +1131,7 @@ _start: ;;; ;;; * rsp points to the bottom of the buffer.
;;; +old_code: mov.dreg.dimm rdx, 0 ; store running file size here ;sub.qreg.bimm rsp, 0xFF ; reserve stack space @@ -1006,9 +1150,8 @@ _start: mov.qreg.disp8.dimm rsp, 0x14, 1 ; ELF format version ; Compute the entry pointer. - mov.qreg.qimm rax, $$ ; the memory origin - add.qreg.bimm rax, 0x78 ; the size of the headers - add.qreg.dimm rax, 155 ; the offset of _start + mov.qreg.qimm rax, _start ; the address of _start + ; This includes the origin, intentionally. mov.qreg.disp8.qreg rsp, 0x18, rax ; entry point mov.qreg.disp8.dimm rsp, 0x20, 64 ; program header offset @@ -1048,15 +1191,21 @@ _start: mov.dreg.dimm rax, 0x38 add.qreg.qreg rdx, rax - ;;; Hardcode the size of the actual code chunk, since we don't yet have a - ;;; way to generate it. + ;;; Hardcode the size of the actual code chunk based on flatassembler's + ;;; label calculations, since we don't yet have a way to generate it from + ;;; within our code. + ;;; + ;;; Originally this was a constant number, to discourage reliance on label + ;;; math, but the direction things are growing in now is to implement + ;;; general label math ourselves, so that's okay. ;;; - ;;; TODO of course, really we want to for-real track this - mov.qreg.qimm rax, 0x24F + ;;; TODO of course, really we want to for-real compute this at runtime + mov.qreg.qimm rax, code_size add.qreg.qreg rdx, rax ;;; - ;;; Go back and fill in the file size now that we know it. + ;;; Go back and fill in the file size now that we know it (ill-gotten + ;;; knowledge though it is). ;;; mov.qreg.disp8.qreg rsp, 0x60, rdx ; size in file mov.qreg.disp8.qreg rsp, 0x68, rdx ; size in memory @@ -1090,5 +1239,6 @@ _start: mov.b rdi, 0 syscall +code_size = $ - code_start file_size = $ - $$ -- cgit 1.4.1