diff options
| author | Irene Knapp <ireneista@irenes.space> | 2025-11-07 16:12:09 -0800 |
|---|---|---|
| committer | Irene Knapp <ireneista@irenes.space> | 2025-11-07 16:12:09 -0800 |
| commit | ff84e9b7a1c5e65d387c95fbab17c2e478a04ae1 (patch) | |
| tree | 22f721faab0ad5a637c9c4708fcfc6c4713d0902 | |
| parent | 34840ee7f9e12fe2ac2e0e75d296e25ecdefdc87 (diff) | |
re-order some stuff to make bootstrapping feel easier
it may not actually be any easier, but at the very least this is a readability improvement Force-Push: yeah Change-Id: I1ab991fc895e41af44f2aaac59d6152053bd9be3
| -rw-r--r-- | quine.asm | 277 |
1 files changed, 171 insertions, 106 deletions
diff --git a/quine.asm b/quine.asm index 00a57c1..3663dde 100644 --- a/quine.asm +++ b/quine.asm @@ -2025,6 +2025,14 @@ cold_start: ;;; branch, zbranch ;;; sorta needs a label but might be avoidable ;;; needs next + ;;; sys_exit, sys_write + ;;; nothing special + ;;; Forth: + ;;; emitstring + ;;; stringlen, sys_write, basics + ;;; Assembly: + ;;; crash + ;;; nothing special ;;; Forth: ;;; pack64, pack32, pack16, pack8, packstring, packalign ;;; only the basics above @@ -2034,14 +2042,44 @@ cold_start: ;;; Forth: ;;; rax, rcx, rdx, rbx, rsp, rbp, rsi, rdi ;;; r8, r9, r10, r11, r12, r13, r14, r15 + ;;; eax, ecx, edx, ebx, esp, ebp, esi, edi + ;;; ax, cx, dx, bx, sp, bp, si, di + ;;; al, cl, dl, bl, ah, ch, dh, bh + ;;; cc_overflow, cc_no_overflow, cc_below, cc_above_equal, cc_equal, + ;;; cc_not_equal, cc_below_equal, cc_above, cc_sign, cc_not_sign, cc_even, + ;;; cc_odd, cc_less, cc_greater_equal, cc_less_equal, cc_greater ;;; only the basics above - ;;; reg64, extrareg64 + ;;; reg64, extrareg64, reg32, reg16, reg8, scalefield, conditioncode ;;; only the basics plus optional emitstring and sys_exit ;;; (notice that these are forward references!) 
- ;;; rex_w, rex_wb, opcodereg, modrm + ;;; rex_w, rex_wb, opcodereg, opcodecc, modrm, sib ;;; only the basics above - ;;; cld, mov_reg64_imm64, mov_reg64_reg64, push_reg64, lodsq - ;;; jmp_abs_indirect_reg64, syscall + ;;; addressing_reg64, addressing_indirect_reg64, addressing_disp8_reg64, + ;;; addressing_indexed_reg64, addressing_disp8_indexed_reg64 + ;;; basics plus earlier assembly stuff + ;;; cld, std, mov_reg64_imm64, mov_extrareg64_imm64, mov_reg64_reg64, + ;;; mov_indirect_reg64_reg64, mov_disp8_reg64_reg64, + ;;; mov_reg64_indirect_reg64, mov_reg64_disp8_reg64, + ;;; mov_reg64_indexed_reg64, + ;;; mov_indirect_reg64_reg32, mov_disp8_reg64_reg32, + ;;; mov_reg32_indirect_reg64, mov_reg32_disp8_reg64, + ;;; mov_indirect_reg64_reg16, mov_disp8_reg64_reg16, + ;;; mov_reg16_indirect_reg64, mov_reg16_disp8_reg64, + ;;; mov_indirect_reg64_reg8, mov_disp8_reg64_reg8, + ;;; mov_reg8_indirect_reg64, mov_reg8_disp8_reg64, + ;;; lea_reg64_disp8_reg64, lea_reg64_indexed_reg64, + ;;; lea_reg64_disp8_indexed_reg64, + ;;; push_reg64, pop_reg64, + ;;; lodsq, rep_movs8, rep_movs16, rep_movs32, rep_movs64, repnz_scas8, + ;;; add_reg64_reg64, add_indirect_reg64_reg64, add_reg64_indirect_reg64, + ;;; add_reg64_imm8, + ;;; sub_reg64_reg64, sub_indirect_reg64_reg64, + ;;; mul_reg64_reg64, divmod_reg64, idivmod_reg64, inc_reg64, dec_reg64, + ;;; and_reg64_reg64, or_reg64_reg64, or_reg64_imm8, xor_reg64_reg64, + ;;; not_reg64, + ;;; cmp_reg64_reg64, test_reg64_reg64, + ;;; set_reg8_cc, jmp_cc_rel_imm8, jmp_abs_indirect_reg64, jmp_rel_imm32, + ;;; syscall, hlt ;;; basics plus assembly helpers ;;; Forth, not needed on heap: ;;; early_heap, early_s0, early_r0, early_latest, early_here @@ -2054,20 +2092,12 @@ cold_start: ;;; Forth, subject to reconsideration: ;;; quit ;;; quine, sys_exit (these are forward references) - ;;; Assembly: - ;;; sys_exit, sys_write - ;;; nothing special ;;; Forth: - ;;; emitstring - ;;; stringlen, sys_write, basics ;;; quine ;;; early_here (removable), 
all_contents (forward) ;;; sys_write, basics ;;; use_label, set_label ;;; only basics - ;;; Assembly: - ;;; hlt - ;;; nothing ;;; Forth: ;;; all_contents, elf_file_header, elf_program_header ;;; output_start_routine @@ -2724,6 +2754,83 @@ zbranch_after_jmp: next +;;;;;;;;;;;;;;;;;;;; +;;; System calls ;;; +;;;;;;;;;;;;;;;;;;;; +;;; +;;; The kernel preserves every register except rax, rcx, and r11. The system +;;; call number goes in rax, as does the return value. Parameters go in rdi, +;;; rsi, rdx, r10, r8, and r9, in that order. [SysV] A.2.1. +;;; +;;; Notice that rsi is our control stack, so we have to save it (for +;;; syscalls with at least two parameters). We can use the value stack to do +;;; that, since rsp is preserved. We don't save other registers because our +;;; caller should do that, if it cares. +;;; + +;;; +;;; This does the Linux exit() system call, passing it an exit code taken +;;; from the stack. +;;; +defword sys_exit, 0 + dq $ + 8 + + mov.qreg.qimm rax, 60 ; syscall number + pop.qreg rdi ; exit code + syscall + + ; In the event we're still here, let's minimize confusion. + hlt + + +;;; +;;; This does the Linux write() system call, passing it an address from the +;;; top of the stack and a length from the second position on the stack. It +;;; writes to file descriptor 1, which is stdout. +;;; +;;; For our length parameter, we can pop directly from the stack into rdx, +;;; which directly becomes the syscall parameter. For our address parameter, +;;; the syscall wants it in rsi, which we also care about, so we have to do a +;;; little juggling. 
+;;; +defword sys_write, 0 + dq $ + 8 + pop.qreg rcx ; address from stack + pop.qreg rdx ; length from stack, passed directly + push.qreg rsi ; save rsi + mov.qreg.qimm rax, 1 ; syscall number + mov.qreg.qimm rdi, 1 ; file descriptor + mov.qreg.qreg rsi, rcx ; pass address + syscall + pop.qreg rsi ; restore rsi + next + + +;;;;;;;;;;;;;;;;;;;;;;;; +;;; I/O conveniences ;;; +;;;;;;;;;;;;;;;;;;;;;;;; + + +defword emitstring, 0 + dq docol, dup, stringlen, swap, sys_write, exit + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Development utilities ;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; This performs the "hlt" instruction (Intel's mnemonic, short for "halt"), +; which will cause the program to exit with a segmentation fault. If you're +; running under a debugger, this is a convenient way to get execution to stop +; at a certain point. +; +; It's called "crash" rather than "hlt" to distinguish it from the word +; which outputs the instruction as machine code. +defword crash, 0 + dq $ + 8 + hlt + + ;;;;;;;;;;;;;;;;;;;;;; ;;; Output helpers ;;; ;;;;;;;;;;;;;;;;;;;;;; @@ -2859,53 +2966,53 @@ defword r14, 0 defword r15, 0 dq docol, lit, r15, exit defword eax, 0 - dq docol, lit, rax, exit + dq docol, lit, eax, exit defword ecx, 0 - dq docol, lit, rcx, exit + dq docol, lit, ecx, exit defword edx, 0 - dq docol, lit, rdx, exit + dq docol, lit, edx, exit defword ebx, 0 - dq docol, lit, rbx, exit + dq docol, lit, ebx, exit defword esp, 0 - dq docol, lit, rsp, exit + dq docol, lit, esp, exit defword ebp, 0 - dq docol, lit, rbp, exit + dq docol, lit, ebp, exit defword esi, 0 - dq docol, lit, rsi, exit + dq docol, lit, esi, exit defword edi, 0 - dq docol, lit, rdi, exit + dq docol, lit, edi, exit defword ax, 0 - dq docol, lit, rax, exit + dq docol, lit, ax, exit defword cx, 0 - dq docol, lit, rcx, exit + dq docol, lit, cx, exit defword dx, 0 - dq docol, lit, rdx, exit + dq docol, lit, dx, exit defword bx, 0 - dq docol, lit, rbx, exit + dq docol, lit, bx, exit defword sp, 0 - dq docol, lit, 
rsp, exit + dq docol, lit, sp, exit defword bp, 0 - dq docol, lit, rbp, exit + dq docol, lit, bp, exit defword si, 0 - dq docol, lit, rsi, exit + dq docol, lit, si, exit defword di, 0 - dq docol, lit, rdi, exit + dq docol, lit, di, exit defword al, 0 - dq docol, lit, rax, exit + dq docol, lit, al, exit defword cl, 0 - dq docol, lit, rcx, exit + dq docol, lit, cl, exit defword dl, 0 - dq docol, lit, rdx, exit + dq docol, lit, dl, exit defword bl, 0 - dq docol, lit, rbx, exit + dq docol, lit, bl, exit defword ah, 0 - dq docol, lit, rsp, exit + dq docol, lit, ah, exit defword ch, 0 - dq docol, lit, rbp, exit + dq docol, lit, ch, exit defword dh, 0 - dq docol, lit, rsi, exit + dq docol, lit, dh, exit defword bh, 0 - dq docol, lit, rdi, exit + dq docol, lit, bh, exit defword cc_overflow, 0 dq docol, lit, cc_overflow, exit defword cc_no_overflow, 0 @@ -3465,34 +3572,41 @@ defword push_reg64, 0 defword pop_reg64, 0 dq docol, reg64, lit, 0x58, opcodereg, exit +; TODO: Contemplate renaming this to lods64. +; ; Stack: ; output point defword lodsq, 0 dq docol, rex_w, lit, 0xAD, pack8, exit +; We break with the Intel mnemonics, which are movsb/movsw/movsd/movsq, +; because this would otherwise be the only place we use the b/w/d/q thing +; instead of 8/16/32/64. Tradition and pronounceability are both nice things, +; but approachability to newcomers is important, too. 
+; ; Stack: ; output point -defword rep_movsb, 0 +defword rep_movs8, 0 dq docol, lit, 0xF3, pack8, lit, 0xA4, pack8, exit ; Stack: ; output point -defword rep_movsw, 0 +defword rep_movs16, 0 dq docol, lit, 0xF3, pack8, lit, 0x66, pack8, lit, 0xA5, pack8, exit ; Stack: ; output point -defword rep_movsd, 0 +defword rep_movs32, 0 dq docol, lit, 0xF3, pack8, lit, 0xA5, pack8, exit ; Stack: ; output point -defword rep_movsq, 0 +defword rep_movs64, 0 dq docol, lit, 0xF3, pack8, rex_w, lit, 0xA5, pack8, exit ; Stack: ; output point -defword repnz_scasb, 0 +defword repnz_scas8, 0 dq docol, lit, 0xF2, pack8, lit, 0xAE, pack8, exit ; Stack: @@ -3740,6 +3854,11 @@ defword jmp_rel_imm32, 0 defword syscall, 0 dq docol, lit, 0x0F, pack8, lit, 0x05, pack8, exit +; Stack: +; output point +defword hlt, 0 + dq docol, lit, 0xF4, pack8, exit + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Runtime word definition ;;; @@ -3875,6 +3994,17 @@ defword pack_next, 0 dq docol, lodsq, rax, jmp_abs_indirect_reg64, exit ; This is another helper "macro" that we'll use in defining assembly words +; from Forth. As before, see the flatassembler version for more explanation. +; +; Stack in: +; base address +; destination address (absolute) +; Stack out: +; new base address +defword pack_beforenext, 0 + dq docol, rax, swap, mov_reg64_imm64, rax, jmp_abs_indirect_reg64, exit + +; This is another helper "macro" that we'll use in defining assembly words ; from Forth. In particular, this one is used in docol. As before, see the ; flatassembler version for more explanation. ; @@ -3965,66 +4095,6 @@ defword quit, 0 dq lit, 0, sys_exit -;;;;;;;;;;;;;;;;;;;; -;;; System calls ;;; -;;;;;;;;;;;;;;;;;;;; -;;; -;;; The kernel preserves every register except rax, rcx, and r11. The system -;;; call number goes in rax, as does the return value. Parameters go in rdi, -;;; rsi, rdx, r10, r8, and r9, in that order. [SysV] A.2.1. 
-;;; -;;; Notice that rsi is our control stack, so we have to save it (for -;;; syscalls with at least two parameters). We can use the value stack to do -;;; that, since rsp is preserved. We don't save other registers because our -;;; caller should do that, if it cares. -;;; - -;;; -;;; This does the Linux exit() system call, passing it an exit code taken -;;; from the stack. -;;; -defword sys_exit, 0 - dq $ + 8 - - mov.qreg.qimm rax, 60 ; syscall number - pop.qreg rdi ; exit code - syscall - - ; In the event we're still here, let's minimize confusion. - hlt - - -;;; -;;; This does the Linux write() system call, passing it an address from the -;;; top of the stack and a length from the second position on the stack. It -;;; writes to file descriptor 1, which is stdout. -;;; -;;; For our length parameter, we can pop directly from the stack into rdx, -;;; which directly becomes the syscall parameter. For our address parameter, -;;; the syscall wants it in rsi, which we also care about, so we have to do a -;;; little juggling. -;;; -defword sys_write, 0 - dq $ + 8 - pop.qreg rcx ; address from stack - pop.qreg rdx ; length from stack, passed directly - push.qreg rsi ; save rsi - mov.qreg.qimm rax, 1 ; syscall number - mov.qreg.qimm rdi, 1 ; file descriptor - mov.qreg.qreg rsi, rcx ; pass address - syscall - pop.qreg rsi ; restore rsi - next - - -;;;;;;;;;;;;;;;;;;;;;;;; -;;; I/O conveniences ;;; -;;;;;;;;;;;;;;;;;;;;;;;; - - -defword emitstring, 0 - dq docol, dup, stringlen, swap, sys_write, exit - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; (new) Implementation strategy ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -4161,11 +4231,6 @@ defword set_label, 0 dq exit -defword hlt, 0 - dq $ + 8 - hlt - - ; This takes an address to write to on the stack, writes stuff there, and ; returns the next address after where it stopped writing. ; |