diff options
| -rw-r--r-- | quine.asm | 357 |
1 files changed, 319 insertions, 38 deletions
diff --git a/quine.asm b/quine.asm index a63ae02..2f8e91d 100644 --- a/quine.asm +++ b/quine.asm @@ -180,11 +180,11 @@ end macro macro conditioncode cc, condition match =above, condition cc = 0x07 - else match =above.equal, condition + else match =above_equal, condition cc = 0x03 else match =below, condition cc = 0x02 - else match =below.equal, condition + else match =below_equal, condition cc = 0x06 else match =carry, condition cc = 0x02 @@ -192,47 +192,47 @@ macro conditioncode cc, condition cc = 0x04 else match =greater, condition cc = 0x0F - else match =greater.equal, condition + else match =greater_equal, condition cc = 0x0D else match =less, condition cc = 0x0C - else match =less.equal, condition + else match =less_equal, condition cc = 0x0E - else match =not.above, condition + else match =not_above, condition cc = 0x06 - else match =not.above.equal, condition + else match =not_above_equal, condition cc = 0x02 - else match =not.below, condition + else match =not_below, condition cc = 0x03 - else match =not.below.equal, condition + else match =not_below_equal, condition cc = 0x07 - else match =not.carry, condition + else match =not_carry, condition cc = 0x03 - else match =not.equal, condition + else match =not_equal, condition cc = 0x05 - else match =not.greater, condition + else match =not_greater, condition cc = 0x0E - else match =not.greater.equal, condition + else match =not_greater_equal, condition cc = 0x0C - else match =not.less, condition + else match =not_less, condition cc = 0x0D - else match =not.less.equal, condition + else match =not_less_equal, condition cc = 0x0F - else match =not.overflow, condition + else match =not_overflow, condition cc = 0x01 - else match =not.parity, condition + else match =not_parity, condition cc = 0x0B - else match =not.sign, condition + else match =not_sign, condition cc = 0x09 - else match =not.zero, condition + else match =not_zero, condition cc = 0x05 else match =overflow, condition cc = 0x00 else match =parity, 
condition cc = 0x0A - else match =parity.even, condition + else match =parity_even, condition cc = 0x0A - else match =parity.odd, condition + else match =parity_odd, condition cc = 0x0B else match =sign, condition cc = 0x08 @@ -1898,17 +1898,118 @@ _start: cold_start: ;;; TODO this is probably where we should deal with that "heap" that we passed ;;; on the stack - ;;; Start defining some words that are allocated at runtime on the heap, - ;;; beginning with the minimal set of words needed to define more words. - ;;; The first few are our variables, which hardcode the addresses they will - ;;; return - but since we're doing this at runtime, "hardcoding" can reflect - ;;; where our heap is. This is the fundamental trick that makes the heap - ;;; usable. - dq early_heap, litstring, "heap", early_defvar - dq early_s0, litstring, "s0", early_defvar - dq early_r0, litstring, "r0", early_defvar - dq early_latest, litstring, "latest", early_defvar - dq early_here, litstring, "here", early_defvar + ;;; Before handing off to us, _start pushed a single value onto the stack, + ;;; a pointer to the beginning of the heap. Now, we load our entire Forth + ;;; implementation onto that heap, beginning with the minimal set of words + ;;; needed to define more words. We do this because we need variables as + ;;; infrastructure so we can eventually have dynamic definitions. + ;;; + ;;; There's something non-obvious here: words implemented statically as + ;;; part of the executable image can't contain things that vary at runtime. + ;;; That means that even if these words tried to implement some sort of + ;;; dynamic lookup, they would have no way to find the root of whatever + ;;; dynamic data structure they use. Dynamism needs to be bootstrapped. + ;;; + ;;; In a more traditional C-style program, static code could look up + ;;; variables based on fixed addresses that are the same on every run. 
+ ;;; Failing that, we could dedicate a register to it, though that's a + ;;; considerable expense. We chose not to do either of those things, because + ;;; we want the versatility that comes with not being picky about our + ;;; address space: It allows us to contemplate future improvements such as + ;;; ASLR, or embedding into other processes that impose their own addressing + ;;; constraints, or even coexisting with multiple versions of ourselves. + ;;; That choice does mean we have the hard version of this bootstrapping + ;;; problem, and copying ourselves to the heap is how we solve it. + ;;; + ;;; We do have the heap address right now, though that won't last. In case + ;;; it's unclear why not: keeping it on the stack would require all future + ;;; references to walk the stack, and somehow know when they've reached the + ;;; bottom. The stack is a good place to keep things with clearly delimited + ;;; lifetimes and visibility, but when we want something to live for our + ;;; entire program and be easy to find from any code within it, we need to + ;;; do something else. Anyway, since we have the address, we can use it for + ;;; the next little bit of setup. + ;;; + ;;; The first few words we define are our variables, which hardcode the + ;;; addresses they will return - but since we're doing this at runtime, + ;;; "hardcoding" can reflect where our heap is. This is the fundamental + ;;; trick that makes the heap usable. + ;;; + ;;; One more thing to notice: We already allocated the backing stores of + ;;; these variables, and populated their initial values, in _start. The + ;;; words we're defining return those same addresses for the same backing + ;;; stores. So, we have continuity: Stuff defined in terms of the + ;;; variable-words we're defining now will interoperate with the stuff that + ;;; we define in the "early" way, which includes those very words. 
Both the + ;;; early code and the later code are dealing with the same data structures, + ;;; they're just using a different technique to find them. + ;;; + ;;; This is the only hardcoding we need to do; by building on top of it, + ;;; we will soon reach a point where the rest of the system can be defined + ;;; within itself. + dq early_heap, litstring, "heap", early_variable + dq early_s0, litstring, "s0", early_variable + dq early_r0, litstring, "r0", early_variable + dq early_latest, litstring, "latest", early_variable + dq early_here, litstring, "here", early_variable + + ;;; Now we define a heap version of docol. Strictly speaking it doesn't + ;;; need to be among the first words, it only needs to come before the first + ;;; words implemented in Forth. However, it's conceptually tidy to have it + ;;; that way, so that's what we do. + ;;; + ;;; Docol also presents a unique challenge, in that it's two snippets of + ;;; code and one of them needs to refer to the other. When we use docol as + ;;; the codeword of a word we're defining, we point to a snippet which acts + ;;; as an interpreter for the word's body. However, when we look up "docol" + ;;; in the dictionary, what we get is a word that returns the address of the + ;;; interpreter snippet, effectively acting as a constant. + ;;; + ;;; One way to make this work would be to use a forward-referencing + ;;; address using the labels system. However, it turns out that only docol + ;;; and zbranch would benefit from this, and we drastically simplify our + ;;; code by reworking things so that no forward reference is needed. + ;;; + ;;; What we do is define the interpreter snippet first, allocating space + ;;; for the machine code directly out of "here", with no word header nor any + ;;; dictionary entry pointing to it. We keep track of the address we put + ;;; that at, then we define the constant to point to it. 
+ ;;; + ;;; While it may seem weird to use space that's "outside" of any word, + ;;; keep in mind that using the heap in creative ways is part of the spirit + ;;; of Forth. Jonesforth doesn't have this bootstrapping problem, but its + ;;; variables use this same technique of putting the value before the word + ;;; header to avoid a forward reference. Also, words don't have end + ;;; delimiters, so who's to say what's inside or outside them? + dq early_here, fetch, dup + dq rsi, pack_pushcontrol + dq lit, 8, rax, add_reg64_imm8 + dq rax, rsi, mov_reg64_reg64 + dq pack_next + dq lit, 8, packalign + dq roll3, swap, early_here_store, swap +; it seems to be outputting the lea correctly but then it's supposed to +; move the contents of rsi into the address in rbp, which is a weird +; addressing mode, and it winds up encoded wrong +;(gdb) disassemble/r 0x10000100f8,+24 +;Dump of assembler code from 0x10000100f8 to 0x1000010110: +; 0x00000010000100f8: 48 8d 6d f8 lea -0x8(%rbp),%rbp +; 0x00000010000100fc: 48 89 35 48 83 c0 08 mov %rsi,0x8c08348(%rip) # 0x1008c1844b +; 0x0000001000010103: 48 89 c6 mov %rax,%rsi +; 0x0000001000010106: 48 ad lods %ds:(%rsi),%rax +; 0x0000001000010108: ff 20 jmp *(%rax) +; 0x000000100001010a: 00 00 add %al,(%rax) +; 0x000000100001010c: 00 00 add %al,(%rax) +; 0x000000100001010e: 00 00 add %al,(%rax) + ; Now the interpreter snippet is in-place and "here" points after it, so + ; that future allocation won't step on it. We also still have a copy of its + ; start address, which we will now pass to early_variable. + dq litstring, "docol", early_variable + ; (While it might be tidy to have a separate "early_constant", it would do + ; the same thing. Late variables and constants will be different because the + ; real "variable" word will also be responsible for allocating the backing + ; store, but the only thing early_variable is doing is returning an + ; address.) ;;; For triage's sake, here's an inventory of everything else in the file.
;;; @@ -1952,7 +2053,7 @@ cold_start: ;;; Forth, not needed on heap: ;;; early_heap, early_s0, early_r0, early_latest, early_here ;;; early_create, early_comma, early_self_codeword, early_here_store, - ;;; pack_next, early_defvar + ;;; pack_next, early_variable ;;; ;;; It's likely that nothing past this point is required for the heap copy, ;;; but it's here for completeness. @@ -1981,6 +2082,7 @@ cold_start: ;;; self_raw ;;; self-reference ;;; + dq hlt dq quit ;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -2063,7 +2165,7 @@ docol_constant: align 8 docol: ; Since docol is not a normal word, the label points to the value we care - ; about from the assembly side of things, wich is the address we use as the + ; about from the assembly side of things, which is the address we use as the ; codeword. pushcontrol rsi add.qreg.bimm rax, 8 @@ -2346,7 +2448,7 @@ defword ne, 0 pop.qreg rbx pop.qreg rax cmp.qreg.qreg rax, rbx - set.breg.cc al, not.equal + set.breg.cc al, not_equal and.qreg.bimm rax, 0x01 push.qreg rax next @@ -2376,7 +2478,7 @@ defword ge, 0 dq $ + 8 pop.qreg rbx pop.qreg rax - set.breg.cc al, greater.equal + set.breg.cc al, greater_equal cmp.qreg.qreg rax, rbx push.qreg rax next @@ -2385,7 +2487,7 @@ defword le, 0 dq $ + 8 pop.qreg rbx pop.qreg rax - set.breg.cc al, less.equal + set.breg.cc al, less_equal cmp.qreg.qreg rax, rbx push.qreg rax next @@ -2722,6 +2824,10 @@ defword litpack8, 0 ;;; ;;; It's all backwards and stuff. ;;; +;;; Okay, but seriously, the convention is: target on the top of the stack, +;;; source behind it. This is similar to how the Forth "store" and "fetch" +;;; words work. +;;; ;;; These routines use the output helpers, defined above. They're called in ;;; the same way: an output address, followed by data items specific to what's ;;; being output. @@ -2809,6 +2915,10 @@ defword rex_wb, 0 defword opcodereg, 0 dq docol, or, pack8, exit +; The low-level word that outputs a modrm byte given fully-processed, +; numeric values for its fields. 
Most code will want to call one of the +; higher-level modrm_* words, instead. +; ; Stack ; output point ; mode ("mod") field @@ -2817,6 +2927,77 @@ defword opcodereg, 0 defword modrm, 0 dq docol, swap, lit, 8, mul, or, swap, lit, 64, mul, or, pack8, exit +; The simplest of the modrm modes: Direct register addressing. There are no +; special cases to check. +; +; It's important to notice that the R/M field may describe either a source, +; or a target, depending on what the instruction is. So, this helper doesn't +; get to know that. It also doesn't get to know whether the value in the +; reg/op field describes a register, or if instead it's an extension of the +; opcode. The caller is responsible for figuring that all out. +; +; Stack: +; output point +; reg/op field value (raw number) +; reg/mem field register name +defword modrm_reg64, 0 + dq docol, reg64, lit, 3, unroll3, modrm, exit + +; This is a helper for assembly instructions that want to do a form of +; addressing that requires a value of 1 in the modrm byte's mode field, and +; do not want to do any indexing. +; +; Its main responsibility is to deal with the scenario that requires an SIB +; byte, which happens when the R/M field has a value of 4, which would +; otherwise refer to the register rsp. In that situation, it also generates +; an SIB byte which indicates a scale of 1, no indexing, and rsp as the base +; register. +; +; Stack: +; output point +; reg/op field value (raw number) +; reg/mem field register name +defword modrm_indirect_reg64, 0 + dq docol + ; Exit with an error if the R/M register is rbp. + dq dup, rbp, ne, zbranch, 23*8 + ; Check whether the R/M register is rsp; save the test result for later. + dq dup, rsp, eq, lit, 4, unroll + dq reg64, lit, 0, unroll3, modrm + ; If the R/M register was rsp, we need an SIB byte; otherwise, skip it. 
+ dq swap, zbranch, 8*8, lit, 0, lit, 4, rsp, reg64, sib + dq exit + dq litstring, "R/M parameter to modrm_indirect_reg64 is rbp.", emitstring + dq lit, 1, sys_exit + +; This mode can do rbp fine, rsp is the only unusual case. +; +; Stack: +; output point +; reg/op field value (raw number) +; reg/mem field register name +; displacement value +defword modrm_disp8_reg64, 0 + dq docol + ; Check whether the R/M register is rsp; save the test result for later. + dq swap, dup, rsp, eq, lit, 5, unroll, swap + ; Stash the displacement value out of the way, too. + dq lit, 4, unroll + dq reg64, lit, 1, unroll3, modrm + ; If the R/M register was rsp, we need an SIB byte; otherwise, skip it. + dq roll3, zbranch, 8*8, lit, 0, lit, 4, rsp, reg64, sib + ; The displacement byte. + dq swap, pack8 + dq exit + +; Stack +; output point +; scale field +; index field +; base field +defword sib, 0 + dq docol, swap, lit, 8, mul, or, swap, lit, 64, mul, or, pack8, exit + ; Stack: ; output point defword cld, 0 @@ -2842,7 +3023,70 @@ defword mov_extrareg64_imm64, 0 defword mov_reg64_reg64, 0 dq docol dq roll3, rex_w, lit, 0x89, pack8, unroll3 - dq reg64, swap, reg64, swap, lit, 3, unroll3, modrm + dq swap, reg64, swap, modrm_reg64 + dq exit + +; Stack: +; output point +; source register name +; target register name +defword mov_indirect_reg64_reg64, 0 + dq docol + dq roll3, rex_w, lit, 0x89, pack8, unroll3 + dq swap, reg64, swap, modrm_indirect_reg64 + dq exit + +; Stack: +; output point +; source register name +; target register name +; target displacement value +defword mov_disp8_reg64_reg64, 0 + dq docol + dq lit, 4, roll, rex_w, lit, 0x89, pack8, lit, 4, unroll + dq roll3, reg64, unroll3, modrm_disp8_reg64 + dq exit + +; Stack: +; output point +; source register name +; target register name +defword mov_reg64_indirect_reg64, 0 + dq docol + dq roll3, rex_w, lit, 0x8B, pack8, unroll3 + dq reg64, swap, modrm_indirect_reg64 + dq exit + +; Stack: +; output point +; source register name +; 
source displacement value +; target register name +defword mov_reg64_disp8_reg64, 0 + dq docol + dq lit, 4, roll, rex_w, lit, 0x8B, pack8, lit, 4, unroll + dq reg64, unroll3, modrm_disp8_reg64 + dq exit + +; Stack: +; output point +; source register name +; source displacement value +; target register name +defword lea_reg64_disp8_reg64, 0 + dq docol + dq lit, 4, roll, rex_w, lit, 0x8D, pack8, lit, 4, unroll + dq reg64, unroll3, modrm_disp8_reg64 + dq exit + +; Stack: +; output point +; source value +; target register name +defword add_reg64_imm8, 0 + dq docol + dq roll3, rex_w, lit, 0x83, pack8, swap, lit, 0, swap, modrm_reg64 + dq swap, pack8 dq exit ; Stack: @@ -2857,6 +3101,7 @@ defword lodsq, 0 defword jmp_abs_indirect_reg64, 0 dq docol dq swap, lit, 0xFF, pack8, swap + ; TODO use modrm_indirect_reg64 instead dq reg64, lit, 0, lit, 4, roll3, modrm dq exit @@ -2989,6 +3234,10 @@ defword early_here_store, 0 ; Notice that we've switched over to stuff that follows the pack* idioms. ; +; This is a helper "macro" that we'll use in defining assembly words from +; Forth. This is in a sense a redefinition of it; the flatassembler version of +; it is far, far above, and has more documentation. +; ; Stack in: ; base address ; Stack out: @@ -2996,13 +3245,45 @@ defword early_here_store, 0 defword pack_next, 0 dq docol, lodsq, rax, jmp_abs_indirect_reg64, exit +; This is another helper "macro" that we'll use in defining assembly words +; from Forth. In particular, this one is used in docol. As before, see the +; flatassembler version for more explanation. +; +; Stack in: +; base address +; source register keyword +; Stack out: +; new base address +defword pack_pushcontrol, 0 + dq docol + dq swap, rbp, lit, -8, rbp, lea_reg64_disp8_reg64, swap + dq rbp, lit, 0, mov_disp8_reg64_reg64 + dq exit + +; This is another helper "macro" that we'll use in defining assembly words +; from Forth. In particular, this one is used in "exit". 
See the flatassembler +; version for more explanation. +; +; Stack in: +; base address +; target register keyword +; Stack out: +; new base address +defword pack_popcontrol, 0 + dq docol + dq rbp, swap, mov_reg64_indirect_reg64 + dq rbp, lit, -8, rbp, lea_reg64_disp8_reg64 + dq exit + +; Now we're back to heap idioms again. +; ; Stack in: ; heap address ; address for new variable word to return ; name string ; Stack out: ; heap address -defword early_defvar, 0 +defword early_variable, 0 dq docol dq swap, unroll3, early_create, early_self_codeword ; (address to return, heap address) |