diff options
| -rw-r--r-- | quine.asm | 170 |
1 files changed, 146 insertions, 24 deletions
diff --git a/quine.asm b/quine.asm index f44f608..a25e1c5 100644 --- a/quine.asm +++ b/quine.asm @@ -13,11 +13,11 @@ ;;; Currently, this is not yet fully self-hosting; it is based on ;;; flatassembler[1]. A minimal command to build and run it is: ;;; -;;; fasmg quine.asm quine && chmod 755 quine && ./quine; echo $? +;;; $ fasmg quine.asm quine && chmod 755 quine && ./quine; echo $? ;;; ;;; A workflow you may wish to use for debugging is: ;;; -;;; rm quine2; fasmg quine.asm quine && chmod 755 quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp -l quine quine2 ; echo cmp: $? +;;; $ rm quine2; fasmg quine.asm quine && chmod 755 quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp -l quine quine2 ; echo cmp: $? ;;; ;;; The reason this removes the old one first is that otherwise, there's a ;;; risk the error message will be scrolled off the top of the screen and @@ -25,12 +25,34 @@ ;;; ;;; You may also wish to do: ;;; -;;; objdump --disassemble quine -;;; ZydisDisasm -64 quine +;;; $ objdump --disassemble quine +;;; $ ZydisDisasm -64 quine ;;; ;;; This relies on GNU binutils, and on zydis, respectively. ;;; ;;; [1] https://flatassembler.net/ +;;; +;;; +;;; gdb +;;; --- +;;; +;;; You can run gdb on it if you want; there's no symbols, but if you are +;;; familiar with the hex it should be readable. Keep a hexdump of the program +;;; handy to look up what addresses are. 
+;;; +;;; If you want to see a routine implemented in assembly, look at the hexdump +;;; of the overall file, find it by looking at the ASCII names, skip past the +;;; codeword, and do e.g. +;;; +;;; (gdb) disassemble/r 0x80007c0,+32 +;;; +;;; If you get the value of rsp out of info registers, and for example it's an +;;; address along the lines of 0x7fffffff8650, you can do +;;; +;;; (gdb) x/16xg 0x7fffffff8650 +;;; +;;; The same will work with rbp for the control stack, and don't forget that +;;; the "instruction pointer" is rsi. ;;;;;;;;;;;;;;;;;;;;;;;;; @@ -527,6 +549,23 @@ macro add.qreg.qreg target, source end macro +; This adds a 64-bit register to the 64-bit value in memory that the target +; register points to, in place. It uses opcode 0x01 (ADD r/m64, r64) with +; addressing mode 0, which is plain register-indirect with no displacement. +macro add.indirect.qreg.qreg target, source + match =rsp, target + assert 0 + ; The SIB case. + else match =rbp, target + assert 0 + ; An unrelated addressing mode. + else + qwordreg treg, target + qwordreg sreg, source + rex.w + db 0x01 + modrm 0, sreg, treg + end match +end macro + + ; This adds a signed 8-bit immediate value to a 64-bit register, in place. ; ; Notice the use of 3 as the addressing mode. This says to use the register @@ -561,6 +600,21 @@ macro sub.qreg.qreg target, source modrm 3, treg, sreg end macro + +; This subtracts a 64-bit register from the 64-bit value in memory that the +; target register points to, in place. It uses opcode 0x29 (SUB r/m64, r64), +; which is the form that writes its result to memory. Opcode 0x2B is the +; opposite direction (SUB r64, r/m64) and would leave memory untouched. +macro sub.indirect.qreg.qreg target, source + match =rsp, target + ; The SIB case. + assert 0 + else match =rbp, target + ; An unrelated addressing mode. + assert 0 + else + qwordreg treg, target + qwordreg sreg, source + rex.w + db 0x29 + modrm 0, sreg, treg + end match +end macro + + ; This subtracts a signed 8-bit immediate value from a 64-bit register, in ; place. ; @@ -774,9 +828,9 @@ end macro ; register, with an 8-bit displacement and no indexing. ; ; This uses opcode 0xC7, which has w = 1. We run in 64-bit mode, so that gives -; us an operand size of 32 bits by default. [Intel] volume 1, section 3.6.1, -; table 3-4. We want a 16-bit operand, so we use the operand-size prefix, -; 0x66, and we leave REX.W unset. +; us an operand size of 32 bits by default. [Intel] volume 1, chapter 3, +; section 3-6.1, table 3-4. 
We want a 16-bit operand, so we use the +; operand-size prefix, 0x66, and we leave REX.W unset. ; ; We need to treat rsp specially because it's the SIB case, per table 2-2. macro mov.qreg.disp8.wimm target, offset, source @@ -912,7 +966,7 @@ macro lea.qreg.qreg.disp8 target, offset, source end match end macro -macro lea.qreg.qreg.disp32 target, offset, source +macro lea.qreg.qreg.disp32 target, source, offset match =rsp, target ; This is the SIB case assert 0 @@ -957,14 +1011,42 @@ macro push.bimm source end macro ; Operand-size prefix makes it 16-bit. +; +; If you're trying to fake pushing a larger size by doing several 16-bit +; pushes, remember to start by pushing the high end and proceed downwards. +; The stack grows down and x86 is little-endian, so the low end has to be +; pushed last in order to land at the lowest address. +; [Intel] volume 1, chapter 9, section 9-2.4, "Memory Data Formats". macro push.wimm source db 0x66 db 0x68 dw source end macro -; There is no 64-bit immediate push. To fake it, push the low half, then the -; high half. [Intel] volume 1, chapter 9, section 9-2.4, "Memory Data Formats". +; There is no 64-bit immediate push. So, can we have a push instruction that +; pushes a 32-bit immediate value? Sort-of, but it's sign-extended to 64 bits, +; so rsp is decremented by 8, not by 4. This is that instruction. +; +; You need to do a really close read of a number of things to understand why. +; The opcode tables in [Intel] in volume 2D, appendix A, section A-3 give it +; the d64 annotation, which per table A-1 in section A-2.5 indicates that the +; operand size is always 64 bits and that there is no corresponding 32-bit +; version. Yet, the actual immediate value is still only 32 bits! Direct your +; attention to the instruction's details page, volume 2B, chapter 4, section +; 4-3, "PUSH". The description section clearly details that the immediate may +; be less than the operand size, which makes sense once you know it, but it +; doesn't explicitly call out that the operand size is still 64 bits here. 
+; +; In general, the size of an immediate doesn't determine operand size, as you +; can read about in detail in [Intel] volume 1, chapter 3, section 3-6.1, with +; particular attention to table 3-4. +; +; Why is this surprising, given that it's consistent with the behavior of +; other instructions? Well, most instructions don't have such obvious +; side-effects. It's easy to not notice the operand size disagreeing with the +; immediate size when you're only writing to a register, but changing the stack +; in an unexpected way breaks things much more obviously. +; +; Anyway, if you really want the stack pointer to move by only 4 bytes, no +; single push will do it: in 64-bit mode a push is either 64-bit or, with the +; operand-size prefix, 16-bit, and that goes for register pushes too. Consider +; doing two 16-bit pushes instead. macro push.dimm source db 0x68 dd source @@ -1444,7 +1526,7 @@ _start: ;;; your home. See below for a little more thought about why here in ;;; particular. ;;; - lea.qreg.qreg.disp32 rbp, control_stack_size, rdi + lea.qreg.qreg.disp32 rbp, rdi, control_stack_size ;;; ;;; Now we save some stuff onto the heap. These are the locations that @@ -1460,7 +1542,9 @@ _start: mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi ; HEAP mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp ; S0 mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp ; R0 - ; TODO also consider HERE, LATEST, and STATE + lea.qreg.qreg.disp32 rax, rdi, control_stack_size + 0x20 + mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax ; HERE + ; TODO also consider LATEST and STATE ; strictly speaking, R0 could be a constant... but it isn't known until ; runtime, so we might as well make it a variable ;;; @@ -1475,6 +1559,11 @@ _start: ;;; The control stack also grows downwards, so this is its physical top ;;; as well. We allocate this dedicated space within the heap right here, ;;; in this routine, through our choice of where to put things. + ;;; * HERE is the physical start of the unallocated space in the heap. + ;;; We allocate heap space from bottom to top, by incrementing this + ;;; value. 
So, it would also be accurate to say that it points immediately + ;;; after the physical top of the allocated space. At any rate, the + ;;; address it points to is the first one that hasn't been used yet. ;;; ;;; S0 and R0 are mostly used when we want to initialize or reinitialize ;;; their respective stacks - that is, discard all their contents at once. @@ -1484,6 +1573,11 @@ _start: ;;; convince yourself that it only ever writes things just below the rbp ;;; address it receives, never right on top of it. ;;; + ;;; Notice that HERE points immediately after itself. This is just a + ;;; convenience, making it the last one like that so that the concern is + ;;; dealt with in a single place and is easy to keep up-to-date with code + ;;; changes. + ;;; ;;; A little more detail about why we offset everything by ;;; control_stack_size: We're carving out some space at the bottom of the ;;; heap - which grows low-to-high - to be the control stack - which grows @@ -1867,6 +1961,23 @@ defword FETCH, 0 push.qreg rax NEXT +; Address on top, value second +; I might have done it the other way, but this is what Jonesforth does and it +; seems reasonable enough. +defword ADDSTORE, 0 + dq $ + 0x8 ; codeword + pop.qreg rbx + pop.qreg rax + add.indirect.qreg.qreg rbx, rax + NEXT + +; Same stack convention as ADDSTORE: address on top, value second. +defword SUBSTORE, 0 + dq $ + 0x8 ; codeword + pop.qreg rbx + pop.qreg rax + sub.indirect.qreg.qreg rbx, rax + NEXT + defword STORE8, 0 dq $ + 0x8 ; codeword pop.qreg rbx @@ -2000,10 +2111,17 @@ defword SYS_WRITE, 0 defword QUINE, 0 dq DOCOL ; codeword - ; This stack-allocates a buffer, then finishes by pushing its length and - ; address on the value stack. Thus we don't need to care about how it + ; We still have HEAP on the stack. Use it to find HERE... + dq DUP, LIT, control_stack_size + 0x18, ADD + ; ... read the current value of HERE, which is where our block starts, + ; keeping the pointer to HERE itself on top (this assumes FETCH replaces an + ; address with the value stored there) ... + dq DUP, FETCH, SWAP + ; ... add the block's size to HERE in-place, consuming the pointer ... + dq LIT, 0x78, SWAP, ADDSTORE + ; ... and now we have allocated a block of memory, with its address on the + ; stack. 
We also still have HEAP at the bottom of the stack, for future use. + + ; This takes a buffer's address on the stack, populates it, then finishes by + ; pushing its length. The DUP and SWAP around it keep the buffer's address, + ; so we end up with the address on top and the length second. Thus we don't need to care about how it ; internally uses registers. - dq OLD_CODE + dq DUP, OLD_CODE, SWAP ; write() from heap-allocated buffer dq SYS_WRITE @@ -2014,6 +2132,9 @@ defword QUINE, 0 dq SYS_WRITE dq EXIT +; NOTE(review): hlt is a privileged instruction, so in user mode this +; presumably faults rather than halting; there is no NEXT after it - confirm +; that this is intended as a deliberate crash for debugging. +defword HLT, 0 + dq $ + 0x8 ; codeword + hlt defword WRITE_SELF_RAW_H, 0 dq $ + 0x8 ; codeword @@ -2038,6 +2159,12 @@ defword WRITE_SELF_RAW_H, 0 ;;; which refers to how many bytes have actually been populated, not to the ;;; size of the buffer. ;;; +;;; Stack in: +;;; * Top: The address of a block of memory to use. +;;; +;;; Stack out: +;;; * Top: The length of the data that was written to the provided memory. +;;; ;;; Registers within: ;;; ;;; * rdx holds the total used file size so far. During hand-off between @@ -2047,11 +2174,9 @@ defword WRITE_SELF_RAW_H, 0 ;;; * rcx points to the bottom of the buffer. ;;; defword OLD_CODE, 0 - dq $ + 0x8 ; codeword - - mov.qreg.qreg rcx, rdi - add.qreg.dimm rcx, control_stack_size + 0x18 + dq $ + 0x8 ; codeword + pop.qreg rcx ; our parameter - a block of memory mov.dreg.dimm rdx, 0 ; store running file size here ;;; @@ -2130,13 +2255,10 @@ defword OLD_CODE, 0 mov.qreg.disp8.qreg rcx, 0x68, rdx ; size in memory ;;; - ;;; The buffer is ready; push its length and address on the value stack, so - ;;; our caller can handle write()ing it out. + ;;; The buffer is ready; push its length on the value stack, so our caller + ;;; can handle write()ing it out. ;;; - - push.dimm 0 push.dimm 0x78 - push.qreg rcx NEXT |