diff options
Diffstat (limited to 'quine.asm')
| -rw-r--r-- | quine.asm | 473 |
1 files changed, 291 insertions, 182 deletions
diff --git a/quine.asm b/quine.asm index 6ab7da4..2d9f6eb 100644 --- a/quine.asm +++ b/quine.asm @@ -152,6 +152,12 @@ macro opcodereg opcode, reg db opcode or reg end macro +macro opcodecc opcode, cc + assert opcode >= 0 & opcode < 256 & opcode and 15 = 0 + assert cc >= 0 & cc < 16 + db opcode or cc +end macro + macro scalefield sfield, scale if 1 = scale sfield = 0 @@ -166,6 +172,77 @@ macro scalefield sfield, scale end if end macro +; Yep, there sure is a lot of duplication in these. This is based on Intel's +; documented mnemonics... +; +; "Above" and "below" are for unsigned comparisons. "Greater" and "less" are +; for signed comparisons. +macro conditioncode cc, condition + match =above, condition + cc = 0x07 + else match =above.equal, condition + cc = 0x03 + else match =below, condition + cc = 0x02 + else match =below.equal, condition + cc = 0x06 + else match =carry, condition + cc = 0x02 + else match =equal, condition + cc = 0x04 + else match =greater, condition + cc = 0x0F + else match =greater.equal, condition + cc = 0x0D + else match =less, condition + cc = 0x0C + else match =less.equal, condition + cc = 0x0E + else match =not.above, condition + cc = 0x06 + else match =not.above.equal, condition + cc = 0x02 + else match =not.below, condition + cc = 0x03 + else match =not.below.equal, condition + cc = 0x07 + else match =not.carry, condition + cc = 0x03 + else match =not.equal, condition + cc = 0x05 + else match =not.greater, condition + cc = 0x0E + else match =not.greater.equal, condition + cc = 0x0C + else match =not.less, condition + cc = 0x0D + else match =not.less.equal, condition + cc = 0x0F + else match =not.overflow, condition + cc = 0x01 + else match =not.parity, condition + cc = 0x0B + else match =not.sign, condition + cc = 0x09 + else match =not.zero, condition + cc = 0x05 + else match =overflow, condition + cc = 0x00 + else match =parity, condition + cc = 0x0A + else match =parity.even, condition + cc = 0x0A + else match =parity.odd, condition + 
cc = 0x0B + else match =sign, condition + cc = 0x08 + else match =zero, condition + cc = 0x04 + else + assert 0 + end match +end macro + ;;; On registers ;;; ------------ @@ -288,6 +365,10 @@ macro owordreg result, register end macro +;;; Instructions +;;; ------------ + + ; TODO what register size does this use? macro mov.b target, source match =rax?, target @@ -649,6 +730,23 @@ macro add.indirect.qreg.qreg target, source end macro +macro add.qreg.indirect.qreg target, source + match =rsp, source + assert 0 + ; The SIB case. + else match =rbp, source + assert 0 + ; An unrelated addressing mode + else + qwordreg treg, target + qwordreg sreg, source + rex.w + db 0x03 + modrm 0, treg, sreg + end match +end macro + + ; This adds a signed 8-bit immediate value to a 64-bit register, in place. ; ; Notice the use of 3 as the addressing mode. This says to use the register @@ -801,7 +899,7 @@ macro not.qreg target end macro -; This sets the flags to the same things they'd be set by if subtracting +; This sets the flags to the same things they'd be set to if subtracting ; right from left. macro cmp.qreg.qreg left, right qwordreg lreg, left @@ -811,81 +909,25 @@ macro cmp.qreg.qreg left, right modrm 3, lreg, rreg end macro +; This sets the flags to the same things they'd be set to if AND'ing right +; with left. +macro test.qreg.qreg left, right + qwordreg lreg, left + qwordreg rreg, right + rex.w + db 0x85 + modrm 3, rreg, lreg +end macro -; Yep, there sure is a lot of duplication in these. This is based on Intel's -; documented mnemonics... -; -; "Above" and "below" are for unsigned comparisons. "Greater" and "less" are -; for signed comparisons. 
macro set.breg.cc target, condition bytereg treg, target + conditioncode cc, condition db 0x0F - match =above, condition - db 0x97 - else match =above.equal, condition - db 0x93 - else match =below, condition - db 0x92 - else match =below.equal, condition - db 0x96 - else match =carry, condition - db 0x92 - else match =equal, condition - db 0x94 - else match =greater, condition - db 0x9F - else match =greater.equal, condition - db 0x9D - else match =less, condition - db 0x9C - else match =less.equal, condition - db 0x9E - else match =not.above, condition - db 0x96 - else match =not.above.equal, condition - db 0x92 - else match =not.below, condition - db 0x93 - else match =not.below.equal, condition - db 0x97 - else match =not.carry, condition - db 0x93 - else match =not.equal, condition - db 0x95 - else match =not.greater, condition - db 0x9E - else match =not.greater.equal, condition - db 0x9C - else match =not.less, condition - db 0x9D - else match =not.less.equal, condition - db 0x9F - else match =not.overflow, condition - db 0x91 - else match =not.parity, condition - db 0x9B - else match =not.sign, condition - db 0x99 - else match =not.zero, condition - db 0x95 - else match =overflow, condition - db 0x90 - else match =parity, condition - db 0x9A - else match =parity.even, condition - db 0x9A - else match =parity.odd, condition - db 0x9B - else match =sign, condition - db 0x98 - else match =zero, condition - db 0x94 - else - assert 0 - end match + opcodecc 0x90, cc modrm 3, 0, treg end macro + ; Move from an 8-bit immediate value, to a location relative to a 64-bit ; register, with an 8-bit displacement and no indexing. 
; @@ -965,7 +1007,7 @@ macro mov.qreg.disp8.qreg target, offset, source rex.w db 0x89 modrm 1, sreg, treg - match =rsp, target + match =rsp, source ; R/M = rsp is the SIB case sib 0, 4, 4 ; no scaling, no indexing, rsp as base @@ -1035,32 +1077,32 @@ end macro ; indexed addressing, with an 8-bit displacement and no indexing, but instead ; of doing anything with the memory, just store the address itself into a ; register. -macro lea.qreg.qreg.disp8 target, offset, source - match =rsp, target - ; This is the SIB case - assert 0 - else - qwordreg treg, target - qwordreg sreg, source - rex.w - db 0x8D - modrm 1, treg, sreg - db offset +macro lea.qreg.disp8.qreg target, offset, source + qwordreg treg, target + qwordreg sreg, source + rex.w + db 0x8D + modrm 1, treg, sreg + match =rsp, source + ; R/M = rsp is the SIB case + sib 0, 4, sreg + ; no scaling, no indexing, rsp as base end match + db offset end macro -macro lea.qreg.qreg.disp32 target, source, offset - match =rsp, target - ; This is the SIB case - assert 0 - else - qwordreg treg, target - qwordreg sreg, source - rex.w - db 0x8D - modrm 2, treg, sreg - dd offset +macro lea.qreg.disp32.qreg target, offset, source + qwordreg treg, target + qwordreg sreg, source + rex.w + db 0x8D + modrm 2, treg, sreg + match =rsp, source + ; R/M = rsp is the SIB case + sib 0, 4, sreg + ; no scaling, no indexing, rsp as base end match + dd offset end macro macro lea.qreg.indexed.qreg target, source, index, scale @@ -1249,6 +1291,23 @@ macro jmp.rel.dimm location dd location end macro +; The location is relative to the start of the instruction immediately +; following the jmp. +macro jmp.cc.rel.bimm condition, location + conditioncode cc, condition + opcodecc 0x70, cc + db location +end macro + +; The location is relative to the start of the instruction immediately +; following the jmp. 
+macro jmp.cc.rel.dimm condition, location + conditioncode cc, condition + db 0x0F + ; The rel32 form of Jcc is 0x0F followed by 0x80+cc; the 0x70 row is the rel8 form. + opcodecc 0x80, cc + dd location +end macro + ; Invoke a system call provided by the kernel. On Linux, the System V ABI ; describes the semantics of such calls (at least, on x86). macro syscall @@ -1570,13 +1629,13 @@ end macro ;;; * rbp points to the top of the control stack. ;;; macro PUSHCONTROL source - lea.qreg.qreg.disp8 rbp, -8, rbp + lea.qreg.disp8.qreg rbp, -8, rbp mov.indirect.qreg.qreg rbp, source end macro macro POPCONTROL target mov.qreg.indirect.qreg target, rbp - lea.qreg.qreg.disp8 rbp, 8, rbp + lea.qreg.disp8.qreg rbp, 8, rbp end macro ;;; @@ -1711,7 +1770,7 @@ _start: ;;; your home. See below for a little more thought about why here in ;;; particular. ;;; - lea.qreg.qreg.disp32 rbp, rdi, control_stack_size + lea.qreg.disp32.qreg rbp, control_stack_size, rdi ;;; ;;; Now we save some stuff onto the heap. These are the locations that @@ -1727,7 +1786,7 @@ _start: mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi ; HEAP mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp ; S0 mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp ; R0 - lea.qreg.qreg.disp32 rax, rdi, control_stack_size + 0x20 + lea.qreg.disp32.qreg rax, control_stack_size + 0x20, rdi mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax ; HERE ; TODO also consider LATEST and STATE ; strictly speaking, R0 could be a constant... but it isn't known until @@ -2042,10 +2101,10 @@ defword UNROLL, 0 ; accordingly. push.qreg rsi - ; When rcx is 1, we want rsp + 16. - lea.qreg.disp8.indexed.qreg rsi, 8, rsp, rcx, 8 - ; When rcx is 1, we want rsp + 8. - lea.qreg.indexed.qreg rdi, rsp, rcx, 8 + ; Regardless of rcx, we want rsp + 16. + lea.qreg.disp8.qreg rsi, 16, rsp + ; Regardless of rcx, we want rsp + 8. + lea.qreg.disp8.qreg rdi, 8, rsp ; With ROLL, we were starting at the high end. Here, we start at the low ; end, which means we need rsi to increment after each repetition.
That's @@ -2335,6 +2394,31 @@ defword FETCH32, 0 mov.dreg.indirect.qreg eax, rbx NEXT +;;;;;;;;;;;;;;;;; +;;; Branching ;;; +;;;;;;;;;;;;;;;;; + +; This takes a number of bytes, not machine words. That allows it to be used +; for putting weird things embedded in the code. +; +; The offset is relative to the start of the word the number of bytes is in, +; so, make sure to have it skip itself. +defword BRANCH, 0 + dq $ + 0x8 ; codeword + add.qreg.indirect.qreg rsi, rsi + NEXT + +; This should probably be 0BRANCH, but right now the auto-label code is picky. +defword ZBRANCH, 0 + dq $ + 0x8 ; codeword + pop.qreg rax + test.qreg.qreg rax, rax + ; Please notice the 8-bit branch to the nearby word. + jmp.cc.rel.bimm zero, BRANCH + 8 - zbranch_after_jmp +zbranch_after_jmp: + lodsq ; just a convenient way to skip rsi forward + NEXT + ;;; ;;; One of the most charming naming traditions in Forth is that the ;;; top-level word that stays running forever, is called "quit". @@ -2484,15 +2568,40 @@ defword QUINE, 0 ; ... and now we have allocated a block of memory, with its address on the ; stack. We also still have HEAP at the bottom of the stack, for future use. - ; This takes a buffer's address on the stack and adds an ELF file header to - ; it, leaving nothing on the stack afterwards. - dq DUP, ELF_FILE_HEADER, ELF_PROGRAM_HEADER, DROP - - ; This takes a buffer's address on the stack, skips an ELF file header based - ; on hardcoded size, appends an ELF program header, then finishes by pushing - ; the length of the part of the buffer that has now been used. Thus we don't - ; need to care about how it internally uses registers. - dq DUP, OLD_CODE, SWAP + ; We have one label, and three pieces of information about it: Guessed value, + ; actual value, and status. We keep them on the stack in this order, from + ; top to bottom: guess, actual, status. Above that, at the actual top of + ; the stack, we have a mutable copy of the buffer's address. 
+ ; + ; Status is a bit field: + ; bit zero is whether it was used before being defined + ; bit one is whether it's been defined + ; bit two is whether the guessed value wound up equaling the actual value + dq DUP, LIT, 0, LIT, 0, LIT, 0, LIT, 4, ROLL + + ; This takes an address to write to on the stack and adds an ELF file header + ; to it, leaving the adjusted address with the size of the header added. + ; Then it does the same thing with an ELF program header. + dq ELF_FILE_HEADER, ELF_PROGRAM_HEADER + + ; The two-pass magick. + dq LIT, file_size - 0x78, ADD + dq SET_LABEL + dq DROP, LIT, 4, ROLL, DUP, LIT, 5, UNROLL + dq ELF_FILE_HEADER, ELF_PROGRAM_HEADER + + ; Drop the copy of the buffer's address. + dq DROP + + ; Drop the label data. + dq DROP, DROP, DROP + + ; This takes a buffer's address on the stack, skips an ELF file header and + ; program header based on hardcoded size, computes an offset (secretly + ; hardcoded), and writes that offset into an appropriate place in the middle + ; of those headers. It then returns the length of the used portion of the + ; buffer. + dq LIT, 0x78, SWAP ; write() from stack-allocated buffer dq SYS_WRITE @@ -2503,6 +2612,76 @@ defword QUINE, 0 dq SYS_WRITE dq EXIT + +; Stack in: +; output memory start +; label actual value +; label guessed value +; label status +; output memory current point +; Stack out: +; output memory start +; label actual value +; label guessed value +; label status (potentially modified) +; output memory current point +; label value for caller to use +defword USE_LABEL, 0 + dq DOCOL + + ; Fetch the status + dq SWAP + ; Check the bit that indicates it's been set. + dq DUP, LIT, 2, AND, ZBRANCH, 12*8 + + ; If we're here, it has been set already, so just put the status back... + dq LIT, 2, UNROLL + ; Fetch the actual value... + dq LIT, 4, ROLL, DUP, LIT, 5, UNROLL + ; ... and exit + dq EXIT + + ; If we're here, it hasn't been set yet, so mark it used-before-set. + dq LIT, 1, OR + ; Put the status back...
+ dq SWAP + ; Fetch the guessed value... + dq LIT, 3, ROLL, DUP, LIT, 4, UNROLL + ; ... and exit + dq EXIT + +; Stack in: +; output memory start +; label actual value (not yet set) +; label guessed value +; label status +; output memory current point +; Stack out: +; output memory start +; label actual value (now set) +; label guessed value +; label status (modified) +; output memory current point +defword SET_LABEL, 0 + dq DOCOL + + ; Compute the current offset, to use as the actual value + dq DUP, LIT, 6, ROLL, DUP, LIT, 7, UNROLL, SUB + + ; Overwrite the old actual value; keep a copy + dq LIT, 5, ROLL, DROP, DUP, LIT, 5, UNROLL + + ; Check equality with the guessed value + dq LIT, 4, ROLL, DUP, LIT, 5, UNROLL, EQ + + ; We don't need to branch. Now we mark the status as having been defined, + ; and we also set bit 2 if appropriate. + dq LIT, 4, MUL + dq LIT, 3, ROLL, OR, LIT, 2, OR, LIT, 2, UNROLL + + dq EXIT + + defword HLT, 0 dq $ + 0x8 ; codeword hlt @@ -2570,87 +2749,17 @@ defword ELF_PROGRAM_HEADER, 0 dq LITPACK64, 0 ; offset in file dq LITPACK64, $$ ; virtual address ; required, but can be anything, subject to alignment - dq LITPACK16, 0 ; physical address (ignored) + dq LITPACK64, 0 ; physical address (ignored) ; Fill in 0 as the file size for now, to avoid uninitialized memory. - dq LITPACK64, 0 ; size in file - dq LITPACK64, 0 ; size in memory + dq USE_LABEL, PACK64 ; size in file + dq USE_LABEL, PACK64 ; size in memory dq LITPACK64, 0 ; segment alignment ; for relocation, but this doesn't apply to us dq EXIT - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; (old) Implementation strategy ;;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; -;;; We assemble the entire file contents in a stack-allocated buffer. -;;; We avoid using the stack for any other purpose. When the file is fully -;;; assembled, we output it. -;;; -;;; The assembly proceeds in several chunks - ELF header, program header, -;;; etc.
Each chunk extends the buffer as per its own needs, by adjusting -;;; the stack pointer. All chunks also update a running total file size, -;;; which refers to how many bytes have actually been populated, not to the -;;; size of the buffer. -;;; -;;; Stack in: -;;; * Top: The address of a block of memory to use. -;;; -;;; Stack out: -;;; * Top: The length of the data that was written to the provided memory. -;;; -;;; Registers within: -;;; -;;; * rdx holds the total used file size so far. During hand-off between -;;; chunks, this size must be equal to the buffer size; within a chunk it -;;; may be less. -;;; -;;; * rcx points to the bottom of the buffer. -;;; -defword OLD_CODE, 0 - dq $ + 0x8 ; codeword - - pop.qreg rcx ; our parameter - a block of memory - mov.dreg.dimm rdx, 0 ; store running file size here - - ; Add the size of the ELF header to the running total - mov.dreg.dimm rax, 0x40 - add.qreg.qreg rdx, rax - - ; Add the size of the program header to the running total - mov.dreg.dimm rax, 0x38 - add.qreg.qreg rdx, rax - - ;;; Hardcode the size of the actual code chunk based on flatassembler's - ;;; label calculations, since we don't yet have a way to generate it from - ;;; within our code. - ;;; - ;;; Originally this was a constant number, to discourage reliance on label - ;;; math, but the direction things are growing in now is to implement - ;;; general label math ourselves, so that's okay. - ;;; - ;;; TODO of course, really we want to for-real compute this at runtime - mov.qreg.qimm rax, code_size - add.qreg.qreg rdx, rax - - ;;; - ;;; Go back and fill in the file size now that we know it (ill-gotten - ;;; knowledge though it is). - ;;; - mov.qreg.disp8.qreg rcx, 0x60, rdx ; size in file - mov.qreg.disp8.qreg rcx, 0x68, rdx ; size in memory - - ;;; - ;;; The buffer is ready; push its length on the value stack, so our caller - ;;; can handle write()ing it out. - ;;; - push.dimm 0x78 - - NEXT - code_size = $ - code_start file_size = $ - $$ |