diff options
-rw-r--r-- | quine.asm | 317 |
1 files changed, 290 insertions, 27 deletions
diff --git a/quine.asm b/quine.asm index 3b6df78..1139cc8 100644 --- a/quine.asm +++ b/quine.asm @@ -34,21 +34,61 @@ ;;; Assembly language ;;; ;;;;;;;;;;;;;;;;;;;;;;;;; ;;; -;;; Before doing any actual code, we define macros for writing x86-64 assembly -;;; language. This is built from scratch, relying only on flatassembler's -;;; built-in semantics. No include files of any kind are used for it. +;;; Before doing any actual code, we define macros for writing x86-64 +;;; assembly language. This is built from scratch, relying only on +;;; flatassembler's built-in semantics. No include files of any kind are used +;;; for it. +; The way these are all spelled out like this is slightly ridiculous, there +; must be a better way. macro rex.0 db 0x40 end macro - macro rex.w db 0x48 end macro - +macro rex.r + db 0x44 +end macro +macro rex.x + db 0x42 +end macro +macro rex.b + db 0x41 +end macro +macro rex.wr + db 0x4C +end macro +macro rex.wx + db 0x4A +end macro +macro rex.wb + db 0x49 +end macro +macro rex.rx + db 0x46 +end macro +macro rex.rb + db 0x45 +end macro macro rex.xb db 0x43 end macro +macro rex.wrx + db 0x4E +end macro +macro rex.wrb + db 0x4D +end macro +macro rex.wxb + db 0x4B +end macro +macro rex.rxb + db 0x47 +end macro +macro rex.wrxb + db 0x4F +end macro macro modrm mod, reg, rm assert mod >= 0 & mod < 4 @@ -92,6 +132,28 @@ macro qwordreg result, register end match end macro +macro owordreg result, register + match =r8?, register + result = 0 + else match =r9?, register + result = 1 + else match =r10?, register + result = 2 + else match =r11?, register + result = 3 + else match =r12?, register + result = 4 + else match =r13?, register + result = 5 + else match =r14?, register + result = 6 + else match =r15?, register + result = 7 + else + assert 0 + end match +end macro + ; TODO what register size does this use? 
macro mov.b target, source @@ -118,8 +180,18 @@ end macro macro mov.qreg.qimm target, source - rex.w qwordreg treg, target + rex.w + opcodereg 0xB8, treg + dq source +end macro + + +; Notice the use of REX.B here; this instruction puts the register number in +; the opcode field, so it uses Table 3-1. +macro mov.oreg.qimm target, source + owordreg treg, target + rex.wb opcodereg 0xB8, treg dq source end macro @@ -135,8 +207,12 @@ end macro ; Take a 64-bit source register, treat it as an address and look up the 64-bit -; value it points to, store that into a 64-bit target register. The only modes -; available also have displacement; we use an 8-bit one and set it to zero. +; value it points to, store that into a 64-bit target register. +; +; For rsp and rbp, the only modes available also have displacement; we use an +; 8-bit one and set it to zero. The other registers could be encoded without +; the displacement, but for simplicity's sake we do the same thing for all of +; them. ; ; In understanding this, pay close attention to the Op/En column in the opcode ; table. The "RM" variant means the ModRM byte's R/M field (the third one) @@ -200,6 +276,55 @@ macro mov.indirect.qreg.qreg target, source end macro +; Take a 64-bit source register, store its value into a high 64-bit target +; register (r8-r15). +; +; Notice that there are two ways to add another bit to the register encoding. +; Table 3-1 is about REX.B, but does not apply here, it's for instructions +; that use opcode bits to specify a register, and none of the +; register-to-register MOV variants do that (it's for immediate mode). +; +; Instead, we want the mechanism that uses REX.R as the extra bit, and it +; combines with the reg field of ModRM, as per 2.2.1.2. +; +; Therefore, we want the variant of MOV which puts the target in the reg +; field. That's Op/En "RM", opcode 0x8B with REX.WR. +; +; Mode 3 is direct addressing. 
+macro mov.oreg.qreg target, source + owordreg treg, target + qwordreg sreg, source + rex.wr + db 0x8B + modrm 3, treg, sreg +end macro + + +; Take a high 64-bit source register (r8-r15), store its value into a 64-bit +; target register. +; +; Notice that there are two ways to add another bit to the register encoding. +; Table 3-1 is about REX.B, but does not apply here, it's for instructions +; that use opcode bits to specify a register, and none of the +; register-to-register MOV variants do that (it's for immediate mode). +; +; Instead, we want the mechanism that uses REX.R as the extra bit, and it +; combines with the reg field of ModRM, as per 2.2.1.2. +; +; Therefore, we want the variant of MOV which puts the source in the reg +; field. That's Op/En "MR", opcode 0x89 with REX.WR. +; +; Mode 3 is direct addressing. +macro mov.qreg.oreg target, source + qwordreg treg, target + owordreg sreg, source + rex.wr + db 0x89 + modrm 3, sreg, treg +end macro + + +; This adds a 64-bit register to another 64-bit register, in place. macro add.qreg.qreg target, source qwordreg treg, target qwordreg sreg, source @@ -221,6 +346,7 @@ macro add.qreg.bimm target, source db source end macro + ; This adds a signed 32-bit immediate value to a 64-bit register, in place. ; ; Notice the use of 3 as the addressing mode. This says to use the register @@ -269,7 +395,9 @@ macro mov.qreg.disp8.bimm target, offset, source match =rsp, target db 0xC6 modrm 1, 0, 4 - sib 0, 0, 4 + ; 4 is rsp, but it's a special case + sib 0, 4, 4 + ; no scaling, no indexing, rsp as base db offset db source else @@ -284,12 +412,16 @@ end macro ; us an operand size of 32 bits by default. [Intel] volume 1, section 3.6.1, ; table 3-4. We want a 16-bit operand, so we use the operand-size prefix, ; 0x66, and we leave REX.W unset. +; +; We need to treat rsp specially because it's the SIB case, per table 2-2. 
macro mov.qreg.disp8.wimm target, offset, source match =rsp, target db 0x66 db 0xC7 modrm 1, 0, 4 + ; 4 is rsp, but it's a special case sib 0, 4, 4 + ; no scaling, no indexing, rsp as base db offset dw source else @@ -307,7 +439,9 @@ macro mov.qreg.disp8.dimm target, offset, source match =rsp, target db 0xC7 modrm 1, 0, 4 + ; 4 is rsp, but it's a special case sib 0, 4, 4 + ; no scaling, no indexing, rsp as base db offset dd source else @@ -318,17 +452,27 @@ end macro ; Move from a 64-bit register, to a 64-bit location relative to a 64-bit ; register, with an 8-bit displacement and no indexing. ; -; This uses opcode 0x89. +; This uses opcode 0x89 with REX.W, so that gives us the reg field as the +; 64-bit source and the R/M field as the 64-bit destination. +; +; We need to treat a target of rsp specially because it's the SIB case per +; table 2-2. macro mov.qreg.disp8.qreg target, offset, source + qwordreg sreg, source + qwordreg treg, target match =rsp, target - qwordreg sreg, source rex.w db 0x89 - modrm 1, sreg, 4 + modrm 1, sreg, treg + ; treg is rsp by assumption, and R/M = rsp is the SIB case sib 0, 4, 4 + ; no scaling, no indexing, rsp as base db offset else - assert 0 + rex.w + db 0x89 + modrm 1, sreg, treg + db offset end match end macro @@ -346,7 +490,9 @@ macro mov.qreg.disp8.dimm target, offset, source rex.w db 0xC7 modrm 1, 0, 4 + ; 4 is rsp, but it's a special case sib 0, 4, 4 + ; no scaling, no indexing, rsp as base db offset dd source else @@ -404,16 +550,17 @@ end macro ;;; Executable file format ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; -;;; Before we get into the body of the program, we do a lot of ELF-specific +;;; Before we get into the body of the program, we do a lot of ELF-specific ;;; stuff to ensure that our output is in a format Linux knows how to run. ;;; -;;; First, we set the origin to load at. This is arbitrary, but it can't be +;;; First, we set the origin to load at. This is arbitrary, but it can't be ;;; zero. 
We tell flatassembler about it because it's used in label ;;; calculations; we can reference it as $$ any time we need it in future. org 0x08000000 ;;; -;;; Second, we output ELF's top-level file header. +;;; Second, we output ELF's top-level file header. The only interesting +;;; thing here is the entry pointer. ;;; elf_header: ; * denotes mandatory fields according to breadbox @@ -443,18 +590,20 @@ elf_header: elf_header_size = $ - elf_header ;;; -;;; Third, immediately after the ELF file header, we output ELF's program +;;; Third, immediately after the ELF file header, we output ELF's program ;;; header, which lists the memory regions ("segments") we want to have and ;;; where we want them to come from. We list just a single region, which is ;;; the entire contents of the ELF file from disk. ;;; -;;; It would be more typical to have separate code and data segments, and -;;; perhaps a stack or heap, but this keeps things simple. We do have a little -;;; stack space available, though we don't explicitily request any; the kernel -;;; allocates it for us as part of exec() so that it can pass us argc and argv -;;; (which we ignore). That stack space will be at a random address, different -;;; every time, because of ASLR; that's a neat security feature, so we leave -;;; it as-is. +;;; It would be more typical to use this header to ask the loader to give us +;;; separate code and data segments, and perhaps a stack or heap, but this +;;; keeps things simple, and we can create those things for ourselves later. +;;; +;;; We do have a little stack space available, though we don't explicitly +;;; request any; the kernel allocates it for us as part of exec() so that it +;;; can pass us argc and argv (which we ignore). That stack space will be at a +;;; random address, different every time, because of ASLR; that's a neat +;;; security feature, so we leave it as-is. 
;;; program_header: dd 1 ; *"loadable" segment type @@ -568,7 +717,7 @@ program_header_entry_size = $ - program_header ;;; ;;; Additionally, immediately after beginning execution of a word: ;;; -;;; * rax points to the address being executed +;;; * rax points to the address of the codeword being executed ;;; The value of rax is purely for the callee's benefit, and does not need ;;; to be preserved. ;;; @@ -698,9 +847,123 @@ DOCOL: ;;; ;;; This routine is really only responsible for one-time initialization. ;;; +;;; Registers in: +;;; +;;; * rsp points to the top (low end) of the value stack +;;; The kernel sets this up for us, and we need to save it somewhere so +;;; Forth can use it. +;;; +;;; Registers out: +;;; +;;; * rsp points to the top of the control stack +;;; * rsi points within QUIT +;;; QUIT is the word that's Forth's closest equivalent to main(). +;;; +;;; Registers within: +;;; +;;; * rdi points to the base the heap was allocated at, once it is +;;; This is the same value that S0 will hold, once we reach a point +;;; where we can rely on Forth variable-words. +;;; _start: cld ; clear the DF flag - ; If we wanted to save the initial stack pointer, we'd do that here. + + ;;; + ;;; Prepare the heap. + ;;; + ;;; We could ask for a data segment in the program header, but where's the + ;;; fun in that? Instead, we call mmap(). + ;;; + ;;; If we wanted the kernel to do ASLR for us, passing address zero would + ;;; cause it to pick somewhere at random, but instead we choose our own + ;;; location. It's still not guaranteed to be where we ask for, so we still + ;;; do the work to record where it wound up. We could pass the "fixed" flag + ;;; and the kernel would trust us, but this gives us more options for + ;;; interoperating with other runtimes. 
+ ;;; + mov.b rax, 9 ; mmap() + mov.qreg.qimm rdi, 0x0000001000000000 ; address (very arbitrary) + mov.qreg.qimm rsi, 0x0000000001000000 ; size (16 MiB) + mov.qreg.qimm rdx, 0x03 ; protection (read+write) + mov.oreg.qimm r10, 0x22 ; flags (private+anonymous) + mov.oreg.qimm r8, 0 ; file descriptor (ignored) + mov.oreg.qimm r9, 0 ; offset (ignored) + syscall + + ;;; + ;;; The return value of the system call is in rax, we'll use it in a sec. + ;;; We need to save this somewhere in case we ever want to munmap() it; + ;;; there's no widely-used name for it so we have to make one up. S0 and R0 + ;;; are widely-used names for the logical bottoms of the value and control + ;;; stacks, respectively, and we will eventually set those up as well, so we + ;;; should keep those names in mind. The control stack lives within the + ;;; heap, while the value stack is its own segment. This value, though, is + ;;; the physical bottom of the segment, meaning that it stays the same even + ;;; as we allocate and deallocate things within it. This is unlike the two + ;;; stack pointers, so we give it a name that doesn't suggest similarity: + ;;; HEAP. + ;;; + ;;; Once Forth is fully set up, its internal variables will be accessed + ;;; through variable-words like any other Forth data, including HEAP. To get + ;;; to that point, though, we need to be able to hold onto variable data + ;;; between now and then. In fact, if we don't have at least one of HEAP and + ;;; HERE (its counterpart which points to the logical top end), all our + ;;; efforts to hold onto anything seem a bit doomed. + ;;; + ;;; So, we temporarily dedicate rdi to HEAP - only within this routine - + ;;; and store everything else in ways that let us find things by reference + ;;; to it. We choose rdi because it works with the indexing modes we care + ;;; about, and its name suggests its function. 
+ ;;; + ;;; The strategy Jonesforth uses is not applicable to us; Jonesforth + ;;; takes advantage of the linker to let its code segment refer to specific, + ;;; pre-allocated objects in the data segment. We are our own linker. + ;;; Hence, this approach. + ;;; + ;;; Keying things off HEAP is the fundamental decision, but to make sure + ;;; our variables are accessible both during early bootstrapping, and later, + ;;; we also have to be thoughtful about data structures. More on that in a + ;;; moment. + ;;; + mov.qreg.qreg rdi, rax + + ;;; + ;;; Now we save some stuff onto the heap. These are the locations that + ;;; will eventually be the backing stores of the Forth variables, but we + ;;; don't create the word headers yet, since there's no requirement that + ;;; they be next to the backing stores. We'll do that later, once we have + ;;; word-writing infrastructure in place. For now, we just use their offsets + ;;; relative to the physical bottom of the heap, which are fixed. + ;;; + ;;; These will be the permanent homes of these values, though we have + ;;; copies of them elsewhere while we're still in this routine. + ;;; + mov.qreg.disp8.qreg rdi, 0x00, rdi ; HEAP + mov.qreg.disp8.qreg rdi, 0x08, rsp ; S0 + ; TODO this isn't done yet, need to reserve space and explain it more + ;;; + ;;; * HEAP is the physical bottom of the heap + ;;; The heap grows upwards in memory, so this is also the logical + ;;; bottom. This comes from the address mmap() just returned to us. + ;;; * S0 is the logical bottom of the value stack + ;;; The value stack grows downwards in memory, so this is the physical + ;;; top of it. This comes from the stack pointer the kernel initialized us + ;;; with. + ;;; * R0 is the logical bottom of the control stack + ;;; The control stack also grows downwards, so this is its physical top + ;;; as well. We allocate this dedicated space within the heap right here, + ;;; in this routine, through our choice of where to put things. 
+ ;;; + ;;; S0 and R0 are mostly used when we want to initialize or reinitialize + ;;; their respective stacks - that is, discard all their contents at once. + + ;;; TODO we don't do this yet + ;;; Now we're going to create a word in the heap, to hold the value of + ;;; HERE long-term. + + ;;; + ;;; We would like very much to get out of the bootstrap code and into a + ;;; proper threaded-execution setup. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -789,7 +1052,7 @@ _start: ;;; way to generate it. ;;; ;;; TODO of course, really we want to for-real track this - mov.qreg.qimm rax, 0x201 + mov.qreg.qimm rax, 0x24F add.qreg.qreg rdx, rax ;;; |