From 62c486fabed46ae4f8fccaa5d16fa8ad0dd0694c Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Sat, 18 Oct 2025 17:02:06 -0700 Subject: Add documentation! Yay :) This also gets rid of the `load_origin` variable, it was redundant with flatassembler's `$$` feature. Force-Push: yes Change-Id: I0abc4cf8da65fcb443220e70b69267919ca43ba7 --- quine.asm | 94 ++++++++++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 66 insertions(+), 28 deletions(-) (limited to 'quine.asm') diff --git a/quine.asm b/quine.asm index abc709b..a468578 100644 --- a/quine.asm +++ b/quine.asm @@ -1,27 +1,36 @@ -; Currently, this is not yet fully self-hosting; it is based on -; flatassembler[1]. A minimal command to build and run it is: -; -; fasmg quine.asm quine && chmod 755 quine && ./quine; echo $? -; -; ***WARNING*** this version leaves the error message scrolled off the top of -; the screen and you see stale output -; fasmg quine.asm quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; cmp quine quine2; echo; echo "compare:" $? -; ZydisDisasm -64 quine -; -; A workflow you may wish to use for debugging is: -; -; rm quine2; fasmg quine.asm quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp quine quine2 ; echo cmp: $? -; -; You may also wish to do: -; -; objdump --disassemble quine -; ZydisDisasm -64 quine -; -; This relies on GNU binutils, and on zydis, respectively. -; -; [1] https://flatassembler.net/ +;;; Workflow tips: +;;; +;;; Currently, this is not yet fully self-hosting; it is based on +;;; flatassembler[1]. A minimal command to build and run it is: +;;; +;;; fasmg quine.asm quine && chmod 755 quine && ./quine; echo $? +;;; +;;; A workflow you may wish to use for debugging is: +;;; +;;; rm quine2; fasmg quine.asm quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp quine quine2 ; echo cmp: $? 
+;;; +;;; The reason this removes the old one first is that otherwise, there's a +;;; risk the error message will be scrolled off the top of the screen and +;;; you'll see stale output and not realize. +;;; +;;; You may also wish to do: +;;; +;;; objdump --disassemble quine +;;; ZydisDisasm -64 quine +;;; +;;; This relies on GNU binutils, and on zydis, respectively. +;;; +;;; [1] https://flatassembler.net/ +;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Assembly language ;;; +;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Before doing any actual code, we define macros for writing x86-64 assembly +;;; language. This is built from scratch, relying only on flatassembler's +;;; built-in semantics. No include files of any kind are used for it. + macro rex.0 db 0x40 end macro @@ -256,9 +265,21 @@ end macro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Executable file format ;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; +;;; Before we get into the meat of the program, we do a lot of ELF-specific +;;; stuff to ensure that our output is in a format Linux knows how to run. +;;; +;;; First, we set the origin to load at. This is arbitrary, but it can't be +;;; zero. We tell flatassembler about it because it's used in label +;;; calculations; we can reference it as $$ any time we need it in future. org 0x08000000 - +;;; +;;; Second, we output ELF's top-level file header. +;;; elf_header: ; * denotes mandatory fields according to breadbox db 0x7F, "ELF" ; *magic number @@ -282,8 +303,24 @@ elf_header: dw 0 ; section header entry size dw 0 ; number of section header entries dw 0 ; section name string table index +; Save a copy of the size of this chunk for our future reference, by comparing +; the current position to the label above. elf_header_size = $ - elf_header +;;; +;;; Third, immediately after the ELF file header, we output ELF's program +;;; header, which lists the memory regions ("segments") we want to have and +;;; where we want them to come from. 
We list just a single region, which is +;;; the entire contents of the ELF file from disk. +;;; +;;; It would be more typical to have separate code and data segments, and +;;; perhaps a stack or heap, but this keeps things simple. We do have a little +;;; stack space available, though we don't explicitly request any; the kernel +;;; allocates it for us as part of exec() so that it can pass us argc and argv +;;; (which we ignore). That stack space will be at a random address, different +;;; every time, because of ASLR; that's a neat security feature, so we leave +;;; it as-is. +;;; program_header: dd 1 ; *"loadable" segment type dd 0x05 ; *read+execute permission @@ -296,12 +333,13 @@ program_header: dq file_size ; *size in memory dq 0 ; segment alignment ; for relocation - will we be ASLR'd? +; Save the size of this chunk, as well. program_header_entry_size = $ - program_header -load_origin = 0x08000000 - -;;; Implementation strategy: +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Implementation strategy ;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; We assemble the entire file contents in a stack-allocated buffer. ;;; We avoid using the stack for any other purpose. When the file is fully @@ -343,7 +381,7 @@ _start: mov.rel.d rsp, 0x14, 1 ; ELF format version ; Compute the entry pointer. - mov.qreg.qimm rax, load_origin + mov.qreg.qimm rax, $$ add.b rax, 120 mov.rel.q rsp, 0x18, rax ; entry point @@ -369,7 +407,7 @@ _start: mov.rel.d rsp, 0x40, 1 ; "loadable" segment type mov.rel.d rsp, 0x44, 0x05 ; read+execute permission mov.rel.q.d rsp, 0x48, 0 ; offset in file - mov.rel.q.d rsp, 0x50, load_origin ; virtual address + mov.rel.q.d rsp, 0x50, $$ ; virtual address ; required, but can be anything, subject to alignment mov.rel.q.d rsp, 0x58, 0 ; physical address (ignored) -- cgit 1.4.1