summary refs log tree commit diff
path: root/quine.asm
diff options
context:
space:
mode:
authorIrene Knapp <ireneista@irenes.space>2025-10-18 17:02:06 -0700
committerIrene Knapp <ireneista@irenes.space>2025-10-18 17:02:06 -0700
commit62c486fabed46ae4f8fccaa5d16fa8ad0dd0694c (patch)
treeb8fb441b46daa98ab6258fcf9c4fa4e6458916b6 /quine.asm
parent5fc2871f2ced6ac718685bad4729994441d6c832 (diff)
Add documentation! Yay :)
This also gets rid of the `load_origin` variable, it was redundant with
flatassembler's `$$` feature.

Force-Push: yes
Change-Id: I0abc4cf8da65fcb443220e70b69267919ca43ba7
Diffstat (limited to 'quine.asm')
-rw-r--r--quine.asm94
1 files changed, 66 insertions, 28 deletions
diff --git a/quine.asm b/quine.asm
index abc709b..a468578 100644
--- a/quine.asm
+++ b/quine.asm
@@ -1,27 +1,36 @@
-; Currently, this is not yet fully self-hosting; it is based on
-; flatassembler[1]. A minimal command to build and run it is:
-;
-; fasmg quine.asm quine && chmod 755 quine && ./quine; echo $?
-;
-; ***WARNING*** this version leaves the error message scrolled off the top of
-; the screen and you see stale output
-; fasmg quine.asm quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; cmp quine quine2; echo; echo "compare:" $?
-; ZydisDisasm -64 quine
-;
-; A workflow you may wish to use for debugging is:
-;
-; rm quine2; fasmg quine.asm quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp quine quine2 ; echo cmp: $?
-;
-; You may also wish to do:
-;
-; objdump --disassemble quine
-; ZydisDisasm -64 quine
-;
-; This relies on GNU binutils, and on zydis, respectively.
-;
-; [1] https://flatassembler.net/
+;;; Workflow tips:
+;;;
+;;; Currently, this is not yet fully self-hosting; it is based on
+;;; flatassembler[1]. A minimal command to build and run it is:
+;;;
+;;; fasmg quine.asm quine && chmod 755 quine && ./quine; echo $?
+;;;
+;;; A workflow you may wish to use for debugging is:
+;;;
+;;; rm quine2; fasmg quine.asm quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp quine quine2 ; echo cmp: $?
+;;;
+;;; The reason this removes the old one first is that otherwise, there's a
+;;; risk the error message will be scrolled off the top of the screen and
+;;; you'll see stale output and not realize.
+;;;
+;;; You may also wish to do:
+;;;
+;;; objdump --disassemble quine
+;;; ZydisDisasm -64 quine
+;;;
+;;; This relies on GNU binutils, and on zydis, respectively.
+;;;
+;;; [1] https://flatassembler.net/
 
 
+;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Assembly language ;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Before doing any actual code, we define macros for writing x86-64 assembly
+;;; language. This is built from scratch, relying only on flatassembler's
+;;; built-in semantics. No include files of any kind are used for it.
+
 macro rex.0
   db 0x40
 end macro
@@ -256,9 +265,21 @@ end macro
 
 
 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Executable file format ;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;; Before we get into the meat of the program, we do a lot of ELF-specific
+;;; stuff to ensure that our output is in a format Linux knows how to run.
+;;;
+;;; First, we set the origin to load at. This is arbitrary, but it can't be
+;;; zero. We tell flatassembler about it because it's used in label
+;;; calculations; we can reference it as $$ any time we need it in future.
 org 0x08000000
 
-
+;;;
+;;; Second, we output ELF's top-level file header.
+;;;
 elf_header:
   ; * denotes mandatory fields according to breadbox
   db 0x7F, "ELF"                 ; *magic number
@@ -282,8 +303,24 @@ elf_header:
   dw 0                           ; section header entry size
   dw 0                           ; number of section header entries
   dw 0                           ; section name string table index
+; Save a copy of the size of this chunk for our future reference, by comparing
+; the current position to the label above.
 elf_header_size = $ - elf_header
 
+;;;
+;;; Third, immediately after the ELF file header, we output ELF's program
+;;; header, which lists the memory regions ("segments") we want to have and
+;;; where we want them to come from. We list just a single region, which is
+;;; the entire contents of the ELF file from disk.
+;;;
+;;; It would be more typical to have separate code and data segments, and
+;;; perhaps a stack or heap, but this keeps things simple. We do have a little
+;;; stack space available, though we don't explicitly request any; the kernel
+;;; allocates it for us as part of exec() so that it can pass us argc and argv
+;;; (which we ignore). That stack space will be at a random address, different
+;;; every time, because of ASLR; that's a neat security feature, so we leave
+;;; it as-is.
+;;;
 program_header:
   dd 1                           ; *"loadable" segment type
   dd 0x05                        ; *read+execute permission
@@ -296,12 +333,13 @@ program_header:
   dq file_size                   ; *size in memory
   dq 0                           ; segment alignment
                                  ;   for relocation - will we be ASLR'd?
+; Save the size of this chunk, as well.
 program_header_entry_size = $ - program_header
 
-load_origin = 0x08000000
-
 
-;;; Implementation strategy:
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Implementation strategy ;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;;   We assemble the entire file contents in a stack-allocated buffer.
 ;;; We avoid using the stack for any other purpose. When the file is fully
@@ -343,7 +381,7 @@ _start:
   mov.rel.d rsp, 0x14, 1                   ; ELF format version
 
   ; Compute the entry pointer.
-  mov.qreg.qimm rax, load_origin
+  mov.qreg.qimm rax, $$
   add.b rax, 120
   mov.rel.q rsp, 0x18, rax                 ; entry point
 
@@ -369,7 +407,7 @@ _start:
   mov.rel.d rsp, 0x40, 1                   ; "loadable" segment type
   mov.rel.d rsp, 0x44, 0x05                ; read+execute permission
   mov.rel.q.d rsp, 0x48, 0                 ; offset in file
-  mov.rel.q.d rsp, 0x50, load_origin       ; virtual address
+  mov.rel.q.d rsp, 0x50, $$                ; virtual address
     ; required, but can be anything, subject to alignment
   mov.rel.q.d rsp, 0x58, 0                 ; physical address (ignored)