woooooo successful debugging

it turns out things are real enough now that we can't just scribble wherever :)

Change-Id: I26c71beaaefecae018ebb3ced4f1f579879a030a
Force-Push: yes
author: Irene Knapp <ireneista@irenes.space> 2025-10-20 00:11:07 -0700
committer: Irene Knapp <ireneista@irenes.space> 2025-10-20 00:41:11 -0700
commit: 66cdcc19b820a871c5eca435bbabf958a09014a9 (patch)
tree: f3148156972c655fdfcf6d4ee1f3ea846a995bcf
parent: 5d8318f323c400c86d1e999c86275beea156e768 (diff)
1 files changed, 131 insertions, 93 deletions
diff --git a/quine.asm b/quine.asm
index 5c4357b..ed0be32 100644
--- a/quine.asm
+++ b/quine.asm
@@ -392,17 +392,18 @@ end macro
 ; makes the operand size 8 bits, regardless of the current operand-size
 ; attribute. [Intel] volume 2D, appendix B, section B-1.4.3, table B-6.
 macro mov.qreg.disp8.bimm target, offset, source
+  qwordreg treg, target
+  db 0xC6
+  modrm 1, 0, treg
+    ; the 0 is part of the opcode
+    ; treg is the R/M field; the value 4 (rsp) there is a special case
   match =rsp, target
-    db 0xC6
-    modrm 1, 0, 4
-      ; 4 is rsp, but it's a special case
-    sib 0, 4, 4
-      ; no scaling, no indexing, rsp as base
-    db offset
-    db source
-  else
-    assert 0
+    ; R/M = rsp is the SIB case
+    sib 0, 4, treg
+      ; no scaling, no indexing, target as base
   end match
+  db offset
+  db source
 end macro
 
 ; Move from a 16-bit immediate value, to a location relative to a 64-bit
@@ -415,18 +416,18 @@ end macro
 ;
 ; We need to treat rsp specially because it's the SIB case, per table 2-2.
 macro mov.qreg.disp8.wimm target, offset, source
+  qwordreg treg, target
+  db 0x66
+  db 0xC7
+  modrm 1, 0, treg
+    ; the 0 is part of the opcode
   match =rsp, target
-    db 0x66
-    db 0xC7
-    modrm 1, 0, 4
-      ; 4 is rsp, but it's a special case
-    sib 0, 4, 4
-      ; no scaling, no indexing, rsp as base
-    db offset
-    dw source
-  else
-    assert 0
+    ; R/M = rsp is the SIB case
+    sib 0, 4, treg
+      ; no scaling, no indexing, target as base
   end match
+  db offset
+  dw source
 end macro
 
 ; Move from a 32-bit immediate value, to a location relative to a 64-bit
@@ -436,17 +437,17 @@ end macro
 ; us an operand size of 32 by default. [Intel] volume 2D, section B.1.43,
 ; table B-6. This is what we want, so we leave it.
 macro mov.qreg.disp8.dimm target, offset, source
+  qwordreg treg, target
+  db 0xC7
+  modrm 1, 0, treg
+    ; the 0 is part of the opcode
   match =rsp, target
-    db 0xC7
-    modrm 1, 0, 4
-      ; 4 is rsp, but it's a special case
-    sib 0, 4, 4
-      ; no scaling, no indexing, rsp as base
-    db offset
-    dd source
-  else
-    assert 0
+    ; R/M = rsp is the SIB case
+    sib 0, 4, treg
+      ; no scaling, no indexing, target as base
   end match
+  db offset
+  dd source
 end macro
 
 ; Move from a 64-bit register, to a 64-bit location relative to a 64-bit
@@ -460,20 +461,15 @@ end macro
 macro mov.qreg.disp8.qreg target, offset, source
   qwordreg sreg, source
   qwordreg treg, target
+  rex.w
+  db 0x89
+  modrm 1, sreg, treg
   match =rsp, target
-    rex.w
-    db 0x89
-    modrm 1, sreg, treg
-      ; treg is rsp by assumption, and R/M = rsp is the SIB case
+    ; R/M = rsp is the SIB case
     sib 0, 4, 4
       ; no scaling, no indexing, rsp as base
-    db offset
-  else
-    rex.w
-    db 0x89
-    modrm 1, sreg, treg
-    db offset
   end match
+  db offset
 end macro
 
 ; Move from a 64-bit register, to a 64-bit location relative to a 64-bit
@@ -513,17 +509,24 @@ end macro
 ; section B.1.43, table B-6. We want a 64-bit operand, so we use the REX.W
 ; prefix, 0x48.
 macro mov.qreg.disp8.dimm target, offset, source
+  qwordreg treg, target
   match =rsp, target
     rex.w
     db 0xC7
-    modrm 1, 0, 4
+    modrm 1, 0, treg
+      ; the 0 is part of the opcode
       ; 4 is rsp, but it's a special case
-    sib 0, 4, 4
+    sib 0, 4, treg
       ; no scaling, no indexing, rsp as base
     db offset
     dd source
   else
-    assert 0
+    rex.w
+    db 0xC7
+    modrm 1, 0, treg
+      ; the 0 is part of the opcode
+    db offset
+    dd source
   end match
 end macro
 
@@ -595,7 +598,7 @@ end macro
 ; without is more compact, so we do without.
 macro pop.qreg target
   qwordreg treg, target
-  opcodereg 0x58, target
+  opcodereg 0x58, treg
 end macro
 
 
@@ -841,8 +844,8 @@ code_start:
 ;;; * DF = 0 is required
 ;;;
 macro NEXT
-  ; Copy the next word's address from *rsi into rax. Increment the stack
-  ; pointer (as per the DF flag).
+  ; Copy the next word's address from *rsi into rax. Increment rsi (as per the
+  ; DF flag).
   lodsq
 
   ; Load the codeword from the word's contents, and jump to the interpreter it
@@ -1121,6 +1124,8 @@ _start:
 ;;; used this one time, so we just put it right here.
 ;;;
 cold_start:
+;;; TODO this is probably where we should deal with that HEAP that we passed
+;;; on the stack
   dq QUIT
 
 ;;;
@@ -1129,15 +1134,42 @@ cold_start:
 ;;;
 QUIT:
   dq DOCOL                       ; codeword
-  dq OLD_CODE
-  ; TODO this is more like what it should be
-  ; Although we initialized rbp already, we do so again because we'll want
-  ; that on subsequent visits to this word - it's the main thing it's for.
+
+  ;;;
+  ;;; Although we initialized rbp already, we do so again because we'll want
+  ;;; that on subsequent visits to this word - it's the main thing it's for.
+  ;;; Keep in mind that rsi is the actual "instruction pointer", and we're
+  ;;; leaving it unchanged, we just get rid of everything above it.
+  ;;;
   ;dq R0, CONTROL!                ; overwrite rbp to reset the control stack
-    ; Keep in mind that rsi is the actual "instruction pointer", and we're
-    ; leaving it unchanged, we just get rid of everything above it.
+    ; TODO: though the implementation of R0 is trivial, it depends on where
+    ; we put the heap, so it can't be hardcoded; we'll have to build it in
+    ; RAM. The same therefore goes for anything that needs to call it, so we
+    ; can't call it here yet - we have prep work to do first.
+
+  ;;;
+  ;;; Do the read-eval-print-loop, which is the main body of the Forth
+  ;;; interpreter.
+  ;;;
   ;dq INTERPRET                   ; run the repl
-  ;dq BRANCH, QUIT - $            ; if the repl ever exits, start again
+  dq QUINE
+
+  ;;;
+  ;;; If the repl ever exits, do it all again.
+  ;;;
+  ;dq BRANCH, QUIT - $
+
+
+END_PROCESS:
+  dq $ + 0x8                     ; codeword
+  mov.b rax, 60                  ; syscall number
+  mov.b rdi, 0                   ; exit code
+  syscall
+
+QUINE:
+  dq DOCOL                       ; codeword
+  dq OLD_CODE
+  dq END_PROCESS
 
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -1154,51 +1186,52 @@ QUIT:
 ;;; which refers to how many bytes have actually been populated, not to the
 ;;; size of the buffer.
 ;;;
-;;; Register usage:
+;;; Registers within:
 ;;;
 ;;; * rdx holds the total used file size so far. During hand-off between
 ;;;   chunks, this size must be equal to the buffer size; within a chunk it
 ;;;   may be less.
 ;;;
-;;; * ~~~~rsp points to the bottom of the buffer.~~~~
-;;; TODO WRONG this is all just scribbling where it shouldn't
+;;; * rcx points to the bottom of the buffer.
 ;;;
 OLD_CODE:
   dq $ + 0x8                               ; The codeword
 
+  mov.qreg.qreg rcx, rdi
+  add.qreg.dimm rcx, control_stack_size + 0x18
+
   mov.dreg.dimm rdx, 0                     ; store running file size here
-  ;sub.qreg.bimm rsp, 0xFF                  ; reserve stack space
 
   ;;;
   ;;; ELF header
   ;;;
-  mov.qreg.disp8.dimm rsp, 0x00, 0x7F bappend "ELF"  ; magic number
-  mov.qreg.disp8.bimm rsp, 0x04, 2         ; 64-bit
-  mov.qreg.disp8.bimm rsp, 0x05, 1         ; little-endian
-  mov.qreg.disp8.bimm rsp, 0x06, 1         ; ELF header format version 1
-  mov.qreg.disp8.bimm rsp, 0x07, 0         ; System-V ABI
-  mov.qreg.disp8.dimm rsp, 0x08, 0         ; (padding)
+  mov.qreg.disp8.dimm rcx, 0x00, 0x7F bappend "ELF"  ; magic number
+  mov.qreg.disp8.bimm rcx, 0x04, 2         ; 64-bit
+  mov.qreg.disp8.bimm rcx, 0x05, 1         ; little-endian
+  mov.qreg.disp8.bimm rcx, 0x06, 1         ; ELF header format version 1
+  mov.qreg.disp8.bimm rcx, 0x07, 0         ; System-V ABI
+  mov.qreg.disp8.dimm rcx, 0x08, 0         ; (padding)
 
-  mov.qreg.disp8.wimm rsp, 0x10, 2         ; executable
-  mov.qreg.disp8.wimm rsp, 0x12, 0x3E      ; Intel x86-64
-  mov.qreg.disp8.dimm rsp, 0x14, 1         ; ELF format version
+  mov.qreg.disp8.wimm rcx, 0x10, 2         ; executable
+  mov.qreg.disp8.wimm rcx, 0x12, 0x3E      ; Intel x86-64
+  mov.qreg.disp8.dimm rcx, 0x14, 1         ; ELF format version
 
   ; Compute the entry pointer.
   mov.qreg.qimm rax, _start                ; the offset of _start
     ; This includes the origin, intentionally.
-  mov.qreg.disp8.qreg rsp, 0x18, rax       ; entry point
+  mov.qreg.disp8.qreg rcx, 0x18, rax       ; entry point
 
-  mov.qreg.disp8.dimm rsp, 0x20, 64        ; program header offset
+  mov.qreg.disp8.dimm rcx, 0x20, 64        ; program header offset
     ; We place the program header immediately after the ELF header. This
     ; offset is from the start of the file.
-  mov.qreg.disp8.dimm rsp, 0x28, 0         ; section header offset
-  mov.qreg.disp8.dimm rsp, 0x30, 0         ; processor flags
-  mov.qreg.disp8.wimm rsp, 0x34, 64        ; ELF header size
-  mov.qreg.disp8.wimm rsp, 0x36, 56        ; program header entry size
-  mov.qreg.disp8.wimm rsp, 0x38, 1         ; number of program header entries
-  mov.qreg.disp8.wimm rsp, 0x3a, 0         ; section header entry size
-  mov.qreg.disp8.wimm rsp, 0x3c, 0         ; number of section header entries
-  mov.qreg.disp8.wimm rsp, 0x3e, 0         ; section name string table index
+  mov.qreg.disp8.dimm rcx, 0x28, 0         ; section header offset
+  mov.qreg.disp8.dimm rcx, 0x30, 0         ; processor flags
+  mov.qreg.disp8.wimm rcx, 0x34, 64        ; ELF header size
+  mov.qreg.disp8.wimm rcx, 0x36, 56        ; program header entry size
+  mov.qreg.disp8.wimm rcx, 0x38, 1         ; number of program header entries
+  mov.qreg.disp8.wimm rcx, 0x3a, 0         ; section header entry size
+  mov.qreg.disp8.wimm rcx, 0x3c, 0         ; number of section header entries
+  mov.qreg.disp8.wimm rcx, 0x3e, 0         ; section name string table index
 
   ; Add the size of the ELF header to the running total
   mov.dreg.dimm rax, 0x40
@@ -1207,18 +1240,18 @@ OLD_CODE:
   ;;;
   ;;; Program header
   ;;;
-  mov.qreg.disp8.dimm rsp, 0x40, 1         ; "loadable" segment type
-  mov.qreg.disp8.dimm rsp, 0x44, 0x05      ; read+execute permission
-  mov.qreg.disp8.dimm rsp, 0x48, 0         ; offset in file
-  mov.qreg.disp8.dimm rsp, 0x50, $$        ; virtual address
+  mov.qreg.disp8.dimm rcx, 0x40, 1         ; "loadable" segment type
+  mov.qreg.disp8.dimm rcx, 0x44, 0x05      ; read+execute permission
+  mov.qreg.disp8.dimm rcx, 0x48, 0         ; offset in file
+  mov.qreg.disp8.dimm rcx, 0x50, $$        ; virtual address
     ; required, but can be anything, subject to alignment
-  mov.qreg.disp8.dimm rsp, 0x58, 0         ; physical address (ignored)
+  mov.qreg.disp8.dimm rcx, 0x58, 0         ; physical address (ignored)
 
   ; Fill in 0 as the file size for now, to avoid unitialized memory.
-  mov.qreg.disp8.dimm rsp, 0x60, 0         ; size in file
-  mov.qreg.disp8.dimm rsp, 0x68, 0         ; size in memory
+  mov.qreg.disp8.dimm rcx, 0x60, 0         ; size in file
+  mov.qreg.disp8.dimm rcx, 0x68, 0         ; size in memory
 
-  mov.qreg.disp8.dimm rsp, 0x70, 0         ; segment alignment
+  mov.qreg.disp8.dimm rcx, 0x70, 0         ; segment alignment
     ; for relocation, but this doesn't apply to us
 
   ; Add the size of the program header to the running total
@@ -1241,37 +1274,42 @@ OLD_CODE:
   ;;; Go back and fill in the file size now that we know it (ill-gotten
   ;;; knowledge though it is).
   ;;;
-  mov.qreg.disp8.qreg rsp, 0x60, rdx       ; size in file
-  mov.qreg.disp8.qreg rsp, 0x68, rdx       ; size in memory
+  mov.qreg.disp8.qreg rcx, 0x60, rdx       ; size in file
+  mov.qreg.disp8.qreg rcx, 0x68, rdx       ; size in memory
 
   ;;;
   ;;; The buffer is ready; output the file.
   ;;;
 
   ; write() from stack-allocated buffer
+  ; we have real stuff using rsi now, so don't forget to save and restore it
+  ; also rcx is the only "low" register the kernel doesn't preserve
+  push.qreg rsi
+  push.qreg rcx
   mov.b rax,1
   mov.qreg.qimm rdi, 1
-  mov.qreg.qreg rsi, rsp
+  mov.qreg.qreg rsi, rcx
   mov.qreg.qimm rdx, 0x78
   syscall
+  pop.qreg rcx
+  pop.qreg rsi
 
   ; write() the machine code by using self-reference
+  ; we have real stuff using rsi now, so don't forget to save and restore it
+  ; also rcx is the only "low" register the kernel doesn't preserve
   ;
   ; TODO do this in a "real" quine way
+  push.qreg rsi
+  push.qreg rcx
   mov.b rax, 1
   mov.qreg.qimm rdi, 1
   mov.qreg.qimm rsi, elf_header + 0x78
   mov.qreg.qimm rdx, file_size - 0x78
   syscall
+  pop.qreg rcx
+  pop.qreg rsi
 
-  ;;;
-  ;;; Clean up.
-  ;;;
-
-  ; exit()
-  mov.b rax, 60
-  mov.b rdi, 0
-  syscall
+  NEXT
 
 code_size = $ - code_start
 file_size = $ - $$
author	Irene Knapp <ireneista@irenes.space>	2025-10-20 00:11:07 -0700
committer	Irene Knapp <ireneista@irenes.space>	2025-10-20 00:41:11 -0700
commit	66cdcc19b820a871c5eca435bbabf958a09014a9 (patch)
tree	f3148156972c655fdfcf6d4ee1f3ea846a995bcf
parent	5d8318f323c400c86d1e999c86275beea156e768 (diff)