heap allocation now works! also fix some encoding issues

implement indirect adds and subtracts; it turns out they're convenient for heap management the operand order for lea.qreg.qreg.disp32 didn't match what the name suggested, so it's fixed now the semantics of push.dimm are quite surprising, for reasons that are now extensively documented also add notes on helpful ways to use gdb Force-Push: yes Change-Id: I717eb6427520e2c336586d4e8066ef172d97ce55
author: Irene Knapp <ireneista@irenes.space> 2025-10-26 01:01:08 -0700
committer: Irene Knapp <ireneista@irenes.space> 2025-10-26 01:01:08 -0700
commit: 48bb1a77f3232e365f1becbb5313fb3240c8dc77 (patch)
tree: b3247c216dcde35821813633ccc88c54a078576a /quine.asm
parent: fb564c85813f7ccf35e321af939cdb3328f6c18b (diff)
1 files changed, 146 insertions, 24 deletions
diff --git a/quine.asm b/quine.asm
index f44f608..a25e1c5 100644
--- a/quine.asm
+++ b/quine.asm
@@ -13,11 +13,11 @@
 ;;; Currently, this is not yet fully self-hosting; it is based on
 ;;; flatassembler[1]. A minimal command to build and run it is:
 ;;;
-;;; fasmg quine.asm quine && chmod 755 quine && ./quine; echo $?
+;;; $ fasmg quine.asm quine && chmod 755 quine && ./quine; echo $?
 ;;;
 ;;; A workflow you may wish to use for debugging is:
 ;;;
-;;; rm quine2; fasmg quine.asm quine && chmod 755 quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp -l quine quine2 ; echo cmp: $?
+;;; $ rm quine2; fasmg quine.asm quine && chmod 755 quine && ./quine > quine2; echo "exit code:" $?; echo; hexdump -C quine; echo; hexdump -C quine2; echo; cmp -l quine quine2 ; echo cmp: $?
 ;;;
 ;;; The reason this removes the old one first is that otherwise, there's a
 ;;; risk the error message will be scrolled off the top of the screen and
@@ -25,12 +25,34 @@
 ;;;
 ;;; You may also wish to do:
 ;;;
-;;; objdump --disassemble quine
-;;; ZydisDisasm -64 quine
+;;; $ objdump --disassemble quine
+;;; $ ZydisDisasm -64 quine
 ;;;
 ;;; This relies on GNU binutils, and on zydis, respectively.
 ;;;
 ;;; [1] https://flatassembler.net/
+;;;
+;;;
+;;; gdb
+;;; ---
+;;;
+;;; You can run gdb on it if you want; there's no symbols, but if you are
+;;; familiar with the hex it should be readable. Keep a hexdump of the program
+;;; handy to look up what addresses are.
+;;;
+;;; If you want to see a routine implemented in assembly, look at the hexdump
+;;; of the overall file, find it by looking at the ASCII names, skip past the
+;;; codeword, and do e.g.
+;;;
+;;; (gdb) disassemble/r 0x80007c0,+32
+;;;
+;;; If you get the value of rsp out of info registers, and for example it's an
+;;; address along the lines of 0x7fffffff8650, you can do
+;;;
+;;; (gdb) x/16xg 0x7fffffff8650
+;;;
+;;; The same will work with rbp for the control stack, and don't forget that
+;;; the "instruction pointer" is rsi.
 
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -527,6 +549,23 @@ macro add.qreg.qreg target, source
 end macro
 
 
+macro add.indirect.qreg.qreg target, source   ; [target] += source, 64-bit; target is used as a pointer
+  match =rsp, target
+    assert 0
+    ; The SIB case: mode 0 with rsp in the r/m field means a SIB byte follows, and we don't emit one.
+  else match =rbp, target
+    assert 0
+    ; An unrelated addressing mode: mode 0 with rbp in the r/m field means a bare disp32, not [rbp].
+  else
+    qwordreg treg, target
+    qwordreg sreg, source
+    rex.w                     ; REX.W, for a 64-bit operand size
+    db 0x01                   ; ADD r/m, r - the memory operand is the destination
+    modrm 0, sreg, treg       ; mode 0 is register-indirect, no displacement; assumes argument order (mode, reg, r/m)
+  end match
+end macro
+
+
 ; This adds a signed 8-bit immediate value to a 64-bit register, in place.
 ;
 ; Notice the use of 3 as the addressing mode. This says to use the register
@@ -561,6 +600,21 @@ macro sub.qreg.qreg target, source
   modrm 3, treg, sreg
 end macro
 
+
+; This subtracts a 64-bit register from the 64-bit value in memory that
+; another register points to, in place; that is, [target] -= source.
+;
+; The opcode is 0x29, SUB r/m64, r64, which makes the memory operand the
+; destination. Its sibling 0x2B, SUB r64, r/m64, goes the other way: it would
+; read memory and leave the result in the source register, never writing to
+; memory at all.
+;
+; As with add.indirect.qreg.qreg, mode 0 can't address through rsp (that
+; encoding means a SIB byte follows) or rbp (that encoding means a bare
+; 32-bit displacement), so both are rejected.
+macro sub.indirect.qreg.qreg target, source
+  match =rsp, target
+    ; The SIB case.
+    assert 0
+  else match =rbp, target
+    ; An unrelated addressing mode.
+    assert 0
+  else
+    qwordreg treg, target
+    qwordreg sreg, source
+    rex.w
+    db 0x29
+    modrm 0, sreg, treg
+  end match
+end macro
+
+
 ; This subtracts a signed 8-bit immediate value from a 64-bit register, in
 ; place.
 ;
@@ -774,9 +828,9 @@ end macro
 ; register, with an 8-bit displacement and no indexing.
 ;
 ; This uses opcode 0xC7, which has w = 1. We run in 64-bit mode, so that gives
-; us an operand size of 32 bits by default. [Intel] volume 1, section 3.6.1,
-; table 3-4. We want a 16-bit operand, so we use the operand-size prefix,
-; 0x66, and we leave REX.W unset.
+; us an operand size of 32 bits by default. [Intel] volume 1, chapter 3,
+; section 3-6.1, table 3-4. We want a 16-bit operand, so we use the
+; operand-size prefix, 0x66, and we leave REX.W unset.
 ;
 ; We need to treat rsp specially because it's the SIB case, per table 2-2.
 macro mov.qreg.disp8.wimm target, offset, source
@@ -912,7 +966,7 @@ macro lea.qreg.qreg.disp8 target, offset, source
   end match
 end macro
 
-macro lea.qreg.qreg.disp32 target, offset, source
+macro lea.qreg.qreg.disp32 target, source, offset
   match =rsp, target
     ; This is the SIB case
     assert 0
@@ -957,14 +1011,42 @@ macro push.bimm source
 end macro
 
 ; Operand-size prefix makes it 16-bit.
+;
+; If you're trying to fake pushing a larger size by doing several 16-bit
+; pushes, remember to start by pushing the low end and proceed upwards.
+; [Intel] volume 1, chapter 9, section 9-2.4, "Memory Data Formats".
 macro push.wimm source   ; push a 16-bit immediate onto the value stack
   db 0x66                ; operand-size prefix: selects the 16-bit operand size
   db 0x68                ; PUSH with an immediate of the operand size
   dw source              ; the immediate itself, emitted as 16 bits
 end macro
 
-; There is no 64-bit immediate push. To fake it, push the low half, then the
-; high half. [Intel] volume 1, chapter 9, section 9-2.4, "Memory Data Formats".
+; There is no 64-bit immediate push. So, can we have a push instruction that
+; pushes a 32-bit immediate value? Sort-of, but it's sign-extended to 64 bits,
+; so rsp is decremented by 8, not by 4. This is that instruction.
+;
+; You need to do a really close read of a number of things to understand why.
+; The opcode tables in [Intel] in volume 2D, appendix A, section A-3 give it
+; the d64 annotation, which per table A-1 in section A-2.5 indicates that the
+; operand size is always 64 bits and that there is no corresponding 32-bit
+; version. Yet, the actual immediate value is still only 32 bits! Direct your
+; attention to the instruction's details page, volume 2B, chapter 4, section
+; 4-3, "PUSH". The description section clearly details that the immediate may
+; be less than the operand size, which makes sense once you know it, but it
+; doesn't explicitly call out that the operand size is still 64 bits here.
+;
+; In general, the size of an immediate doesn't determine operand size, as you
+; can read about in detail in [Intel] volume 1, chapter 3, section 3-6.1, with
+; particular attention to table 3-4.
+;
+; Why is this surprising, given that it's consistent with the behavior of
+; other instructions? Well, most instructions don't have such obvious
+; side-effects. It's easy to not notice the operand size disagreeing with the
+; immediate size when you're only writing to a register, but changing the stack
+; in an unexpected way breaks things much more obviously.
+;
+; Anyway, if you really want to decrement the stack pointer by 32 bits after
+; a push, consider pushing a register.
 macro push.dimm source
   db 0x68
   dd source
@@ -1444,7 +1526,7 @@ _start:
   ;;; your home. See below for a little more thought about why here in
   ;;; particular.
   ;;;
-  lea.qreg.qreg.disp32 rbp, control_stack_size, rdi
+  lea.qreg.qreg.disp32 rbp, rdi, control_stack_size
 
   ;;;
   ;;;   Now we save some stuff onto the heap. These are the locations that
@@ -1460,7 +1542,9 @@ _start:
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi    ; HEAP
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp    ; S0
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp    ; R0
-  ; TODO also consider HERE, LATEST, and STATE
+  lea.qreg.qreg.disp32 rax, rdi, control_stack_size + 0x20
+  mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax    ; HERE
+  ; TODO also consider LATEST and STATE
   ; strictly speaking, R0 could be a constant... but it isn't known until
   ; runtime, so we might as well make it a variable
   ;;;
@@ -1475,6 +1559,11 @@ _start:
   ;;;     The control stack also grows downwards, so this is its physical top
   ;;;   as well. We allocate this dedicated space within the heap right here,
   ;;;   in this routine, through our choice of where to put things.
+  ;;; * HERE is the physical start of the unallocated space in the heap
+  ;;;     We allocate heap space from bottom to top, by incrementing this
+  ;;;   value. So, it would also be accurate to say that it points immediately
+  ;;;   after the physical top of the allocated space. At any rate, the
+  ;;;   address it points to is the first one that hasn't been used yet.
   ;;;
   ;;;   S0 and R0 are mostly used when we want to initialize or reinitialize
   ;;; their respective stacks - that is, discard all their contents at once.
@@ -1484,6 +1573,11 @@ _start:
   ;;; convince yourself that it only ever writes things just below the rbp
   ;;; address it receives, never right on top of it.
   ;;;
+  ;;;   Notice that HERE points immediately after itself. This is just a
+  ;;; convenience, making it the last one like that so that the concern is
+  ;;; dealt with in a single place and is easy to keep up-to-date with code
+  ;;; changes.
+  ;;;
   ;;;   A little more detail about why we offset everything by
   ;;; control_stack_size: We're carving out some space at the bottom of the
   ;;; heap - which grows low-to-high - to be the control stack - which grows
@@ -1867,6 +1961,23 @@ defword FETCH, 0
   push.qreg rax
   NEXT
 
+; Address on top, value second
+; I might have done it the other way, but this is what Jonesforth does and it
+; seems reasonable enough.
+defword ADDSTORE, 0                ; ( value address -- ) add value to the quadword stored at address
+  dq $ + 0x8                     ; codeword: the address just past this quadword, where the code begins
+  pop.qreg rbx                   ; rbx = address (it was on top)
+  pop.qreg rax                   ; rax = value (it was second)
+  add.indirect.qreg.qreg rbx, rax ; [rbx] += rax
+  NEXT
+
+defword SUBSTORE, 0                ; ( value address -- ) the subtracting counterpart of ADDSTORE
+  dq $ + 0x8                     ; codeword: the address just past this quadword, where the code begins
+  pop.qreg rbx                   ; rbx = address (it was on top)
+  pop.qreg rax                   ; rax = value (it was second)
+  sub.indirect.qreg.qreg rbx, rax ; NOTE(review): meant as [rbx] -= rax; confirm the macro's opcode writes to memory
+  NEXT
+
 defword STORE8, 0
   dq $ + 0x8                     ; codeword
   pop.qreg rbx
@@ -2000,10 +2111,17 @@ defword SYS_WRITE, 0
 defword QUINE, 0
   dq DOCOL                       ; codeword
 
-  ; This stack-allocates a buffer, then finishes by pushing its length and
-  ; address on the value stack. Thus we don't need to care about how it
+  ; We still have HEAP on the stack. Use it to find where HERE is stored...
+  dq DUP, LIT, control_stack_size + 0x18, ADD
+  ; ... read HERE's current value, which is the start of our new block, then
+  ; add the block's size to HERE in-place ...
+  dq DUP, FETCH, SWAP, LIT, 0x78, SWAP, ADDSTORE
+  ; ... and now we have allocated a block of memory, with its address on the
+  ; stack. We also still have HEAP at the bottom of the stack, for future use.
+  ;
+  ; It matters that what we keep is the value of HERE and not the address
+  ; it's stored at: the block begins right after the HERE variable, so filling
+  ; it in starting from the variable's own address would overwrite HERE.
+
+  ; This takes a buffer's address on the stack, populates it, then finishes by
+  ; pushing its length and address. Thus we don't need to care about how it
   ; internally uses registers.
-  dq OLD_CODE
+  dq DUP, OLD_CODE, SWAP
 
   ; write() from heap-allocated buffer
   dq SYS_WRITE
@@ -2014,6 +2132,9 @@ defword QUINE, 0
   dq SYS_WRITE
 
   dq EXIT
+defword HLT, 0                     ; stop on the spot; NOTE(review): looks like a debugging aid - confirm
+  dq $ + 0x8                     ; codeword: the address just past this quadword, where the code begins
+  hlt                            ; assuming this emits x86 HLT: it's privileged, so in user mode it faults. No NEXT follows.
 
 defword WRITE_SELF_RAW_H, 0
   dq $ + 0x8                     ; codeword
@@ -2038,6 +2159,12 @@ defword WRITE_SELF_RAW_H, 0
 ;;; which refers to how many bytes have actually been populated, not to the
 ;;; size of the buffer.
 ;;;
+;;; Stack in:
+;;; * Top: The address of a block of memory to use.
+;;;
+;;; Stack out:
+;;; * Top: The length of the data that was written to the provided memory.
+;;;
 ;;; Registers within:
 ;;;
 ;;; * rdx holds the total used file size so far. During hand-off between
@@ -2047,11 +2174,9 @@ defword WRITE_SELF_RAW_H, 0
 ;;; * rcx points to the bottom of the buffer.
 ;;;
 defword OLD_CODE, 0
-  dq $ + 0x8                     ; codeword
-
-  mov.qreg.qreg rcx, rdi
-  add.qreg.dimm rcx, control_stack_size + 0x18
+  dq $ + 0x8                               ; codeword
 
+  pop.qreg rcx                             ; our parameter - a block of memory
   mov.dreg.dimm rdx, 0                     ; store running file size here
 
   ;;;
@@ -2130,13 +2255,10 @@ defword OLD_CODE, 0
   mov.qreg.disp8.qreg rcx, 0x68, rdx       ; size in memory
 
   ;;;
-  ;;; The buffer is ready; push its length and address on the value stack, so
-  ;;; our caller can handle write()ing it out.
+  ;;; The buffer is ready; push its length on the value stack, so our caller
+  ;;; can handle write()ing it out.
   ;;;
-
-  push.dimm 0
   push.dimm 0x78
-  push.qreg rcx
 
   NEXT
author	Irene Knapp <ireneista@irenes.space>	2025-10-26 01:01:08 -0700
committer	Irene Knapp <ireneista@irenes.space>	2025-10-26 01:01:08 -0700
commit	48bb1a77f3232e365f1becbb5313fb3240c8dc77 (patch)
tree	b3247c216dcde35821813633ccc88c54a078576a /quine.asm
parent	fb564c85813f7ccf35e321af939cdb3328f6c18b (diff)