summary refs log tree commit diff
path: root/quine.asm
diff options
context:
space:
mode:
authorIrene Knapp <ireneista@irenes.space>2025-10-29 01:12:51 -0700
committerIrene Knapp <ireneista@irenes.space>2025-10-29 01:12:51 -0700
commitdf5928294f9154f317d2a2352bae2c1bc46848d9 (patch)
tree4181cbb675c1090c5bed9261f629d09c892f3d8a /quine.asm
parenta2dbddffcfc70e2ab84173b7eb7e24ac09f5dfe2 (diff)
the two-pass magick works!!!!!
yessssssss

that was a lot of debugging, wow. there were two bugs in UNROLL: the source and destination for the block move were set to the high end instead of the low end; and the encoding of an lea variant fell into an unsupported case but was emitted anyway due to an incorrect guard clause

that was seriously a lot of debugging. also all the stack logic for grabbing and stashing the label stuff was wrong several times, but it works now

the original "old code" is now fully eliminated, wow!

Force-Push: yeah
Change-Id: I85d951eabc7de6ba502e2a1aa0f5998f9b399765
Diffstat (limited to 'quine.asm')
-rw-r--r--quine.asm473
1 files changed, 291 insertions, 182 deletions
diff --git a/quine.asm b/quine.asm
index 6ab7da4..2d9f6eb 100644
--- a/quine.asm
+++ b/quine.asm
@@ -152,6 +152,12 @@ macro opcodereg opcode, reg
   db opcode or reg
 end macro
 
+; Emit one opcode byte that has a condition code folded into its low nibble.
+;
+;   opcode - the base opcode; must fit in a byte and have its low four bits
+;            clear, so that the condition code can be OR'd in without
+;            disturbing it.
+;   cc     - the four-bit condition code.
+macro opcodecc opcode, cc
+  assert cc >= 0 & cc < 16
+  assert opcode >= 0 & opcode < 256
+  assert opcode and 15 = 0
+  db cc or opcode
+end macro
+
 macro scalefield sfield, scale
   if 1 = scale
     sfield = 0
@@ -166,6 +172,77 @@ macro scalefield sfield, scale
   end if
 end macro
 
+; Yep, there sure is a lot of duplication in these. This is based on Intel's
+; documented mnemonics...
+;
+; "Above" and "below" are for unsigned comparisons. "Greater" and "less" are
+; for signed comparisons.
+;
+; This translates a condition name into its four-bit condition code, and
+; assigns that code to the symbol passed as cc. Several names are synonyms
+; and deliberately share a code (for example below, carry, and
+; not.above.equal are all 0x02). A name that isn't in the list fails assembly
+; through "assert 0".
+;
+; The resulting code is meant to be OR'd into the low nibble of an opcode
+; byte; see opcodecc.
+macro conditioncode cc, condition
+  match =above, condition
+    cc = 0x07
+  else match =above.equal, condition
+    cc = 0x03
+  else match =below, condition
+    cc = 0x02
+  else match =below.equal, condition
+    cc = 0x06
+  else match =carry, condition
+    cc = 0x02
+  else match =equal, condition
+    cc = 0x04
+  else match =greater, condition
+    cc = 0x0F
+  else match =greater.equal, condition
+    cc = 0x0D
+  else match =less, condition
+    cc = 0x0C
+  else match =less.equal, condition
+    cc = 0x0E
+  else match =not.above, condition
+    cc = 0x06
+  else match =not.above.equal, condition
+    cc = 0x02
+  else match =not.below, condition
+    cc = 0x03
+  else match =not.below.equal, condition
+    cc = 0x07
+  else match =not.carry, condition
+    cc = 0x03
+  else match =not.equal, condition
+    cc = 0x05
+  else match =not.greater, condition
+    cc = 0x0E
+  else match =not.greater.equal, condition
+    cc = 0x0C
+  else match =not.less, condition
+    cc = 0x0D
+  else match =not.less.equal, condition
+    cc = 0x0F
+  else match =not.overflow, condition
+    cc = 0x01
+  else match =not.parity, condition
+    cc = 0x0B
+  else match =not.sign, condition
+    cc = 0x09
+  else match =not.zero, condition
+    cc = 0x05
+  else match =overflow, condition
+    cc = 0x00
+  else match =parity, condition
+    cc = 0x0A
+  else match =parity.even, condition
+    cc = 0x0A
+  else match =parity.odd, condition
+    cc = 0x0B
+  else match =sign, condition
+    cc = 0x08
+  else match =zero, condition
+    cc = 0x04
+  else
+    ; Unknown condition name: refuse to assemble rather than guess.
+    assert 0
+  end match
+end macro
+
 
 ;;; On registers
 ;;; ------------
@@ -288,6 +365,10 @@ macro owordreg result, register
 end macro
 
 
+;;; Instructions
+;;; ------------
+
+
 ; TODO what register size does this use?
 macro mov.b target, source
   match =rax?, target
@@ -649,6 +730,23 @@ macro add.indirect.qreg.qreg target, source
 end macro
 
 
+; This adds the 64-bit value stored in memory, at the address held in the
+; source register, to the target register, in place.
+;
+; The addressing mode is 0, which is register-indirect with no displacement.
+; In that mode two R/M values have special meanings instead of naming a base
+; register, so rsp and rbp are rejected as sources.
+macro add.qreg.indirect.qreg target, source
+  match =rsp, source
+    assert 0
+    ; The SIB case: R/M = rsp means a SIB byte follows, and this macro
+    ; doesn't emit one.
+  else match =rbp, source
+    assert 0
+    ; An unrelated addressing mode: with mode 0, R/M = rbp selects a 32-bit
+    ; displacement with no base register.
+  else
+    qwordreg treg, target
+    qwordreg sreg, source
+    rex.w
+    db 0x03
+    ; The reg field is the register being added to; R/M is the register
+    ; holding the address to read from.
+    modrm 0, treg, sreg
+  end match
+end macro
+
+
 ; This adds a signed 8-bit immediate value to a 64-bit register, in place.
 ;
 ; Notice the use of 3 as the addressing mode. This says to use the register
@@ -801,7 +899,7 @@ macro not.qreg target
 end macro
 
 
-; This sets the flags to the same things they'd be set by if subtracting
+; This sets the flags to the same things they'd be set to if subtracting
 ; right from left.
 macro cmp.qreg.qreg left, right
   qwordreg lreg, left
@@ -811,81 +909,25 @@ macro cmp.qreg.qreg left, right
   modrm 3, lreg, rreg
 end macro
 
+; This sets the flags to the same things they'd be set to if AND'ing right
+; with left. Only the flags are affected; neither register is written.
+;
+; Passing the same register for both operands is the usual way to check a
+; value for zero.
+macro test.qreg.qreg left, right
+  qwordreg lreg, left
+  qwordreg rreg, right
+  rex.w
+  db 0x85
+  ; Mode 3 means R/M names a register directly rather than a memory location.
+  ; The reg field carries right, and R/M carries left.
+  modrm 3, rreg, lreg
+end macro
 
-; Yep, there sure is a lot of duplication in these. This is based on Intel's
-; documented mnemonics...
-;
-; "Above" and "below" are for unsigned comparisons. "Greater" and "less" are
-; for signed comparisons.
 macro set.breg.cc target, condition
   bytereg treg, target
+  conditioncode cc, condition
   db 0x0F
-  match =above, condition
-    db 0x97
-  else match =above.equal, condition
-    db 0x93
-  else match =below, condition
-    db 0x92
-  else match =below.equal, condition
-    db 0x96
-  else match =carry, condition
-    db 0x92
-  else match =equal, condition
-    db 0x94
-  else match =greater, condition
-    db 0x9F
-  else match =greater.equal, condition
-    db 0x9D
-  else match =less, condition
-    db 0x9C
-  else match =less.equal, condition
-    db 0x9E
-  else match =not.above, condition
-    db 0x96
-  else match =not.above.equal, condition
-    db 0x92
-  else match =not.below, condition
-    db 0x93
-  else match =not.below.equal, condition
-    db 0x97
-  else match =not.carry, condition
-    db 0x93
-  else match =not.equal, condition
-    db 0x95
-  else match =not.greater, condition
-    db 0x9E
-  else match =not.greater.equal, condition
-    db 0x9C
-  else match =not.less, condition
-    db 0x9D
-  else match =not.less.equal, condition
-    db 0x9F
-  else match =not.overflow, condition
-    db 0x91
-  else match =not.parity, condition
-    db 0x9B
-  else match =not.sign, condition
-    db 0x99
-  else match =not.zero, condition
-    db 0x95
-  else match =overflow, condition
-    db 0x90
-  else match =parity, condition
-    db 0x9A
-  else match =parity.even, condition
-    db 0x9A
-  else match =parity.odd, condition
-    db 0x9B
-  else match =sign, condition
-    db 0x98
-  else match =zero, condition
-    db 0x94
-  else
-    assert 0
-  end match
+  opcodecc 0x90, cc
   modrm 3, 0, treg
 end macro
 
+
 ; Move from an 8-bit immediate value, to a location relative to a 64-bit
 ; register, with an 8-bit displacement and no indexing.
 ;
@@ -965,7 +1007,7 @@ macro mov.qreg.disp8.qreg target, offset, source
   rex.w
   db 0x89
   modrm 1, sreg, treg
-  match =rsp, target
+  match =rsp, source
     ; R/M = rsp is the SIB case
     sib 0, 4, 4
       ; no scaling, no indexing, rsp as base
@@ -1035,32 +1077,32 @@ end macro
 ;  indexed addressing, with an 8-bit displacement and no indexing, but instead
 ; of doing anything with the memory, just store the address itself into a
 ; register.
-macro lea.qreg.qreg.disp8 target, offset, source
-  match =rsp, target
-    ; This is the SIB case
-    assert 0
-  else
-    qwordreg treg, target
-    qwordreg sreg, source
-    rex.w
-    db 0x8D
-    modrm 1, treg, sreg
-    db offset
+macro lea.qreg.disp8.qreg target, offset, source
+  qwordreg treg, target
+  qwordreg sreg, source
+  rex.w
+  db 0x8D
+  modrm 1, treg, sreg
+  match =rsp, source
+    ; R/M = rsp is the SIB case
+    sib 0, 4, sreg
+      ; no scaling, no indexing, rsp as base
   end match
+  db offset
 end macro
 
-macro lea.qreg.qreg.disp32 target, source, offset
-  match =rsp, target
-    ; This is the SIB case
-    assert 0
-  else
-    qwordreg treg, target
-    qwordreg sreg, source
-    rex.w
-    db 0x8D
-    modrm 2, treg, sreg
-    dd offset
+macro lea.qreg.disp32.qreg target, offset, source
+  qwordreg treg, target
+  qwordreg sreg, source
+  rex.w
+  db 0x8D
+  modrm 2, treg, sreg
+  match =rsp, source
+    ; R/M = rsp is the SIB case
+    sib 0, 4, sreg
+      ; no scaling, no indexing, rsp as base
   end match
+  dd offset
 end macro
 
 macro lea.qreg.indexed.qreg target, source, index, scale
@@ -1249,6 +1291,23 @@ macro jmp.rel.dimm location
   dd location
 end macro
 
+; Conditional jump with a signed 8-bit displacement. The condition is one of
+; the names understood by conditioncode.
+;
+; The location is relative to the start of the instruction immediately
+; following the jmp.
+;
+; The encoding is a single opcode byte, 0x70 with the condition code in its
+; low four bits, followed by the one-byte displacement.
+macro jmp.cc.rel.bimm condition, location
+  conditioncode cc, condition
+  opcodecc 0x70, cc
+  db location
+end macro
+
+; Conditional jump with a signed 32-bit displacement. The condition is one of
+; the names understood by conditioncode.
+;
+; The location is relative to the start of the instruction immediately
+; following the jmp.
+;
+; The encoding is the escape byte 0x0F, then 0x80 with the condition code in
+; its low four bits, then the four-byte displacement. Notice that the base is
+; 0x80 here; 0x70 is only the base for the one-byte short form, and 0x0F 0x7x
+; would be an entirely different instruction.
+macro jmp.cc.rel.dimm condition, location
+  conditioncode cc, condition
+  db 0x0F
+  opcodecc 0x80, cc
+  dd location
+end macro
+
 ; Invoke a system call provided by the kernel. On Linux, the System V ABI
 ; describes the semantics of such calls (at least, on x86).
 macro syscall
@@ -1570,13 +1629,13 @@ end macro
 ;;; * rbp points to the top of the control stack.
 ;;;
 macro PUSHCONTROL source
-  lea.qreg.qreg.disp8 rbp, -8, rbp
+  lea.qreg.disp8.qreg rbp, -8, rbp
   mov.indirect.qreg.qreg rbp, source
 end macro
 
 macro POPCONTROL target
   mov.qreg.indirect.qreg target, rbp
-  lea.qreg.qreg.disp8 rbp, 8, rbp
+  lea.qreg.disp8.qreg rbp, 8, rbp
 end macro
 
 ;;;
@@ -1711,7 +1770,7 @@ _start:
   ;;; your home. See below for a little more thought about why here in
   ;;; particular.
   ;;;
-  lea.qreg.qreg.disp32 rbp, rdi, control_stack_size
+  lea.qreg.disp32.qreg rbp, control_stack_size, rdi
 
   ;;;
   ;;;   Now we save some stuff onto the heap. These are the locations that
@@ -1727,7 +1786,7 @@ _start:
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi    ; HEAP
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp    ; S0
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp    ; R0
-  lea.qreg.qreg.disp32 rax, rdi, control_stack_size + 0x20
+  lea.qreg.disp32.qreg rax, control_stack_size + 0x20, rdi
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax    ; HERE
   ; TODO also consider LATEST and STATE
   ; strictly speaking, R0 could be a constant... but it isn't known until
@@ -2042,10 +2101,10 @@ defword UNROLL, 0
   ; accordingly.
   push.qreg rsi
 
-  ;   When rcx is 1, we want rsp + 16.
-  lea.qreg.disp8.indexed.qreg rsi, 8, rsp, rcx, 8
-  ;   When rcx is 1, we want rsp + 8.
-  lea.qreg.indexed.qreg rdi, rsp, rcx, 8
+  ;   Regardless of rcx, we want rsp + 16.
+  lea.qreg.disp8.qreg rsi, 16, rsp
+  ;   Regardless of rcx, we want rsp + 8.
+  lea.qreg.disp8.qreg rdi, 8, rsp
 
   ;   With ROLL, we were starting at the high end. Here, we start at the low
   ; end, which means we need rsi to increment after each repetition. That's
@@ -2335,6 +2394,31 @@ defword FETCH32, 0
   mov.dreg.indirect.qreg eax, rbx
   NEXT
 
+;;;;;;;;;;;;;;;;;
+;;; Branching ;;;
+;;;;;;;;;;;;;;;;;
+
+;   Unconditional branch within threaded code. The cell immediately after
+; BRANCH in the calling word holds the offset to jump by.
+;
+;   This takes a number of bytes, not machine words. That allows it to be used
+; for putting weird things embedded in the code.
+;
+;   The offset is relative to the start of the word the number of bytes is in,
+; so, make sure to have it skip itself.
+defword BRANCH, 0
+  dq $ + 0x8                     ; codeword: the machine code right after it
+  ; rsi += [rsi]. Presumably rsi is the threaded-code instruction pointer and
+  ; is pointing at the offset cell at this moment, which is why the offset
+  ; ends up relative to that cell.
+  add.qreg.indirect.qreg rsi, rsi
+  NEXT
+
+; Conditional branch: pops a value, and branches exactly like BRANCH if that
+; value is zero. Otherwise it steps over the offset cell and carries on.
+;
+; This should probably be 0BRANCH, but right now the auto-label code is picky.
+defword ZBRANCH, 0
+  dq $ + 0x8                     ; codeword
+  pop.qreg rax
+  test.qreg.qreg rax, rax
+  ; Please notice the 8-bit branch to the nearby word.
+  ;
+  ; BRANCH + 8 skips BRANCH's codeword and lands on its machine code. The
+  ; displacement is measured from zbranch_after_jmp because a relative jump
+  ; counts from the start of the following instruction.
+  jmp.cc.rel.bimm zero, BRANCH + 8 - zbranch_after_jmp
+zbranch_after_jmp:
+  lodsq                          ; just a convenient way to skip rsi forward
+  NEXT
+
 ;;;
 ;;;   One of the most charming naming traditions in Forth is that the
 ;;; top-level word that stays running forever, is called "quit".
@@ -2484,15 +2568,40 @@ defword QUINE, 0
   ; ... and now we have allocated a block of memory, with its address on the
   ; stack. We also still have HEAP at the bottom of the stack, for future use.
 
-  ; This takes a buffer's address on the stack and adds an ELF file header to
-  ; it, leaving nothing on the stack afterwards.
-  dq DUP, ELF_FILE_HEADER, ELF_PROGRAM_HEADER, DROP
-
-  ; This takes a buffer's address on the stack, skips an ELF file header based
-  ; on hardcoded size, appends an ELF program header, then finishes by pushing
-  ; the length of the part of the buffer that has now been used. Thus we don't
-  ; need to care about how it internally uses registers.
-  dq DUP, OLD_CODE, SWAP
+  ; We have one label, and three pieces of information about it: Guessed value,
+  ; actual value, and status. We keep them on the stack in this order, from
+  ; top to bottom: status, guess, actual. Above that, at the actual top of
+  ; the stack, we have a mutable copy of the buffer's address.
+  ;
+  ; Status is a bit field:
+  ;    bit zero is whether it was used before being defined
+  ;    bit one is whether it's been defined
+  ;    bit two is whether the guessed value wound up equaling the actual value
+  dq DUP, LIT, 0, LIT, 0, LIT, 0, LIT, 4, ROLL
+
+  ; This takes an address to write to on the stack and adds an ELF file header
+  ; to it, leaving the adjusted address with the size of the header added.
+  ; Then it does the same thing with an ELF program header.
+  dq ELF_FILE_HEADER, ELF_PROGRAM_HEADER
+
+  ; The two-pass magick.
+  dq LIT, file_size - 0x78, ADD
+  dq SET_LABEL
+  dq DROP, LIT, 4, ROLL, DUP, LIT, 5, UNROLL
+  dq ELF_FILE_HEADER, ELF_PROGRAM_HEADER
+
+  ; Drop the copy of the buffer's address.
+  dq DROP
+
+  ; Drop the label data.
+  dq DROP, DROP, DROP
+
+  ; This takes a buffer's address on the stack, skips an ELF file header and
+  ; program header based on hardcoded size, computes an offset (secretly
+  ; hardcoded), and writes that offset into an appropriate place in the middle
+  ; of those headers. It then returns the length of the used portion of the
+  ; buffer.
+  dq LIT, 0x78, SWAP
 
   ; write() from stack-allocated buffer
   dq SYS_WRITE
@@ -2503,6 +2612,76 @@ defword QUINE, 0
   dq SYS_WRITE
 
   dq EXIT
+
+; Produces the value a caller should use for the label right now: the actual
+; value if the label has already been defined, or the guessed value if not. In
+; the latter case the status is also marked as used-before-set.
+;
+; The stack pictures below list the bottom of the stack first and the top of
+; the stack last.
+;
+; Stack in:
+;   output memory start
+;   label actual value
+;   label guessed value
+;   label status
+;   output memory current point
+; Stack out:
+;   output memory start
+;   label actual value
+;   label guessed value
+;   label status (potentially modified)
+;   output memory current point
+;   label value for caller to use
+defword USE_LABEL, 0
+  dq DOCOL
+
+  ; Fetch the status
+  dq SWAP
+  ; Check the bit that indicates it's been set.
+  ;
+  ; The branch offset is in bytes, counted from the offset cell itself. Twelve
+  ; cells covers the offset cell, the three cells that put the status back,
+  ; the seven cells that fetch the actual value, and the EXIT, so a zero
+  ; result lands on the used-before-set path further down. Keep this count in
+  ; step with any edits to the cells in between.
+  dq DUP, LIT, 2, AND, ZBRANCH, 12*8
+
+  ; If we're here, it has been set already, so just put the status back...
+  dq LIT, 2, UNROLL
+  ; Fetch the actual value...
+  dq LIT, 4, ROLL, DUP, LIT, 5, UNROLL
+  ; ... and exit
+  dq EXIT
+
+  ; If we're here, it hasn't been set yet, so mark it used-before-set.
+  dq LIT, 1, OR
+  ; Put the status back...
+  dq SWAP
+  ; Fetch the guessed value...
+  dq LIT, 3, ROLL, DUP, LIT, 4, UNROLL
+  ; ... and exit
+  dq EXIT
+
+; Defines the label at the current output position. The actual value becomes
+; the offset of the current point from the start of the output memory, and
+; the status gains the "defined" bit, plus the "guess was right" bit when the
+; new actual value equals the guessed value.
+;
+; The stack pictures below list the bottom of the stack first and the top of
+; the stack last.
+;
+; Stack in:
+;   output memory start
+;   label actual value (not yet set)
+;   label guessed value
+;   label status
+;   output memory current point
+; Stack out:
+;   output memory start
+;   label actual value (now set)
+;   label guessed value
+;   label status (modified)
+;   output memory current point
+defword SET_LABEL, 0
+  dq DOCOL
+
+  ; Compute the current offset, to use as the actual value
+  ; (a copy of the current point, minus a copy of the start)
+  dq DUP, LIT, 6, ROLL, DUP, LIT, 7, UNROLL, SUB
+
+  ; Overwrite the old actual value; keep a copy
+  dq LIT, 5, ROLL, DROP, DUP, LIT, 5, UNROLL
+
+  ; Check equality with the guessed value
+  ; (this consumes the copy of the new actual value kept above)
+  dq LIT, 4, ROLL, DUP, LIT, 5, UNROLL, EQ
+
+  ; We don't need to branch. Now we mark the status as having been defined,
+  ; and we also set bit 2 if appropriate.
+  ;
+  ; Multiplying by 4 moves the comparison result into bit 2; this assumes EQ
+  ; leaves exactly 1 for "equal" and 0 otherwise - TODO confirm. OR'ing with 2
+  ; sets bit 1, the "defined" bit.
+  dq LIT, 4, MUL
+  dq LIT, 3, ROLL, OR, LIT, 2, OR, LIT, 2, UNROLL
+
+  dq EXIT
+
+
 defword HLT, 0
   dq $ + 0x8                     ; codeword
   hlt
@@ -2570,87 +2749,17 @@ defword ELF_PROGRAM_HEADER, 0
   dq LITPACK64, 0                          ; offset in file
   dq LITPACK64, $$                         ; virtual address
     ; required, but can be anything, subject to alignment
-  dq LITPACK16, 0                          ; physical address (ignored)
+  dq LITPACK64, 0                          ; physical address (ignored)
 
   ; Fill in 0 as the file size for now, to avoid uninitialized memory.
-  dq LITPACK64, 0                          ; size in file
-  dq LITPACK64, 0                          ; size in memory
+  dq USE_LABEL, PACK64                     ; size in file
+  dq USE_LABEL, PACK64                     ; size in memory
 
   dq LITPACK64, 0                          ; segment alignment
     ; for relocation, but this doesn't apply to us
 
   dq EXIT
 
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; (old) Implementation strategy ;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;
-;;;   We assemble the entire file contents in a stack-allocated buffer.
-;;; We avoid using the stack for any other purpose. When the file is fully
-;;; assembled, we output it.
-;;;
-;;;   The assembly proceeds in several chunks - ELF header, program header,
-;;; etc. Each chunk extends the buffer as per its own needs, by adjusting
-;;; the stack pointer. All chunks also update a running total file size,
-;;; which refers to how many bytes have actually been populated, not to the
-;;; size of the buffer.
-;;;
-;;; Stack in:
-;;; * Top: The address of a block of memory to use.
-;;;
-;;; Stack out:
-;;; * Top: The length of the data that was written to the provided memory.
-;;;
-;;; Registers within:
-;;;
-;;; * rdx holds the total used file size so far. During hand-off between
-;;;   chunks, this size must be equal to the buffer size; within a chunk it
-;;;   may be less.
-;;;
-;;; * rcx points to the bottom of the buffer.
-;;;
-defword OLD_CODE, 0
-  dq $ + 0x8                               ; codeword
-
-  pop.qreg rcx                             ; our parameter - a block of memory
-  mov.dreg.dimm rdx, 0                     ; store running file size here
-
-  ; Add the size of the ELF header to the running total
-  mov.dreg.dimm rax, 0x40
-  add.qreg.qreg rdx, rax
-
-  ; Add the size of the program header to the running total
-  mov.dreg.dimm rax, 0x38
-  add.qreg.qreg rdx, rax
-
-  ;;; Hardcode the size of the actual code chunk based on flatassembler's
-  ;;; label calculations, since we don't yet have a way to generate it from
-  ;;; within our code.
-  ;;;
-  ;;; Originally this was a constant number, to discourage reliance on label
-  ;;; math, but the direction things are growing in now is to implement
-  ;;; general label math ourselves, so that's okay.
-  ;;;
-  ;;; TODO of course, really we want to for-real compute this at runtime
-  mov.qreg.qimm rax, code_size
-  add.qreg.qreg rdx, rax
-
-  ;;;
-  ;;; Go back and fill in the file size now that we know it (ill-gotten
-  ;;; knowledge though it is).
-  ;;;
-  mov.qreg.disp8.qreg rcx, 0x60, rdx       ; size in file
-  mov.qreg.disp8.qreg rcx, 0x68, rdx       ; size in memory
-
-  ;;;
-  ;;; The buffer is ready; push its length on the value stack, so our caller
-  ;;; can handle write()ing it out.
-  ;;;
-  push.dimm 0x78
-
-  NEXT
-
 code_size = $ - code_start
 file_size = $ - $$