summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--quine.asm473
1 files changed, 291 insertions, 182 deletions
diff --git a/quine.asm b/quine.asm
index 6ab7da4..2d9f6eb 100644
--- a/quine.asm
+++ b/quine.asm
@@ -152,6 +152,12 @@ macro opcodereg opcode, reg
   db opcode or reg
 end macro
 
+macro opcodecc opcode, cc
+  assert opcode >= 0 & opcode < 256 & opcode and 15 = 0  ; low nibble clear
+  assert cc >= 0 & cc < 16                     ; cc fits in the low nibble
+  db opcode or cc                              ; emit the combined byte
+end macro
+
 macro scalefield sfield, scale
   if 1 = scale
     sfield = 0
@@ -166,6 +172,77 @@ macro scalefield sfield, scale
   end if
 end macro
 
+; Sets cc to the 4-bit code for the named condition, for use as the low
+; nibble of a conditional opcode (see opcodecc). Yep, there sure is a lot of
+; duplication in these. This is based on Intel's documented mnemonics...
+; "Above" and "below" are for unsigned comparisons. "Greater" and "less" are
+; for signed comparisons. An unrecognized condition fails the assembly.
+macro conditioncode cc, condition
+  match =above, condition
+    cc = 0x07
+  else match =above.equal, condition
+    cc = 0x03
+  else match =below, condition
+    cc = 0x02
+  else match =below.equal, condition
+    cc = 0x06
+  else match =carry, condition
+    cc = 0x02
+  else match =equal, condition
+    cc = 0x04
+  else match =greater, condition
+    cc = 0x0F
+  else match =greater.equal, condition
+    cc = 0x0D
+  else match =less, condition
+    cc = 0x0C
+  else match =less.equal, condition
+    cc = 0x0E
+  else match =not.above, condition
+    cc = 0x06
+  else match =not.above.equal, condition
+    cc = 0x02
+  else match =not.below, condition
+    cc = 0x03
+  else match =not.below.equal, condition
+    cc = 0x07
+  else match =not.carry, condition
+    cc = 0x03
+  else match =not.equal, condition
+    cc = 0x05
+  else match =not.greater, condition
+    cc = 0x0E
+  else match =not.greater.equal, condition
+    cc = 0x0C
+  else match =not.less, condition
+    cc = 0x0D
+  else match =not.less.equal, condition
+    cc = 0x0F
+  else match =not.overflow, condition
+    cc = 0x01
+  else match =not.parity, condition
+    cc = 0x0B
+  else match =not.sign, condition
+    cc = 0x09
+  else match =not.zero, condition
+    cc = 0x05
+  else match =overflow, condition
+    cc = 0x00
+  else match =parity, condition
+    cc = 0x0A
+  else match =parity.even, condition
+    cc = 0x0A
+  else match =parity.odd, condition
+    cc = 0x0B
+  else match =sign, condition
+    cc = 0x08
+  else match =zero, condition
+    cc = 0x04
+  else
+    assert 0
+  end match
+end macro
+
 
 ;;; On registers
 ;;; ------------
@@ -288,6 +365,10 @@ macro owordreg result, register
 end macro
 
 
+;;; Instructions
+;;; ------------
+
+
 ; TODO what register size does this use?
 macro mov.b target, source
   match =rax?, target
@@ -649,6 +730,23 @@ macro add.indirect.qreg.qreg target, source
 end macro
 
 
+macro add.qreg.indirect.qreg target, source
+  match =rsp, source
+    assert 0
+    ; Not supported: R/M = rsp is the SIB case.
+  else match =rbp, source
+    assert 0
+    ; Not supported: mode 0 with R/M = rbp is an unrelated addressing mode.
+  else
+    qwordreg treg, target
+    qwordreg sreg, source
+    rex.w
+    db 0x03                      ; ADD r64, r/m64
+    modrm 0, treg, sreg          ; mode 0: source holds the memory address
+  end match
+end macro
+
+
 ; This adds a signed 8-bit immediate value to a 64-bit register, in place.
 ;
 ; Notice the use of 3 as the addressing mode. This says to use the register
@@ -801,7 +899,7 @@ macro not.qreg target
 end macro
 
 
-; This sets the flags to the same things they'd be set by if subtracting
+; This sets the flags to the same things they'd be set to if subtracting
 ; right from left.
 macro cmp.qreg.qreg left, right
   qwordreg lreg, left
@@ -811,81 +909,25 @@ macro cmp.qreg.qreg left, right
   modrm 3, lreg, rreg
 end macro
 
+; This sets the flags to the same things they'd be set to if AND'ing right
+; with left. Both operands are 64-bit registers.
+macro test.qreg.qreg left, right
+  qwordreg lreg, left
+  qwordreg rreg, right
+  rex.w
+  db 0x85                        ; TEST r/m64, r64
+  modrm 3, rreg, lreg            ; mode 3: R/M is a register, not memory
+end macro
 
-; Yep, there sure is a lot of duplication in these. This is based on Intel's
-; documented mnemonics...
-;
-; "Above" and "below" are for unsigned comparisons. "Greater" and "less" are
-; for signed comparisons.
 macro set.breg.cc target, condition
   bytereg treg, target
+  conditioncode cc, condition
   db 0x0F
-  match =above, condition
-    db 0x97
-  else match =above.equal, condition
-    db 0x93
-  else match =below, condition
-    db 0x92
-  else match =below.equal, condition
-    db 0x96
-  else match =carry, condition
-    db 0x92
-  else match =equal, condition
-    db 0x94
-  else match =greater, condition
-    db 0x9F
-  else match =greater.equal, condition
-    db 0x9D
-  else match =less, condition
-    db 0x9C
-  else match =less.equal, condition
-    db 0x9E
-  else match =not.above, condition
-    db 0x96
-  else match =not.above.equal, condition
-    db 0x92
-  else match =not.below, condition
-    db 0x93
-  else match =not.below.equal, condition
-    db 0x97
-  else match =not.carry, condition
-    db 0x93
-  else match =not.equal, condition
-    db 0x95
-  else match =not.greater, condition
-    db 0x9E
-  else match =not.greater.equal, condition
-    db 0x9C
-  else match =not.less, condition
-    db 0x9D
-  else match =not.less.equal, condition
-    db 0x9F
-  else match =not.overflow, condition
-    db 0x91
-  else match =not.parity, condition
-    db 0x9B
-  else match =not.sign, condition
-    db 0x99
-  else match =not.zero, condition
-    db 0x95
-  else match =overflow, condition
-    db 0x90
-  else match =parity, condition
-    db 0x9A
-  else match =parity.even, condition
-    db 0x9A
-  else match =parity.odd, condition
-    db 0x9B
-  else match =sign, condition
-    db 0x98
-  else match =zero, condition
-    db 0x94
-  else
-    assert 0
-  end match
+  opcodecc 0x90, cc
   modrm 3, 0, treg
 end macro
 
+
 ; Move from an 8-bit immediate value, to a location relative to a 64-bit
 ; register, with an 8-bit displacement and no indexing.
 ;
@@ -965,7 +1007,7 @@ macro mov.qreg.disp8.qreg target, offset, source
   rex.w
   db 0x89
   modrm 1, sreg, treg
-  match =rsp, target
+  match =rsp, source
     ; R/M = rsp is the SIB case
     sib 0, 4, 4
       ; no scaling, no indexing, rsp as base
@@ -1035,32 +1077,32 @@ end macro
 ;  indexed addressing, with an 8-bit displacement and no indexing, but instead
 ; of doing anything with the memory, just store the address itself into a
 ; register.
-macro lea.qreg.qreg.disp8 target, offset, source
-  match =rsp, target
-    ; This is the SIB case
-    assert 0
-  else
-    qwordreg treg, target
-    qwordreg sreg, source
-    rex.w
-    db 0x8D
-    modrm 1, treg, sreg
-    db offset
+macro lea.qreg.disp8.qreg target, offset, source
+  qwordreg treg, target
+  qwordreg sreg, source
+  rex.w
+  db 0x8D
+  modrm 1, treg, sreg
+  match =rsp, source
+    ; R/M = rsp is the SIB case
+    sib 0, 4, sreg
+      ; no scaling, no indexing, rsp as base
   end match
+  db offset
 end macro
 
-macro lea.qreg.qreg.disp32 target, source, offset
-  match =rsp, target
-    ; This is the SIB case
-    assert 0
-  else
-    qwordreg treg, target
-    qwordreg sreg, source
-    rex.w
-    db 0x8D
-    modrm 2, treg, sreg
-    dd offset
+macro lea.qreg.disp32.qreg target, offset, source
+  qwordreg treg, target
+  qwordreg sreg, source
+  rex.w
+  db 0x8D
+  modrm 2, treg, sreg
+  match =rsp, source
+    ; R/M = rsp is the SIB case
+    sib 0, 4, sreg
+      ; no scaling, no indexing, rsp as base
   end match
+  dd offset
 end macro
 
 macro lea.qreg.indexed.qreg target, source, index, scale
@@ -1249,6 +1291,23 @@ macro jmp.rel.dimm location
   dd location
 end macro
 
+; Conditional short jump (opcode 0x70 + cc) with an 8-bit location, which is
+; relative to the start of the instruction immediately following the jmp.
+macro jmp.cc.rel.bimm condition, location
+  conditioncode cc, condition
+  opcodecc 0x70, cc
+  db location
+end macro
+
+; Conditional near jump (0x0F, 0x80 + cc) with a 32-bit location, which is
+; relative to the start of the instruction immediately following the jmp.
+macro jmp.cc.rel.dimm condition, location
+  conditioncode cc, condition
+  db 0x0F
+  opcodecc 0x80, cc
+  dd location
+end macro
+
 ; Invoke a system call provided by the kernel. On Linux, the System V ABI
 ; describes the semantics of such calls (at least, on x86).
 macro syscall
@@ -1570,13 +1629,13 @@ end macro
 ;;; * rbp points to the top of the control stack.
 ;;;
 macro PUSHCONTROL source
-  lea.qreg.qreg.disp8 rbp, -8, rbp
+  lea.qreg.disp8.qreg rbp, -8, rbp
   mov.indirect.qreg.qreg rbp, source
 end macro
 
 macro POPCONTROL target
   mov.qreg.indirect.qreg target, rbp
-  lea.qreg.qreg.disp8 rbp, 8, rbp
+  lea.qreg.disp8.qreg rbp, 8, rbp
 end macro
 
 ;;;
@@ -1711,7 +1770,7 @@ _start:
   ;;; your home. See below for a little more thought about why here in
   ;;; particular.
   ;;;
-  lea.qreg.qreg.disp32 rbp, rdi, control_stack_size
+  lea.qreg.disp32.qreg rbp, control_stack_size, rdi
 
   ;;;
   ;;;   Now we save some stuff onto the heap. These are the locations that
@@ -1727,7 +1786,7 @@ _start:
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi    ; HEAP
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp    ; S0
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp    ; R0
-  lea.qreg.qreg.disp32 rax, rdi, control_stack_size + 0x20
+  lea.qreg.disp32.qreg rax, control_stack_size + 0x20, rdi
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax    ; HERE
   ; TODO also consider LATEST and STATE
   ; strictly speaking, R0 could be a constant... but it isn't known until
@@ -2042,10 +2101,10 @@ defword UNROLL, 0
   ; accordingly.
   push.qreg rsi
 
-  ;   When rcx is 1, we want rsp + 16.
-  lea.qreg.disp8.indexed.qreg rsi, 8, rsp, rcx, 8
-  ;   When rcx is 1, we want rsp + 8.
-  lea.qreg.indexed.qreg rdi, rsp, rcx, 8
+  ;   Regardless of rcx, we want rsp + 16.
+  lea.qreg.disp8.qreg rsi, 16, rsp
+  ;   Regardless of rcx, we want rsp + 8.
+  lea.qreg.disp8.qreg rdi, 8, rsp
 
   ;   With ROLL, we were starting at the high end. Here, we start at the low
   ; end, which means we need rsi to increment after each repetition. That's
@@ -2335,6 +2394,31 @@ defword FETCH32, 0
   mov.dreg.indirect.qreg eax, rbx
   NEXT
 
+;;;;;;;;;;;;;;;;;
+;;; Branching ;;;
+;;;;;;;;;;;;;;;;;
+
+;   This takes a number of bytes, not machine words. That allows it to be used
+; for putting weird things embedded in the code.
+;
+;   The offset is relative to the start of the cell that holds the number of
+; bytes, so, make sure to have it skip itself.
+defword BRANCH, 0
+  dq $ + 0x8                     ; codeword
+  add.qreg.indirect.qreg rsi, rsi ; rsi += [rsi]
+  NEXT
+
+; This should probably be 0BRANCH, but right now the auto-label code is picky.
+defword ZBRANCH, 0
+  dq $ + 0x8                     ; codeword
+  pop.qreg rax                   ; the value to test
+  test.qreg.qreg rax, rax        ; flags as if AND'ing rax with itself
+  ; Please notice the 8-bit branch to the nearby word, taken when rax is zero.
+  jmp.cc.rel.bimm zero, BRANCH + 8 - zbranch_after_jmp
+zbranch_after_jmp:
+  lodsq                          ; just a convenient way to skip rsi forward
+  NEXT
+
 ;;;
 ;;;   One of the most charming naming traditions in Forth is that the
 ;;; top-level word that stays running forever, is called "quit".
@@ -2484,15 +2568,40 @@ defword QUINE, 0
   ; ... and now we have allocated a block of memory, with its address on the
   ; stack. We also still have HEAP at the bottom of the stack, for future use.
 
-  ; This takes a buffer's address on the stack and adds an ELF file header to
-  ; it, leaving nothing on the stack afterwards.
-  dq DUP, ELF_FILE_HEADER, ELF_PROGRAM_HEADER, DROP
-
-  ; This takes a buffer's address on the stack, skips an ELF file header based
-  ; on hardcoded size, appends an ELF program header, then finishes by pushing
-  ; the length of the part of the buffer that has now been used. Thus we don't
-  ; need to care about how it internally uses registers.
-  dq DUP, OLD_CODE, SWAP
+  ; We have one label, and three pieces of information about it: Guessed value,
+  ; actual value, and status. We keep them on the stack in this order, from
+  ; top to bottom: guess, actual, status. Above that, at the actual top of
+  ; the stack, we have a mutable copy of the buffer's address.
+  ;
+  ; Status is a bit field:
+  ;    bit zero is whether it was used before being defined
+  ;    bit one is whether it's been defined
+  ;    bit two is whether the guessed value wound up equaling the actual value
+  dq DUP, LIT, 0, LIT, 0, LIT, 0, LIT, 4, ROLL
+
+  ; This takes an address to write to on the stack and adds an ELF file header
+  ; to it, leaving the adjusted address with the size of the header added.
+  ; Then it does the same thing with an ELF program header.
+  dq ELF_FILE_HEADER, ELF_PROGRAM_HEADER
+
+  ; The two-pass magick.
+  dq LIT, file_size - 0x78, ADD
+  dq SET_LABEL
+  dq DROP, LIT, 4, ROLL, DUP, LIT, 5, UNROLL
+  dq ELF_FILE_HEADER, ELF_PROGRAM_HEADER
+
+  ; Drop the copy of the buffer's address.
+  dq DROP
+
+  ; Drop the label data.
+  dq DROP, DROP, DROP
+
+  ; The headers have already been written into the buffer by the words above,
+  ; so all that is left is to supply the length of the used portion of the
+  ; buffer. That length is still hardcoded: 0x78 is the size of the ELF file
+  ; header (0x40) plus the size of the program header (0x38). The SWAP then
+  ; exchanges the length with the buffer's address.
+  dq LIT, 0x78, SWAP
 
   ; write() from stack-allocated buffer
   dq SYS_WRITE
@@ -2503,6 +2612,76 @@ defword QUINE, 0
   dq SYS_WRITE
 
   dq EXIT
+
+; Stack in:
+;   output memory start
+;   label actual value
+;   label guessed value
+;   label status
+;   output memory current point
+; Stack out:
+;   output memory start
+;   label actual value
+;   label guessed value
+;   label status (potentially modified)
+;   output memory current point
+;   label value for caller to use
+defword USE_LABEL, 0
+  dq DOCOL
+
+  ; Fetch the status
+  dq SWAP
+  ; Check the bit that indicates it's been set.
+  dq DUP, LIT, 2, AND, ZBRANCH, 12*8    ; 12 cells, counting this offset cell
+
+  ; If we're here, it has been set already, so just put the status back...
+  dq LIT, 2, UNROLL
+  ; Fetch the actual value...
+  dq LIT, 4, ROLL, DUP, LIT, 5, UNROLL
+  ; ... and exit
+  dq EXIT
+
+  ; If we're here, it hasn't been set yet, so mark it used-before-set.
+  dq LIT, 1, OR
+  ; Put the status back...
+  dq SWAP
+  ; Fetch the guessed value...
+  dq LIT, 3, ROLL, DUP, LIT, 4, UNROLL
+  ; ... and exit
+  dq EXIT
+
+; Stack in:
+;   output memory start
+;   label actual value (not yet set)
+;   label guessed value
+;   label status
+;   output memory current point
+; Stack out:
+;   output memory start
+;   label actual value (now set)
+;   label guessed value
+;   label status (modified)
+;   output memory current point
+defword SET_LABEL, 0
+  dq DOCOL
+
+  ; Compute the current offset, to use as the actual value
+  dq DUP, LIT, 6, ROLL, DUP, LIT, 7, UNROLL, SUB
+
+  ; Overwrite the old actual value; keep a copy
+  dq LIT, 5, ROLL, DROP, DUP, LIT, 5, UNROLL
+
+  ; Check equality with the guessed value
+  dq LIT, 4, ROLL, DUP, LIT, 5, UNROLL, EQ
+
+  ; We don't need to branch. Now we mark the status as having been defined,
+  ; and we also set bit 2 if appropriate.
+  dq LIT, 4, MUL                 ; assumes EQ leaves 1 when equal - TODO confirm
+  dq LIT, 3, ROLL, OR, LIT, 2, OR, LIT, 2, UNROLL
+
+  dq EXIT
+
+
 defword HLT, 0
   dq $ + 0x8                     ; codeword
   hlt
@@ -2570,87 +2749,17 @@ defword ELF_PROGRAM_HEADER, 0
   dq LITPACK64, 0                          ; offset in file
   dq LITPACK64, $$                         ; virtual address
     ; required, but can be anything, subject to alignment
-  dq LITPACK16, 0                          ; physical address (ignored)
+  dq LITPACK64, 0                          ; physical address (ignored)
 
   ; Fill in 0 as the file size for now, to avoid unitialized memory.
-  dq LITPACK64, 0                          ; size in file
-  dq LITPACK64, 0                          ; size in memory
+  dq USE_LABEL, PACK64                     ; size in file
+  dq USE_LABEL, PACK64                     ; size in memory
 
   dq LITPACK64, 0                          ; segment alignment
     ; for relocation, but this doesn't apply to us
 
   dq EXIT
 
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; (old) Implementation strategy ;;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;;
-;;;   We assemble the entire file contents in a stack-allocated buffer.
-;;; We avoid using the stack for any other purpose. When the file is fully
-;;; assembled, we output it.
-;;;
-;;;   The assembly proceeds in several chunks - ELF header, program header,
-;;; etc. Each chunk extends the buffer as per its own needs, by adjusting
-;;; the stack pointer. All chunks also update a running total file size,
-;;; which refers to how many bytes have actually been populated, not to the
-;;; size of the buffer.
-;;;
-;;; Stack in:
-;;; * Top: The address of a block of memory to use.
-;;;
-;;; Stack out:
-;;; * Top: The length of the data that was written to the provided memory.
-;;;
-;;; Registers within:
-;;;
-;;; * rdx holds the total used file size so far. During hand-off between
-;;;   chunks, this size must be equal to the buffer size; within a chunk it
-;;;   may be less.
-;;;
-;;; * rcx points to the bottom of the buffer.
-;;;
-defword OLD_CODE, 0
-  dq $ + 0x8                               ; codeword
-
-  pop.qreg rcx                             ; our parameter - a block of memory
-  mov.dreg.dimm rdx, 0                     ; store running file size here
-
-  ; Add the size of the ELF header to the running total
-  mov.dreg.dimm rax, 0x40
-  add.qreg.qreg rdx, rax
-
-  ; Add the size of the program header to the running total
-  mov.dreg.dimm rax, 0x38
-  add.qreg.qreg rdx, rax
-
-  ;;; Hardcode the size of the actual code chunk based on flatassembler's
-  ;;; label calculations, since we don't yet have a way to generate it from
-  ;;; within our code.
-  ;;;
-  ;;; Originally this was a constant number, to discourage reliance on label
-  ;;; math, but the direction things are growing in now is to implement
-  ;;; general label math ourselves, so that's okay.
-  ;;;
-  ;;; TODO of course, really we want to for-real compute this at runtime
-  mov.qreg.qimm rax, code_size
-  add.qreg.qreg rdx, rax
-
-  ;;;
-  ;;; Go back and fill in the file size now that we know it (ill-gotten
-  ;;; knowledge though it is).
-  ;;;
-  mov.qreg.disp8.qreg rcx, 0x60, rdx       ; size in file
-  mov.qreg.disp8.qreg rcx, 0x68, rdx       ; size in memory
-
-  ;;;
-  ;;; The buffer is ready; push its length on the value stack, so our caller
-  ;;; can handle write()ing it out.
-  ;;;
-  push.dimm 0x78
-
-  NEXT
-
 code_size = $ - code_start
 file_size = $ - $$