1 files changed, 319 insertions, 38 deletions
diff --git a/quine.asm b/quine.asm
index a63ae02..2f8e91d 100644
--- a/quine.asm
+++ b/quine.asm
@@ -180,11 +180,11 @@ end macro
 macro conditioncode cc, condition
   match =above, condition
     cc = 0x07
-  else match =above.equal, condition
+  else match =above_equal, condition
     cc = 0x03
   else match =below, condition
     cc = 0x02
-  else match =below.equal, condition
+  else match =below_equal, condition
     cc = 0x06
   else match =carry, condition
     cc = 0x02
@@ -192,47 +192,47 @@ macro conditioncode cc, condition
     cc = 0x04
   else match =greater, condition
     cc = 0x0F
-  else match =greater.equal, condition
+  else match =greater_equal, condition
     cc = 0x0D
   else match =less, condition
     cc = 0x0C
-  else match =less.equal, condition
+  else match =less_equal, condition
     cc = 0x0E
-  else match =not.above, condition
+  else match =not_above, condition
     cc = 0x06
-  else match =not.above.equal, condition
+  else match =not_above_equal, condition
     cc = 0x02
-  else match =not.below, condition
+  else match =not_below, condition
     cc = 0x03
-  else match =not.below.equal, condition
+  else match =not_below_equal, condition
     cc = 0x07
-  else match =not.carry, condition
+  else match =not_carry, condition
     cc = 0x03
-  else match =not.equal, condition
+  else match =not_equal, condition
     cc = 0x05
-  else match =not.greater, condition
+  else match =not_greater, condition
     cc = 0x0E
-  else match =not.greater.equal, condition
+  else match =not_greater_equal, condition
     cc = 0x0C
-  else match =not.less, condition
+  else match =not_less, condition
     cc = 0x0D
-  else match =not.less.equal, condition
+  else match =not_less_equal, condition
     cc = 0x0F
-  else match =not.overflow, condition
+  else match =not_overflow, condition
     cc = 0x01
-  else match =not.parity, condition
+  else match =not_parity, condition
     cc = 0x0B
-  else match =not.sign, condition
+  else match =not_sign, condition
     cc = 0x09
-  else match =not.zero, condition
+  else match =not_zero, condition
     cc = 0x05
   else match =overflow, condition
     cc = 0x00
   else match =parity, condition
     cc = 0x0A
-  else match =parity.even, condition
+  else match =parity_even, condition
     cc = 0x0A
-  else match =parity.odd, condition
+  else match =parity_odd, condition
     cc = 0x0B
   else match =sign, condition
     cc = 0x08
@@ -1898,17 +1898,118 @@ _start:
 cold_start:
   ;;; TODO this is probably where we should deal with that "heap" that we passed
   ;;; on the stack
-  ;;;   Start defining some words that are allocated at runtime on the heap,
-  ;;; beginning with the minimal set of words needed to define more words.
-  ;;; The first few are our variables, which hardcode the addresses they will
-  ;;; return - but since we're doing this at runtime, "hardcoding" can reflect
-  ;;; where our heap is. This is the fundamental trick that makes the heap
-  ;;; usable.
-  dq early_heap, litstring, "heap", early_defvar
-  dq early_s0, litstring, "s0", early_defvar
-  dq early_r0, litstring, "r0", early_defvar
-  dq early_latest, litstring, "latest", early_defvar
-  dq early_here, litstring, "here", early_defvar
+  ;;;   Before handing off to us, _start pushed a single value onto the stack,
+  ;;; a pointer to the beginning of the heap. Now, we load our entire Forth
+  ;;; implementation onto that heap, beginning with the minimal set of words
+  ;;; needed to define more words. We do this because we need variables as
+  ;;; infrastructure so we can eventually have dynamic definitions.
+  ;;;
+  ;;;   There's something non-obvious here: words implemented statically as
+  ;;; part of the executable image can't contain things that vary at runtime.
+  ;;; That means that even if these words tried to implement some sort of
+  ;;; dynamic lookup, they would have no way to find the root of whatever
+  ;;; dynamic data structure they use. Dynamism needs to be bootstrapped.
+  ;;;
+  ;;;   In a more traditional C-style program, static code could look up
+  ;;; variables based on fixed addresses that are the same on every run.
+  ;;; Failing that, we could dedicate a register to it, though that's a
+  ;;; considerable expense. We chose not to do either of those things, because
+  ;;; we want the versatility that comes with not being picky about our
+  ;;; address space: It allows us to contemplate future improvements such as
+  ;;; ASLR, or embedding into other processes that impose their own addressing
+  ;;; constraints, or even coexisting with multiple versions of ourselves.
+  ;;; That choice does mean we have the hard version of this bootstrapping
+  ;;; problem, and copying ourselves to the heap is how we solve it.
+  ;;;
+  ;;;   We do have the heap address right now, though that won't last. In case
+  ;;; it's unclear why not: keeping it on the stack would require all future
+  ;;; references to walk the stack, and somehow know when they've reached the
+  ;;; bottom. The stack is a good place to keep things with clearly delimited
+  ;;; lifetimes and visibility, but when we want something to live for our
+  ;;; entire program and be easy to find from any code within it, we need to
+  ;;; do something else. Anyway, since we have the address, we can use it for
+  ;;; the next little bit of setup.
+  ;;;
+  ;;;   The first few words we define are our variables, which hardcode the
+  ;;; addresses they will return - but since we're doing this at runtime,
+  ;;; "hardcoding" can reflect where our heap is. This is the fundamental
+  ;;; trick that makes the heap usable.
+  ;;;
+  ;;;   One more thing to notice: We already allocated the backing stores of
+  ;;; these variables, and populated their initial values, in _start. The
+  ;;; words we're defining return those same addresses for the same backing
+  ;;; stores. So, we have continuity: Stuff defined in terms of the
+  ;;; variable-words we're defining now will interoperate with the stuff that
+  ;;; we define in the "early" way, which includes those very words. Both the
+  ;;; early code and the later code are dealing with the same data structures,
+  ;;; they're just using a different technique to find them.
+  ;;;
+  ;;;   This is the only hardcoding we need to do; by building on top of it,
+  ;;; we will soon reach a point where the rest of the system can be defined
+  ;;; within itself.
+  dq early_heap, litstring, "heap", early_variable
+  dq early_s0, litstring, "s0", early_variable
+  dq early_r0, litstring, "r0", early_variable
+  dq early_latest, litstring, "latest", early_variable
+  dq early_here, litstring, "here", early_variable
+
+  ;;;   Now we define a heap version of docol. Strictly speaking it doesn't
+  ;;; need to be among the first words, it only needs to come before the first
+  ;;; words implemented in Forth. However, it's conceptually tidy to have it
+  ;;; that way, so that's what we do.
+  ;;;
+  ;;;   Docol also presents a unique challenge, in that it's two snippets of
+  ;;; code and one of them needs to refer to the other. When we use docol as
+  ;;; the codeword of a word we're defining, we point to a snippet which acts
+  ;;; as an interpreter for the word's body. However, when we look up "docol"
+  ;;; in the dictionary, what we get is a word that returns the address of the
+  ;;; interpreter snippet, effectively acting as a constant.
+  ;;;
+  ;;;   One way to make this work would be to use a forward-referencing
+  ;;; address using the labels system. However, it turns out that only docol
+  ;;; and zbranch would benefit from this, and we drastically simplify our
+  ;;; code by reworking things so that no forward reference is needed.
+  ;;;
+  ;;;   What we do is define the interpreter snippet first, allocating space
+  ;;; for the machine code directly out of "here", with no word header nor any
+  ;;; dictionary entry pointing to it. We keep track of the address we put
+  ;;; that at, then we define the constant to point to it.
+  ;;;
+  ;;;   While it may seem weird to use space that's "outside" of any word,
+  ;;; keep in mind that using the heap in creative ways is part of the spirit
+  ;;; of Forth. Jonesforth doesn't have this bootstrapping problem, but its
+  ;;; variables use this same technique of putting the value before the word
+  ;;; header to avoid a forward reference. Also, words don't have end
+  ;;; delimiters, so who's to say what's inside or outside them?
+  dq early_here, fetch, dup
+  dq rsi, pack_pushcontrol
+  dq lit, 8, rax, add_reg64_imm8
+  dq rax, rsi, mov_reg64_reg64
+  dq pack_next
+  dq lit, 8, packalign
+  dq roll3, swap, early_here_store, swap
+; FIXME: the lea is output correctly, but the next instruction is supposed
+; to store rsi at the address in rbp, and its modrm byte comes out as 0x35:
+; mode 0 with R/M 5 is rip-relative, so the add's bytes become its disp32.
+;(gdb) disassemble/r 0x10000100f8,+24
+;Dump of assembler code from 0x10000100f8 to 0x1000010110:
+;   0x00000010000100f8:  48 8d 6d f8             lea    -0x8(%rbp),%rbp
+;   0x00000010000100fc:  48 89 35 48 83 c0 08    mov    %rsi,0x8c08348(%rip)        # 0x1008c1844b
+;   0x0000001000010103:  48 89 c6                mov    %rax,%rsi
+;   0x0000001000010106:  48 ad                   lods   %ds:(%rsi),%rax
+;   0x0000001000010108:  ff 20                   jmp    *(%rax)
+;   0x000000100001010a:  00 00                   add    %al,(%rax)
+;   0x000000100001010c:  00 00                   add    %al,(%rax)
+;   0x000000100001010e:  00 00                   add    %al,(%rax)
+  ; Now the interpreter snippet is in-place and "here" points after it, so
+  ; that future allocation won't step on it. We also still have a copy of its
+  ; start address, which we will now pass to early_variable.
+  dq litstring, "docol", early_variable
+  ; (While it might be tidy to have a separate "early_constant", it would do
+  ; the same thing. Late variables and constants will be different because the
+  ; real "variable" word will also be responsible for allocating the backing
+  ; store, but the only thing early_variable is doing is returning an
+  ; address.)
 
   ;;; For triage's sake, here's an inventory of everything else in the file.
   ;;;
@@ -1952,7 +2053,7 @@ cold_start:
   ;;; Forth, not needed on heap:
   ;;;   early_heap, early_s0, early_r0, early_latest, early_here
   ;;;   early_create, early_comma, early_self_codeword, early_here_store,
-  ;;;   pack_next, early_defvar
+  ;;;   pack_next, early_variable
   ;;;
   ;;; It's likely that nothing past this point is required for the heap copy,
   ;;; but it's here for completeness.
@@ -1981,6 +2082,7 @@ cold_start:
   ;;;   self_raw
   ;;;     self-reference
   ;;;
+  dq hlt
   dq quit
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -2063,7 +2165,7 @@ docol_constant:
   align 8
 docol:
   ; Since docol is not a normal word, the label points to the value we care
-  ; about from the assembly side of things, wich is the address we use as the
+  ; about from the assembly side of things, which is the address we use as the
   ; codeword.
   pushcontrol rsi
   add.qreg.bimm rax, 8
@@ -2346,7 +2448,7 @@ defword ne, 0
   pop.qreg rbx
   pop.qreg rax
   cmp.qreg.qreg rax, rbx
-  set.breg.cc al, not.equal
+  set.breg.cc al, not_equal
   and.qreg.bimm rax, 0x01
   push.qreg rax
   next
@@ -2376,7 +2478,7 @@ defword ge, 0
   dq $ + 8
   pop.qreg rbx
   pop.qreg rax
-  set.breg.cc al, greater.equal
+  set.breg.cc al, greater_equal
   cmp.qreg.qreg rax, rbx
   push.qreg rax
   next
@@ -2385,7 +2487,7 @@ defword le, 0
   dq $ + 8
   pop.qreg rbx
   pop.qreg rax
-  set.breg.cc al, less.equal
+  set.breg.cc al, less_equal
   cmp.qreg.qreg rax, rbx
   push.qreg rax
   next
@@ -2722,6 +2824,10 @@ defword litpack8, 0
 ;;;
 ;;;   It's all backwards and stuff.
 ;;;
+;;;   Okay, but seriously, the convention is: target on the top of the stack,
+;;; source behind it. This is similar to how the Forth "store" and "fetch"
+;;; words work.
+;;;
 ;;;   These routines use the output helpers, defined above.  They're called in
 ;;; the same way: an output address, followed by data items specific to what's
 ;;; being output.
@@ -2809,6 +2915,10 @@ defword rex_wb, 0
 defword opcodereg, 0
   dq docol, or, pack8, exit
 
+;   The low-level word that outputs a modrm byte given fully-processed,
+; numeric values for its fields. Most code will want to call one of the
+; higher-level modrm_* words, instead.
+;
 ; Stack
 ;   output point
 ;   mode ("mod") field
@@ -2817,6 +2927,77 @@ defword opcodereg, 0
 defword modrm, 0
   dq docol, swap, lit, 8, mul, or, swap, lit, 64, mul, or, pack8, exit
 
+;   Emits a modrm byte for direct register addressing (mode field = 3), the
+; simplest of the modrm modes; there are no special cases to check.
+;
+;   Whether the R/M register is the instruction's source or its target, and
+; whether the reg/op value names a register or extends the opcode, is for the
+; caller to work out; this helper is never told.
+;
+; Stack:
+;   output point
+;   reg/op field value (raw number)
+;   reg/mem field register name
+defword modrm_reg64, 0
+  dq docol
+  dq reg64, lit, 3, unroll3, modrm
+  dq exit
+
+;   This is a helper for assembly instructions that want register-indirect
+; addressing with no displacement and no indexing, which is a value of 0 in
+; the modrm byte's mode field.
+;
+;   It handles the two R/M values that mean something else in that mode. A
+; value of 4, which would otherwise refer to rsp, calls for an SIB byte, so
+; one is emitted with a scale of 1, no indexing, and rsp as the base. When
+; the R/M register is rbp, it reports an error and exits instead; callers
+; that need rbp can use modrm_disp8_reg64 with a displacement of 0.
+;
+; Stack:
+;   output point
+;   reg/op field value (raw number)
+;   reg/mem field register name
+defword modrm_indirect_reg64, 0
+  dq docol
+  ; If the R/M register is rbp, branch to the error report after "exit".
+  dq dup, rbp, ne, zbranch, 23*8
+  ; Check whether the R/M register is rsp; save the test result for later.
+  dq dup, rsp, eq, lit, 4, unroll
+  dq reg64, lit, 0, unroll3, modrm
+  ; If the R/M register was rsp, we need an SIB byte; otherwise, skip it.
+  dq swap, zbranch, 8*8, lit, 0, lit, 4, rsp, reg64, sib
+  dq exit
+  dq litstring, "R/M parameter to modrm_indirect_reg64 is rbp.", emitstring
+  dq lit, 1, sys_exit
+
+;   Mode 1, [register + disp8], no indexing. rbp works; only rsp needs SIB.
+;
+; Stack:
+;   output point
+;   reg/op field value (raw number)
+;   reg/mem field register name
+;   displacement value
+defword modrm_disp8_reg64, 0
+  dq docol
+  ; Test whether the R/M register is rsp and bury the result for later.
+  dq swap, dup, rsp, eq, lit, 5, unroll, swap
+  ; Stash the displacement value out of the way, too.
+  dq lit, 4, unroll
+  dq reg64, lit, 1, unroll3, modrm
+  ; If the R/M register was rsp, we need an SIB byte; otherwise, skip it.
+  dq roll3, zbranch, 8*8, lit, 0, lit, 4, rsp, reg64, sib
+  ; The displacement byte goes last, after the modrm byte and any SIB byte.
+  dq swap, pack8
+  dq exit
+
+; Packs (scale * 64) | (index * 8) | base into one SIB byte and outputs it.
+; Stack: output point, scale field, index field, base field (base on top)
+defword sib, 0
+  dq docol
+  dq swap, lit, 8, mul, or
+  dq swap, lit, 64, mul, or
+  dq pack8, exit
+
 ; Stack:
 ;   output point
 defword cld, 0
@@ -2842,7 +3023,70 @@ defword mov_extrareg64_imm64, 0
 defword mov_reg64_reg64, 0
   dq docol
   dq roll3, rex_w, lit, 0x89, pack8, unroll3
-  dq reg64, swap, reg64, swap, lit, 3, unroll3, modrm
+  dq swap, reg64, swap, modrm_reg64
+  dq exit
+
+; Packs "mov [target], source": a store through the target register.
+; Stack:
+;   output point
+;   source register name
+;   target register name
+defword mov_indirect_reg64_reg64, 0
+  dq docol, roll3, rex_w, lit, 0x89, pack8, unroll3
+  dq swap, reg64, swap, modrm_indirect_reg64
+  dq exit
+
+; Packs "mov [target + disp8], source": a store with a byte displacement.
+; Stack:
+;   output point
+;   source register name
+;   target register name
+;   target displacement value
+defword mov_disp8_reg64_reg64, 0
+  dq docol, lit, 4, roll, rex_w, lit, 0x89, pack8, lit, 4, unroll
+  dq roll3, reg64, unroll3, modrm_disp8_reg64
+  dq exit
+
+; Packs "mov target, [source]": a load through the source register.
+; Stack:
+;   output point
+;   source register name
+;   target register name
+defword mov_reg64_indirect_reg64, 0
+  dq docol, roll3, rex_w, lit, 0x8B, pack8, unroll3
+  dq reg64, swap, modrm_indirect_reg64
+  dq exit
+
+; Packs "mov target, [source + disp8]": a load with a byte displacement.
+; Stack:
+;   output point
+;   source register name
+;   source displacement value
+;   target register name
+defword mov_reg64_disp8_reg64, 0
+  dq docol, lit, 4, roll, rex_w, lit, 0x8B, pack8, lit, 4, unroll
+  dq reg64, unroll3, modrm_disp8_reg64
+  dq exit
+
+; Packs "lea target, [source + disp8]": address arithmetic, no memory access.
+; Stack:
+;   output point
+;   source register name
+;   source displacement value
+;   target register name
+defword lea_reg64_disp8_reg64, 0
+  dq docol, lit, 4, roll, rex_w, lit, 0x8D, pack8, lit, 4, unroll
+  dq reg64, unroll3, modrm_disp8_reg64
+  dq exit
+
+; Packs "add target, imm8" (opcode 0x83 with 0 in the reg/op field).
+; Stack:
+;   output point
+;   source value
+;   target register name
+defword add_reg64_imm8, 0
+  dq docol, roll3, rex_w, lit, 0x83, pack8
+  dq swap, lit, 0, swap, modrm_reg64, swap, pack8
   dq exit
 
 ; Stack:
@@ -2857,6 +3101,7 @@ defword lodsq, 0
 defword jmp_abs_indirect_reg64, 0
   dq docol
   dq swap, lit, 0xFF, pack8, swap
+  ; TODO use modrm_indirect_reg64 instead
   dq reg64, lit, 0, lit, 4, roll3, modrm
   dq exit
 
@@ -2989,6 +3234,10 @@ defword early_here_store, 0
 
 ;   Notice that we've switched over to stuff that follows the pack* idioms.
 ;
+;   This is a helper "macro" that we'll use in defining assembly words from
+; Forth. This is in a sense a redefinition of it; the flatassembler version of
+; it is far, far above, and has more documentation.
+;
 ; Stack in:
 ;   base address
 ; Stack out:
@@ -2996,13 +3245,45 @@ defword early_here_store, 0
 defword pack_next, 0
   dq docol, lodsq, rax, jmp_abs_indirect_reg64, exit
 
+;   Forth-side counterpart of the flatassembler pushcontrol macro, used when
+; defining assembly words from Forth; docol is its user. It packs
+; "lea rbp, [rbp - 8]" and then "mov [rbp + 0], source".
+;
+; Stack in:
+;   base address
+;   source register keyword
+; Stack out:
+;   new base address
+defword pack_pushcontrol, 0
+  dq docol, swap
+  dq rbp, lit, -8, rbp, lea_reg64_disp8_reg64
+  dq swap, rbp, lit, 0, mov_disp8_reg64_reg64
+  dq exit
+
+;   Forth-side counterpart of the flatassembler popcontrol macro, used when
+; defining assembly words from Forth; "exit" is its user. It packs
+; "mov target, [rbp + 0]" and then "lea rbp, [rbp + 8]", undoing
+; pack_pushcontrol. The disp8 form is needed because modrm_indirect_reg64
+; refuses rbp as its R/M register.
+;
+; Stack in:
+;   base address
+;   target register keyword
+; Stack out:
+;   new base address
+defword pack_popcontrol, 0
+  dq docol, rbp, lit, 0, roll3, mov_reg64_disp8_reg64
+  dq rbp, lit, 8, rbp, lea_reg64_disp8_reg64, exit
+
+;   Now we're back to heap idioms again.
+;
 ; Stack in:
 ;   heap address
 ;   address for new variable word to return
 ;   name string
 ; Stack out:
 ;   heap address
-defword early_defvar, 0
+defword early_variable, 0
   dq docol
   dq swap, unroll3, early_create, early_self_codeword
   ; (address to return, heap address)