summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--quine.asm93
1 files changed, 78 insertions, 15 deletions
diff --git a/quine.asm b/quine.asm
index 52460e4..206c6a7 100644
--- a/quine.asm
+++ b/quine.asm
@@ -1728,14 +1728,14 @@ _start:
   ;;;   The return value of the system call is in rax, we'll use it in a sec.
   ;;; We need to save this somewhere in case we ever want to munmap() it;
   ;;; there's no widely-used name for it so we have to make one up. S0 and R0
-  ;;; are widely-used names for the logical tops of the value and control
-  ;;; stacks, respectively, and we will eventually set those up as well, so we
-  ;;; should keep those names in mind. The control stack lives within the
-  ;;; heap, while the value stack is its own segment. This value, though, is
-  ;;; the physical bottom of the segment, meaning that it stays the same even
-  ;;; as we allocate and deallocate things within it. This is unlike the two
-  ;;; stack pointers, so we give it a name that doesn't suggest similarity:
-  ;;; HEAP.
+  ;;; are widely-used names for the physical tops (logical bottoms) of the
+  ;;; value and control stacks, respectively, and we will eventually set those
+  ;;; up as well, so we should keep those names in mind. The control stack
+  ;;; lives within the heap, while the value stack is its own segment. This
+  ;;; value, though, is the physical bottom of the segment, meaning that it
+  ;;; stays the same even as we allocate and deallocate things within it. This
+  ;;; is unlike the two stack pointers, so we give it a name that doesn't
+  ;;; suggest similarity: HEAP.
   ;;;
   ;;;   Once Forth is fully set up, its internal variables will be accessed
   ;;; through variable-words like any other Forth data, including HEAP. To get
@@ -1788,9 +1788,11 @@ _start:
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi    ; HEAP
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp    ; S0
   mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp    ; R0
-  lea.qreg.disp32.qreg rax, control_stack_size + 0x20, rdi
-  mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax    ; HERE
-  ; TODO also consider LATEST and STATE
+  mov.qreg.qimm rax, final_word_name
+  mov.qreg.disp32.qreg rdi, control_stack_size + 0x18, rax    ; LATEST
+  lea.qreg.disp32.qreg rax, control_stack_size + 0x28, rdi
+  mov.qreg.disp32.qreg rdi, control_stack_size + 0x20, rax    ; HERE
+  ; TODO also consider STATE
   ; strictly speaking, R0 could be a constant... but it isn't known until
   ; runtime, so we might as well make it a variable
   ;;;
@@ -1810,6 +1812,8 @@ _start:
   ;;;   value. So, it would also be accurate to say that it points immediately
   ;;;   after the physical top of the allocated space. At any rate, the
   ;;;   address it points to is the first one that hasn't been used yet.
+  ;;; * LATEST is the address of the most-recently-defined word's header.
+  ;;;   Defining new words changes this value.
   ;;;
   ;;;   S0 and R0 are mostly used when we want to initialize or reinitialize
   ;;; their respective stacks - that is, discard all their contents at once.
@@ -2448,6 +2452,65 @@ zbranch_after_jmp:
   lodsq                          ; just a convenient way to skip rsi forward
   NEXT
 
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Runtime word definition ;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;;   We need to be able to use Forth code that defines other Forth code; it
+;;; will be the easiest way to create our Forth-flavored assembly language. To
+;;; do that, there's a critical bootstrapping problem: The variables that we
+;;; allocate on the heap in _start must be used by the Forth words that define
+;;; other Forth words (and, in general, they are needed for anything that
+;;; does memory management). Since the heap's address isn't known until
+;;; runtime, we can't use build-time labels to locate the heap or the
+;;; variables on it, in the way we've done for everything up to now.
+;;;
+;;;   In fact, it's worse than that: nothing we define at build time can
+;;; statically reference anything on the heap; it must always be given the
+;;; heap's address as a parameter. That also means statically defined words
+;;; can't reference runtime words, at all, except via special code that goes
+;;; outside the normal Forth execution model.
+;;;
+;;;   So, what we'll want to do to deal with that is copy ourselves onto the
+;;; heap, and run the rest of the program from there. It would be okay in
+;;; principle to keep a few things such as DOCOL in the static code segment,
+;;; as long as they don't need to reference the heap, though it might in some
+;;; sense be more elegant to entirely abandon the code segment and run
+;;; heap-only. At any rate, to keep the set of stuff we have to copy small,
+;;; we'll want to make the change-over as soon as possible.
+;;;
+;;;   Following a common Forth practice, we implement variables as words that
+;;; push the corresponding addresses onto the stack. Since the variables that
+;;; define the heap are ON the heap, we will definitely need two distinct
+;;; versions of these; having the two versions be almost-compatible allows us
+;;; to minimize code duplication. Since they do have slightly different
+;;; interfaces, we prefix the names of the early ones with "early_".
+
+; Stack in:
+;   heap address
+; Stack out:
+;   heap address
+;   requested variable address
+defword early_heap, 0
+  dq DOCOL, DUP, LIT, control_stack_size + 0x00, ADD, EXIT    ; address of HEAP
+defword early_s0, 0
+  dq DOCOL, early_heap, LIT, 0x08, ADD, EXIT                  ; address of S0
+defword early_r0, 0
+  dq DOCOL, early_heap, LIT, 0x10, ADD, EXIT                  ; address of R0
+defword early_latest, 0
+  dq DOCOL, early_heap, LIT, 0x18, ADD, EXIT                  ; address of LATEST
+defword early_here, 0
+  dq DOCOL, early_heap, LIT, 0x20, ADD, EXIT                  ; address of HERE
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Now.... what was our original goal, again? ;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;
+;;;   Finally, having fully bootstrapped our runtime environment, we move on
+;;; to the core stuff we actually want to accomplish. For this quine, that's
+;;; outputting itself.
+
 ;;;
 ;;;   One of the most charming naming traditions in Forth is that the
 ;;; top-level word that stays running forever, is called "quit".
@@ -2590,10 +2653,9 @@ defword LITPACK8, 0
 defword QUINE, 0
   dq DOCOL                       ; codeword
 
-  ; We still have HEAP on the stack. Use it to find HERE...
-  dq DUP, LIT, control_stack_size + 0x18, ADD
-  ; ... add a constant to HERE in-place, keeping a copy of the pointer ...
-  dq DUP, LIT, 0x78, SWAP, ADDSTORE
+  ; We still have HEAP on the stack, so we can call early_here. DUP the result
+  ; so a copy of the pointer survives ADDSTORE adding a constant to HERE ...
+  dq early_here, DUP, LIT, 0x78, SWAP, ADDSTORE
   ; ... and now we have allocated a block of memory, with its address on the
   ; stack. We also still have HEAP at the bottom of the stack, for future use.
 
@@ -2957,6 +3019,7 @@ defword syscall, 0
   dq DOCOL, LIT, 0x0F, PACK8, LIT, 0x05, PACK8, EXIT
 
 
+final_word_name = syscall_name  ; _start stores this as the initial LATEST; NOTE(review): presumably must track the last defword
code_size = $ - code_start       ; distance from code_start to the current address
file_size = $ - $$               ; NOTE(review): presumably the total output size ($$ = base address) - confirm