summary refs log tree commit diff
diff options
context:
space:
mode:
author Irene Knapp <ireneista@irenes.space> 2026-04-25 23:02:47 -0700
committer Irene Knapp <ireneista@irenes.space> 2026-04-25 23:07:08 -0700
commit 51ffe53f3f11fdb94ee5e360029ab4181ff4cf10 (patch)
tree 6e221a18dcf03c1a888154a59f07064e69af5e4f
parent fb64087b6097038430e1f26816b4b6d5a8f4dd87 (diff)
implement a whole bunch of Forth words, most notably "if"
we decided to do flow control in a fun suffix-y way. so we introduced words { and } which track code blocks while compiling them, and the new word "if" uses memmove to slide that block around to insert a branch before it.

memmove is also new, implemented in flatassembler

also "if" relies on "'" which itself wants to be able to quote the word "lit", so the flatassembler word "literal" is introduced to break the cycle

Force-Push: yes
Change-Id: Iff17a15a1e748fe536e73ad05b1d4b06725a9f78
-rw-r--r-- quine.asm 160
1 files changed, 158 insertions, 2 deletions
diff --git a/quine.asm b/quine.asm
index 55af67d..9ffb10c 100644
--- a/quine.asm
+++ b/quine.asm
@@ -7196,6 +7196,23 @@ cold_start:
   dq swap, litstring, ";", early_find, roll3, execute
 
 
+  ; Although we will eventually define the word "'" to give us the symbol of a
+  ; word, it will rely on being able to compile a literal. Rather than do
+  ; lots of string processing later, we choose to define this word now to
+  ; avoid having to look up the word "lit" as part of that.
+  ;
+  ; In:
+  ;   value
+  dq litstring, "literal", early_create, early_docol_codeword
+  dq litstring, "lit", early_find, entry_to_execution_token, early_comma
+  dq litstring, "lit", early_find, entry_to_execution_token, early_comma
+  dq litstring, ",", early_find, entry_to_execution_token, early_comma
+  dq litstring, ",", early_find, entry_to_execution_token, early_comma
+  dq litstring, "exit", early_find, entry_to_execution_token, early_comma
+  dq early_here, fetch, lit, 8, packalign, early_here_store
+
+
+  ; Now the single most important word...
   dq litstring, "interpret", early_create, early_docol_codeword
   dq litstring, "word", early_find, entry_to_execution_token, early_comma
 
@@ -8224,7 +8241,7 @@ defword fetch_value_stack, 0
 ;   Also, the "c" was meant to indicate that it works at one-byte granularity,
 ; but that isn't, uh... actually an important property here, and as a blanket
 ; call we're not using letters to denote data sizes. So we call it "memcopy".
-; Apologies to C programmers but vowels are good, actually.
+; Apologies to the C programming tradition but vowels are good, actually.
 ;
 ;   Jonesforth also offers C@C! as another name for its CCOPY, but neither
 ; "@!" nor "mem@mem!" seems particulaly nice.
@@ -8247,6 +8264,46 @@ defword memcopy, 0
   mov.qreg.qreg rsi, rdx
   next
 
+; (written much later) Overlap-safe byte copy, built on rep movsb.
+;
+; In:
+;   destination
+;   source
+;   length (top of the stack; it is popped first)
+defword memmove, 0
+  dq $ + 8
+  ; rdx holds rsi until the end; the other registers we can trample.
+  mov.qreg.qreg rdx, rsi
+  pop.qreg rcx
+  pop.qreg rsi
+  pop.qreg rdi
+
+  ; Unsigned check of source < destination decides which end to start from.
+  mov.qreg.qreg rax, rsi
+  cmp.qreg.qreg rax, rdi
+  ; Offsets count from the end of the jmp: 4 skips the 2 + 2 bytes below.
+  jmp.cc.rel.bimm below, 4
+
+  ; Source is at or above destination: we slide downwards (or not at all),
+  ; from the low end. DF is left alone; presumably clear - TODO confirm.
+  rep movsb                         ; 2 bytes
+  jmp.rel.bimm 16                   ; 2 bytes; 16 = the seven lines below
+
+  ; Destination is greater: we slide upwards, starting from the high end, so
+  ; both pointers are moved to the last byte. std makes movsb count down and
+  ; cld clears DF again afterwards (DF is set then cleared, not saved).
+  add.qreg.qreg rsi, rcx            ; 3 bytes
+  dec.qreg rsi                      ; 3 bytes
+  add.qreg.qreg rdi, rcx            ; 3 bytes
+  dec.qreg rdi                      ; 3 bytes
+  std                               ; 1 byte
+  rep movsb                         ; 2 bytes
+  cld                               ; 1 byte
+
+  mov.qreg.qreg rsi, rdx            ; put back the rsi saved on entry
+  next
+
+
 ; Stack in:
 ;   string address
 ; Stack out:
@@ -10997,8 +11054,107 @@ defword boot_source, 0x40
   ; multiple of eight bytes long; any accidental null-padding that
   ; flatassembler inserts will be treated as a string terminator by
   ; attach-string-to-input-buffer.
-  dq "0 sys-exit                                                      "
+
+  ;   In general, we're going to want to be able to go on little excursions
+  ; where we define utility words that are only useful for one task, then
+  ; deallocate that stuff after we're done with it. We implement "forget",
+  ; which removes both dictionary entries and heap allocations for the entry
+  ; pointer it's given and everything that came after.
+  ;
+  ;   The implementation strategy is the same as Jonesforth's version, but
+  ; Jonesforth runs in immediate mode and reads a word to operate on, whereas
+  ; ours takes an entry pointer and runs in either compiled or immediate
+  ; modes.
+  ;
+  ; In:
+  ;   entry pointer
+  dq ": forget dup @ latest ! here ! ;                                "
+
+  ;   We'll be defining a lot of immediate words, so we should set up a terse
+  ; way to do that.
+  dq ": make-immediate latest @ set-word-immediate ;                  "
+  dq ": make-hidden latest @ hide-entry ;                             "
+
+  ;   The word "'" quotes the following word, looking it up and treating it as
+  ; a constant. In immediate mode, the constant winds up on the stack; in
+  ; compile mode it gets compiled.
+  ;
+  ;   There are a few possible implementation strategies here. Running as an
+  ; immediate word means there's a clear and unambiguous concept of "the
+  ; following word", so that's what we do; otherwise we'd have to get clever
+  ; about somehow finding out where we were called from. That means we take on
+  ; what would otherwise be the interpreter's responsibility, of checking what
+  ; mode we're in. Happily, that's easy to do.
+  ;
+  ;   Though it might be nice to have high-level flow control for this, our
+  ; implementation of "if" below relies on "'" several times, whereas "'" only
+  ; branches once. So we bootstrap "'" first.
+  dq ": ' word value@ find dropstring-with-result                     "
+  dq "  interpreter-flags @ 1 and 0branch [ 2 8 * , ] literal         "
+  dq "  ; make-immediate                                              "
+
+  ;   Sooner or later we'll want to define recursive words; this one lets us
+  ; do that. It compiles into a call to the word that's currently being
+  ; defined (strictly speaking, the one whose definition was most recently
+  ; begun).
+  dq ": recurse latest @ entry-to-execution-token , ; make-immediate  "
+
+  ;   We use a novel suffix-based approach to flow control. We define words
+  ; { and } which describe the boundaries of blocks of code, leaving a
+  ; description on the value stack, while still compiling the contents
+  ; normally.
+  ;
+  ;   Then follow-up words such as "if" can use that information to slide
+  ; the blocks around and insert any needed branches and other logic.
+  ;
+  ;   After compiling a { ... } block, the stack is (start pointer, length).
+  dq ": { here @ ; make-immediate                                     "
+  dq ": } dup here @ swap - ; make-immediate                          "
+
+  ; (start pointer, length)
+  dq ": if dup2 swap dup 5 8 * + 3unroll swap                         "
+  ; (start pointer, length, adjusted start pointer, start pointer, length)
+  dq "  memmove                                                       "
+  ; (start pointer, length)
+  dq "  swap here @ swap here ! swap                                  "
+  ; (old here, length)
+  dq "  ' lit entry-to-execution-token , 0 ,                          "
+  dq "  ' = entry-to-execution-token ,                                "
+  ;   The branch length needs to be one word longer than the block length,
+  ; because the length field itself is part of the scope of the branch.
+  dq "  ' 0branch entry-to-execution-token , dup 8 + ,                "
+  ; (old here, length)
+  dq "  drop 5 8 * + here ! ; make-immediate                          "
+
+  dq ": foo 5 6 < { 42 . } if ; foo                                   "
+
   dq "                                                                "
+
+  ; TODO define if/then/else
+  ; TODO define begin/until/again and repeat, or something like them
+  ; TODO consider defining case / endcase
+  ; TODO define ( ... ) comments
+  ; TODO define negate, true, false, not
+  ; TODO define constant (double-check variable)
+  ; TODO consider defining "value" and "to"
+  ; TODO consider defining "id"
+  ; TODO consider defining is-hidden and is-immediate
+  ; TODO consider defining 'word and "words"
+  ; TODO define ?
+  ; TODO define allot and cells
+  ; TODO consider what text stuff to define
+  ; TODO figure out a comment syntax
+  ; TODO fix the "describe" words
+  ; TODO stack trace
+  ; TODO make an interactive debugger
+  ; TODO argc argv envp etc
+  ; TODO consider "bye" and "unused"
+  ; TODO consider file API
+  ; TODO consider ";asm" or something
+  ; TODO consider a welcome message
+
+  ; If we get to this point, clean up and leave.
+  dq "0 sys-exit                                                      "
   dq 0