summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--core.e17
-rw-r--r--dynamic.e2
-rw-r--r--flow-control.e14
-rw-r--r--linux-dynamic.e5
-rw-r--r--log-load.e32
-rw-r--r--quine.asm35
-rw-r--r--transform.e21
7 files changed, 59 insertions, 67 deletions
diff --git a/core.e b/core.e
index a636c07..62e2301 100644
--- a/core.e
+++ b/core.e
@@ -757,15 +757,14 @@ here !
 ~   Jonesforth also offers C@C! as another name for its CCOPY, but neither
 ~ "@!" nor "mem@mem!" seems particulaly nice.
 ~
-~ TODO these parameters feel backwards
-~ (destination, source, length --)
+~ (source, destination, length --)
 : memcopy
   [ here @
     ~   We need to save and restore rsi; the other registers we can trample.
     :rsi :rdx mov-reg64-reg64
     :rcx pop-reg64
-    :rsi pop-reg64
     :rdi pop-reg64
+    :rsi pop-reg64
     ~   We start from the low end, since that's easier arithmetic. So, we get
     ~ to leave the DF flag alone.
     rep-movs8
@@ -777,15 +776,14 @@ here !
 ~ careful about which end the transfer starts from. This "move" vs. "copy"
 ~ distinction mirrors C terminology.
 ~
-~ TODO these parameters feel backwards
-~ (destination, source, length --)
+~ (source, destination, length --)
 : memmove
   [ here @
     ~ We need to save and restore rsi; the other registers we can trample.
     :rsi :rdx mov-reg64-reg64
     :rcx pop-reg64
-    :rsi pop-reg64
     :rdi pop-reg64
+    :rsi pop-reg64
 
     ~ We need to check source < destination to decide which end to start from.
     :rsi :rax mov-reg64-reg64
@@ -1192,10 +1190,11 @@ here !
 : packstring
   dup stringlen 1 + dup
   ~ (output point, source, length, length)
-  4 roll dup 5 unroll
-  ~ (destination, source, length, length, output point)
+  4 roll dup 4 unroll
+
+  ~ (source, destination, length, length, output point)
   + 4 unroll
-  ~ (output point, destination, source, length)
+  ~ (output point, source, destination, length)
   memcopy ;
 
 ~ (output point, alignment byte count -- output point)
diff --git a/dynamic.e b/dynamic.e
index 39ddd3f..a260abe 100644
--- a/dynamic.e
+++ b/dynamic.e
@@ -344,7 +344,7 @@
   ~ do this before writing anything else in the entry header, to avoid
   ~ stepping on it. The name string always starts ten bytes into the header,
   ~ so we can use a fixed offset.
-  here @ 10 + 3unroll memmove
+  here @ 10 + swap memmove
   ~ (name field length)
 
   ~   Now we can get back to the fields that belong at the start of the entry
diff --git a/flow-control.e b/flow-control.e
index ba6a5b6..bee5038 100644
--- a/flow-control.e
+++ b/flow-control.e
@@ -25,8 +25,8 @@
 
 ~ (start pointer, length --)
 : if
-  2dup swap dup 5 8 * + 3unroll swap
-  ~ (start pointer, length, adjusted start pointer, start pointer, length)
+  2dup swap dup 5 8 * + 3roll
+  ~ (start pointer, length, start pointer, adjusted start pointer, length)
   memmove
   ~ (start pointer, length)
 
@@ -47,7 +47,7 @@
 
 ~ (start pointer, length --)
 : unless
-  2dup swap dup 5 8 * + 3unroll swap
+  2dup swap dup 5 8 * + 3roll
   ~ (start pointer, length, start pointer, adjusted start pointer, length)
   memmove
   ~ (start pointer, length)
@@ -71,9 +71,9 @@
   ~ Those spaces will take five words, and two words, respectively. So the
   ~ false-block gets moved by seven words, and the true-block gets moved by
   ~ five words.
-  2dup swap dup 7 8 * + swap 3roll memmove
-  4 roll dup 5 unroll 4 roll dup 5 unroll
-  swap dup 5 8 * + swap 3roll memmove
+  2dup swap dup 7 8 * + 3roll memmove
+  ~ (true start, true length, false start, false length)
+  3 pick dup 5 8 * + 4 pick memmove
   ~ (true start, true length, false start, false length)
 
   ~   Now we write out the initial test-and-branch.
@@ -110,7 +110,7 @@
 ~ (test start, test length, body start, body length --)
 : while
   ~   The conditional branch needs five words.
-  2dup swap dup 5 8 * + swap 3roll memmove
+  2dup swap dup 5 8 * + 3roll memmove
   here @ 5 unroll swap dup 3unroll here !
   ~ (old here, test start, test length, body start, body length)
   ' lit entry-to-execution-token , 0 ,
diff --git a/linux-dynamic.e b/linux-dynamic.e
index 4c73bb7..7890043 100644
--- a/linux-dynamic.e
+++ b/linux-dynamic.e
@@ -97,9 +97,6 @@
 ~   It's possible to set up an alternate stack for signal handlers. We don't,
 ~ though, so it's possible this code has bitrotted. At the very least, it
 ~ should be more configurable than this.
-~
-~   Note that for it to actually be used, there also needs to be a flag set
-~ at the time the action is bound.
 : prepare-signal-stack
   here @ 2048 packalign here !
   1024 1024 * 4 * allocate
@@ -270,7 +267,7 @@
   ~ (saved location, second half execution token)
 
   ~   Something subtle here: That above "codeword" was actually a word
-  ~ pointer. See, because we're pretending the wrapper is Forth word, even
+  ~ pointer. See, because we're pretending the wrapper is a Forth word, even
   ~ though we're writing it in assembly, so "returning" to it means invoking
   ~ the next word pointer in the word pointer array that is its compiled form.
   ~ Instead of creating a separate memory area though, we just put the pointer
diff --git a/log-load.e b/log-load.e
index 55fa25d..a462c2e 100644
--- a/log-load.e
+++ b/log-load.e
@@ -178,7 +178,7 @@
   dup stringlen 1 + dup 3unroll
   ~ (log address, name field length, string pointer, name field length)
 
-  3 pick log-load-here swap drop @ 10 + 3unroll memmove
+  3 pick log-load-here swap drop @ 10 + swap memmove
   ~ (log address, name field length)
 
   over log-load-here swap drop @
@@ -372,8 +372,8 @@
   ~ (log address, start, length, length, start, start)
   5 8 * +
   ~ (log address, start, length, length, start, adjusted start)
-  3unroll swap
-  ~ (log address, start, length, adjusted start, start, length)
+  3roll
+  ~ (log address, start, length, start, adjusted start, length)
   memmove
   ~ (log address, start, length)
   swap 3roll log-load-here dup @
@@ -410,8 +410,8 @@
   ~ (log address, start, length, length, start, start)
   5 8 * +
   ~ (log address, start, length, length, start, adjusted start)
-  3unroll swap
-  ~ (log address, start, length, adjusted start, start, length)
+  3roll
+  ~ (log address, start, length, start, adjusted start, length)
   memmove
   ~ (log address, start, length)
   swap 3roll log-load-here dup @
@@ -446,23 +446,17 @@
   5 unroll 2dup
   ~ (log address, true start, true length, false start, false length,
   ~  false start, false length)
-  swap dup 7 8 * + swap 3roll
+  swap dup 7 8 * + 3roll
   ~ (log address, true start, true length, false start, false length,
-  ~  adjusted false start, false start, false length)
+  ~  false start, adjusted false start, false length)
   memmove
   ~ (log address, true start, true length, false start, false length)
-  4 roll dup 5 unroll
-  ~ (log address, true start, true length, false start, false length,
-  ~  true start)
-  4 roll dup 5 unroll
-  ~ (log address, true start, true length, false start, false length,
-  ~  true start, true length)
-  swap dup 5 8 * +
+  3 pick dup 5 8 * +
   ~ (log address, true start, true length, false start, false length,
-  ~  true length, true start, adjusted true start)
-  swap 3roll
+  ~  true start, adjusted true start)
+  4 pick
   ~ (log address, true start, true length, false start, false length,
-  ~  adjusted true start, true start, true length)
+  ~  true start, adjusted true start, true length)
   memmove
   ~ (log address, true start, true length, false start, false length)
 
@@ -523,9 +517,9 @@
   5 unroll 2dup
   ~ (log address, test start, test length, body start, body length,
   ~  body start, body length)
-  swap dup 5 8 * + swap 3roll
+  swap dup 5 8 * + 3roll
   ~ (log address, test start, test length, body start, body length,
-  ~  adjusted body start, body start, body length)
+  ~  body start, adjusted body start, body length)
   memmove
 
   ~ (log address, test start, test length, body start, body length)
diff --git a/quine.asm b/quine.asm
index 7d5d9fa..b47eb5a 100644
--- a/quine.asm
+++ b/quine.asm
@@ -2453,8 +2453,8 @@ cold_start:
   dq early_here, fetch
   dq rsi, rdx, mov_reg64_reg64
   dq rcx, pop_reg64
-  dq rsi, pop_reg64
   dq rdi, pop_reg64
+  dq rsi, pop_reg64
   dq rep_movs8
   dq rdx, rsi, mov_reg64_reg64
   dq pack_next, lit, 8, packalign, early_here_store
@@ -2463,8 +2463,8 @@ cold_start:
   dq early_here, fetch
   dq rsi, rdx, mov_reg64_reg64
   dq rcx, pop_reg64
-  dq rsi, pop_reg64
   dq rdi, pop_reg64
+  dq rsi, pop_reg64
   dq rsi, rax, mov_reg64_reg64
   dq rdi, rax, cmp_reg64_reg64
   dq lit, 4, cc_below, jmp_cc_rel_imm8
@@ -3025,7 +3025,7 @@ cold_start:
   dq litstring, "roll", early_find, entry_to_execution_token, early_comma
   dq litstring, "dup", early_find, entry_to_execution_token, early_comma
   dq litstring, "lit", early_find, entry_to_execution_token, early_comma
-  dq lit, 5, early_comma
+  dq lit, 4, early_comma
   dq litstring, "unroll", early_find, entry_to_execution_token, early_comma
   dq litstring, "+", early_find, entry_to_execution_token, early_comma
   dq litstring, "lit", early_find, entry_to_execution_token, early_comma
@@ -6133,7 +6133,7 @@ cold_start:
   dq litstring, "lit", early_find, entry_to_execution_token, early_comma
   dq lit, 10, early_comma
   dq litstring, "+", early_find, entry_to_execution_token, early_comma
-  dq litstring, "3unroll", early_find, entry_to_execution_token, early_comma
+  dq litstring, "swap", early_find, entry_to_execution_token, early_comma
   dq litstring, "memmove", early_find, entry_to_execution_token, early_comma
   dq litstring, "here", early_find, entry_to_execution_token, early_comma
   dq litstring, "@", early_find, entry_to_execution_token, early_comma
@@ -8900,8 +8900,8 @@ defword memcopy, 0
   ; We need to save and restore rsi; the other registers we can trample.
   mov.qreg.qreg rdx, rsi
   pop.qreg rcx
-  pop.qreg rsi
   pop.qreg rdi
+  pop.qreg rsi
   ; We start from the low end, since that's easier arithmetic. So, we get to
   ; leave the DF flag alone.
   rep movsb
@@ -8919,8 +8919,8 @@ defword memmove, 0
   ; We need to save and restore rsi; the other registers we can trample.
   mov.qreg.qreg rdx, rsi
   pop.qreg rcx
-  pop.qreg rsi
   pop.qreg rdi
+  pop.qreg rsi
 
   ; We need to check source < destination to decide which end to start from.
   mov.qreg.qreg rax, rsi
@@ -9532,10 +9532,10 @@ defword packstring, 0
   dq docol
   dq dup, stringlen, lit, 1, add, dup
   ; base/destination, source, length, length
-  dq lit, 4, roll, dup, lit, 5, unroll
-  ; destination, source, length, length, base/destination
+  dq lit, 4, roll, dup, lit, 4, unroll
+  ; source, destination, length, length, base/destination
   dq add, lit, 4, unroll
-  ; new base, destination, source, length
+  ; new base, source, destination, length
   dq memcopy
   dq exit
 
@@ -11710,7 +11710,7 @@ defword self_raw, 0
   ; destination destination source length
   dq dup, lit, 4, roll, add, lit, 4, unroll
   ; result destination source length
-  dq memcopy
+  dq memcopy ; broken since memcopy args change
   dq exit
 
 
@@ -11829,8 +11829,7 @@ defword boot_source, 0x40
   dq ": } dup here @ swap - ; make-immediate                          "
 
   ; (start pointer, length)
-  dq ": if 2dup swap dup 5 8 * + 3unroll swap                         "
-  ; ~ TODO this is wrong, it goes adjusted start, start, length? probably?
+  dq ": if 2dup swap dup 5 8 * + 3roll                                "
   ; (start pointer, length, start pointer, adjusted start pointer, length)
   dq "  memmove                                                       "
   ; (start pointer, length)
@@ -11845,7 +11844,7 @@ defword boot_source, 0x40
   dq "  drop 5 8 * + here ! ; make-immediate                          "
 
   ; (start pointer, length)
-  dq ": unless 2dup swap dup 5 8 * + 3unroll swap                     "
+  dq ": unless 2dup swap dup 5 8 * + 3roll                            "
   ; (start pointer, length, start pointer, adjusted start pointer, length)
   dq "  memmove                                                       "
   ; (start pointer, length)
@@ -11869,9 +11868,13 @@ defword boot_source, 0x40
   ; Those spaces will take five words, and two words, respectively. So the
   ; false-block gets moved by seven words, and the true-block gets moved by
   ; five words.
-  dq "  2dup swap dup 7 8 * + swap 3roll memmove                      "
+  ;
+  ;   Note that this has diverged slightly from the self-hosted version,
+  ; because in this version we don't have "pick" yet.
+  dq "  2dup swap dup 7 8 * + 3roll memmove                           "
   dq "  4 roll dup 5 unroll 4 roll dup 5 unroll                       "
-  dq "  swap dup 5 8 * + swap 3roll memmove                           "
+  dq "  swap dup 5 8 * + 3roll memmove                                "
+
   ; (true start, true length, false start, false length)
   ;
   ;   Now we write out the initial test-and-branch.
@@ -11906,7 +11909,7 @@ defword boot_source, 0x40
   ; (test start, test length, body start, body length)
   dq ": while                                                         "
   ;   The conditional branch needs five words.
-  dq "  2dup swap dup 5 8 * + swap 3roll memmove                      "
+  dq "  2dup swap dup 5 8 * + 3roll memmove                           "
   dq "  here @ 5 unroll swap dup 3unroll here !                       "
   ; (old here, test start, test length, body start, body length)
   dq "  ' lit entry-to-execution-token , 0 ,                          "
diff --git a/transform.e b/transform.e
index 4004db9..1230883 100644
--- a/transform.e
+++ b/transform.e
@@ -220,7 +220,7 @@
 : push-input-buffer
   allocate-input-buffer-metadata
   ~ (original metadata pointer, new metadata pointer)
-  2dup swap 6 8 * memcopy
+  2dup 6 8 * memcopy
   ~ (original metadata pointer, new metadata pointer)
   swap dup zero-input-buffer-metadata
   input-buffer-next-source ! ;
@@ -237,7 +237,7 @@
 : pop-input-buffer
   dup input-buffer-next-source @
   ~ (original metadata pointer, next source metadata pointer)
-  dup { 6 8 * memcopy }
+  dup { swap 6 8 * memcopy }
       { drop zero-input-buffer-metadata } if-else ;
 
 
@@ -839,7 +839,7 @@ allocate-transform-state s" transform-state" variable
 ~ in dynamic.e before attempting to understand label-create-alternate.
 : label-create-alternate
   dup stringlen 1 + dup 3unroll
-  here @ 10 + 3unroll memmove
+  here @ 10 + swap memmove
   here @
 
   ~   This value of "latest" is going into the generated output, so we need
@@ -1066,8 +1066,8 @@ allocate-transform-state s" transform-state" variable
 ~
 ~ (start pointer, length --)
 : label-if-alternate
-  2dup swap dup 5 8 * + 3unroll swap
-  ~ (start pointer, length, start pointer, adjusted start pointer, length)
+  2dup swap dup 5 8 * + 3roll
+  ~ (start pointer, length, adjusted start pointer, start pointer, length)
   memmove
   ~ (start pointer, length)
   swap here @ swap here ! swap
@@ -1088,8 +1088,8 @@ allocate-transform-state s" transform-state" variable
 
 ~ (start pointer, length)
 : label-unless-alternate
-  2dup swap dup 5 8 * + 3unroll swap
-  ~ (start pointer, length, start pointer, adjusted start pointer, length)
+  2dup swap dup 5 8 * + 3roll
+  ~ (start pointer, length, adjusted start pointer, start pointer, length)
   memmove
   ~ (start pointer, length)
   swap here @ swap here ! swap
@@ -1116,9 +1116,8 @@ allocate-transform-state s" transform-state" variable
   ~ Those spaces will take five words, and two words, respectively. So the
   ~ false-block gets moved by seven words, and the true-block gets moved by
   ~ five words.
-  2dup swap dup 7 8 * + swap 3roll memmove
-  4 roll dup 5 unroll 4 roll dup 5 unroll
-  swap dup 5 8 * + swap 3roll memmove
+  2dup swap dup 7 8 * + 3roll memmove
+  3 pick dup 5 8 * + 4 pick memmove
   ~ (true start, true length, false start, false length)
 
   ~   Now we write out the initial test-and-branch.
@@ -1161,7 +1160,7 @@ allocate-transform-state s" transform-state" variable
 ~ (test start, test length, body start, body length --)
 : label-while-alternate
   ~ The conditional branch needs five words.
-  2dup swap dup 5 8 * + swap 3roll memmove
+  2dup swap dup 5 8 * + 3roll memmove
   here @ 5 unroll swap dup 3unroll here !
   ~ (old here, test start, test length, body start, body length)
   swap-transform-variables