From 3b00a5376036bb3f50a1a1f97d136305a2d03a57 Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Thu, 28 May 2026 19:34:07 -0700 Subject: change the parameter order for memcopy and memmove that was surprisingly involved, but it feels like the right thing to do Change-Id: Ia2f38c7278f4237cebd0435d27131fe32dbc3718 Force-Push: yes --- core.e | 17 ++++++++--------- dynamic.e | 2 +- flow-control.e | 14 +++++++------- linux-dynamic.e | 5 +---- log-load.e | 32 +++++++++++++------------------- quine.asm | 35 +++++++++++++++++++---------------- transform.e | 21 ++++++++++----------- 7 files changed, 59 insertions(+), 67 deletions(-) diff --git a/core.e b/core.e index a636c07..62e2301 100644 --- a/core.e +++ b/core.e @@ -757,15 +757,14 @@ here ! ~ Jonesforth also offers C@C! as another name for its CCOPY, but neither ~ "@!" nor "mem@mem!" seems particulaly nice. ~ -~ TODO these parameters feel backwards -~ (destination, source, length --) +~ (source, destination, length --) : memcopy [ here @ ~ We need to save and restore rsi; the other registers we can trample. :rsi :rdx mov-reg64-reg64 :rcx pop-reg64 - :rsi pop-reg64 :rdi pop-reg64 + :rsi pop-reg64 ~ We start from the low end, since that's easier arithmetic. So, we get ~ to leave the DF flag alone. rep-movs8 @@ -777,15 +776,14 @@ here ! ~ careful about which end the transfer starts from. This "move" vs. "copy" ~ distinction mirrors C terminology. ~ -~ TODO these parameters feel backwards -~ (destination, source, length --) +~ (source, destination, length --) : memmove [ here @ ~ We need to save and restore rsi; the other registers we can trample. :rsi :rdx mov-reg64-reg64 :rcx pop-reg64 - :rsi pop-reg64 :rdi pop-reg64 + :rsi pop-reg64 ~ We need to check source < destination to decide which end to start from. :rsi :rax mov-reg64-reg64 @@ -1192,10 +1190,11 @@ here ! : packstring dup stringlen 1 + dup ~ (output point, source, length, length) - 4 roll dup 5 unroll - ~ (destination, source, length, length, output point) + 4 roll dup 4 unroll + + ~ (source, destination, length, length, output point) + 4 unroll - ~ (output point, destination, source, length) + ~ (output point, source, destination, length) memcopy ; ~ (output point, alignment byte count -- output point) diff --git a/dynamic.e b/dynamic.e index 39ddd3f..a260abe 100644 --- a/dynamic.e +++ b/dynamic.e @@ -344,7 +344,7 @@ ~ do this before writing anything else in the entry header, to avoid ~ stepping on it. The name string always starts ten bytes into the header, ~ so we can use a fixed offset. - here @ 10 + 3unroll memmove + here @ 10 + swap memmove ~ (name field length) ~ Now we can get back to the fields that belong at the start of the entry diff --git a/flow-control.e b/flow-control.e index ba6a5b6..bee5038 100644 --- a/flow-control.e +++ b/flow-control.e @@ -25,8 +25,8 @@ ~ (start pointer, length --) : if - 2dup swap dup 5 8 * + 3unroll swap - ~ (start pointer, length, adjusted start pointer, start pointer, length) + 2dup swap dup 5 8 * + 3roll + ~ (start pointer, length, start pointer, adjusted start pointer, length) memmove ~ (start pointer, length) @@ -47,7 +47,7 @@ ~ (start pointer, length --) : unless - 2dup swap dup 5 8 * + 3unroll swap + 2dup swap dup 5 8 * + 3roll ~ (start pointer, length, start pointer, adjusted start pointer, length) memmove ~ (start pointer, length) @@ -71,9 +71,9 @@ ~ Those spaces will take five words, and two words, respectively. So the ~ false-block gets moved by seven words, and the true-block gets moved by ~ five words. - 2dup swap dup 7 8 * + swap 3roll memmove - 4 roll dup 5 unroll 4 roll dup 5 unroll - swap dup 5 8 * + swap 3roll memmove + 2dup swap dup 7 8 * + 3roll memmove + ~ (true start, true length, false start, false length) + 3 pick dup 5 8 * + 4 pick memmove ~ (true start, true length, false start, false length) ~ Now we write out the initial test-and-branch. @@ -110,7 +110,7 @@ ~ (test start, test length, body start, body length --) : while ~ The conditional branch needs five words. - 2dup swap dup 5 8 * + swap 3roll memmove + 2dup swap dup 5 8 * + 3roll memmove here @ 5 unroll swap dup 3unroll here ! ~ (old here, test start, test length, body start, body length) ' lit entry-to-execution-token , 0 , diff --git a/linux-dynamic.e b/linux-dynamic.e index 4c73bb7..7890043 100644 --- a/linux-dynamic.e +++ b/linux-dynamic.e @@ -97,9 +97,6 @@ ~ It's possible to set up an alternate stack for signal handlers. We don't, ~ though, so it's possible this code has bitrotted. At the very least, it ~ should be more configurable than this. -~ -~ Note that for it to actually be used, there also needs to be a flag set -~ at the time the action is bound. : prepare-signal-stack here @ 2048 packalign here ! 1024 1024 * 4 * allocate @@ -270,7 +267,7 @@ ~ (saved location, second half execution token) ~ Something subtle here: That above "codeword" was actually a word - ~ pointer. See, because we're pretending the wrapper is Forth word, even + ~ pointer. See, because we're pretending the wrapper is a Forth word, even ~ though we're writing it in assembly, so "returning" to it means invoking ~ the next word pointer in the word pointer array that is its compiled form. ~ Instead of creating a separate memory area though, we just put the pointer diff --git a/log-load.e b/log-load.e index 55fa25d..a462c2e 100644 --- a/log-load.e +++ b/log-load.e @@ -178,7 +178,7 @@ dup stringlen 1 + dup 3unroll ~ (log address, name field length, string pointer, name field length) - 3 pick log-load-here swap drop @ 10 + 3unroll memmove + 3 pick log-load-here swap drop @ 10 + swap memmove ~ (log address, name field length) over log-load-here swap drop @ @@ -372,8 +372,8 @@ ~ (log address, start, length, length, start, start) 5 8 * + ~ (log address, start, length, length, start, adjusted start) - 3unroll swap - ~ (log address, start, length, adjusted start, start, length) + 3roll + ~ (log address, start, length, start, adjusted start, length) memmove ~ (log address, start, length) swap 3roll log-load-here dup @ @@ -410,8 +410,8 @@ ~ (log address, start, length, length, start, start) 5 8 * + ~ (log address, start, length, length, start, adjusted start) - 3unroll swap - ~ (log address, start, length, adjusted start, start, length) + 3roll + ~ (log address, start, length, start, adjusted start, length) memmove ~ (log address, start, length) swap 3roll log-load-here dup @ @@ -446,23 +446,17 @@ 5 unroll 2dup ~ (log address, true start, true length, false start, false length, ~ false start, false length) - swap dup 7 8 * + swap 3roll + swap dup 7 8 * + 3roll ~ (log address, true start, true length, false start, false length, - ~ adjusted false start, false start, false length) + ~ false start, adjusted false start, false length) memmove ~ (log address, true start, true length, false start, false length) - 4 roll dup 5 unroll - ~ (log address, true start, true length, false start, false length, - ~ true start) - 4 roll dup 5 unroll - ~ (log address, true start, true length, false start, false length, - ~ true start, true length) - swap dup 5 8 * + + 3 pick dup 5 8 * + ~ (log address, true start, true length, false start, false length, - ~ true length, true start, adjusted true start) - swap 3roll + ~ true start, adjusted true start) + 4 pick ~ (log address, true start, true length, false start, false length, - ~ adjusted true start, true start, true length) + ~ true start, adjusted true start, true length) memmove ~ (log address, true start, true length, false start, false length) @@ -523,9 +517,9 @@ 5 unroll 2dup ~ (log address, test start, test length, body start, body length, ~ body start, body length) - swap dup 5 8 * + swap 3roll + swap dup 5 8 * + 3roll ~ (log address, test start, test length, body start, body length, - ~ adjusted body start, body start, body length) + ~ body start, adjusted body start, body length) memmove ~ (log address, test start, test length, body start, body length) diff --git a/quine.asm b/quine.asm index 7d5d9fa..b47eb5a 100644 --- a/quine.asm +++ b/quine.asm @@ -2453,8 +2453,8 @@ cold_start: dq early_here, fetch dq rsi, rdx, mov_reg64_reg64 dq rcx, pop_reg64 - dq rsi, pop_reg64 dq rdi, pop_reg64 + dq rsi, pop_reg64 dq rep_movs8 dq rdx, rsi, mov_reg64_reg64 dq pack_next, lit, 8, packalign, early_here_store @@ -2463,8 +2463,8 @@ cold_start: dq early_here, fetch dq rsi, rdx, mov_reg64_reg64 dq rcx, pop_reg64 - dq rsi, pop_reg64 dq rdi, pop_reg64 + dq rsi, pop_reg64 dq rsi, rax, mov_reg64_reg64 dq rdi, rax, cmp_reg64_reg64 dq lit, 4, cc_below, jmp_cc_rel_imm8 @@ -3025,7 +3025,7 @@ cold_start: dq litstring, "roll", early_find, entry_to_execution_token, early_comma dq litstring, "dup", early_find, entry_to_execution_token, early_comma dq litstring, "lit", early_find, entry_to_execution_token, early_comma - dq lit, 5, early_comma + dq lit, 4, early_comma dq litstring, "unroll", early_find, entry_to_execution_token, early_comma dq litstring, "+", early_find, entry_to_execution_token, early_comma dq litstring, "lit", early_find, entry_to_execution_token, early_comma @@ -6133,7 +6133,7 @@ cold_start: dq litstring, "lit", early_find, entry_to_execution_token, early_comma dq lit, 10, early_comma dq litstring, "+", early_find, entry_to_execution_token, early_comma - dq litstring, "3unroll", early_find, entry_to_execution_token, early_comma + dq litstring, "swap", early_find, entry_to_execution_token, early_comma dq litstring, "memmove", early_find, entry_to_execution_token, early_comma dq litstring, "here", early_find, entry_to_execution_token, early_comma dq litstring, "@", early_find, entry_to_execution_token, early_comma @@ -8900,8 +8900,8 @@ defword memcopy, 0 ; We need to save and restore rsi; the other registers we can trample. mov.qreg.qreg rdx, rsi pop.qreg rcx - pop.qreg rsi pop.qreg rdi + pop.qreg rsi ; We start from the low end, since that's easier arithmetic. So, we get to ; leave the DF flag alone. rep movsb @@ -8919,8 +8919,8 @@ defword memmove, 0 ; We need to save and restore rsi; the other registers we can trample. mov.qreg.qreg rdx, rsi pop.qreg rcx - pop.qreg rsi pop.qreg rdi + pop.qreg rsi ; We need to check source < destination to decide which end to start from. mov.qreg.qreg rax, rsi @@ -9532,10 +9532,10 @@ defword packstring, 0 dq docol dq dup, stringlen, lit, 1, add, dup ; base/destination, source, length, length - dq lit, 4, roll, dup, lit, 5, unroll - ; destination, source, length, length, base/destination + dq lit, 4, roll, dup, lit, 4, unroll + ; source, destination, length, length, base/destination dq add, lit, 4, unroll - ; new base, destination, source, length + ; new base, source, destination, length dq memcopy dq exit @@ -11710,7 +11710,7 @@ defword self_raw, 0 ; destination destination source length dq dup, lit, 4, roll, add, lit, 4, unroll ; result destination source length - dq memcopy + dq memcopy ; broken since memcopy args change dq exit @@ -11829,8 +11829,7 @@ defword boot_source, 0x40 dq ": } dup here @ swap - ; make-immediate " ; (start pointer, length) - dq ": if 2dup swap dup 5 8 * + 3unroll swap " - ; ~ TODO this is wrong, it goes adjusted start, start, length? probably? + dq ": if 2dup swap dup 5 8 * + 3roll " ; (start pointer, length, start pointer, adjusted start pointer, length) dq " memmove " ; (start pointer, length) @@ -11845,7 +11844,7 @@ defword boot_source, 0x40 dq " drop 5 8 * + here ! ; make-immediate " ; (start pointer, length) - dq ": unless 2dup swap dup 5 8 * + 3unroll swap " + dq ": unless 2dup swap dup 5 8 * + 3roll " ; (start pointer, length, start pointer, adjusted start pointer, length) dq " memmove " ; (start pointer, length) @@ -11869,9 +11868,13 @@ defword boot_source, 0x40 ; Those spaces will take five words, and two words, respectively. So the ; false-block gets moved by seven words, and the true-block gets moved by ; five words. - dq " 2dup swap dup 7 8 * + swap 3roll memmove " + ; + ; Note that this has diverged slightly from the self-hosted version, + ; because in this version we don't have "pick" yet. + dq " 2dup swap dup 7 8 * + 3roll memmove " dq " 4 roll dup 5 unroll 4 roll dup 5 unroll " - dq " swap dup 5 8 * + swap 3roll memmove " + dq " swap dup 5 8 * + 3roll memmove " + ; (true start, true length, false start, false length) ; ; Now we write out the initial test-and-branch. @@ -11906,7 +11909,7 @@ defword boot_source, 0x40 ; (test start, test length, body start, body length) dq ": while " ; The conditional branch needs five words. - dq " 2dup swap dup 5 8 * + swap 3roll memmove " + dq " 2dup swap dup 5 8 * + 3roll memmove " dq " here @ 5 unroll swap dup 3unroll here ! " ; (old here, test start, test length, body start, body length) dq " ' lit entry-to-execution-token , 0 , " diff --git a/transform.e b/transform.e index 4004db9..1230883 100644 --- a/transform.e +++ b/transform.e @@ -220,7 +220,7 @@ : push-input-buffer allocate-input-buffer-metadata ~ (original metadata pointer, new metadata pointer) - 2dup swap 6 8 * memcopy + 2dup 6 8 * memcopy ~ (original metadata pointer, new metadata pointer) swap dup zero-input-buffer-metadata input-buffer-next-source ! ; @@ -237,7 +237,7 @@ : pop-input-buffer dup input-buffer-next-source @ ~ (original metadata pointer, next source metadata pointer) - dup { 6 8 * memcopy } + dup { swap 6 8 * memcopy } { drop zero-input-buffer-metadata } if-else ; @@ -839,7 +839,7 @@ allocate-transform-state s" transform-state" variable ~ in dynamic.e before attempting to understand label-create-alternate. : label-create-alternate dup stringlen 1 + dup 3unroll - here @ 10 + 3unroll memmove + here @ 10 + swap memmove here @ ~ This value of "latest" is going into the generated output, so we need @@ -1066,8 +1066,8 @@ allocate-transform-state s" transform-state" variable ~ ~ (start pointer, length --) : label-if-alternate - 2dup swap dup 5 8 * + 3unroll swap - ~ (start pointer, length, start pointer, adjusted start pointer, length) + 2dup swap dup 5 8 * + 3roll + ~ (start pointer, length, adjusted start pointer, start pointer, length) memmove ~ (start pointer, length) swap here @ swap here ! swap @@ -1088,8 +1088,8 @@ allocate-transform-state s" transform-state" variable ~ (start pointer, length) : label-unless-alternate - 2dup swap dup 5 8 * + 3unroll swap - ~ (start pointer, length, start pointer, adjusted start pointer, length) + 2dup swap dup 5 8 * + 3roll + ~ (start pointer, length, adjusted start pointer, start pointer, length) memmove ~ (start pointer, length) swap here @ swap here ! swap @@ -1116,9 +1116,8 @@ allocate-transform-state s" transform-state" variable ~ Those spaces will take five words, and two words, respectively. So the ~ false-block gets moved by seven words, and the true-block gets moved by ~ five words. - 2dup swap dup 7 8 * + swap 3roll memmove - 4 roll dup 5 unroll 4 roll dup 5 unroll - swap dup 5 8 * + swap 3roll memmove + 2dup swap dup 7 8 * + 3roll memmove + 3 pick dup 5 8 * + 4 pick memmove ~ (true start, true length, false start, false length) ~ Now we write out the initial test-and-branch. @@ -1161,7 +1160,7 @@ allocate-transform-state s" transform-state" variable ~ (test start, test length, body start, body length --) : label-while-alternate ~ The conditional branch needs five words. - 2dup swap dup 5 8 * + swap 3roll memmove + 2dup swap dup 5 8 * + 3roll memmove here @ 5 unroll swap dup 3unroll here ! ~ (old here, test start, test length, body start, body length) swap-transform-variables -- cgit 1.4.1