From 272c9cf16bbca81ff0e15840c4dc8fd0bbce3ad8 Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Wed, 20 May 2026 21:35:05 -0700 Subject: the entirety of core.e now log-loads wow!!!!!! wow making this work required implementing comma and keyword in the log-load transform Force-Push: yes Change-Id: If888d89c23389720840b49b72478e4826a15a269 --- amd64.e | 2 +- core.e | 41 ++++++++++++++++++++++++++++++++--------- evoke.e | 52 +++++----------------------------------------------- labels.e | 2 +- output.e | 12 ++++++------ transform.e | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 96 insertions(+), 65 deletions(-) diff --git a/amd64.e b/amd64.e index e098505..4d3c734 100644 --- a/amd64.e +++ b/amd64.e @@ -821,7 +821,7 @@ s" :cc-greater" keyword reg64 swap addressing-reg64 ; ~ (output point, source value, target register -- output point) -: and-reg68-imm8 +: and-reg64-imm8 3roll rex-w 0x83 pack8 swap 4 swap addressing-reg64 swap pack8 ; diff --git a/core.e b/core.e index 1094e45..1bc73e1 100644 --- a/core.e +++ b/core.e @@ -65,6 +65,9 @@ ~ with the label transform, which can do forward references, but the log-load ~ transform's label support is special-cased to ONLY do this, and it will only ~ work with a backward reference. +~ +~ We begin by outputting the actual docol routine, the one that codewords +~ should point to. Note that this is before we've done any word header. here @ dup L!' docol-codeword-value :rsi pack-pushcontrol @@ -74,12 +77,18 @@ pack-next 8 packalign here ! -: docol - [ here @ - ~ Evaluated as a word, docol is a constant which returns a pointer. - L@' docol-codeword-value :rax mov-reg64-imm64 - :rax push-reg64 - here ! ] ;asm +~ We can't use colon to create docol, not even the part that's a word, +~ because colon tries to dynamically invoke docol to fill in the codeword. +s" docol" create +here @ +dup 8 + pack64 +L@' docol-codeword-value :rax mov-reg64-imm64 +:rax push-reg64 +pack-next +8 packalign +here ! +~ Now that we have docol, colon will work and we can define words the normal +~ way. ~ This is the mechanism to "return" from a word interpreted by docol. ~ We pop the control stack, and then, since this is threaded execution, we @@ -89,6 +98,11 @@ here ! :rsi pack-popcontrol here ! ] ;asm +~ Now that we have exit, semicolon will also work and we can define +~ non-assembly words. However, it will be hard to make them do much until we +~ have lit as well. There's no special hurry; most of the words in core.e are +~ assembly words. We'll get to it further down. + ~ Stack manipulation routines ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -581,7 +595,7 @@ here ! ~ storage operations heavily leverage the fact they have an object system ~ with type tags and so on; we want to stay close to the bytes. -~ Address on the top of the stack, value in the second position +~ (value, address --) : ! [ here @ :rbx pop-reg64 @@ -589,6 +603,7 @@ here ! :rax :rbx mov-indirect-reg64-reg64 here ! ] ;asm +~ (address -- value) : @ [ here @ :rax pop-reg64 @@ -596,8 +611,8 @@ here ! :rax push-reg64 here ! ] ;asm -~ I might have put the parameters the other way round, but this is what -~ Jonesforth does and it seems reasonable enough. +~ This follows the general convention that words that set things take the +~ destination as the final parameter. ~ ~ (value, address --) : +! @@ -676,10 +691,14 @@ here ! ~ register by that name, it's a Forth-provided abstraction. That's super ~ confusing, plus as discussed above we call it the control stack not the ~ return stack, so we call the words... +~ +~ (stack pointer --) : control! [ here @ :rbp pop-reg64 here ! ] ;asm + +~ (-- stack pointer) : control@ [ here @ :rbp push-reg64 @@ -687,6 +706,8 @@ here ! ~ Jonesforth calls this DSP!, for data stack pointer. Again, there's no ~ Intel register by that name, and we call it the value stack, so... +~ +~ (stack pointer --) : value! [ here @ ~ Per Intel's description of POP this reads from the old location, and @@ -695,6 +716,8 @@ here ! ~ order. :rsp pop-reg64 here ! ] ;asm + +~ (-- stack pointer) : value@ [ here @ ~ Per Intel's description of PUSH this pushes the old value. diff --git a/evoke.e b/evoke.e index 1d545b1..fa9ccb0 100644 --- a/evoke.e +++ b/evoke.e @@ -1,57 +1,15 @@ ~ (cat labels.e elf.e transform.e execution.e \ -~ echo 131072 read-to-buffer; \ +~ echo 262144 read-to-buffer; \ ~ cat core.e core-plus.e linux.e output.e \ ~ amd64.e execution-support.e log-load.e; \ -~ echo pyrzqxgl; \ +~ echo pyrzqxgl 262144 read-to-buffer; \ +~ cat core.e; \ +~ echo 0 sys-exit pyrzqxgl; \ ~ cat evoke.e) \ ~ | ./quine > evoke && chmod 755 evoke && ./evoke -s" source-to-precompile" variable - -1024 read-to-buffer -~ We begin by outputting the actual docol routine, the one that codewords -~ should point to. Note that this is before we've done any word header. -here @ -dup L!' docol-codeword-value -:rsi pack-pushcontrol -8 :rax add-reg64-imm8 -:rax :rsi mov-reg64-reg64 -pack-next -8 packalign -here ! - -~ We can't use colon to create docol, not even the part that's a word, -~ because colon tries to dynamically invoke docol to fill in the codeword. -s" docol" create -here @ -dup 8 + pack64 -L@' docol-codeword-value :rax mov-reg64-imm64 -:rax push-reg64 -pack-next -8 packalign -here ! - -~ Now that we have docol, colon will work and we can define exit the normal -~ way. -: exit - [ here @ - :rsi pack-popcontrol - here ! ] ;asm - -~ Now that we have exit, semicolon will also work and we can define -~ non-assembly words. However, it will be hard to make them do much until we -~ have lit as well. -: lit - [ here @ - lods64 - :rax push-reg64 - here ! ] ;asm - -42 sys-exit - -pyrzqxgl -~ 0 sys-exit s" source-to-copy-to-log" variable +s" source-to-precompile" variable ~ (output memory start, current output point ~ -- output memory start, current output point) diff --git a/labels.e b/labels.e index 27752aa..dcb3914 100644 --- a/labels.e +++ b/labels.e @@ -288,7 +288,7 @@ 0 swap ~ TODO every time you double this to fix a crash, you must publicly ~ apologize for deferring a real fix. those are the rules - 0x10000 allocate dup + 0x40000 allocate dup ~ (iteration count, execution token, output start, output point) { 3 pick 100 > } { 2 pick execute 4 roll 1+ 4 unroll diff --git a/output.e b/output.e index 29789b6..fe777b8 100644 --- a/output.e +++ b/output.e @@ -191,27 +191,27 @@ ~ Debugging tools ~ ~~~~~~~~~~~~~~~ -~ TODO this is a horrible, horrible hack -: s0 0x1000010008 ; +~ ~ TODO this is a horrible, horrible hack +: s0-kludge 0x1000010008 ; ~ TODO replace these with the implementations that use proper flow-control : stack - s0 @ 8 - + s0-kludge @ 8 - dup value@ 8 + != 0branch [ 19 8 * , ] - dup s0 @ 8 - != 0branch [ 2 8 * , ] space dup @ . 8 - + dup s0-kludge @ 8 - != 0branch [ 2 8 * , ] space dup @ . 8 - branch [ -25 8 * , ] drop newline ; : stackhex - s0 @ 8 - + s0-kludge @ 8 - dup value@ 8 + != 0branch [ 19 8 * , ] - dup s0 @ 8 - != 0branch [ 2 8 * , ] space dup @ .hex64 8 - + dup s0-kludge @ 8 - != 0branch [ 2 8 * , ] space dup @ .hex64 8 - branch [ -25 8 * , ] drop newline ; diff --git a/transform.e b/transform.e index 507c1f1..2d75f9f 100644 --- a/transform.e +++ b/transform.e @@ -673,7 +673,7 @@ allocate-transform-state s" transform-state" variable dup s" inc-reg64" stringcmp 0 = { drop -1 exit } if dup s" dec-reg64" stringcmp 0 = { drop -1 exit } if dup s" and-reg64-reg64" stringcmp 0 = { drop -2 exit } if - dup s" and-reg68-imm8" stringcmp 0 = { drop -2 exit } if + dup s" and-reg64-imm8" stringcmp 0 = { drop -2 exit } if dup s" or-reg64-reg64" stringcmp 0 = { drop -2 exit } if dup s" or-reg64-imm8" stringcmp 0 = { drop -2 exit } if dup s" xor-reg64-reg64" stringcmp 0 = { drop -2 exit } if @@ -702,6 +702,7 @@ allocate-transform-state s" transform-state" variable dup s" ;asm" stringcmp 0 = { drop 0 exit } if dup s" L@'" stringcmp 0 = { drop 1 exit } if dup s" L!'" stringcmp 0 = { drop -1 exit } if + dup s" ," stringcmp 0 = { drop -1 exit } if ~ If we get here, that's a problem. Emit an error message to make sure ~ it's easy to diagnose. We also return a comically large negative value, @@ -1677,11 +1678,57 @@ allocate-transform-state s" transform-state" variable ~ We need this one, too. It's not even an immediate word normally! : log-load-right-square-brace-alternate ] ; make-immediate + +: log-load-comma-alternate + log-load-roll-log-address + + swap-transform-variables + L@' log-load-comma + L@' swap + swap-transform-variables + + offset-to-target-address-space , ~ swap + offset-to-target-address-space , ~ log-load-comma + + ~ We consumed the value, so we apply a delta. + -1 transform-apply-stack-delta + + log-load-roll-log-address + ; make-immediate + + +~ TODO there should really be an actual word that this alternate is replacing +~ +~ (string pointer --) +: log-load-keyword-alternate + word value@ + + ~ Calling log-load-create-alternate would result in some redundant rolling + ~ and unrolling, so we do it together like this instead. + log-load-roll-log-address + + swap-transform-variables + L@' log-load-keyword + L@' litstring + swap-transform-variables + + ~ The overall stack delta of this sequence is 0. + offset-to-target-address-space , ~ litstring + swap here @ swap packstring 8 packalign here ! + offset-to-target-address-space , ~ log-load-keyword + + log-load-unroll-log-address + + dropstring + ; make-immediate + + ~ Yeah comments have to go. : log-load-tilde-alternate ' ~ entry-to-execution-token execute ; make-immediate + ~ Strings are important and must happen now, now, now. : log-load-string-alternate ~ Something subtle here: s" is state-dependent. That is, it does different @@ -1815,6 +1862,9 @@ allocate-transform-state s" transform-state" variable swap drop ' log-load-left-square-brace-alternate swap } if dup s" ]" stringcmp 0 = { swap drop ' log-load-right-square-brace-alternate swap } if + dup s" ," stringcmp 0 = { swap drop ' log-load-comma-alternate swap } if + dup s" keyword" stringcmp 0 = { + swap drop ' log-load-keyword-alternate swap } if dup s" ~" stringcmp 0 = { swap drop ' log-load-tilde-alternate swap } if ~ It is nontrivial to construct a string with a double-quote in it. -- cgit 1.4.1