diff options
| author | Irene Knapp <ireneista@irenes.space> | 2026-05-16 00:43:24 -0700 |
|---|---|---|
| committer | Irene Knapp <ireneista@irenes.space> | 2026-05-16 00:43:24 -0700 |
| commit | 9922f43d213bb2f710f90cb5165a7580ef0326cd (patch) | |
| tree | 8efe744025fb1673fa506293f385e96f12d7fb8d | |
| parent | 09ae826c2f99b7e21606e01f577b934c1d669bdc (diff) | |
it's much closer, most of the labels work correctly now
Force-Push: yes Change-Id: I45662e60c0035758a2cd57d971031eb0562eccb7
| -rw-r--r-- | evoke.e | 38 | ||||
| -rw-r--r-- | execution.e | 7 | ||||
| -rw-r--r-- | labels.e | 20 | ||||
| -rw-r--r-- | transform.e | 118 |
4 files changed, 130 insertions, 53 deletions
diff --git a/evoke.e b/evoke.e index 68645cb..86e79c8 100644 --- a/evoke.e +++ b/evoke.e @@ -2,9 +2,43 @@ ~ | ./quine > evoke && chmod 755 evoke && ./evoke 1024 read-to-buffer -: foo ;asm +~ Any word that ends with a normal semicolon will require this, so its +~ definition has to be near the start. +: exit + [ here @ + :rsi pack-popcontrol + here ! ] ;asm + +: lit + [ here @ + lods64 + :rax push-reg64 + here ! ] ;asm + +: sys-exit + [ here @ + 60 :rax mov-reg64-imm64 + :rdi pop-reg64 + syscall + hlt + here ! ] ;asm + +: happy-path 42 sys-exit ; + +~ : lods64 ; +~ : :rax ; +~ : push-reg64 ; +~ : mov-reg64-imm64 ; +~ : :rdi ; +~ : pop-reg64 ; +~ : syscall ; +~ : hlt ; +~ : here ; +~ : @ ; +~ : ! ; +~ : [ ; +~ : ] ; -: bar ;asm pyrzqxgl s" source-to-precompile" variable diff --git a/execution.e b/execution.e index 48761de..8410678 100644 --- a/execution.e +++ b/execution.e @@ -235,12 +235,12 @@ ~ * rbp points to the top of the control stack. ~ ~ (source register, base address -- new base address) -: pushcontrol +: pack-pushcontrol swap :rbp -8 :rbp lea-reg64-disp8-reg64 swap :rbp 0 mov-disp8-reg64-reg64 ; ~ (target register, base address -- new base address) -: popcontrol +: pack-popcontrol :rbp 0 3roll mov-reg64-disp8-reg64 :rbp 8 :rbp lea-reg64-disp8-reg64 ; @@ -480,6 +480,9 @@ 8 packalign current-offset L!' warm-start + ~ TODO this is tied to the specific example in evoke + over L@' happy-path swap - L@' origin + pack64 + ~ Before handing off to us, cold-start pushed a single value onto the ~ stack, a pointer to the beginning of the heap. Now, we load our entire ~ Forth implementation onto that heap, beginning with the minimal set of diff --git a/labels.e b/labels.e index c437003..4e315f7 100644 --- a/labels.e +++ b/labels.e @@ -106,9 +106,20 @@ labels oldest-entry-in { dup } { dup label-heading labels next-newer-entry-in } while drop ; +~ This looks up a label by name if it exists, or returns 0 if it doesn't. +~ The return value if it's found is an entry pointer. 
+~ +~ Keep in mind that this is a pointer to the entry in the label dictionary, +~ not the label's value. It's common for a label's value to itself be a +~ pointer to a dictionary entry, in the program being compiled, so it's easy +~ to confuse that. +~ +~ (name string pointer -- label entry pointer) +: find-label labels swap find-in ; + ~ This creates a new label given a name for it, initializing its value and -~ status to zero and adding it to the dictionary. This is responsible for the -~ initial guess of zero on the first pass. +~ status to zero and adding it to the label dictionary. This is responsible +~ for the initial guess of zero on the first pass. ~ ~ (name string pointer -- ) : new-label labels create-in 0 , 0 , ; @@ -122,9 +133,9 @@ ~ Either way, it returns an entry pointer. It's named after the function ~ "intern" that many Lisp dialects have. ~ -~ (name string pointer -- entry pointer) +~ (name string pointer -- label entry pointer) : intern-label - dup labels swap find-in + dup find-label dup { swap drop } { drop dup new-label labels swap find-in } if-else ; @@ -279,6 +290,7 @@ ~ (iteration count, execution token, output start, output point) { 3 pick 100 > } { 2 pick execute 4 roll 1+ 4 unroll + check-labels-converged { 4 roll drop 3 roll drop diff --git a/transform.e b/transform.e index d727d53..e913d42 100644 --- a/transform.e +++ b/transform.e @@ -18,9 +18,12 @@ ~ literals, are looked up via the label facility. ~ ~ Since the label facility is able to resolve forward references, there is -~ no hard requirement that words be topologically sorted, but forward -~ references should still be kept to a minimum, since that's a significant -~ difference from un-transformed code that could easily become confusing. +~ no hard requirement that everything in the file be topologically sorted. +~ However, the transform will refuse to create forward references to compiled +~ words. If you want them, you can create them by hand by calling use-label +~ yourself. 
This restriction is in place because allowing forward references +~ would be a significant difference from un-transformed code that could easily +~ become confusing, and because it simplifies the implementation a bit. ~ ~ Compilation words do make extensive reference to the global variables ~ "here" and "latest". In particular, flow-control words such as if-else @@ -40,15 +43,7 @@ ~ for not doing anything else that would rely on "here" and "latest" matching ~ their runtime addresses, though it is otherwise allowed to modify and rely ~ on them in all the usual ways. The alternate versions are defined in this -~ file as their own words, "Lcreate", "L:", "L;", and "L;asm". The alternates -~ rely on various labels, all of which must be defined elsewhere: -~ -~ * origin -~ * docol -~ * exit -~ * : -~ * ; -~ * ;asm +~ file as their own words, "Lcreate", "L:", "L;", and "L;asm". ~ ~ Note that these alternates are applied via a purely lexical ~ transformation: when a word would be looked up in the dictionary to @@ -60,6 +55,10 @@ ~ the rest of Evocation. There's no need to keep it separate like there is ~ with the other variables. This makes it easy to change modes. ~ +~ The transformation and the alternates rely on various labels, all of which +~ must be defined elsewhere, lest the label loop fail to converge: "lit", +~ "origin", "docol", "exit", ":", ";", and ";asm". +~ ~ All of these limitations result in the compiled code being, in effect, ~ written in a dialect which is like Evocation, but more restricted. This is ~ acceptable, because the label transform is intended for compiling code that @@ -180,6 +179,7 @@ allocate-transform-state s" transform-state" variable dup stringlen 1 + dup 3unroll here @ 10 + 3unroll memmove here @ + ~ This value of "latest" is going into the generated output, so call ~ transform-offset on it first. latest @ transform-offset pack64 @@ -188,6 +188,14 @@ allocate-transform-state s" transform-state" variable + 8 packalign here @ latest ! 
+ + ~ Now we're immediately after the word header, which is where the codeword + ~ will be. This is the value the label should take on, so we set it. + dup here @ 10 + + swap-transform-variables + intern-label set-label + swap-transform-variables + here ! ; @@ -282,23 +290,6 @@ allocate-transform-state s" transform-state" variable 3roll drop swap } unless ~ (name as stack string, immediate entry pointer, name pointer) - ~ For compile mode, we need to look the word up in the output buffer. We - ~ can't easily traverse the next-entry pointers in the output buffer's - ~ dictionary, so we use the label. - ~ - ~ Labels point to codewords (because that's what "Lcreate" does), so we - ~ have to convert it to get the entry pointer. Since we don't know the - ~ word's name statically, this is a rare scenario where we can't use the - ~ abbreviated label syntax, but that's easy enough. - ~ - ~ We do have to be careful of one thing: On the first run, the label may - ~ be zero! - swap-transform-variables - intern-label use-label - swap-transform-variables - dup { execution-token-to-entry } if - ~ (name as stack string, immediate entry pointer, compiled entry pointer) - ~ In regular "interpret", we would check whether we found the word before ~ checking the mode. However, we have three different places words could ~ come from, so that's not a simple notion. So, we check the mode first. @@ -309,30 +300,67 @@ allocate-transform-state s" transform-state" variable ~ override an immediate word with a non-immediate word of the same name. over dup { entry-flags@ 0x01 & not } if - ~ Either there was no immediate entry, or the immediate entry wasn't - ~ flagged as an immediate word. So we treat this as a compilation, which - ~ means we append a word to the heap. Specificaly, of course, we use the - ~ compiled entry to do that. 
- { swap drop dropstring-with-result - entry-to-execution-token , - 0 exit } if + { + ~ Either there was no immediate entry, or the immediate entry wasn't + ~ flagged as an immediate word. So we check whether this could be a + ~ compilation. + ~ + ~ To do this, we need to look the word up in the output buffer. We + ~ can't easily traverse the next-entry pointers in the output buffer's + ~ dictionary, so we check the label. Since we don't know the word's name + ~ statically, this is a rare scenario where we can't use the abbreviated + ~ label syntax, but that's easy enough. + ~ + ~ Even though we've ruled out the possibility that the word is only + ~ ever used immediately, it is still possible that there's some reason + ~ the word doesn't exist. In particular, it could be an integer literal. + ~ If we were to call use-label first, that would count as a requirement + ~ that the label must eventually be set. We don't want to require that + ~ quite yet, so we call find-label. + ~ + ~ This check is the means by which forward references are disallowed: + ~ On the very first pass, a forward-referenced label won't exist yet, so + ~ transform will give a "no such word" error, which in an ideal world + ~ would prevent there from being a subsequent pass, but at the very + ~ least it will ensure the output isn't a valid ELF. + dup + swap-transform-variables + find-label + swap-transform-variables + { + ~ It exists, so we declare our use of it (that's also the only way to + ~ get a value for it). + swap-transform-variables + intern-label use-label + swap-transform-variables + + ~ Labels point to codewords (because that's what "Lcreate" does), + ~ which is already what we want to output. + ~ + ~ An important caveat: Though it would require something weird to be + ~ happening, such as a forced forward reference, the label may be zero! + ~ We need to allow for that possibility by not examining the contents of + ~ a nonexistent entry. 
+ ~ + ~ Fortunately we don't have to look at it, just append it to the heap + ~ and clean up. + drop , dropstring 0 exit + } if + } if } if + ~ (name as stack string, immediate entry pointer, name pointer) ~ If we got here, one of three things is true: We're in interpret mode; - ~ the word is immediate; or no word was found. Regardless, we don't need - ~ the compiled entry pointer anymore, so drop it. - drop - ~ (name as stack string, immediate entry pointer) - - ~ If the immediate entry pointer is non-zero, run it. - dup { - dropstring-with-result entry-to-execution-token execute + ~ the word is immediate; or no word was found. If the immediate entry + ~ pointer is non-zero, run it. + over { + drop dropstring-with-result entry-to-execution-token execute 0 exit } if ~ If we're still here, it wasn't in the dictionary. Also, we don't need ~ the immediate entry pointer, either. - drop + drop drop ~ (name as stack string) ~ If it's not in the dictionary, check whether it's an integer literal. As |