diff options
| -rw-r--r-- | README.txt | 8 | ||||
| -rw-r--r-- | amd64.e | 16 | ||||
| -rw-r--r-- | dynamic.e | 30 | ||||
| -rw-r--r-- | hex.e | 2 | ||||
| -rw-r--r-- | linux-dynamic.e | 2 | ||||
| -rw-r--r-- | premortem.txt | 107 | ||||
| -rw-r--r-- | quine.asm | 15 | ||||
| -rw-r--r-- | transform.e | 621 |
8 files changed, 779 insertions, 22 deletions
diff --git a/README.txt b/README.txt index 229ef70..bf11f9c 100644 --- a/README.txt +++ b/README.txt @@ -86,3 +86,11 @@ program. TODO output.e interpret.e dynamic.e flow-control.e execution-support.e + If you want examples of programs that are smaller than Evocation itself, +quine.e is a tiny program written in proper Evocation that outputs its own +source code; hello.e is a hello-world written in Evocation-assembly, and hex.e +is another small Evocation-assembly program that might make a good example of +how to do slightly more complex things that way. All three of these are +self-contained, consisting of just that one file plus calls to Evocation's +built-in library. + diff --git a/amd64.e b/amd64.e index 31354e2..95cdb2a 100644 --- a/amd64.e +++ b/amd64.e @@ -168,13 +168,13 @@ s" :cc-greater" keyword ~ (register -- 3-bit encoded value for register) : reg32 dup :eax = { drop 0 exit } if - dup :ecx = { drop 0 exit } if - dup :edx = { drop 0 exit } if - dup :ebx = { drop 0 exit } if - dup :esp = { drop 0 exit } if - dup :ebp = { drop 0 exit } if - dup :esi = { drop 0 exit } if - dup :edi = { drop 0 exit } if + dup :ecx = { drop 1 exit } if + dup :edx = { drop 2 exit } if + dup :ebx = { drop 3 exit } if + dup :esp = { drop 4 exit } if + dup :ebp = { drop 5 exit } if + dup :esi = { drop 6 exit } if + dup :edi = { drop 7 exit } if ." Parameter to reg32 is not a reg32." 1 sys-exit ; ~ (register -- 3-bit encoded value for register) @@ -213,7 +213,7 @@ s" :cc-greater" keyword : scalefield dup 1 = { drop 0 exit } if dup 2 = { drop 1 exit } if - dup 5 = { drop 2 exit } if + dup 4 = { drop 2 exit } if dup 8 = { drop 3 exit } if ." Parameter to scalefield is not 1, 2, 4, or 8." 
1 sys-exit ; diff --git a/dynamic.e b/dynamic.e index 79944b2..a02eec0 100644 --- a/dynamic.e +++ b/dynamic.e @@ -135,9 +135,9 @@ stringlen 1+ 54 swap - 0 max indent dup .hex64 dup entry-flags@ dup { space - dup 128 & { s" H" emitstring } if - dup 64 & { s" M" emitstring } if - dup 1 & { s" I" emitstring } if + dup 0x80 & { s" H" emitstring } if + dup 0x40 & { s" M" emitstring } if + dup 0x01 & { s" I" emitstring } if } if drop dup is-assembly-word { s" asm" emitstring } { dup is-docol-interpreted-word { s" raw" emitstring } unless @@ -150,14 +150,33 @@ { dup word-heading next-newer-entry } while drop ; +~ (byte -- boolean) +: is-printable + dup 0x20 <= swap 0x7F > && ; + + ~ (content end, content start, label start --) : hexdump-row 2 indent dup .hex32 dup 4 unroll - 0 { dup 16 > } - { dup 7 & 0 = { space } if space + ~ (label start, content end, content start, label start) + 0 { dup 16 > } { + ~ (label start, content end, content start, label start, offset within row) + dup 7 & 0 = { space } if space 2dup + dup 4 pick <= swap 5 pick > && { 2dup + 8@ .hex8 } { space space } if-else 1+ } while + drop + 2 indent s" |" emitstring + 0 { dup 16 > } { + ~ (label start, content end, content start, label start, offset within row) + 2dup + dup 4 pick <= swap 5 pick > && + { 2dup + 8@ + dup is-printable + { value@ emitstring } { s" ." emitstring } if-else + drop } + { space } if-else + 1+ } while + s" |" emitstring newline 5 ndrop ; @@ -296,6 +315,7 @@ : make-immediate latest @ dup entry-flags@ 0x01 | swap entry-flags! ; : make-hidden latest @ dup entry-flags@ 0x80 | swap entry-flags! ; : make-visible latest @ dup entry-flags@ 0x80 invert & swap entry-flags! ; +: make-metadata latest @ dup entry-flags@ 0x40 | swap entry-flags! ; ~ Sooner or later we'll want to define recursive words; this one lets us ~ do that. 
It compiles into a call to the word that's currently being diff --git a/hex.e b/hex.e index 51e7703..963b82a 100644 --- a/hex.e +++ b/hex.e @@ -228,8 +228,8 @@ elf-file-header elf-program-header-writable output-start-routine - output-read-byte output-exit + output-read-byte output-decode-nibble output-error-handlers output-messages diff --git a/linux-dynamic.e b/linux-dynamic.e index 7890043..ee34e0e 100644 --- a/linux-dynamic.e +++ b/linux-dynamic.e @@ -340,7 +340,7 @@ sys-sigaction drop ; -: handle-crash list-callers 1 sys-exit ; +: handle-crash ." CRASH" newline list-callers 1 sys-exit ; : install-crash-handler ' handle-crash entry-to-execution-token wrap-signal-handler diff --git a/premortem.txt b/premortem.txt new file mode 100644 index 0000000..d3809b1 --- /dev/null +++ b/premortem.txt @@ -0,0 +1,107 @@ + Programming Language Checklist +by Colin McMillen, Jason Reed, and Elly Fong-Jones, 2011-10-10. +filled out for Evocation by Irenes, 2026-06-05. + +You appear to be advocating a new: +[ ] functional [X] imperative [ ] object-oriented [ ] procedural [X] stack-based +[ ] "multi-paradigm" [ ] lazy [X] eager [ ] statically-typed [ ] dynamically-typed +[ ] pure [X] impure [X] non-hygienic [ ] visual [ ] beginner-friendly +[X] non-programmer-friendly [X] completely incomprehensible +programming language. Your language will not work. Here is why it will not work. 
+ +You appear to believe that: +[X] Syntax is what makes programming difficult +[ ] Garbage collection is free [ ] Computers have infinite memory +[X] Nobody really needs: + [X] concurrency [ ] a REPL [X] debugger support [X] IDE support [X] I/O + [ ] to interact with code not written in your language +[ ] The entire world speaks 7-bit ASCII +[X] Scaling up to large software projects will be easy +[X] Convincing programmers to adopt a new language will be easy +[ ] Convincing programmers to adopt a language-specific IDE will be easy +[ ] Programmers love writing lots of boilerplate +[X] Specifying behaviors as "undefined" means that programmers won't rely on them +[X] "Spooky action at a distance" makes programming more fun + +Unfortunately, your language (has/lacks): +[X] comprehensible syntax [X] semicolons [ ] significant whitespace [ ] macros +[ ] implicit type conversion [ ] explicit casting [ ] type inference +[ ] goto [ ] exceptions [ ] closures [ ] tail recursion [ ] coroutines +[ ] reflection [ ] subtyping [ ] multiple inheritance [ ] operator overloading +[ ] algebraic datatypes [ ] recursive types [ ] polymorphic types +[ ] covariant array typing [ ] monads [ ] dependent types +[ ] infix operators [ ] nested comments [ ] multi-line strings [ ] regexes +[ ] call-by-value [ ] call-by-name [ ] call-by-reference [ ] call-cc + +The following philosophical objections apply: +[ ] Programmers should not need to understand category theory to write "Hello, World!" +[ ] Programmers should not develop RSI from writing "Hello, World!" 
+[X] The most significant program written in your language is its own compiler +[ ] The most significant program written in your language isn't even its own compiler +[X] No language spec +[ ] "The implementation is the spec" + [ ] The implementation is closed-source [ ] covered by patents [ ] not owned by you +[ ] Your type system is unsound [X] Your language cannot be unambiguously parsed + [ ] a proof of same is attached + [ ] invoking this proof crashes the compiler +[ ] The name of your language makes it impossible to find on Google +[X] Interpreted languages will never be as fast as C +[X] Compiled languages will never be "extensible" +[X] Writing a compiler that understands English is AI-complete +[ ] Your language relies on an optimization which has never been shown possible +[ ] There are less than 100 programmers on Earth smart enough to use your language +[ ] ____________________________ takes exponential time +[ ] ____________________________ is known to be undecidable + +Your implementation has the following flaws: +[ ] CPUs do not work that way +[X] RAM does not work that way +[ ] VMs do not work that way +[ ] Compilers do not work that way +[ ] Compilers cannot work that way +[ ] Shift-reduce conflicts in parsing seem to be resolved using rand() +[X] You require the compiler to be present at runtime +[X] You require the language runtime to be present at compile-time +[ ] Your compiler errors are completely inscrutable +[ ] Dangerous behavior is only a warning +[ ] The compiler crashes if you look at it funny +[ ] The VM crashes if you look at it funny +[ ] You don't seem to understand basic optimization techniques +[ ] You don't seem to understand basic systems programming +[ ] You don't seem to understand pointers +[ ] You don't seem to understand functions + +Additionally, your marketing has the following problems: +[ ] Unsupported claims of increased productivity +[ ] Unsupported claims of greater "ease of use" +[ ] Obviously rigged benchmarks + [ 
] Graphics, simulation, or crypto benchmarks where your code just calls + handwritten assembly through your FFI + [ ] String-processing benchmarks where you just call PCRE + [ ] Matrix-math benchmarks where you just call BLAS +[X] Noone really believes that your language is faster than: + [X] assembly [X] C [ ] FORTRAN [ ] Java [ ] Ruby [ ] Prolog +[X] Rejection of orthodox programming-language theory without justification +[ ] Rejection of orthodox systems programming without justification +[ ] Rejection of orthodox algorithmic theory without justification +[ ] Rejection of basic computer science without justification + +Taking the wider ecosystem into account, I would like to note that: +[ ] Your complex sample code would be one line in: _______________________ +[X] We already have an unsafe imperative language +[ ] We already have a safe imperative OO language +[ ] We already have a safe statically-typed eager functional language +[X] You have reinvented Lisp but worse +[ ] You have reinvented Javascript but worse +[ ] You have reinvented Java but worse +[ ] You have reinvented C++ but worse +[ ] You have reinvented PHP but worse +[ ] You have reinvented PHP better, but that's still no justification +[X] You have reinvented Brainfuck but non-ironically + +In conclusion, this is what I think of you: +[ ] You have some interesting ideas, but this won't fly. +[ ] This is a bad language, and you should feel bad for inventing it. +[X] Programming in this language is an adequate punishment for inventing it. + + diff --git a/quine.asm b/quine.asm index 2df2d3a..f11b435 100644 --- a/quine.asm +++ b/quine.asm @@ -1883,7 +1883,7 @@ _start: ;;; These will be the permanent homes of these values, though we have ;;; copies of them elsewhere while we're still in this routine. 
;;; - mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi ; heap + mov.qreg.disp32.qreg rdi, control_stack_size + 0x00, rdi ; log mov.qreg.disp32.qreg rdi, control_stack_size + 0x08, rsp ; s0 mov.qreg.disp32.qreg rdi, control_stack_size + 0x10, rbp ; r0 mov.qreg.qimm rax, final_word_name @@ -1891,9 +1891,14 @@ _start: lea.qreg.disp32.qreg rax, control_stack_size + 0x28, rdi mov.qreg.disp32.qreg rdi, control_stack_size + 0x20, rax ; here ;;; - ;;; * "heap" is the physical bottom of the heap - ;;; The heap grows upwards in memory, so this is also the logical + ;;; * "log" is the physical bottom of the log + ;;; The log grows upwards in memory, so this is also the logical ;;; bottom. This comes from the address mmap() just returned to us. + ;;; The rest of quine.asm refers to the log as the heap. It's not a + ;;; heap, but it used to be called that. The self-hosted version of + ;;; Evocation has the fully revised and reconciled copy of all these + ;;; comments, it just felt like unnecessary tedium to do that here as + ;;; well. ;;; * "s0" is the logical bottom of the value stack ;;; The value stack grows downwards in memory, so this is the physical ;;; top of it. This comes from the stack pointer the kernel initialized us @@ -2018,7 +2023,7 @@ cold_start: ;;; This is the only hardcoding we need to do; by building on top of it, ;;; we will soon reach a point where the rest of the system can be defined ;;; within itself. - dq early_heap, litstring, "heap", early_variable + dq early_heap, litstring, "log", early_variable dq early_s0, litstring, "s0", early_variable dq early_r0, litstring, "r0", early_variable dq early_latest, litstring, "latest", early_variable @@ -11966,7 +11971,7 @@ defword boot_source, 0x40 ; This use of bitwise and is okay because they're both either 0 or 1. ; We'll have logical and real soon now, be patient... 
:) - dq ": is-in-heap dup heap @ <= swap here @ > & ; " + dq ": is-in-heap dup log @ <= swap here @ > & ; " ; dq ": unlink-pre-heap-words " ; dq " latest @ " diff --git a/transform.e b/transform.e index 76a192e..ebf493f 100644 --- a/transform.e +++ b/transform.e @@ -155,6 +155,45 @@ ~ ~ The log-load transformation and its alternates rely on the following ~ labels, all of which must be defined elsewhere: TODO +~ +~ +~ About the hex transform +~ ~~~~~~~~~~~~~~~~~~~~~~~ +~ +~ The hex transform's role is a bit different. Whereas the label and +~ log-load transforms are used as part of generating an executable binary, the +~ hex transform produces a commented hex dump that, when later processed by +~ the "hex" tool, produces that same binary. It achieves this by modifying +~ various words which are core parts of Evocation to keep track of appropriate +~ comments and other metadata, and modifying the output facility to output a +~ hex dump with any attached comments, rather than raw bytes. +~ +~ By its nature, the hex transform needs to be able to cope with other +~ transforms running inside itself; that is, the transformation facility needs +~ to be reentrant. The other transforms don't have to cope with that, since +~ the code they run is mostly just about creating word definitions, but the +~ code given to the hex transform will normally include a call to label-loop, +~ all the memory manipulation that happens as part of binary generation, and +~ various output words. +~ +~ While the label and log-load transforms allow Evocation to be a +~ self-hosting compiler, the hex transform attains the even higher bar of +~ making Evocation a "self-bootstrapping" compiler, able to create a running +~ copy of itself without having any sort of pre-existing compiler at all, only +~ a hex converter or similar small tool. 
The idea is that the hex converter is +~ small and simple enough that it's easy to perform a BINARY audit on, and +~ that there need be no other binary artifacts in the chain of trust. +~ +~ This is not a new concept, but as far as Irenes are aware this name for it +~ is a new coinage. The approach of using a hex-to-binary conversion tool as +~ the initial bootstrapping stage is due to MesCC, which is quite inspiring in +~ that regard. +~ +~ The key insight is that, in some sense, the difference between source code +~ and binary is the ability to have comments. +~ +~ The hex transform DOES NOT WORK yet. It's still in development. +~ TODO update this note when it does work ~ Buffer- and address-management helpers @@ -245,12 +284,14 @@ : transformation-state-saved-latest 8 + ; : transformation-state-output-buffer-start 2 8 * + ; : transformation-state-user-stack-depth 3 8 * + ; +: transformation-state-label-scratch 4 8 * + ; : allocate-transformation-state - 4 8 * allocate + 5 8 * allocate dup transformation-state-saved-here 0 swap ! dup transformation-state-saved-latest 0 swap ! dup transformation-state-output-buffer-start 0 swap ! - dup transformation-state-user-stack-depth 0 swap ! ; + dup transformation-state-user-stack-depth 0 swap ! + dup transformation-state-label-scratch 0 swap ! ; allocate-transformation-state s" transformation-state" variable @@ -2547,3 +2588,579 @@ allocate-transformation-state s" transformation-state" variable exit } if } forever ; + +~ Hex transform implementation +~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~ +~ The following code is all part of implementing the hex transform. For +~ conceptual overview, see the top of this file. +~ +~ The hex transform DOES NOT WORK yet. It's still in development.
+~ TODO update this note when it does work + +: hex-tilde-alternate [ ' ~ entry-to-execution-token , ] + ; make-immediate + +: hex-self-codeword-alternate self-codeword ; + +: hex-string-alternate + ~ See label-string-alternate for detailed notes on how we wrap s". + ~ Essentially, we call the immediate version of it, in the outer context, + ~ which uses scratch space in the outer, "real" log. + interpreter-flags @ + ' s" entry-to-execution-token + swap-transform-variables + [ ' [ entry-to-execution-token , ] + execute + swap-transform-variables + swap interpreter-flags ! + + ~ Now we have a string pointer on the stack at transform time. If we're in + ~ immediate mode, that's sufficient. If we're in compile mode, output a + ~ litstring invocation. Notice also that these are essentially the same + ~ responsibilities as we'd have in the label transform. + interpreter-flags @ 0x01 & { + ~ We look up the inner version of litstring to reference here. This is + ~ similar to what the label transform does, except we don't use a label + ~ for it. + s" litstring" find entry-to-execution-token , + here @ swap packstring 8 packalign here ! + } if + ; make-immediate + +: hex-dot-string-alternate + ' hex-string-alternate entry-to-execution-token execute + + interpreter-flags @ 0x01 & { + ~ We look up the inner version of emitstring, too. + s" emitstring" find entry-to-execution-token , + } { emitstring } if-else + ; make-immediate + +: hex-create-alternate create ; + +~ This is a helper called from the patched version of "variable", described +~ in more detail in hex-colon-alternate, below. It expects to be called after +~ outputting the entry header for "variable", during the body of the +~ definition, so that it can output compiled code which will run as part of +~ the transformed "variable". +~ +~ The helper accepts a string pointer giving a variable name. 
The code it +~ produces checks the name of the variable being defined and, if the two names +~ match, alters the resulting inner variable to point to the same backing +~ store as the outer variable of the same name. +~ +~ In many ways the hex transform is the trippiest one. To wit, there's two +~ layers of compilation happening here... so don't get confused. When the +~ helper is called, we're compiling the inner "variable", but "variable" +~ itself is a word-defining word which also has the task of compilation... +~ which we're modifying. +~ +~ (name pointer --) +: hex-variable-use-outer + ~ The actual payload here is that we check whether we're defining the + ~ word named by our argument (for example, "interpreter-flags") and, if so, we make it reference the outer + ~ backing store instead of the inner one. + ~ + ~ We want to do this all as references to inner words, which fortunately + ~ have been defined by now, but we have to do that a bit indirectly... + s" dup" find entry-to-execution-token , + s" litstring" find entry-to-execution-token , + here @ over packstring 8 packalign here ! + s" stringcmp" find entry-to-execution-token , + s" lit" find entry-to-execution-token , 0 , + s" =" find entry-to-execution-token , + + ~ Also, we don't have high-level flow control yet, and even if we did, + ~ it would be awkward to use it here. So we count the branch by hand. Ah + ~ well. As always, remember that with forward branches, the offset to + ~ branch by is the first word to be skipped, and is included in the count. + s" 0branch" find entry-to-execution-token , 6 8 * , + + ~ If control reaches here in the generated code, the string matched. + s" swap" find entry-to-execution-token , + s" drop" find entry-to-execution-token , + + ~ To get the value of the outer variable, we just call it. Of course, + ~ looking up an outer entry is a pain, but at least it's a pain in a way + ~ that should be familiar by now.
+ s" lit" find entry-to-execution-token , + swap-transform-variables + find + swap-transform-variables + entry-to-execution-token execute , + + s" swap" find entry-to-execution-token , ; + +~ This "replacement" is a little different from an alternate: When the code +~ under transformation attempts to compile its own version of sys-write, it +~ gets a stub that calls this word instead. It's swapped out by +~ hex-colon-alternate, whereas the regular alternates are swapped out by +~ hex-transform-one. +~ (length to write, base address --) +: hex-sys-write-replacement + { over } { + dup 8@ .hex8 space + 1+ swap 1- swap + } while ; + +~ By overriding colon, we can special-case the definitions of particular +~ words. It's very metacircular. +: hex-colon-alternate + word value@ + + ~ The word "variable" is itself a word-defining word, and we will + ~ special-case its definition to special-case the definitions of particular + ~ variables. It's very very metacircular. + dup s" variable" stringcmp 0 = { + ~ Don't lose track of the layering happening here. The word "variable" + ~ is a regular docol word; it's defining a word that's implemented in + ~ assembly, but it can use whatever Forthy logic it wants to do so. In + ~ this case we're going to have it run a little extra logic, then continue + ~ with the rest of its usual definition. + + ~ Before we get to the extra logic, we do want an entry header for + ~ "variable" itself, so we do that... This takes care of all of colon's + ~ responsibilities except switching to compile mode; we'll do that part + ~ after we've output our payload. + create dropstring + s" docol" find entry-to-execution-token execute , + make-hidden + + ~ There's two variables that we want to point to the outer backing + ~ stores, rather than the inner ones. The code for that is a bit + ~ repetitive, so we have a helper for it; see above. 
+ ~ + ~ It is worth stopping to contemplate the meaning of sharing these two + ~ variables in particular between the inner and outer contexts. + ~ Essentially this says that they're both reading the same input stream, + ~ and the two copies of the interpreter both share the same state. Thus, + ~ trading off responsibility for lexing between inner and outer contexts + ~ works just like trading off responsibility between two interpreters + ~ when there's no transformation involved, or between a regular + ~ interpreter and a transform. + ~ + ~ If we didn't do this, we'd still have to invent some way to control + ~ what input the inner context sees, and the concept of "the next + ~ character" would become more complex during the transform and require + ~ care and attention. Sharing this stuff keeps it simple. + s" interpreter-flags" hex-variable-use-outer + s" main-input-buffer" hex-variable-use-outer + ~ After this, we can return control to the regularly-scheduled + ~ "variable", which will do the "create" and all that. That stuff isn't + ~ colon's responsibility, so it isn't our responsibility, it'll happen + ~ regardless. + + ~ Now we close out colon's responsibilities by switching to compile + ~ mode. We return from colon after this. The hex transform will continue + ~ by processing the source words that form the regular body of "variable", + ~ eventually hitting the matching semicolon. Our friendly tampering is + ~ now complete! + ' ] entry-to-execution-token execute + exit + } if + + ~ Now we want to override s". As usual, that's the single most annoying + ~ string to quote, so we cheat. + dup ' s" entry-to-name stringcmp 0 = { + ~ Create the word header. It's a normal docol word, so that much is + ~ simple. + create dropstring + s" docol" find entry-to-execution-token execute , + make-hidden + + ~ This time around we would really rather just always use the alternate, + ~ which already untangles the layered nonsense. 
So we have the payload + ~ call the alternate directly, then exit. We could come up with a way to + ~ then skip forward in the code under transformation, but that would be + ~ complex, and it's unnecessary: We let it keep running, outputting the + ~ usual body of s", which we know will never be reached. + ' hex-string-alternate entry-to-execution-token , + s" exit" find entry-to-execution-token , + + ~ As before, finish up colon's responsibilities, then return control to + ~ the code under transformation. + ' ] entry-to-execution-token execute + exit + } if + + ~ Same deal for .". Hey, we're getting good at this! + dup ' ." entry-to-name stringcmp 0 = { + create dropstring + s" docol" find entry-to-execution-token execute , + make-hidden + + ' hex-dot-string-alternate entry-to-execution-token , + s" exit" find entry-to-execution-token , + + ' ] entry-to-execution-token execute + exit + } if + + ~ We want to suppress the behavior of relink-main-input-buffer-to-stdin + ~ entirely. Happily, that's easy. We need to do this because otherwise the + ~ transformed code will mess with our outer interpreter! + dup s" relink-main-input-buffer-to-stdin" stringcmp 0 = { + create dropstring + s" docol" find entry-to-execution-token execute , + make-hidden + + ~ Return before doing anything. + s" exit" find entry-to-execution-token , + + ' ] entry-to-execution-token execute + exit + } if + + ~ We are entirely replacing sys-write with our own version. It's an + ~ assembly word, and we're replacing it with a Forth word, so there's some + ~ matching code in the ;asm alternate that makes sure to not mess that up. + dup s" sys-write" stringcmp 0 = { + create dropstring + s" docol" find entry-to-execution-token execute , + make-hidden + + ' hex-sys-write-replacement entry-to-execution-token , + s" exit" find entry-to-execution-token , + + ' ] entry-to-execution-token execute + exit + } if + + ~ If no special case matches, we fall back to just being a regular colon. 
+ ~ We already read the word name above, so we have to do the rest of the + ~ steps ourselves as well. + create dropstring + s" docol" find entry-to-execution-token execute , + make-hidden + ' ] entry-to-execution-token execute + ; + +: hex-semicolon-alternate [ ' ; entry-to-execution-token , ] + ; make-immediate + +: hex-semicolon-assembly-alternate + latest @ entry-to-name + dup s" sys-write" stringcmp 0 = { + ~ As detailed in hex-colon-alternate, above, sys-write is implemented in + ~ assembly but we replace it with Forth. This logic here collaborates with + ~ the logic there to make that work. + ~ + ~ The transformed code will still have compiled some assembly, though it + ~ won't be reached and would crash, so we need to fix alignment before we + ~ call semicolon. + drop + here @ 8 packalign here ! + [ ' ; entry-to-execution-token , ] + exit + } if + drop + + ~ If no special case matches, we fall back to the regular behavior. + [ ' ;asm entry-to-execution-token , ] + ; make-immediate + +~ Because docol requires it, we provide a special mini-version of the label +~ system. We only do L@' and L!', because that's all we need. Unlike with the +~ label transform, these are NOT real labels; they're restricted similarly to +~ how they are for the log-load transform. +~ +~ Per the precedence rules in hex-transform-one, this version will only +~ apply for immediate execution; compilation will use the inner label system +~ instead. +: hex-L@'-alternate + s" L@'" find dup { + entry-to-execution-token execute + exit + } { drop } if-else + + word dropstring + transformation-state transformation-state-label-scratch @ + ; make-immediate + +: hex-L!'-alternate + s" L!'" find dup { + entry-to-execution-token execute + exit + } { drop } if-else + + word dropstring + transformation-state transformation-state-label-scratch ! 
+ ; make-immediate + +~ We have to provide alternates for the globals that are bootstrapped by +~ warm-start in normal execution, because code under the hex transform never +~ gets its own copy of warm-start. They don't have to do anything special, +~ though, so we can just use the outer ones. Most of these work unmodified; +~ for "latest" and "here" we invoke the outer copy of the word, but the actual +~ value it sees is the wrapped, inner one, since we swapped that out +~ elsewhere. +: hex-log-alternate log ; +: hex-s0-alternate s0 ; +: hex-r0-alternate r0 ; +: hex-latest-alternate latest ; +: hex-here-alternate here ; + +~ This implements the hex transform for a single word. It is directly +~ analogous to "interpret", and reading interpret.e may help in understanding +~ it, though it's meant to still make sense on its own. +~ +~ The hex transform runs code immediately. Whereas most transforms alter +~ what the transformed code compiles into, the hex transform alters what it +~ outputs. It's assumed that the output is a binary file of some sort; the +~ binary is output as hexadecimal, interspersed with comments describing the +~ code that produced it, including descriptions of what was executed, along +~ with any comments from the original code. +~ +~ The hex transform's alternates take priority over words defined under +~ the transform when running immediately, but for compilation, words defined +~ under the transform take precedence. In the event that neither an alternate +~ nor an entry in the inner dictionary is found, the outer dictionary is +~ checked; otherwise it's irrelevant. +~ +~ The point of this precedence rule is that string literals, which require +~ special treatment, will be using the inner implementation by the time it +~ gets to actually generating an executable image. It's important that there +~ not be any alternates altering the generated code, only the program actually +~ being compiled. 
Other literal syntaxes, such as tick, don't have alternates +~ at all (the hex transform manages to be more parsimonious with alternates +~ than other transforms), and will always be using inner versions. +~ +~ The precedence rule does fail to have the desired effect for label +~ references. For these, the alternates take the unusual step of manually +~ calling the inner versions when they exist. +~ TODO think through whether the precedence rule is actually doing anything... +~ +~ It expects to be called from "hex-transform", below, which loops. +~ +~ The result is 1 once the magic word has been read, which ends the +~ transformation; every other path leaves 0 as the result. +~ +~ (-- done) +: hex-transform-one + word + + ~ If no word was returned, exit. The result is 0 (not done), so the + ~ calling loop will call us again. + ~ NOTE(review): confirm that trying again is what we want whenever "word" + ~ comes back empty. + dup 0 = { drop 0 exit } if + + ~ The string is on the top of the stack, so to get a pointer to it we get + ~ the stack address. + ~ (string) + value@ + + ~ If it's the magic word, end the transformation. + dup s" pyrzqxgl" stringcmp 0 = { drop dropstring 1 exit } if + + ~ Check whether it's one of the words we have alternates for, and look up + ~ the alternate if so. + 0 swap + ~ (name as stack string, placeholder, name pointer) + dup s" ~" stringcmp 0 = { swap drop ' hex-tilde-alternate swap } if + dup s" self-codeword" stringcmp 0 = { + swap drop ' hex-self-codeword-alternate swap } if + ~ It is nontrivial to construct a string with a double-quote in it. + dup ' s" entry-to-name stringcmp 0 = { + swap drop ' hex-string-alternate swap } if + dup ' ." 
entry-to-name stringcmp 0 = { + swap drop ' hex-dot-string-alternate swap } if + dup s" create" stringcmp 0 = { swap drop ' hex-create-alternate swap } if + dup s" :" stringcmp 0 = { swap drop ' hex-colon-alternate swap } if + dup s" ;" stringcmp 0 = { swap drop ' hex-semicolon-alternate swap } if + dup s" ;asm" stringcmp 0 = { + swap drop ' hex-semicolon-assembly-alternate swap } if + dup s" L@'" stringcmp 0 = { swap drop ' hex-L@'-alternate swap } if + dup s" L!'" stringcmp 0 = { swap drop ' hex-L!'-alternate swap } if + dup s" log" stringcmp 0 = { swap drop ' hex-log-alternate swap } if + dup s" s0" stringcmp 0 = { swap drop ' hex-s0-alternate swap } if + dup s" r0" stringcmp 0 = { swap drop ' hex-r0-alternate swap } if + dup s" latest" stringcmp 0 = { swap drop ' hex-latest-alternate swap } if + dup s" here" stringcmp 0 = { swap drop ' hex-here-alternate swap } if + ~ (name as stack string, 0 or alternate entry pointer, name pointer) + + find + ~ (stack string, 0 or alternate entry pointer, 0 or inner entry pointer) + + interpreter-flags @ 0x01 & { + ~ We're in compile mode. An alternate immediate entry has the highest + ~ precedence; an inner entry of any kind has second-highest. + over { + ~ An alternate entry exists; check its flags. + over entry-flags@ 0x01 & { + ~ It's an immediate entry, so it has precedence, regardless of + ~ what's up with the inner entry. Execute it. + drop dropstring-with-result + entry-to-execution-token execute + 0 exit + } { + ~ The alternate is not immediate, so check if there's an inner entry. + dup { + ~ There is also an inner entry. Check its flags. + dup entry-flags@ 0x01 & { + ~ The inner entry is immediate, so it has precedence. Execute it. + swap drop dropstring-with-result + entry-to-execution-token execute + 0 exit + } { + ~ The inner entry is not immediate, so the alternate has + ~ precedence. Compile it. + ~ + ~ Watch this space closely for correctness issues, it's a rare + ~ codepath. 
+ drop dropstring-with-result + entry-to-execution-token , + 0 exit + } if-else + } { + ~ There's no inner entry. Compile the alternate. + ~ + ~ This path, too, is rare and should get close scrutiny for + ~ correctness. + drop dropstring-with-result + entry-to-execution-token , + 0 exit + } if-else + } if-else + } { + ~ There is no alternate entry; check for an inner entry. + dup { + ~ An inner entry exists; check its flags. + dup entry-flags@ 0x01 & { + ~ It's an immediate entry. Execute it. + swap drop dropstring-with-result + entry-to-execution-token execute + 0 exit + } { + ~ It's not an immediate entry. Compile it. + swap drop dropstring-with-result + entry-to-execution-token , + 0 exit + } if-else + } if + ~ If we got here, there's no inner or alternate entry; fall through. + } if-else + } { + ~ We're in immediate mode. An alternate entry of any kind has precedence. + over { + ~ There's an alternate entry. Execute it. + drop dropstring-with-result + entry-to-execution-token execute + 0 exit + } { + ~ There's no alternate entry. Check for an inner entry. + dup { + ~ An inner entry exists. Execute it. + swap drop dropstring-with-result + entry-to-execution-token execute + 0 exit + } if + ~ If we got here, there's no inner or alternate; fall through. + } if-else + } if-else + drop drop + ~ (stack string) + + ~ As a final fallback, we also check the outer dictionary, for immediate + ~ use only. This will allow things like assembly words to work. + value@ + swap-transform-variables + find + swap-transform-variables + dup { + ~ There's an outer entry; check the mode. + interpreter-flags @ 0x01 & { + ~ We're in compile mode; check the outer entry's flags. + dup entry-flags@ 0x01 & { + ~ It's an immediate word; execute it. + dropstring-with-result + entry-to-execution-token + execute + 0 exit + } { + ~ It's not an immediate word. Pretend it doesn't exist, and fall + ~ through. + drop + } if-else + } { + ~ We're in immediate mode. Execute the outer entry. 
+ dropstring-with-result + entry-to-execution-token + execute + 0 exit + } if-else + } { + ~ There's no outer entry. Fall through. + drop + } if-else + ~ (stack string) + + ~ Check whether it's a number literal. + value@ read-integer 0 = { + ~ It's a number. + ~ + ~ (name as stack string, integer value) + dropstring-with-result + ~ (integer value) + + interpreter-flags @ 0x01 & { + ~ We're in compile mode. Compile "lit" followed by the value. + ~ TODO this probably needs a lit alternate (!) + ' lit entry-to-execution-token , , + 0 exit + } if + + ~ We're in immediate mode; the value just stays on the stack, under + ~ the result. + 0 exit + } if + ~ (stack string) + + ~ Nothing matched. Report the word, discard the string, and carry on with + ~ a result of 0. + ." No such word: " value@ emitstring newline dropstring 0 ; + + +~ This implements the hex transform for all words in a region given as +~ an input string. It is directly analogous to "quit", in interpret.e, but is +~ more complex. +~ +~ (output buffer start, output point, input string pointer +~ -- output buffer start, output point) +: hex-transform + main-input-buffer dup push-input-buffer + ~ TODO the arguments for this seem to be backwards from the documentation + swap attach-string-to-input-buffer + + ~ Save the old values of "here" and "latest", and set the initial values + ~ of the internal ones. These values need to persist across iterations, + ~ since client code will make its own updates to them and then rely on those + ~ updates having taken effect. So we do the swap just once, here outside the + ~ loop, and set it back when the loop ends. + ~ + ~ We also take this opportunity to initialize the output-buffer-start and + ~ user-stack-depth fields of transformation-state. + here @ transformation-state transformation-state-saved-here ! + latest @ transformation-state transformation-state-saved-latest ! + over transformation-state transformation-state-output-buffer-start ! + 0 transformation-state transformation-state-user-stack-depth ! 
+ ~ The internal "here" starts at the output point, and the internal + ~ "latest" starts at 0. + here ! + 0 latest ! + ~ Now the stack has nothing of ours on it, so client code can do its thing. + ~ NOTE(review): going by the stack comment above, the output buffer start + ~ should still be sitting on the stack here; confirm that client code never + ~ reaches below its own items. 
+ + ~ It's important that the stack has nothing of ours on it that persists + ~ across iterations, so that client code can add and remove stuff there as + ~ it sees fit. + { hex-transform-one + ~ (done) + + ~ When the loop is done, get the real values of "here" and "latest" + ~ back. The internal "here" is also the output point, and will become our + ~ return value. The internal "latest" is discarded. + { here @ + transformation-state transformation-state-saved-here @ here ! + transformation-state transformation-state-saved-latest @ latest ! + ~ (output point) + + ~ Though we don't actually use transformation-state outside of this + ~ invocation, for tidiness we zero it out. + 0 transformation-state transformation-state-saved-here ! + 0 transformation-state transformation-state-saved-latest ! + 0 transformation-state transformation-state-output-buffer-start ! + 0 transformation-state transformation-state-user-stack-depth ! + + ~ Also put the input source back how it was. + main-input-buffer pop-input-buffer + + exit } if } forever ; + |