summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--README.txt4
-rw-r--r--dynamic.e52
-rw-r--r--execution.e15
-rw-r--r--input.e31
-rw-r--r--linux-dynamic.e356
-rw-r--r--linux.e48
-rw-r--r--transform.e25
7 files changed, 506 insertions, 25 deletions
diff --git a/README.txt b/README.txt
index 88a7147..229ef70 100644
--- a/README.txt
+++ b/README.txt
@@ -20,12 +20,12 @@ it needs to, yet.
   This is a working Evocation interpreter, but it's incomplete and will become
 more so with time. So, next, build Evocation-in-Evocation:
 
-  $ (cat labels.e elf.e transform.e execution.e; echo 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e; echo pyrzqxgl 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e dynamic.e input.e interpret.e flow-control.e ; echo pyrzqxgl; cat evoke.e) | ./quine > evoke
+  $ (cat labels.e elf.e transform.e execution.e; echo 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e; echo pyrzqxgl 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e dynamic.e input.e interpret.e flow-control.e linux-dynamic.e ; echo pyrzqxgl; cat evoke.e) | ./quine > evoke
   $ chmod 755 evoke
 
   Finally, rebuild Evocation-in-Evocation with itself:
 
-  $ (cat labels.e elf.e transform.e execution.e; echo 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e; echo pyrzqxgl 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e dynamic.e input.e interpret.e flow-control.e ; echo pyrzqxgl; cat evoke.e) | ./evoke > evoke2
+  $ (cat labels.e elf.e transform.e execution.e; echo 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e; echo pyrzqxgl 262144 read-to-buffer; cat core.e linux.e output.e amd64.e execution-support.e log-load.e dynamic.e input.e interpret.e flow-control.e linux-dynamic.e ; echo pyrzqxgl; cat evoke.e) | ./evoke > evoke2
   $ chmod 755 evoke2
 
   Now keep your evoke binary somewhere safe, and use it to build new versions
diff --git a/dynamic.e b/dynamic.e
index 0251e20..39ddd3f 100644
--- a/dynamic.e
+++ b/dynamic.e
@@ -204,6 +204,56 @@
   newline ;
 
 
+~   Prints a human-readable rendering of a pointer. If the pointer is in
+~ the log and a containing entry is found, the output is the entry's name,
+~ followed by "+offset" for a nonzero offset past the codeword, nothing more
+~ for the codeword itself, or ":offset" for an address in the header.
+~ Anything else is printed as a hex number prefixed with "0x".
+~
+~ (pointer --)
+: symbolize-pointer
+  dup is-in-log {
+    dup containing-entry dup {
+      ~   Any format we're going to use here starts with the entry's name; by
+      ~ printing it now we don't have to keep track of it later.
+      dup entry-to-name emitstring
+
+      dup entry-to-execution-token
+      ~ (scrutinee, entry pointer, codeword pointer)
+      dup 3 pick >= {
+        ~   The pointer is somewhere in the word's body, so we compute the
+        ~ offset from the codeword.
+        swap drop -
+        ~ (offset)
+        dup {
+          ." +" .
+        } {
+          ~   The pointer goes to the codeword. We don't print anything here,
+          ~ so the output is just the word's name. This will look "right" in
+          ~ a code listing styled like the ones produced by "describe".
+          drop
+        } if-else
+      } {
+        ~   The pointer is in the header, so we compute the offset from the
+        ~ entry address.
+        drop -
+        ~ (offset)
+
+        ~   We always print the offset, even if it's zero; this visually
+        ~ emphasizes the difference between entry addresses and content
+        ~ addresses... or that's the hope.
+        ." :" .
+      } if-else
+
+      ~ All of the above paths printed SOMETHING, so we're done.
+      exit
+    } { drop } if-else
+  } if
+
+  ~ If we fall through, just print it as a number.
+  ." 0x" .hex64 ;
+
+
+~   Walks memory in 8-byte steps, starting from the address that control@
+~ gives, for as long as the comparison against the contents of r0 holds. Each
+~ cell visited is printed on its own line using symbolize-pointer.
+~
+~ (--)
+: list-callers
+  control@ { dup r0 @ > }
+  { dup @ symbolize-pointer newline 8 + } while
+  drop ;
+
+
+
 : bye 0 sys-exit ;
 
 
@@ -238,6 +288,8 @@
 ~ do that. It compiles into a call to the word that's currently being
 ~ defined (strictly speaking, the one whose definition was most recently
 ~ begun).
+~
+~ TODO it seems like maybe the log-load transform breaks this? hm
 : recurse latest @ entry-to-execution-token , ; make-immediate
 
 
diff --git a/execution.e b/execution.e
index 0aa5491..7a10e70 100644
--- a/execution.e
+++ b/execution.e
@@ -147,6 +147,21 @@
 
 ~   The macros next, beforenext, pushcontrol, and popcontrol are implemented
 ~ in execution-support.e. It's a good idea to go read about them now.
+~
+~ TODO the following high-level summary should go... somewhere?
+~   When a docol word is invoked, it immediately does pushcontrol (see
+~ execution-support.e) to push rsi onto the control stack, whose top is
+~ indicated by rbp. When an assembly word is invoked, it leaves rsi and rbp
+~ alone; assembly words don't normally call other words, so there's no need
+~ for them to appear on the control stack at all.
+~
+~   When an assembly word returns, it looks for an execution token pointed
+~ to by rsi, increments rsi, and invokes that word as if calling it. This
+~ is the threaded calling approach to Forth execution. When a docol word
+~ returns, it calls exit, which needs to remove itself from the control
+~ stack, so exit pops the top of the control stack into rsi to get back into
+~ its caller's context, then does the same steps an assembly word would do.
+
 
 ~ Constants
 ~ ~~~~~~~~~
diff --git a/input.e b/input.e
index 52fe717..dd5b7d3 100644
--- a/input.e
+++ b/input.e
@@ -226,16 +226,31 @@
 : refill-input-buffer-from-stdin
   dup normalize-buffer
   dup compute-next-buffer-free-block
+  ~ (metadata pointer, destination start, destination length)
   ~   Check whether the buffer is full. If not, do a read. If so, that's not
   ~ an error, just clean up and take no action.
-  dup { swap sys-read
-        dup 0 > {
-          dup -4 =
-          { drop recurse }
-          { drop drop s" Read error." emitstring 0 sys-exit } if-else }
-        { swap dup buffer-logical-length @ 3roll +
-          swap buffer-logical-length ! } if-else }
-      { drop drop } if-else ;
+  dup { swap
+        ~ (metadata pointer, destination length, destination start)
+        { 2dup sys-read
+          ~ (metadata pointer, destination length, destination start,
+          ~  read result)
+          dup 0 > {
+            dup -4 =
+            {
+              ~   This is the EINTR case, so we're supposed to try the same
+              ~ read again. To that end, this whole thing is running in a
+              ~ loop, and all the other paths exit.
+              drop
+              ~ (metadata pointer, destination length, destination start)
+            }
+            { drop drop drop drop
+              ~   Any other negative result is fatal. We exit with a nonzero
+              ~ status so that a shell pipeline can tell the run failed.
+              ." Read error." 1 sys-exit } if-else }
+          { 3unroll drop drop
+            ~ (metadata pointer, read result)
+            swap dup buffer-logical-length @ 3roll +
+            ~ (metadata pointer, adjusted logical length)
+            swap buffer-logical-length ! exit } if-else } forever }
+      { drop drop drop } if-else ;
 
 
 
diff --git a/linux-dynamic.e b/linux-dynamic.e
new file mode 100644
index 0000000..4c73bb7
--- /dev/null
+++ b/linux-dynamic.e
@@ -0,0 +1,356 @@
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~ ~~ More system calls for Linux ~~
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   Everything that takes a struct is here, because that lets us define the
+~ system call next to the allocation stuff, for ease of reference. There are
+~ also higher-level facilities, later in the file.
+
+
+~   Since the only way to know the struct layout is by reading the kernel
+~ source, we put that definition here, as well.
+~ (-- struct pointer)
+: allocate-timespec
+  16 allocate ;                  ~ two 8-byte fields
+~ (struct pointer -- field pointer)
+: timespec-seconds ;             ~ first field, at offset 0
+: timespec-nanoseconds
+  8 + ;                          ~ second field, at offset 8
+
+
+~ (nanoseconds -- result code)
+: nanosleep
+  1000000000 /%
+  ~ (nanoseconds, seconds)
+  allocate-timespec
+  dup timespec-seconds 3roll swap !
+  dup timespec-nanoseconds 3roll swap !
+  ~ (request pointer)
+  allocate-timespec
+  dup timespec-seconds 0 swap !
+  dup timespec-nanoseconds 0 swap !
+  ~ (request pointer, remainder pointer)
+  dup 3unroll
+  ~ (remainder pointer, request pointer, remainder pointer)
+  35                             ~ syscall number
+  syscall-2
+  ~ (remainder pointer, result code)
+  ~   Keep the result code, as the interface promises, and discard only the
+  ~ pointer to the remainder struct.
+  swap drop ;
+
+
+~   The old name was sigaction(), but per the glibc manpages, that was
+~ replaced by rt_sigaction() quite some time ago, to allow larger signal set
+~ bitmaps for the benefit of realtime signals. Both are still present; we use
+~ rt_sigaction(). There are also two versions of rt_sigaction; grep
+~ ODD_RT_SIGACTION in the kernel source for more details. Neither x86
+~ architecture has the "odd" version of rt_sigaction(), so we use the regular
+~ one, not to be confused with the "compat" one. Its definition is in
+~ kernel/signal.c, in case you need to check the struct sizes and layouts.
+~
+~   The parameters are: An integer signal identifier; an optional pointer to a
+~ struct describing the new action to bind (or unbind if NULL); an optional
+~ pointer to another of the same struct to hold a copy of the old action; and
+~ a 64-bit word which must describe the correct size of sigset_t, which is a
+~ C type used for the mask field of that struct. Any other value for the size
+~ field is an error.
+~
+~   Mandatory size fields start to make sense when you have a compatibility
+~ situation this convoluted... Think of it as the caller promising they know
+~ which version of the API they're calling. The unmarked version of
+~ rt_sigaction() takes a 64-bit sigset_t, and the parameter wants the size in
+~ bytes, so we use a value of 8.
+~
+~   Anyway, we aren't C and don't have POSIX naming obligations, so we just
+~ call it sys-sigaction, in keeping with the other sys- words.
+~
+~ (signal number, new action pointer, old action pointer -- result code)
+: sys-sigaction
+  ~   The fourth parameter is the size of sigset_t in bytes, which is 8; the
+  ~ syscall number, 13, goes on top of it.
+  8 13 syscall-4 ;
+
+
+~ (new stack pointer, old stack pointer -- result code)
+: sys-sigaltstack 131 syscall-2 ;   ~ 131 is the syscall number
+
+
+~   Since the only way to know the struct layout is by reading the kernel
+~ source, we put that definition here, as well.
+~
+~   There are MANY versions of this struct; this is the appropriate one for
+~ amd64.
+~
+~ (-- struct pointer)
+: allocate-sigaction
+  here @ 128 packalign here !
+  32 allocate ;                  ~ four 8-byte fields
+~ (struct pointer -- field pointer)
+: sigaction-action ;             ~ offset 0
+: sigaction-flags 8 + ;
+: sigaction-restorer 16 + ;
+: sigaction-mask 24 + ;
+
+
+~ (-- struct pointer)
+: allocate-sigaltstack
+  here @ 128 packalign here !
+  24 allocate ;                  ~ three 8-byte fields
+~ (struct pointer -- field pointer)
+: sigaltstack-pointer ;          ~ offset 0
+: sigaltstack-flags 8 + ;
+: sigaltstack-size 16 + ;
+
+
+~ High-level facilities
+~ ~~~~~~~~~~~~~~~~~~~~~
+
+~   It's possible to set up an alternate stack for signal handlers. We don't,
+~ though, so it's possible this code has bitrotted. At the very least, it
+~ should be more configurable than this.
+~
+~   Note that for it to actually be used, there also needs to be a flag set
+~ at the time the action is bound.
+~ (--)
+: prepare-signal-stack
+  here @ 2048 packalign here !
+  1024 1024 * 4 * allocate
+  ~ (stack address)
+  allocate-sigaltstack
+  ~ (stack address, struct pointer)
+  2dup sigaltstack-pointer !
+  dup sigaltstack-flags 0 swap !
+  ~   The size we report has to match the allocation above. Claiming more
+  ~ than we allocated would tell the kernel it may use memory past the end
+  ~ of the stack area.
+  dup sigaltstack-size 1024 1024 * 4 * swap !
+  ~ (stack address, struct pointer)
+  allocate-sigaltstack
+  sys-sigaltstack
+  ~ (stack address, result code)
+  ~   Neither value is needed afterwards, so drop both and leave the value
+  ~ stack as we found it.
+  drop drop ;
+
+
+~   On amd64, and on no other architecture, the Linux kernel requires that
+~ the language runtime use a "restorer" when binding signal handlers. This
+~ is that restorer. The kernel wants a raw pointer that it can use as a
+~ C-style return address on the C stack, which is our value stack. So, this
+~ has to be an assembly word, it can't use docol. When the data structure is
+~ populated by bind-signal, we'll dereference the codeword and pass that
+~ value, but there's no chance to give it the usual callee address in rax, so
+~ we can't use a Forth-style interpreter codeword, it has to be a
+~ self-codeword.
+~
+~   The purpose of the trampoline is to be invoked by our signal handler. The
+~ manner of that invocation MUST be returning into it with the "ret"
+~ instruction; otherwise the C stack won't be right for cleanup to work
+~ properly.
+~
+~   When invoked, the trampoline invokes the syscall "sigreturn", whose sole
+~ purpose is to be called from this trampoline. That syscall won't return in
+~ a conventional way, so we don't bother handling the scenario where it does.
+~
+~   According to commentary in the Go compiler's internals[1][2][3], gdb
+~ recognizes the trampoline based on its exact byte values, since the intent
+~ is only to be compatible with glibc. We intend to be our own debugger
+~ anyway, so we don't worry about that. We're not seeking fame and we don't
+~ have a corporate image to uphold, so that level of fragility and contortion
+~ is just too much. The thing about compatibility constraints is knowing when
+~ to work with them and when to walk away.
+~
+~   Experimentally, it is also possible to avoid using this trampoline by
+~ faking it: set the "restorer" bit in the action flags, but pass a null
+~ pointer as the restorer, then have the handler pop 8 bytes from the stack
+~ and invoke sigreturn directly. The actual requirement seems to be that rsp
+~ points at the saved state, at the time of invoking sigreturn. We're not
+~ doing that, because signal handling is not intended to be
+~ performance-critical and it feels like asking for trouble, but the
+~ possibility is noted here against future use.
+~
+~   This doesn't have an interface definition comment, because it doesn't use
+~ the Forth execution model.
+~
+~ [1] https://go.googlesource.com/go/+/refs/heads/master/src/runtime/sys_linux_amd64.s#472
+~ [2] https://go.googlesource.com/go/+/refs/heads/master/src/runtime/os_linux.go#476
+~ [3] https://go.googlesource.com/go/+/refs/heads/master/src/runtime/defs_linux_amd64.go#118
+: signal-return-trampoline
+  [ here @
+    ~   Load the syscall number into rax and enter the kernel. Nothing is
+    ~ assembled after the syscall instruction, because we don't handle the
+    ~ scenario where it returns.
+    15 :rax mov-reg64-imm64        ~ sigreturn
+    syscall
+    here ! ] ;asm
+
+~   This accepts an execution token. It creates a hidden word on the log which
+~ wraps that execution token with necessary setup and teardown to run as a
+~ Unix signal handler, and returns the execution token of the wrapper.
+~
+~   Specifically, on invocation, the wrapper ensures that rsi points to its
+~ second half and rbp points to the top of the control stack; loads the target
+~ execution token into rax; then indirectly calls it. This is the usual
+~ interface of a normal call in the Forth execution model, so the wrapped word
+~ can be based on docol, on a self-codeword, or on any other interpreter word
+~ it wants. It can also freely call whatever Forth things it wants.
+~
+~   We don't have to do anything about rsp because the invariants for our use
+~ of it as the value stack are a subset of the invariants for C's use of it as
+~ its only stack. It's already working the way we need it to.
+~
+~   When the wrapped word returns, it uses the rsi the wrapper provided to do
+~ so, which places control in the second half of the wrapper. This second half
+~ simply executes a "ret" instruction, which is the necessary invocation of
+~ the signal return trampoline (see above). This will transfer control back
+~ to the kernel, and will ultimately result in Forth execution resuming where
+~ it left off before the signal was delivered.
+~
+~   Crucially, the wrapper relies on the kernel preserving the value of rbp
+~ that existed at the moment before the control transfer began. Signal
+~ delivery is an UNCONTROLLED control transfer, meaning that we as the
+~ language runtime do not have an opportunity to execute any cleanup before it
+~ happens. If it were a controlled transfer, we would be able to save rbp to
+~ a global variable somewhere, and restore it in the wrapper. It's not, so we
+~ don't have that chance.
+~
+~   Notionally we could freshly allocate a new control stack somewhere else,
+~ and set rbp to point to it, but it would be challenging to do that without
+~ relying on the control stack, and inefficient to execute, and the call for
+~ now is that that's not worth it.
+~
+~   The only situation in which this limitation will become a practical
+~ concern is if, at the time of signal delivery, something outside the Forth
+~ execution model is happening. In that case, the wrapper will likely crash.
+~
+~   As a long term strategy, the way to mitigate this would be to make sure
+~ that all non-signal transfers from within the Forth execution model to
+~ outside it are controlled, and that they save global state that can be
+~ reconstructed here. For now, we leave this as future work.
+~
+~ (execution token -- execution token)
+: wrap-signal-handler
+  ~   We generate a word entry for the wrapper, and hide it. Since it's
+  ~ hidden, the name doesn't have to be unique. This keeps the log clean, so
+  ~ that all the space on it will always be attributable to some specific
+  ~ word. Remember kids, keeping the log clean is everyone's responsibility!
+  s" signal-handler-wrapper" create make-hidden
+
+  ~ This self-codeword will be consumed by bind-signal.
+  self-codeword
+
+  here @ dup
+  ~ (inner execution token, saved location, output point)
+
+  ~   It's our responsibility as a caller to set rsi to point to the address
+  ~ of an execution token, which will pick up where we left off. That token
+  ~ will be our own second half, whose address we don't yet know, so we
+  ~ output a placeholder opcode here and overwrite it once we do. That's why
+  ~ we've saved the current location.
+  0 :rsi mov-reg64-imm64
+
+  ~   We also need to make sure that rbp points to an area that can be treated
+  ~ as the top of the control stack (there's no need to ever unwind past it,
+  ~ so it doesn't have to be the "real" one). Fortunately, it comes to us
+  ~ already valid and we don't have to do anything about that. Plus it even
+  ~ does happen to be the real one, which will let stack tracing code run in
+  ~ a handler, and we do care about that.
+
+  ~ We also need to set DF = 0, since that's also part of our ABI.
+  cld
+
+  ~   Compare this snippet to "execute" in core.e. Instead of taking rax from
+  ~ the stack, we set a hardcoded value picked at the time we generate the
+  ~ wrapper. We then do the same indirect jump via the codeword it points to,
+  ~ which allows the codeword's implementation to take advantage of rax
+  ~ pointing to the callee; that's the property docol cares about.
+  3roll :rax mov-reg64-imm64
+  :rax jmp-abs-indirect-reg64
+  ~ (saved location, output point)
+
+  8 packalign
+  here !
+  ~ (saved location)
+
+  ~   Now we have our second half, which has another codeword that rsi will
+  ~ point to for our callee's benefit. This half runs after the wrapped word,
+  ~ and has the responsibility of cleaning up and returning control to the
+  ~ kernel, which it does by returning to the restorer trampoline. Yes, this
+  ~ is a trampoline which passes control to another trampoline.
+  ~
+  ~   Although we run under the log-load transform, we won't ever actually be
+  ~ invoked until at least log-load time, if not ultimate runtime. Both of
+  ~ those are in the target address space. So there's no address translation
+  ~ going on behind our back. Nonetheless, we avoid directly outputting any
+  ~ address except what we get via self-codeword, which would be recommended
+  ~ practice under the transforms. Yeah, it's a little convoluted, perhaps
+  ~ unnecessarily so.
+  here @
+  self-codeword
+  @ 8 -
+  ~ (saved location, second half execution token)
+
+  ~   Something subtle here: That above "codeword" was actually a word
+  ~ pointer. See, because we're pretending the wrapper is a Forth word, even
+  ~ though we're writing it in assembly, so "returning" to it means invoking
+  ~ the next word pointer in the word pointer array that is its compiled form.
+  ~ Instead of creating a separate memory area though, we just put the pointer
+  ~ target right here, as another codeword...
+  self-codeword
+
+  ~   Now we treat the saved location as an output point, and re-output the
+  ~ mov instruction that we stubbed out above. Because our assembler words
+  ~ always output a specific, exact form of the instruction, we know it will
+  ~ take up the same number of bytes.
+  :rsi mov-reg64-imm64
+  ~   The assembler word hands back the output point just past the patched
+  ~ instruction; we don't need it, since the real output point is in "here".
+  drop
+
+  ~   Having done that, we can get on to the body of our second half. Happily,
+  ~ it's quite short.
+  here @
+  ~ (output point)
+
+  ret
+
+  8 packalign
+  here !
+
+  ~   Our caller wants an execution token that invokes all this. Since we
+  ~ used "create" above, that's easy to get.
+  latest @ entry-to-execution-token ;
+
+
+~   This accepts an execution token and a Unix signal number, and binds the
+~ token to be the handler for the signal. It also does other necessary setup,
+~ including picking appropriate flags for the binding and attaching the
+~ return trampoline (see above).
+~
+~   Typically, you will want this execution token to be one returned by
+~ wrap-signal-handler. Doing this will allow the handler to use the Forth
+~ execution model in any way it wants, including calling both docol words and
+~ assembly words, and working with both the control and value stacks at will.
+~
+~   There is an important limitation of wrap-signal-handler, described in more
+~ detail above: Its wrapper only functions correctly when Forth code was
+~ running at the time of the control transfer. For example, if Forth had
+~ called into C, and then that C were interrupted by a signal, the signal
+~ handler would have no way of finding the top of the control stack.
+~
+~   If your program involves many callbacks back-and-forth between C and
+~ Forth, you may wish to forego the use of the wrapper and provide an
+~ execution token meant to be invoked directly by the kernel. In this case,
+~ bear in mind that its execution must not use the control stack - that is,
+~ it must not rely on having been given sensible values of rsi or rbp. This
+~ means it can't call other Forth words (unless it does something about that
+~ on its own).
+~
+~   Regardless of whether the execution token is a copy of the wrapper or not,
+~ it must be an assembly word, not a docol word. The kernel wants a raw
+~ pointer that it can simulate a C-style call to, so we dereference the
+~ codeword and pass that value, just as we do with the return trampoline. Also
+~ as with the return trampoline, there is no way to pass the callee in rax,
+~ which is the usual interface docol and other interpreter words expect. So,
+~ it needs to be a self-codeword.
+~
+~ (execution token, signal number --)
+: bind-signal
+  allocate-sigaction
+  ~ (execution token, signal number, struct pointer)
+  ~   The action field receives the contents of the token's codeword, which
+  ~ is the raw pointer the kernel wants, rather than the token itself.
+  dup sigaction-action 4 roll @ swap !
+  dup sigaction-mask 0 swap !
+  ~   0x04000000 is SA_RESTORER in the kernel headers; it goes with the
+  ~ restorer pointer we fill in next.
+  dup sigaction-flags 0x04000000 swap !
+  dup sigaction-restorer
+  ' signal-return-trampoline entry-to-execution-token @ swap !
+  ~ (signal number, struct pointer)
+  ~   The second struct is where the kernel writes the old action. We don't
+  ~ look at it, or at the result code.
+  allocate-sigaction
+  sys-sigaction drop ;
+
+
+~   Prints what list-callers finds, then ends the process with exit code 1.
+: handle-crash
+  list-callers
+  1 sys-exit ;
+
+~   Wraps handle-crash as a signal handler and binds it to signal 11, which
+~ is SIGSEGV on Linux.
+~
+~ (--)
+: install-crash-handler
+  ' handle-crash entry-to-execution-token
+  wrap-signal-handler
+  11                             ~ signal number
+  bind-signal ;
+
+~   There are scenarios where someone might want to disable this, for example
+~ if calling back and forth between C and Evocation, but for now we always
+~ enable it.
+install-crash-handler
+
diff --git a/linux.e b/linux.e
index bf5a3e1..fb386f9 100644
--- a/linux.e
+++ b/linux.e
@@ -11,6 +11,22 @@
 ~ that, since rsp is preserved, or we can use one of the other registers. We
 ~ don't ourselves save other registers because our caller should do that, if
 ~ it cares.
+~
+~   In the kernel source, you may find the following files useful to
+~ reference:
+~
+~   * arch/x86/entry/syscalls/syscall_64.tbl
+~   * include/linux/syscalls.h
+~   * include/linux/compat.h
+~
+~   Don't be confused by tools/scripts/syscall.tbl, it's not for x86.
+~
+~   This file loads early, before dynamic.e. So, although it superficially
+~ appears to be able to do allocation and high-level flow control, those come
+~ from the transforms, which means they're shallow implementations. The stuff
+~ here can't be moved later, because input.e relies on it, and the dynamic
+~ stuff relies on that. So, there's additional Linux functionality in another
+~ file that loads later, linux-dynamic.e.
 
 
 ~ (call number -- return value)
@@ -21,91 +37,93 @@
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
-~ (call number, first param -- return value)
+~ (first param, call number -- return value)
 : syscall-1
   [ here @
-    :rdi pop-reg64               ~ first param
     :rax pop-reg64               ~ syscall number
+    :rdi pop-reg64               ~ first param
     syscall
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
-~ (call number, first param, second param -- return value)
+~ (first param, second param, call number -- return value)
 : syscall-2
   [ here @
     :rsi :rbx mov-reg64-reg64    ~ save rsi
+    :rax pop-reg64               ~ syscall number
     :rsi pop-reg64               ~ second param
     :rdi pop-reg64               ~ first param
-    :rax pop-reg64               ~ syscall number
     syscall
     :rbx :rsi mov-reg64-reg64    ~ restore rsi
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
-~ (call number, first param, second param, third param -- return value)
+~ (first param, second param, third param, call number -- return value)
 : syscall-3
   [ here @
     :rsi :rbx mov-reg64-reg64    ~ save rsi
+    :rax pop-reg64               ~ syscall number
     :rdx pop-reg64               ~ third param
     :rsi pop-reg64               ~ second param
     :rdi pop-reg64               ~ first param
-    :rax pop-reg64               ~ syscall number
     syscall
     :rbx :rsi mov-reg64-reg64    ~ restore rsi
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
-~ (call number, first param, second param, third param, fourth param
+~ (first param, second param, third param, fourth param, call number
 ~  -- return value)
 : syscall-4
   [ here @
     :rsi :rbx mov-reg64-reg64    ~ save rsi
+    :rax pop-reg64               ~ syscall number
     :r10 pop-extrareg64          ~ fourth param
     :rdx pop-reg64               ~ third param
     :rsi pop-reg64               ~ second param
     :rdi pop-reg64               ~ first param
-    :rax pop-reg64               ~ syscall number
     syscall
     :rbx :rsi mov-reg64-reg64    ~ restore rsi
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
 
-~ (call number, first param, second param, third param, fourth param,
-~  fifth param -- return value)
+~ (first param, second param, third param, fourth param, fifth param,
+~  call number -- return value)
 : syscall-5
   [ here @
     :rsi :rbx mov-reg64-reg64    ~ save rsi
+    :rax pop-reg64               ~ syscall number
     :r8 pop-extrareg64           ~ fifth param
     :r10 pop-extrareg64          ~ fourth param
     :rdx pop-reg64               ~ third param
     :rsi pop-reg64               ~ second param
     :rdi pop-reg64               ~ first param
-    :rax pop-reg64               ~ syscall number
     syscall
     :rbx :rsi mov-reg64-reg64    ~ restore rsi
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
 
-~ (call number, first param, second param, third param, fourth param,
-~  fifth param, sixth param -- return value)
+~ (first param, second param, third param, fourth param, fifth param,
+~  sixth param, call number -- return value)
 : syscall-6
   [ here @
     :rsi :rbx mov-reg64-reg64    ~ save rsi
+    :rax pop-reg64               ~ syscall number
     :r9 pop-extrareg64           ~ sixth param
     :r8 pop-extrareg64           ~ fifth param
     :r10 pop-extrareg64          ~ fourth param
     :rdx pop-reg64               ~ third param
     :rsi pop-reg64               ~ second param
     :rdi pop-reg64               ~ first param
-    :rax pop-reg64               ~ syscall number
     syscall
     :rbx :rsi mov-reg64-reg64    ~ restore rsi
     :rax push-reg64              ~ return value
     here ! ] ;asm
 
 
+~ Raw system calls
+~ ~~~~~~~~~~~~~~~~
 
 ~   This does the Linux exit() system call, passing it an exit code taken
 ~ from the stack. It does not return.
@@ -148,7 +166,7 @@
     here ! ] ;asm
 
 
-~ (length to read, base address -- *)
+~ (length to read, base address -- result code)
 : sys-read
   [ here @
     :rcx pop-reg64               ~ address from stack
diff --git a/transform.e b/transform.e
index 81e99b6..4004db9 100644
--- a/transform.e
+++ b/transform.e
@@ -715,6 +715,8 @@ allocate-transform-state s" transform-state" variable
   dup s" describe" stringcmp 0 = { drop -1 exit } if
   dup s" describe-all" stringcmp 0 = { drop 0 exit } if
   dup s" describe-compilation" stringcmp 0 = { drop 0 exit } if
+  dup s" symbolize-pointer" stringcmp 0 = { drop -1 exit } if
+  dup s" list-callers" stringcmp 0 = { drop 0 exit } if
   dup s" forget" stringcmp 0 = { drop -1 exit } if
   dup s" ," stringcmp 0 = { drop -1 exit } if
   dup s" make-immediate" stringcmp 0 = { drop 0 exit } if
@@ -775,6 +777,29 @@ allocate-transform-state s" transform-state" variable
   ~   The following is a deliberate omission: interpret.
   dup s" quit" stringcmp 0 = { drop 0 exit } if
 
+  ~ From linux-dynamic.e.
+  dup s" allocate-timespec" stringcmp 0 = { drop 1 exit } if
+  dup s" timespec-seconds" stringcmp 0 = { drop 0 exit } if
+  dup s" timespec-nanoseconds" stringcmp 0 = { drop 0 exit } if
+  dup s" nanosleep" stringcmp 0 = { drop 0 exit } if
+  dup s" sys-sigaction" stringcmp 0 = { drop -2 exit } if
+  dup s" sys-sigaltstack" stringcmp 0 = { drop -1 exit } if
+  dup s" allocate-sigaction" stringcmp 0 = { drop 1 exit } if
+  dup s" sigaction-action" stringcmp 0 = { drop 0 exit } if
+  dup s" sigaction-flags" stringcmp 0 = { drop 0 exit } if
+  dup s" sigaction-restorer" stringcmp 0 = { drop 0 exit } if
+  dup s" sigaction-mask" stringcmp 0 = { drop 0 exit } if
+  dup s" allocate-sigaltstack" stringcmp 0 = { drop 1 exit } if
+  dup s" sigaltstack-pointer" stringcmp 0 = { drop 0 exit } if
+  dup s" sigaltstack-flags" stringcmp 0 = { drop 0 exit } if
+  dup s" sigaltstack-size" stringcmp 0 = { drop 0 exit } if
+  dup s" prepare-signal-stack" stringcmp 0 = { drop 0 exit } if
+  ~   The following is a deliberate omission: signal-return-trampoline.
+  dup s" wrap-signal-handler" stringcmp 0 = { drop 0 exit } if
+  dup s" bind-signal" stringcmp 0 = { drop -2 exit } if
+  ~   The following is a deliberate omission: handle-crash.
+  dup s" install-crash-handler" stringcmp 0 = { drop 0 exit } if
+
   ~ Created by warm-start in execution.e.
   dup s" log" stringcmp 0 = { drop 1 exit } if
   dup s" s0" stringcmp 0 = { drop 1 exit } if