8 files changed, 1082 insertions, 105 deletions
diff --git a/amd64.e b/amd64.e
new file mode 100644
index 0000000..4ffc64f
--- /dev/null
+++ b/amd64.e
@@ -0,0 +1,895 @@
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~ ~~ Assembly language for the AMD64 architecture ~~
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~
+~   This is also often called the x86-64 architecture, but Intel didn't
+~ invent it (they had their chance) and there's no reason to name it after
+~ their product line. We have a bunch of assembler words that, taken as a
+~ whole, form a sort of assembly language inside of the Forth-style language.
+~
+~   It's all backwards and stuff.
+~
+~   Okay, but seriously, the convention is: target on the top of the stack,
+~ source behind it. This is similar to how the Forth "!" and "@" words work.
+~
+~   These routines use the binary packing routines such as pack64, defined in
+~ core.e. They're called in the same way: an output address which we call the
+~ "output point", followed by data items specific to what's being output. They
+~ also chain together in the same way, returning the updated output point.
+~
+~   TODO cite the Intel reference manual here and explain the notation used
+~ for the section citations below
+~
+~   TODO define instructions, assembly code, machine code, opcodes. if we ever
+~ also want to recommend a children's introduction to binary, this might be
+~ the place to do it.
+
+
+~ Keywords
+~ ~~~~~~~~
+~
+~   We define a bunch of keywords, which evaluate to their own codeword
+~ addresses. We use these to refer to registers and condition codes by name.
+~
+~
+~ On registers
+~ ~~~~~~~~~~~~
+~
+~   The x86 architecture has been around a while, it has been through
+~ several transitions from smaller word sizes to larger ones. Therefore it
+~ has different names for the "same" registers, depending on how much of
+~ them you're using.
+~
+~ TODO there's more to write here
+
+~   The names of the 64-bit registers. The second half of these are considered
+~ "extended" registers because they don't correspond to 32-bit registers in
+~ the way the first eight do.
+~   reg64 encodes the first eight of these as 0 through 7, in the order
+~ listed; extrareg64 does the same for the second eight.
+s" :rax" keyword
+s" :rcx" keyword
+s" :rdx" keyword
+s" :rbx" keyword
+s" :rsp" keyword
+s" :rbp" keyword
+s" :rsi" keyword
+s" :rdi" keyword
+s" :r8" keyword
+s" :r9" keyword
+s" :r10" keyword
+s" :r11" keyword
+s" :r12" keyword
+s" :r13" keyword
+s" :r14" keyword
+s" :r15" keyword
+
+~   The names of the 32-bit registers. The processor treats these as being
+~ alternate names for the low halves of the 64-bit registers. There is a
+~ very finicky distinction about what that means in different settings: Some
+~ instructions operate on a 32-bit source or target, while others merely
+~ accept a 32-bit value that gets sign-extended to 64 bits. We've taken pains
+~ to clarify these cases in the instruction-specific notes, as they come up.
+~   These are the names that the reg32 word accepts.
+s" :eax" keyword
+s" :ecx" keyword
+s" :edx" keyword
+s" :ebx" keyword
+s" :esp" keyword
+s" :ebp" keyword
+s" :esi" keyword
+s" :edi" keyword
+
+~   The names of the 16-bit registers. Similarly, the processor treats these
+~ as being alternate names for the low halves of the 32-bit registers.
+~   reg16 encodes these as 0 through 7, in the order listed.
+s" :ax" keyword
+s" :cx" keyword
+s" :dx" keyword
+s" :bx" keyword
+s" :sp" keyword
+s" :bp" keyword
+s" :si" keyword
+s" :di" keyword
+
+~   The names of the 8-bit registers. The pattern here is a little bit
+~ different; these come in "low" and "high" pairs, where for example :al is
+~ the low half of :ax and :ah is the high half. Yes, this architecture grows
+~ like a tree, with all the old things being still present, surrounded in the
+~ new ones.
+~   reg8 encodes these as 0 through 7, in the order listed: the four low
+~ halves first, then the four high halves.
+s" :al" keyword
+s" :cl" keyword
+s" :dl" keyword
+s" :bl" keyword
+s" :ah" keyword
+s" :ch" keyword
+s" :dh" keyword
+s" :bh" keyword
+
+~   The condition codes. Yes, there sure is a lot of duplication in these
+~ names. The names are based on Intel's documented mnemonics...
+~
+~   "Above" and "below" are for unsigned comparisons. "Greater" and "less" are
+~ for signed comparisons.
+~
+~   This is documented on the individual opcode pages, and also in B.1.4.7.
+~   condition-code encodes these as 0 through 15, in the order listed. Each
+~ odd-numbered entry is the negation of the even-numbered one before it.
+s" :cc-overflow" keyword
+s" :cc-no-overflow" keyword
+s" :cc-below" keyword
+s" :cc-above-equal" keyword
+s" :cc-equal" keyword
+s" :cc-not-equal" keyword
+s" :cc-below-equal" keyword
+s" :cc-above" keyword
+s" :cc-sign" keyword
+s" :cc-not-sign" keyword
+s" :cc-even" keyword
+s" :cc-odd" keyword
+s" :cc-less" keyword
+s" :cc-greater-equal" keyword
+s" :cc-less-equal" keyword
+s" :cc-greater" keyword
+
+
+~ Bits and pieces
+~ ~~~~~~~~~~~~~~~
+~
+~   Here, we have a bunch of helpers which generate specific encoded
+~ representations that are part of many instructions. We start with the
+~ trivial ones that handle individual fields, then work up to combinations of
+~ fields.
+~
+~   When we say that a word accepts a register as a parameter, what we mean
+~ is it accepts the name keyword for that register. When we say that a word
+~ accepts a scale factor, what we mean is that it accepts a byte count for
+~ that scale factor. In the cases where we mean the encoded form, we'll say
+~ "encoded value" or "value".
+~
+~ TODO surely we can find a way to have real flow-control words
+
+~ (register -- 3-bit encoded value for register)
+: reg64
+  ~   In counting the words for the branches, notice that each integer literal
+  ~ is two words.
+  ~
+  ~   Each line below is one case. It compares a copy of the parameter with
+  ~ one register keyword. On a match it drops the parameter, pushes the
+  ~ encoded value, and returns. On a mismatch, 0branch skips five cells (the
+  ~ offset itself, drop, the two-word literal, and exit) to reach the next
+  ~ case with the parameter still on the stack.
+  dup :rax = 0branch [ 5 8 * , ] drop 0 exit
+  dup :rcx = 0branch [ 5 8 * , ] drop 1 exit
+  dup :rdx = 0branch [ 5 8 * , ] drop 2 exit
+  dup :rbx = 0branch [ 5 8 * , ] drop 3 exit
+  dup :rsp = 0branch [ 5 8 * , ] drop 4 exit
+  dup :rbp = 0branch [ 5 8 * , ] drop 5 exit
+  dup :rsi = 0branch [ 5 8 * , ] drop 6 exit
+  dup :rdi = 0branch [ 5 8 * , ] drop 7 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to reg64 is not a reg64." 1 sys-exit ;
+
+~ (register -- 3-bit encoded value for register)
+: extrareg64
+  ~   One case per line: on a match, replace the parameter with the encoded
+  ~ value and return; otherwise 0branch skips five cells to the next case.
+  ~
+  ~   The result is only three bits wide, so :r8 encodes the same as :rax
+  ~ would under reg64. Callers distinguish the two sets with a rex prefix; see
+  ~ mov-extrareg64-imm64, which uses rex-wb.
+  dup :r8 = 0branch [ 5 8 * , ] drop 0 exit
+  dup :r9 = 0branch [ 5 8 * , ] drop 1 exit
+  dup :r10 = 0branch [ 5 8 * , ] drop 2 exit
+  dup :r11 = 0branch [ 5 8 * , ] drop 3 exit
+  dup :r12 = 0branch [ 5 8 * , ] drop 4 exit
+  dup :r13 = 0branch [ 5 8 * , ] drop 5 exit
+  dup :r14 = 0branch [ 5 8 * , ] drop 6 exit
+  dup :r15 = 0branch [ 5 8 * , ] drop 7 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to extrareg64 is not an extrareg64." 1 sys-exit ;
+
+~ (register -- 3-bit encoded value for register)
+: reg32
+  ~   One case per line: compare a copy of the parameter with one register
+  ~ keyword; on a match, drop the parameter, push the encoded value, and
+  ~ return. On a mismatch, 0branch skips five cells (the offset itself, drop,
+  ~ the two-word literal, and exit) to reach the next case.
+  ~
+  ~   The encoded values run from 0 to 7 in the same order as the 64-bit and
+  ~ 16-bit names, since these are the low halves of those same registers.
+  dup :eax = 0branch [ 5 8 * , ] drop 0 exit
+  dup :ecx = 0branch [ 5 8 * , ] drop 1 exit
+  dup :edx = 0branch [ 5 8 * , ] drop 2 exit
+  dup :ebx = 0branch [ 5 8 * , ] drop 3 exit
+  dup :esp = 0branch [ 5 8 * , ] drop 4 exit
+  dup :ebp = 0branch [ 5 8 * , ] drop 5 exit
+  dup :esi = 0branch [ 5 8 * , ] drop 6 exit
+  dup :edi = 0branch [ 5 8 * , ] drop 7 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to reg32 is not a reg32." 1 sys-exit ;
+
+~ (register -- 3-bit encoded value for register)
+: reg16
+  ~   One case per line: on a match, replace the parameter with the encoded
+  ~ value and return; otherwise 0branch skips five cells to the next case.
+  dup :ax = 0branch [ 5 8 * , ] drop 0 exit
+  dup :cx = 0branch [ 5 8 * , ] drop 1 exit
+  dup :dx = 0branch [ 5 8 * , ] drop 2 exit
+  dup :bx = 0branch [ 5 8 * , ] drop 3 exit
+  dup :sp = 0branch [ 5 8 * , ] drop 4 exit
+  dup :bp = 0branch [ 5 8 * , ] drop 5 exit
+  dup :si = 0branch [ 5 8 * , ] drop 6 exit
+  dup :di = 0branch [ 5 8 * , ] drop 7 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to reg16 is not a reg16." 1 sys-exit ;
+
+~ (register -- 3-bit encoded value for register)
+: reg8
+  ~   One case per line: on a match, replace the parameter with the encoded
+  ~ value and return; otherwise 0branch skips five cells to the next case.
+  ~
+  ~   NOTE(review): values 4 through 7 name the high-byte registers only when
+  ~ the instruction carries no rex prefix. The 8-bit words in this file emit
+  ~ none, but confirm that before adding a caller that does.
+  dup :al = 0branch [ 5 8 * , ] drop 0 exit
+  dup :cl = 0branch [ 5 8 * , ] drop 1 exit
+  dup :dl = 0branch [ 5 8 * , ] drop 2 exit
+  dup :bl = 0branch [ 5 8 * , ] drop 3 exit
+  dup :ah = 0branch [ 5 8 * , ] drop 4 exit
+  dup :ch = 0branch [ 5 8 * , ] drop 5 exit
+  dup :dh = 0branch [ 5 8 * , ] drop 6 exit
+  dup :bh = 0branch [ 5 8 * , ] drop 7 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to reg8 is not a reg8." 1 sys-exit ;
+
+
+~   There's a packed format called the SIB byte, which we'll get to in a
+~ second. One of its bitfields is called the scale field. This word produces
+~ an encoded value for that field.
+~
+~   The input value is a byte count; the output value is suitable for use in
+~ the SIB byte.
+~
+~ (scale factor -- 2-bit encoded value)
+: scalefield
+  ~   One case per line: compare a copy of the byte count with one legal
+  ~ scale; on a match, drop the byte count, push the field value, and return.
+  ~ On a mismatch, 0branch skips five cells to the next case.
+  ~
+  ~   The field value is the base-two logarithm of the byte count, so the
+  ~ legal byte counts are exactly 1, 2, 4, and 8.
+  dup 1 = 0branch [ 5 8 * , ] drop 0 exit
+  dup 2 = 0branch [ 5 8 * , ] drop 1 exit
+  dup 4 = 0branch [ 5 8 * , ] drop 2 exit
+  dup 8 = 0branch [ 5 8 * , ] drop 3 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to scalefield is not 1, 2, 4, or 8." 1 sys-exit ;
+
+
+~ [Intel] volume 2D, appendix B, section B-1.4.7, table B-10. Also see the
+~ individual opcode pages.
+~
+~   Every instruction has an "opcode", a specific byte or sequence of bytes
+~ which uniquely identifies the combination of operation, addressing mode,
+~ and certain miscellaneous characteristics. This is not just another way of
+~ referring to the entire sequence of bytes corresponding to the instruction;
+~ the opcode is a specific part within that, as distinct from ie. the rex
+~ byte, the SIB byte, the Mod/RM byte, and various immediate values and other
+~ rare tidbits.
+~
+~   Some of these opcodes have bitfields within them, to specify condition
+~ codes. This word produces an encoded value for that condition-code field.
+~
+~ (condition -- 4-bit encoded value)
+: condition-code
+  ~   One case per line: on a match, replace the parameter with the encoded
+  ~ value and return; otherwise 0branch skips five cells to the next case.
+  ~ The two-digit literals are still one literal each, so the offset of five
+  ~ cells holds for every line.
+  dup :cc-overflow = 0branch [ 5 8 * , ] drop 0 exit
+  dup :cc-no-overflow = 0branch [ 5 8 * , ] drop 1 exit
+  dup :cc-below = 0branch [ 5 8 * , ] drop 2 exit
+  dup :cc-above-equal = 0branch [ 5 8 * , ] drop 3 exit
+  dup :cc-equal = 0branch [ 5 8 * , ] drop 4 exit
+  dup :cc-not-equal = 0branch [ 5 8 * , ] drop 5 exit
+  dup :cc-below-equal = 0branch [ 5 8 * , ] drop 6 exit
+  dup :cc-above = 0branch [ 5 8 * , ] drop 7 exit
+  dup :cc-sign = 0branch [ 5 8 * , ] drop 8 exit
+  dup :cc-not-sign = 0branch [ 5 8 * , ] drop 9 exit
+  dup :cc-even = 0branch [ 5 8 * , ] drop 10 exit
+  dup :cc-odd = 0branch [ 5 8 * , ] drop 11 exit
+  dup :cc-less = 0branch [ 5 8 * , ] drop 12 exit
+  dup :cc-greater-equal = 0branch [ 5 8 * , ] drop 13 exit
+  dup :cc-less-equal = 0branch [ 5 8 * , ] drop 14 exit
+  dup :cc-greater = 0branch [ 5 8 * , ] drop 15 exit
+  ~ No case matched: print a message and hand 1 to sys-exit.
+  ." Parameter to condition-code is not a condition code." 1 sys-exit ;
+
+
+~   The "rex" byte appears before an opcode to modify its behavior in various
+~ ways. It has four distinct bits within it, leading to sixteen variations,
+~ as you can see.
+~
+~   The way these are all spelled out like this is slightly ridiculous, there
+~ must be a better way. We only ever use rex-w and rex-wb, so it's tempting to
+~ get rid of the rest, but they're worth having so that our future selves
+~ don't have to revisit this topic.
+~
+~ (output point -- output point)
+~ No flag bits: the bare 0x40 marker.
+: rex-0     0x40 pack8 ;
+~ One flag bit each. On top of 0x40, W adds 8, R adds 4, X adds 2, B adds 1.
+: rex-w     0x48 pack8 ;
+: rex-r     0x44 pack8 ;
+: rex-x     0x42 pack8 ;
+: rex-b     0x41 pack8 ;
+~ Two flag bits.
+: rex-wr    0x4C pack8 ;
+: rex-wx    0x4A pack8 ;
+: rex-wb    0x49 pack8 ;
+: rex-rx    0x46 pack8 ;
+: rex-rb    0x45 pack8 ;
+: rex-xb    0x43 pack8 ;
+~ Three flag bits.
+: rex-wrx   0x4E pack8 ;
+: rex-wrb   0x4D pack8 ;
+: rex-wxb   0x4B pack8 ;
+: rex-rxb   0x47 pack8 ;
+~ All four flag bits.
+: rex-wrxb  0x4F pack8 ;
+
+
+~   Some opcodes use their low three bits as a field to give a register name.
+~ This is usually in addition to a register name given in a Mod/RM byte,
+~ serving a different role for the instruction.
+~
+~   This word accepts an opcode byte with those three bits clear, and combines
+~ it with a register value, then outputs the resulting byte. Each opcode
+~ accepts some specific kind of register; to allow different kinds, here we
+~ expect the step of converting the register name to the encoded bits to have
+~ already been done.
+~
+~ (output point, 3-bit encoded value for register, opcode byte
+~  -- output point)
+: opcodereg
+  ~ (output point, register value, opcode byte)
+  ~ Merge the register value into the opcode's clear low bits.
+  |
+  ~ (output point, combined byte)
+  pack8 ;
+
+
+~   Some opcodes use their low four bits as a field to give a condition code.
+~ This word accepts an opcode byte with those four bits clear, and combines it
+~ with a condition code value, then outputs the resulting byte. For
+~ consistency with opcodereg, we expect the step of converting the condition
+~ code name to the encoded bits to have already been done.
+~
+~ (output point, 4-bit encoded value for condition code, opcode byte
+~  -- output point)
+: opcodecc
+  ~ (output point, condition code value, opcode byte)
+  ~ Merge the condition code value into the opcode's clear low bits.
+  |
+  ~ (output point, combined byte)
+  pack8 ;
+
+
+~   A Mod/RM byte ("mode / register-or-memory") is part of the encoding of
+~ many instructions. It's divided into three fields: "mod" (mode),
+~ register/opcode, and register/memory ("RM").
+~
+~   This word outputs a Mod/RM byte given fully-processed, numeric values for
+~ its fields. Most code will want to call one of the higher-level
+~ addressing-* words, instead.
+~
+~ (output point, mod field, register/opcode field, register/memory field
+~  -- output point)
+: modrm
+  ~ (output point, mod, reg/op, r/m)
+  ~ Move the reg/op field up by three bits and merge it with r/m.
+  swap 8 *
+  |
+  ~ (output point, mod, low six bits)
+  ~ Move the mod field up by six bits and merge it in as well.
+  swap 64 *
+  |
+  ~ (output point, finished byte)
+  pack8 ;
+
+~   An SIB byte ("scale, index, base") is part of the encoding of many
+~ instructions. It's divided into three fields, with the names you've already
+~ guessed.
+~
+~   This word outputs an SIB byte given fully-processed, numeric values for
+~ its fields.
+~
+~ (output point, scale field, index field, base field -- output point)
+: sib
+  ~ (output point, scale, index, base)
+  ~ Move the index field up by three bits and merge it with base.
+  swap 8 *
+  |
+  ~ (output point, scale, low six bits)
+  ~ Move the scale field up by six bits and merge it in as well.
+  swap 64 *
+  |
+  ~ (output point, finished byte)
+  pack8 ;
+
+
+~ Addressing modes
+~ ~~~~~~~~~~~~~~~~
+~
+~   These are higher-level words meant to be easier to use than the bits and
+~ pieces above. Each corresponds to some specific addressing mode. When
+~ applicable, they accept keywords rather than pre-encoded values.
+~
+~   That's not all the time, because there are cases, such as the reg/op
+~ field, where the meaning is up to the individual instruction. In those
+~ cases, these words do accept fully-processed, numeric values.
+~
+~   The general rule is that the responsibility of these addressing-mode words
+~ is for the parts that are common to all instructions using that addressing
+~ mode.
+
+
+~   The simplest of the addressing modes: Direct register addressing. There
+~ are no special cases to check.
+~
+~   It's important to notice that the R/M field may describe either a source,
+~ or a target, depending on what the instruction is. So, this helper doesn't
+~ get to know that. It also doesn't get to know whether the value in the
+~ reg/op field describes a register, or if instead it's an extension of the
+~ opcode. The caller is responsible for figuring that all out.
+~
+~ (output point, reg/op field value, reg/mem field register
+~  -- output point)
+: addressing-reg64
+  ~ (output point, reg/op value, r/m register)
+  reg64
+  ~ Mode 3 is direct register addressing; tuck it beneath the two fields.
+  3 3unroll
+  ~ (output point, 3, reg/op value, r/m value)
+  modrm ;
+: addressing-reg8
+  ~ (output point, reg/op value, r/m register)
+  reg8
+  ~ Mode 3 is direct register addressing; tuck it beneath the two fields.
+  3 3unroll
+  ~ (output point, 3, reg/op value, r/m value)
+  modrm ;
+
+
+~   This is a helper for assembly instructions that want to do a form of
+~ addressing that requires a value of 0 in the modrm byte's mode field, and
+~ do not want to do any indexing. That's the indirect mode, which takes a
+~ 64-bit register, treats it as an address, and looks up the 64-bit value it
+~ points to.
+~
+~   The helper's main responsibility is to deal with the scenario that
+~ requires an SIB byte, which happens when the R/M field has a value of 4,
+~ which would otherwise refer to the register rsp. In that situation, it also
+~ generates an SIB byte which indicates a scale of 1, no indexing, and rsp as
+~ the base register.
+~
+~   When the register is :rbp, the only modes available also have
+~ displacement; we disallow that. For that case, use an instruction that
+~ uses a disp8 mode, and set a displacement of 0.
+~
+~   In understanding this, pay close attention to the Op/En column in the
+~ opcode table. The "RM" variant means the ModRM byte's R/M field (the third
+~ one) is the source, while its reg field (the middle one) is the target. This
+~ is what we want, because the R/M field is the one that gets indirection
+~ applied to it. Opcode 0x8B with an REX.W prefix is the all-64-bit RM
+~ variant. [Intel] volume 2B, chapter 4, section 4-3, "MOV".
+~
+~   For the indirection modes, don't be confused by the many similar tables.
+~ 64-bit mode is encoded the same as 32-bit mode except for adding a REX.W
+~ prefix, as per 2.2.1.1, so you want table 2-2 to understand the ModRM byte.
+~ The presence or absence of an SIB byte is determined by where in that table
+~ we fall, and we aren't using a mode that has one. [Intel] volume 2A,
+~ chapter 2, section 2-1.5, table 2-2.
+~
+~ (output point, reg/op field value, reg/mem field register
+~  -- output point)
+: addressing-indirect-reg64
+  ~ Exit with an error if the R/M register is :rbp.
+  ~   The offset of 23 cells spans the whole normal path below, including its
+  ~ exit, so the branch lands on the error message at the end.
+  dup :rbp != 0branch [ 23 8 * , ]
+  ~ Check whether the R/M register is :rsp. Save the test result for later.
+  dup :rsp = 4 unroll
+  ~ (equality result, output point, reg/op value, reg/mem name)
+  ~ Emit the Mod/RM byte with a mode field of 0.
+  reg64 0 3unroll modrm
+  ~ (equality result, output point)
+  ~ If the R/M register was rsp, we need an SIB byte; otherwise, skip it.
+  ~   The SIB byte has scale field 0, index field 4, and :rsp as the base.
+  ~ The offset of 8 cells spans exactly the words that emit it.
+  swap 0branch [ 8 8 * , ] 0 4 :rsp reg64 sib
+  exit
+  ~ Reached only through the first branch above.
+  ." R/M parameter to addressing-indirect-reg64 is :rbp." 1 sys-exit ;
+
+~ (output point, reg/op field value, reg/mem field register,
+~  displacement value -- output point)
+: addressing-disp8-reg64
+  ~ This mode can do :rbp fine, so no need to check for that.
+  ~ Check whether the R/M register is :rsp. Save the test result for later.
+  swap dup :rsp = 5 unroll swap
+  ~ (equality result, output point, reg/op value, reg/mem name, displacement)
+  ~ Stash the displacement value out of the way, too.
+  4 unroll
+  ~ (equality result, displacement, output point, reg/op value, reg/mem name)
+  ~ Emit the Mod/RM byte with a mode field of 1.
+  reg64 1 3unroll modrm
+  ~ (equality result, displacement, output point)
+  ~ If the R/M register was rsp, we need an SIB byte; otherwise, skip it.
+  ~   The offset of 8 cells spans exactly the words that emit the SIB byte.
+  3roll 0branch [ 8 8 * , ] 0 4 :rsp reg64 sib
+  ~ (displacement, output point)
+  ~ The displacement byte.
+  swap pack8 ;
+
+~ (output point, reg/op field value, reg/mem field register,
+~  displacement value -- output point)
+: addressing-disp32-reg64
+  ~ This mode can do :rbp fine, so no need to check for that.
+  ~ Check whether the R/M register is :rsp. Save the test result for later.
+  swap dup :rsp = 5 unroll swap
+  ~ (equality result, output point, reg/op value, reg/mem name, displacement)
+  ~ Stash the displacement value out of the way, too.
+  4 unroll
+  ~ (equality result, displacement, output point, reg/op value, reg/mem name)
+  ~ Emit the Mod/RM byte with a mode field of 2.
+  reg64 2 3unroll modrm
+  ~ (equality result, displacement, output point)
+  ~ If the R/M register was rsp, we need an SIB byte; otherwise, skip it.
+  ~   The offset of 8 cells spans exactly the words that emit the SIB byte.
+  3roll 0branch [ 8 8 * , ] 0 4 :rsp reg64 sib
+  ~ (displacement, output point)
+  ~ The displacement value.
+  swap pack32 ;
+
+~ (output point, reg/op field value,
+~  scale factor, index register, base field register
+~  -- output point)
+: addressing-indexed-reg64
+  ~ Exit with an error if the base register is :rbp.
+  ~   The offset of 23 cells spans the whole normal path below, including its
+  ~ exit, so the branch lands on the error message at the end.
+  dup :rbp != 0branch [ 23 8 * , ]
+  ~ Reg/mem value 4 means to use an SIB byte (at least, with this mode).
+  ~   Bring the output point and the reg/op value to the top, emit the Mod/RM
+  ~ byte with a mode field of 0, then put the output point back underneath.
+  5 roll 0 6 roll 4 modrm 4 unroll
+  ~ (output point, scale factor, index register, base register)
+  ~   Encode the three fields one at a time, rotating after each, so that they
+  ~ finish in the order that sib expects.
+  reg64 3unroll reg64 3unroll scalefield 3unroll sib
+  exit
+  ~ Reached only through the first branch above.
+  ~   NOTE(review): the index register is not checked. reg64 encodes :rsp as
+  ~ 4, the same index value the non-indexed words pass to sib; confirm whether
+  ~ :rsp should be rejected as an index.
+  ." Base parameter to addressing-indexed-reg64 is :rbp." 1 sys-exit ;
+
+~ (output point, reg/op field value,
+~  scale factor, index register, base field register,
+~  displacement value -- output point)
+: addressing-disp8-indexed-reg64
+  ~ :rbp is acceptable as a base here, so there is no check for it.
+  ~ (output point, reg/op value, scale, index, base, displacement)
+  6 roll
+  ~ (reg/op value, scale, index, base, displacement, output point)
+  ~ Mode 1 with a reg/mem value of 4 calls for an SIB byte.
+  1 7 roll 4 modrm
+  ~ (scale, index, base, displacement, output point)
+  5 unroll
+  5 unroll
+  ~ (displacement, output point, scale, index, base)
+  ~ Encode the three fields, rotating after each so they end up in order.
+  reg64 3unroll
+  reg64 3unroll
+  scalefield 3unroll
+  sib
+  ~ (displacement, output point)
+  swap pack8 ;
+
+
+~ Easy instructions
+~ ~~~~~~~~~~~~~~~~~
+~
+~   It's not worth pretending there's a coherent category behind this
+~ grouping. These are the ones that were easy to deal with.
+
+~ (output point -- output point)
+~ Clear and set the direction flag.
+: cld      0xFC pack8 ;
+: std      0xFD pack8 ;
+~ The only two-byte opcode in this group.
+: syscall
+  0x0F pack8
+  0x05 pack8 ;
+: hlt      0xF4 pack8 ;
+
+~ (output point, source register -- output point)
+: push-reg64
+  ~ The register number lives in the low three bits of the opcode.
+  reg64
+  0x50 opcodereg ;
+
+~ (output point, target register -- output point)
+: pop-reg64
+  ~ The register number lives in the low three bits of the opcode.
+  reg64
+  0x58 opcodereg ;
+
+~ (output point, immediate value -- output point)
+: push-imm32-extended64
+  ~ (output point, immediate)
+  swap 0x68 pack8
+  ~ (immediate, output point)
+  swap pack32 ;
+
+~ (output point, source register, source displacement value, target register
+~  -- output point)
+: lea-reg64-disp8-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  4 roll
+  rex-w 0x8D pack8
+  4 unroll
+  ~ (output point, source, displacement, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, source displacement value, target register
+~  -- output point)
+: lea-reg64-disp32-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  4 roll
+  rex-w 0x8D pack8
+  4 unroll
+  ~ (output point, source, displacement, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp32-reg64 ;
+
+~ (output point,
+~  source base register, source index register, source index scale factor,
+~  target register -- output point)
+: lea-reg64-indexed-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  5 roll
+  rex-w 0x8D pack8
+  5 unroll
+  ~ (output point, base, index, scale, target)
+  reg64 4 unroll
+  ~ (output point, target value, base, index, scale)
+  ~ Reverse the last three into the order the addressing word expects.
+  3unroll swap
+  ~ (output point, target value, scale, index, base)
+  addressing-indexed-reg64 ;
+
+~ (output point,
+~  source base register, source index register, source index scale factor,
+~  source displacement value,
+~  target register -- output point)
+: lea-reg64-disp8-indexed-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  6 roll
+  rex-w 0x8D pack8
+  6 unroll
+  ~ (output point, base, index, scale, displacement, target)
+  reg64 5 unroll
+  ~ (output point, target value, base, index, scale, displacement)
+  3 roll
+  ~ (output point, target value, base, scale, displacement, index)
+  4 roll
+  ~ (output point, target value, scale, displacement, index, base)
+  3 roll
+  ~ (output point, target value, scale, index, base, displacement)
+  addressing-disp8-indexed-reg64 ;
+
+
+~ Move instructions
+~ ~~~~~~~~~~~~~~~~~
+~
+~   These are, like, MOST of what we care about, so they get their own
+~ section. Although it's very much the case that almost every two-operand
+~ instruction offers this many distinct modes, we don't care about most of
+~ those and don't yet implement them. We do care about all the modes for move
+~ instructions.
+~
+~   Someday perhaps we'll have extra-high-level features which generate all
+~ the distinct versions of each instruction in a concise way, but that is not
+~ this day.
+
+~ (output point, immediate value, register -- output point)
+: mov-reg64-imm32
+  ~ (output point, immediate, register)
+  3roll
+  rex-w 0xC7 pack8
+  ~ (immediate, register, output point)
+  swap
+  ~ The reg/op field is an opcode extension here, with value 0.
+  0 swap addressing-reg64
+  ~ (immediate, output point)
+  swap pack32 ;
+: mov-reg64-imm64
+  ~ (output point, immediate, register)
+  3roll rex-w
+  ~ (immediate, register, output point)
+  ~ The register number lives in the low three bits of the opcode.
+  swap reg64
+  0xB8 opcodereg
+  ~ (immediate, output point)
+  swap pack64 ;
+: mov-extrareg64-imm64
+  ~   Note the use of the B rex bit here; this instruction puts the register
+  ~ number in the opcode field, so it uses Table 3-1.
+  ~ (output point, immediate, register)
+  3roll rex-wb
+  ~ (immediate, register, output point)
+  swap extrareg64
+  0xB8 opcodereg
+  ~ (immediate, output point)
+  swap pack64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x89 pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The source goes in the reg/op field, the target in the R/M field.
+  swap reg64 swap
+  ~ (output point, source value, target)
+  addressing-reg64 ;
+: mov-indirect-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x89 pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The source goes in the reg/op field; the target register is the address.
+  swap reg64 swap
+  ~ (output point, source value, target)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, target register, target displacement value
+~  -- output point)
+: mov-disp8-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  4 roll
+  rex-w 0x89 pack8
+  4 unroll
+  ~ (output point, source, target, displacement)
+  ~ Fetch the source register, encode it, and put it back where it was.
+  3roll reg64 3unroll
+  ~ (output point, source value, target, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-reg64-indirect-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x8B pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The target goes in the reg/op field; the source register is the address.
+  reg64 swap
+  ~ (output point, target value, source)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, source displacement value, target register
+~  -- output point)
+: mov-reg64-disp8-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  4 roll
+  rex-w 0x8B pack8
+  4 unroll
+  ~ (output point, source, displacement, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp8-reg64 ;
+: mov-reg64-disp32-reg64
+  ~   Load a 64-bit register from memory at a source register plus a 32-bit
+  ~ displacement. This takes the same parameters, in the same order, as
+  ~ mov-reg64-disp8-reg64:
+  ~
+  ~ (output point, source register, source displacement value, target register
+  ~  -- output point)
+  ~
+  ~   Opcode 0x8B is the form whose reg/op field is the target and whose R/M
+  ~ field, the one that gets the indirection, is the source.
+  4 roll rex-w 0x8B pack8 4 unroll
+  ~ (output point, source, displacement, target)
+  reg64 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp32-reg64 ;
+
+~ (output point,
+~  source base register, source index register, source index scale factor,
+~  target register -- output point)
+: mov-reg64-indexed-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  5 roll
+  rex-w 0x8B pack8
+  5 unroll
+  ~ (output point, base, index, scale, target)
+  reg64 4 unroll
+  ~ (output point, target value, base, index, scale)
+  ~ Reverse the last three into the order the addressing word expects.
+  3unroll swap
+  ~ (output point, target value, scale, index, base)
+  addressing-indexed-reg64 ;
+
+~ (output point, source register,
+~  target base register, target index register, target index scale factor
+~  -- output point)
+: mov-indexed-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  5 roll
+  rex-w 0x89 pack8
+  5 unroll
+  ~ (output point, source, base, index, scale)
+  ~ Fetch the source register, encode it, and put it back where it was.
+  4 roll reg64 4 unroll
+  ~ (output point, source value, base, index, scale)
+  ~ Reverse the last three into the order the addressing word expects.
+  3unroll swap
+  ~ (output point, source value, scale, index, base)
+  addressing-indexed-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-indirect-reg64-reg32
+  ~ No rex prefix: just the opcode, then restore the parameter order.
+  3roll
+  0x89 pack8
+  3unroll
+  ~ (output point, source, target)
+  swap reg32 swap
+  ~ (output point, source value, target)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, target register, target displacement value
+~  -- output point)
+: mov-disp8-reg64-reg32
+  4 roll
+  0x89 pack8
+  4 unroll
+  ~ (output point, source, target, displacement)
+  ~ Fetch the source register, encode it, and put it back where it was.
+  3roll reg32 3unroll
+  ~ (output point, source value, target, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-reg32-indirect-reg64
+  3roll
+  0x8B pack8
+  3unroll
+  ~ (output point, source, target)
+  reg32 swap
+  ~ (output point, target value, source)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, source displacement value, target register
+~  -- output point)
+: mov-reg32-disp8-reg64
+  4 roll
+  0x8B pack8
+  4 unroll
+  ~ (output point, source, displacement, target)
+  reg32 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-indirect-reg64-reg16
+  ~ The 0x66 prefix byte goes out ahead of the opcode.
+  3roll
+  0x66 pack8
+  0x89 pack8
+  3unroll
+  ~ (output point, source, target)
+  swap reg16 swap
+  ~ (output point, source value, target)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, target register, target displacement value
+~  -- output point)
+: mov-disp8-reg64-reg16
+  4 roll
+  0x66 pack8
+  0x89 pack8
+  4 unroll
+  ~ (output point, source, target, displacement)
+  ~ Fetch the source register, encode it, and put it back where it was.
+  3roll reg16 3unroll
+  ~ (output point, source value, target, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-reg16-indirect-reg64
+  3roll
+  0x66 pack8
+  0x8B pack8
+  3unroll
+  ~ (output point, source, target)
+  reg16 swap
+  ~ (output point, target value, source)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, source displacement value, target register
+~  -- output point)
+: mov-reg16-disp8-reg64
+  4 roll
+  0x66 pack8
+  0x8B pack8
+  4 unroll
+  ~ (output point, source, displacement, target)
+  reg16 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-indirect-reg64-reg8
+  ~ No prefix: just the opcode, then restore the parameter order.
+  3roll
+  0x88 pack8
+  3unroll
+  ~ (output point, source, target)
+  swap reg8 swap
+  ~ (output point, source value, target)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, target register, target displacement value
+~  -- output point)
+: mov-disp8-reg64-reg8
+  4 roll
+  0x88 pack8
+  4 unroll
+  ~ (output point, source, target, displacement)
+  ~ Fetch the source register, encode it, and put it back where it was.
+  3roll reg8 3unroll
+  ~ (output point, source value, target, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-reg8-indirect-reg64
+  3roll
+  0x8A pack8
+  3unroll
+  ~ (output point, source, target)
+  reg8 swap
+  ~ (output point, target value, source)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, source displacement value, target register
+~  -- output point)
+: mov-reg8-disp8-reg64
+  ~   Load an 8-bit register from memory at a 64-bit source register plus an
+  ~ 8-bit displacement. Opcode 0x8A puts the target in the reg/op field and
+  ~ the source in the R/M field. No prefix byte is needed.
+  ~
+  ~ Fetch the output point, emit the opcode, and put it back.
+  4 roll 0x8A pack8 4 unroll
+  ~ (output point, source, displacement, target)
+  reg8 3unroll
+  ~ (output point, target value, source, displacement)
+  addressing-disp8-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: mov-reg8-reg8
+  ~ Fetch the output point, emit the opcode, and put it back.
+  3roll
+  0x88 pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The source goes in the reg/op field, the target in the R/M field.
+  swap reg8 swap
+  ~ (output point, source value, target)
+  addressing-reg8 ;
+
+
+~ String instructions
+~ ~~~~~~~~~~~~~~~~~~~
+~
+~   These are in their own section because there's an awful lot of
+~ combinations, and fortunately they are very uniform in structure.
+~
+~   What makes these useful is that they take their parameters from certain
+~ fixed registers, which are chosen such that the operations chain into each
+~ other well. Thus you can use them to build various block-memory and string
+~ operations, and even if you need unusual forms of loop unrolling or
+~ alignment tweaking, the code will end up uniform in structure. On modern
+~ processors, this is even the high-performance approach, due to highly
+~ optimized microcode, though these operations were inefficient when they
+~ were first invented.
+~
+~   We break with the Intel mnemonics, which follow the pattern
+~ movsb/movsw/movsd/movsq, because this would otherwise be the only place we
+~ use the b/w/d/q thing instead of 8/16/32/64. Tradition and pronounceability
+~ are both nice things, but approachability to newcomers is important, too.
+~
+~   Some of these are repeatable; whether you view the repeatable variants
+~ as different instructions is up to you. At any rate the machine code
+~ representation of the repeatable variants is the same as for the regular
+~ variants with an extra prefix, so we define them together.
+~
+~   This is a proper superset of the flatassembler implementations of string
+~ instructions. The wisdom of that is questionable, but at least it's noted
+~ here...
+
+~ (output point -- output point)
+~ Single-step forms. The 16-bit form adds a 0x66 byte, the 64-bit form rex-w.
+: movs8       0xA4 pack8 ;
+: movs16      0x66 pack8  0xA5 pack8 ;
+: movs32      0xA5 pack8 ;
+: movs64      rex-w  0xA5 pack8 ;
+~ Repeating forms: the same bytes with 0xF3 in front.
+: rep-movs8   0xF3 pack8  0xA4 pack8 ;
+: rep-movs16  0xF3 pack8  0x66 pack8  0xA5 pack8 ;
+: rep-movs32  0xF3 pack8  0xA5 pack8 ;
+: rep-movs64  0xF3 pack8  rex-w  0xA5 pack8 ;
+
+~ (output point -- output point)
+~ Single-step forms. The 16-bit form adds a 0x66 byte, the 64-bit form rex-w.
+: lods8 0xAC pack8 ;
+~ Hex digits are written in upper case here, as everywhere else in this file.
+: lods16 0x66 pack8 0xAD pack8 ;
+: lods32 0xAD pack8 ;
+: lods64 rex-w 0xAD pack8 ;
+~ Repeating forms: the same bytes with 0xF3 in front.
+: rep-lods8 0xF3 pack8 0xAC pack8 ;
+: rep-lods16 0xF3 pack8 0x66 pack8 0xAD pack8 ;
+: rep-lods32 0xF3 pack8 0xAD pack8 ;
+: rep-lods64 0xF3 pack8 rex-w 0xAD pack8 ;
+
+~ (output point -- output point)
+~ Single-step forms. The 16-bit form adds a 0x66 byte, the 64-bit form rex-w.
+: stos8       0xAA pack8 ;
+: stos16      0x66 pack8  0xAB pack8 ;
+: stos32      0xAB pack8 ;
+: stos64      rex-w  0xAB pack8 ;
+~ Repeating forms: the same bytes with 0xF3 in front.
+: rep-stos8   0xF3 pack8  0xAA pack8 ;
+: rep-stos16  0xF3 pack8  0x66 pack8  0xAB pack8 ;
+: rep-stos32  0xF3 pack8  0xAB pack8 ;
+: rep-stos64  0xF3 pack8  rex-w  0xAB pack8 ;
+
+~ (output point -- output point)
+~ Single-step forms. The 16-bit form adds a 0x66 byte, the 64-bit form rex-w.
+: cmps8         0xA6 pack8 ;
+: cmps16        0x66 pack8  0xA7 pack8 ;
+: cmps32        0xA7 pack8 ;
+: cmps64        rex-w  0xA7 pack8 ;
+~ Repeat-while-zero forms: the same bytes with 0xF3 in front.
+: repz-cmps8    0xF3 pack8  0xA6 pack8 ;
+: repz-cmps16   0xF3 pack8  0x66 pack8  0xA7 pack8 ;
+: repz-cmps32   0xF3 pack8  0xA7 pack8 ;
+: repz-cmps64   0xF3 pack8  rex-w  0xA7 pack8 ;
+~ Repeat-while-not-zero forms: the same bytes with 0xF2 in front.
+: repnz-cmps8   0xF2 pack8  0xA6 pack8 ;
+: repnz-cmps16  0xF2 pack8  0x66 pack8  0xA7 pack8 ;
+: repnz-cmps32  0xF2 pack8  0xA7 pack8 ;
+: repnz-cmps64  0xF2 pack8  rex-w  0xA7 pack8 ;
+
+~ (output point -- output point)
+~ Single-step forms. The 16-bit form adds a 0x66 byte, the 64-bit form rex-w.
+~ The 8-bit opcode is 0xAE, the same one the repeating 8-bit forms use.
+: scas8 0xAE pack8 ;
+: scas16 0x66 pack8 0xAF pack8 ;
+: scas32 0xAF pack8 ;
+: scas64 rex-w 0xAF pack8 ;
+~ Repeat-while-zero forms: the same bytes with 0xF3 in front.
+: repz-scas8 0xF3 pack8 0xAE pack8 ;
+: repz-scas16 0xF3 pack8 0x66 pack8 0xAF pack8 ;
+: repz-scas32 0xF3 pack8 0xAF pack8 ;
+: repz-scas64 0xF3 pack8 rex-w 0xAF pack8 ;
+~ Repeat-while-not-zero forms: the same bytes with 0xF2 in front.
+: repnz-scas8 0xF2 pack8 0xAE pack8 ;
+: repnz-scas16 0xF2 pack8 0x66 pack8 0xAF pack8 ;
+: repnz-scas32 0xF2 pack8 0xAF pack8 ;
+: repnz-scas64 0xF2 pack8 rex-w 0xAF pack8 ;
+
+
+~ Arithmetic instructions
+~ ~~~~~~~~~~~~~~~~~~~~~~~
+
+~ (output point, source register, target register -- output point)
+: add-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x01 pack8
+  3unroll
+  ~ (output point, source, target)
+  swap reg64 swap
+  ~ (output point, source value, target)
+  addressing-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: add-indirect-reg64-reg64
+  3roll
+  rex-w 0x01 pack8
+  3unroll
+  ~ (output point, source, target)
+  swap reg64 swap
+  ~ (output point, source value, target)
+  addressing-indirect-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: add-reg64-indirect-reg64
+  3roll
+  rex-w 0x03 pack8
+  3unroll
+  ~ (output point, source, target)
+  reg64 swap
+  ~ (output point, target value, source)
+  addressing-indirect-reg64 ;
+
+~ (output point, source value, target register -- output point)
+: add-reg64-imm8
+  ~ (output point, immediate, target)
+  3roll
+  rex-w 0x83 pack8
+  ~ (immediate, target, output point)
+  swap
+  ~ The reg/op field is an opcode extension here, with value 0.
+  0 swap addressing-reg64
+  ~ (immediate, output point)
+  swap pack8 ;
+
+~ (output point, source register, target register -- output point)
+: sub-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x2B pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 swap
+  ~ (output point, target value, source)
+  addressing-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: sub-indirect-reg64-reg64
+  ~   Subtract a source register from the 64-bit value in memory that the
+  ~ target register points to.
+  ~
+  ~   The memory operand is the target, so this needs opcode 0x29, the form
+  ~ whose R/M field is the target and whose reg/op field is the source. This
+  ~ parallels add-indirect-reg64-reg64 and its opcode 0x01. Opcode 0x2B is
+  ~ the opposite direction, and would put the result in the source register.
+  ~
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll rex-w 0x29 pack8 3unroll
+  ~ (output point, source, target)
+  swap reg64 swap
+  ~ (output point, source value, target)
+  addressing-indirect-reg64 ;
+
+~ (output point, source value, target register -- output point)
+: sub-reg64-imm8
+  ~ (output point, immediate, target)
+  3roll
+  rex-w 0x83 pack8
+  ~ (immediate, target, output point)
+  swap
+  ~ The reg/op field is an opcode extension here, with value 5.
+  5 swap addressing-reg64
+  ~ (immediate, output point)
+  swap pack8 ;
+
+~ (output point, source value, target register -- output point)
+: sbb-reg64-imm8
+  ~ (output point, immediate, target)
+  3roll
+  rex-w 0x83 pack8
+  ~ (immediate, target, output point)
+  swap
+  ~ The reg/op field is an opcode extension here, with value 3.
+  3 swap addressing-reg64
+  ~ (immediate, output point)
+  swap pack8 ;
+
+~  The target register is always rax.
+~
+~ (output point, source register -- output point)
+: mul-reg64
+  ~ (output point, source)
+  swap
+  rex-w 0xF7 pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 4.
+  4 swap
+  ~ (output point, 4, source)
+  addressing-reg64 ;
+
+~   The dividend is 128 bits, and is formed from rdx as the high half and rax
+~ as the low half. The divisor is a specified register. The quotient is
+~ returned in rax, truncated towards zero. The remainder is in rdx. This
+~ entire process is unsigned.
+~
+~   The official mnemonic for this is "div", but divmod is what it does.
+~
+~ (output point, divisor register -- output point)
+: divmod-reg64
+  ~ (output point, divisor)
+  swap
+  rex-w 0xF7 pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 6.
+  6 swap
+  ~ (output point, 6, divisor)
+  addressing-reg64 ;
+
+~ Same as divmod, but signed.
+~
+~ (output point, divisor register -- output point)
+: idivmod-reg64
+  ~ (output point, divisor)
+  swap
+  rex-w 0xF7 pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 7.
+  7 swap
+  ~ (output point, 7, divisor)
+  addressing-reg64 ;
+
+~ (output point, target register -- output point)
+: inc-reg64
+  ~ (output point, target)
+  swap
+  rex-w 0xFF pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 0.
+  0 swap
+  addressing-reg64 ;
+
+~ (output point, target register -- output point)
+: dec-reg64
+  ~ (output point, target)
+  swap
+  rex-w 0xFF pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 1.
+  1 swap
+  addressing-reg64 ;
+
+~ (output point, source register, target register -- output point)
+: and-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x23 pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 swap
+  ~ (output point, target value, source)
+  addressing-reg64 ;
+
+~ (output point, source value, target register -- output point)
+: and-reg64-imm8
+  ~   Bitwise-and an 8-bit immediate value into a 64-bit register.
+  ~
+  ~ (output point, immediate, target)
+  3roll rex-w 0x83 pack8 swap
+  ~ (immediate, output point, target)
+  ~ The reg/op field is an opcode extension here, with value 4.
+  4 swap addressing-reg64
+  ~ (immediate, output point)
+  swap pack8 ;
+
+~   The name this word was first defined under. It has a typo in it, and is
+~ kept only so that any code already using it keeps working.
+~
+~ (output point, source value, target register -- output point)
+: and-reg68-imm8 and-reg64-imm8 ;
+
+~ (output point, source register, target register -- output point)
+: or-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x0B pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 swap
+  ~ (output point, target value, source)
+  addressing-reg64 ;
+
+~ (output point, source value, target register -- output point)
+: or-reg64-imm8
+  ~ (output point, immediate, target)
+  3roll
+  rex-w 0x83 pack8
+  ~ (immediate, target, output point)
+  swap
+  ~ The reg/op field is an opcode extension here, with value 1.
+  1 swap addressing-reg64
+  ~ (immediate, output point)
+  swap pack8 ;
+
+~ (output point, source register, target register -- output point)
+: xor-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x33 pack8
+  3unroll
+  ~ (output point, source, target)
+  ~ The target goes in the reg/op field, the source in the R/M field.
+  reg64 swap
+  ~ (output point, target value, source)
+  addressing-reg64 ;
+
+~ (output point, target register -- output point)
+: not-reg64
+  ~ (output point, target)
+  swap
+  rex-w 0xF7 pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 2.
+  2 swap
+  addressing-reg64 ;
+
+
+~ Control flow instructions
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~
+
+~ (output point, left register, right register -- output point)
+: cmp-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x3B pack8
+  3unroll
+  ~ (output point, left, right)
+  ~ The right register goes in the reg/op field, the left in the R/M field.
+  reg64 swap
+  ~ (output point, right value, left)
+  addressing-reg64 ;
+
+~ (output point, left register, right register -- output point)
+: test-reg64-reg64
+  ~ Fetch the output point, emit the prefix and opcode, and put it back.
+  3roll
+  rex-w 0x85 pack8
+  3unroll
+  ~ (output point, left, right)
+  ~ The left register goes in the reg/op field, the right in the R/M field.
+  swap reg64 swap
+  ~ (output point, left value, right)
+  addressing-reg64 ;
+
+~ (output point, condition code, target register -- output point)
+: set-reg8-cc
+  ~ (output point, condition, target)
+  3roll
+  0x0F pack8
+  ~ (condition, target, output point)
+  3roll
+  ~ (target, output point, condition)
+  ~ The condition code lives in the low four bits of the second opcode byte.
+  condition-code
+  0x90 opcodecc
+  ~ (target, output point)
+  swap reg8
+  ~ (output point, target value)
+  ~ Mode 3 and a reg/op field of 0, slipped in beneath the target value.
+  3 0 3roll
+  ~ (output point, 3, 0, target value)
+  modrm ;
+
+~ (output point, address offset value, condition code -- output point)
+: jmp-cc-rel-imm8
+  ~ (output point, offset, condition)
+  3roll swap
+  ~ (offset, output point, condition)
+  ~ The condition code lives in the low four bits of the opcode.
+  condition-code
+  0x70 opcodecc
+  ~ (offset, output point)
+  swap pack8 ;
+
+~ (output point, address offset value, condition code -- output point)
+: jmp-cc-rel-imm32
+  ~   Conditional jump with a 32-bit relative offset. This is a two-byte
+  ~ opcode: 0x0F, then 0x80 with the condition code in its low four bits.
+  ~ (0x70 is the base for the one-byte opcode of the 8-bit-offset form.)
+  ~
+  ~ (output point, offset, condition)
+  ~ Bring the output point to the top so that pack8 writes through it.
+  3roll 0x0F pack8
+  ~ (offset, condition, output point)
+  swap condition-code 0x80 opcodecc
+  ~ (offset, output point)
+  swap pack32 ;
+
+~ (output point, register -- output point)
+: jmp-abs-indirect-reg64
+  ~ (output point, register)
+  swap
+  0xFF pack8
+  swap
+  ~ The reg/op field is an opcode extension here, with value 4.
+  4 swap
+  ~ (output point, 4, register)
+  addressing-indirect-reg64 ;
+
+~ (output point, address offset value -- output point)
+: jmp-rel-imm8
+  ~ (output point, offset)
+  swap
+  0xEB pack8
+  ~ (offset, output point)
+  swap
+  pack8 ;
+
+~ (output point, address offset value -- output point)
+: jmp-rel-imm32
+  ~ (output point, offset)
+  swap
+  0xE9 pack8
+  ~ (offset, output point)
+  swap
+  pack32 ;
+
diff --git a/evoke.e b/evoke.e
index 869de31..9bc6979 100644
--- a/evoke.e
+++ b/evoke.e
@@ -1,6 +1,6 @@
 ~ (cat labels.e elf.e transform.e execution.e \
-~  echo 65536 read-to-buffer; \
-~  cat core.e core-plus.e log-load.e; \
+~  echo 131072 read-to-buffer; \
+~  cat core.e core-plus.e linux.e amd64.e execution-support.e log-load.e; \
 ~  echo pyrzqxgl; \
 ~  cat evoke.e) \
 ~     | ./quine > evoke && chmod 755 evoke && ./evoke
diff --git a/execution-support.e b/execution-support.e
new file mode 100644
index 0000000..b668157
--- /dev/null
+++ b/execution-support.e
@@ -0,0 +1,107 @@
+~ Execution support
+~ ~~~~~~~~~~~~~~~~~
+~
+~   These macros are an important part of the execution model described in
+~ execution.e. They're here, in this file, because they need to be statically
+~ available via the label transform, so that the log-load transform can rely
+~ on them.
+
+
+~ Macro next
+~ ~~~~~~~~~~
+~
+~   Include this inline at the end of a word implemented in machine-code.
+~ Conceptually, it returns. What it actually does is do the next thing the
+~ caller would do, which is call the next word from the caller's array of
+~ word pointers.
+~
+~   This is a widespread technique in Forth implementation, referred to as
+~ indirect threaded code. It's "threaded" in the sense that each word takes
+~ responsibility for finishing up by following the notional thread through the
+~ metaphorical labyrinth to figure out the next word that its caller wants to
+~ run after it. In other words, control never directly returns to the parent,
+~ it proceeds directly to the sibling.
+~
+~ Registers in:
+~
+~ * rsi points to the address of the word to execute
+~
+~ Registers out:
+~
+~ * rax points to the codeword in the contents of the word that was executed
+~ * rsi points to the next word-address after this one
+~
+~ Flags
+~ * DF = 0 is required
+~
+~ (base address -- new base address)
+: pack-next
+  ~ Copy the next word's address from *rsi into rax. Increment rsi (as per the
+  ~ DF flag).
+  lods64
+
+  ~ Load the codeword from the word's contents, and jump to the interpreter it
+  ~ points to.
+  :rax jmp-abs-indirect-reg64 ;
+
+
+~ Macro beforenext
+~ ~~~~~~~~~~~~~~~~
+~
+~   Sometimes we want to transfer control from a word implemented in
+~ machine-code to another word, without coming back after, as if we were
+~ simply jumping to it. This is an innovation of ours; Jonesforth doesn't do
+~ it. It is similar to the tail-call optimization that many Lisp dialects
+~ have.
+~
+~   This implementation will work regardless of how the receiving word is
+~ implemented. It impersonates the "next" snippet, setting up rax to point
+~ to the codeword then jumping to the interpreter. Since it doesn't change
+~ the control stack or rsi, when the receiving word eventually invokes
+~ "next", it will pick up in the same place as if this sending word had done
+~ it.
+~
+~   Thus, notionally we are doing just this one transfer of control before
+~ eventually getting around to inlining "next". Hence the name.
+~
+~ (base address, target address -- new base address)
+: pack-beforenext
+  ~ Do a permanent transfer of control by setting rax and invoking the
+  ~ codeword. Of course, we could jump to docol ourselves but this will work
+  ~ regardless of what the receiving codeword is.
+  :rax mov-reg64-imm64
+  :rax jmp-abs-indirect-reg64 ;
+
+
+~ Macros pushcontrol
+~        popcontrol
+~ ~~~~~~~~~~~~~~~~~~
+~
+~   Include these inline to push an address onto the control stack, or pop
+~ one off of it. You will recall the control stack is kept in rbp. The
+~ parameter is given in a user-specified register.
+~
+~   Jonesforth's analogous macros are called PUSHRSP and POPRSP but I think
+~ that's super confusing, since rsp is also the name of a register, but a
+~ different one. I guess it was less confusing in 32-bit, since esp doesn't
+~ start with an "r". Anyway, this has to be named something that
+~ distinguishes it from Intel's PUSH and POP opcodes, so...
+~
+~   "Load effective address" is just a cute way to do arithmetic on a
+~ register, here. To push or pop we decrement or increment rbp by 8. To
+~ actually interact with the space in the stack, we indirect through rbp.
+~
+~ Registers in and out:
+~
+~ * rbp points to the top of the control stack.
+~
+~ (base address, source register -- new base address)
+: pack-pushcontrol
+  swap :rbp -8 :rbp lea-reg64-disp8-reg64
+  swap :rbp 0 mov-disp8-reg64-reg64 ;
+
+~ (base address, target register -- new base address)
+: pack-popcontrol
+  :rbp 0 3roll mov-reg64-disp8-reg64
+  :rbp 8 :rbp lea-reg64-disp8-reg64 ;
+
diff --git a/execution.e b/execution.e
index 1b9e84d..c3f99f2 100644
--- a/execution.e
+++ b/execution.e
@@ -145,104 +145,8 @@
 ~
 ~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-
-~ Macro next
-~ ~~~~~~~~~~
-~
-~   Include this inline at the end of a word implemented in machine-code.
-~ Conceptually, it returns. What it actually does is do the next thing the
-~ caller would do, which is call the next word from the caller's array of
-~ word pointers.
-~
-~   This is a widespread technique in Forth implementation, referred to as
-~ indirect threaded code. It's "threaded" in the sense that each word takes
-~ responsibility for finishing up by following the notional thread through the
-~ metaphorical labyrinth to figure out the next word that its caller wants to
-~ run after it. In other words, control never directly returns to the parent,
-~ it proceeds directly to the sibling.
-~
-~ Registers in:
-~
-~ * rsi points to the address of the word to execute
-~
-~ Registers out:
-~
-~ * rax points to the codeword in the contents of the word that was executed
-~ * rsi points to the next word-address after this one
-~
-~ Flags
-~ * DF = 0 is required
-~
-~ (base address -- new base address)
-: pack-next
-  ~ Copy the next word's address from *rsi into rax. Increment rsi (as per the
-  ~ DF flag).
-  lods64
-
-  ~ Load the codeword from the word's contents, and jump to the interpreter it
-  ~ points to.
-  :rax jmp-abs-indirect-reg64 ;
-
-
-~ Macro beforenext
-~ ~~~~~~~~~~~~~~~~
-~
-~   Sometimes we want to transfer control from a word implemented in
-~ machine-code to another word, without coming back after, as if we were
-~ simply jumping to it. This is an innovation of ours; Jonesforth doesn't do
-~ it. It is similar to the tail-call optimization that many Lisp dialects
-~ have.
-~
-~   This implementation will work regardless of how the receiving word is
-~ implemented. It impersonates the "next" snippet, setting up rax to point
-~ to the codeword then jumping to the interpreter. Since it doesn't change
-~ the control stack or rsi, when the receiving word eventually invokes
-~ "next"; it will pick up in the same place as if this sending word had done
-~ it.
-~
-~   Thus, notionally we are doing just this one transfer of control before
-~ eventually getting around to inlining "next". Hence the name.
-~
-~ (target address, base address -- new base address)
-: pack-beforenext
-  ~ Do a permanent transfer of control by setting rax and invoking the
-  ~ codeword. Of course, we could jump to docol ourselves but this will work
-  ~ regardless of what the receiving codeword is.
-  :rax mov-reg64-imm64
-  :rax jmp-abs-indirect-reg64 ;
-
-
-~ Macros pushcontrol
-~        popcontrol
-~ ~~~~~~~~~~~~~~~~~~
-~
-~   Include these inline to push an address onto the control stack, or pop
-~ one off of it. You will recall the control stack is kept in rbp. The
-~ parameter is given in a user-specified register.
-~
-~   Jonesforth's analogous macros are called PUSHRSP and POPRSP but I think
-~ that's super confusing, since rsp is also the name of a register, but a
-~ different one. I guess it was less confusing in 32-bit, since esp doesn't
-~ start with an "r". Anyway, this has to be named something that
-~ distinguishes it from Intel's PUSH and POP opcodes, so...
-~
-~   "Load effective address" is just a cute way to do arithmetic on a
-~ register, here. To push or pop we decrement or increment rbp by 8. To
-~ actually interact with the space in the stack, we indirect through rbp.
-~
-~ Registers in and out:
-~
-~ * rbp points to the top of the control stack.
-~
-~ (source register, base address -- new base address)
-: pack-pushcontrol
-  swap :rbp -8 :rbp lea-reg64-disp8-reg64
-  swap :rbp 0 mov-disp8-reg64-reg64 ;
-
-~ (target register, base address -- new base address)
-: pack-popcontrol
-  :rbp 0 3roll mov-reg64-disp8-reg64
-  :rbp 8 :rbp lea-reg64-disp8-reg64 ;
+~   The macros next, beforenext, pushcontrol, and popcontrol are implemented
+~ in execution-support.e. It's a good idea to go read about them now.
 
 ~ Constants
 ~ ~~~~~~~~~
diff --git a/labels.e b/labels.e
index 253488e..27752aa 100644
--- a/labels.e
+++ b/labels.e
@@ -288,7 +288,7 @@
   0 swap
   ~ TODO every time you double this to fix a crash, you must publicly
   ~ apologize for deferring a real fix. those are the rules
-  0x4000 allocate dup
+  0x10000 allocate dup
   ~ (iteration count, execution token, output start, output point)
   { 3 pick 100 > }
   { 2 pick execute 4 roll 1+ 4 unroll
diff --git a/linux.e b/linux.e
index ce0b9b7..c7e9bb3 100644
--- a/linux.e
+++ b/linux.e
@@ -1,6 +1,6 @@
-~ ~~~~~~~~~~~~~~~~~~
-~ ~~ System calls ~~
-~ ~~~~~~~~~~~~~~~~~~
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+~ ~~ System calls for the Linux kernel ~~
+~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 ~
 ~   The kernel preserves every register except rax, rcx, and r11. The system
 ~ call number goes in rax, as does the return value. Parameters go in rdi,
diff --git a/log-load.e b/log-load.e
index 71324f2..100211c 100644
--- a/log-load.e
+++ b/log-load.e
@@ -98,6 +98,14 @@
 ~ abstractions which would make it difficult or inefficient to work directly
 ~ with these topics. So, in understanding Evocation, it's important to know
 ~ about handles.
+~
+~   Some of these bootstrap words rely on being able to invoke assembler words
+~ that output machine code. Therefore, those words must be available at
+~ runtime. Since nothing can be dynamically available at runtime until after
+~ we've already run the log-load routine, which relies on the stuff in this
+~ file, the assembler words must be statically available via the label
+~ transform. That means their definitions in amd64.e must be loaded before
+~ this file.
 
 
 ~   Find-in is the main word that provides the capability to look up words by
@@ -267,9 +275,52 @@
   mov-reg64-imm64
   ~ (log address, here, output point)
 
-~   :rax push-reg64
+  :rax push-reg64
   pack-next
   8 packalign
 
   swap ! ;
 
+
+~   A keyword is a word that evaluates to its own address, which makes it
+~ suitable for use as a constant. By convention, all our keywords have names
+~ starting with a colon, which imitates the way they work in Common Lisp.
+~
+~   Specifically, it returns its own execution token. Thus, executing its
+~ result repeatedly will keep giving the same value. We aren't in the habit of
+~ doing quote-exec kinds of things in Evocation, but it seems as good as any
+~ other unique value, so we might as well.
+~
+~   Unlike CL, we don't currently have the lexer automatically create keywords
+~ for us; we create them explicitly. Even if we did someday have it be
+~ automatic, the log-load routine would need a way to do it explicitly, which
+~ is this.
+~
+~   It's kind of a pain to look up the appropriate "docol" from here, so we
+~ do it in assembler instead.
+~
+~ (log address, string pointer -- log address)
+: log-load-keyword
+  log-load-create
+  ~ (log address)
+
+  log-load-here @ dup
+  ~ (log address, self execution token, output point)
+
+  dup 8 + pack64
+  ~ (log address, self execution token, output point)
+
+  swap :rax mov-reg64-imm64
+  ~ (log address, output point)
+
+  :rax push-reg64
+  pack-next
+  8 packalign
+  ~ (log address, output point)
+
+  swap log-load-here
+  ~ (output point, log address, here)
+  swap 3unroll
+  ~ (log address, output point, here)
+  ! ;
+
diff --git a/transform.e b/transform.e
index 7f3d9ef..0c39489 100644
--- a/transform.e
+++ b/transform.e
@@ -410,6 +410,24 @@ allocate-transform-state s" transform-state" variable
   ; make-immediate
 
 
+~ TODO there should really be an actual word that this alternate is replacing
+~
+~ (string pointer --)
+: label-keyword-alternate
+  Lcreate
+
+  here @ dup
+  ~ (self execution token, output point)
+  dup 8 + pack64
+  swap :rax mov-reg64-imm64
+  ~ (output point)
+  :rax push-reg64
+  pack-next
+  8 packalign
+  here !
+  ; make-immediate
+
+
 ~   Because docol requires it, we provide a special mini-version of the label
 ~ system. We only do L@' and L!', because that's all we need. These are real
 ~ labels; there can be arbitrarily many of them, and they can have forward
@@ -482,6 +500,8 @@ allocate-transform-state s" transform-state" variable
   dup s" ;asm" stringcmp 0 = { swap drop ' L;asm swap } if
   dup s" L@'" stringcmp 0 = { swap drop ' label-L@'-alternate swap } if
   dup s" L!'" stringcmp 0 = { swap drop ' label-L!'-alternate swap } if
+  dup s" keyword" stringcmp 0 = {
+    swap drop ' label-keyword-alternate swap } if
   drop swap
   ~ (name as stack string, 0 or alternate entry pointer, name pointer)