summary refs log tree commit diff
path: root/hex.e
diff options
context:
space:
mode:
Diffstat (limited to 'hex.e')
-rw-r--r-- hex.e 241
1 files changed, 241 insertions, 0 deletions
diff --git a/hex.e b/hex.e
new file mode 100644
index 0000000..51e7703
--- /dev/null
+++ b/hex.e
@@ -0,0 +1,241 @@
+~ cat labels.e elf.e hex.e | ./evoke > hex && chmod 755 hex && ./hex
+
+~   Turns a label offset into a displacement and hands it to jmp-rel-imm8.
+~ The displacement is the label offset minus the current offset (output
+~ point minus buffer start), minus a further 2 -- presumably the encoded
+~ length of the jump, since the displacement counts from its end.
+~
+~ (buffer start, output point, label offset
+~  -- buffer start, output point)
+: jmp-rel-imm8-from-here
+  2 - over 3 pick - - jmp-rel-imm8 ;
+
+~   Conditional counterpart of jmp-rel-imm8-from-here. The condition code is
+~ moved out of the way, the label offset is reduced by 2 (presumably the
+~ encoded length of the jump) and by the current offset, and the condition
+~ code is then put back on top for jmp-cc-rel-imm8.
+~
+~ (buffer start, output point, label offset, condition code
+~  -- buffer start, output point)
+: jmp-cc-rel-imm8-from-here
+  swap 2 - 2 pick 4 pick - - swap jmp-cc-rel-imm8 ;
+
+~   Same idea as the jump helpers, for calls. The adjustment here is 5
+~ rather than 2 -- presumably the encoded length of a call with a 32-bit
+~ displacement.
+~
+~ (buffer start, output point, label offset -- buffer start, output point)
+: call-rel-imm32-from-here
+  5 - over 3 pick - - call-rel-imm32 ;
+
+
+~   Emits the entry point (L' cold-start) and the main loop. The loop reads
+~ a byte, skips spaces, linefeeds and tilde comments, decodes two hex digits
+~ into one byte, writes that byte to stdout, and starts over. It leaves via
+~ L' exit on EOF, or via one of the error labels.
+~
+~   NOTE(review): every branch below goes through the rel-imm8 helpers, yet
+~ several targets (exit, read-error, unexpected-eof) are emitted by later
+~ routines. Confirm each displacement really fits in a signed byte.
+~
+~ (buffer start, output point -- buffer start, output point)
+: output-start-routine
+  current-offset L!' cold-start
+  ~   The basic registers preserved across syscalls are rbx, rsp, rbp.
+  ~ To avoid redundant moves, we store the buffer pointer in rbx just once,
+  ~ and keep it there. We've made sure our load origin fits in 32 bits, so we
+  ~ can use imm32 for that. We're going to want to do an indirect load from
+  ~ it, so we can't use rbp for this.
+  L@' buffer L@' origin + :rbx mov-reg64-imm32
+
+  current-offset L!' input-loop-start
+  L@' read-byte call-rel-imm32-from-here
+
+  ~   If the length is 0, we got EOF. If it's less than zero, we got a read
+  ~ error. Either way, we exit. This is a signed comparison, as it needs to
+  ~ be. Both branches test the flags of the single compare below.
+  0 :rax cmp-reg64-imm8
+  L@' exit :cc-equal jmp-cc-rel-imm8-from-here
+  L@' read-error :cc-less jmp-cc-rel-imm8-from-here
+
+  ~   Now that the length is handled, retrieve the input byte. Going by the
+  ~ word's name this is a 64-bit load from the address in rbx, so the
+  ~ comparisons below presumably rely on the rest of the buffer being zero.
+  :rbx :rax mov-reg64-indirect-reg64
+
+  ~ If it's space or linefeed, skip it (go back to the loop start).
+  0x20 :rax cmp-reg64-imm8                  ~ ASCII space
+  L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
+  0x0a :rax cmp-reg64-imm8                  ~ ASCII linefeed
+  L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
+  ~ If it's a comment, skip the whole thing.
+  0x7e :rax cmp-reg64-imm8                  ~ ASCII tilde
+  L@' skip-comment :cc-equal jmp-cc-rel-imm8-from-here
+
+  ~ Decode the value, or exit with an error.
+  L@' decode-nibble call-rel-imm32-from-here
+
+  ~   We use rbp as a place to stash the high nibble. The rotate by 4 moves
+  ~ the decoded value up into the high half of the byte.
+  :rax :rbp mov-reg64-reg64
+  4 :rbp rol-reg64-imm8
+
+  ~ Now we read another byte.
+  L@' read-byte call-rel-imm32-from-here
+
+  ~   Handle the length. A second hex digit is required here, so EOF in this
+  ~ position is an error rather than a clean exit.
+  0 :rax cmp-reg64-imm8
+  L@' unexpected-eof :cc-equal jmp-cc-rel-imm8-from-here
+  L@' read-error :cc-less jmp-cc-rel-imm8-from-here
+
+  ~ Now that the length is handled, retrieve the input byte.
+  :rbx :rax mov-reg64-indirect-reg64
+
+  ~ Decode the value, or exit with an error.
+  L@' decode-nibble call-rel-imm32-from-here
+
+  ~ We OR in the low nibble.
+  :rax :rbp or-reg64-reg64
+
+  ~   Output the byte. We reuse the buffer as a place to store it. The store
+  ~ is 64 bits wide, going by the word's name, but only one byte is written
+  ~ out.
+  :rbp :rbx mov-indirect-reg64-reg64
+  :rbx :rsi mov-reg64-reg64                 ~ buffer pointer
+  1 :rdx mov-reg64-imm32                    ~ buffer length
+  1 :rax mov-reg64-imm32                    ~ syscall number for sys-write
+  1 :rdi mov-reg64-imm32                    ~ file descriptor 1 is stdout
+  syscall
+  ~   NOTE(review): the result of this sys-write is never examined, so a
+  ~ failed write goes unreported. Consider checking rax here.
+
+  ~ Back to the start of the loop.
+  L@' input-loop-start jmp-rel-imm8-from-here
+
+  ~   The comment skipper. It is reached only by the branch on tilde above,
+  ~ and consumes input up to and including the next linefeed.
+  current-offset L!' skip-comment
+
+  ~ Read a byte for the comment.
+  L@' read-byte call-rel-imm32-from-here
+
+  ~ Handle the length. We're allowed to end in a comment.
+  0 :rax cmp-reg64-imm8
+  L@' exit :cc-equal jmp-cc-rel-imm8-from-here
+  L@' read-error :cc-less jmp-cc-rel-imm8-from-here
+
+  ~ Now that the length is handled, retrieve the input byte.
+  :rbx :rax mov-reg64-indirect-reg64
+
+  ~ If it's linefeed, the comment is over.
+  0x0a :rax cmp-reg64-imm8                  ~ ASCII linefeed
+  L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
+
+  ~ We're still in the comment, keep handling it.
+  L@' skip-comment jmp-rel-imm8-from-here ;
+
+
+~   This routine expects :rbx to hold the address of L' buffer, since it
+~ copies :rbx into :rsi as the buffer pointer. It asks sys-read for one byte
+~ from stdin, leaves the return value of the syscall in :rax, and returns to
+~ its caller. The caller is responsible for doing something with the return
+~ value. Besides :rax, it overwrites :rdi, :rsi and :rdx.
+~
+~ (output memory start, current output point
+~  -- output memory start, current output point)
+: output-read-byte
+  current-offset L!' read-byte
+  ~ We use self-xor as a concise way to set registers to zero.
+  :rax :rax xor-reg64-reg64                 ~ syscall number for sys-read
+  :rdi :rdi xor-reg64-reg64                 ~ file descriptor 0 is stdin
+  :rbx :rsi mov-reg64-reg64                 ~ buffer pointer
+  ~ We read one byte at a time, because it makes the loop structure simple.
+  1 :rdx mov-reg64-imm32                    ~ buffer length
+  syscall
+  ret ;
+
+
+~   This routine expects :rax to hold an ASCII byte, which should be a
+~ hexadecimal digit (0-9, A-F or a-f). When it returns, :rax holds the
+~ decoded nibble. If the input is not a hex digit, it jumps to
+~ L' invalid-byte instead, thereby ending execution. It never returns in
+~ that case, so the return address left on the stack does not matter.
+~
+~   The three ranges are tried in ascending ASCII order. Each step rebases
+~ :rax so that the first character of the next range becomes zero, rejects
+~ anything that went negative, and then checks the upper bound.
+~
+~ (output memory start, current output point
+~  -- output memory start, current output point)
+: output-decode-nibble
+  current-offset L!' decode-nibble
+
+  0x30 :rax sub-reg64-imm8                  ~ ASCII zero
+  ~ If it's negative, jump to the error path.
+  L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
+  10 :rax cmp-reg64-imm8
+  ~ This is an unsigned comparison. Below 10 means it was a decimal digit.
+  L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
+  ~ We already subtracted 0x30, so we subtract only the remaining distance.
+  0x41 0x30 - :rax sub-reg64-imm8           ~ ASCII capital A
+  ~ If it's negative, jump to the error path.
+  L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
+  ~ To simplify the range adjustment, we do it unconditionally, before
+  ~ checking the upper bound.
+  10 :rax add-reg64-imm8
+  16 :rax cmp-reg64-imm8
+  ~ This is an unsigned comparison. Below 16 means it was A through F.
+  L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
+  ~ The extra 10 undoes the adjustment we just added.
+  0x61 0x41 - 10 + :rax sub-reg64-imm8      ~ ASCII lowercase a
+  ~ If it's negative, jump to the error path.
+  L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
+  ~ Again, we adjust the range unconditionally, then check the upper bound.
+  10 :rax add-reg64-imm8
+  16 :rax cmp-reg64-imm8
+  ~ This is an unsigned comparison. Below 16 means it was a through f.
+  L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
+  ~ It's not hex, so jump to the error path.
+  L@' invalid-byte jmp-rel-imm8-from-here
+
+  ~ The shared success exit for all three ranges.
+  current-offset L!' got-nibble
+  ret ;
+
+
+~   Emits the clean exit path, L' exit, which calls sys-exit with an exit
+~ code of zero. Nothing is emitted after the syscall, so this code is not
+~ expected to return.
+~
+~ (output memory start, current output point
+~  -- output memory start, current output point)
+: output-exit
+  current-offset L!' exit
+  60 :rax mov-reg64-imm32                   ~ syscall number for sys-exit
+  0 :rdi mov-reg64-imm32                    ~ exit code
+  syscall ;
+
+
+~   Printing an error message makes sure we don't produce a valid-looking
+~ binary that inadvertently gets used. So, it's worth it, despite coming at
+~ a cost to code size.
+~
+~   There are three entry points, one per message. Each loads the address
+~ of its message into :rsi and the length into :rdx, then reaches the
+~ shared tail at L' exit-with-error, which writes the message to stderr and
+~ exits with code 1.
+~
+~   NOTE(review): the loads below use imm64, while output-start-routine
+~ loads the buffer address, also an origin-relative value, with imm32.
+~ Presumably imm32 would do here too and be shorter; confirm before
+~ changing. Also, the strings in output-messages are written without a
+~ trailing linefeed; check whether packstring adds one.
+~
+~ (output memory start, current output point
+~  -- output memory start, current output point)
+: output-error-handlers
+  current-offset L!' invalid-byte
+  L@' origin L@' invalid-byte-message + :rsi mov-reg64-imm64
+  L@' invalid-byte-message-size :rdx mov-reg64-imm64
+  L@' exit-with-error jmp-rel-imm8-from-here
+
+  current-offset L!' unexpected-eof
+  L@' origin L@' unexpected-eof-message + :rsi mov-reg64-imm64
+  L@' unexpected-eof-message-size :rdx mov-reg64-imm64
+  L@' exit-with-error jmp-rel-imm8-from-here
+
+  current-offset L!' read-error
+  L@' origin L@' read-error-message + :rsi mov-reg64-imm64
+  L@' read-error-message-size :rdx mov-reg64-imm64
+  ~ Fall through. This handler is last so that it needs no jump.
+
+  ~ The shared tail. It expects :rsi and :rdx to be set up already.
+  current-offset L!' exit-with-error
+  1 :rax mov-reg64-imm32                    ~ syscall number for sys-write
+  2 :rdi mov-reg64-imm32                    ~ file descriptor 2 is stderr
+  syscall
+
+  60 :rax mov-reg64-imm32                   ~ syscall number for sys-exit
+  1 :rdi mov-reg64-imm32                    ~ exit code
+  syscall
+  ;
+
+
+~   Emits the text of the three error messages, and records a start label
+~ and a size label for each. These are the labels that
+~ output-error-handlers reads.
+~
+~   Each group follows the same pattern. The start offset is stored in the
+~ message label, and a copy is kept on the stack while the string is packed.
+~ Presumably 3unroll tucks that copy beneath the two standing stack items,
+~ and 4 roll brings it back to the top afterwards; confirm against the
+~ definitions of those words. The size is then the offset after packing
+~ minus the start offset.
+~
+~ (output memory start, current output point
+~  -- output memory start, current output point)
+: output-messages
+  current-offset dup L!' invalid-byte-message 3unroll
+  s" Invalid byte." packstring
+  current-offset 4 roll - L!' invalid-byte-message-size
+
+  current-offset dup L!' read-error-message 3unroll
+  s" Read error." packstring
+  current-offset 4 roll - L!' read-error-message-size
+
+  current-offset dup L!' unexpected-eof-message 3unroll
+  s" Unexpected EOF." packstring
+  current-offset 4 roll - L!' unexpected-eof-message-size ;
+
+~   Lays out the whole output file, in order. First the ELF headers, then
+~ the code routines, then the message text, and finally the eight-byte
+~ buffer that the generated program reads into and writes from.
+~
+~ (output memory start, current output point
+~  -- output memory start, current output point)
+~
+~ Everything directly called by all-contents has this same interface.
+~
+: all-contents
+  ~ The load origin. The start routine relies on it fitting in 32 bits.
+  0x08000000 L!' origin
+  elf-file-header
+  elf-program-header-writable
+  ~ The start routine comes first among the code, as it holds the entry
+  ~ label, L' cold-start.
+  output-start-routine
+  output-read-byte
+  output-exit
+  output-decode-nibble
+  output-error-handlers
+  output-messages
+  ~ The buffer is part of the file contents, initialized to zero.
+  current-offset L!' buffer 0 pack64
+  ~ L' total-size is not read in this file; presumably the ELF header words
+  ~ use it.
+  current-offset L!' total-size ;
+
+~   The top-level driver. It passes all-contents to label-loop, writes out
+~ what that leaves on the stack, and exits. Presumably label-loop re-runs
+~ all-contents until every label value has settled, since several labels
+~ are read before the routine that sets them has run; confirm in labels.e.
+' all-contents entry-to-execution-token label-loop
+swap sys-write bye
+