diff options
Diffstat (limited to 'hex.e')
| -rw-r--r-- | hex.e | 241 |
1 files changed, 241 insertions, 0 deletions
~ cat labels.e elf.e hex.e | ./evoke > hex && chmod 755 hex && ./hex
~
~ hex: a filter that turns ASCII hexadecimal text into raw bytes.
~
~ The words in this file do not perform the conversion themselves; each
~ one appends x86-64 machine code (or data) to an output buffer, and the
~ last two lines write that buffer out as an executable. The generated
~ program:
~   - reads stdin one byte at a time;
~   - skips ASCII space and linefeed between bytes;
~   - skips everything from a tilde up to the next linefeed;
~   - decodes each pair of hex digits (0-9, A-F, a-f) and writes the
~     resulting byte to stdout;
~   - on an invalid digit, an EOF between the two digits of a pair, or a
~     read error, writes a message to stderr and exits with code 1.
~
~ Register roles in the generated code:
~   rbx  address of the I/O buffer, loaded once at startup
~   rbp  the byte being assembled (high nibble, then both nibbles)
~   rax  syscall number / syscall result / character being examined
~
~ Words not defined here (L!' L@' current-offset, the instruction
~ emitters, pack64, packstring, label-loop, ...) presumably come from
~ labels.e, elf.e or evoke itself, as suggested by the build line above.
~ In the two-register emitters the source operand appears to be written
~ first and the destination second; that reading is taken from the
~ comments beside their uses below.

~ Emits a short (8-bit displacement) unconditional jump to a label.
~ The displacement is label offset - (current offset + 2), where the
~ current offset is output point - buffer start. The 2 is the length of a
~ short jump: x86 measures relative displacements from the end of the
~ instruction.
~
~ (buffer start, output point, label offset
~  -- buffer start, output point)
: jmp-rel-imm8-from-here
  over 3 pick - 2 + - jmp-rel-imm8 ;

~ Emits a short (8-bit displacement) conditional jump to a label. The
~ displacement is computed exactly as in jmp-rel-imm8-from-here; the
~ condition code is moved out of the way first and put back on top
~ before the emitter is called.
~
~ (buffer start, output point, label offset, condition code
~  -- buffer start, output point)
: jmp-cc-rel-imm8-from-here
  swap 2 pick 4 pick - 2 + - swap jmp-cc-rel-imm8 ;

~ Emits a call with a 32-bit relative displacement to a label. The
~ displacement is label offset - (current offset + 5); 5 is the length of
~ a call with a 32-bit displacement.
~
~ (buffer start, output point, label offset -- buffer start, output point)
: call-rel-imm32-from-here
  over 3 pick - 5 + - call-rel-imm32 ;


~ Emits the entry point and main loop of the generated program. Defines
~ the labels cold-start, input-loop-start and skip-comment.
~
~ NOTE(review): every branch here other than the calls uses an 8-bit
~ displacement, yet exit, read-error and unexpected-eof are defined by
~ routines emitted after this one. Confirm that those displacements fit
~ in a signed byte, or that the emitters reject ones that do not.
~
~ (buffer start, output point -- buffer start, output point)
: output-start-routine
  current-offset L!' cold-start
  ~ The basic registers preserved across syscalls are rbx, rsp, rbp.
  ~ To avoid redundant moves, we store the buffer pointer in rbx just once,
  ~ and keep it there. We've made sure our load origin fits in 32 bits, so
  ~ we can use imm32 for that. We're going to want to do an indirect load
  ~ from it, so we can't use rbp for this.
  L@' buffer L@' origin + :rbx mov-reg64-imm32

  current-offset L!' input-loop-start
  L@' read-byte call-rel-imm32-from-here

  ~ If the length is 0, we got EOF. If it's less than zero, we got a read
  ~ error. Either way, we exit. This is a signed comparison, as it needs to
  ~ be.
  0 :rax cmp-reg64-imm8
  L@' exit :cc-equal jmp-cc-rel-imm8-from-here
  L@' read-error :cc-less jmp-cc-rel-imm8-from-here

  ~ Now that the length is handled, retrieve the input byte. This is a
  ~ full 64-bit load; the comparisons below presumably rely on the other
  ~ seven bytes of the buffer being zero.
  :rbx :rax mov-reg64-indirect-reg64

  ~ If it's space or linefeed, skip it (go back to the loop start).
  0x20 :rax cmp-reg64-imm8                 ~ ASCII space
  L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
  0x0a :rax cmp-reg64-imm8                 ~ ASCII linefeed
  L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
  ~ If it's a comment, skip the whole thing.
  0x7e :rax cmp-reg64-imm8                 ~ ASCII tilde
  L@' skip-comment :cc-equal jmp-cc-rel-imm8-from-here

  ~ Decode the value, or exit with an error.
  L@' decode-nibble call-rel-imm32-from-here

  ~ We use rbp as a place to stash the high nibble. Rotating left by four
  ~ moves it from bits 0-3 to bits 4-7.
  :rax :rbp mov-reg64-reg64
  4 :rbp rol-reg64-imm8

  ~ Now we read another byte.
  L@' read-byte call-rel-imm32-from-here

  ~ Handle the length. A second hex digit is required here.
  0 :rax cmp-reg64-imm8
  L@' unexpected-eof :cc-equal jmp-cc-rel-imm8-from-here
  L@' read-error :cc-less jmp-cc-rel-imm8-from-here

  ~ Now that the length is handled, retrieve the input byte.
  :rbx :rax mov-reg64-indirect-reg64

  ~ Decode the value, or exit with an error.
  L@' decode-nibble call-rel-imm32-from-here

  ~ We OR in the low nibble.
  :rax :rbp or-reg64-reg64

  ~ Output the byte. We reuse the buffer as a place to store it. Only one
  ~ byte is handed to sys-write, although the store is a full 64-bit one.
  :rbp :rbx mov-indirect-reg64-reg64
  :rbx :rsi mov-reg64-reg64                ~ buffer pointer
  1 :rdx mov-reg64-imm32                   ~ buffer length
  1 :rax mov-reg64-imm32                   ~ syscall number for sys-write
  1 :rdi mov-reg64-imm32                   ~ file descriptor 1 is stdout
  syscall

  ~ Back to the start of the loop.
  L@' input-loop-start jmp-rel-imm8-from-here

  current-offset L!' skip-comment

  ~ Read a byte for the comment.
  L@' read-byte call-rel-imm32-from-here

  ~ Handle the length. We're allowed to end in a comment.
  0 :rax cmp-reg64-imm8
  L@' exit :cc-equal jmp-cc-rel-imm8-from-here
  L@' read-error :cc-less jmp-cc-rel-imm8-from-here

  ~ Now that the length is handled, retrieve the input byte.
  :rbx :rax mov-reg64-indirect-reg64

  ~ If it's linefeed, the comment is over.
  0x0a :rax cmp-reg64-imm8                 ~ ASCII linefeed
  L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here

  ~ We're still in the comment, keep handling it.
  L@' skip-comment jmp-rel-imm8-from-here ;


~ Emits the read-byte subroutine and defines the label read-byte.
~
~ This routine has no expectations; it reads a byte into L' buffer, keeps
~ the return value of the syscall in :rax, and returns to its caller. The
~ caller is responsible for doing something with the return value.
~
~ (output memory start, current output point
~  -- output memory start, current output point)
: output-read-byte
  current-offset L!' read-byte
  ~ We use self-xor as a concise way to set registers to zero.
  :rax :rax xor-reg64-reg64                ~ syscall number for sys-read
  :rdi :rdi xor-reg64-reg64                ~ file descriptor 0 is stdin
  :rbx :rsi mov-reg64-reg64                ~ buffer pointer
  ~ We read one byte at a time, because it makes the loop structure simple.
  1 :rdx mov-reg64-imm32                   ~ buffer length
  syscall
  ret ;


~ Emits the decode-nibble subroutine and defines the labels decode-nibble
~ and got-nibble.
~
~ This routine expects :rax to hold an ASCII byte, which must be a valid
~ hexadecimal digit. When it returns, :rax holds a decoded nibble. If the
~ input is invalid, it jumps to L' invalid-byte instead, thereby ending
~ execution.
~
~ The three digit ranges are tried in ascending ASCII order. Each later
~ subtraction is relative to what earlier steps left in :rax, which is why
~ the operands are written as differences of character codes.
~
~ (output memory start, current output point
~  -- output memory start, current output point)
: output-decode-nibble
  current-offset L!' decode-nibble

  ~ Digits 0-9: rax becomes the character minus ASCII zero.
  0x30 :rax sub-reg64-imm8                 ~ ASCII zero
  ~ If it's negative, jump to the error path.
  L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
  10 :rax cmp-reg64-imm8
  ~ This is an unsigned comparison.
  L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here

  ~ Letters A-F: rax becomes the character minus ASCII capital A.
  0x41 0x30 - :rax sub-reg64-imm8          ~ ASCII capital A
  ~ If it's negative, jump to the error path.
  L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
  ~ To simplify the range adjustment, we do it unconditionally, before
  ~ checking the upper bound.
  10 :rax add-reg64-imm8
  16 :rax cmp-reg64-imm8
  ~ This is an unsigned comparison.
  L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here

  ~ Letters a-f: the extra 10 undoes the adjustment made just above, so
  ~ rax becomes the character minus ASCII lowercase a.
  0x61 0x41 - 10 + :rax sub-reg64-imm8     ~ ASCII lowercase a
  ~ If it's negative, jump to the error path.
  L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
  ~ Again, we adjust the range unconditionally, then check the upper bound.
  10 :rax add-reg64-imm8
  16 :rax cmp-reg64-imm8
  ~ This is an unsigned comparison.
  L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
  ~ It's not hex, so jump to the error path.
  L@' invalid-byte jmp-rel-imm8-from-here

  current-offset L!' got-nibble
  ret ;


~ Emits the normal-termination path and defines the label exit. The
~ generated code calls sys-exit with exit code 0.
~
~ (output memory start, current output point
~  -- output memory start, current output point)
: output-exit
  current-offset L!' exit
  60 :rax mov-reg64-imm32                  ~ syscall number for sys-exit
  0 :rdi mov-reg64-imm32                   ~ exit code
  syscall ;


~ Emits the three error entry points (invalid-byte, unexpected-eof,
~ read-error) and the shared tail exit-with-error. Each entry point loads
~ the address of its message into rsi and the message length into rdx;
~ the tail writes that message to stderr and exits with code 1.
~
~ Printing an error message makes sure we don't produce a valid-looking
~ binary that inadvertently gets used. So, it's worth it, despite coming at
~ a cost to code size.
~
~ (output memory start, current output point
~  -- output memory start, current output point)
: output-error-handlers
  current-offset L!' invalid-byte
  L@' origin L@' invalid-byte-message + :rsi mov-reg64-imm64
  L@' invalid-byte-message-size :rdx mov-reg64-imm64
  L@' exit-with-error jmp-rel-imm8-from-here

  current-offset L!' unexpected-eof
  L@' origin L@' unexpected-eof-message + :rsi mov-reg64-imm64
  L@' unexpected-eof-message-size :rdx mov-reg64-imm64
  L@' exit-with-error jmp-rel-imm8-from-here

  current-offset L!' read-error
  L@' origin L@' read-error-message + :rsi mov-reg64-imm64
  L@' read-error-message-size :rdx mov-reg64-imm64
  ~ Fall through.

  current-offset L!' exit-with-error
  1 :rax mov-reg64-imm32                   ~ syscall number for sys-write
  2 :rdi mov-reg64-imm32                   ~ file descriptor 2 is stderr
  syscall

  60 :rax mov-reg64-imm32                  ~ syscall number for sys-exit
  1 :rdi mov-reg64-imm32                   ~ exit code
  syscall
  ;


~ Emits the text of the three error messages. For each one, the offset at
~ which it starts is recorded under a -message label, and the difference
~ between the offset after packing and that start offset is recorded
~ under the matching -message-size label. The messages are packed without
~ a trailing linefeed.
~
~ NOTE(review): 3unroll and 4 roll appear to tuck the saved start offset
~ beneath the buffer start and output point and then bring it back;
~ confirm against the definitions of those words.
~
~ (output memory start, current output point
~  -- output memory start, current output point)
: output-messages
  current-offset dup L!' invalid-byte-message 3unroll
  s" Invalid byte." packstring
  current-offset 4 roll - L!' invalid-byte-message-size

  current-offset dup L!' read-error-message 3unroll
  s" Read error." packstring
  current-offset 4 roll - L!' read-error-message-size

  current-offset dup L!' unexpected-eof-message 3unroll
  s" Unexpected EOF." packstring
  current-offset 4 roll - L!' unexpected-eof-message-size ;

~ Lays out the whole output file: the ELF headers, every routine above,
~ the messages, and finally the I/O buffer, which is a single packed
~ 64-bit zero. The label origin is set to 0x08000000 before anything is
~ emitted, and total-size is recorded after the buffer.
~
~ (output memory start, current output point
~  -- output memory start, current output point)
~
~ Everything directly called by all-contents has this same interface.
~
: all-contents
  0x08000000 L!' origin
  elf-file-header
  elf-program-header-writable
  output-start-routine
  output-read-byte
  output-exit
  output-decode-nibble
  output-error-handlers
  output-messages
  current-offset L!' buffer 0 pack64
  current-offset L!' total-size ;

~ Run all-contents under label-loop, then write the result out and stop.
' all-contents entry-to-execution-token label-loop
swap sys-write bye