1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
|
~ cat labels.e elf.e hex.e | ./evoke > hex && chmod 755 hex && ./hex
~ Emit a short unconditional jump to a label, given the label's offset from
~ the start of the buffer. The displacement is measured from the end of the
~ jump instruction, which is two bytes long.
~
~ (buffer start, output point, label offset
~ -- buffer start, output point)
: jmp-rel-imm8-from-here
over 3 pick - ~ output point minus buffer start: where the jump goes
- ~ label offset minus the jump's own offset
2 - ~ less the two bytes of the jump itself
jmp-rel-imm8 ;
~ Emit a short conditional jump to a label, given the label's offset from
~ the start of the buffer and a condition code. The displacement is measured
~ from the end of the jump instruction, which is two bytes long.
~
~ (buffer start, output point, label offset, condition code
~ -- buffer start, output point)
: jmp-cc-rel-imm8-from-here
swap ~ keep the condition code out of the way
2 pick 4 pick - ~ output point minus buffer start: where the jump goes
- ~ label offset minus the jump's own offset
2 - ~ less the two bytes of the jump itself
swap ~ displacement first, condition code on top
jmp-cc-rel-imm8 ;
~ Emit a near call to a label, given the label's offset from the start of
~ the buffer. The displacement is measured from the end of the call
~ instruction, which is five bytes long.
~
~ (buffer start, output point, label offset -- buffer start, output point)
: call-rel-imm32-from-here
over 3 pick - ~ output point minus buffer start: where the call goes
- ~ label offset minus the call's own offset
5 - ~ less the five bytes of the call itself
call-rel-imm32 ;
~ Emits the entry point and main loop of the hex decoder. The generated code
~ reads standard input one byte at a time, skips spaces, linefeeds and
~ tilde-to-linefeed comments, decodes each pair of hex digits into one byte,
~ and writes that byte to standard output.
~
~ Register use in the generated code: rbx holds the buffer address for the
~ whole run; rbp holds the byte being assembled from two nibbles.
~
~ Every short (rel8) jump emitted here targets a label inside this routine.
~ A rel8 displacement only spans -128 to +127 bytes, and all-contents emits
~ read-byte, exit and decode-nibble between this routine and the error
~ handlers, so exit, read-error and unexpected-eof are reached through the
~ trampolines at the end of this routine rather than directly.
~
~ (buffer start, output point -- buffer start, output point)
: output-start-routine
current-offset L!' cold-start
~ The basic registers preserved across syscalls are rbx, rsp, rbp.
~ To avoid redundant moves, we store the buffer pointer in rbx just once,
~ and keep it there. We've made sure our load origin fits in 32 bits, so we
~ can use imm32 for that. We're going to want to do an indirect load from
~ it, so we can't use rbp for this.
L@' buffer L@' origin + :rbx mov-reg64-imm32
current-offset L!' input-loop-start
L@' read-byte call-rel-imm32-from-here
~ If the length is 0, we got EOF. If it's less than zero, we got a read
~ error. Either way, we exit. This is a signed comparison, as it needs to
~ be.
0 :rax cmp-reg64-imm8
L@' far-exit :cc-equal jmp-cc-rel-imm8-from-here
L@' far-read-error :cc-less jmp-cc-rel-imm8-from-here
~ Now that the length is handled, retrieve the input byte.
:rbx :rax mov-reg64-indirect-reg64
~ If it's space or linefeed, skip it (go back to the loop start).
0x20 :rax cmp-reg64-imm8 ~ ASCII space
L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
0x0a :rax cmp-reg64-imm8 ~ ASCII linefeed
L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
~ If it's a comment, skip the whole thing.
0x7e :rax cmp-reg64-imm8 ~ ASCII tilde
L@' skip-comment :cc-equal jmp-cc-rel-imm8-from-here
~ Decode the value, or exit with an error.
L@' decode-nibble call-rel-imm32-from-here
~ We use rbp as a place to stash the high nibble.
:rax :rbp mov-reg64-reg64
4 :rbp rol-reg64-imm8
~ Now we read another byte.
L@' read-byte call-rel-imm32-from-here
~ Handle the length. A second hex digit is required here.
0 :rax cmp-reg64-imm8
L@' far-unexpected-eof :cc-equal jmp-cc-rel-imm8-from-here
L@' far-read-error :cc-less jmp-cc-rel-imm8-from-here
~ Now that the length is handled, retrieve the input byte.
:rbx :rax mov-reg64-indirect-reg64
~ Decode the value, or exit with an error.
L@' decode-nibble call-rel-imm32-from-here
~ We OR in the low nibble.
:rax :rbp or-reg64-reg64
~ Output the byte. We reuse the buffer as a place to store it.
:rbp :rbx mov-indirect-reg64-reg64
:rbx :rsi mov-reg64-reg64 ~ buffer pointer
1 :rdx mov-reg64-imm32 ~ buffer length
1 :rax mov-reg64-imm32 ~ syscall number for sys-write
1 :rdi mov-reg64-imm32 ~ file descriptor 1 is stdout
syscall
~ NOTE(review): the return value of sys-write is not checked, so a failed
~ or short write goes unnoticed.
~ Back to the start of the loop.
L@' input-loop-start jmp-rel-imm8-from-here
current-offset L!' skip-comment
~ Read a byte for the comment.
L@' read-byte call-rel-imm32-from-here
~ Handle the length. We're allowed to end in a comment.
0 :rax cmp-reg64-imm8
L@' far-exit :cc-equal jmp-cc-rel-imm8-from-here
L@' far-read-error :cc-less jmp-cc-rel-imm8-from-here
~ Now that the length is handled, retrieve the input byte.
:rbx :rax mov-reg64-indirect-reg64
~ If it's linefeed, the comment is over.
0x0a :rax cmp-reg64-imm8 ~ ASCII linefeed
L@' input-loop-start :cc-equal jmp-cc-rel-imm8-from-here
~ We're still in the comment, keep handling it.
L@' skip-comment jmp-rel-imm8-from-here
~ Trampolines. Control never falls into these, because the instruction
~ just above is an unconditional jump. Each one is a rel32 call used as a
~ long jump: the targets all end in sys-exit and never return, so the
~ pushed return address is simply abandoned.
~ NOTE(review): a rel32 jmp emitter would be the natural choice if the
~ assembler words provide one; call-rel-imm32 is the only rel32 transfer
~ visible in this file.
current-offset L!' far-exit
L@' exit call-rel-imm32-from-here
current-offset L!' far-read-error
L@' read-error call-rel-imm32-from-here
current-offset L!' far-unexpected-eof
L@' unexpected-eof call-rel-imm32-from-here ;
~ Emits the read-byte subroutine. The generated routine reads one byte from
~ standard input into L' buffer, keeps the return value of the syscall in
~ :rax, and returns to its caller. The caller is responsible for doing
~ something with the return value.
~
~ The generated routine expects :rbx to hold the address of the buffer; the
~ start routine loads it once and leaves it there. It overwrites :rax, :rdi,
~ :rsi and :rdx, and on Linux the syscall instruction itself also clobbers
~ rcx and r11.
~
~ (output memory start, current output point
~ -- output memory start, current output point)
: output-read-byte
current-offset L!' read-byte
~ We use self-xor as a concise way to set registers to zero.
:rax :rax xor-reg64-reg64 ~ syscall number for sys-read
:rdi :rdi xor-reg64-reg64 ~ file descriptor 0 is stdin
:rbx :rsi mov-reg64-reg64 ~ buffer pointer
~ We read one byte at a time, because it makes the loop structure simple.
1 :rdx mov-reg64-imm32 ~ buffer length
syscall
ret ;
~ Emits the decode-nibble subroutine. The generated routine expects :rax to
~ hold an ASCII byte, which must be a valid hexadecimal digit (0-9, A-F or
~ a-f). When it returns, :rax holds a decoded nibble. If the input is
~ invalid, it jumps to L' invalid-byte instead, thereby ending execution.
~
~ The three digit ranges are tried in ascending ASCII order. Each step
~ rebases :rax so the first character of the range becomes zero, rejects
~ anything that went negative, and then checks the upper bound.
~
~ (output memory start, current output point
~ -- output memory start, current output point)
: output-decode-nibble
current-offset L!' decode-nibble
~ Rebase so that ASCII zero becomes 0.
0x30 :rax sub-reg64-imm8 ~ ASCII zero
~ If it's negative, jump to the error path.
L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
10 :rax cmp-reg64-imm8
~ This is an unsigned comparison. Anything below 10 was a decimal digit and
~ is already decoded.
L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
~ Rebase so that capital A becomes 0. The 0x30 was already subtracted, so
~ only the difference is taken off here.
0x41 0x30 - :rax sub-reg64-imm8 ~ ASCII capital A
~ If it's negative, jump to the error path.
L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
~ To simplify the range adjustment, we do it unconditionally, before
~ checking the upper bound.
10 :rax add-reg64-imm8
16 :rax cmp-reg64-imm8
~ This is an unsigned comparison.
L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
~ Rebase so that lowercase a becomes 0. This also takes back the 10 that
~ was added just above.
0x61 0x41 - 10 + :rax sub-reg64-imm8 ~ ASCII lowercase a
~ If it's negative, jump to the error path.
L@' invalid-byte :cc-less jmp-cc-rel-imm8-from-here
~ Again, we adjust the range unconditionally, then check the upper bound.
10 :rax add-reg64-imm8
16 :rax cmp-reg64-imm8
~ This is an unsigned comparison.
L@' got-nibble :cc-below jmp-cc-rel-imm8-from-here
~ It's not hex, so jump to the error path.
L@' invalid-byte jmp-rel-imm8-from-here
~ All three success paths land here.
current-offset L!' got-nibble
ret ;
~ Emits the normal-termination path: the generated code at L' exit calls
~ sys-exit with status 0 and does not return.
~
~ (output memory start, current output point
~ -- output memory start, current output point)
: output-exit
current-offset L!' exit
60 :rax mov-reg64-imm32 ~ syscall number for sys-exit
~ Self-xor zeroes the register, the same idiom read-byte uses. It is shorter
~ than loading a zero immediate.
:rdi :rdi xor-reg64-reg64 ~ exit code 0
syscall ;
~ Emits the three error entry points and their shared tail.
~
~ Printing an error message makes sure we don't produce a valid-looking
~ binary that inadvertently gets used. So, it's worth it, despite coming at
~ a cost to code size.
~
~ Each entry point loads the address of its message into rsi and the length
~ into rdx, then reaches L' exit-with-error, which writes the message to
~ stderr and exits with status 1. None of these paths return.
~
~ The addresses and lengths are loaded with imm32 moves, the same way the
~ start routine loads the buffer address: the load origin fits in 32 bits
~ and the lengths are tiny, so a 64-bit immediate would only add bytes.
~
~ (output memory start, current output point
~ -- output memory start, current output point)
: output-error-handlers
current-offset L!' invalid-byte
L@' origin L@' invalid-byte-message + :rsi mov-reg64-imm32
L@' invalid-byte-message-size :rdx mov-reg64-imm32
L@' exit-with-error jmp-rel-imm8-from-here
current-offset L!' unexpected-eof
L@' origin L@' unexpected-eof-message + :rsi mov-reg64-imm32
L@' unexpected-eof-message-size :rdx mov-reg64-imm32
L@' exit-with-error jmp-rel-imm8-from-here
current-offset L!' read-error
L@' origin L@' read-error-message + :rsi mov-reg64-imm32
L@' read-error-message-size :rdx mov-reg64-imm32
~ Fall through.
current-offset L!' exit-with-error
~ rsi and rdx were set by whichever entry point got us here.
1 :rax mov-reg64-imm32 ~ syscall number for sys-write
2 :rdi mov-reg64-imm32 ~ file descriptor 2 is stderr
syscall
60 :rax mov-reg64-imm32 ~ syscall number for sys-exit
1 :rdi mov-reg64-imm32 ~ exit code
syscall
;
~ Emits the text of the three error messages and records two labels for
~ each: one for the offset where the message starts, and one for its size.
~ The error handlers read both.
~
~ The same pattern is repeated per message: store the current offset in the
~ message label, keep a second copy of it underneath the two working values
~ (3unroll), pack the string, then subtract the saved start from the new
~ current offset. Because the size is measured as a difference of output
~ offsets, it covers exactly the bytes packstring emitted.
~ NOTE(review): this reading assumes "4 roll" brings the saved start offset
~ back to the top of the stack -- confirm against the definition of roll.
~
~ (output memory start, current output point
~ -- output memory start, current output point)
: output-messages
current-offset dup L!' invalid-byte-message 3unroll
s" Invalid byte." packstring
current-offset 4 roll - L!' invalid-byte-message-size
current-offset dup L!' read-error-message 3unroll
s" Read error." packstring
current-offset 4 roll - L!' read-error-message-size
current-offset dup L!' unexpected-eof-message 3unroll
s" Unexpected EOF." packstring
current-offset 4 roll - L!' unexpected-eof-message-size ;
~ Emits the complete output file, front to back: ELF headers, code, message
~ text, and finally the one-byte I/O buffer's storage.
~
~ The order of the calls below is the layout of the file. Short jumps in
~ the generated code depend on how far apart these pieces end up, so moving
~ or growing a piece can push a jump target out of range.
~
~ (output memory start, current output point
~ -- output memory start, current output point)
~
~ Everything directly called by all-contents has this same interface.
~
: all-contents
~ The load origin. Labels hold offsets into the output; the generated code
~ adds origin to an offset wherever it needs an absolute address.
0x08000000 L!' origin
elf-file-header
elf-program-header-writable
output-start-routine
output-read-byte
output-exit
output-decode-nibble
output-error-handlers
output-messages
~ Storage for the buffer that read-byte fills and the output step reuses.
~ The generated code loads and stores it as a full 64-bit value.
~ NOTE(review): presumably "0 pack64" emits eight zero bytes -- confirm.
current-offset L!' buffer 0 pack64
current-offset L!' total-size ;
~ Top level: build the file by running all-contents under label-loop, write
~ the result with sys-write, and quit.
~ NOTE(review): label-loop and entry-to-execution-token are defined outside
~ this file. Presumably label-loop re-runs all-contents until the labels
~ that are used before they are defined have settled -- confirm.
' all-contents entry-to-execution-token label-loop
swap sys-write bye
|