transform.e


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

~ (buffer size -- buffer address)
: read-to-buffer
  dup allocate dup dup
  ~ (buffer size, buffer address, word start, output point)
  { key
    ~ Exit if it's a zero byte.
    dup not {
      ~ Make sure to pack the zero to serve as a null terminator.
      pack8
      drop drop swap drop exit } if
    dup is-space
      { ~ (buffer size, buffer address, word start, output point, key)
        ~ Tuck the key out of the way until we've done some stuff.
        3unroll
        ~ If it's a space character, first check if we just consumed the magic
        ~ word...
        2dup swap - 8 = dup {
          drop
          ~ Add a null terminator so we can use stringcmp
          dup 0 swap !
          ~ Check for the magic word
          over s" pyrzqxgl" stringcmp 0 =
          } if
        { ~ It's magic, so exit.
          ~ Make sure to pack a zero to serve as a null terminator.
          0 pack8
          drop drop drop swap drop exit }
        { ~ It's not magic, so reset the word start. Of course whitespace is
          ~ not a word but this will help us keep track of things.
          3roll pack8
          swap drop dup } if-else }
      { ~ (buffer size, buffer address, word start, output point, key)
        ~ Tuck the key out of the way again.
        3unroll
        ~ Check if the word just started and the previous character is space.
        2dup = dup { drop dup @ is-space } if
          { ~ If so, this is the actual first character of the word.
            drop swap pack8 dup }
          { ~ If not, leave the word start alone.
            3roll pack8 } if-else } if-else } forever ;

~   In logical terms, this modifies an input buffer metadata structure
~ in-place to push a new, zeroed one into the start of the linked list formed
~ through the next-source field.
~
~   In physical terms, it works by allocating a new structure, copying the
~ fields of the existing one into it, and zeroing the existing one. That's
~ necessary because otherwise we'd need a mutable handle (a pointer to a
~ pointer) to update the start of the list, and there's no way to do that with
~ the main-input-buffer variable working the way it presently does.
~
~ (input buffer metadata pointer --)
: push-input-buffer
  allocate-input-buffer-metadata
  ~ (original metadata pointer, new metadata pointer)
  2dup swap 6 8 * memcopy
  ~ (original metadata pointer, new metadata pointer)
  swap dup zero-input-buffer-metadata
  input-buffer-next-source !
  ;

~   This does the inverse of push-input-buffer. In the event that the
~ next-source field is null, it zeroes the buffer.
~
~   Note, however, that it doesn't deallocate the memory, because that's not
~ how memory allocation on the log works. If necessary, it can be deallocated
~ with "forget", though as usual that requires careful planning.
~
~ (input buffer metadata pointer --)
: pop-input-buffer
  dup input-buffer-next-source @
  ~ (original metadata pointer, next source metadata pointer)
  dup { 6 8 * memcopy }
      { drop zero-input-buffer-metadata } if-else ;

: L:
  ' L' entry-to-execution-token execute
  { ' set-label entry-to-execution-token , }
  { set-label } if-else
  ; make-immediate
  ~ TODO probably needs to do more

8 allocate s" transform-state" variable
0 transform-state !

~   We allow immediate words to run, which means for example that flow-control
~ hexdump-between words such as if-else expect to be able to mutate the heap,
~ and they expect to find the stuff they've been compiling present on it. We
~ make this possible by swapping out the value of "here" during the execution
~ of transformed code. By focusing on "here", we get to keep the existing
~ ",".
~
~ (-- done)
: transform-inner
  word
  ." wrapped here " here @ .hex64 newline

  ~ If no word was returned, exit.
  dup 0 = { drop 0 exit } if

  ~ The string is on the top of the stack, so to get a pointer to it we get
  ~ the stack address.
  ~ (string)
  value@

  ~ If it's the magic word, end the transformation.
  dup s" pyrzqxgl" stringcmp 0 = { drop dropstring 1 exit } if
  ." flags " interpreter-flags @ . newline
  ." transformed: " dup emitstring newline

  ~ Otherwise, look it up to see what it means.
  find

  ~ Check whether the word was found in the dictionary.
  dup 0 != {
    ~ If the word is in the dictionary, check what mode we're in, then...
    dropstring-with-result
    ~ (entry pointer)
    interpreter-flags @ 0x01 & {
      ~ ... if we're in compile mode, there's still a chance it's an immediate
      ~ word, in which case we fall through to interpret mode...
      dup entry-flags@ 1 & 0 =

      ~ ... but it's a regular word, so append it to the heap.
      { entry-to-execution-token . 0 exit } if
    } if

    ~ ... if we're in interpret mode, or the word is immediate, run it.
    ." stack before immediate word " stackhex
    entry-to-execution-token execute 0 exit
  } if

  ~ If it's not in the dictionary, check whether it's a decimal number.
  drop
  ~ As before, we get the stack address and use it as a string pointer.
  ~ (string)
  value@ read-integer 0 = {
    ~ It's a number.
    interpreter-flags @ 0x01 & {
      ~ We're in compile mode; append first "lit", then the number, to the
      ~ heap. The version of "lit" we use is the one that's current when we
      ~ ourselves are compiled, hardcoded; doing a dynamic lookup would
      ~ require dealing with what happens if it's not found.
      dropstring-with-result
      [ ' lit entry-to-execution-token literal ]
      , ,
      0 exit
    } if

    ~ We're in interpret mode; push the number to the stack. Or at least, that's
    ~ what the code we're interpreting will see. Really it's already on the
    ~ stack, just clean everything else up and leave it there.
    dropstring-with-result 0 exit
  } if

  ~ If it's neither in the dictionary nor a number, just print an error.
  s" No such word: " emitstring value@ emitstring dropstring 0 ;

~ (output point -- output point, done)
: transform-one
  ~ Save the old value of "here", and set it to our output point.
  here @ transform-state ! here !
  ~ Now the stack has nothing of ours on it, so client code can do its thing.

  ~ Invoke client code.
  transform-inner
  ~ (done)

  ~ Swap them back.
  here @ transform-state @ here !

  ~ While we don't actually use transform-state outside of this invocation,
  ~ for tidiness we zero it out.
  0 transform-state !

  swap ;


~ ." input " main-input-buffer dup .hex64 newline dup hexdump @ dup .hex64 newline bye
~ 1024 read-to-buffer
~ foo bar baz biff
~ pyrzqxgl
~ stackhex dup hexdump emitstring bye
~ 
~ : breakza
~   ." original" newline
~   main-input-buffer dup 6 8 * hexdump-from
~   dup push-input-buffer
~   ." updated original" newline
~   dup 6 8 * hexdump-from
~   ." copy" newline
~   dup input-buffer-next-source @ 6 8 * hexdump-from
~   newline
~   stackhex
~   dup pop-input-buffer
~   ." copied back" newline
~   6 8 * hexdump-from
~   stackhex
~   bye ;

~ (output point -- output point)
: transform
  1024 read-to-buffer ." transformation input buffer" newline dup hexdump
  ~ (output point, string pointer)

  main-input-buffer dup push-input-buffer
  ~ TODO the arguments for this seem to be backwards from the documentation
  swap attach-string-to-input-buffer

  { transform-one
    { main-input-buffer pop-input-buffer
      exit } if } forever ;

1024 allocate dup
." compilation output buffer" newline dup hexdump
transform
: za ." ZA" 12 13 - . ;
: ' word value@ find dropstring-with-result
  interpreter-flags @ 1 & { literal } if ; make-immediate
~ ' za . newline
pyrzqxgl
." back back back " here @ .hex64 newline
~ ." stack after " stackhex
~ 2dup swap hexdump-between
~ : piz ." PIZ" ' za . newline ; piz
bye