From 0c7e96417a3691f59807243a9841c6f5e631edfa Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Thu, 7 May 2026 17:57:28 -0700 Subject: refactored the label code and ELF template into their own files nice, right? :) modular programming! :D Force-Push: yes Change-Id: I56dd219fd2147850a0bb5b4a8cb3f9760e787215 --- elf.e | 57 ++++++++++++++ hello.e | 35 +++++++++ labels.e | 142 +++++++++++++++++++++++++++++++++ make-hello.e | 251 ----------------------------------------------------------- 4 files changed, 234 insertions(+), 251 deletions(-) create mode 100644 elf.e create mode 100644 hello.e create mode 100644 labels.e delete mode 100644 make-hello.e diff --git a/elf.e b/elf.e new file mode 100644 index 0000000..4224b50 --- /dev/null +++ b/elf.e @@ -0,0 +1,57 @@ +~ ~~ +~ ~~ ELF header +~ ~~ +~ ~~ This is the top-level ELF header, for the entire file. An ELF always +~ ~~ has exactly one of this header, which is always at the start of the file. +~ ~~ +: elf-file-header + 0x7f pack8 s" ELF" pack-raw-string ~ magic number + 2 pack8 ~ 64-bit + 1 pack8 ~ little-endian + 1 pack8 ~ ELF header format v1 + 0 pack8 ~ System-V ABI + 0 pack64 ~ (padding) + + 2 pack16 ~ executable + 0x3e pack16 ~ Intel x86-64 + 1 pack32 ~ ELF format version + + L' start use-label origin + pack64 ~ entry point + ~ This includes the origin, intentionally. + + L' program-header use-label pack64 ~ program header offset + ~ We place the program header immediately after the ELF header. This + ~ offset is from the start of the file. + 0 pack64 ~ section header offset + 0 pack32 ~ processor flags + 64 pack16 ~ ELF header size + 56 pack16 ~ program header entry size + 1 pack16 ~ number of program header entries + 0 pack16 ~ section header entry size + 0 pack16 ~ number of section header entries + 0 pack16 ~ section name string table index + ; + +~ ~~ +~ ~~ Program header +~ ~~ +~ ~~ An ELF program header consists of any number of these entries; they are +~ ~~ always consecutive, but may be anywhere in the file. We always have +~ ~~ exactly one, and it's always right after the ELF file header. +~ ~~ +: elf-program-header + current-offset L' program-header set-label + 1 pack32 ~ "loadable" segment type + 0x05 pack32 ~ read+execute permission + 0 pack64 ~ offset in file + origin pack64 ~ virtual address + ~ required, but can be anything, subject to alignment + 0 pack64 ~ physical address (ignored) + + L' total-size use-label pack64 ~ size in file + L' total-size use-label pack64 ~ size in memory + + 0 pack64 ~ segment alignment + ~ for relocation, but this doesn't apply to us + ; + diff --git a/hello.e b/hello.e new file mode 100644 index 0000000..63f19c4 --- /dev/null +++ b/hello.e @@ -0,0 +1,35 @@ +~ cat labels.e elf.e hello.e | ./quine > hello && chmod 755 hello && ./hello + +: output-start-routine + current-offset L' start set-label + 1 :rax mov-reg64-imm32 + 1 :rdi mov-reg64-imm64 + origin L' greeting use-label + :rsi mov-reg64-imm64 + L' greeting-size use-label :rdx mov-reg64-imm64 + syscall + 60 :rax mov-reg64-imm32 + 0 :rdi mov-reg64-imm32 + syscall + ; + +: output-greeting + current-offset dup L' greeting set-label 3unroll + s" Hello, Irenes!" packstring + current-offset 4 roll - L' greeting-size set-label ; + +~ (output memory start, current output point +~ -- output memory start, current output point) +~ +~ Everything directly called by all-contents has this same interface. +~ +: all-contents + elf-file-header + elf-program-header + output-start-routine + output-greeting + current-offset L' total-size set-label + ; + +' all-contents entry-to-execution-token label-loop +swap sys-write bye + diff --git a/labels.e b/labels.e new file mode 100644 index 0000000..9c91fb6 --- /dev/null +++ b/labels.e @@ -0,0 +1,142 @@ +~ current output point, string pointer +: pack-raw-string + { unpack8 dup } { 3roll swap pack8 swap } while drop drop ; + +: origin 0x08000000 ; + +~ Status is a bit field: +~ bit zero is whether it's been used +~ bit one is whether it's been defined +~ bit two is whether it was used before being defined +~ bit three is whether the guessed value wound up equaling the actual value + +: init-labels + 8 allocate s" labels" variable + 0 s" labels" find entry-to-execution-token execute ! + ; +~ This needs to happen now because otherwise the word "labels" won't exist. +init-labels + +~ (entry pointer --) +: label-heading + dup entry-to-name dup emitstring space + stringlen 1+ 54 swap - 0 max indent + entry-to-execution-token dup 8 + @ .hex64 space @ .hex8 + newline ; + +~ (dictionary handle) +: oldest-entry-in + dup + dup { { dup @ } { @ } while } if + dup 3roll = { drop 0 } if + ; + +~ (entry pointer, dictionary handle) +: next-newer-entry-in + @ + 2dup = { 2drop 0 exit } if + { dup { 2dup @ != } if } + { @ } while swap drop ; + +: list-labels + labels oldest-entry-in { dup } + { dup label-heading labels next-newer-entry-in } while drop ; + +~ (name string pointer -- ) +: new-label labels create-in 0 , 0 , ; + +: label-status entry-to-execution-token ; +: label-value entry-to-execution-token 8 + ; + +: intern-label + dup labels swap find-in + dup { swap drop } + { drop dup new-label labels swap find-in } if-else ; + +: L' + word value@ + interpreter-flags @ 0x01 & + { ' litstring entry-to-execution-token , + here @ swap packstring 0 pack8 8 packalign here ! + ' intern-label entry-to-execution-token , + dropstring } + { intern-label dropstring-with-result } if-else + ; make-immediate + +~ (label entry pointer -- label value) +: use-label + ~ If it hasn't been defined yet, mark it used-before-set. + dup label-status dup @ dup 0x02 & not { 0x04 | swap ! } { drop drop } if-else + + ~ Mark it used. It's faster to just do this than to check if it's needed. + dup label-status dup @ 0x01 | swap ! + + ~ Return the guessed value. + label-value @ ; + ; + +~ (new label value, label entry pointer --) +: set-label + ~ We always set the defined bit to true. We leave the other status bits + ~ as-is, except that we check whether it's unused up to now, and whether + ~ the previous value equals the new value. If either of those holds, we set + ~ the guessed-equals-actual bit to true. + dup label-status @ 0x02 | + ~ (new value, entry, updated status) + 3unroll dup 4 unroll label-value @ swap dup 3unroll = + ~ (entry, updated status, new value, equality) + 3roll dup 4 unroll 0x01 & not + ~ (entry, updated status, new value, equality, unused) + || { swap 0x08 | swap } if + ~ (entry, updated status, new value) + 3roll dup 3unroll + ~ (updated status, entry, new value, entry) + label-value ! label-status ! + ; + +~ (output memory start, current output point +~ -- output memory start, current output point, offset) +: current-offset 2dup swap - ; + +~ For a label to have "converged", at least one of the following must be true: +~ +~ 1. The label must never have been used (bit zero clear); +~ 2. The label was both used and defined, but not used before it was defined +~ (bits zero and one set; bit two clear); +~ 3. The label was both used and defined, and the guessed value equalled the +~ actual value (bits zero, one, and three set). +~ (label entry pointer) +: check-label-converged + label-status @ + dup 0x01 & not swap + dup 0x03 & 0x03 = swap dup 0x04 & not swap 3unroll && swap + 0x0b & 0x0b = + || || ; + +: check-labels-converged + 1 + labels @ { dup } + { dup check-label-converged 3roll && swap + @ } while drop ; + +: reset-labels + labels @ { dup } + { dup label-status 0 swap ! + @ } while drop ; + +~ (execution token -- output start, output length) +: label-loop + 0 swap + 0x1000 allocate dup + ~ (iteration count, execution token, output start, output point) + { 3 pick 100 > } + { 2 pick execute 4 roll 1+ 4 unroll + check-labels-converged + { 4 roll drop + 3 roll drop + current-offset swap drop + exit } if + drop dup + reset-labels } while + drop drop drop ." Failed after " . ." iterations." newline ; + diff --git a/make-hello.e b/make-hello.e deleted file mode 100644 index c4bbb9e..0000000 --- a/make-hello.e +++ /dev/null @@ -1,251 +0,0 @@ -~ current output point, string pointer -: pack-raw-string - { unpack8 dup } { 3roll swap pack8 swap } while drop drop ; - -: origin 0x08000000 ; - -~ Status is a bit field: -~ bit zero is whether it's been used -~ bit one is whether it's been defined -~ bit two is whether it was used before being defined -~ bit three is whether the guessed value wound up equaling the actual value - -: init-labels - 8 allocate s" labels" variable - 0 s" labels" find entry-to-execution-token execute ! - ; -~ This needs to happen now because otherwise the word "labels" won't exist. -init-labels - -~ (entry pointer --) -: label-heading - dup entry-to-name dup emitstring space - stringlen 1+ 54 swap - 0 max indent - entry-to-execution-token dup 8 + @ .hex64 space @ .hex8 - newline ; - -~ (dictionary handle) -: oldest-entry-in - dup - dup { { dup @ } { @ } while } if - dup 3roll = { drop 0 } if - ; - -~ (entry pointer, dictionary handle) -: next-newer-entry-in - @ - 2dup = { 2drop 0 exit } if - { dup { 2dup @ != } if } - { @ } while swap drop ; - -: list-labels - labels oldest-entry-in { dup } - { dup label-heading labels next-newer-entry-in } while drop ; - -~ (name string pointer -- ) -: new-label labels create-in 0 , 0 , ; - -: label-status entry-to-execution-token ; -: label-value entry-to-execution-token 8 + ; - -: intern-label - dup labels swap find-in - dup { swap drop } - { drop dup new-label labels swap find-in } if-else ; - -: L' - word value@ - interpreter-flags @ 0x01 & - { ' litstring entry-to-execution-token , - here @ swap packstring 0 pack8 8 packalign here ! - ' intern-label entry-to-execution-token , - dropstring } - { intern-label dropstring-with-result } if-else - ; make-immediate - -~ (label entry pointer -- label value) -: use-label - ~ If it hasn't been defined yet, mark it used-before-set. - dup label-status dup @ dup 0x02 & not { 0x04 | swap ! } { drop drop } if-else - - ~ Mark it used. It's faster to just do this than to check if it's needed. - dup label-status dup @ 0x01 | swap ! - - ~ Return the guessed value. - label-value @ ; - ; - -~ (new label value, label entry pointer --) -: set-label - ~ We always set the defined bit to true. We leave the other status bits - ~ as-is, except that we check whether it's unused up to now, and whether - ~ the previous value equals the new value. If either of those holds, we set - ~ the guessed-equals-actual bit to true. - dup label-status @ 0x02 | - ~ (new value, entry, updated status) - 3unroll dup 4 unroll label-value @ swap dup 3unroll = - ~ (entry, updated status, new value, equality) - 3roll dup 4 unroll 0x01 & not - ~ (entry, updated status, new value, equality, unused) - || { swap 0x08 | swap } if - ~ (entry, updated status, new value) - 3roll dup 3unroll - ~ (updated status, entry, new value, entry) - label-value ! label-status ! - ; - -~ (output memory start, current output point -~ -- output memory start, current output point, offset) -: current-offset 2dup swap - ; - -~ For a label to have "converged", at least one of the following must be true: -~ -~ 1. The label must never have been used (bit zero clear); -~ 2. The label was both used and defined, but not used before it was defined -~ (bits zero and one set; bit two clear); -~ 3. The label was both used and defined, and the guessed value equalled the -~ actual value (bits zero, one, and three set). -~ (label entry pointer) -: check-label-converged - label-status @ - dup 0x01 & not swap - dup 0x03 & 0x03 = swap dup 0x04 & not swap 3unroll && swap - 0x0b & 0x0b = - || || ; - -: check-labels-converged - 1 - labels @ { dup } - { dup check-label-converged 3roll && swap - @ } while drop ; - -: reset-labels - labels @ { dup } - { dup label-status 0 swap ! - @ } while drop ; - -~ (execution token -- output start, output length) -: label-loop - 0 swap - 0x1000 allocate dup - ~ (iteration count, execution token, output start, output point) - { 3 pick 100 > } - { 2 pick execute 4 roll 1+ 4 unroll - check-labels-converged - { 4 roll drop - 3 roll drop - current-offset swap drop - exit } if - drop dup - reset-labels } while - drop drop drop ." Failed after " . ." iterations." newline ; - -~ ~~ -~ ~~ ELF header -~ ~~ -~ ~~ This is the top-level ELF header, for the entire file. An ELF always -~ ~~ has exactly one of this header, which is always at the start of the file. -~ ~~ -: elf-file-header - 0x7f pack8 s" ELF" pack-raw-string ~ magic number - 2 pack8 ~ 64-bit - 1 pack8 ~ little-endian - 1 pack8 ~ ELF header format v1 - 0 pack8 ~ System-V ABI - 0 pack64 ~ (padding) - - 2 pack16 ~ executable - 0x3e pack16 ~ Intel x86-64 - 1 pack32 ~ ELF format version - - ~ Compute the entry pointer. - ~ origin 0x78 + pack64 ~ entry point - L' start use-label origin + pack64 - ~ This includes the origin, intentionally. - - L' program-header use-label pack64 - ~ 0 pack64 ~ program header offset - ~ We place the program header immediately after the ELF header. This - ~ offset is from the start of the file. - 0 pack64 ~ section header offset - 0 pack32 ~ processor flags - 64 pack16 ~ ELF header size - 56 pack16 ~ program header entry size - 1 pack16 ~ number of program header entries - 0 pack16 ~ section header entry size - 0 pack16 ~ number of section header entries - 0 pack16 ~ section name string table index - ; - -~ ~~ -~ ~~ Program header -~ ~~ -~ ~~ An ELF program header consists of any number of these entries; they are -~ ~~ always consecutive, but may be anywhere in the file. We always have -~ ~~ exactly one, and it's always right after the ELF file header. -~ ~~ -: elf-program-header - current-offset L' program-header set-label - 1 pack32 ~ "loadable" segment type - 0x05 pack32 ~ read+execute permission - 0 pack64 ~ offset in file - origin pack64 ~ virtual address - ~ required, but can be anything, subject to alignment - 0 pack64 ~ physical address (ignored) - - L' total-size use-label pack64 ~ size in file - L' total-size use-label pack64 ~ size in memory - - 0 pack64 ~ segment alignment - ~ for relocation, but this doesn't apply to us - ; - -: output-start-routine - current-offset L' start set-label - 1 :rax mov-reg64-imm32 - 1 :rdi mov-reg64-imm64 - origin L' greeting use-label + :rsi mov-reg64-imm64 - L' greeting-size use-label :rdx mov-reg64-imm64 - syscall - 60 :rax mov-reg64-imm32 - 0 :rdi mov-reg64-imm32 - syscall - ; - -: output-greeting - current-offset dup L' greeting set-label 3unroll - s" Hello, Irenes!" packstring - current-offset 4 roll - L' greeting-size set-label ; - -~ (output memory start, current output point -~ -- output memory start, current output point) -~ -~ Everything directly called by all-contents has this same interface. -~ -: all-contents - elf-file-header - elf-program-header - output-start-routine - output-greeting - current-offset L' total-size set-label - ; - -~ 0x1000 allocate dup -~ output memory start, current output point - -~ all-contents -~ check-labels-converged . newline -~ list-labels reset-labels list-labels - -~ The two-pass magick: Reset the output offset to the beginning of the block. -~ drop dup - -~ all-contents -~ check-labels-converged . newline -~ list-labels reset-labels list-labels - -~ output memory start, current output point -~ over - swap sys-write bye -' all-contents entry-to-execution-token label-loop -swap sys-write bye - -- cgit 1.4.1