summary refs log tree commit diff
diff options
context:
space:
mode:
authorIrene Knapp <ireneista@irenes.space>2026-04-07 04:26:55 -0700
committerIrene Knapp <ireneista@irenes.space>2026-04-07 04:33:06 -0700
commitbde2b14a76ff968408260b5ceeee50bb33df0233 (patch)
tree80fb5e826255462e08e47e2253aaa0bf2e837d84
parent1c18c7b90b64563c884cee5f74c2799e885f3ba5 (diff)
it now completely prints out its own source, backwards
if it were Mr. Mxyzptlk, it would now be vanquished

Force-Push: yes
Change-Id: I7af53bc2a2f5ab69850d9b8bee152c27d1878dc4
-rw-r--r--quine.asm202
1 files changed, 193 insertions, 9 deletions
diff --git a/quine.asm b/quine.asm
index 66bb2cd..5bd86df 100644
--- a/quine.asm
+++ b/quine.asm
@@ -1577,9 +1577,10 @@ code_start:
 ;;;
 ;;; (overall start)
 ;;;            0x00 - 0x08                     Link (to next-oldest word)
-;;;            0x09 - 0x09  I0H00000           Flags
+;;;            0x09 - 0x09  I0H0000M           Flags
 ;;;                                                I - immediate
 ;;;                                                H - hidden
+;;;                                                M - metadata
 ;;;                                                all other bits reserved
 ;;; (name start)
 ;;;            0x0a - 0x0a                     Null byte (terminates name)
@@ -2639,7 +2640,7 @@ cold_start:
   dq swap, unroll3, pack_beforenext
   dq lit, 8, packalign, early_here_store
 
-  dq litstring, "pack8", early_find, early_show_source, lit, 0, sys_exit
+  dq early_describe_all, lit, 0, sys_exit
 
   ;;; For triage's sake, here's an inventory of everything else in the file.
   ;;;
@@ -3492,7 +3493,7 @@ defword reverse_padding_len,  0
 
   mov.qreg.qreg rbx, rdi
   xor.qreg.qreg rax, rax
-  mov.qreg.dimm rcx, 8
+  mov.qreg.dimm rcx, 9
 
   std
   repz scasb
@@ -5206,6 +5207,8 @@ defword early_next_newer_entry, 0
   dq swap, early_latest, fetch, swap, unroll3, swap, next_newer_entry_in
   dq exit
 
+; This returns zero if the entry isn't found at all.
+;
 ; Stack in:
 ;   dictionary to search within
 ;   entry address
@@ -5238,11 +5241,34 @@ defword entry_to_execution_token, 0
   dq lit, 7, invert, and
   dq exit
 
+; Read the flags out of a dictionary entry's header.
+;
+; Stack in:
+;   entry address
+; Stack out:
+;   flags byte (a value in the range 0x00 - 0xFF)
+defword fetch_entry_flags, 0
+  dq docol
+  ; Fetch the cell that starts 8 bytes into the entry...
+  dq lit, 8, add
+  dq fetch
+  ; ... and keep only its low eight bits.
+  dq lit, 0xFF, and
+  dq exit
+
+; Replace the flags in a dictionary entry's header. Only the low eight bits
+; of the cell at entry + 8 are changed; the other 56 bits are written back as
+; they were read.
+;
+; Stack in:
+;   entry address
+;   flags byte
+defword store_entry_flags, 0
+  dq docol
+  ; Find the cell that holds the flags and read what is there now.
+  dq swap
+  dq lit, 8, add
+  dq dup, fetch
+  ; (new flags byte, address of flags byte, old flags byte + context)
+
+  ; Mask one value down to eight bits. (Presumably roll3 has just brought
+  ; the new flags byte to the top; confirm against roll3's definition.)
+  dq roll3
+  dq lit, 0xFF, and
+
+  ; Clear the low eight bits of the other value, then merge the two.
+  dq swap
+  dq lit, 0xFFFFFFFFFFFFFF00, and
+  dq or
+
+  ; Write the merged cell back.
+  dq store
+  dq exit
+
 defword entry_to_name, 0
+  ; Stack in:
+  ;   entry address
+  ; Stack out:
+  ;   entry address + 10, which is where the entry's name begins
   dq docol
   dq lit, 10, add
   dq exit
 
+; This doesn't work on the entry at the end of the ELF .text segment. It does
+; work on everything else.
 ; Stack in:
 ;   heap address
 ;   entry address
@@ -5266,10 +5292,21 @@ defword early_guess_entry_end, 0
 ;   next entry address or 0
 defword guess_entry_end_in, 0
   dq docol
+  ; Three outcomes are possible: the entry address itself (metadata entry),
+  ; "here" (entry not found in the list), or the address of the next entry.
+  ;
   ; Check whether the entry is flagged as metadata...
+  ; (The zbranch is taken when the 0x01 bit is clear; 6*8 skips the five
+  ; words of the metadata path and lands on next_newer_entry_in.)
   dq dup, fetch_entry_flags, lit, 0x01, and, lit, 0x01, eq, zbranch, 6*8
   ; ... if so, return the same entry address as the result, which means we're
   ; saying the length is zero.
+  ; Each swap, drop pair discards the item just beneath the top of the
+  ; stack, so two of them leave only the entry address.
   dq swap, drop, swap, drop, exit
   ; If not, search backwards from the end to find the following entry.
   dq next_newer_entry_in
   ; (here, next entry address or 0)
 
   ; Check whether we were able to find the next-entry address.
+  ; (The zbranch is taken when the result is nonzero; 3*8 skips the
+  ; drop, exit of the not-found path.)
   dq dup, lit, 0, eq, zbranch, 3*8
-  ; This is the branch where it's not found. Return "here".
   ; This is the branch where the original entry is not in the list. Return
   ; "here".
   ;
   ; TODO it would probably be better to return the original entry address.
   dq drop, exit
   ; This is the branch where it's found. Return the next entry address.
   dq swap, drop, exit
@@ -5302,20 +5339,153 @@ defword early_show_source, 0
 ;   end address
 defword show_source_between, 0
   dq docol
+  ; Walk the cells of a threaded-code word and print each one by name,
+  ; followed by a space. Words that carry inline data (a literal cell or a
+  ; string) get that data printed and skipped as well.
+  ;
+  ; All branch offsets here are hand-counted in cells and are relative to the
+  ; cell that holds the offset itself. Adding or removing a word anywhere in
+  ; the loop means recounting them.
+
   dq swap, entry_to_execution_token, lit, 8, add
   ; (end address, current address)
+
+  ; This is the start of the loop.
+  ;
+  ; If it's zero-length, don't try to print its contents.
   dq dup2, ge, zbranch, 4*8, drop, drop, exit
+
+  ; If it's not a word address, the fetch will segfault. We accept this.
   dq dup, fetch, execution_token_to_entry, entry_to_name
+
   ; (end address, current address, name)
+
+  ; Print the name, then advance the current address past the cell it came
+  ; from, keeping the name on top for the comparisons below.
   dq dup, emitstring, litstring, " ", emitstring
   dq swap, lit, 8, add, swap
-  dq dup, litstring, "lit", stringcmp, zbranch, 4*8
-  ; This is the non-lit branch.
-  dq drop, branch, -29*8
-  ; This is the lit branch.
+
+  ; Dispatch on the name. Each zbranch below is taken when its stringcmp
+  ; comes back zero, which is the matching case. The first seven all land on
+  ; the single-word handler; the last one lands on the string handler. The
+  ; trailing word counts are what the offsets are computed from.
+  dq dup, litstring, "lit", stringcmp, zbranch, 51*8          ; 6 words
+  dq dup, litstring, "litpack8", 0, stringcmp, zbranch, 44*8  ; 7 words
+  dq dup, litstring, "litpack16", stringcmp, zbranch, 37*8    ; 7 words
+  dq dup, litstring, "litpack32", stringcmp, zbranch, 30*8    ; 7 words
+  dq dup, litstring, "litpack64", stringcmp, zbranch, 23*8    ; 7 words
+  dq dup, litstring, "branch", stringcmp, zbranch, 17*8       ; 6 words
+  dq dup, litstring, "zbranch", stringcmp, zbranch, 11*8      ; 6 words
+  dq dup, litstring, "litstring", stringcmp, zbranch, 16*8    ; 7 words
+
+  ; This is the plain, non-lit branch.
+  ; Discard the name and go around again. This backward branch, like the two
+  ; further down, returns to the dup2 at the start of the loop.
+  dq drop, branch, -76*8
+
+  ; This is the lit branch that expects to be followed by a single word.
+  ; Discard the name, print the cell at the current address with dot, and
+  ; step over it.
   dq drop, dup, fetch, dot, litstring, " ", emitstring
   dq lit, 8, add
-  dq branch, -41*8
+  dq branch, -88*8
+
+  ; This is the lit branch that expects to be followed by a string. First, we
+  ; print the string...
+  dq drop
+  ; 0x22 is the ASCII double quote. Presumably fetch_value_stack supplies the
+  ; address of that stack cell so emitstring can print it as a one-character
+  ; string; the drop then removes the 0x22 again.
+  dq lit, 0x22, fetch_value_stack, emitstring, drop
+  dq dup, emitstring
+  dq lit, 0x22, fetch_value_stack, emitstring, drop
+  dq litstring, " ", emitstring
+
+  ; ... then, we skip the string.
+  ;
+  ; We always add a word as padding. If the length is a multiple of 8, this is
+  ; the zero word following the contents. If it's not, the division rounds
+  ; down so the padding word has the tail of the contents in it too.
+  dq dup, stringlen, lit, 8, divmod, swap, drop, lit, 1, add
+  ; (end address, current address, n words including padding)
+  dq lit, 8, mul, add
+
+  dq branch, -120*8
+
+; Print the contents of one entry as hex, using early_guess_entry_end to
+; decide where the entry stops and show_hex_between to do the printing.
+;
+; Stack in:
+;   heap address
+;   entry address
+; Stack out:
+;   heap address
+defword early_show_hex, 0
+  dq docol
+  ; Tuck a copy of the entry address away before asking for the end.
+  dq dup, unroll3
+  dq early_guess_entry_end
+  ; Put the heap address back underneath the pair to be printed.
+  dq swap, unroll3
+  ; (heap address, entry address, end address)
+  dq show_hex_between
+  dq exit
+
+; Print the cells of an entry's contents with dothex64, each followed by a
+; space, up to an end address. Both inputs are consumed.
+;
+; Stack in:
+;   entry address
+;   end address
+defword show_hex_between, 0
+  dq docol
+  ; Start at the cell 8 bytes past the entry's execution token.
+  dq swap, entry_to_execution_token, lit, 8, add
+  ; (end address, current address)
+  ; This is the start of the loop. The dup2, ge test decides whether there is
+  ; anything left to print. When there isn't, both addresses are dropped and
+  ; the word returns; otherwise the zbranch skips over that to the dup.
+  dq dup2, ge, zbranch, 4*8, drop, drop, exit
+  ; Print the cell at the current address, then step forward one cell.
+  dq dup, fetch, dothex64, litstring, " ", emitstring
+  dq lit, 8, add
+  ; Return to the dup2 at the start of the loop. The offset is counted in
+  ; cells from the cell that holds it.
+  dq branch, -17*8
+
+; Print the contents of one entry, using early_guess_entry_end to decide
+; where the entry stops and show_source_or_hex_between to pick the format.
+;
+; Stack in:
+;   heap address
+;   entry address
+; Stack out:
+;   heap address
+defword early_show_source_or_hex, 0
+  dq docol
+  ; Tuck a copy of the entry address away before asking for the end.
+  dq dup, unroll3
+  dq early_guess_entry_end
+  ; Put the heap address back underneath the pair to be printed.
+  dq swap, unroll3
+  ; (heap address, entry address, end address)
+  dq show_source_or_hex_between
+  dq exit
+
+; Print the contents of an entry: as decompiled source when the cell at its
+; execution token is docol, and as raw hex otherwise.
+;
+; Stack in:
+;   entry address
+;   end address
+; Stack out:
+;   (nothing; both inputs are consumed on every path)
+defword show_source_or_hex_between, 0
+  dq docol
+
+  ; If it's zero-length, don't try to print its contents. The two inputs
+  ; still have to be dropped before returning, the same way
+  ; show_source_between and show_hex_between drop them when they finish;
+  ; otherwise every zero-length entry leaves two stray cells on the stack.
+  ; The zbranch offset skips drop, drop, exit.
+  dq dup2, eq, zbranch, 4*8, drop, drop, exit
+
+  dq swap, dup, unroll3, swap, roll3
+  ; (entry address, end address, entry address)
+
+  ; Compare the cell at the execution token against docol. When they differ
+  ; the zbranch skips the source printer and lands on the hex printer.
+  dq entry_to_execution_token, fetch, lit, docol, eq, zbranch, 3*8
+  dq show_source_between, exit
+  dq show_hex_between, exit
+
+; Print a description of one entry: its address (via dothex64) on one line,
+; its name followed by a colon on the next, and then its contents on a line
+; indented by two spaces.
+;
+; Stack in:
+;   heap address
+;   entry address
+; Stack out:
+;   heap address
+defword early_describe, 0
+  dq docol
+
+  ; Address line.
+  dq dup, dothex64
+  dq litstring, 0x0a, emitstring
+
+  ; Name line.
+  dq dup, entry_to_name, emitstring
+  dq litstring, ":", emitstring
+  dq litstring, 0x0a, emitstring
+
+  ; Contents line. This consumes the entry address.
+  dq litstring, "  ", emitstring
+  dq early_show_source_or_hex
+  dq litstring, 0x0a, emitstring
+
+  dq exit
+
+; Describe every entry reachable from early_latest, newest first, by calling
+; early_describe on each one and following the links until a zero is found.
+;
+; Stack in:
+;   heap address
+; Stack out:
+;   heap address (presumably; both exits drop only the zero on top of it)
+defword early_describe_all, 0
+  dq docol
+  dq early_latest
+  ; Bail out if there is nothing to start from. The zbranch is taken when the
+  ; value is nonzero, skipping the drop, exit.
+  dq dup, lit, 0, eq, zbranch, 3*8, drop, exit
+  ; Start of the loop
+  ; The same fetch serves both to read the value early_latest points at and
+  ; to follow each entry's link, since the link is the first cell of an
+  ; entry.
+  dq fetch
+  ; A zero here means we have run off the old end of the list.
+  dq dup, lit, 0, eq, zbranch, 3*8, drop, exit
+  ; early_describe consumes the copied entry address and hands back the
+  ; copied heap address, which is then discarded.
+  dq dup2, early_describe, drop
+  ; Back to the fetch at the start of the loop.
+  dq branch, -13*8
+  ; Never reached: the branch above is unconditional.
+  dq exit
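+;
+; Debugging reference: gdb dumps of the start of two entries. In each, the
+; x/s address is the x/8xg address plus 0x0a, which is where the name sits.
+;(gdb) x/8xg 0x1000010d28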
+;0x1000010d28:   0x0000001000010cf8      0x63617074696c0000
+;0x1000010d38:   0x000000000000386b      0x0000001000010d48
+;0x1000010d48:   0x010b08b84850ad48      0x0020ff0000001000
+;0x1000010d58:   0x0000000000000000      0x0000000000000000
+;(gdb) x/s 0x1000010d32
+;0x1000010d32:   "litpack8"
+;(gdb) x/8xg 0x800b6b8
+;0x800b6b8:      0x000000000800b580      0x725f666c65730000
+;0x800b6c8:      0x0000000000007761      0x00000000080033a8
+;0x800b6d8:      0x0000000008003538      0x00000000080038d0
+;0x800b6e8:      0x00000000080000c4      0x00000000080038d0
+;(gdb) x/s 0x800b6c2
+;0x800b6c2:      "self_raw"
+
 
 ;   Allocate space by incrementing "here", and output a word header in it.
 ; Also add it to the "latest" linked list. Use zero as the flag values;
@@ -5754,6 +5924,20 @@ defword self_raw, 0
   dq memcopy
   dq exit
 
+; Routines that traverse the raw code need a way to guess where the end of a
+; word's contents is, lest they traverse off the edge of the world and
+; segfault. On the heap, we have the "here" pointer to provide an upper bound.
+; For code that's loaded from the ELF's text segment, we don't get to do that,
+; so we need another way.
+;
+; We could notionally use syscalls to check how large the text segment is, but
+; that would make us more dependent on specifics of the ABI and the kernel.
+; Instead, we invent this concept of a metadata word, which is designated with
+; a new flag bit and exists to annotate the address space rather than to be
+; invoked. This is the only one there is right now, and the traversal routines
+; hard-code their knowledge of it.
+;
+; The second argument to defword is 1 here where other words pass 0;
+; presumably it is the flags byte, in which case it sets the low (metadata)
+; bit that guess_entry_end_in tests with "lit, 0x01, and".
+defword end_segment, 1
+
 
 final_word_name = latest_word
 code_size = $ - code_start