From bde2b14a76ff968408260b5ceeee50bb33df0233 Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Tue, 7 Apr 2026 04:26:55 -0700 Subject: it now completely prints out its own source, backwards if it were Mr. Mxyzptlk, it would now be vanquished Force-Push: yes Change-Id: I7af53bc2a2f5ab69850d9b8bee152c27d1878dc4 --- quine.asm | 202 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 193 insertions(+), 9 deletions(-) diff --git a/quine.asm b/quine.asm index 66bb2cd..5bd86df 100644 --- a/quine.asm +++ b/quine.asm @@ -1577,9 +1577,10 @@ code_start: ;;; ;;; (overall start) ;;; 0x00 - 0x08 Link (to next-oldest word) -;;; 0x09 - 0x09 I0H00000 Flags +;;; 0x09 - 0x09 I0H0000M Flags ;;; I - immediate ;;; H - hidden +;;; M - metadata ;;; all other bits reserved ;;; (name start) ;;; 0x0a - 0x0a Null byte (terminates name) @@ -2639,7 +2640,7 @@ cold_start: dq swap, unroll3, pack_beforenext dq lit, 8, packalign, early_here_store - dq litstring, "pack8", early_find, early_show_source, lit, 0, sys_exit + dq early_describe_all, lit, 0, sys_exit ;;; For triage's sake, here's an inventory of everything else in the file. ;;; @@ -3492,7 +3493,7 @@ defword reverse_padding_len, 0 mov.qreg.qreg rbx, rdi xor.qreg.qreg rax, rax - mov.qreg.dimm rcx, 8 + mov.qreg.dimm rcx, 9 std repz scasb @@ -5206,6 +5207,8 @@ defword early_next_newer_entry, 0 dq swap, early_latest, fetch, swap, unroll3, swap, next_newer_entry_in dq exit +; This returns zero if the entry isn't found at all. 
+; ; Stack in: ; dictionary to search within ; entry address @@ -5238,11 +5241,34 @@ defword entry_to_execution_token, 0 dq lit, 7, invert, and dq exit +; Stack in: +; entry address +; Stack out: +; flags byte (the word fetched from entry address + 8, masked with 0xFF) +defword fetch_entry_flags, 0 + dq docol + dq lit, 8, add, fetch, lit, 0xFF, and + dq exit + +; Stack in: +; entry address +; flags byte (masked with 0xFF before it is merged into the stored word) +defword store_entry_flags, 0 + dq docol + dq swap, lit, 8, add, dup, fetch + ; (new flags byte, address of flags byte, old word: old flags byte in the low 8 bits + 56 bits of context that are kept) + dq roll3, lit, 0xFF, and + dq swap, lit, 0xFFFFFFFFFFFFFF00, and, or + dq store + dq exit + defword entry_to_name, 0 dq docol dq lit, 10, add dq exit +; This doesn't work on the entry at the end of the ELF .text segment. It does +; work on everything else. ; Stack in: ; heap address ; entry address @@ -5266,10 +5292,21 @@ defword early_guess_entry_end, 0 ; next entry address or 0 defword guess_entry_end_in, 0 dq docol + ; Check whether the entry is flagged as metadata... + dq dup, fetch_entry_flags, lit, 0x01, and, lit, 0x01, eq, zbranch, 6*8 + ; ... if so, return the same entry address as the result, which means we're + ; saying the length is zero. + dq swap, drop, swap, drop, exit + ; If not, search backwards from the end to find the following entry. dq next_newer_entry_in ; (here, next entry address or 0) + + ; Check whether we were able to find the next-entry address. dq dup, lit, 0, eq, zbranch, 3*8 - ; This is the branch where it's not found. Return "here". + ; This is the branch where the original entry is not in the list. Return + ; "here". + ; + ; TODO it would probably be better to return the original entry address. dq drop, exit ; This is the branch where it's found. Return the next entry address. dq swap, drop, exit @@ -5302,20 +5339,153 @@ defword early_show_source, 0 ; end address defword show_source_between, 0 dq docol + dq swap, entry_to_execution_token, lit, 8, add ; (end address, current address) + + ; This is the start of the loop. 
+ ; + ; If it's zero-length, don't try to print its contents. dq dup2, ge, zbranch, 4*8, drop, drop, exit + + ; If it's not a word address, the fetch will segfault. We accept this. dq dup, fetch, execution_token_to_entry, entry_to_name + ; (end address, current address, name) + dq dup, emitstring, litstring, " ", emitstring dq swap, lit, 8, add, swap - dq dup, litstring, "lit", stringcmp, zbranch, 4*8 - ; This is the non-lit branch. - dq drop, branch, -29*8 - ; This is the lit branch. + + dq dup, litstring, "lit", stringcmp, zbranch, 51*8 ; 6 words + dq dup, litstring, "litpack8", 0, stringcmp, zbranch, 44*8 ; 7 words (the 0 is the zero word that follows an exactly-8-byte string) + dq dup, litstring, "litpack16", stringcmp, zbranch, 37*8 ; 7 words + dq dup, litstring, "litpack32", stringcmp, zbranch, 30*8 ; 7 words + dq dup, litstring, "litpack64", stringcmp, zbranch, 23*8 ; 7 words + dq dup, litstring, "branch", stringcmp, zbranch, 17*8 ; 6 words + dq dup, litstring, "zbranch", stringcmp, zbranch, 11*8 ; 6 words + dq dup, litstring, "litstring", stringcmp, zbranch, 16*8 ; 7 words + + ; This is the plain, non-lit branch. + dq drop, branch, -76*8 ; back to the start of the loop + + ; This is the lit branch that expects to be followed by a single word. Every comparison above except "litstring" lands here. dq drop, dup, fetch, dot, litstring, " ", emitstring dq lit, 8, add - dq branch, -41*8 + dq branch, -88*8 ; back to the start of the loop + + ; This is the lit branch that expects to be followed by a string. First, we + ; print the string... + dq drop + dq lit, 0x22, fetch_value_stack, emitstring, drop + dq dup, emitstring + dq lit, 0x22, fetch_value_stack, emitstring, drop + dq litstring, " ", emitstring + + ; ... then, we skip the string. + ; + ; We always add a word as padding. If the length is a multiple of 8, this is + ; the zero word following the contents. If it's not, the division rounds + ; down so the padding word has the tail of the contents in it too. 
+ dq dup, stringlen, lit, 8, divmod, swap, drop, lit, 1, add + ; (end address, current address, n words including padding) + dq lit, 8, mul, add + + dq branch, -120*8 ; back to the start of the loop + +; Stack in: +; heap address +; entry address +; Stack out: +; heap address +defword early_show_hex, 0 + dq docol + dq dup, unroll3, early_guess_entry_end, swap, unroll3 + ; (heap address, entry address, end address) + dq show_hex_between + dq exit + +; Stack in: +; entry address +; end address +defword show_hex_between, 0 + dq docol + dq swap, entry_to_execution_token, lit, 8, add + ; (end address, current address) + dq dup2, ge, zbranch, 4*8, drop, drop, exit + dq dup, fetch, dothex64, litstring, " ", emitstring + dq lit, 8, add + dq branch, -17*8 ; back to the dup2 bounds check + +; Stack in: +; heap address +; entry address +; Stack out: +; heap address +defword early_show_source_or_hex, 0 + dq docol + dq dup, unroll3, early_guess_entry_end, swap, unroll3 + ; (heap address, entry address, end address) + dq show_source_or_hex_between + dq exit + +; Stack in: +; entry address +; end address +defword show_source_or_hex_between, 0 + dq docol + + ; If it's zero-length, don't try to print its contents. 
+ dq dup2, eq, zbranch, 4*8, drop, drop, exit ; consume both inputs, like the other *_between words + + dq swap, dup, unroll3, swap, roll3 + ; (entry address, end address, entry address) + dq entry_to_execution_token, fetch, lit, docol, eq, zbranch, 3*8 ; first word isn't docol: skip to show_hex_between + dq show_source_between, exit + dq show_hex_between, exit + +; Stack in: +; heap address +; entry address +; Stack out: +; heap address +defword early_describe, 0 + dq docol + dq dup, dothex64 + dq litstring, 0x0a, emitstring + dq dup, entry_to_name, emitstring, litstring, ":", emitstring + dq litstring, 0x0a, emitstring + dq litstring, " ", emitstring + dq early_show_source_or_hex + dq litstring, 0x0a, emitstring + dq exit + +; Stack in: +; heap address +defword early_describe_all, 0 + dq docol + dq early_latest + dq dup, lit, 0, eq, zbranch, 3*8, drop, exit + ; Start of the loop + dq fetch + dq dup, lit, 0, eq, zbranch, 3*8, drop, exit + dq dup2, early_describe, drop + dq branch, -13*8 ; back to the fetch at the start of the loop + dq exit +;(gdb) x/8xg 0x1000010d28 +;0x1000010d28: 0x0000001000010cf8 0x63617074696c0000 +;0x1000010d38: 0x000000000000386b 0x0000001000010d48 +;0x1000010d48: 0x010b08b84850ad48 0x0020ff0000001000 +;0x1000010d58: 0x0000000000000000 0x0000000000000000 +;(gdb) x/s 0x1000010d32 +;0x1000010d32: "litpack8" +;(gdb) x/8xg 0x800b6b8 +;0x800b6b8: 0x000000000800b580 0x725f666c65730000 +;0x800b6c8: 0x0000000000007761 0x00000000080033a8 +;0x800b6d8: 0x0000000008003538 0x00000000080038d0 +;0x800b6e8: 0x00000000080000c4 0x00000000080038d0 +;(gdb) x/s 0x800b6c2 +;0x800b6c2: "self_raw" + ; Allocate space by incrementing "here", and output a word header in it. ; Also add it to the "latest" linked list. Use zero as the flag values; @@ -5754,6 +5924,20 @@ defword self_raw, 0 dq memcopy dq exit +; Routines that traverse the raw code need a way to guess where the end of a +; word's contents is, lest they traverse off the edge of the world and +; segfault. On the heap, we have the "here" pointer to provide an upper bound. 
+; For code that's loaded from the ELF's text segment, we don't get to do that, +; so we need another way. +; +; We could notionally use syscalls to check how large the text segment is, but +; that would make us more dependent on specifics of the ABI and the kernel. +; Instead, we invent this concept of a metadata word, which is designated with +; a new flag bit and exists to annotate the address space rather than to be +; invoked. This is the only one there is right now, and the traversal routines +; hard-code their knowledge of it. The 1 below is presumably the flags value with only the M (metadata) bit set; confirm against the defword macro. +defword end_segment, 1 + final_word_name = latest_word code_size = $ - code_start -- cgit 1.4.1