From 6a11bca93d6b6ac4d3e5a1b51c2b50ce0720d2a7 Mon Sep 17 00:00:00 2001 From: Irene Knapp Date: Fri, 22 May 2026 20:04:58 -0700 Subject: fix our first binary reproducibility issue (wow) with this fix, the first-generation and second-generation builds are precisely identical it was an unused codeword in the bootstrapped docol, under the label transform, which inadvertently had a value from the host address space. the fix required some creativity. Force-Push: yes Change-Id: I05b373b4231fa093454fa31891784de16d81bb18 --- core.e | 2 +- dynamic.e | 1 - log-load.e | 12 ++++++++++++ transform.e | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 64 insertions(+), 4 deletions(-) diff --git a/core.e b/core.e index 51ab9f7..a636c07 100644 --- a/core.e +++ b/core.e @@ -80,8 +80,8 @@ here ! ~ We can't use colon to create docol, not even the part that's a word, ~ because colon tries to dynamically invoke docol to fill in the codeword. s" docol" create +self-codeword here @ -dup 8 + pack64 L@' docol-codeword-value :rax mov-reg64-imm64 :rax push-reg64 pack-next diff --git a/dynamic.e b/dynamic.e index 24fe388..fc94437 100644 --- a/dynamic.e +++ b/dynamic.e @@ -359,7 +359,6 @@ ; - : self-codeword here @ 8 + , ; diff --git a/log-load.e b/log-load.e index d1e4a2f..55fa25d 100644 --- a/log-load.e +++ b/log-load.e @@ -200,6 +200,18 @@ over log-load-here swap drop ! ; +~ (log address -- log address) +: log-load-self-codeword + log-load-here dup @ + ~ (log address, here, output point) + + dup 8 + pack64 + ~ (log address, here, output point after pack64) + + swap ! ; + + + ~ This is the same as ",", from dynamic.e, except that it takes the log's ~ address as a parameter rather than hardcoding it, so that it can be used in ~ situations where the normal compilation process isn't yet available. 
diff --git a/transform.e b/transform.e index 6a81454..bd1a431 100644 --- a/transform.e +++ b/transform.e @@ -783,8 +783,9 @@ allocate-transform-state s" transform-state" variable ~ Word not provided statically, but used during the log-load routine anyway. dup s" L@'" stringcmp 0 = { drop 1 exit } if dup s" L!'" stringcmp 0 = { drop -1 exit } if + dup s" labels" stringcmp 0 = { drop 1 exit } if + dup s" allocate-transform-state" stringcmp 0 = { drop 1 exit } if - dup s" foo" stringcmp 0 = { drop 0 exit } if ~ DO NOT SUBMIT ~ If we get here, that's a problem. Emit an error message to make sure ~ it's easy to diagnose. We also return a comically large negative value, ~ to make sure things fail as quickly as possible afterwards. @@ -855,6 +856,22 @@ allocate-transform-state s" transform-state" variable ] ; +~ So, the way docol's user-facing entry is set up in core.e, it really wants +~ to reference "here" to create its codeword, but "here" is in the host +~ address space. We solve this by having it call the word "self-codeword" and +~ then patching around the address space issue with alternates. +~ +~ If we didn't have this fix, everything would appear to work fine but the +~ statically generated user-facing docol from the label transform would have +~ a wrong codeword, varying based on the host's memory usage. It's never +~ actually called by evoke itself (it would crash horribly), though it may +~ be called from programs that use the facilities differently. It's a binary +~ reproducibility issue - our only known one, in fact - so it's worth fixing. +: label-self-codeword-alternate + here @ dup 8 + host-address-space-to-target pack64 here ! + ; make-immediate + + ~ This is the alternate version of ";" for use with the label transform. Its ~ code is the same as the regular "create" except as noted below. 
It is likely ~ to be extremely useful to read and understand ";" in dynamic.e before @@ -1170,6 +1187,8 @@ allocate-transform-state s" transform-state" variable ~ (name as stack string, name pointer, placeholder, name pointer) dup s" create" stringcmp 0 = { swap drop ' label-create-alternate swap } if dup s" :" stringcmp 0 = { swap drop ' label-colon-alternate swap } if + dup s" self-codeword" stringcmp 0 = { + swap drop ' label-self-codeword-alternate swap } if dup s" ;" stringcmp 0 = { swap drop ' label-semicolon-alternate swap } if dup s" ;asm" stringcmp 0 = { swap drop ' label-semicolon-assembly-alternate swap } if @@ -1710,6 +1729,34 @@ allocate-transform-state s" transform-state" variable ; +~ See notes on label-self-codeword-alternate for what problem this is +~ solving. +~ +~ Since self-codeword's real definition is in dynamic.e, which absolutely +~ cannot be loaded yet at the time we output docol at the very beginning of +~ core.e, we need to provide an alternate for the log-load transform as well, +~ even though only the label transform has the reproducibility issue. +: log-load-self-codeword-alternate + ~ It's important to remember, self-codeword is not an immediate word! This + ~ alternate is, so it needs to produce correct compiled output when invoked + ~ in compile mode. Since self-codeword is also a regular word that exists, + ~ correct output is a reference to the non-alternate version. + interpreter-flags @ 0x01 & { + s" self-codeword" log-load-compile-dynamic-word + } { + log-load-roll-log-address + + swap-transform-variables + L@' log-load-self-codeword + swap-transform-variables + + offset-to-target-address-space , ~ log-load-self-codeword + + log-load-unroll-log-address + } if-else + ; make-immediate + + ~ This is the alternate version of ";" for use with the log-load transform. ~ Its code is the same as the regular ";" except as noted below. 
It is ~ likely to be extremely useful to read and understand ";" in dynamic.e @@ -2029,7 +2076,7 @@ allocate-transform-state s" transform-state" variable log-load-roll-log-address swap-transform-variables - L@' log-find + L@' log-load-find swap-transform-variables offset-to-target-address-space , ~ log-load-comma @@ -2251,6 +2298,8 @@ allocate-transform-state s" transform-state" variable swap drop ' log-load-create-alternate swap } if dup s" :" stringcmp 0 = { swap drop ' log-load-colon-alternate swap } if + dup s" self-codeword" stringcmp 0 = { + swap drop ' log-load-self-codeword-alternate swap } if dup s" ;" stringcmp 0 = { swap drop ' log-load-semicolon-alternate swap } if dup s" ;asm" stringcmp 0 = { -- cgit 1.4.1