fully implement the first-nonblank-column thing for H and L

surprisingly intricate, and although it looks messy now, this approach will clean up nicely

Force-Push: yes
Change-Id: Ic9c90982787a58110ec0a189844742a1e6c2216f
author: Irene Knapp <ireneista@irenes.space> 2026-03-27 16:59:00 -0700
committer: Irene Knapp <ireneista@irenes.space> 2026-03-27 16:59:00 -0700
commit: a80f9a1b97e1be194cb91a3b78717b0824d3bce8 (patch)
tree: a7481653125ce36b7d15a80ee763e87642ae053b /src
parent: 8d0a78e708dd46aec40d3a06459c86d9c10f1e3b (diff)
3 files changed, 63 insertions, 12 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
index 08ffb39..ccd2031 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -2,7 +2,13 @@
 use crate::types::*;
 use smol::prelude::*;
 
-use smol::io::BoxedReader;
+
+#[derive(Debug)]
+pub struct Decode {
+  pub c: char,
+  pub skipped_bytes: usize,
+  pub found_bytes: usize,
+}
 
 
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -31,9 +37,12 @@ pub fn get_utf8_byte_type(b: u8) -> UTF8ByteType {
 }
 
 
-pub async fn read_utf8_char(input: &mut BoxedReader) -> Result<char> {
+pub async fn read_utf8_char(input: &mut (impl AsyncRead + Unpin))
+    -> Result<Decode>
+{
   let mut buf = vec![0; 4];
   let mut unread_byte: Option<u8> = None;
+  let mut skipped_bytes = 0;
 
   loop {
     if let Some(byte) = unread_byte {
@@ -43,56 +52,76 @@ pub async fn read_utf8_char(input: &mut BoxedReader) -> Result<char> {
       input.read_exact(&mut buf[0 .. 1]).await?;
     }
 
-    match get_utf8_byte_type(buf[0]) {
-      UTF8ByteType::Single => { },
+    let found_bytes = match get_utf8_byte_type(buf[0]) {
+      UTF8ByteType::Single => {
+        1
+      },
+
       UTF8ByteType::Introducer(2) => {
         input.read_exact(&mut buf[1 .. 2]).await?;
         if get_utf8_byte_type(buf[1]) != UTF8ByteType::Continuation {
           unread_byte = Some(buf[1]);
+          skipped_bytes += 1;
           continue;
         }
+
+        2
       },
+
       UTF8ByteType::Introducer(3) => {
         input.read_exact(&mut buf[1 .. 2]).await?;
         if get_utf8_byte_type(buf[1]) != UTF8ByteType::Continuation {
           unread_byte = Some(buf[1]);
+          skipped_bytes += 1;
           continue;
         }
 
         input.read_exact(&mut buf[2 .. 3]).await?;
         if get_utf8_byte_type(buf[2]) != UTF8ByteType::Continuation {
           unread_byte = Some(buf[2]);
+          skipped_bytes += 2;
           continue;
         }
+
+        3
       },
+
       UTF8ByteType::Introducer(4) => {
         input.read_exact(&mut buf[1 .. 2]).await?;
         if get_utf8_byte_type(buf[1]) != UTF8ByteType::Continuation {
           unread_byte = Some(buf[1]);
+          skipped_bytes += 1;
           continue;
         }
 
         input.read_exact(&mut buf[2 .. 3]).await?;
         if get_utf8_byte_type(buf[2]) != UTF8ByteType::Continuation {
           unread_byte = Some(buf[2]);
+          skipped_bytes += 2;
           continue;
         }
 
         input.read_exact(&mut buf[3 .. 4]).await?;
         if get_utf8_byte_type(buf[3]) != UTF8ByteType::Continuation {
           unread_byte = Some(buf[3]);
+          skipped_bytes += 3;
           continue;
         }
+
+        4
       },
 
       /* If it's not the start of a valid character, ignore it. */
-      _ => continue,
-    }
+      _ => {
+        skipped_bytes += 1;
+        continue;
+      }
+    };
 
     if let Ok(string) = std::str::from_utf8(&buf)
        && let Some(c) = string.chars().next()
     {
-      return Ok(c);
+      return Ok(Decode { c, skipped_bytes, found_bytes });
     }
   }
 }
diff --git a/src/main.rs b/src/main.rs
index 4b5dc5a..70bc0b4 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -3,6 +3,7 @@ use crate::types::*;
 use smol::prelude::*;
 
 use smol::fs::File;
+use smol::io::Cursor;
 use smol::lock::RwLock;
 use std::path::PathBuf;
 use std::process::ExitCode;
@@ -464,12 +465,31 @@ impl Ivy {
 
   async fn first_non_blank_column(&mut self) -> Result<()> {
     let row = *self.window.read().await.cursor_row.read().await;
+    let buffer = self.buffer.write().await;
+
+    if let Some(row_span) = buffer.line_span(row).await {
+      let mut offset = 0;
+      loop {
+        let sub_span = row_span.start + offset .. row_span.end;
+        let mut contents = buffer.contents.write().await;
+        let mut cursor = Cursor::new(&mut contents[sub_span]);
+
+        if let Ok(decode) = encoding::read_utf8_char(&mut cursor).await {
+          offset += decode.skipped_bytes;
+
+          if decode.c.is_whitespace() {
+            offset += decode.found_bytes;
+          } else {
+            break;
+          }
+        } else {
+          break;
+        }
+      }
 
-    if let Some(span) = self.buffer.read().await.line_span(row).await {
-      /* TODO */
       let window = self.window.write().await;
-      *window.cursor_column.write().await = 0;
-      *window.cursor_neutral_column.write().await = 0;
+      *window.cursor_column.write().await = offset;
+      *window.cursor_neutral_column.write().await = offset;
     } else {
       let window = self.window.write().await;
       *window.cursor_column.write().await = 0;
diff --git a/src/terminal.rs b/src/terminal.rs
index 1ba2b01..300fcfb 100644
--- a/src/terminal.rs
+++ b/src/terminal.rs
@@ -100,7 +100,9 @@ impl Terminal {
   }
 
   pub async fn read_char(&mut self) -> Result<char> {
-    encoding::read_utf8_char(&mut self.stdin).await
+    let decode = encoding::read_utf8_char(&mut self.stdin).await?;
+
+    Ok(decode.c)
   }
 
   pub async fn do_escape(&mut self, escape_type: EscapeType, code: &str,
author	Irene Knapp <ireneista@irenes.space>	2026-03-27 16:59:00 -0700
committer	Irene Knapp <ireneista@irenes.space>	2026-03-27 16:59:00 -0700
commit	a80f9a1b97e1be194cb91a3b78717b0824d3bce8 (patch)
tree	a7481653125ce36b7d15a80ee763e87642ae053b /src
parent	8d0a78e708dd46aec40d3a06459c86d9c10f1e3b (diff)