summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author Irene Knapp <ireneista@irenes.space> 2026-03-27 08:52:52 -0700
committer Irene Knapp <ireneista@irenes.space> 2026-03-27 08:52:52 -0700
commit 916bce453c48f10d42eb3744aa4c62d8ca2c4c69 (patch)
tree 6984f689fb0a57d9e5c65e6c3ddb48e4d6aac778 /src
parent 3f3d62639b3160bd9ea7dc2c5ec6a53b3e9e11bc (diff)
deal with broken UTF8 even better
now it should self-synchronize properly if there's something really weird happening

also, that code is all refactored into encoding.rs

Force-Push: yes
Change-Id: I8bd9682448fc309b7aa6c0513e9b94cb5a4ace11
Diffstat (limited to 'src')
-rw-r--r-- src/encoding.rs 72
-rw-r--r-- src/terminal.rs 37
-rw-r--r-- src/types.rs 1
3 files changed, 77 insertions, 33 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
index 7d5326e..08ffb39 100644
--- a/src/encoding.rs
+++ b/src/encoding.rs
@@ -1,7 +1,11 @@
 #![forbid(unsafe_code)]
+use crate::types::*;
+use smol::prelude::*;
 
+use smol::io::BoxedReader;
 
-#[derive(Clone, Copy, Debug)]
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
 pub enum UTF8ByteType {
   Single,
   Introducer(u8),
@@ -26,3 +30,69 @@ pub fn get_utf8_byte_type(b: u8) -> UTF8ByteType {
   }
 }
 
+
+/// Reads a single UTF-8 encoded character from `input`, one byte at a time.
+///
+/// Malformed input is skipped rather than reported: bytes that cannot start a
+/// character are dropped, and when a multi-byte sequence is cut short by a
+/// byte that is not a continuation byte, the partial sequence is discarded and
+/// that byte is re-examined as the possible start of the next character. This
+/// is what lets the decoder re-synchronize after garbage.
+///
+/// For example, given the bytes `E2 82 41` the incomplete three-byte sequence
+/// `E2 82` is thrown away and `'A'` (0x41) is returned. (This assumes that
+/// `get_utf8_byte_type` classifies bytes as the UTF-8 standard does.)
+///
+/// Well-framed sequences that are still not valid UTF-8 (whatever
+/// `std::str::from_utf8` rejects) are dropped as well.
+///
+/// # Errors
+///
+/// Propagates any error returned by `read_exact` on `input`. NOTE(review):
+/// that presumably includes `UnexpectedEof` at end of input; confirm.
+pub async fn read_utf8_char(input: &mut BoxedReader) -> Result<char> {
+  /* Scratch space for one character; UTF-8 needs at most four bytes. A fixed
+   * array is enough, so no heap allocation is required. */
+  let mut buf = [0u8; 4];
+  /* A byte that terminated a malformed sequence and still has to be decoded. */
+  let mut unread_byte: Option<u8> = None;
+
+  'next_char: loop {
+    /* Start from the byte that ended the previous attempt, if there is one;
+     * otherwise pull a fresh byte from the input. */
+    match unread_byte.take() {
+      Some(byte) => buf[0] = byte,
+      None => input.read_exact(&mut buf[0 .. 1]).await?,
+    }
+
+    /* Work out how many bytes the character occupies from its first byte. */
+    let len = match get_utf8_byte_type(buf[0]) {
+      UTF8ByteType::Single => 1,
+      UTF8ByteType::Introducer(n @ 2 ..= 4) => usize::from(n),
+
+      /* If it's not the start of a valid character, ignore it. */
+      _ => continue,
+    };
+
+    /* Fetch the continuation bytes individually, so that a stray byte is
+     * noticed immediately instead of being swallowed into the sequence. */
+    for i in 1 .. len {
+      input.read_exact(&mut buf[i .. i + 1]).await?;
+      if get_utf8_byte_type(buf[i]) != UTF8ByteType::Continuation {
+        /* The sequence was truncated. Keep the offending byte so the next
+         * iteration can treat it as a potential first byte. */
+        unread_byte = Some(buf[i]);
+        continue 'next_char;
+      }
+    }
+
+    /* Validate only the bytes belonging to this character. The rest of `buf`
+     * may hold leftovers from an abandoned sequence, and validating those too
+     * would reject (and silently lose) a perfectly good character. */
+    if let Ok(string) = std::str::from_utf8(&buf[.. len])
+       && let Some(c) = string.chars().next()
+    {
+      return Ok(c);
+    }
+  }
+}
diff --git a/src/terminal.rs b/src/terminal.rs
index de3ca4f..28e83b3 100644
--- a/src/terminal.rs
+++ b/src/terminal.rs
@@ -5,6 +5,7 @@ use smol::prelude::*;
 use crate::encoding;
 
 use smol::{ unblock, Unblock };
+use smol::io::{ BoxedReader, BoxedWriter };
 use smol::lock::{ OnceCell, RwLock };
 use std::fmt::Display;
 use std::os::fd::AsRawFd;
@@ -12,8 +13,8 @@ use termios::Termios;
 
 
 pub struct Terminal {
-  pub stdin: Unblock<std::io::Stdin>,
-  pub stdout: Unblock<std::io::Stdout>,
+  pub stdin: BoxedReader,
+  pub stdout: BoxedWriter,
   pub initial_termios: OnceCell<Termios>,
   pub width: RwLock<usize>,
   pub height: RwLock<usize>,
@@ -28,8 +29,8 @@ pub enum EscapeType {
 impl Terminal {
   pub async fn new() -> Self {
     Terminal {
-      stdin: Unblock::new(std::io::stdin()),
-      stdout: Unblock::new(std::io::stdout()),
+      stdin: Unblock::new(std::io::stdin()).boxed_reader(),
+      stdout: Unblock::new(std::io::stdout()).boxed_writer(),
       initial_termios: OnceCell::new(),
       width: RwLock::new(80),
       height: RwLock::new(24),
@@ -91,33 +92,7 @@ impl Terminal {
   }
 
   pub async fn read_char(&mut self) -> Result<char> {
-    let mut buf = vec![0; 4];
-
-    loop {
-      self.stdin.read_exact(&mut buf[0 .. 1]).await?;
-
-      match encoding::get_utf8_byte_type(buf[0]) {
-        UTF8ByteType::Single => { },
-        UTF8ByteType::Introducer(2) => {
-          self.stdin.read_exact(&mut buf[1 .. 2]).await?;
-        },
-        UTF8ByteType::Introducer(3) => {
-          self.stdin.read_exact(&mut buf[1 .. 3]).await?;
-        },
-        UTF8ByteType::Introducer(4) => {
-          self.stdin.read_exact(&mut buf[1 .. 4]).await?;
-        },
-
-        /* If it's not the start of a valid character, ignore it. */
-        _ => continue,
-      }
-
-      if let Ok(string) = std::str::from_utf8(&buf)
-         && let Some(c) = string.chars().next()
-      {
-        return Ok(c);
-      }
-    }
+    encoding::read_utf8_char(&mut self.stdin).await
   }
 
   pub async fn do_escape(&mut self, escape_type: EscapeType, code: &str,
diff --git a/src/types.rs b/src/types.rs
index 1f74216..ef45492 100644
--- a/src/types.rs
+++ b/src/types.rs
@@ -1,5 +1,4 @@
 #![forbid(unsafe_code)]
-pub use crate::encoding::UTF8ByteType;
 pub use crate::terminal::Terminal;
 
 pub type Error = std::io::Error;