diff options
Diffstat (limited to 'src/encoding.rs')
| -rw-r--r-- | src/encoding.rs | 72 |
1 files changed, 71 insertions, 1 deletions
diff --git a/src/encoding.rs b/src/encoding.rs index 7d5326e..08ffb39 100644 --- a/src/encoding.rs +++ b/src/encoding.rs @@ -1,7 +1,11 @@ #![forbid(unsafe_code)] +use crate::types::*; +use smol::prelude::*; +use smol::io::BoxedReader; -#[derive(Clone, Copy, Debug)] + +#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum UTF8ByteType { Single, Introducer(u8), @@ -26,3 +30,69 @@ pub fn get_utf8_byte_type(b: u8) -> UTF8ByteType { } } + +pub async fn read_utf8_char(input: &mut BoxedReader) -> Result<char> { + let mut buf = vec![0; 4]; + let mut unread_byte: Option<u8> = None; + + loop { + if let Some(byte) = unread_byte { + buf[0] = byte; + unread_byte = None; + } else { + input.read_exact(&mut buf[0 .. 1]).await?; + } + + match get_utf8_byte_type(buf[0]) { + UTF8ByteType::Single => { }, + UTF8ByteType::Introducer(2) => { + input.read_exact(&mut buf[1 .. 2]).await?; + if get_utf8_byte_type(buf[1]) != UTF8ByteType::Continuation { + unread_byte = Some(buf[1]); + continue; + } + }, + UTF8ByteType::Introducer(3) => { + input.read_exact(&mut buf[1 .. 2]).await?; + if get_utf8_byte_type(buf[1]) != UTF8ByteType::Continuation { + unread_byte = Some(buf[1]); + continue; + } + + input.read_exact(&mut buf[2 .. 3]).await?; + if get_utf8_byte_type(buf[2]) != UTF8ByteType::Continuation { + unread_byte = Some(buf[2]); + continue; + } + }, + UTF8ByteType::Introducer(4) => { + input.read_exact(&mut buf[1 .. 2]).await?; + if get_utf8_byte_type(buf[1]) != UTF8ByteType::Continuation { + unread_byte = Some(buf[1]); + continue; + } + + input.read_exact(&mut buf[2 .. 3]).await?; + if get_utf8_byte_type(buf[2]) != UTF8ByteType::Continuation { + unread_byte = Some(buf[2]); + continue; + } + + input.read_exact(&mut buf[3 .. 4]).await?; + if get_utf8_byte_type(buf[3]) != UTF8ByteType::Continuation { + unread_byte = Some(buf[3]); + continue; + } + }, + + /* If it's not the start of a valid character, ignore it. */ + _ => continue, + } + + if let Ok(string) = std::str::from_utf8(&buf) + && let Some(c) = string.chars().next() + { + return Ok(c); + } + } +} |