summary refs log tree commit diff
path: root/src/encoding.rs
diff options
context:
space:
mode:
Diffstat (limited to 'src/encoding.rs')
-rw-r--r-- src/encoding.rs 28
1 files changed, 28 insertions, 0 deletions
diff --git a/src/encoding.rs b/src/encoding.rs
new file mode 100644
index 0000000..7d5326e
--- /dev/null
+++ b/src/encoding.rs
@@ -0,0 +1,28 @@
+#![forbid(unsafe_code)]
+
+
+#[derive(Clone, Copy, Debug)]
+pub enum UTF8ByteType { // role a single byte plays within a UTF-8 encoded stream
+  Single, // high bit clear (0xxxxxxx): a one-byte sequence on its own
+  Introducer(u8), // lead byte of a multi-byte sequence; get_utf8_byte_type stores 2, 3 or 4 here (presumably the sequence length in bytes)
+  Continuation, // 10xxxxxx: trailing byte of a multi-byte sequence
+  Invalid, // matches none of the patterns above (11111xxx as classified by get_utf8_byte_type)
+}
+
+
+pub fn get_utf8_byte_type(b: u8) -> UTF8ByteType { // classify `b` purely by its high-order bit pattern; checks run from shortest prefix to longest
+  if b & 0x80 == 0 { // 0xxxxxxx (0x00..=0x7F)
+    UTF8ByteType::Single
+  } else if b & 0xC0 == 0x80 { // 10xxxxxx (0x80..=0xBF)
+    UTF8ByteType::Continuation
+  } else if b & 0xE0 == 0xC0 { // 110xxxxx (0xC0..=0xDF); NOTE(review): includes 0xC0/0xC1, which well-formed UTF-8 never uses (overlong forms)
+    UTF8ByteType::Introducer(2)
+  } else if b & 0xF0 == 0xE0 { // 1110xxxx (0xE0..=0xEF)
+    UTF8ByteType::Introducer(3)
+  } else if b & 0xF8 == 0xF0 { // 11110xxx (0xF0..=0xF7); NOTE(review): includes 0xF5..=0xF7, which would encode values above U+10FFFF
+    UTF8ByteType::Introducer(4)
+  } else { // 11111xxx (0xF8..=0xFF): no UTF-8 byte has this prefix
+    UTF8ByteType::Invalid
+  }
+}
+