summary refs log tree commit diff
path: root/src/commandline.lalrpop
diff options
context:
space:
mode:
authorIrene Knapp <ireneista@irenes.space>2024-03-12 21:54:28 -0700
committerIrene Knapp <ireneista@irenes.space>2024-03-12 21:54:28 -0700
commit7be9acd0bb08901c9fdfa45b694b7d3d5a594e70 (patch)
tree8a38a3ff200bae284fc0e2009220dacf0b53205b /src/commandline.lalrpop
parent3086d361665aedf840f76ded2f46c6ff5204f776 (diff)
remove a lot of stuff that was part of the shell and does not need to be part of the line input library
Change-Id: Idd0435a4b29f5f525c9279e5c1d27916e6320685
Diffstat (limited to 'src/commandline.lalrpop')
-rw-r--r--src/commandline.lalrpop66
1 files changed, 0 insertions, 66 deletions
diff --git a/src/commandline.lalrpop b/src/commandline.lalrpop
deleted file mode 100644
index d52e741..0000000
--- a/src/commandline.lalrpop
+++ /dev/null
@@ -1,66 +0,0 @@
-grammar;
-
-pub Invocation: Vec<&'input str> = {
-  <WORD*>,
-};
-
-// Several of the regexps below make use of Unicode character classes. [1] is
-// the official reference to Unicode classes, and [2] is a site that is useful
-// for browsing to get an intuitive idea of what the classes mean.
-//
-// In maintaining these regexps, it's important to understand the structure
-// of Unicode character classes. There are seven top-level categories, each
-// with a single-character name (e.g. "Z" for separators). Each top-level
-// category has several subcategories which form an exhaustive partition of it;
-// the subcategories have two-character names (e.g. "Zs" for space separators).
-// Every allocated codepoint is in exactly one top-level category and exactly
-// one subcategory.
-//
-// It is important that these regexps exhaustively cover the entirety of
-// Unicode, without omission; otherwise lalrpop's lexer will give InvalidToken
-// errors for unrecognized characters. Overlaps will be less catastrophic, as
-// they'll be resolved by the precedence rules, but for clarity's sake they
-// should be avoided.
-//
-// [1] http://www.unicode.org/reports/tr44/#General_Category_Values
-// [2] https://www.compart.com/en/unicode/category
-//
-match {
-  // Zs is the Unicode class for space separators. This includes the ASCII
-  // space character.
-  //
-  r"\p{Zs}+" => { },
-
-  // Zl is the Unicode class for line separators. Zp is the Unicode class for
-  // paragraph separators. Newline and carriage return are included individually
-  // here, since Unicode classifies them with the control characters rather than
-  // with the space characters.
-  //
-  r"[\p{Zl}\p{Zp}\n\r]" => NEWLINE,
-
-  // This one recognizes exactly one character, the old-school double-quote. As
-  // tempting as it is to do something clever with character classes, shells have
-  // a long history of quoting syntaxes which are subtle and quick to anger, and
-  // for this project the decision is to be radically simple instead.
-  r#"["]"# => QUOTE,
-
-  // This one matches any character in class C (control, format, etc.) other
-  // than line feed and carriage return. The grammar doesn't reference these
-  // characters, but a token for them makes the error messages more informative.
-  r"[\p{C}&&[^\n\r]]" => CONTROL,
-
-  // Z is the Unicode class for separators, which is exhaustively partitioned
-  // into line, paragraph, and space separators, each handled above. C is the
-  // class Unicode calls "Other": control characters plus format, surrogate,
-  // private-use, and unassigned codepoints. This regexp intersects the
-  // negations of both classes with a negated class enumerating the explicitly
-  // recognized characters, so it matches any character NOT matched above.
-  //
-  // Note that, counterintuitively, line feed and carriage return are classified
-  // as control characters, not as line separators. Either way, this regexp would
-  // still exclude them, but the difference might be relevant when maintaining
-  // it.
-  //
-  r#"[\P{Z}&&\P{C}&&[^"]]+"# => WORD,
-}
-