diff options
author | Irene Knapp <ireneista@gmail.com> | 2020-09-22 14:19:40 -0700 |
---|---|---|
committer | Irene Knapp <ireneista@gmail.com> | 2020-09-22 14:19:40 -0700 |
commit | 71e647b349036069751a457f3e0e8fda1b54cd2a (patch) | |
tree | 4177c0096adc0c2ee20a9c9413afd8ecb59f640a | |
parent | cca86b496e00605163e96d93cda4b9b248df91fe (diff) |
Parse space-separated words.
-rw-r--r-- | src/commandline.lalrpop | 41 | ||||
-rw-r--r-- | src/main.rs | 4 |
2 files changed, 34 insertions, 11 deletions
diff --git a/src/commandline.lalrpop b/src/commandline.lalrpop index 0655281..73f7247 100644 --- a/src/commandline.lalrpop +++ b/src/commandline.lalrpop @@ -1,16 +1,39 @@ grammar; -// -// Z is the unicode class for separators, including line, paragraph, and space -// separators. C is the class for control characters. P is the class for -// punctuation. This regexp tests for the intersection of the negation of these -// character classes, which is any character NOT in one of these three classes. -// -// [1] is the official reference, and [2] is a site that is useful for browsing -// to get an intuitive idea of what these classes mean. +pub Invocation: Vec<&'input str> = { + <WORD+> NEWLINE, +}; + +// Several of the regexps below make use of Unicode character classes. [1] is +// the official reference to Unicode classes, and [2] is a site that is useful +// for browsing to get an intuitive idea of what the classes mean. // // [1] http://www.unicode.org/reports/tr44/#General_Category_Values // [2] https://www.compart.com/en/unicode/category // -pub Filename: String = <filename:r"[\P{Z}&&\P{C}&&\P{P}]+"> => filename.to_string(); +match { + // Zs is the Unicode class for space separators. This includes the ASCII + // space character. + // + r"\p{Zs}+" => { }, + + // Zl is the Unicode class for line separators. Zp is the Unicode class for + // paragraph separators. Newline and carriage return are included individually + // here, since Unicode classifies them with the control characters rather than + // with the separator characters. + // + r"[\p{Zl}\p{Zp}\n\r]" => NEWLINE, + + // Z is the Unicode class for separators, including line, paragraph, and space + // separators. C is the class for control characters. P is the class for + // punctuation. This regexp tests for the intersection of the negation of these + // character classes, which is any character NOT in one of these three classes. 
+ // + // Note that, counterintuitively, line feed and carriage return are classified + // as control characters, not as line separators. Either way, this regexp would + // still exclude them, but the difference might be relevant when maintaining + // it. + // + r"[\P{Z}&&\P{C}&&\P{P}]+" => WORD, +} diff --git a/src/main.rs b/src/main.rs index 816ed49..dd1c31d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -67,10 +67,10 @@ fn read() -> Result<Input> { fn execute(input: &str) -> Result<()> { - let filename = commandline::FilenameParser::new().parse(input)?; + let invocation = commandline::InvocationParser::new().parse(input)?; println!("{}", input); - println!("filename '{}'", filename); + println!("invocation '{:?}'", invocation); Ok(()) } |