diff options
author | Clayton G. Hobbs <clay@lakeserv.net> | 2015-12-30 23:33:29 -0500 |
---|---|---|
committer | Clayton G. Hobbs <clay@lakeserv.net> | 2015-12-30 23:33:29 -0500 |
commit | e19d76f0515b291f9c6994bfd0faccccf5b894aa (patch) | |
tree | db29ef0dbecee06e0788d9f72c9d327caa1b1479 | |
parent | 57f58295a48dfa4d893eb1546c5f2f64133c0e7f (diff) |
Added number parsing capabilities
See commands.tmp for an example. It's pretty neat, but it could still use some work. I thought of a really clever way to parse numbers, better than the one I came up with last night, but since I have a working implementation now I figure I'd better commit it. We have a new bug which causes the dictionary to be updated every time the program starts. I hope I didn't force that to happen last night or something, but I have a vague feeling I did.
-rw-r--r-- | commands.tmp | 10 | ||||
-rw-r--r-- | config.py | 2 | ||||
-rw-r--r-- | gtktrayui.py | 2 | ||||
-rw-r--r-- | gtkui.py | 2 | ||||
-rwxr-xr-x | kaylee.py | 27 | ||||
-rw-r--r-- | languageupdater.py | 4 | ||||
-rw-r--r-- | numberparser.py | 182 | ||||
-rw-r--r-- | recognizer.py | 2 |
8 files changed, 216 insertions, 15 deletions
diff --git a/commands.tmp b/commands.tmp index 9e41147..10fa2c5 100644 --- a/commands.tmp +++ b/commands.tmp @@ -1,5 +1,7 @@ -# commands are key:value pairs -# key is the sentence to listen for -# value is the command to run when the key is spoken +# commands are pars of the form: +# KEY: VALUE +# KEY is the sentence to listen for +# VALUE is the command to run when the key is spoken -hello world:echo "hello world" +hello world: echo "hello world" +start a %d minute timer: (echo {0} minute timer started && sleep {0}m && echo {0} minute timer ended) & diff --git a/config.py b/config.py index 48db1d6..6bd8c9e 100644 --- a/config.py +++ b/config.py @@ -1,7 +1,7 @@ # This is part of Kaylee # -- this code is licensed GPLv3 -# Copyright 2013 Jezra # Copyright 2015 Clayton G. Hobbs +# Portions Copyright 2013 Jezra import json import os diff --git a/gtktrayui.py b/gtktrayui.py index 8c6c47c..f18c449 100644 --- a/gtktrayui.py +++ b/gtktrayui.py @@ -1,7 +1,7 @@ # This is part of Kaylee # -- this code is licensed GPLv3 -# Copyright 2013 Jezra # Copyright 2015 Clayton G. Hobbs +# Portions Copyright 2013 Jezra import sys import gi diff --git a/gtkui.py b/gtkui.py index b1e25ef..ffb39c2 100644 --- a/gtkui.py +++ b/gtkui.py @@ -1,7 +1,7 @@ # This is part of Kaylee # -- this code is licensed GPLv3 -# Copyright 2013 Jezra # Copyright 2015 Clayton G. Hobbs +# Portions Copyright 2013 Jezra import sys import gi diff --git a/kaylee.py b/kaylee.py index 7aedb22..0ea9a16 100755 --- a/kaylee.py +++ b/kaylee.py @@ -2,8 +2,8 @@ # This is part of Kaylee # -- this code is licensed GPLv3 -# Copyright 2013 Jezra # Copyright 2015 Clayton G. 
Hobbs +# Portions Copyright 2013 Jezra from __future__ import print_function import sys @@ -17,6 +17,7 @@ import json from recognizer import Recognizer from config import Config from languageupdater import LanguageUpdater +from numberparser import NumberParser class Kaylee: @@ -33,6 +34,9 @@ class Kaylee: self.config = Config() self.options = vars(self.config.options) + # Create number parser for later use + self.number_parser = NumberParser() + # Read the commands self.read_commands() @@ -79,7 +83,10 @@ class Kaylee: # This is a parsible line (key, value) = line.split(":", 1) self.commands[key.strip().lower()] = value.strip() - strings.write(key.strip() + "\n") + strings.write(key.strip().replace('%d', '') + "\n") + # Add number words to the corpus + for word in self.number_parser.number_words: + strings.write(word + "\n") # Close the strings file strings.close() @@ -104,6 +111,7 @@ class Kaylee: def recognizer_finished(self, recognizer, text): t = text.lower() + numt, nums = self.number_parser.parse_all_numbers(t) # Is there a matching command? if t in self.commands: # Run the valid_sentence_command if there is a valid sentence command @@ -113,9 +121,18 @@ class Kaylee: # Should we be passing words? if self.options['pass_words']: cmd += " " + t - self.run_command(cmd) - else: - self.run_command(cmd) + self.run_command(cmd) + self.log_history(text) + elif numt in self.commands: + # Run the valid_sentence_command if there is a valid sentence command + if self.options['valid_sentence_command']: + subprocess.call(self.options['valid_sentence_command'], shell=True) + cmd = self.commands[numt] + cmd = cmd.format(*nums) + # Should we be passing words? 
+ if self.options['pass_words']: + cmd += " " + t + self.run_command(cmd) self.log_history(text) else: # Run the invalid_sentence_command if there is an invalid sentence command diff --git a/languageupdater.py b/languageupdater.py index 662a988..afdfc21 100644 --- a/languageupdater.py +++ b/languageupdater.py @@ -1,7 +1,7 @@ # This is part of Kaylee # -- this code is licensed GPLv3 -# Copyright 2013 Jezra # Copyright 2015 Clayton G. Hobbs +# Portions Copyright 2013 Jezra import hashlib import json @@ -56,7 +56,7 @@ class LanguageUpdater: # Parse response to get URLs of the files we need path_re = r'.*<title>Index of (.*?)</title>.*' - number_re = r'.*TAR[0-9]*?\.tgz.*' + number_re = r'.*TAR([0-9]*?)\.tgz.*' for line in r.text.split('\n'): # If we found the directory, keep it and don't break if re.search(path_re, line): diff --git a/numberparser.py b/numberparser.py new file mode 100644 index 0000000..fb04027 --- /dev/null +++ b/numberparser.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python +# numberparser.py - Translate words to decimal + +# This is part of Kaylee +# -- this code is licensed GPLv3 +# Copyright 2015 Clayton G. 
class NumberParser:
    """Translate English number words into integers.

    The class-level tables map each supported word to its value, grouped by
    the grammatical role the word plays in a number.  ``parse_number`` turns
    one spelled-out number into an int; ``parse_all_numbers`` finds every
    number inside a sentence and replaces it with a ``%d`` placeholder.

    Instance attributes set by ``__init__``:
      number_words           -- every word that may appear inside a number,
                                including filler words such as "and"
      mandatory_number_words -- the subset of number_words that carries a
                                value (i.e. everything except the fillers)
    """

    # Define the mappings from words to numbers
    zero = {
        'zero': 0
    }

    ones = {
        'one': 1,
        'two': 2,
        'three': 3,
        'four': 4,
        'five': 5,
        'six': 6,
        'seven': 7,
        'eight': 8,
        'nine': 9
    }

    special_ones = {
        'ten': 10,
        'eleven': 11,
        'twelve': 12,
        'thirteen': 13,
        'fourteen': 14,
        'fifteen': 15,
        'sixteen': 16,
        'seventeen': 17,
        'eighteen': 18,
        'nineteen': 19
    }

    tens = {
        'twenty': 20,
        'thirty': 30,
        'forty': 40,
        'fifty': 50,
        'sixty': 60,
        'seventy': 70,
        'eighty': 80,
        'ninety': 90
    }

    hundred = {
        'hundred': 100
    }

    exp = {
        'thousand': 1000,
        'million': 1000000,
        'billion': 1000000000
    }

    # Valueless filler words that are tolerated inside a number
    allowed = [
        'and'
    ]

    def __init__(self):
        """Build the word lists derived from the class-level tables."""
        self.number_words = []
        for table in (self.zero, self.ones, self.special_ones, self.tens,
                      self.hundred, self.exp):
            self.number_words.extend(table)
        # list(...) rather than .copy() so this also works on Python 2
        self.mandatory_number_words = list(self.number_words)
        self.number_words.extend(self.allowed)

    def parse_number(self, text_line):
        """
        Parse a single number from natural language into an int

        text_line is split on commas, whitespace, and hyphens; every
        resulting word must be a known number word or an allowed filler.

        Returns the integer value of the number.

        Raises ValueError if an unknown word is found (this includes empty
        input) or if the words appear in an order that does not form a
        number, e.g. "five six", "zero one", or a bare "thousand".
        """
        value = 0            # value of the group currently being built
        partial_value = 0    # sum of the groups already closed by an exp word
        last_list = None     # table the previous value-carrying word came from

        # Split text_line by commas, whitespace, and hyphens
        text_words = re.split(r'[,\s-]+', text_line.strip())
        # Parse the number
        for word in text_words:
            if word in self.zero:
                # "zero" is only valid as the first value-carrying word
                if last_list is not None:
                    raise ValueError('Invalid number')
                value = 0
                last_list = self.zero
            elif word in self.ones:
                if last_list in (self.zero, self.ones, self.special_ones):
                    raise ValueError('Invalid number')
                value += self.ones[word]
                last_list = self.ones
            elif word in self.special_ones:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.special_ones[word]
                last_list = self.special_ones
            elif word in self.tens:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.tens[word]
                last_list = self.tens
            elif word in self.hundred:
                # "hundred" must multiply something that came before it
                if last_list not in (self.ones, self.special_ones, self.tens):
                    raise ValueError('Invalid number')
                value *= self.hundred[word]
                last_list = self.hundred
            elif word in self.exp:
                # A magnitude word needs a preceding value; without the None
                # check a bare "thousand" would silently parse as 0
                if last_list in (None, self.zero, self.exp):
                    raise ValueError('Invalid number')
                partial_value += value * self.exp[word]
                value = 0
                last_list = self.exp
            elif word not in self.allowed:
                raise ValueError('Invalid number')
        # Finish parsing the number
        return value + partial_value

    def _finish_number(self, num_words, nums, out_words):
        """Close the number collected in num_words and record the result.

        Appends the parsed value (or -1 if it is not a valid number) to nums
        and a '%d' placeholder to out_words.  Filler words at the end of
        num_words are not part of the number, so they are put back into
        out_words after the placeholder.  num_words is emptied in place.
        """
        trailing = []
        while num_words and num_words[-1] in self.allowed:
            trailing.append(num_words.pop())
        trailing.reverse()
        try:
            nums.append(self.parse_number(' '.join(num_words)))
        except ValueError:
            nums.append(-1)
        out_words.append('%d')
        out_words.extend(trailing)
        del num_words[:]

    def parse_all_numbers(self, text_line):
        """
        Replace every spelled-out number in text_line with '%d'

        Returns a tuple (text, nums): text is the sentence with its words
        joined by single spaces and each number replaced by the literal
        placeholder '%d'; nums lists the parsed integers in order of
        appearance.  A word sequence that looks like a number but is not
        valid is reported as -1.
        """
        nums = []
        out_words = []
        current = []    # words of the number currently being collected

        # Split text_line by commas, whitespace, and hyphens
        for word in re.split(r'[,\s-]+', text_line.strip()):
            if not word:
                # Produced by a leading/trailing separator; carries no text
                continue
            if word in self.mandatory_number_words:
                # We're parsing a number now
                current.append(word)
            elif current and word in self.allowed:
                # Filler is only part of a number once the number has started
                current.append(word)
            else:
                if current:
                    self._finish_number(current, nums, out_words)
                out_words.append(word)
        if current:
            self._finish_number(current, nums, out_words)

        return (' '.join(out_words), nums)