Added number parsing capabilities

See commands.tmp for an example. It's pretty neat, but it could still use some work. I thought of a really clever way to parse numbers, better than the one I came up with last night, but since I have a working implementation now I figure I'd better commit it. We have a new bug which causes the dictionary to be updated every time the program starts. I hope I didn't force that to happen last night or something, but I have a vague feeling I did.
author: Clayton G. Hobbs <clay@lakeserv.net> 2015-12-30 23:33:29 -0500
committer: Clayton G. Hobbs <clay@lakeserv.net> 2015-12-30 23:33:29 -0500
commit: e19d76f0515b291f9c6994bfd0faccccf5b894aa (patch)
tree: db29ef0dbecee06e0788d9f72c9d327caa1b1479
parent: 57f58295a48dfa4d893eb1546c5f2f64133c0e7f (diff)
8 files changed, 216 insertions, 15 deletions
diff --git a/commands.tmp b/commands.tmp
index 9e41147..10fa2c5 100644
--- a/commands.tmp
+++ b/commands.tmp
@@ -1,5 +1,7 @@
-# commands are key:value pairs 
-# key is the sentence to listen for
-# value is the command to run when the key is spoken
+# commands are pairs of the form:
+#     KEY: VALUE
+# KEY is the sentence to listen for
+# VALUE is the command to run when the key is spoken
 
-hello world:echo "hello world"
+hello world: echo "hello world"
+start a %d minute timer: (echo {0} minute timer started && sleep {0}m && echo {0} minute timer ended) &
diff --git a/config.py b/config.py
index 48db1d6..6bd8c9e 100644
--- a/config.py
+++ b/config.py
@@ -1,7 +1,7 @@
 # This is part of Kaylee
 # -- this code is licensed GPLv3
-# Copyright 2013 Jezra
 # Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
 
 import json
 import os
diff --git a/gtktrayui.py b/gtktrayui.py
index 8c6c47c..f18c449 100644
--- a/gtktrayui.py
+++ b/gtktrayui.py
@@ -1,7 +1,7 @@
 # This is part of Kaylee
 # -- this code is licensed GPLv3
-# Copyright 2013 Jezra
 # Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
 
 import sys
 import gi
diff --git a/gtkui.py b/gtkui.py
index b1e25ef..ffb39c2 100644
--- a/gtkui.py
+++ b/gtkui.py
@@ -1,7 +1,7 @@
 # This is part of Kaylee
 # -- this code is licensed GPLv3
-# Copyright 2013 Jezra
 # Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
 
 import sys
 import gi
diff --git a/kaylee.py b/kaylee.py
index 7aedb22..0ea9a16 100755
--- a/kaylee.py
+++ b/kaylee.py
@@ -2,8 +2,8 @@
 
 # This is part of Kaylee
 # -- this code is licensed GPLv3
-# Copyright 2013 Jezra
 # Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
 
 from __future__ import print_function
 import sys
@@ -17,6 +17,7 @@ import json
 from recognizer import Recognizer
 from config import Config
 from languageupdater import LanguageUpdater
+from numberparser import NumberParser
 
 
 class Kaylee:
@@ -33,6 +34,9 @@ class Kaylee:
         self.config = Config()
         self.options = vars(self.config.options)
 
+        # Create number parser for later use
+        self.number_parser = NumberParser()
+
         # Read the commands
         self.read_commands()
 
@@ -79,7 +83,10 @@ class Kaylee:
                 # This is a parsible line
                 (key, value) = line.split(":", 1)
                 self.commands[key.strip().lower()] = value.strip()
-                strings.write(key.strip() + "\n")
+                strings.write(key.strip().replace('%d', '') + "\n")
+        # Add number words to the corpus
+        for word in self.number_parser.number_words:
+            strings.write(word + "\n")
         # Close the strings file
         strings.close()
 
@@ -104,6 +111,7 @@ class Kaylee:
 
     def recognizer_finished(self, recognizer, text):
         t = text.lower()
+        numt, nums = self.number_parser.parse_all_numbers(t)
         # Is there a matching command?
         if t in self.commands:
             # Run the valid_sentence_command if there is a valid sentence command
@@ -113,9 +121,18 @@ class Kaylee:
             # Should we be passing words?
             if self.options['pass_words']:
                 cmd += " " + t
-                self.run_command(cmd)
-            else:
-                self.run_command(cmd)
+            self.run_command(cmd)
+            self.log_history(text)
+        elif numt in self.commands:
+            # Run the valid_sentence_command if there is a valid sentence command
+            if self.options['valid_sentence_command']:
+                subprocess.call(self.options['valid_sentence_command'], shell=True)
+            cmd = self.commands[numt]
+            cmd = cmd.format(*nums)
+            # Should we be passing words?
+            if self.options['pass_words']:
+                cmd += " " + t
+            self.run_command(cmd)
             self.log_history(text)
         else:
             # Run the invalid_sentence_command if there is an invalid sentence command
diff --git a/languageupdater.py b/languageupdater.py
index 662a988..afdfc21 100644
--- a/languageupdater.py
+++ b/languageupdater.py
@@ -1,7 +1,7 @@
 # This is part of Kaylee
 # -- this code is licensed GPLv3
-# Copyright 2013 Jezra
 # Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
 
 import hashlib
 import json
@@ -56,7 +56,7 @@ class LanguageUpdater:
 
         # Parse response to get URLs of the files we need
         path_re = r'.*<title>Index of (.*?)</title>.*'
-        number_re = r'.*TAR[0-9]*?\.tgz.*'
+        number_re = r'.*TAR([0-9]*?)\.tgz.*'
         for line in r.text.split('\n'):
             # If we found the directory, keep it and don't break
             if re.search(path_re, line):
diff --git a/numberparser.py b/numberparser.py
new file mode 100644
index 0000000..fb04027
--- /dev/null
+++ b/numberparser.py
@@ -0,0 +1,182 @@
+#!/usr/bin/env python
+# numberparser.py - Translate words to decimal
+
+# This is part of Kaylee
+# -- this code is licensed GPLv3
+# Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
+import re
+
+# Define the mappings from words to numbers
+class NumberParser:
+    """Translate English number words into integers.
+
+    The class-level tables map each recognised word to its value;
+    ``allowed`` lists valueless filler words that may appear inside a number.
+    """
+
+    zero = {
+        'zero': 0
+    }
+
+    ones = {
+        'one': 1,
+        'two': 2,
+        'three': 3,
+        'four': 4,
+        'five': 5,
+        'six': 6,
+        'seven': 7,
+        'eight': 8,
+        'nine': 9
+    }
+
+    special_ones = {
+        'ten': 10,
+        'eleven': 11,
+        'twelve': 12,
+        'thirteen': 13,
+        'fourteen': 14,
+        'fifteen': 15,
+        'sixteen': 16,
+        'seventeen': 17,
+        'eighteen': 18,
+        'nineteen': 19
+    }
+
+    tens = {
+        'twenty': 20,
+        'thirty': 30,
+        'forty': 40,
+        'fifty': 50,
+        'sixty': 60,
+        'seventy': 70,
+        'eighty': 80,
+        'ninety': 90
+    }
+
+    hundred = {
+        'hundred': 100
+    }
+
+    exp = {
+        'thousand': 1000,
+        'million': 1000000,
+        'billion': 1000000000
+    }
+
+    allowed = [
+        'and'
+    ]
+
+    def __init__(self):
+        """Build the word lists used for the corpus and for number detection."""
+        # Words that carry a numeric value, gathered in table order
+        self.mandatory_number_words = [
+            word
+            for table in (self.zero, self.ones, self.special_ones,
+                          self.tens, self.hundred, self.exp)
+            for word in table
+        ]
+        # Every word that may appear inside a number, fillers included
+        self.number_words = self.mandatory_number_words + list(self.allowed)
+
+    def parse_number(self, text_line):
+        """
+        Parse a number written in natural language into an int
+
+        text_line is split on commas, whitespace, and hyphens.  Words from
+        ``allowed`` are skipped.  Raises ValueError if a word is not a number
+        word or if the words appear in an order that is not a valid number.
+
+        TODO: Throw more exceptions when invalid numbers are detected.
+        """
+        value = 0
+        partial_value = 0
+        last_list = None
+
+        # Split text_line by commas, whitespace, and hyphens
+        text_words = re.split(r'[,\s-]+', text_line.strip())
+        # value is the group being built; partial_value sums finished groups
+        for word in text_words:
+            if word in self.zero:
+                if last_list is not None:
+                    raise ValueError('Invalid number')
+                value = 0
+                last_list = self.zero
+            elif word in self.ones:
+                if last_list in (self.zero, self.ones, self.special_ones):
+                    raise ValueError('Invalid number')
+                value += self.ones[word]
+                last_list = self.ones
+            elif word in self.special_ones:
+                if last_list in (self.zero, self.ones, self.special_ones, self.tens):
+                    raise ValueError('Invalid number')
+                value += self.special_ones[word]
+                last_list = self.special_ones
+            elif word in self.tens:
+                if last_list in (self.zero, self.ones, self.special_ones, self.tens):
+                    raise ValueError('Invalid number')
+                value += self.tens[word]
+                last_list = self.tens
+            elif word in self.hundred:
+                if last_list not in (self.ones, self.special_ones, self.tens):
+                    raise ValueError('Invalid number')
+                value *= self.hundred[word]
+                last_list = self.hundred
+            elif word in self.exp:
+                if last_list in (self.zero, self.exp):
+                    raise ValueError('Invalid number')
+                partial_value += value * self.exp[word]
+                value = 0
+                last_list = self.exp
+            elif word not in self.allowed:
+                raise ValueError('Invalid number')
+        # Finish parsing the number
+        value += partial_value
+        return value
+
+    def parse_all_numbers(self, text_line):
+        """
+        Replace each number in text_line with '%d' and collect the values
+
+        Returns (text, nums): text is the words of text_line joined by single
+        spaces with each run of number words replaced by '%d'; nums lists the
+        parsed ints in order, using -1 for a run that parse_number rejects.
+        """
+        nums = []
+        out_words = []
+        current_num = []
+
+        def flush_number():
+            # Parse the collected number words and leave a '%d' placeholder
+            try:
+                nums.append(self.parse_number(' '.join(current_num)))
+            except ValueError:
+                nums.append(-1)
+            del current_num[:]
+            out_words.append('%d')
+
+        # Split text_line by commas, whitespace, and hyphens
+        text_words = re.split(r'[,\s-]+', text_line.strip())
+        for word in text_words:
+            # Fillers such as 'and' only join a number that has already begun
+            if (word in self.mandatory_number_words
+                    or (current_num and word in self.number_words)):
+                current_num.append(word)
+            else:
+                if current_num:
+                    flush_number()
+                out_words.append(word)
+        if current_num:
+            flush_number()
+        return (' '.join(out_words).strip(), nums)
+
+if __name__ == '__main__':
+    parser = NumberParser()
+    # Read the sentence to translate from standard input
+    sentence = input('Enter a number: ')
+    # Replace the numbers in it and collect their values
+    result = parser.parse_all_numbers(sentence)
+    # Show the (text, numbers) pair that was produced
+    print('I claim that you meant the decimal number', result)
diff --git a/recognizer.py b/recognizer.py
index 3d6f4bf..4d60695 100644
--- a/recognizer.py
+++ b/recognizer.py
@@ -1,7 +1,7 @@
 # This is part of Kaylee
 # -- this code is licensed GPLv3
-# Copyright 2013 Jezra
 # Copyright 2015 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
 
 import os.path
 import sys
author	Clayton G. Hobbs <clay@lakeserv.net>	2015-12-30 23:33:29 -0500
committer	Clayton G. Hobbs <clay@lakeserv.net>	2015-12-30 23:33:29 -0500
commit	e19d76f0515b291f9c6994bfd0faccccf5b894aa (patch)
tree	db29ef0dbecee06e0788d9f72c9d327caa1b1479
parent	57f58295a48dfa4d893eb1546c5f2f64133c0e7f (diff)