diff options
author | Irene Knapp <ireneista@irenes.space> | 2025-09-06 16:22:50 -0700 |
---|---|---|
committer | Irene Knapp <ireneista@irenes.space> | 2025-09-06 16:22:50 -0700 |
commit | 63ed2e5a680b6e0e352721a8238e2918cb3e70ff (patch) | |
tree | 3f3d679b82109c6101ecc14ff153e12516dd786a /src/pollyanna | |
parent | 18e38018cf4cc8f9689c453f5013426e60bf744a (diff) |
the vanity commit (change kaylee to pollyanna everywhere)
except the copyright notices, those need more care and attention Force-Push: yes Change-Id: Ibddfb1b7a0edbb0d4adb6cfaf0ac4239537ade2f
Diffstat (limited to 'src/pollyanna')
-rw-r--r-- | src/pollyanna/__init__.py | 0 | ||||
-rw-r--r-- | src/pollyanna/gui.py | 213 | ||||
-rw-r--r-- | src/pollyanna/numbers.py | 183 | ||||
-rw-r--r-- | src/pollyanna/pollyanna.py | 209 | ||||
-rw-r--r-- | src/pollyanna/recognizer.py | 69 | ||||
-rw-r--r-- | src/pollyanna/util.py | 204 |
6 files changed, 878 insertions, 0 deletions
# ---------------------------------------------------------------------------
# src/pollyanna/__init__.py  (new file, intentionally empty)
# ---------------------------------------------------------------------------

# ---------------------------------------------------------------------------
# src/pollyanna/gui.py
# ---------------------------------------------------------------------------
# This is part of Kaylee
# -- this code is licensed GPLv3
# Copyright 2015-2016 Clayton G. Hobbs
# Portions Copyright 2013 Jezra

import sys
import gi
from gi.repository import GObject
# Gtk
gi.require_version('Gtk', '3.0')
from gi.repository import Gtk, Gdk


class GTKTrayInterface(GObject.GObject):
    """System-tray front end: a status icon with a small popup menu.

    User actions are reported through the ``command`` signal, whose single
    string argument is one of ``"listen"``, ``"stop"``,
    ``"continuous_listen"``, ``"continuous_stop"`` or ``"quit"``.
    """

    __gsignals__ = {
        'command': (GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE,
                    (GObject.TYPE_STRING,))
    }
    idle_text = "Pollyanna - Idle"
    listening_text = "Pollyanna - Listening"

    # Icon file paths.  They stay None until set_icon_*_asset() is called,
    # which the application only does when the resource was found on disk.
    icon_active = None
    icon_inactive = None

    def __init__(self, args, continuous):
        """Build the status icon and its menu.

        ``args`` is accepted for interface parity with GTKInterface and is
        not used here.  ``continuous`` selects whether run() starts in
        continuous-listen mode.
        """
        GObject.GObject.__init__(self)
        self.continuous = continuous

        self.statusicon = Gtk.StatusIcon()
        self.statusicon.set_title("Pollyanna")
        self.statusicon.set_name("Pollyanna")
        self.statusicon.set_tooltip_text(self.idle_text)
        self.statusicon.set_has_tooltip(True)
        self.statusicon.connect("activate", self.continuous_toggle)
        self.statusicon.connect("popup-menu", self.popup_menu)

        self.menu = Gtk.Menu()
        # Keyword ``label=`` avoids the deprecated positional constructor form
        self.menu_listen = Gtk.MenuItem(label='Listen')
        self.menu_continuous = Gtk.CheckMenuItem(label='Continuous')
        self.menu_quit = Gtk.MenuItem(label='Quit')
        self.menu.append(self.menu_listen)
        self.menu.append(self.menu_continuous)
        self.menu.append(self.menu_quit)
        self.menu_listen.connect("activate", self.toggle_listen)
        self.menu_continuous.connect("toggled", self.toggle_continuous)
        self.menu_quit.connect("activate", self.quit)
        self.menu.show_all()

    def continuous_toggle(self, item):
        """Flip the 'Continuous' check item (status icon was activated)."""
        checked = self.menu_continuous.get_active()
        # set_active() fires "toggled", which runs toggle_continuous()
        self.menu_continuous.set_active(not checked)

    def toggle_continuous(self, item):
        """React to the 'Continuous' check item changing state."""
        checked = self.menu_continuous.get_active()
        self.menu_listen.set_sensitive(not checked)
        if checked:
            self.menu_listen.set_label("Listen")
            self.emit('command', "continuous_listen")
            self.statusicon.set_tooltip_text(self.listening_text)
            self.set_icon_active()
        else:
            self.set_icon_inactive()
            self.statusicon.set_tooltip_text(self.idle_text)
            self.emit('command', "continuous_stop")

    def toggle_listen(self, item):
        """Start or stop a single listen, based on the menu item's label."""
        val = self.menu_listen.get_label()
        if val == "Listen":
            self.set_icon_active()
            self.emit("command", "listen")
            self.menu_listen.set_label("Stop")
            self.statusicon.set_tooltip_text(self.listening_text)
        else:
            self.set_icon_inactive()
            self.menu_listen.set_label("Listen")
            self.emit("command", "stop")
            self.statusicon.set_tooltip_text(self.idle_text)

    def popup_menu(self, item, button, time):
        """Show the context menu positioned at the status icon."""
        self.menu.popup(None, None, Gtk.StatusIcon.position_menu, item,
                        button, time)

    def run(self):
        """Show the status icon in the state selected at construction."""
        # Set the icon
        self.set_icon_inactive()
        if self.continuous:
            self.menu_continuous.set_active(True)
            self.set_icon_active()
        else:
            self.menu_continuous.set_active(False)
        self.statusicon.set_visible(True)

    def quit(self, item):
        """Hide the status icon and ask the application to quit."""
        self.statusicon.set_visible(False)
        self.emit("command", "quit")

    def finished(self, text):
        """Reset to the idle state after a recognition, unless continuous."""
        if not self.menu_continuous.get_active():
            self.menu_listen.set_label("Listen")
            self.set_icon_inactive()
            self.statusicon.set_tooltip_text(self.idle_text)

    def set_icon_active_asset(self, i):
        """Remember the path of the 'listening' icon file."""
        self.icon_active = i

    def set_icon_inactive_asset(self, i):
        """Remember the path of the 'idle' icon file."""
        self.icon_inactive = i

    def set_icon_active(self):
        """Show the 'listening' icon, if one has been provided."""
        if self.icon_active:
            self.statusicon.set_from_file(self.icon_active)

    def set_icon_inactive(self):
        """Show the 'idle' icon, if one has been provided."""
        if self.icon_inactive:
            self.statusicon.set_from_file(self.icon_inactive)


class GTKInterface(GObject.GObject):
    """Small window front end: Listen/Stop button plus a Continuous checkbox.

    User actions are reported through the ``command`` signal, whose single
    string argument is one of ``"listen"``, ``"stop"``,
    ``"continuous_listen"``, ``"continuous_stop"`` or ``"quit"``.
    """

    __gsignals__ = {
        'command': (GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE,
                    (GObject.TYPE_STRING,))
    }

    # Icon file paths.  They stay None until set_icon_*_asset() is called,
    # which the application only does when the resource was found on disk.
    icon_active = None
    icon_inactive = None

    def __init__(self, args, continuous):
        """Build the window.

        ``args`` is accepted for interface parity with GTKTrayInterface and
        is not used here.  ``continuous`` selects whether run() starts in
        continuous-listen mode.
        """
        GObject.GObject.__init__(self)
        self.continuous = continuous
        # Make a window
        self.window = Gtk.Window(Gtk.WindowType.TOPLEVEL)
        self.window.connect("delete_event", self.delete_event)
        # Give the window a name
        self.window.set_title("Pollyanna")
        self.window.set_resizable(False)

        layout = Gtk.VBox()
        self.window.add(layout)
        # Make a listen/stop button
        self.lsbutton = Gtk.Button(label="Listen")
        layout.add(self.lsbutton)
        # Make a continuous button
        self.ccheckbox = Gtk.CheckButton(label="Continuous Listen")
        layout.add(self.ccheckbox)

        # Connect the buttons
        self.lsbutton.connect("clicked", self.lsbutton_clicked)
        self.ccheckbox.connect("clicked", self.ccheckbox_clicked)

        # Add a label to the UI to display the last command
        self.label = Gtk.Label()
        layout.add(self.label)

        # Create an accelerator group for this window
        accel = Gtk.AccelGroup()
        # Add the ctrl+q to quit
        accel.connect(Gdk.keyval_from_name('q'),
                      Gdk.ModifierType.CONTROL_MASK,
                      Gtk.AccelFlags.VISIBLE, self.accel_quit)
        # Lock the group
        accel.lock()
        # Add the group to the window
        self.window.add_accel_group(accel)

    def ccheckbox_clicked(self, widget):
        """React to the 'Continuous Listen' checkbox changing state."""
        checked = self.ccheckbox.get_active()
        self.lsbutton.set_sensitive(not checked)
        if checked:
            self.lsbutton_stopped()
            self.emit('command', "continuous_listen")
            self.set_icon_active()
        else:
            self.emit('command', "continuous_stop")
            self.set_icon_inactive()

    def lsbutton_stopped(self):
        """Put the Listen/Stop button back into its 'Listen' state."""
        self.lsbutton.set_label("Listen")

    def lsbutton_clicked(self, button):
        """Start or stop a single listen, based on the button's label."""
        val = self.lsbutton.get_label()
        if val == "Listen":
            self.emit("command", "listen")
            self.lsbutton.set_label("Stop")
            # Clear the label
            self.label.set_text("")
            self.set_icon_active()
        else:
            self.lsbutton_stopped()
            self.emit("command", "stop")
            self.set_icon_inactive()

    def run(self):
        """Show the window in the state selected at construction."""
        # Set the default icon
        self.set_icon_inactive()
        self.window.show_all()
        if self.continuous:
            self.set_icon_active()
            self.ccheckbox.set_active(True)

    def accel_quit(self, accel_group, acceleratable, keyval, modifier):
        """Ctrl+Q handler: ask the application to quit."""
        self.emit("command", "quit")

    def delete_event(self, x, y):
        """Window-close handler: ask the application to quit."""
        self.emit("command", "quit")

    def finished(self, text):
        """Show the recognized text and reset the button unless continuous."""
        # If the continuous isn't pressed
        if not self.ccheckbox.get_active():
            self.lsbutton_stopped()
            self.set_icon_inactive()
        self.label.set_text(text)

    def set_icon_active_asset(self, i):
        """Remember the path of the 'listening' icon file."""
        self.icon_active = i

    def set_icon_inactive_asset(self, i):
        """Remember the path of the 'idle' icon file."""
        self.icon_inactive = i

    def set_icon_active(self):
        """Use the 'listening' icon as window icon, if one was provided."""
        if self.icon_active:
            Gtk.Window.set_default_icon_from_file(self.icon_active)

    def set_icon_inactive(self):
        """Use the 'idle' icon as window icon, if one was provided."""
        if self.icon_inactive:
            Gtk.Window.set_default_icon_from_file(self.icon_inactive)


# ---------------------------------------------------------------------------
# src/pollyanna/numbers.py
# ---------------------------------------------------------------------------
#!/usr/bin/env python
# This is part of Kaylee
# -- this code is licensed GPLv3
# Copyright 2015-2016 Clayton G. Hobbs
# Portions Copyright 2013 Jezra

import re


class NumberParser:
    """Parses integers from English strings"""

    zero = {
        'zero': 0
    }

    ones = {
        'one': 1,
        'two': 2,
        'three': 3,
        'four': 4,
        'five': 5,
        'six': 6,
        'seven': 7,
        'eight': 8,
        'nine': 9
    }

    special_ones = {
        'ten': 10,
        'eleven': 11,
        'twelve': 12,
        'thirteen': 13,
        'fourteen': 14,
        'fifteen': 15,
        'sixteen': 16,
        'seventeen': 17,
        'eighteen': 18,
        'nineteen': 19  # was misspelled 'ninteen', so never recognized
    }

    tens = {
        'twenty': 20,
        'thirty': 30,
        'forty': 40,
        'fifty': 50,
        'sixty': 60,
        'seventy': 70,
        'eighty': 80,
        'ninety': 90  # was misspelled 'ninty', so never recognized
    }

    hundred = {
        'hundred': 100
    }

    exp = {
        'thousand': 1000,
        'million': 1000000,
        'billion': 1000000000
    }

    # Filler words tolerated inside a number ("one hundred and five")
    allowed = [
        'and'
    ]

    def __init__(self):
        """Build the word lists derived from the class-level tables.

        ``mandatory_number_words`` holds every word that carries a value;
        ``number_words`` additionally holds the filler words in ``allowed``.
        """
        groups = (self.zero, self.ones, self.special_ones, self.tens,
                  self.hundred, self.exp)
        self.number_words = [word
                             for group in groups
                             for word in sorted(group)]
        self.mandatory_number_words = self.number_words.copy()
        self.number_words.extend(sorted(self.allowed))

    def parse_number(self, text_line):
        """Parse a number from English into an int

        Raises ValueError when a word is not a number word or when the
        words appear in an order that does not form a number.
        """
        value = 0          # value of the group currently being read
        partial_value = 0  # sum of the groups closed by thousand/million/...
        last_list = None   # table the previous number word came from

        # Split text_line by commas, whitespace, and hyphens
        text_line = text_line.strip()
        text_words = re.split(r'[,\s-]+', text_line)
        # Parse the number
        for word in text_words:
            if word in self.zero:
                # "zero" is only valid as the very first number word
                if last_list is not None:
                    raise ValueError('Invalid number')
                value = 0
                last_list = self.zero
            elif word in self.ones:
                if last_list in (self.zero, self.ones, self.special_ones):
                    raise ValueError('Invalid number')
                value += self.ones[word]
                last_list = self.ones
            elif word in self.special_ones:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.special_ones[word]
                last_list = self.special_ones
            elif word in self.tens:
                if last_list in (self.zero, self.ones, self.special_ones,
                                 self.tens):
                    raise ValueError('Invalid number')
                value += self.tens[word]
                last_list = self.tens
            elif word in self.hundred:
                # "hundred" must multiply something that was just read
                if last_list not in (self.ones, self.special_ones,
                                     self.tens):
                    raise ValueError('Invalid number')
                value *= self.hundred[word]
                last_list = self.hundred
            elif word in self.exp:
                if last_list in (self.zero, self.exp):
                    raise ValueError('Invalid number')
                # Close the current group and start a new one
                partial_value += value * self.exp[word]
                value = 0
                last_list = self.exp
            elif word not in self.allowed:
                raise ValueError('Invalid number')
        # Finish parsing the number
        value += partial_value
        return value

    def parse_all_numbers(self, text_line):
        """
        Parse all numbers from English to ints

        Returns a tuple whose first element is text_line with all English
        numbers replaced with "%d", and whose second element is a list
        containing all the parsed numbers as ints.  A run of number words
        that does not form a valid number is reported as -1.
        """
        nums = []
        t_numless = ''

        # Split text_line by commas, whitespace, and hyphens
        text_words = re.split(r'[,\s-]+', text_line.strip())
        # Get a string of word classes, one character per word:
        # 'm' = number word, 'a' = allowed filler, 'w' = any other word
        tw_classes = ''
        for word in text_words:
            if word in self.mandatory_number_words:
                tw_classes += 'm'
            elif word in self.allowed:
                tw_classes += 'a'
            else:
                tw_classes += 'w'

        # For each string of number words (fillers only count when they sit
        # between two number words):
        last_end = 0
        for m in re.finditer(r'm[am]*m|m', tw_classes):
            # Get the number words
            num_words = ' '.join(text_words[m.start():m.end()])
            # Parse the number and store the value
            try:
                nums.append(self.parse_number(num_words))
            except ValueError:
                nums.append(-1)
            # Add words to t_numless
            t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d '
            last_end = m.end()
        t_numless += ' '.join(text_words[last_end:])

        return (t_numless.strip(), nums)


if __name__ == '__main__':
    number_parser = NumberParser()
    # Get the words to translate
    text_line = input('Enter a string: ')
    # Parse it to an integer
    value = number_parser.parse_all_numbers(text_line)
    # Print the result
    print(value)


# ---------------------------------------------------------------------------
# src/pollyanna/pollyanna.py
# ---------------------------------------------------------------------------
# This is part of Kaylee
# -- this code is licensed GPLv3
# Copyright 2015-2016 Clayton G.
# Hobbs
# Portions Copyright 2013 Jezra

import sys
import signal
import os.path
import subprocess
from gi.repository import GObject, GLib

from pollyanna.recognizer import Recognizer
from pollyanna.util import *
from pollyanna.numbers import NumberParser


class Pollyanna:
    """Ties together configuration, the recognizer and the optional UI."""

    def __init__(self):
        """Load configuration, refresh the language model, build the UI."""
        self.ui = None
        self.options = {}
        self.continuous_listen = False
        # Always defined, so log_history() never hits a missing attribute
        self.history = []

        # Load configuration
        self.config = Config()
        self.options = vars(self.config.options)
        # "commands" only comes from the options file; without one there is
        # simply nothing to match instead of a KeyError at start-up
        self.commands = self.options.get('commands') or {}

        # Create number parser for later use
        self.number_parser = NumberParser()

        # Create a hasher
        self.hasher = Hasher(self.config)

        # Create the strings file
        self.update_voice_commands_if_changed()

        if self.options['interface']:
            if self.options['interface'] == "g":
                from pollyanna.gui import GTKInterface as UI
            elif self.options['interface'] == "gt":
                from pollyanna.gui import GTKTrayInterface as UI
            else:
                print("no GUI defined")
                # Non-zero status: an unknown interface is a usage error
                sys.exit(1)

            self.ui = UI(self.options, self.options['continuous'])
            self.ui.connect("command", self.process_command)
            # Can we load the icon resource?
            icon = self.load_resource("icon_small.png")
            if icon:
                self.ui.set_icon_active_asset(icon)
            # Can we load the icon_inactive resource?
            icon_inactive = self.load_resource("icon_inactive_small.png")
            if icon_inactive:
                self.ui.set_icon_inactive_asset(icon_inactive)

        # Update the language if necessary
        self.language_updater = LanguageUpdater(self.config)
        self.language_updater.update_language_if_changed()

        # Create the recognizer
        self.recognizer = Recognizer(self.config)
        self.recognizer.connect('finished', self.recognizer_finished)

    def update_voice_commands_if_changed(self):
        """Use hashes to test if the voice commands have changed"""
        stored_hash = self.hasher['voice_commands']

        # Calculate the hash the voice commands have right now
        hasher = self.hasher.get_hash_object()
        for voice_cmd in self.commands.keys():
            hasher.update(voice_cmd.encode('utf-8'))
            # Add a separator to avoid odd behavior
            hasher.update('\n'.encode('utf-8'))
        new_hash = hasher.hexdigest()

        if new_hash != stored_hash:
            self.create_strings_file()
            self.hasher['voice_commands'] = new_hash
            self.hasher.store()

    def create_strings_file(self):
        """Write the corpus: one line per command, then the number words."""
        # Open the strings file
        with open(self.config.strings_file, 'w') as strings:
            # Add command words to the corpus
            for voice_cmd in sorted(self.commands.keys()):
                strings.write(voice_cmd.strip().replace('%d', '') + "\n")
            # Add number words to the corpus
            for word in self.number_parser.number_words:
                strings.write(word + " ")
            strings.write("\n")

    def log_history(self, text):
        """Append ``text`` to the history file if history is enabled."""
        max_items = self.options['history']
        if not max_items:
            return

        self.history.append(text)
        if len(self.history) > max_items:
            # Pop off the first item
            self.history.pop(0)

        # Open and truncate the history file
        with open(self.config.history_file, 'w') as hfile:
            for line in self.history:
                hfile.write(line + '\n')

    def run_command(self, cmd):
        """Print the command, then run it"""
        print(cmd)
        # NOTE: cmd is a shell string taken from the user's own options file
        subprocess.call(cmd, shell=True)

    def _run_hook(self, option_name):
        """Run the shell command stored under ``option_name``, if set."""
        hook = self.options.get(option_name)
        if hook:
            subprocess.call(hook, shell=True)

    def _match_command(self, t, numt, nums):
        """Return the shell command for the recognized text, or None.

        ``t`` is tried verbatim first; then ``numt`` (the text with numbers
        replaced by "%d") is tried and its command filled in with ``nums``.
        """
        if t in self.commands:
            return self.commands[t]
        if numt in self.commands:
            try:
                return self.commands[numt].format(*nums)
            except (IndexError, KeyError, ValueError) as err:
                # A broken template must not abort the signal handler:
                # the UI would never be told that recognition finished
                print("cannot fill in command for {0!r}: {1}".format(
                    numt, err))
        return None

    def recognizer_finished(self, recognizer, text):
        """Handle a final hypothesis delivered by the recognizer."""
        t = text.lower()
        numt, nums = self.number_parser.parse_all_numbers(t)
        # Is there a matching command?
        cmd = self._match_command(t, numt, nums)
        if cmd is not None:
            # Run the valid_sentence_command if it's set
            self._run_hook('valid_sentence_command')
            # Should we be passing words?
            if self.options['pass_words']:
                cmd += " " + t
            self.run_command(cmd)
            self.log_history(text)
        else:
            # Run the invalid_sentence_command if it's set
            self._run_hook('invalid_sentence_command')
            print("no matching command {0}".format(t))
        # If there is a UI and we are not continuous listen
        if self.ui:
            if not self.continuous_listen:
                # Stop listening
                self.recognizer.pause()
            # Let the UI know that there is a finish
            self.ui.finished(t)

    def run(self):
        """Start the UI if there is one, otherwise start listening."""
        if self.ui:
            self.ui.run()
        else:
            self.recognizer.listen()

    def quit(self):
        """Terminate the process."""
        sys.exit()

    def process_command(self, UI, command):
        """Dispatch a ``command`` signal emitted by the UI."""
        print(command)
        if command == "listen":
            self.recognizer.listen()
        elif command == "stop":
            self.recognizer.pause()
        elif command == "continuous_listen":
            self.continuous_listen = True
            self.recognizer.listen()
        elif command == "continuous_stop":
            self.continuous_listen = False
            self.recognizer.pause()
        elif command == "quit":
            self.quit()

    def load_resource(self, string):
        """Return the path of resource ``string``, or False if not found."""
        # TODO: Use the Config object for this path management
        local_data = os.path.join(os.path.dirname(__file__), '..',
                                  'usr', 'share', 'pollyanna')
        paths = ["/usr/share/pollyanna/", "/usr/local/share/pollyanna",
                 local_data]
        for path in paths:
            resource = os.path.join(path, string)
            if os.path.exists(resource):
                return resource
        # If we get this far, no resource was found
        return False


def run():
    """Entry point: build the application and run the GLib main loop."""
    # Make our pollyanna object
    pollyanna = Pollyanna()
    # Init gobject threads
    GObject.threads_init()
    # We want a main loop (GObject.MainLoop is a deprecated alias of this)
    main_loop = GLib.MainLoop()
    # Handle sigint
    signal.signal(signal.SIGINT, signal.SIG_DFL)
    # Run the pollyanna
    pollyanna.run()
    # Start the main loop
    try:
        main_loop.run()
    except KeyboardInterrupt:
        # Only an interrupt is a normal way out; any other exception now
        # propagates instead of being swallowed by a bare ``except``
        main_loop.quit()
        sys.exit()


# ---------------------------------------------------------------------------
# src/pollyanna/recognizer.py
# ---------------------------------------------------------------------------
# This is part of Kaylee
# -- this code is licensed GPLv3
# Copyright 2015-2016 Clayton G. Hobbs
# Portions Copyright 2013 Jezra

import os.path
import sys

import gi
gi.require_version('Gst', '1.0')
from gi.repository import GObject, Gst
GObject.threads_init()
Gst.init(None)


def _gst_quote(value):
    """Quote ``value`` as a property value for a pipeline description."""
    escaped = str(value).replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'


class Recognizer(GObject.GObject):
    """GStreamer/pocketsphinx pipeline that emits recognized sentences.

    Each final, non-empty hypothesis is delivered through the ``finished``
    signal as a single string argument.
    """

    __gsignals__ = {
        'finished': (GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE,
                     (GObject.TYPE_STRING,))
    }

    def __init__(self, config):
        """Build the pipeline from ``config``.

        Reads ``config.options.microphone``, ``config.lang_file`` and
        ``config.dic_file``.  Re-raises whatever ``Gst.parse_launch``
        raises when the pipeline cannot be built.
        """
        GObject.GObject.__init__(self)
        self.commands = {}

        src = config.options.microphone
        # ``is not None`` rather than truthiness: card 0 is a valid choice
        if src is not None:
            audio_src = 'alsasrc device="hw:{0},0"'.format(src)
        else:
            audio_src = 'autoaudiosrc'

        # Build the pipeline; paths are quoted so that directories with
        # spaces do not split the description
        cmd = (
            audio_src +
            ' ! audioconvert' +
            ' ! audioresample' +
            ' ! pocketsphinx lm=' + _gst_quote(config.lang_file) +
            ' dict=' + _gst_quote(config.dic_file) +
            ' ! appsink sync=false'
        )
        try:
            self.pipeline = Gst.parse_launch(cmd)
        except Exception as e:
            # print(e), not e.message: not every exception has .message
            print(e)
            print("You may need to install gstreamer1.0-pocketsphinx")
            raise

        # Process results from the pipeline with self.result()
        bus = self.pipeline.get_bus()
        bus.add_signal_watch()
        bus.connect('message::element', self.result)

    def listen(self):
        """Start (or resume) recognition."""
        self.pipeline.set_state(Gst.State.PLAYING)

    def pause(self):
        """Pause recognition."""
        self.pipeline.set_state(Gst.State.PAUSED)

    def result(self, bus, msg):
        """Bus handler: forward final pocketsphinx hypotheses."""
        msg_struct = msg.get_structure()
        if msg_struct is None:
            return
        # Ignore messages that aren't from pocketsphinx
        msgtype = msg_struct.get_name()
        if msgtype != 'pocketsphinx':
            return

        # If we have a final command, send it for processing; a missing
        # hypothesis field is treated like an empty one
        command = msg_struct.get_string('hypothesis')
        if command and msg_struct.get_boolean('final')[1]:
            self.emit("finished", command)


# ---------------------------------------------------------------------------
# src/pollyanna/util.py
# ---------------------------------------------------------------------------
# This is part of Kaylee
# -- this code is licensed GPLv3
# Copyright 2015-2016 Clayton G.
# Hobbs
# Portions Copyright 2013 Jezra

import re
import json
import hashlib
import os
from argparse import ArgumentParser, Namespace

import requests

from gi.repository import GLib


class Config:
    """Keep track of the configuration of Pollyanna"""
    # Name of the program, for later use
    program_name = "pollyanna"

    # Directories
    conf_dir = os.path.join(GLib.get_user_config_dir(), program_name)
    cache_dir = os.path.join(GLib.get_user_cache_dir(), program_name)
    data_dir = os.path.join(GLib.get_user_data_dir(), program_name)

    # Configuration files
    opt_file = os.path.join(conf_dir, "options.json")

    # Cache files
    # NOTE(review): this yields "<cache_dir>/pollyannahistory"; kept as is
    # so that existing history files are still found
    history_file = os.path.join(cache_dir, program_name + "history")
    hash_file = os.path.join(cache_dir, "hash.json")

    # Data files
    strings_file = os.path.join(data_dir, "sentences.corpus")
    lang_file = os.path.join(data_dir, 'lm')
    dic_file = os.path.join(data_dir, 'dic')

    def __init__(self):
        """Create the directories, then merge options file and argv.

        The result is stored in ``self.options`` (an argparse Namespace);
        command-line arguments override values from the options file.
        """
        # Ensure necessary directories exist
        self._make_dir(self.conf_dir)
        self._make_dir(self.cache_dir)
        self._make_dir(self.data_dir)

        # Set up the argument parser
        self._parser = ArgumentParser()
        self._parser.add_argument(
            "-i", "--interface", type=str,
            dest="interface", action='store',
            help="Interface to use (if any). 'g' for GTK or 'gt' for GTK" +
            " system tray icon")

        self._parser.add_argument(
            "-c", "--continuous",
            action="store_true", dest="continuous", default=False,
            help="Start interface with 'continuous' listen enabled")

        self._parser.add_argument(
            "-p", "--pass-words",
            action="store_true", dest="pass_words", default=False,
            help="Pass the recognized words as arguments to the shell" +
            " command")

        self._parser.add_argument(
            "-H", "--history", type=int,
            action="store", dest="history",
            help="Number of commands to store in history file")

        self._parser.add_argument(
            "-m", "--microphone", type=int,
            action="store", dest="microphone", default=None,
            help="Audio input card to use (if other than system default)")

        self._parser.add_argument(
            "--valid-sentence-command", type=str,
            dest="valid_sentence_command", action='store',
            help="Command to run when a valid sentence is detected")

        self._parser.add_argument(
            "--invalid-sentence-command", type=str,
            dest="invalid_sentence_command", action='store',
            help="Command to run when an invalid sentence is detected")

        # Read the configuration file
        self._read_options_file()

        # Parse command-line arguments, overriding config file as appropriate
        self._parser.parse_args(namespace=self.options)

    def _make_dir(self, directory):
        """Create ``directory`` (and parents) if it does not exist yet."""
        # exist_ok avoids the check-then-create race of the old version
        os.makedirs(directory, exist_ok=True)

    def _read_options_file(self):
        """Load ``opt_file`` into ``self.options``; empty if it is absent."""
        try:
            with open(self.opt_file, 'r') as f:
                self.options = json.load(f)
                self.options = Namespace(**self.options)
        except FileNotFoundError:
            # Make an empty options namespace
            self.options = Namespace()


class Hasher:
    """Keep track of hashes for Pollyanna"""

    def __init__(self, config):
        """Load the stored hashes from ``config.hash_file``.

        A missing, unreadable or corrupt hash file is treated as "no
        stored hashes", which makes every check report a change.
        """
        self.config = config
        try:
            with open(self.config.hash_file, 'r') as f:
                self.hashes = json.load(f)
        except (IOError, ValueError):
            # No stored hash (ValueError covers invalid JSON)
            self.hashes = {}
        if not isinstance(self.hashes, dict):
            # Valid JSON of the wrong shape would break __setitem__ later
            self.hashes = {}

    def __getitem__(self, hashname):
        """Return the stored hash for ``hashname``, or None if unknown."""
        try:
            return self.hashes[hashname]
        except (KeyError, TypeError):
            return None

    def __setitem__(self, hashname, value):
        """Remember ``value`` as the hash for ``hashname`` (in memory)."""
        self.hashes[hashname] = value

    def get_hash_object(self):
        """Returns an object to compute a new hash"""
        return hashlib.sha256()

    def store(self):
        """Store the current hashes into the hash file"""
        with open(self.config.hash_file, 'w') as f:
            json.dump(self.hashes, f)


class LanguageUpdater:
    """
    Handles updating the language using the online lmtool.

    This class provides methods to check if the corpus has changed, and to
    update the language to match the new corpus using the lmtool.  This
    allows us to automatically update the language if the corpus has
    changed, saving the user from having to do this manually.
    """

    # Seconds to wait on the lmtool server before giving up; without a
    # timeout a stalled connection would hang start-up forever
    request_timeout = 60

    def __init__(self, config):
        self.config = config
        self.hasher = Hasher(config)

    def update_language_if_changed(self):
        """Test if the language has changed, and if it has, update it

        The new hash is only saved after a successful update, so a failed
        update is retried on the next start.
        """
        if self.language_has_changed():
            self.update_language()
            self.save_language_hash()

    def language_has_changed(self):
        """Use hashes to test if the language has changed"""
        self.stored_hash = self.hasher['language']

        # Calculate the hash the language file has right now
        hasher = self.hasher.get_hash_object()
        with open(self.config.strings_file, 'rb') as sfile:
            buf = sfile.read()
            hasher.update(buf)
        self.new_hash = hasher.hexdigest()

        return self.new_hash != self.stored_hash

    def update_language(self):
        """Update the language using the online lmtool

        Raises requests.HTTPError on an HTTP error status and RuntimeError
        when the lmtool response does not contain the expected links.
        """
        print('Updating language using online lmtool')

        # NOTE(review): plain HTTP; the downloaded model is not verified
        host = 'http://www.speech.cs.cmu.edu'
        url = host + '/cgi-bin/tools/lmtool/run'

        # Submit the corpus to the lmtool
        with open(self.config.strings_file, 'rb') as corpus:
            files = {'corpus': corpus}
            values = {'formtype': 'simple'}

            r = requests.post(url, files=files, data=values,
                              timeout=self.request_timeout)
        r.raise_for_status()
        response_text = r.text

        # Parse response to get URLs of the files we need
        path_re = r'.*<title>Index of (.*?)</title>.*'
        number_re = r'.*TAR([0-9]*?)\.tgz.*'
        path = None
        number = None
        for line in response_text.split('\n'):
            # If we found the directory, keep it and don't break
            path_match = re.search(path_re, line)
            if path_match:
                path = host + path_match.group(1)
                continue
            # If we found the number, keep it and break
            number_match = re.search(number_re, line)
            if number_match:
                number = number_match.group(1)
                break

        if path is None or number is None:
            # Previously this surfaced as a NameError on path/number
            raise RuntimeError(
                'Unexpected response from lmtool: could not find the '
                'generated language files')

        lm_url = path + '/' + number + '.lm'
        dic_url = path + '/' + number + '.dic'

        self._download_file(lm_url, self.config.lang_file)
        self._download_file(dic_url, self.config.dic_file)

    def save_language_hash(self):
        """Persist the hash computed by language_has_changed()."""
        self.hasher['language'] = self.new_hash
        self.hasher.store()

    def _download_file(self, url, path):
        """Download ``url`` to ``path``.

        Raises requests.HTTPError on an HTTP error status instead of
        silently leaving ``path`` untouched.
        """
        r = requests.get(url, stream=True, timeout=self.request_timeout)
        r.raise_for_status()
        # Write to a temporary name first so an interrupted download never
        # leaves a truncated model file in place
        tmp_path = path + '.part'
        with open(tmp_path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=8192):
                f.write(chunk)
        os.replace(tmp_path, path)