diff options
Diffstat (limited to 'kayleevc')
-rw-r--r-- | kayleevc/__init__.py | 0 | ||||
-rw-r--r-- | kayleevc/gui.py | 213 | ||||
-rw-r--r-- | kayleevc/kaylee.py | 207 | ||||
-rw-r--r-- | kayleevc/numbers.py | 183 | ||||
-rw-r--r-- | kayleevc/recognizer.py | 69 | ||||
-rw-r--r-- | kayleevc/util.py | 204 |
6 files changed, 0 insertions, 876 deletions
diff --git a/kayleevc/__init__.py b/kayleevc/__init__.py deleted file mode 100644 index e69de29..0000000 --- a/kayleevc/__init__.py +++ /dev/null diff --git a/kayleevc/gui.py b/kayleevc/gui.py deleted file mode 100644 index 27085a8..0000000 --- a/kayleevc/gui.py +++ /dev/null @@ -1,213 +0,0 @@ -# This is part of Kaylee -# -- this code is licensed GPLv3 -# Copyright 2015-2016 Clayton G. Hobbs -# Portions Copyright 2013 Jezra - -import sys -import gi -from gi.repository import GObject -# Gtk -gi.require_version('Gtk', '3.0') -from gi.repository import Gtk, Gdk - - -class GTKTrayInterface(GObject.GObject): - __gsignals__ = { - 'command' : (GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE, - (GObject.TYPE_STRING,)) - } - idle_text = "Kaylee - Idle" - listening_text = "Kaylee - Listening" - - def __init__(self, args, continuous): - GObject.GObject.__init__(self) - self.continuous = continuous - - self.statusicon = Gtk.StatusIcon() - self.statusicon.set_title("Kaylee") - self.statusicon.set_name("Kaylee") - self.statusicon.set_tooltip_text(self.idle_text) - self.statusicon.set_has_tooltip(True) - self.statusicon.connect("activate", self.continuous_toggle) - self.statusicon.connect("popup-menu", self.popup_menu) - - self.menu = Gtk.Menu() - self.menu_listen = Gtk.MenuItem('Listen') - self.menu_continuous = Gtk.CheckMenuItem('Continuous') - self.menu_quit = Gtk.MenuItem('Quit') - self.menu.append(self.menu_listen) - self.menu.append(self.menu_continuous) - self.menu.append(self.menu_quit) - self.menu_listen.connect("activate", self.toggle_listen) - self.menu_continuous.connect("toggled", self.toggle_continuous) - self.menu_quit.connect("activate", self.quit) - self.menu.show_all() - - def continuous_toggle(self, item): - checked = self.menu_continuous.get_active() - self.menu_continuous.set_active(not checked) - - def toggle_continuous(self, item): - checked = self.menu_continuous.get_active() - self.menu_listen.set_sensitive(not checked) - if checked: - self.menu_listen.set_label("Listen") - self.emit('command', "continuous_listen") - self.statusicon.set_tooltip_text(self.listening_text) - self.set_icon_active() - else: - self.set_icon_inactive() - self.statusicon.set_tooltip_text(self.idle_text) - self.emit('command', "continuous_stop") - - def toggle_listen(self, item): - val = self.menu_listen.get_label() - if val == "Listen": - self.set_icon_active() - self.emit("command", "listen") - self.menu_listen.set_label("Stop") - self.statusicon.set_tooltip_text(self.listening_text) - else: - self.set_icon_inactive() - self.menu_listen.set_label("Listen") - self.emit("command", "stop") - self.statusicon.set_tooltip_text(self.idle_text) - - def popup_menu(self, item, button, time): - self.menu.popup(None, None, Gtk.StatusIcon.position_menu, item, button, time) - - def run(self): - # Set the icon - self.set_icon_inactive() - if self.continuous: - self.menu_continuous.set_active(True) - self.set_icon_active() - else: - self.menu_continuous.set_active(False) - self.statusicon.set_visible(True) - - def quit(self, item): - self.statusicon.set_visible(False) - self.emit("command", "quit") - - def finished(self, text): - if not self.menu_continuous.get_active(): - self.menu_listen.set_label("Listen") - self.set_icon_inactive() - self.statusicon.set_tooltip_text(self.idle_text) - - def set_icon_active_asset(self, i): - self.icon_active = i - - def set_icon_inactive_asset(self, i): - self.icon_inactive = i - - def set_icon_active(self): - self.statusicon.set_from_file(self.icon_active) - - def set_icon_inactive(self): - self.statusicon.set_from_file(self.icon_inactive) - - -class GTKInterface(GObject.GObject): - __gsignals__ = { - 'command': (GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE, - (GObject.TYPE_STRING,)) - } - - def __init__(self, args, continuous): - GObject.GObject.__init__(self) - self.continuous = continuous - # Make a window - self.window = Gtk.Window(Gtk.WindowType.TOPLEVEL) - self.window.connect("delete_event", self.delete_event) - # Give the window a name - self.window.set_title("Kaylee") - self.window.set_resizable(False) - - layout = Gtk.VBox() - self.window.add(layout) - # Make a listen/stop button - self.lsbutton = Gtk.Button("Listen") - layout.add(self.lsbutton) - # Make a continuous button - self.ccheckbox = Gtk.CheckButton("Continuous Listen") - layout.add(self.ccheckbox) - - # Connect the buttons - self.lsbutton.connect("clicked", self.lsbutton_clicked) - self.ccheckbox.connect("clicked", self.ccheckbox_clicked) - - # Add a label to the UI to display the last command - self.label = Gtk.Label() - layout.add(self.label) - - # Create an accellerator group for this window - accel = Gtk.AccelGroup() - # Add the ctrl+q to quit - accel.connect(Gdk.keyval_from_name('q'), Gdk.ModifierType.CONTROL_MASK, - Gtk.AccelFlags.VISIBLE, self.accel_quit) - # Lock the group - accel.lock() - # Add the group to the window - self.window.add_accel_group(accel) - - def ccheckbox_clicked(self, widget): - checked = self.ccheckbox.get_active() - self.lsbutton.set_sensitive(not checked) - if checked: - self.lsbutton_stopped() - self.emit('command', "continuous_listen") - self.set_icon_active() - else: - self.emit('command', "continuous_stop") - self.set_icon_inactive() - - def lsbutton_stopped(self): - self.lsbutton.set_label("Listen") - - def lsbutton_clicked(self, button): - val = self.lsbutton.get_label() - if val == "Listen": - self.emit("command", "listen") - self.lsbutton.set_label("Stop") - # Clear the label - self.label.set_text("") - self.set_icon_active() - else: - self.lsbutton_stopped() - self.emit("command", "stop") - self.set_icon_inactive() - - def run(self): - # Set the default icon - self.set_icon_inactive() - self.window.show_all() - if self.continuous: - self.set_icon_active() - self.ccheckbox.set_active(True) - - def accel_quit(self, accel_group, acceleratable, keyval, modifier): - self.emit("command", "quit") - - def delete_event(self, x, y): - self.emit("command", "quit") - - def finished(self, text): - # If the continuous isn't pressed - if not self.ccheckbox.get_active(): - self.lsbutton_stopped() - self.set_icon_inactive() - self.label.set_text(text) - - def set_icon_active_asset(self, i): - self.icon_active = i - - def set_icon_inactive_asset(self, i): - self.icon_inactive = i - - def set_icon_active(self): - Gtk.Window.set_default_icon_from_file(self.icon_active) - - def set_icon_inactive(self): - Gtk.Window.set_default_icon_from_file(self.icon_inactive) diff --git a/kayleevc/kaylee.py b/kayleevc/kaylee.py deleted file mode 100644 index 4e99d1a..0000000 --- a/kayleevc/kaylee.py +++ /dev/null @@ -1,207 +0,0 @@ -# This is part of Kaylee -# -- this code is licensed GPLv3 -# Copyright 2015-2016 Clayton G. Hobbs -# Portions Copyright 2013 Jezra - -import sys -import signal -import os.path -import subprocess -from gi.repository import GObject, GLib - -from kayleevc.recognizer import Recognizer -from kayleevc.util import * -from kayleevc.numbers import NumberParser - - -class Kaylee: - - def __init__(self): - self.ui = None - self.options = {} - self.continuous_listen = False - - # Load configuration - self.config = Config() - self.options = vars(self.config.options) - self.commands = self.options['commands'] - - # Create number parser for later use - self.number_parser = NumberParser() - - # Create a hasher - self.hasher = Hasher(self.config) - - # Create the strings file - self.update_voice_commands_if_changed() - - if self.options['interface']: - if self.options['interface'] == "g": - from kayleevc.gui import GTKInterface as UI - elif self.options['interface'] == "gt": - from kayleevc.gui import GTKTrayInterface as UI - else: - print("no GUI defined") - sys.exit() - - self.ui = UI(self.options, self.options['continuous']) - self.ui.connect("command", self.process_command) - # Can we load the icon resource? - icon = self.load_resource("icon_small.png") - if icon: - self.ui.set_icon_active_asset(icon) - # Can we load the icon_inactive resource? - icon_inactive = self.load_resource("icon_inactive_small.png") - if icon_inactive: - self.ui.set_icon_inactive_asset(icon_inactive) - - if self.options['history']: - self.history = [] - - # Update the language if necessary - self.language_updater = LanguageUpdater(self.config) - self.language_updater.update_language_if_changed() - - # Create the recognizer - self.recognizer = Recognizer(self.config) - self.recognizer.connect('finished', self.recognizer_finished) - - def update_voice_commands_if_changed(self): - """Use hashes to test if the voice commands have changed""" - stored_hash = self.hasher['voice_commands'] - - # Calculate the hash the voice commands have right now - hasher = self.hasher.get_hash_object() - for voice_cmd in self.commands.keys(): - hasher.update(voice_cmd.encode('utf-8')) - # Add a separator to avoid odd behavior - hasher.update('\n'.encode('utf-8')) - new_hash = hasher.hexdigest() - - if new_hash != stored_hash: - self.create_strings_file() - self.hasher['voice_commands'] = new_hash - self.hasher.store() - - def create_strings_file(self): - # Open the strings file - with open(self.config.strings_file, 'w') as strings: - # Add command words to the corpus - for voice_cmd in sorted(self.commands.keys()): - strings.write(voice_cmd.strip().replace('%d', '') + "\n") - # Add number words to the corpus - for word in self.number_parser.number_words: - strings.write(word + " ") - strings.write("\n") - - def log_history(self, text): - if self.options['history']: - self.history.append(text) - if len(self.history) > self.options['history']: - # Pop off the first item - self.history.pop(0) - - # Open and truncate the history file - with open(self.config.history_file, 'w') as hfile: - for line in self.history: - hfile.write(line + '\n') - - def run_command(self, cmd): - """Print the command, then run it""" - print(cmd) - subprocess.call(cmd, shell=True) - - def recognizer_finished(self, recognizer, text): - t = text.lower() - numt, nums = self.number_parser.parse_all_numbers(t) - # Is there a matching command? - if t in self.commands: - # Run the valid_sentence_command if it's set - if self.options['valid_sentence_command']: - subprocess.call(self.options['valid_sentence_command'], - shell=True) - cmd = self.commands[t] - # Should we be passing words? - if self.options['pass_words']: - cmd += " " + t - self.run_command(cmd) - self.log_history(text) - elif numt in self.commands: - # Run the valid_sentence_command if it's set - if self.options['valid_sentence_command']: - subprocess.call(self.options['valid_sentence_command'], - shell=True) - cmd = self.commands[numt] - cmd = cmd.format(*nums) - # Should we be passing words? - if self.options['pass_words']: - cmd += " " + t - self.run_command(cmd) - self.log_history(text) - else: - # Run the invalid_sentence_command if it's set - if self.options['invalid_sentence_command']: - subprocess.call(self.options['invalid_sentence_command'], - shell=True) - print("no matching command {0}".format(t)) - # If there is a UI and we are not continuous listen - if self.ui: - if not self.continuous_listen: - # Stop listening - self.recognizer.pause() - # Let the UI know that there is a finish - self.ui.finished(t) - - def run(self): - if self.ui: - self.ui.run() - else: - self.recognizer.listen() - - def quit(self): - sys.exit() - - def process_command(self, UI, command): - print(command) - if command == "listen": - self.recognizer.listen() - elif command == "stop": - self.recognizer.pause() - elif command == "continuous_listen": - self.continuous_listen = True - self.recognizer.listen() - elif command == "continuous_stop": - self.continuous_listen = False - self.recognizer.pause() - elif command == "quit": - self.quit() - - def load_resource(self, string): - # TODO: Use the Config object for this path management - local_data = os.path.join(os.path.dirname(__file__), '..', 'data') - paths = ["/usr/share/kaylee/", "/usr/local/share/kaylee", local_data] - for path in paths: - resource = os.path.join(path, string) - if os.path.exists(resource): - return resource - # If we get this far, no resource was found - return False - - -def run(): - # Make our kaylee object - kaylee = Kaylee() - # Init gobject threads - GObject.threads_init() - # We want a main loop - main_loop = GObject.MainLoop() - # Handle sigint - signal.signal(signal.SIGINT, signal.SIG_DFL) - # Run the kaylee - kaylee.run() - # Start the main loop - try: - main_loop.run() - except: - main_loop.quit() - sys.exit() diff --git a/kayleevc/numbers.py b/kayleevc/numbers.py deleted file mode 100644 index be0036f..0000000 --- a/kayleevc/numbers.py +++ /dev/null @@ -1,183 +0,0 @@ -#!/usr/bin/env python -# This is part of Kaylee -# -- this code is licensed GPLv3 -# Copyright 2015-2016 Clayton G. Hobbs -# Portions Copyright 2013 Jezra - -import re - - -class NumberParser: - """Parses integers from English strings""" - - zero = { - 'zero': 0 - } - - ones = { - 'one': 1, - 'two': 2, - 'three': 3, - 'four': 4, - 'five': 5, - 'six': 6, - 'seven': 7, - 'eight': 8, - 'nine': 9 - } - - special_ones = { - 'ten': 10, - 'eleven': 11, - 'twelve': 12, - 'thirteen': 13, - 'fourteen': 14, - 'fifteen': 15, - 'sixteen': 16, - 'seventeen': 17, - 'eighteen': 18, - 'ninteen': 19 - } - - tens = { - 'twenty': 20, - 'thirty': 30, - 'forty': 40, - 'fifty': 50, - 'sixty': 60, - 'seventy': 70, - 'eighty': 80, - 'ninty': 90 - } - - hundred = { - 'hundred': 100 - } - - exp = { - 'thousand': 1000, - 'million': 1000000, - 'billion': 1000000000 - } - - allowed = [ - 'and' - ] - - def __init__(self): - self.number_words = [] - for word in sorted(self.zero.keys()): - self.number_words.append(word) - for word in sorted(self.ones.keys()): - self.number_words.append(word) - for word in sorted(self.special_ones.keys()): - self.number_words.append(word) - for word in sorted(self.tens.keys()): - self.number_words.append(word) - for word in sorted(self.hundred.keys()): - self.number_words.append(word) - for word in sorted(self.exp.keys()): - self.number_words.append(word) - self.mandatory_number_words = self.number_words.copy() - for word in sorted(self.allowed): - self.number_words.append(word) - - def parse_number(self, text_line): - """Parse a number from English into an int""" - value = 0 - partial_value = 0 - last_list = None - - # Split text_line by commas, whitespace, and hyphens - text_line = text_line.strip() - text_words = re.split(r'[,\s-]+', text_line) - # Parse the number - for word in text_words: - if word in self.zero: - if last_list is not None: - raise ValueError('Invalid number') - value = 0 - last_list = self.zero - elif word in self.ones: - if last_list in (self.zero, self.ones, self.special_ones): - raise ValueError('Invalid number') - value += self.ones[word] - last_list = self.ones - elif word in self.special_ones: - if last_list in (self.zero, self.ones, self.special_ones, self.tens): - raise ValueError('Invalid number') - value += self.special_ones[word] - last_list = self.special_ones - elif word in self.tens: - if last_list in (self.zero, self.ones, self.special_ones, self.tens): - raise ValueError('Invalid number') - value += self.tens[word] - last_list = self.tens - elif word in self.hundred: - if last_list not in (self.ones, self.special_ones, self.tens): - raise ValueError('Invalid number') - value *= self.hundred[word] - last_list = self.hundred - elif word in self.exp: - if last_list in (self.zero, self.exp): - raise ValueError('Invalid number') - partial_value += value * self.exp[word] - value = 0 - last_list = self.exp - elif word not in self.allowed: - raise ValueError('Invalid number') - # Debugging information - #print(word, value, partial_value) - # Finish parsing the number - value += partial_value - return value - - def parse_all_numbers(self, text_line): - """ - Parse all numbers from English to ints - - Returns a tuple whose first element is text_line with all English - numbers replaced with "%d", and whose second element is a list - containing all the parsed numbers as ints. - """ - nums = [] - t_numless = '' - - # Split text_line by commas, whitespace, and hyphens - text_words = re.split(r'[,\s-]+', text_line.strip()) - # Get a string of word classes - tw_classes = '' - for word in text_words: - if word in self.mandatory_number_words: - tw_classes += 'm' - elif word in self.allowed: - tw_classes += 'a' - else: - tw_classes += 'w' - - # For each string of number words: - last_end = 0 - for m in re.finditer('m[am]*m|m', tw_classes): - # Get the number words - num_words = ' '.join(text_words[m.start():m.end()]) - # Parse the number and store the value - try: - nums.append(self.parse_number(num_words)) - except ValueError: - nums.append(-1) - # Add words to t_numless - t_numless += ' '.join(text_words[last_end:m.start()]) + ' %d ' - last_end = m.end() - t_numless += ' '.join(text_words[last_end:]) - - return (t_numless.strip(), nums) - - -if __name__ == '__main__': - np = NumberParser() - # Get the words to translate - text_line = input('Enter a string: ') - # Parse it to an integer - value = np.parse_all_numbers(text_line) - # Print the result - print(value) diff --git a/kayleevc/recognizer.py b/kayleevc/recognizer.py deleted file mode 100644 index 09e14e4..0000000 --- a/kayleevc/recognizer.py +++ /dev/null @@ -1,69 +0,0 @@ -# This is part of Kaylee -# -- this code is licensed GPLv3 -# Copyright 2015-2016 Clayton G. Hobbs -# Portions Copyright 2013 Jezra - -import os.path -import sys - -import gi -gi.require_version('Gst', '1.0') -from gi.repository import GObject, Gst -GObject.threads_init() -Gst.init(None) - - -class Recognizer(GObject.GObject): - __gsignals__ = { - 'finished' : (GObject.SIGNAL_RUN_LAST, GObject.TYPE_NONE, - (GObject.TYPE_STRING,)) - } - - def __init__(self, config): - GObject.GObject.__init__(self) - self.commands = {} - - src = config.options.microphone - if src: - audio_src = 'alsasrc device="hw:{0},0"'.format(src) - else: - audio_src = 'autoaudiosrc' - - # Build the pipeline - cmd = ( - audio_src + - ' ! audioconvert' + - ' ! audioresample' + - ' ! pocketsphinx lm=' + config.lang_file + ' dict=' + - config.dic_file + - ' ! appsink sync=false' - ) - try: - self.pipeline = Gst.parse_launch(cmd) - except Exception as e: - print(e.message) - print("You may need to install gstreamer1.0-pocketsphinx") - raise e - - # Process results from the pipeline with self.result() - bus = self.pipeline.get_bus() - bus.add_signal_watch() - bus.connect('message::element', self.result) - - def listen(self): - self.pipeline.set_state(Gst.State.PLAYING) - - def pause(self): - self.pipeline.set_state(Gst.State.PAUSED) - - def result(self, bus, msg): - msg_struct = msg.get_structure() - # Ignore messages that aren't from pocketsphinx - msgtype = msg_struct.get_name() - if msgtype != 'pocketsphinx': - return - - # If we have a final command, send it for processing - command = msg_struct.get_string('hypothesis') - if command != '' and msg_struct.get_boolean('final')[1]: - self.emit("finished", command) diff --git a/kayleevc/util.py b/kayleevc/util.py deleted file mode 100644 index 7984dc3..0000000 --- a/kayleevc/util.py +++ /dev/null @@ -1,204 +0,0 @@ -# This is part of Kaylee -# -- this code is licensed GPLv3 -# Copyright 2015-2016 Clayton G. Hobbs -# Portions Copyright 2013 Jezra - -import re -import json -import hashlib -import os -from argparse import ArgumentParser, Namespace - -import requests - -from gi.repository import GLib - - -class Config: - """Keep track of the configuration of Kaylee""" - # Name of the program, for later use - program_name = "kaylee" - - # Directories - conf_dir = os.path.join(GLib.get_user_config_dir(), program_name) - cache_dir = os.path.join(GLib.get_user_cache_dir(), program_name) - data_dir = os.path.join(GLib.get_user_data_dir(), program_name) - - # Configuration files - opt_file = os.path.join(conf_dir, "options.json") - - # Cache files - history_file = os.path.join(cache_dir, program_name + "history") - hash_file = os.path.join(cache_dir, "hash.json") - - # Data files - strings_file = os.path.join(data_dir, "sentences.corpus") - lang_file = os.path.join(data_dir, 'lm') - dic_file = os.path.join(data_dir, 'dic') - - def __init__(self): - # Ensure necessary directories exist - self._make_dir(self.conf_dir) - self._make_dir(self.cache_dir) - self._make_dir(self.data_dir) - - # Set up the argument parser - self._parser = ArgumentParser() - self._parser.add_argument("-i", "--interface", type=str, - dest="interface", action='store', - help="Interface to use (if any). 'g' for GTK or 'gt' for GTK" + - " system tray icon") - - self._parser.add_argument("-c", "--continuous", - action="store_true", dest="continuous", default=False, - help="Start interface with 'continuous' listen enabled") - - self._parser.add_argument("-p", "--pass-words", - action="store_true", dest="pass_words", default=False, - help="Pass the recognized words as arguments to the shell" + - " command") - - self._parser.add_argument("-H", "--history", type=int, - action="store", dest="history", - help="Number of commands to store in history file") - - self._parser.add_argument("-m", "--microphone", type=int, - action="store", dest="microphone", default=None, - help="Audio input card to use (if other than system default)") - - self._parser.add_argument("--valid-sentence-command", type=str, - dest="valid_sentence_command", action='store', - help="Command to run when a valid sentence is detected") - - self._parser.add_argument("--invalid-sentence-command", type=str, - dest="invalid_sentence_command", action='store', - help="Command to run when an invalid sentence is detected") - - # Read the configuration file - self._read_options_file() - - # Parse command-line arguments, overriding config file as appropriate - self._parser.parse_args(namespace=self.options) - - def _make_dir(self, directory): - if not os.path.exists(directory): - os.makedirs(directory) - - def _read_options_file(self): - try: - with open(self.opt_file, 'r') as f: - self.options = json.load(f) - self.options = Namespace(**self.options) - except FileNotFoundError: - # Make an empty options namespace - self.options = Namespace() - - -class Hasher: - """Keep track of hashes for Kaylee""" - - def __init__(self, config): - self.config = config - try: - with open(self.config.hash_file, 'r') as f: - self.hashes = json.load(f) - except IOError: - # No stored hash - self.hashes = {} - - def __getitem__(self, hashname): - try: - return self.hashes[hashname] - except (KeyError, TypeError): - return None - - def __setitem__(self, hashname, value): - self.hashes[hashname] = value - - def get_hash_object(self): - """Returns an object to compute a new hash""" - return hashlib.sha256() - - def store(self): - """Store the current hashes into a the hash file""" - with open(self.config.hash_file, 'w') as f: - json.dump(self.hashes, f) - - -class LanguageUpdater: - """ - Handles updating the language using the online lmtool. - - This class provides methods to check if the corpus has changed, and to - update the language to match the new corpus using the lmtool. This allows - us to automatically update the language if the corpus has changed, saving - the user from having to do this manually. - """ - - def __init__(self, config): - self.config = config - self.hasher = Hasher(config) - - def update_language_if_changed(self): - """Test if the language has changed, and if it has, update it""" - if self.language_has_changed(): - self.update_language() - self.save_language_hash() - - def language_has_changed(self): - """Use hashes to test if the language has changed""" - self.stored_hash = self.hasher['language'] - - # Calculate the hash the language file has right now - hasher = self.hasher.get_hash_object() - with open(self.config.strings_file, 'rb') as sfile: - buf = sfile.read() - hasher.update(buf) - self.new_hash = hasher.hexdigest() - - return self.new_hash != self.stored_hash - - def update_language(self): - """Update the language using the online lmtool""" - print('Updating language using online lmtool') - - host = 'http://www.speech.cs.cmu.edu' - url = host + '/cgi-bin/tools/lmtool/run' - - # Submit the corpus to the lmtool - response_text = "" - with open(self.config.strings_file, 'rb') as corpus: - files = {'corpus': corpus} - values = {'formtype': 'simple'} - - r = requests.post(url, files=files, data=values) - response_text = r.text - - # Parse response to get URLs of the files we need - path_re = r'.*<title>Index of (.*?)</title>.*' - number_re = r'.*TAR([0-9]*?)\.tgz.*' - for line in response_text.split('\n'): - # If we found the directory, keep it and don't break - if re.search(path_re, line): - path = host + re.sub(path_re, r'\1', line) - # If we found the number, keep it and break - elif re.search(number_re, line): - number = re.sub(number_re, r'\1', line) - break - - lm_url = path + '/' + number + '.lm' - dic_url = path + '/' + number + '.dic' - - self._download_file(lm_url, self.config.lang_file) - self._download_file(dic_url, self.config.dic_file) - - def save_language_hash(self): - self.hasher['language'] = self.new_hash - self.hasher.store() - - def _download_file(self, url, path): - r = requests.get(url, stream=True) - if r.status_code == 200: - with open(path, 'wb') as f: - for chunk in r: - f.write(chunk) |