summary refs log tree commit diff
path: root/src/pollyanna/util.py
diff options
context:
space:
mode:
authorIrene Knapp <ireneista@irenes.space>2025-09-06 16:22:50 -0700
committerIrene Knapp <ireneista@irenes.space>2025-09-06 16:22:50 -0700
commit63ed2e5a680b6e0e352721a8238e2918cb3e70ff (patch)
tree3f3d679b82109c6101ecc14ff153e12516dd786a /src/pollyanna/util.py
parent18e38018cf4cc8f9689c453f5013426e60bf744a (diff)
the vanity commit (change kaylee to pollyanna everywhere)
except the copyright notices, those need more care and attention

Force-Push: yes
Change-Id: Ibddfb1b7a0edbb0d4adb6cfaf0ac4239537ade2f
Diffstat (limited to 'src/pollyanna/util.py')
-rw-r--r--src/pollyanna/util.py204
1 files changed, 204 insertions, 0 deletions
diff --git a/src/pollyanna/util.py b/src/pollyanna/util.py
new file mode 100644
index 0000000..6229669
--- /dev/null
+++ b/src/pollyanna/util.py
@@ -0,0 +1,204 @@
+# This is part of Kaylee
+# -- this code is licensed GPLv3
+# Copyright 2015-2016 Clayton G. Hobbs
+# Portions Copyright 2013 Jezra
+
+import re
+import json
+import hashlib
+import os
+from argparse import ArgumentParser, Namespace
+
+import requests
+
+from gi.repository import GLib
+
+
+class Config:
+    """Keep track of the configuration of Pollyanna.
+
+    Creating an instance makes sure the configuration, cache and data
+    directories exist, loads the options file if there is one, and then
+    parses the command line.  The result is stored in ``self.options`` as
+    an ``argparse.Namespace``.
+    """
+    # Name of the program, for later use
+    program_name = "pollyanna"
+
+    # Directories
+    conf_dir = os.path.join(GLib.get_user_config_dir(), program_name)
+    cache_dir = os.path.join(GLib.get_user_cache_dir(), program_name)
+    data_dir = os.path.join(GLib.get_user_data_dir(), program_name)
+
+    # Configuration files
+    opt_file = os.path.join(conf_dir, "options.json")
+
+    # Cache files
+    history_file = os.path.join(cache_dir, program_name + "history")
+    hash_file = os.path.join(cache_dir, "hash.json")
+
+    # Data files
+    strings_file = os.path.join(data_dir, "sentences.corpus")
+    lang_file = os.path.join(data_dir, 'lm')
+    dic_file = os.path.join(data_dir, 'dic')
+
+    def __init__(self):
+        """Create the directories, build the parser and load the options."""
+        # Ensure necessary directories exist
+        self._make_dir(self.conf_dir)
+        self._make_dir(self.cache_dir)
+        self._make_dir(self.data_dir)
+
+        # Set up the argument parser
+        self._parser = ArgumentParser()
+        self._parser.add_argument("-i", "--interface", type=str,
+                dest="interface", action='store',
+                help="Interface to use (if any). 'g' for GTK or 'gt' for GTK" +
+                " system tray icon")
+
+        self._parser.add_argument("-c", "--continuous",
+                action="store_true", dest="continuous", default=False,
+                help="Start interface with 'continuous' listen enabled")
+
+        self._parser.add_argument("-p", "--pass-words",
+                action="store_true", dest="pass_words", default=False,
+                help="Pass the recognized words as arguments to the shell" +
+                " command")
+
+        self._parser.add_argument("-H", "--history", type=int,
+                action="store", dest="history",
+                help="Number of commands to store in history file")
+
+        self._parser.add_argument("-m", "--microphone", type=int,
+                action="store", dest="microphone", default=None,
+                help="Audio input card to use (if other than system default)")
+
+        self._parser.add_argument("--valid-sentence-command", type=str,
+                dest="valid_sentence_command", action='store',
+                help="Command to run when a valid sentence is detected")
+
+        self._parser.add_argument("--invalid-sentence-command", type=str,
+                dest="invalid_sentence_command", action='store',
+                help="Command to run when an invalid sentence is detected")
+
+        # Read the configuration file
+        self._read_options_file()
+
+        # Parse command-line arguments, overriding config file as appropriate.
+        # argparse only applies an argument's default when the namespace does
+        # not already have that attribute, so values loaded from the options
+        # file are kept unless the corresponding flag is given.
+        self._parser.parse_args(namespace=self.options)
+
+    def _make_dir(self, directory):
+        """Create ``directory`` (and any missing parents) if necessary."""
+        # exist_ok avoids the check-then-create race of testing
+        # os.path.exists() first: another process may create the directory
+        # between the check and the makedirs() call.
+        os.makedirs(directory, exist_ok=True)
+
+    def _read_options_file(self):
+        """Load ``self.options`` from the JSON options file.
+
+        When the file does not exist, ``self.options`` becomes an empty
+        namespace.
+        """
+        try:
+            with open(self.opt_file, 'r') as f:
+                self.options = Namespace(**json.load(f))
+        except FileNotFoundError:
+            # Make an empty options namespace
+            self.options = Namespace()
+
+
+class Hasher:
+    """Keep track of hashes for Pollyanna.
+
+    The hashes live in ``self.hashes``, a dictionary that is loaded from and
+    stored to the JSON file named by ``config.hash_file``.
+    """
+
+    def __init__(self, config):
+        """Load the stored hashes from ``config.hash_file``.
+
+        A hash file that is missing, unreadable, not valid JSON, or that does
+        not contain a JSON object is treated as "no stored hashes".  The file
+        is only a cache, so a damaged copy should not stop the program.
+        """
+        self.config = config
+        try:
+            with open(self.config.hash_file, 'r') as f:
+                hashes = json.load(f)
+        except (OSError, ValueError):
+            # No stored hash (json.JSONDecodeError is a ValueError)
+            hashes = {}
+        # Anything but a dictionary would break __setitem__ later on
+        self.hashes = hashes if isinstance(hashes, dict) else {}
+
+    def __getitem__(self, hashname):
+        """Return the hash stored under ``hashname``, or None if absent."""
+        try:
+            return self.hashes[hashname]
+        except (KeyError, TypeError):
+            return None
+
+    def __setitem__(self, hashname, value):
+        """Remember ``value`` under ``hashname``; call store() to persist."""
+        self.hashes[hashname] = value
+
+    def get_hash_object(self):
+        """Returns an object to compute a new hash"""
+        return hashlib.sha256()
+
+    def store(self):
+        """Store the current hashes into the hash file"""
+        with open(self.config.hash_file, 'w') as f:
+            json.dump(self.hashes, f)
+
+
+class LanguageUpdater:
+    """
+    Handles updating the language using the online lmtool.
+
+    This class provides methods to check if the corpus has changed, and to
+    update the language to match the new corpus using the lmtool.  This allows
+    us to automatically update the language if the corpus has changed, saving
+    the user from having to do this manually.
+    """
+
+    def __init__(self, config):
+        """Remember ``config`` and load the stored hashes."""
+        self.config = config
+        self.hasher = Hasher(config)
+
+    def update_language_if_changed(self):
+        """Test if the language has changed, and if it has, update it
+
+        The new hash is saved only when the update succeeded, so a failed
+        download is retried on the next call instead of being mistaken for
+        an up-to-date language.
+        """
+        if self.language_has_changed() and self.update_language():
+            self.save_language_hash()
+
+    def language_has_changed(self):
+        """Use hashes to test if the language has changed
+
+        Sets ``self.stored_hash`` and ``self.new_hash`` as a side effect and
+        returns True when they differ.
+        """
+        self.stored_hash = self.hasher['language']
+
+        # Calculate the hash the language file has right now
+        hasher = self.hasher.get_hash_object()
+        with open(self.config.strings_file, 'rb') as sfile:
+            buf = sfile.read()
+            hasher.update(buf)
+        self.new_hash = hasher.hexdigest()
+
+        return self.new_hash != self.stored_hash
+
+    def update_language(self):
+        """Update the language using the online lmtool
+
+        Returns True when both the language model and the dictionary were
+        downloaded, False when at least one download failed.
+
+        Raises RuntimeError when the lmtool response does not contain the
+        result directory and file number needed to build the download URLs.
+        """
+        print('Updating language using online lmtool')
+
+        host = 'http://www.speech.cs.cmu.edu'
+        url = host + '/cgi-bin/tools/lmtool/run'
+
+        # Submit the corpus to the lmtool
+        with open(self.config.strings_file, 'rb') as corpus:
+            files = {'corpus': corpus}
+            values = {'formtype': 'simple'}
+
+            r = requests.post(url, files=files, data=values)
+            response_text = r.text
+
+        # Parse response to get URLs of the files we need
+        path_re = r'.*<title>Index of (.*?)</title>.*'
+        number_re = r'.*TAR([0-9]*?)\.tgz.*'
+        path = None
+        number = None
+        for line in response_text.split('\n'):
+            # If we found the directory, keep it and don't break
+            path_match = re.search(path_re, line)
+            if path_match:
+                path = host + path_match.group(1)
+                continue
+            # If we found the number, keep it and break
+            number_match = re.search(number_re, line)
+            if number_match:
+                number = number_match.group(1)
+                break
+
+        # Without both pieces the download URLs cannot be built; say so
+        # clearly instead of failing on an unbound local variable.
+        if path is None or number is None:
+            raise RuntimeError('Could not find the language files in the'
+                               ' lmtool response')
+
+        lm_url = path + '/' + number + '.lm'
+        dic_url = path + '/' + number + '.dic'
+
+        # Attempt both downloads even if the first one fails
+        lm_ok = self._download_file(lm_url, self.config.lang_file)
+        dic_ok = self._download_file(dic_url, self.config.dic_file)
+        return lm_ok and dic_ok
+
+    def save_language_hash(self):
+        """Store the hash computed by language_has_changed()"""
+        self.hasher['language'] = self.new_hash
+        self.hasher.store()
+
+    def _download_file(self, url, path):
+        """Download ``url`` into the file ``path``
+
+        Returns True when the server answered with status 200 and the body
+        was written, False otherwise (``path`` is then left untouched).
+        """
+        # The context manager releases the streamed connection when done
+        with requests.get(url, stream=True) as r:
+            if r.status_code != 200:
+                print('Failed to download {} (HTTP status {})'.format(
+                    url, r.status_code))
+                return False
+            with open(path, 'wb') as f:
+                for chunk in r:
+                    f.write(chunk)
+        return True