summary refs log tree commit diff
path: root/language_updater.sh
blob: 5a2c232482b3c2ede5b62ed164727a4d7828c016 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
#!/bin/bash
#
# Rebuild the kaylee speech-recognition language files from commands.conf.
#
# Steps:
#   1. Reduce commands.conf to a corpus: drop blank lines and '#' comments,
#      and keep only the text before the first ':' on each remaining line.
#   2. Upload that corpus to the CMU lmtool web service.
#   3. Scrape the result page for the base URL of the generated files.
#   4. Download <base>.dic and <base>.lm and install them as
#      $blatherdatadir/dic and $blatherdatadir/lm.
#
# Requires: curl, grep, sed, mktemp. Exits non-zero on any failure and
# leaves previously installed dic/lm files untouched in that case.

set -euo pipefail

blatherdir="$HOME/.config/kaylee"
blatherdatadir="$HOME/.local/share/kaylee"
blathercachedir="$HOME/.cache/kaylee"
sentences="$blatherdatadir/sentences.corpus"
sourcefile="$blatherdir/commands.conf"
lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run

# Print a message to stderr and abort.
die() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
  exit 1
}

[[ -r "$sourcefile" ]] || die "cannot read $sourcefile"
mkdir -p -- "$blatherdatadir" "$blathercachedir" \
  || die "cannot create data/cache directories"

# Private scratch directory for the downloads; removed on every exit path.
workdir=$(mktemp -d "$blathercachedir/lmtool.XXXXXX") \
  || die "cannot create scratch directory in $blathercachedir"
trap 'rm -rf -- "$workdir"' EXIT

# Build the corpus: one phrase per line, comments and blank lines removed.
sed -e '/^$/d' -e '/^#/d' -e 's/:.*$//' "$sourcefile" > "$sentences" \
  || die "cannot write $sentences"
[[ -s "$sentences" ]] || die "no phrases found in $sourcefile"

# Upload the corpus and derive the base URL of the generated files.
# The pipeline takes the line after "base name" on the result page, keeps
# the one containing a link, and turns .../TAR<id>.tgz into .../<id>.
# With pipefail, a failed upload or an unmatched grep aborts here instead
# of silently producing an empty URL.
baseurl=$(
  curl -fL -F "corpus=@$sentences" -F formtype=simple "$lmtoolurl" \
    | grep -A 1 "base name" \
    | grep http \
    | sed -e 's/^.*="//' -e 's/\.tgz.*$//' -e 's/TAR//'
) || die "could not obtain result URL from $lmtoolurl"
[[ -n "$baseurl" ]] || die "lmtool response did not contain a result URL"

# Download both files into the scratch directory first, so the installed
# pair is only replaced once both transfers have succeeded. -f makes curl
# fail on HTTP errors rather than saving an error page as the model.
curl -fL -o "$workdir/dic" "$baseurl.dic" || die "download failed: $baseurl.dic"
curl -fL -o "$workdir/lm" "$baseurl.lm" || die "download failed: $baseurl.lm"

# Install under the fixed names kaylee reads.
mv -- "$workdir/dic" "$blatherdatadir/dic"
mv -- "$workdir/lm" "$blatherdatadir/lm"