blob: 5a2c232482b3c2ede5b62ed164727a4d7828c016 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
|
#!/bin/bash
#
# Regenerate the kaylee language model files from the user's command list.
#
# Steps:
#   1. Reduce commands.conf to a plain sentence corpus: drop blank lines and
#      '#' comment lines, and cut each remaining line at its first ':'.
#   2. Upload the corpus to the CMU lmtool web service and scrape the base
#      URL of the generated files out of the returned page.
#   3. Download <base>.dic and <base>.lm and install them as
#      $blatherdatadir/dic and $blatherdatadir/lm.
#
# Requires: curl, grep, sed, mktemp, and network access to $lmtoolurl.
# Exits non-zero with a message on stderr as soon as any step fails, so a
# failed upload can no longer clobber an existing dic/lm pair.

set -euo pipefail

blatherdir=~/.config/kaylee
blatherdatadir=~/.local/share/kaylee
blathercachedir=~/.cache/kaylee
sentences=$blatherdatadir/sentences.corpus
sourcefile=$blatherdir/commands.conf
lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

[[ -r "$sourcefile" ]] || die "cannot read command file: $sourcefile"

# The output and scratch locations may not exist on a fresh install.
mkdir -p -- "$blatherdatadir" "$blathercachedir"

# Download into a private scratch directory instead of the config directory,
# so only the files fetched by this run are ever moved into place. The trap
# removes the scratch directory on every exit path.
workdir=$(mktemp -d "$blathercachedir/lmtool.XXXXXX") \
  || die "cannot create a scratch directory under $blathercachedir"
trap 'rm -rf -- "$workdir"' EXIT

# Build the corpus: one sentence per line, nothing after the first ':'.
sed -e '/^$/d' -e '/^#/d' -e 's/:.*$//' -- "$sourcefile" > "$sentences"
[[ -s "$sentences" ]] || die "no sentences found in $sourcefile"

# Upload the corpus file and find the resulting dictionary file URL.
# The page line following "base name" carries a link to a .tgz archive;
# stripping the leading markup, the ".tgz" suffix and the "TAR" prefix of the
# archive name leaves the common base URL of the .dic and .lm files.
baseurl=$(
  curl -f -L -F corpus=@"$sentences" -F formtype=simple "$lmtoolurl" \
    | grep -A 1 "base name" \
    | grep http \
    | sed -e 's/^.*="//' -e 's/\.tgz.*$//' -e 's/TAR//'
) || die "could not obtain a result URL from $lmtoolurl"

# Keep only the first match in case the page contained several.
baseurl=${baseurl%%$'\n'*}
[[ -n "$baseurl" ]] || die "lmtool response did not contain a result URL"

# Download the .dic and .lm files; -f turns HTTP errors into failures so an
# error page is never installed as a model file.
curl -f -o "$workdir/model.dic" "${baseurl}.dic" \
  || die "download failed: ${baseurl}.dic"
curl -f -o "$workdir/model.lm" "${baseurl}.lm" \
  || die "download failed: ${baseurl}.lm"

# Move them to the right name/place only after both downloads succeeded.
mv -f -- "$workdir/model.dic" "$blatherdatadir/dic"
mv -f -- "$workdir/model.lm" "$blatherdatadir/lm"
|