blob: ec5c868277225d583c81143c41a1742ff58fc90c (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
|
#!/bin/bash
# Regenerate the blather dictionary and language model.
#
# Steps:
#   1. Build a sentence corpus from commands.conf (the text before the first
#      ':' on every line that is neither empty nor starts with '#').
#   2. Upload the corpus to the lmtool web service at $lmtoolurl.
#   3. Pull the base URL of the generated files out of the response.
#   4. Download <base>.dic and <base>.lm and install them as
#      $langdir/dic and $langdir/lm.
#
# Exits non-zero with a message on stderr as soon as any step fails, so a
# failed upload can never replace a working dictionary/model with junk.
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

blatherdir=~/.config/blather
sentences=$blatherdir/sentences.corpus
sourcefile=$blatherdir/commands.conf
langdir=$blatherdir/language
lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run

cd -- "$blatherdir" || die "cannot cd to $blatherdir"
[[ -r "$sourcefile" ]] || die "cannot read $sourcefile"

# Build the corpus: drop empty lines and lines starting with '#', then cut
# each remaining line at its first colon.
sed -e '/^$/d' -e '/^#/d' -e 's/:.*$//' -- "$sourcefile" > "$sentences" \
  || die "could not write $sentences"

# Upload the corpus file; the response page contains the result URL.
response=$(curl -f -L -F corpus=@"$sentences" -F formtype=simple "$lmtoolurl") \
  || die "upload to $lmtoolurl failed"

# Find the http line that follows the "base name" marker and reduce it to the
# bare base URL: strip everything up to the opening quote, strip the ".tgz"
# suffix and whatever follows it, and remove the "TAR" prefix of the file name.
base_url=$(printf '%s\n' "$response" \
  | grep -A 1 "base name" \
  | grep http \
  | sed -e 's/^.*="//' -e 's/\.tgz.*$//' -e 's/TAR//') \
  || die "could not find the result URL in the lmtool response"
# Only the first match is used should the page ever contain several.
base_url=${base_url%%$'\n'*}
[[ -n "$base_url" ]] || die "lmtool response contained an empty result URL"

# Download into a private scratch directory (on the same filesystem as the
# destination) so that unrelated *.dic / *.lm files are never picked up and
# nothing is installed unless both downloads succeed.
workdir=$(mktemp -d "$blatherdir/lmtool.XXXXXX") || die "mktemp failed"
trap 'rm -rf -- "$workdir"' EXIT

curl -f -o "$workdir/dic" "${base_url}.dic" \
  || die "download of ${base_url}.dic failed"
curl -f -o "$workdir/lm" "${base_url}.lm" \
  || die "download of ${base_url}.lm failed"

# Move them to the right name/place.
mkdir -p -- "$langdir"
mv -- "$workdir/dic" "$langdir/dic"
mv -- "$workdir/lm" "$langdir/lm"
|