diff options
| author | Jezra <jezra@jezra.net> | 2013-04-22 18:13:08 -0700 |
|---|---|---|
| committer | Jezra <jezra@jezra.net> | 2013-04-22 18:13:08 -0700 |
| commit | 40b219723f2ac3775c66a0547b5c7ece061edee3 (patch) | |
| tree | aafa7b04673fd13a303f485f9118af3f1fd6d0da | |
| parent | abd0eacddb3f5ad44ad3e859e32826b4cd2fc8e4 (diff) | |
patched language_updater.sh to generate corpus from command file
-rwxr-xr-x | language_updater.sh | 11 |
1 file changed, 9 insertions, 2 deletions
```diff
diff --git a/language_updater.sh b/language_updater.sh
index 211793e..383e140 100755
--- a/language_updater.sh
+++ b/language_updater.sh
@@ -1,15 +1,22 @@
 #!/bin/bash
 
 blatherdir=~/.config/blather
-sourcefile=$blatherdir/sentences.corpus
+sentences=$blatherdir/sentences.corpus
+sourcefile=$blatherdir/commands
 langdir=$blatherdir/language
 tempfile=$blatherdir/url.txt
 lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
 
 cd $blatherdir
 
+sed -f - $sourcefile > $sentences <<EOFcommands
+  /^$/d
+  /^#/d
+  s/\:.*$//
+EOFcommands
+
 # upload corpus file, find the resulting dictionary file url
-curl -L -F corpus=@"$sourcefile" -F formtype=simple $lmtoolurl \
+curl -L -F corpus=@"$sentences" -F formtype=simple $lmtoolurl \
   |grep -A 1 "base name" |grep http \
   | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile
 
```