summary refs log tree commit diff
diff options
context:
space:
mode:
author Jezra <jezra@jezra.net> 2013-04-22 18:13:08 -0700
committer Jezra <jezra@jezra.net> 2013-04-22 18:13:08 -0700
commit 40b219723f2ac3775c66a0547b5c7ece061edee3 (patch)
tree aafa7b04673fd13a303f485f9118af3f1fd6d0da
parent abd0eacddb3f5ad44ad3e859e32826b4cd2fc8e4 (diff)
patched language_updater.sh to generate corpus from command file
-rwxr-xr-x language_updater.sh 11
1 files changed, 9 insertions, 2 deletions
diff --git a/language_updater.sh b/language_updater.sh
index 211793e..383e140 100755
--- a/language_updater.sh
+++ b/language_updater.sh
@@ -1,15 +1,22 @@
 #!/bin/bash
 
 blatherdir=~/.config/blather
-sourcefile=$blatherdir/sentences.corpus
+sentences=$blatherdir/sentences.corpus
+sourcefile=$blatherdir/commands
 langdir=$blatherdir/language
 tempfile=$blatherdir/url.txt
 lmtoolurl=http://www.speech.cs.cmu.edu/cgi-bin/tools/lmtool/run
 
 cd $blatherdir
 
+sed -f - $sourcefile > $sentences <<EOFcommands
+  /^$/d
+  /^#/d
+  s/\:.*$//
+EOFcommands
+
 # upload corpus file, find the resulting dictionary file url
-curl -L -F corpus=@"$sourcefile" -F formtype=simple $lmtoolurl \
+curl -L -F corpus=@"$sentences" -F formtype=simple $lmtoolurl \
   |grep -A 1 "base name" |grep http \
   | sed -e 's/^.*\="//' | sed -e 's/\.tgz.*$//' | sed -e 's/TAR//' > $tempfile