#!/bin/bash -x
########################## vocab2list ##########################
###
# *  (C) 2014-21 - erfahren@atadcrazy.net 
# *  Licensed under GNU General Public License 3.0 or later. 
# *  Some rights reserved. See COPYING, AUTHORS.
# * @license GPL-3.0+ http://spdx.org/licenses/GPL-3.0+
#
# This script takes a list of words (single column in plain text format) 
# and produces a formatted text file with words and their definitions.
#
# dependencies: StarDict's sdcv (with WordNet dictionary); and zenity
# StarDict package available at http://stardict-4.sourceforge.net/
#
# WordNet dictionary in Stardict's directory (e.g. /usr/share/stardict/dic )
# for more info visit: https://owenh.net/stardict.html
# dictionary also available here: 
# http://www.atadcrazy.net/gnu-linux/misc/stardict_wordnet.tar.bz2
###
# script includes feature to format the list in alphabetical order and remove duplicates
# Note: Tested with 700 words. The sleeps may need to be increased if the entry word list is larger. 
# There is no progress indicator at this time, so give it a minute.
###
## Zenity Info Dialog - program introduction dialog
zenity --info --title="vocab2list" --width=300 --height=100 \
  --text="This script prompts for a plain text file containing a single column list of words to be processed."

## Zenity File Selector
## FILE holds the path chosen by the user; the rest of the script reads it.
FILE=$(zenity --file-selection --width=600 --height=400 \
  --title="Select a plain text file with list of words")
status=$?

## Stop here unless a file was actually chosen: continuing with an empty
## FILE would make every later step operate on bogus paths.
case "$status" in
  0)
    echo "\"$FILE\" selected."
    ;;
  1)
    echo "No file selected." >&2
    exit 1
    ;;
  *)
    # Exit statuses are 0-255, so zenity's "-1" arrives as 255; catch
    # that and every other non-zero code here.
    echo "An unexpected error has occurred." >&2
    exit 1
    ;;
esac

if [[ ! -f "$FILE" || ! -r "$FILE" ]]; then
  echo "\"$FILE\" is not a readable regular file." >&2
  exit 1
fi

## Keep only the first whitespace-separated column of each line.
## NOTE: this rewrites the selected file in place.
## No sleep is needed between the steps: each command has finished
## before the next one starts.
if awk '{print $1}' "$FILE" > "$FILE.tmp"; then
  mv -- "$FILE.tmp" "$FILE" || exit 1
else
  echo "Could not extract the word column from \"$FILE\"." >&2
  rm -f -- "$FILE.tmp"
  exit 1
fi

########################## SORT ##########################
## Back up the word list as "$FILE.back", then rewrite "$FILE" sorted in
## dictionary order (ignoring leading blanks), with repeated words
## collapsed to a single entry and blank lines removed.
##
## Plain `uniq` is used on purpose: `uniq -u` prints only the lines that
## are never repeated, which would drop a duplicated word altogether
## instead of keeping one copy of it.
cp -- "$FILE" "$FILE.back" \
  && sort -b -d -- "$FILE" > "$FILE-srtd" \
  && uniq -- "$FILE-srtd" > "$FILE-srtd-nodupes" \
  && sed '/^\s*$/d' -- "$FILE-srtd-nodupes" > "$FILE-srtd" \
  && mv -- "$FILE-srtd" "$FILE" \
  && rm -- "$FILE-srtd-nodupes"

##########################################################
## Look up every word listed in "$FILE" in the WordNet dictionary via
## sdcv. Words that are found get their definition (at most ten non-blank
## lines, followed by one empty separator line) appended to
## ./vocabulary-words; words that are not found are appended to
## ./vocab-notfound.log.
##
## The sdcv output is held in a shell variable, so no scratch directory
## has to be created or removed.
while IFS= read -r line || [[ -n "$line" ]]; do
  # Nothing to look up on an empty line.
  [[ -n "$line" ]] || continue

  # Drop the first four lines of sdcv's output, as the original script
  # did (presumably sdcv's result header -- confirm against your sdcv
  # version). stdin is redirected so that sdcv can never consume the
  # word list feeding this loop.
  definition=$(sdcv -n -u "WordNet" "$line" < /dev/null | sed '1,4d')

  ## ensuring word was found and...
  ## setting the first word out of the results as the variable "entryword"
  entryword=$(awk 'NR == 1 { print $1 }' <<< "$definition")

  if [[ "$entryword" == "$line" ]]; then
    ## removes blank lines and cuts definition to ten lines
    {
      sed '/^$/d' <<< "$definition" | sed 10q
      #echo -e "%"   ## (for fortune data file)
      echo ""
    } >> ./vocabulary-words
  else
    printf '%s\n' "$line" >> ./vocab-notfound.log
  fi
done < "$FILE"
###
zenity --info --title="Info" --text="words not found logged in vocab-notfound.log"
exit 0
