#! /bin/sh
# Asterisk voicemail attachment conversion script, including voice recognition
# Use Voice Recognition Engine provided by IBM Watson Spech-to-Text API

# Revision history :
# 22/11/2010 - V1.0 - Creation by N. Bernaerts
# 07/02/2012 - V1.1 - Add handling of mails without attachment (thanks to Paul Thompson)
# 01/05/2012 - V1.2 - Use mktemp, pushd & popd
# 08/05/2012 - V1.3 - Change mp3 compression to CBR to solve some smartphone compatibility (thanks to Luca Mancino)
# 01/08/2012 - V1.4 - Add PATH definition to avoid any problem (thanks to Christopher Wolff)
# 01/06/2013 - V1.5 - Improved call quality of MP3
# 10/11/2014 - V1.6 - TB Sampson fix for iPhone playback too: http://nerd.bz/1vTN3Hq
# 12/03/2017 - V2.3 - modified for use with IBM Bluemix Speech-to-Text API (thanks to Jason Klein and Ward Mundy & Associates LLC)
# 11/12/2018 - V2.4 - modified for use with IBM Watson's new STT pricing plans by Ward Mundy & Associates LLC
# 11/20/2018 - V2.5 - removed continuous=true from syntax due to IBM syntax mod

# Special thanks: https://jrklein.com/2015/08/17/asterisk-voicemail-transcription-via-ibm-bluemix-speech-to-text-api/

# To obtain IBM credentials for Speech to Text transcription, see this tutorial: http://nerdvittles.com/?page_id=25616

API_KEY="XX-XXXXXXXXXXX-XXXXXXXXXXXXXXXXXXXXXXXXXXX-X"
URL="https://stream.watsonplatform.net/speech-to-text/api"

# username and password keys are no longer available from IBM
#API_USERNAME="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
#API_PASSWORD="XXXXXXXXXXXX"

# set PATH
PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"

# save the current directory
pushd .

# create a temporary directory and cd to it
TMPDIR=$(mktemp -d)
cd $TMPDIR

# dump the stream to a temporary file
cat >> stream.org

# get the boundary
BOUNDARY=`grep "boundary=" stream.org | cut -d'"' -f 2`

# cut the file into parts
# stream.part - header before the boundary
# stream.part1 - header after the boundary
# stream.part2 - body of the message
# stream.part3 - attachment in base64 (WAV file)
# stream.part4 - footer of the message
awk '/'$BOUNDARY'/{i++}{print > "stream.part"i}' stream.org

# if mail is having no audio attachment (plain text)
PLAINTEXT=`cat stream.part1 | grep 'plain'`
if [ "$PLAINTEXT" != "" ]
then

  # prepare to send the original stream
  cat stream.org > stream.new

# else, if mail is having audio attachment
else

  # cut the attachment into parts
  # stream.part3.head - header of attachment
  # stream.part3.wav.base64 - wav file of attachment (encoded base64)
  sed '7,$d' stream.part3 > stream.part3.wav.head
  sed '1,6d' stream.part3 > stream.part3.wav.base64

  # convert the base64 file to a wav file
  dos2unix -o stream.part3.wav.base64
  base64 -di stream.part3.wav.base64 > stream.part3.wav

  # convert wav file to mp3 file
  # -b 24 is using CBR, giving better compatibility on smartphones (you can use -b 32 to increase quality)
  # -V 2 is using VBR, a good compromise between quality and size for voice audio files
  # lame -m m -b 64 stream.part3.wav stream.part3.mp3
  # TB Sampson mod for iPhone AND Android playback support
  lame --abr 24 -mm -h -c --resample 22.050 stream.part3.wav stream.part3.mp3
  # convert back mp3 to base64 file
  base64 stream.part3.mp3 > stream.part3.mp3.base64

  # generate the new mp3 attachment header
  # change Type: audio/x-wav to Type: audio/mpeg
  # change name="msg----.wav" to name="msg----.mp3"
  sed 's/x-wav/mpeg/g' stream.part3.wav.head | sed 's/.wav/.mp3/g' > stream.part3.mp3.head

CURL_OPTS=""

# Send WAV to Watson Speech to Text API. Must use "Narrowband" (aka 8k) model since WAV is 8k sample.
#curl -s $CURL_OPTS -k -u $API_USERNAME:$API_PASSWORD -X POST \
curl -s $CURL_OPTS -k -u "apikey:$API_KEY" -X POST \
    --limit-rate 40000 \
    --header "Content-Type: audio/wav" \
    --data-binary @stream.part3.wav \
    "$URL/v1/recognize?model=en-US_NarrowbandModel" 1>audio.txt

# Extract transcript results from JSON response
TRANSCRIPT=`cat audio.txt | grep transcript | sed 's#^.*"transcript": "##g' | sed 's# "$##g'`

  # generate first part of mail body, converting it to LF only
  mv stream.part stream.new
  cat stream.part1 >> stream.new
  cat stream.part2 >> stream.new
  echo -e "\r\n\r\n Message contents: $TRANSCRIPT" >> stream.new
  cat stream.part3.mp3.head >> stream.new
  dos2unix -o stream.new

  # append base64 mp3 to mail body, keeping CRLF
  unix2dos -o stream.part3.mp3.base64
  cat stream.part3.mp3.base64 >> stream.new

  # append end of mail body, converting it to LF only
  echo "" >> stream.tmp
  echo "" >> stream.tmp
  cat stream.part4 >> stream.tmp
  dos2unix -o stream.tmp
  cat stream.tmp >> stream.new

fi

# send the mail thru sendmail
cat stream.new | sendmail -t

# go back to original directory
popd

# remove all temporary files and temporary directory
rm -Rf $TMPDIR
