'Say' command, TTS: IVONA Cloud

Hi all.
I’m a newbie (noob) with OpenHAB. Also, English isn’t my native language, sorry…

I want to use TTS and the ‘Say’ command, but I don’t like the playback quality of the speech engine integrated in OpenHAB. Moreover, I am going to use Russian (it is my native language). After researching a few TTS services I found IVONA Cloud: https://www.ivona.com/us/for-business/speech-cloud/

  1. In developer mode it is free (The total number of units requested per account is limited to 50,000 monthly)
  2. Russian synthesized speech quality is very high (I have not found better). I think English is also excellent.

I tried to build the OpenHAB source code for addon development, but that is not finished yet (I have some compilation issues and will continue later). If anyone wants to write and build the addon themselves, use the information listed below.

I am going to use the Polipo (http://www.pps.univ-paris-diderot.fr/~jch/software/polipo/) caching proxy server to cache all *.mp3 files returned from Ivona Cloud, so I think it will be able to reduce the monthly traffic to the cloud.

I tried the following code and it works in a browser (Chrome):

/*
http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/sha256.js
CryptoJS v3.1.2
code.google.com/p/crypto-js
(c) 2009-2013 by Jeff Mott. All rights reserved.
code.google.com/p/crypto-js/wiki/License
*/
// Vendored, minified CryptoJS "sha256" rollup. It defines the global `CryptoJS`
// (unless one already exists) and exposes:
//   CryptoJS.SHA256(message)          -> WordArray; toString() yields lowercase hex
//   CryptoJS.HmacSHA256(message, key) -> looks up CryptoJS.algo.HMAC at call time;
//                                        this rollup does not define HMAC itself
//                                        (the hmac-sha256 rollup does).
// The code is kept token-for-token; lines are only wrapped after `,` or `;`.
var CryptoJS=CryptoJS||function(h,s){var f={},t=f.lib={},g=function(){},
// lib.Base: prototypal inheritance helpers (extend / create / mixIn / clone).
j=t.Base={extend:function(a){g.prototype=this;var c=new g;a&&c.mixIn(a);c.hasOwnProperty("init")||(c.init=function(){c.$super.init.apply(this,arguments)});c.init.prototype=c;c.$super=this;return c},create:function(){var a=this.extend();a.init.apply(a,arguments);return a},init:function(){},mixIn:function(a){for(var c in a)a.hasOwnProperty(c)&&(this[c]=a[c]);a.hasOwnProperty("toString")&&(this.toString=a.toString)},clone:function(){return this.init.prototype.extend(this)}},
// lib.WordArray: an array of 32-bit words plus `sigBytes`, the number of
// significant bytes (defaults to 4 * words.length). toString() defaults to Hex.
q=t.WordArray=j.extend({init:function(a,c){a=this.words=a||[];this.sigBytes=c!=s?c:4*a.length},toString:function(a){return(a||u).stringify(this)},concat:function(a){var c=this.words,d=a.words,b=this.sigBytes;a=a.sigBytes;this.clamp();if(b%4)for(var e=0;e<a;e++)c[b+e>>>2]|=(d[e>>>2]>>>24-8*(e%4)&255)<<24-8*((b+e)%4);else if(65535<d.length)for(e=0;e<a;e+=4)c[b+e>>>2]=d[e>>>2];else c.push.apply(c,d);this.sigBytes+=a;return this},clamp:function(){var a=this.words,c=this.sigBytes;a[c>>>2]&=4294967295<<32-8*(c%4);a.length=h.ceil(c/4)},clone:function(){var a=j.clone.call(this);a.words=this.words.slice(0);return a},random:function(a){for(var c=[],d=0;d<a;d+=4)c.push(4294967296*h.random()|0);return new q.init(c,a)}}),
// enc.Hex / enc.Latin1 / enc.Utf8: convert between strings and WordArrays.
v=f.enc={},
u=v.Hex={stringify:function(a){var c=a.words;a=a.sigBytes;for(var d=[],b=0;b<a;b++){var e=c[b>>>2]>>>24-8*(b%4)&255;d.push((e>>>4).toString(16));d.push((e&15).toString(16))}return d.join("")},parse:function(a){for(var c=a.length,d=[],b=0;b<c;b+=2)d[b>>>3]|=parseInt(a.substr(b,2),16)<<24-4*(b%8);return new q.init(d,c/2)}},
k=v.Latin1={stringify:function(a){var c=a.words;a=a.sigBytes;for(var d=[],b=0;b<a;b++)d.push(String.fromCharCode(c[b>>>2]>>>24-8*(b%4)&255));return d.join("")},parse:function(a){for(var c=a.length,d=[],b=0;b<c;b++)d[b>>>2]|=(a.charCodeAt(b)&255)<<24-8*(b%4);return new q.init(d,c)}},
// NOTE: `throw` and its operand must stay on the same line; a line break
// directly after `throw` is a syntax error and stops the whole script from parsing.
l=v.Utf8={stringify:function(a){try{return decodeURIComponent(escape(k.stringify(a)))}catch(c){throw Error("Malformed UTF-8 data");}},parse:function(a){return k.parse(unescape(encodeURIComponent(a)))}},
// lib.BufferedBlockAlgorithm: buffers appended data (strings are parsed as
// UTF-8) and feeds whole blocks to the subclass's _doProcessBlock.
x=t.BufferedBlockAlgorithm=j.extend({reset:function(){this._data=new q.init;this._nDataBytes=0},_append:function(a){"string"==typeof a&&(a=l.parse(a));this._data.concat(a);this._nDataBytes+=a.sigBytes},_process:function(a){var c=this._data,d=c.words,b=c.sigBytes,e=this.blockSize,f=b/(4*e),f=a?h.ceil(f):h.max((f|0)-this._minBufferSize,0);a=f*e;b=h.min(4*a,b);if(a){for(var m=0;m<a;m+=e)this._doProcessBlock(d,m);m=d.splice(0,a);c.sigBytes-=b}return new q.init(m,b)},clone:function(){var a=j.clone.call(this);a._data=this._data.clone();return a},_minBufferSize:0});
// lib.Hasher: update()/finalize() front end plus factories for the
// CryptoJS.<Name>(message) and CryptoJS.Hmac<Name>(message, key) shortcuts.
t.Hasher=x.extend({cfg:j.extend(),init:function(a){this.cfg=this.cfg.extend(a);this.reset()},reset:function(){x.reset.call(this);this._doReset()},update:function(a){this._append(a);this._process();return this},finalize:function(a){a&&this._append(a);return this._doFinalize()},blockSize:16,_createHelper:function(a){return function(c,d){return(new a.init(d)).finalize(c)}},_createHmacHelper:function(a){return function(c,d){return(new w.HMAC.init(a,d)).finalize(c)}}});
var w=f.algo={};return f}(Math);
// algo.SHA256. The setup loop walks the first 64 primes: the fractional parts of
// the square roots of the first 8 become the initial hash (j), the fractional
// parts of the cube roots of all 64 become the round constants (q).
(function(h){for(var s=CryptoJS,f=s.lib,t=f.WordArray,g=f.Hasher,f=s.algo,j=[],q=[],v=function(a){return 4294967296*(a-(a|0))|0},u=2,k=0;64>k;){var l;a:{l=u;for(var x=h.sqrt(l),w=2;w<=x;w++)if(!(l%w)){l=!1;break a}l=!0}l&&(8>k&&(j[k]=v(h.pow(u,0.5))),q[k]=v(h.pow(u,1/3)),k++);u++}var a=[],
f=f.SHA256=g.extend({_doReset:function(){this._hash=new t.init(j.slice(0))},
_doProcessBlock:function(c,d){for(var b=this._hash.words,e=b[0],f=b[1],m=b[2],h=b[3],p=b[4],j=b[5],k=b[6],l=b[7],n=0;64>n;n++){if(16>n)a[n]=c[d+n]|0;else{var r=a[n-15],g=a[n-2];a[n]=((r<<25|r>>>7)^(r<<14|r>>>18)^r>>>3)+a[n-7]+((g<<15|g>>>17)^(g<<13|g>>>19)^g>>>10)+a[n-16]}r=l+((p<<26|p>>>6)^(p<<21|p>>>11)^(p<<7|p>>>25))+(p&j^~p&k)+q[n]+a[n];g=((e<<30|e>>>2)^(e<<19|e>>>13)^(e<<10|e>>>22))+(e&f^e&m^f&m);l=k;k=j;j=p;p=h+r|0;h=m;m=f;f=e;e=r+g|0}b[0]=b[0]+e|0;b[1]=b[1]+f|0;b[2]=b[2]+m|0;b[3]=b[3]+h|0;b[4]=b[4]+p|0;b[5]=b[5]+j|0;b[6]=b[6]+k|0;b[7]=b[7]+l|0},
_doFinalize:function(){var a=this._data,d=a.words,b=8*this._nDataBytes,e=8*a.sigBytes;d[e>>>5]|=128<<24-e%32;d[(e+64>>>9<<4)+14]=h.floor(b/4294967296);d[(e+64>>>9<<4)+15]=b;a.sigBytes=4*d.length;this._process();return this._hash},
clone:function(){var a=g.clone.call(this);a._hash=this._hash.clone();return a}});
s.SHA256=g._createHelper(f);s.HmacSHA256=g._createHmacHelper(f)})(Math);
/*
http://crypto-js.googlecode.com/svn/tags/3.1.2/build/rollups/hmac-sha256.js
CryptoJS v3.1.2
code.google.com/p/crypto-js
(c) 2009-2013 by Jeff Mott. All rights reserved.
code.google.com/p/crypto-js/wiki/License
*/
// Vendored, minified CryptoJS "hmac-sha256" rollup. If no global `CryptoJS`
// exists yet it builds the core (lib / enc / algo); in either case it then
// (re)registers algo.SHA256 and adds algo.HMAC, so that
//   CryptoJS.SHA256(message)          -> WordArray (toString() yields lowercase hex)
//   CryptoJS.HmacSHA256(message, key) -> WordArray; key may be a string or WordArray
// are both usable. The code is kept token-for-token; lines are only wrapped
// after `,` or `;`.
var CryptoJS=CryptoJS||function(h,s){var f={},g=f.lib={},q=function(){},
// lib.Base: prototypal inheritance helpers (extend / create / mixIn / clone).
m=g.Base={extend:function(a){q.prototype=this;var c=new q;a&&c.mixIn(a);c.hasOwnProperty("init")||(c.init=function(){c.$super.init.apply(this,arguments)});c.init.prototype=c;c.$super=this;return c},create:function(){var a=this.extend();a.init.apply(a,arguments);return a},init:function(){},mixIn:function(a){for(var c in a)a.hasOwnProperty(c)&&(this[c]=a[c]);a.hasOwnProperty("toString")&&(this.toString=a.toString)},clone:function(){return this.init.prototype.extend(this)}},
// lib.WordArray: an array of 32-bit words plus `sigBytes`, the number of
// significant bytes (defaults to 4 * words.length). toString() defaults to Hex.
r=g.WordArray=m.extend({init:function(a,c){a=this.words=a||[];this.sigBytes=c!=s?c:4*a.length},toString:function(a){return(a||k).stringify(this)},concat:function(a){var c=this.words,d=a.words,b=this.sigBytes;a=a.sigBytes;this.clamp();if(b%4)for(var e=0;e<a;e++)c[b+e>>>2]|=(d[e>>>2]>>>24-8*(e%4)&255)<<24-8*((b+e)%4);else if(65535<d.length)for(e=0;e<a;e+=4)c[b+e>>>2]=d[e>>>2];else c.push.apply(c,d);this.sigBytes+=a;return this},clamp:function(){var a=this.words,c=this.sigBytes;a[c>>>2]&=4294967295<<32-8*(c%4);a.length=h.ceil(c/4)},clone:function(){var a=m.clone.call(this);a.words=this.words.slice(0);return a},random:function(a){for(var c=[],d=0;d<a;d+=4)c.push(4294967296*h.random()|0);return new r.init(c,a)}}),
// enc.Hex / enc.Latin1 / enc.Utf8: convert between strings and WordArrays.
l=f.enc={},
k=l.Hex={stringify:function(a){var c=a.words;a=a.sigBytes;for(var d=[],b=0;b<a;b++){var e=c[b>>>2]>>>24-8*(b%4)&255;d.push((e>>>4).toString(16));d.push((e&15).toString(16))}return d.join("")},parse:function(a){for(var c=a.length,d=[],b=0;b<c;b+=2)d[b>>>3]|=parseInt(a.substr(b,2),16)<<24-4*(b%8);return new r.init(d,c/2)}},
n=l.Latin1={stringify:function(a){var c=a.words;a=a.sigBytes;for(var d=[],b=0;b<a;b++)d.push(String.fromCharCode(c[b>>>2]>>>24-8*(b%4)&255));return d.join("")},parse:function(a){for(var c=a.length,d=[],b=0;b<c;b++)d[b>>>2]|=(a.charCodeAt(b)&255)<<24-8*(b%4);return new r.init(d,c)}},
// NOTE: `throw` and its operand must stay on the same line; a line break
// directly after `throw` is a syntax error and stops the whole script from parsing.
j=l.Utf8={stringify:function(a){try{return decodeURIComponent(escape(n.stringify(a)))}catch(c){throw Error("Malformed UTF-8 data");}},parse:function(a){return n.parse(unescape(encodeURIComponent(a)))}},
// lib.BufferedBlockAlgorithm: buffers appended data (strings are parsed as
// UTF-8) and feeds whole blocks to the subclass's _doProcessBlock.
u=g.BufferedBlockAlgorithm=m.extend({reset:function(){this._data=new r.init;this._nDataBytes=0},_append:function(a){"string"==typeof a&&(a=j.parse(a));this._data.concat(a);this._nDataBytes+=a.sigBytes},_process:function(a){var c=this._data,d=c.words,b=c.sigBytes,e=this.blockSize,f=b/(4*e),f=a?h.ceil(f):h.max((f|0)-this._minBufferSize,0);a=f*e;b=h.min(4*a,b);if(a){for(var g=0;g<a;g+=e)this._doProcessBlock(d,g);g=d.splice(0,a);c.sigBytes-=b}return new r.init(g,b)},clone:function(){var a=m.clone.call(this);a._data=this._data.clone();return a},_minBufferSize:0});
// lib.Hasher: update()/finalize() front end plus factories for the
// CryptoJS.<Name>(message) and CryptoJS.Hmac<Name>(message, key) shortcuts.
g.Hasher=u.extend({cfg:m.extend(),init:function(a){this.cfg=this.cfg.extend(a);this.reset()},reset:function(){u.reset.call(this);this._doReset()},update:function(a){this._append(a);this._process();return this},finalize:function(a){a&&this._append(a);return this._doFinalize()},blockSize:16,_createHelper:function(a){return function(c,d){return(new a.init(d)).finalize(c)}},_createHmacHelper:function(a){return function(c,d){return(new t.HMAC.init(a,d)).finalize(c)}}});
var t=f.algo={};return f}(Math);
// algo.SHA256. The setup loop walks the first 64 primes: the fractional parts of
// the square roots of the first 8 become the initial hash (m), the fractional
// parts of the cube roots of all 64 become the round constants (r).
(function(h){for(var s=CryptoJS,f=s.lib,g=f.WordArray,q=f.Hasher,f=s.algo,m=[],r=[],l=function(a){return 4294967296*(a-(a|0))|0},k=2,n=0;64>n;){var j;a:{j=k;for(var u=h.sqrt(j),t=2;t<=u;t++)if(!(j%t)){j=!1;break a}j=!0}j&&(8>n&&(m[n]=l(h.pow(k,0.5))),r[n]=l(h.pow(k,1/3)),n++);k++}var a=[],
f=f.SHA256=q.extend({_doReset:function(){this._hash=new g.init(m.slice(0))},
_doProcessBlock:function(c,d){for(var b=this._hash.words,e=b[0],f=b[1],g=b[2],j=b[3],h=b[4],m=b[5],n=b[6],q=b[7],p=0;64>p;p++){if(16>p)a[p]=c[d+p]|0;else{var k=a[p-15],l=a[p-2];a[p]=((k<<25|k>>>7)^(k<<14|k>>>18)^k>>>3)+a[p-7]+((l<<15|l>>>17)^(l<<13|l>>>19)^l>>>10)+a[p-16]}k=q+((h<<26|h>>>6)^(h<<21|h>>>11)^(h<<7|h>>>25))+(h&m^~h&n)+r[p]+a[p];l=((e<<30|e>>>2)^(e<<19|e>>>13)^(e<<10|e>>>22))+(e&f^e&g^f&g);q=n;n=m;m=h;h=j+k|0;j=g;g=f;f=e;e=k+l|0}b[0]=b[0]+e|0;b[1]=b[1]+f|0;b[2]=b[2]+g|0;b[3]=b[3]+j|0;b[4]=b[4]+h|0;b[5]=b[5]+m|0;b[6]=b[6]+n|0;b[7]=b[7]+q|0},
_doFinalize:function(){var a=this._data,d=a.words,b=8*this._nDataBytes,e=8*a.sigBytes;d[e>>>5]|=128<<24-e%32;d[(e+64>>>9<<4)+14]=h.floor(b/4294967296);d[(e+64>>>9<<4)+15]=b;a.sigBytes=4*d.length;this._process();return this._hash},
clone:function(){var a=q.clone.call(this);a._hash=this._hash.clone();return a}});
s.SHA256=q._createHelper(f);s.HmacSHA256=q._createHmacHelper(f)})(Math);
// algo.HMAC: keys longer than one hasher block are hashed first; the key is then
// XORed with 0x5c5c5c5c (1549556828, outer pad) and 0x36363636 (909522486,
// inner pad). finalize() hashes the inner digest together with the outer key.
(function(){var h=CryptoJS,s=h.enc.Utf8;
h.algo.HMAC=h.lib.Base.extend({init:function(f,g){f=this._hasher=new f.init;"string"==typeof g&&(g=s.parse(g));var h=f.blockSize,m=4*h;g.sigBytes>m&&(g=f.finalize(g));g.clamp();for(var r=this._oKey=g.clone(),l=this._iKey=g.clone(),k=r.words,n=l.words,j=0;j<h;j++)k[j]^=1549556828,n[j]^=909522486;r.sigBytes=l.sigBytes=m;this.reset()},
reset:function(){var f=this._hasher;f.reset();f.update(this._iKey)},
update:function(f){this._hasher.update(f);return this},
finalize:function(f){var g=this._hasher;f=g.finalize(f);g.reset();return g.finalize(this._oKey.clone().concat(f))}})})();

// IVONA Speech Cloud credentials. Both values are placeholders: replace them
// with the access key / secret key pair generated for your own account.
// The secret key is only used locally to derive the request signature.
var IVONA_ACCESS_KEY = 'YOUR_ACCESS_KEY_HERE';
var IVONA_SECRET_KEY = 'your_secret_key_here';

/**
 * Formats a number as text, prefixing a single "0" when the value is below 10
 * (so 7 becomes "07" while 12 stays "12").
 *
 * @param {number} number - value to format.
 * @returns {string} the formatted text.
 */
function TM(number) {
    return (number < 10 ? "0" : "") + number.toString();
}
// Take a single UTC snapshot so the date stamp and the timestamp always agree.
var now = new Date();

// utc_date is "YYYYMMDD"; utc_time appends "T", "hhmmss" and "Z" to it.
var utc_date = [
    now.getUTCFullYear().toString(),
    TM(now.getUTCMonth() + 1), // getUTCMonth() is zero-based
    TM(now.getUTCDate())
].join("");
var utc_time = utc_date + "T" + [
    TM(now.getUTCHours()),
    TM(now.getUTCMinutes()),
    TM(now.getUTCSeconds())
].join("") + "Z";

// Names used by the signing code further down.
var dateStamp = utc_date;
var dateTime = utc_time;
var regionName = "eu-west-1";
var serviceName = "tts";

// JSON request body. Alternative Russian sample (the text means
// "The temperature has exceeded 60 degrees"):
// var requestData = '{"Input":{"Data":"Температура превысила 60 градусов"},"Voice":{"Name":"Tatyana","Language":"ru-RU","Gender":"Female"}}';
// English sample:
var requestData = '{"Input":{"Data":"Hello world"}}';
/**
 * Derives the signing key and signs the given string with it.
 *
 * The key starts as "AWS4" + IVONA_SECRET_KEY and is fed through a chain of
 * HMAC-SHA256 steps over dateStamp, regionName, serviceName and the literal
 * "aws4_request"; the resulting key is finally used to HMAC StringToSign.
 * Despite the function's name, the value returned is the signature.
 *
 * @param {string} StringToSign - the string to be signed.
 * @returns {object} the CryptoJS HMAC result for StringToSign.
 */
function getSignatureKey(StringToSign) {
   var chain = [dateStamp, regionName, serviceName, "aws4_request", StringToSign];
   var current = "AWS4" + IVONA_SECRET_KEY;
   for (var step = 0; step < chain.length; step++) {
      // Each result becomes the key for the next link of the chain.
      current = CryptoJS.HmacSHA256(chain[step], current);
   }
   return current;
}

// ---------------------------------------------------------------------------
// Build a signed CreateSpeech request (AWS4-HMAC-SHA256 scheme), POST it, and
// offer the returned audio as a file download ("audio.mp3").
// ---------------------------------------------------------------------------

// Host and credential scope are derived from regionName/serviceName so that
// the URL, the signed "host" header and the signature scope cannot drift apart.
var host = serviceName + "." + regionName + ".ivonacloud.com";
var signedHeaders = "content-type;host;x-amz-content-sha256;x-amz-date";
var credentialScope = dateStamp + "/" + regionName + "/" + serviceName + "/aws4_request";

// Hash of the request body; it is converted to hex wherever it is used as text.
var requestDataHash = CryptoJS.SHA256(requestData);

// Canonical request: method, path, (empty) query string, the signed headers
// in name order, a blank line, the signed header list, and the body hash.
var Request = [
    "POST",
    "/CreateSpeech",
    "",
    "content-type:application/json",
    "host:" + host,
    "x-amz-content-sha256:" + requestDataHash,
    "x-amz-date:" + dateTime,
    "",
    signedHeaders,
    requestDataHash
].join("\n");
var RequestHash = CryptoJS.SHA256(Request);

var StringToSign = [
    "AWS4-HMAC-SHA256",
    dateTime,
    credentialScope,
    RequestHash
].join("\n");

var signature = getSignatureKey(StringToSign);

var xhr = new XMLHttpRequest();
var path = "https://" + host + "/CreateSpeech";

xhr.open("POST", path, true);
xhr.responseType = "arraybuffer"; // the response body is binary audio data
xhr.setRequestHeader("Content-Type", "application/json");
xhr.setRequestHeader("X-Amz-Date", dateTime);
xhr.setRequestHeader("x-amz-content-sha256", requestDataHash);
xhr.setRequestHeader("Authorization",
    "AWS4-HMAC-SHA256 Credential=" + IVONA_ACCESS_KEY + "/" + credentialScope +
    ", SignedHeaders=" + signedHeaders +
    ", Signature=" + signature);
// Content-Length is intentionally not set: it is a forbidden header name that
// browsers refuse to let scripts set, and the browser computes the correct
// value from the body (a fixed 32 would be wrong for any other text anyway).

xhr.onload = function () {
    if (xhr.status !== 200) {
        console.error("IVONA CreateSpeech failed: HTTP " + xhr.status + " " + xhr.statusText);
        return;
    }
    var blob = new Blob([xhr.response], {type: "audio/mp3"});

    // The rest is only needed to save the audio as a file: click a hidden
    // temporary link that points at the blob.
    var a = document.createElement("a");
    var url = window.URL.createObjectURL(blob);
    a.style.display = "none";
    a.href = url;
    a.download = "audio.mp3";
    document.body.appendChild(a);
    a.click();

    // Clean up on a later tick so the download has been started before the
    // object URL is released; also remove the temporary link from the page.
    setTimeout(function () {
        window.URL.revokeObjectURL(url);
        document.body.removeChild(a);
    }, 0);
};
xhr.onerror = function () {
    console.error("IVONA CreateSpeech request failed before a response was received");
};
xhr.send(requestData);

Sorry for the somewhat ugly code - it is only a prototype, but it works.

  1. Register on http://developer.ivona.com/en/speechcloud/introduction.html (use the ‘Sign In’ link in the top right corner; on the next page check ‘I am a new customer’) to obtain an access key and a secret key.

  2. In ‘My account’ select the ‘Speech Cloud’ tab and, in the ‘Credentials’ section, press the ‘Generate new credentials’ button. Copy the keys and insert them into the code above, replacing the YOUR_ACCESS_KEY_HERE and your_secret_key_here strings.

  3. Save this as ivona-cloud.js.

  4. Create a new HTML file named “ivona.html”:

    <!DOCTYPE html>
    <html>
    <head>
    <script type="text/javascript" src="./ivona-cloud.js"></script>
    </head>
    <body>
    </body>
    </html>

  5. Save it in the same directory as ivona-cloud.js.

  6. Right mouse click on ‘ivona.html’ file and select ‘Open with…’->‘Google Chrome’ (it is not tested in any other browsers)

  7. When the page has loaded, the audio file is downloaded automatically. It should say “Hello world”.

Do you have any ideas, solutions, etc. for how this can be connected to and used with OpenHAB in an easy way?

Thanks.

1 Like

I was looking for a similar solution since the Google TTS service seems to shut down my requests pretty quickly. I also think that Ivona Speech Cloud has very natural-sounding speech, even in Danish, at the same level as Google or higher.

However instead of your solution, I found a node js project Node IVONA (I already had node on my RPi) which suited my needs better. This script is able to get the mp3 stream directly and for instance pipe it to a file. In an openhab rule I can execute a node command generating the mp3 and save it in a folder shared by the openhab web server.
I have an Onkyo receiver that is on my network, so using the openhab binding I ask it to play this generated file (streaming it from RPi) and afterwards switch back to what ever state it had before.
This solution does have some delay (like 5-10 secs) caused mainly by 1. the generation of the mp3 (2-3 secs) and my receiver switching on (1-2secs) and initializing the NET source and buffering (3-4secs), but it all depends on the use cases whether that is ok.
If you can use the say() command directly, you would just need the generated valid URL like your example. It should be changed to a GET request (rather than POST). Then you can make a script that returns the URL and call this from a rule (using executeCommand) prior to the say() call.
A second option would be to reimplement your js in the openhab rule directly using the JAVA environment.
Thirdly you could write a TTS binding for Ivona

Using say directly is a good idea (the say command doesn’t support a URL, but playStream(String url) does).
I don’t have experience with the JAVA environment (but I am going to try it later), so streaming from a URL is the best way for now.
Ivona Cloud supports GET requests, so I plan to use a JavaScript transformation function to prepare the URL:

playStream(transform("JS", "ivona-cloud.js", "Hello world"))

I have some doubts about the ECMAScript engine used by OpenHAB. Will the script from the first post work if I use the transform function? I think yes, because there is no jQuery and XMLHttpRequest won’t be used.

P.S. I am going to use a caching proxy to reduce delays.

Thanks.

1 Like

Did you make any progress on using Ivona Brian ? I love the voice file - sounds so Ironman!

According to the Ivona website:

Amazon Polly will replace the IVONA Speech Cloud Beta service.

There has been a community thread, pull request and IoT marketplace addon for Amazon Polly for a while now.
I haven’t tried Polly myself, but it may have the same voices and quality as Ivona.

2 Likes