Transferring Sound Data with Binary.JS and Buffering for Smooth Playback

Socket.io is a great WebSocket library and it does a lot of things very well; however, one thing it doesn't handle well is binary data.  Version 1.0 supposedly added support for binary data, but I wasn't able to get it to behave properly.

Binary.js is a WebSocket library built specifically for handling binary data.  In this example, we set up Binary.js on the server and on the client, and allow each client to broadcast audio to all other clients.  The original application was audio chat for a multi-player game, but the concepts here are a good demonstration of both Binary.js and the HTML5 Web Audio API.

Here is the code to set up a Binary.JS connection on the client side:

 
var host = location.origin.replace(/^http/, 'ws') + '/binary-endpoint';
var client = new BinaryClient(host);

You'll also need to include the binary.js file in the HTML, like so:

 
<script src="...your-path.../binary.js"></script>

To write to the stream:

 
var stream = client.createStream();
stream.write(chunk);
stream.end();
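
Note that the connection is established asynchronously, so it's safest to wait for the client's 'open' event before creating or writing to streams.  A minimal sketch; BinaryClient also emits 'error' and 'close' events that are worth logging:

client.on('open', function () {
  console.log('Binary.JS connection open');
  // safe to call client.createStream() from this point on
});

client.on('error', function (err) {
  console.log('Binary.JS connection error: ' + err);
});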

Below is the code to capture audio via the HTML5 Web Audio API.  Since this blog post isn't focusing on that, I'll just refer you to the MDN Web Audio API documentation for details.

 
// initialize variables
var soundController = {};
soundController.recording = false;

var audioContext = window.AudioContext || window.webkitAudioContext;

navigator.mediaDevices = navigator.mediaDevices || 
   ((navigator.mozGetUserMedia || navigator.webkitGetUserMedia) ? {
     getUserMedia: function(c) {
       return new Promise(function(y, n) {
         (navigator.mozGetUserMedia ||
          navigator.webkitGetUserMedia).call(navigator, c, y, n);
       });
     }
} : null);

// bail out early if getUserMedia isn't available at all
if (!navigator.mediaDevices) {
  throw new Error("getUserMedia() is not supported in this browser.");
}

soundController.device = navigator.mediaDevices.getUserMedia({ audio: true, 
  video: false });

soundController.device.then(function (stream) {
  var context = new audioContext();
  var audioInput = context.createMediaStreamSource(stream);
  var bufferSize = 2048;
  // create a javascript node
  soundController.recorder = context.createScriptProcessor(bufferSize, 1, 1);
  // specify the processing function
  soundController.recorder.onaudioprocess = soundController.recorderProcess;
  // connect stream to our recorder
  audioInput.connect(soundController.recorder);
  // connect our recorder to the previous destination
  soundController.recorder.connect(context.destination);
}).catch(function (err) {
  console.log("The following error occurred: " + err.name);
});

function convertFloat32ToInt16(buffer) {
  var l = buffer.length;
  var buf = new Int16Array(l);
  while (l--) {
    // clamp to [-1, 1] before scaling to the 16-bit integer range
    buf[l] = Math.max(-1, Math.min(1, buffer[l])) * 0x7FFF;
  }
  return buf.buffer;
}
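
// Side note: convertFloat32ToInt16 above is optional (the call to it is
// commented out in recorderProcess below).  If you do send Int16 data to cut
// bandwidth in half, the receiving side needs a matching decoder before it
// builds a Float32Array; a sketch of one:
function convertInt16ToFloat32(arrayBuffer) {
  var input = new Int16Array(arrayBuffer);
  var output = new Float32Array(input.length);
  for (var i = 0; i < input.length; i++) {
    // scale back from the 16-bit integer range to -1..1
    output[i] = input[i] / 0x7FFF;
  }
  return output;
}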

soundController.recorderProcess = function (e) {
  var left = e.inputBuffer.getChannelData(0);
  if (soundController.recording === true) {
    // var chunk = convertFloat32ToInt16(left);
    var chunk = left;
    console.dir(chunk);
    soundController.stream.write(chunk);
  }
};

soundController.startRecording = function () {

  if (soundController.recording === false) {
    console.log('>>> Start Recording');

    //open binary stream
    soundController.stream = client.createStream({data: 'audio'});
    soundController.recording = true;
  }

};

soundController.stopRecording = function () {
  
  if (soundController.recording === true) {
    console.log('||| Stop Recording');

    soundController.recording = false;

    //close binary stream
    soundController.stream.end();
  }
};
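
To actually trigger recording, wire startRecording and stopRecording to whatever UI you like.  Here is a hypothetical push-to-talk example; the 'talk' button id is just an assumption for illustration:

var talkButton = document.getElementById('talk');  // hypothetical button id
talkButton.addEventListener('mousedown', soundController.startRecording);
talkButton.addEventListener('mouseup', soundController.stopRecording);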

Now, on to the server side. Here is how to set up Binary.JS.  Assuming you're using Express, you can create a standard Node http server from your app and attach the Binary.JS server to it, like so:

 
var express = require('express');
var http = require('http');
var app = express();
var server = http.createServer(app);  // BinaryServer needs an http server, not the Express app itself
var BinaryServer = require('binaryjs').BinaryServer;
var binaryserver = new BinaryServer({server: server, path: '/binary-endpoint'});
server.listen(process.env.PORT || 3000);  // or whatever port you normally use

Here, we've chosen the '/binary-endpoint' path for sending binary data.
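
The Express app can also serve the client-side files (the HTML page, binary.js, and your client script).  A minimal sketch, assuming they live in a directory named 'public':

app.use(express.static(__dirname + '/public'));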

Here’s the server side code that takes what it receives from the client and broadcasts the data to all other clients: 

 
binaryserver.on('connection', function(client){
  console.log('Binary Server connection started');

  client.on('stream', function(stream, meta) {
    console.log('>>>Incoming audio stream');

    // broadcast to all other clients
    for(var id in binaryserver.clients){
      if(binaryserver.clients.hasOwnProperty(id)){
        var otherClient = binaryserver.clients[id];
        if(otherClient != client){
          var send = otherClient.createStream(meta);
          stream.pipe(send);
        } // if (otherClient...
      } // if (binaryserver...
    } // for (var id in ...

    stream.on('end', function() {
      console.log('||| Audio stream ended');
    });
    
  }); //client.on
}); //binaryserver.on
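
The {data: 'audio'} metadata the client attached in createStream() arrives here as meta and is forwarded untouched by otherClient.createStream(meta).  If you later multiplex other kinds of streams over the same connection, the handler above could branch on it; a rough sketch:

client.on('stream', function (stream, meta) {
  if (meta && meta.data === 'audio') {
    // broadcast audio as shown above
  } else {
    // handle other stream types
  }
});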

On the client side, we can now play the data received:

 
var soundController = {};

soundController.speakerContext = new audioContext();

client.on('stream', function (stream) {
  soundController.nextTime = 0;
  var init = false;
  var audioCache = [];

  console.log('>>> Receiving Audio Stream');

  stream.on('data', function (data) {
    var array = new Float32Array(data);
    var buffer = soundController.speakerContext.createBuffer(1, 2048, 44100);
    buffer.copyToChannel(array, 0);

    audioCache.push(buffer);
    // make sure we put at least 5 chunks in the buffer before starting
    if ((init === true) || ((init === false) && (audioCache.length > 5))) { 
        init = true;
        soundController.playCache(audioCache);
    }
  });

  stream.on('end', function () {
    console.log('||| End of Audio Stream');    
  });

});

soundController.playCache = function (cache) {
  while (cache.length) {
    var buffer = cache.shift();
    var source    = soundController.speakerContext.createBufferSource();
    source.buffer = buffer;
    source.connect(soundController.speakerContext.destination);
    if (soundController.nextTime == 0) {
        // add a delay of 0.05 seconds
        soundController.nextTime = soundController.speakerContext.currentTime + 0.05;  
    }
    source.start(soundController.nextTime);
    // schedule buffers to be played consecutively
    soundController.nextTime+=source.buffer.duration;  
  }
};

Each 2048-sample chunk at 44.1 kHz lasts roughly 0.05 seconds, so buffering 5 of them yields about a quarter second of audio and allows for reasonably smooth playback.
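
You can work those numbers out directly from the capture parameters used above (2048 samples per chunk at a typical 44.1 kHz sample rate):

var bufferSize = 2048;    // samples per chunk, set in createScriptProcessor
var sampleRate = 44100;   // samples per second, the usual AudioContext rate
var chunkDuration = bufferSize / sampleRate;  // ~0.046 seconds per chunk
var initialBuffer = 5 * chunkDuration;        // ~0.23 seconds buffered before playback starts
console.log(chunkDuration.toFixed(3), initialBuffer.toFixed(3));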

And there you have it!  Audio chat over WebSockets using Binary.JS!