HTML/JS voice recorder not working in Django (Python) implementation
I have tried to implement a Django application in which the user can record a voice message, pass it to the Watson Speech to Text API, receive the transcribed data, and show the result to the user on a separate page. The recorder is not working and I am not able to save the file. What changes should be made if I want to deploy the application to Heroku, have the voice recording saved there, and pass the audio file to the Watson API?
Code:
stt/stt/settings.py:
import os

# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

# Quick-start development settings - unsuitable for production
# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/

# SECURITY WARNING: keep the secret key used in production secret!
SECRET_KEY = 'my-secret-key'

# SECURITY WARNING: don't run with debug turned on in production!
DEBUG = True

ALLOWED_HOSTS = ['*']

# Application definition
INSTALLED_APPS = [
    'django.contrib.admin',
    'django.contrib.auth',
    'django.contrib.contenttypes',
    'django.contrib.sessions',
    'django.contrib.messages',
    'django.contrib.staticfiles',
    'record',
]

MIDDLEWARE = [
    'django.middleware.security.SecurityMiddleware',
    'django.contrib.sessions.middleware.SessionMiddleware',
    'django.middleware.common.CommonMiddleware',
    'django.middleware.csrf.CsrfViewMiddleware',
    'django.contrib.auth.middleware.AuthenticationMiddleware',
    'django.contrib.messages.middleware.MessageMiddleware',
    'django.middleware.clickjacking.XFrameOptionsMiddleware',
]

ROOT_URLCONF = 'stt.urls'

TEMPLATES = [
    {
        'BACKEND': 'django.template.backends.django.DjangoTemplates',
        'DIRS': [],
        'APP_DIRS': True,
        'OPTIONS': {
            'context_processors': [
                'django.template.context_processors.debug',
                'django.template.context_processors.request',
                'django.contrib.auth.context_processors.auth',
                'django.contrib.messages.context_processors.messages',
            ],
        },
    },
]

WSGI_APPLICATION = 'stt.wsgi.application'

# Database
# https://docs.djangoproject.com/en/1.11/ref/settings/#databases
DATABASES = {
    'default': {
        'ENGINE': 'django.db.backends.sqlite3',
        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),
    }
}

# Password validation
# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
    {'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator'},
    {'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator'},
    {'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator'},
    {'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator'},
]

# Internationalization
# https://docs.djangoproject.com/en/1.11/topics/i18n/
LANGUAGE_CODE = 'en-us'
TIME_ZONE = 'UTC'
USE_I18N = True
USE_L10N = True
USE_TZ = True

# Static files (CSS, JavaScript, images)
# https://docs.djangoproject.com/en/1.11/howto/static-files/
STATIC_URL = '/static/'

TEMPLATE_DIRS = (
    os.path.join(BASE_DIR, 'templates'),
)

WATSON_USERNAME = 'watson-username'
WATSON_PASSWORD = 'watson-password'
WATSON_ENDPOINT = 'https://stream.watsonplatform.net/speech-to-text/api/v1/recognize'
WATSON_DEFAULT_PARAMS = {
    'continuous': True,
    'timestamps': True,
    'word_confidence': True,
}
WATSON_DEFAULT_HEADERS = {
    'Content-Type': 'audio/wav'
}
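Two Heroku-specific changes would be needed here. First, Heroku's dyno filesystem is ephemeral, so any recording written to local disk disappears on restart; files meant to persist have to go to external storage (or be forwarded to Watson immediately and only the transcript kept). Second, credentials are normally supplied as Heroku config vars rather than hard-coded in settings.py. A minimal sketch of reading them from the environment (the variable names DJANGO_SECRET_KEY, WATSON_USERNAME, WATSON_PASSWORD, and DJANGO_DEBUG are my own choice, not anything the project already defines):

import os

# Read secrets from Heroku config vars (set with `heroku config:set KEY=value`);
# the fallbacks keep local development working without any env setup.
SECRET_KEY = os.environ.get('DJANGO_SECRET_KEY', 'my-secret-key')
WATSON_USERNAME = os.environ.get('WATSON_USERNAME', 'watson-username')
WATSON_PASSWORD = os.environ.get('WATSON_PASSWORD', 'watson-password')

# DEBUG should be off in production; treat anything but "1" as False.
DEBUG = os.environ.get('DJANGO_DEBUG', '0') == '1'

Deploying to Heroku would also need a Procfile declaring the web process (typically something like `web: gunicorn stt.wsgi`) and static file handling such as WhiteNoise, since the Django dev server is not used there.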
stt/stt/urls.py:
from django.conf.urls import include, url
from django.contrib import admin

urlpatterns = [
    url(r'^admin/', admin.site.urls),
    url(r'^record/', include('record.urls')),
]
stt/stt/wsgi.py:
import os

from django.core.wsgi import get_wsgi_application

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "stt.settings")

application = get_wsgi_application()
stt/record/static/record/img/mic128.png: image file
stt/record/static/record/img/save.svg: image file
stt/record/static/record/js/recorderjs/recorder.js:
(function(window){

  // NOTE: this relative path is resolved against the page URL, not this
  // script's URL, so under Django the worker request will likely 404 unless
  // the path matches the served static URL (e.g.
  // '/static/record/js/recorderjs/recorderworker.js'). A worker that fails
  // to load is a common reason this recorder silently does nothing.
  var WORKER_PATH = 'js/recorderjs/recorderworker.js';

  var Recorder = function(source, cfg){
    var config = cfg || {};
    var bufferLen = config.bufferLen || 4096;
    this.context = source.context;
    if (!this.context.createScriptProcessor){
      this.node = this.context.createJavaScriptNode(bufferLen, 2, 2);
    } else {
      this.node = this.context.createScriptProcessor(bufferLen, 2, 2);
    }
    var worker = new Worker(config.workerPath || WORKER_PATH);
    worker.postMessage({
      command: 'init',
      config: {
        sampleRate: this.context.sampleRate
      }
    });
    var recording = false,
        currCallback;

    this.node.onaudioprocess = function(e){
      if (!recording) return;
      worker.postMessage({
        command: 'record',
        buffer: [
          e.inputBuffer.getChannelData(0),
          e.inputBuffer.getChannelData(1)
        ]
      });
    }

    this.configure = function(cfg){
      for (var prop in cfg){
        if (cfg.hasOwnProperty(prop)){
          config[prop] = cfg[prop];
        }
      }
    }

    this.record = function(){
      recording = true;
    }

    this.stop = function(){
      recording = false;
    }

    this.clear = function(){
      worker.postMessage({ command: 'clear' });
    }

    this.getBuffers = function(cb) {
      currCallback = cb || config.callback;
      worker.postMessage({ command: 'getBuffers' })
    }

    this.exportWAV = function(cb, type){
      currCallback = cb || config.callback;
      type = type || config.type || 'audio/wav';
      if (!currCallback) throw new Error('Callback not set');
      worker.postMessage({
        command: 'exportWAV',
        type: type
      });
    }

    this.exportMonoWAV = function(cb, type){
      currCallback = cb || config.callback;
      type = type || config.type || 'audio/wav';
      if (!currCallback) throw new Error('Callback not set');
      worker.postMessage({
        command: 'exportMonoWAV',
        type: type
      });
    }

    worker.onmessage = function(e){
      var blob = e.data;
      currCallback(blob);
    }

    source.connect(this.node);
    this.node.connect(this.context.destination);    // if the script node is not connected to an output the "onaudioprocess" event is not triggered in Chrome.
  };

  Recorder.setupDownload = function(blob, filename){
    var url = (window.URL || window.webkitURL).createObjectURL(blob);
    var link = document.getElementById("save");
    link.href = url;
    link.download = filename || 'output.wav';
  }

  window.Recorder = Recorder;

})(window);
stt/record/static/record/js/recorderjs/recorderworker.js:
var recLength = 0,
    recBuffersL = [],
    recBuffersR = [],
    sampleRate;

this.onmessage = function(e){
  switch(e.data.command){
    case 'init':
      init(e.data.config);
      break;
    case 'record':
      record(e.data.buffer);
      break;
    case 'exportWAV':
      exportWAV(e.data.type);
      break;
    case 'exportMonoWAV':
      exportMonoWAV(e.data.type);
      break;
    case 'getBuffers':
      getBuffers();
      break;
    case 'clear':
      clear();
      break;
  }
};

function init(config){
  sampleRate = config.sampleRate;
}

function record(inputBuffer){
  recBuffersL.push(inputBuffer[0]);
  recBuffersR.push(inputBuffer[1]);
  recLength += inputBuffer[0].length;
}

function exportWAV(type){
  var bufferL = mergeBuffers(recBuffersL, recLength);
  var bufferR = mergeBuffers(recBuffersR, recLength);
  var interleaved = interleave(bufferL, bufferR);
  var dataview = encodeWAV(interleaved);
  var audioBlob = new Blob([dataview], { type: type });

  this.postMessage(audioBlob);
}

function exportMonoWAV(type){
  var bufferL = mergeBuffers(recBuffersL, recLength);
  var dataview = encodeWAV(bufferL, true);
  var audioBlob = new Blob([dataview], { type: type });

  this.postMessage(audioBlob);
}

function getBuffers() {
  var buffers = [];
  buffers.push( mergeBuffers(recBuffersL, recLength) );
  buffers.push( mergeBuffers(recBuffersR, recLength) );
  this.postMessage(buffers);
}

function clear(){
  recLength = 0;
  recBuffersL = [];
  recBuffersR = [];
}

function mergeBuffers(recBuffers, recLength){
  var result = new Float32Array(recLength);
  var offset = 0;
  for (var i = 0; i < recBuffers.length; i++){
    result.set(recBuffers[i], offset);
    offset += recBuffers[i].length;
  }
  return result;
}

function interleave(inputL, inputR){
  var length = inputL.length + inputR.length;
  var result = new Float32Array(length);

  var index = 0,
      inputIndex = 0;

  while (index < length){
    result[index++] = inputL[inputIndex];
    result[index++] = inputR[inputIndex];
    inputIndex++;
  }
  return result;
}

function floatTo16BitPCM(output, offset, input){
  for (var i = 0; i < input.length; i++, offset += 2){
    var s = Math.max(-1, Math.min(1, input[i]));
    output.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
  }
}

function writeString(view, offset, string){
  for (var i = 0; i < string.length; i++){
    view.setUint8(offset + i, string.charCodeAt(i));
  }
}

function encodeWAV(samples, mono){
  var buffer = new ArrayBuffer(44 + samples.length * 2);
  var view = new DataView(buffer);

  /* RIFF identifier */
  writeString(view, 0, 'RIFF');
  /* file length */
  view.setUint32(4, 32 + samples.length * 2, true);
  /* RIFF type */
  writeString(view, 8, 'WAVE');
  /* format chunk identifier */
  writeString(view, 12, 'fmt ');
  /* format chunk length */
  view.setUint32(16, 16, true);
  /* sample format (raw) */
  view.setUint16(20, 1, true);
  /* channel count */
  view.setUint16(22, mono ? 1 : 2, true);
  /* sample rate */
  view.setUint32(24, sampleRate, true);
  /* byte rate (sample rate * block align) */
  view.setUint32(28, sampleRate * 4, true);
  /* block align (channel count * bytes per sample) */
  view.setUint16(32, 4, true);
  /* bits per sample */
  view.setUint16(34, 16, true);
  /* data chunk identifier */
  writeString(view, 36, 'data');
  /* data chunk length */
  view.setUint32(40, samples.length * 2, true);

  floatTo16BitPCM(view, 44, samples);

  return view;
}
stt/record/static/record/js/audiodisplay.js:
function drawBuffer( width, height, context, data ) {
  var step = Math.ceil( data.length / width );
  var amp = height / 2;
  context.fillStyle = "silver";
  context.clearRect(0, 0, width, height);
  for (var i = 0; i < width; i++){
    var min = 1.0;
    var max = -1.0;
    for (var j = 0; j < step; j++) {
      var datum = data[(i * step) + j];
      if (datum < min) min = datum;
      if (datum > max) max = datum;
    }
    context.fillRect(i, (1 + min) * amp, 1, Math.max(1, (max - min) * amp));
  }
}
stt/record/static/record/js/main.js:
window.AudioContext = window.AudioContext || window.webkitAudioContext;

var audioContext = new AudioContext();
var audioInput = null,
    realAudioInput = null,
    inputPoint = null,
    audioRecorder = null;
var rafID = null;
var analyserContext = null;
var canvasWidth, canvasHeight;
var recIndex = 0;

/* TODO:
- offer mono option
- "monitor input" switch
*/

function saveAudio() {
    audioRecorder.exportWAV( doneEncoding );
    // could get mono instead by saying
    // audioRecorder.exportMonoWAV( doneEncoding );
}

function gotBuffers( buffers ) {
    var canvas = document.getElementById( "wavedisplay" );

    drawBuffer( canvas.width, canvas.height, canvas.getContext('2d'), buffers[0] );

    // the only time gotBuffers is called is right after a new recording is
    // completed - here's where we should set up the download.
    audioRecorder.exportWAV( doneEncoding );
}

function doneEncoding( blob ) {
    // NOTE: this only wires the blob to a download link in the page; nothing
    // is sent to the Django backend. To save the recording server-side, the
    // blob would also have to be uploaded here, e.g. POSTed as FormData via
    // fetch() or XMLHttpRequest to an upload view.
    Recorder.setupDownload( blob, "myRecording" + ((recIndex < 10) ? "0" : "") + recIndex + ".wav" );
    recIndex++;
}

function toggleRecording( e ) {
    if (e.classList.contains("recording")) {
        // stop recording
        audioRecorder.stop();
        e.classList.remove("recording");
        audioRecorder.getBuffers( gotBuffers );
    } else {
        // start recording
        if (!audioRecorder)
            return;
        e.classList.add("recording");
        audioRecorder.clear();
        audioRecorder.record();
    }
}

function convertToMono( input ) {
    var splitter = audioContext.createChannelSplitter(2);
    var merger = audioContext.createChannelMerger(2);

    input.connect( splitter );
    splitter.connect( merger, 0, 0 );
    splitter.connect( merger, 0, 1 );
    return merger;
}

function cancelAnalyserUpdates() {
    window.cancelAnimationFrame( rafID );
    rafID = null;
}

function updateAnalysers(time) {
    if (!analyserContext) {
        var canvas = document.getElementById("analyser");
        canvasWidth = canvas.width;
        canvasHeight = canvas.height;
        analyserContext = canvas.getContext('2d');
    }

    // analyzer draw code here
    {
        var SPACING = 3;
        var BAR_WIDTH = 1;
        var numBars = Math.round(canvasWidth / SPACING);
        var freqByteData = new Uint8Array(analyserNode.frequencyBinCount);

        analyserNode.getByteFrequencyData(freqByteData);

        analyserContext.clearRect(0, 0, canvasWidth, canvasHeight);
        analyserContext.fillStyle = '#F6D565';
        analyserContext.lineCap = 'round';
        var multiplier = analyserNode.frequencyBinCount / numBars;

        // Draw a rectangle for each frequency bin.
        for (var i = 0; i < numBars; ++i) {
            var magnitude = 0;
            var offset = Math.floor( i * multiplier );
            // gotta sum/average the block, or we miss narrow-bandwidth spikes
            for (var j = 0; j < multiplier; j++)
                magnitude += freqByteData[offset + j];
            magnitude = magnitude / multiplier;
            var magnitude2 = freqByteData[i * multiplier];
            analyserContext.fillStyle = "hsl( " + Math.round((i * 360) / numBars) + ", 100%, 50%)";
            analyserContext.fillRect(i * SPACING, canvasHeight, BAR_WIDTH, -magnitude);
        }
    }

    rafID = window.requestAnimationFrame( updateAnalysers );
}

function toggleMono() {
    if (audioInput != realAudioInput) {
        audioInput.disconnect();
        realAudioInput.disconnect();
        audioInput = realAudioInput;
    } else {
        realAudioInput.disconnect();
        audioInput = convertToMono( realAudioInput );
    }

    audioInput.connect(inputPoint);
}

function gotStream(stream) {
    inputPoint = audioContext.createGain();

    // Create an AudioNode from the stream.
    realAudioInput = audioContext.createMediaStreamSource(stream);
    audioInput = realAudioInput;
    audioInput.connect(inputPoint);

    // audioInput = convertToMono( input );

    analyserNode = audioContext.createAnalyser();
    analyserNode.fftSize = 2048;
    inputPoint.connect( analyserNode );

    audioRecorder = new Recorder( inputPoint );

    zeroGain = audioContext.createGain();
    zeroGain.gain.value = 0.0;
    inputPoint.connect( zeroGain );
    zeroGain.connect( audioContext.destination );
    updateAnalysers();
}

function initAudio() {
    if (!navigator.getUserMedia)
        navigator.getUserMedia = navigator.webkitGetUserMedia || navigator.mozGetUserMedia;
    if (!navigator.cancelAnimationFrame)
        navigator.cancelAnimationFrame = navigator.webkitCancelAnimationFrame || navigator.mozCancelAnimationFrame;
    if (!navigator.requestAnimationFrame)
        navigator.requestAnimationFrame = navigator.webkitRequestAnimationFrame || navigator.mozRequestAnimationFrame;

    navigator.getUserMedia(
        {
            "audio": {
                "mandatory": {
                    "googEchoCancellation": "false",
                    "googAutoGainControl": "false",
                    "googNoiseSuppression": "false",
                    "googHighpassFilter": "false"
                },
                "optional": []
            }
        }, gotStream, function(e) {
            alert('Error getting audio');
            console.log(e);
        });
}

window.addEventListener('load', initAudio );
stt/record/static/record/style.css:
html { overflow: hidden; }
body {
  font: 14pt Arial, sans-serif;
  background: lightgrey;
  display: flex;
  flex-direction: column;
  height: 100vh;
  width: 100%;
  margin: 0 0;
}
canvas {
  display: inline-block;
  background: #202020;
  width: 95%;
  height: 45%;
  box-shadow: 0 0 10px blue;
}
#controls {
  display: flex;
  flex-direction: row;
  align-items: center;
  justify-content: space-around;
  height: 20%;
  width: 100%;
}
#record { height: 15vh; }
#record.recording { background: red; }
#save, #save img { height: 10vh; }
#save { opacity: 0.25; }
#save[download] { opacity: 1; }
#viz {
  height: 80%;
  width: 100%;
  display: flex;
  flex-direction: column;
  justify-content: space-around;
  align-items: center;
}
@media (orientation: landscape) {
  body { flex-direction: row; }
  #controls { flex-direction: column; height: 100%; width: 10%; }
  #viz { height: 100%; width: 90%; }
}
stt/record/templates/record/index.html:
{% load static %}
<!DOCTYPE html>
<html>
<head>
  <meta name="viewport" content="width=device-width,initial-scale=1">
  <title>Audio Recorder</title>
  <link rel="stylesheet" href="{% static 'record/style.css' %}"/>
  <script src="{% static 'record/js/audiodisplay.js' %}"></script>
  <script src="{% static 'record/js/recorderjs/recorder.js' %}"></script>
  <script src="{% static 'record/js/main.js' %}"></script>
</head>
<body>
  <div id="viz">
    <canvas id="analyser" width="1024" height="500"></canvas>
    <canvas id="wavedisplay" width="1024" height="500"></canvas>
  </div>
  <div id="controls">
    <img id="record" src="{% static 'record/img/mic128.png' %}" onclick="toggleRecording(this);">
    <a id="save" href="#"><img src="{% static 'record/img/save.svg' %}"></a>
  </div>
</body>
</html>
stt/record/apps.py:
from django.apps import AppConfig


class RecordConfig(AppConfig):
    name = 'record'
stt/record/urls.py:
from django.conf.urls import url

from . import views

urlpatterns = [
    url(r'^$', views.index, name='index'),
]
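For the recorder to save its blob server-side, the app would also need a route the browser can POST to. A minimal sketch, assuming a hypothetical views.upload view (defined in the sketch after views.py below):

from django.conf.urls import url

from . import views

urlpatterns = [
    url(r'^$', views.index, name='index'),
    # Hypothetical endpoint the recorder would POST the exported WAV blob to.
    url(r'^upload/$', views.upload, name='upload'),
]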
stt/record/views.py:
from django.shortcuts import render

from pydub import AudioSegment
from glob import glob
from math import ceil
from os.path import basename, splitext, exists
import json
import requests
import csv

from stt.settings import WATSON_USERNAME, WATSON_PASSWORD, WATSON_ENDPOINT, WATSON_DEFAULT_PARAMS, \
    WATSON_DEFAULT_HEADERS


def index(request):
    if request.method == 'GET':
        return render(request, 'record/index.html')

    # POST
    # via: http://www.propublica.org/podcast/item/how-a-reporter-pierced-the-hype-behind-theranos/
    download_url = 'https://api.soundcloud.com/tracks/247345268/download?client_id=cua40o3jg3emvp6tv4u6ymyyo50nugpj'
    audio_filename = 'podcast.mp3'
    audio_segment_seconds = 300

    if not exists(audio_filename):
        print("downloading from", download_url)
        resp = requests.get(download_url)
        with open(audio_filename, 'wb') as w:
            w.write(resp.content)
        print("wrote audio file to", audio_filename)

    # convert to wav, in segments
    audio = AudioSegment.from_mp3(audio_filename)
    xs = 0
    while xs < audio.duration_seconds:
        ys = min(xs + audio_segment_seconds, ceil(audio.duration_seconds))
        fname = str(xs).rjust(5, '0') + '-' + str(ys).rjust(5, '0') + '.wav'
        audio[xs * 1000:ys * 1000].export(fname, format='wav')
        print("saved", fname)
        xs = ys

    # transcribe each wav with Watson
    for fname in glob("*.wav"):
        # download Watson's response
        tname = splitext(basename(fname))[0] + '.json'
        if exists(tname):
            print("already transcribed", tname)
        else:
            print("transcribing", fname)
            with open(fname, 'rb') as r:
                watson_response = requests.post(
                    WATSON_ENDPOINT,
                    data=r,
                    auth=(WATSON_USERNAME, WATSON_PASSWORD),
                    params=WATSON_DEFAULT_PARAMS,
                    headers=WATSON_DEFAULT_HEADERS,
                    stream=False
                )
            with open(tname, 'w') as w:
                w.write(watson_response.text)
            print("wrote transcript to", tname)

    # print out the raw transcript and a word csv
    rawfile = open("raw.txt", "w")
    wordsfile = open("words.csv", "w")
    csvfile = csv.writer(wordsfile)
    csvfile.writerow(['word', 'confidence', 'start', 'end'])
    for fname in sorted(glob("*.json")):
        with open(fname, 'r') as f:
            results = json.load(f)['results']
        for linenum, result in enumerate(results):  # each result is a line
            if result.get('alternatives'):  # each result may have many alternatives
                # pick the best alternative
                lineobj = result.get('alternatives')[0]
                # rawfile.writeline(lineobj['transcript'])
                word_timestamps = lineobj['timestamps']
                if word_timestamps:
                    rawfile.write(lineobj['transcript'] + "\n")
                    word_confidences = lineobj['word_confidence']
                    for idx, wordts in enumerate(word_timestamps):
                        txt, tstart, tend = wordts
                        confidence = round(100 * word_confidences[idx][1])
                        csvfile.writerow([txt, confidence, tstart, tend])
    rawfile.close()
    wordsfile.close()
In the views.py file I need to be able to print the rawfile text onto a new HTML result page that should open after the user has recorded a message and it has been uploaded to Watson.
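For reference, a minimal sketch of what that could look like: a hypothetical upload view (matching the upload/ route sketched above) that receives the recorded WAV from the browser, sends it to Watson, and renders the transcript lines into a hypothetical record/result.html template. The form field name 'audio' and the template are my own placeholders, not part of the original project:

import requests
from django.shortcuts import render
from django.views.decorators.csrf import csrf_exempt

from stt.settings import (WATSON_USERNAME, WATSON_PASSWORD, WATSON_ENDPOINT,
                          WATSON_DEFAULT_PARAMS, WATSON_DEFAULT_HEADERS)


@csrf_exempt  # simpler for a sketch; a real app would send the CSRF token with the upload
def upload(request):
    # The recorder would POST its exported blob as multipart form data,
    # e.g. formData.append('audio', blob, 'recording.wav') on the JS side.
    wav = request.FILES['audio']

    watson_response = requests.post(
        WATSON_ENDPOINT,
        data=wav.read(),
        auth=(WATSON_USERNAME, WATSON_PASSWORD),
        params=WATSON_DEFAULT_PARAMS,
        headers=WATSON_DEFAULT_HEADERS,
    )

    # Collect the best alternative of each result line, the same way
    # views.py already does when it writes raw.txt.
    lines = []
    for result in watson_response.json().get('results', []):
        if result.get('alternatives'):
            lines.append(result['alternatives'][0]['transcript'])

    # record/result.html is a hypothetical template that loops over `lines`.
    return render(request, 'record/result.html', {'lines': lines})

On the browser side, doneEncoding would then POST the blob to /record/upload/ (for example with fetch and FormData) and display the rendered response, instead of only wiring up a download link.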