index.html

<html>

<head>
    <title>Ah-um-like-so-cool detector</title>

    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="description" content="Real-time voice recognition app to help speakers reduce their use of filler words.">
    <meta name="author" content="Zack Akil">
    <meta name="keywords"
        content="Real-time voice recognition app to help speakers reduce their use of filler words machine-learning vue tensorflow voice-recognition tensor techable-machine">

    <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs@1.3.1/dist/tf.min.js"></script>
    <script
        src="https://cdn.jsdelivr.net/npm/@tensorflow-models/speech-commands@0.4.0/dist/speech-commands.min.js"></script>

    <script src="https://cdn.jsdelivr.net/npm/easytimer@1.1.1/dist/easytimer.min.js"></script>

    <script src="https://cdn.jsdelivr.net/npm/vue@2.6.14/dist/vue.js"></script>

    <link rel="preconnect" href="https://fonts.googleapis.com">
    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
    <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@300;400&family=Sedgwick+Ave&display=swap"
        rel="stylesheet">


    <style>
        body {
            font-family: 'Nunito', sans-serif;
            text-align: center;
            zoom: 200%;
        }

        h1 {
            font-family: 'Sedgwick Ave', cursive;
            margin: 0;
        }

        h1>span {
            font-family: 'Nunito', sans-serif;
            color: #319add;
            ;
            font-size: 1.1em;
        }

        .word {
            vertical-align: middle;
            background-color: rgb(250 250 250);
            border: solid #319add 2px;
            height: 60px;
            width: 60px;
            border-radius: 100%;
            display: inline-block;
            margin: 5px;

        }

        .word>div {
            display: flex;
            flex-direction: column;
            justify-content: center;
            height: 100%;
            width: 100%;
        }


        div>p {
            margin: 0;

        }

        .word_contents {
            font-size: 100%;

        }

        button {
            background-color: #319add;
            color: white;
            border: #319add solid 2px;
            border-radius: 5px;
        }

        button:hover {
            cursor: pointer;
            background-color: white;
            color: #319add;
            border: #319add solid 2px;
        }

        #intro {
            margin: 10px auto;
            max-width: 378px;
        }

        #bias{
            font-size: 0.5em;
        }
    </style>

</head>


<body>

    <div id="app">

        <h1>Ah-um-like-so-cool <span>detector*</span></h1>

        <p id="intro">Real-time voice recognition app to help speakers reduce their use of "filler words".
            <br><br>
            <a href="https://twitter.com/ZackAkil" target="_blank">@ZackAkil</a> | 
            <a href="https://github.com/ZackAkil/Ah-um-like-so-cool-detector" target="_blank">Github repo</a>
        </p>
        <br>


        <button type="button" onclick="init()">Start</button>
        <button type="button" onclick="stop()">Stop</button>
        <button type="button" onclick="reset()">Reset</button>

        <h4>{{time}}</h4>

        <div id="filler-words">
            <div class="word" v-for="word in trigger_words"
                v-bind:style="{ width: (counts[word] * 8) + 60 + 'px', height: (counts[word] * 8) + 60 + 'px' }">
                <div>
                    <div class="word_contents">
                        <p>{{word}}</p>
                        <p>{{counts[word]}}</p>
                    </div>
                </div>
            </div>
        </div>

        <br><br>

        <button type="button" onclick="export_stats()">Export Stats</button>

        <br><br><br><br>

        <p id="bias">*The model used in this demo was trained on a muddled Northern Irish male accent. It might help to try your best impersonation.</p>

    </div>

    <script type="text/javascript">

        const COUNT_THRESHOLD = 0.5

        var app = new Vue({
            el: '#app',
            data: {
                message: 'Hello Vue!',
                labels_list: ["Background Noise", "ah-um", "cool", "normal speaking", "so"],
                trigger_words: ["ah-um", "cool", "so"],
                counts: {},
                time: "00:00:00"
            }
        })

        var trigger_word_index_map = {}
        app.trigger_words.forEach(word => {
            trigger_word_index_map[word] = app.labels_list.indexOf(word)
            app.counts[word] = 0
        })

        var record
        init_record()


        const timer = new Timer()


        timer.addEventListener('secondsUpdated', function (e) {
            app.time = timer.getTimeValues().toString()
        });


        // more documentation available at
        // https://github.com/tensorflow/tfjs-models/tree/master/speech-commands

        // the link to your model provided by Teachable Machine export panel
        const URL = document.location.href + "tm-my-audio-model/";

        async function createModel() {
            const checkpointURL = URL + "model.json"; // model topology
            const metadataURL = URL + "metadata.json"; // model metadata

            const recognizer = speechCommands.create(
                "BROWSER_FFT", // fourier transform type, not useful to change
                undefined, // speech commands vocabulary feature, not useful for your models
                checkpointURL,
                metadataURL);

            // check that model and metadata are loaded via HTTPS requests.
            await recognizer.ensureModelLoaded();

            return recognizer;
        }

        var recognizer


        async function init() {

            timer.start()

            recognizer = await createModel();

            recognizer.listen(result => {

                const scores = result.scores; // probability of prediction for each class

                // let cool_score = result.scores[1].toFixed(2)
                // let so_score = result.scores[3].toFixed(2)

                let trigger_word_said = false


                app.trigger_words.forEach(word => {

                    let word_index = trigger_word_index_map[word]

                    if (result.scores[word_index] > COUNT_THRESHOLD) {
                        app.counts[word]++
                        trigger_word_said = true

                        // create record row
                        let row = new Array(app.trigger_words.length).fill(0)
                        row[app.trigger_words.indexOf(word)] = 1

                        record.push([timer.getTimeValues().toString(), ...row])
                        new Audio('buzz.mp3').play()
                    }
                })

                if (!trigger_word_said) {
                    let row = new Array(app.trigger_words.length).fill(0)
                    record.push([timer.getTimeValues().toString(), ...row])
                }

            }, {
                includeSpectrogram: true, // in case listen should return result.spectrogram
                probabilityThreshold: 0.75,
                invokeCallbackOnNoiseAndUnknown: true,
                overlapFactor: 0.50 // probably want between 0.5 and 0.75. More info in README
            });
        }

        async function stop() {
            recognizer.stopListening()
            timer.pause()
        }

        async function export_stats() {

            const rows = record

            let csvContent = "data:text/csv;charset=utf-8,";

            rows.forEach(function (rowArray) {
                let row = rowArray.join(",");
                csvContent += row + "\r\n";
            });

            var encodedUri = encodeURI(csvContent);
            window.open(encodedUri);
        }


        async function reset() {

            init_record()
            timer.stop()

            // reset counts
            app.trigger_words.forEach(word => {
                app.counts[word] = 0
            })

            app.time = "00:00:00"
            if (recognizer.streaming)
                recognizer.stopListening()
        }

        function init_record() {
            record = [['Time', ...app.trigger_words]]
        }


    </script>
</body>

</html>