// Copyright (c) 2023-2024 Xiaomi Corporation (authors: Fangjun Kuang)
//
const portAudio = require('naudiodon2');
// console.log(portAudio.getDevices());

const sherpa_onnx = require('sherpa-onnx-node');

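// This example does real-time speech recognition from the default microphone
// using a streaming zipformer2 CTC model with HLG.fst decoding. The model
// paths below assume the sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18
// model has already been downloaded and unpacked into the current directory.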
function createOnlineRecognizer() {
  const config = {
    'featConfig': {
      'sampleRate': 16000,
      'featureDim': 80,
    },
    'modelConfig': {
      'zipformer2Ctc': {
        'model':
            './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx',
      },
      'tokens':
          './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/tokens.txt',
      'numThreads': 2,
      'provider': 'cpu',
      'debug': 1,
    },
    'ctcFstDecoderConfig': {
      'graph': './sherpa-onnx-streaming-zipformer-ctc-small-2024-03-18/HLG.fst',
    },
    'enableEndpoint': true,
    'rule1MinTrailingSilence': 2.4,
    'rule2MinTrailingSilence': 1.2,
    'rule3MinUtteranceLength': 20,
  };

  return new sherpa_onnx.OnlineRecognizer(config);
}

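// One recognizer is shared for the whole session; the stream holds the audio
// and decoding state and is fed from the microphone callback below.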
const recognizer = createOnlineRecognizer();
const stream = recognizer.createStream();

// lastText is the most recently printed partial result; segmentIndex counts
// finalized utterances and is incremented at each endpoint with non-empty text.
let lastText = '';
let segmentIndex = 0;

const ai = new portAudio.AudioIO({
  inOptions: {
    channelCount: 1,
    closeOnError: true,  // Close the stream if an audio error is detected;
                         // if set to false, errors are only logged.
    deviceId: -1,  // Use -1 or omit deviceId to select the default device.
    sampleFormat: portAudio.SampleFormatFloat32,
    sampleRate: recognizer.config.featConfig.sampleRate
  }
});

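// Display is a helper from sherpa-onnx-node for printing streaming
// recognition results to the terminal.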
const display = new sherpa_onnx.Display(50);

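// The 'data' callback fires for every captured audio chunk: convert the raw
// bytes to float samples, feed them to the stream, decode while enough frames
// are available, and print the partial result whenever it changes.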
ai.on('data', data => {
  // Interpret the raw audio buffer as 32-bit float samples.
  const samples = new Float32Array(data.buffer);

  stream.acceptWaveform(
      {sampleRate: recognizer.config.featConfig.sampleRate, samples: samples});

  // Decode as long as the stream has enough feature frames.
  while (recognizer.isReady(stream)) {
    recognizer.decode(stream);
  }

  const isEndpoint = recognizer.isEndpoint(stream);
  const text = recognizer.getResult(stream).text.toLowerCase();

  // Print only when the partial result has changed.
  if (text.length > 0 && lastText != text) {
    lastText = text;
    display.print(segmentIndex, lastText);
  }

  // At an endpoint, finalize the current segment and reset the stream for
  // the next utterance.
  if (isEndpoint) {
    if (text.length > 0) {
      lastText = text;
      segmentIndex += 1;
    }
    recognizer.reset(stream);
  }
});

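// Release native resources once the audio stream is closed.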
ai.on('close', () => {
  console.log('Free resources');
  stream.free();
  recognizer.free();
});

ai.start();
console.log('Started! Please speak');