Swift Speech framework transcription Array always contains one transcription only - ios

I`m using Speech framework to transcript user response in a quiz game. We present a image of an animal and the user have to say the name of the animal. When I evaluate user response... "result.transcriptions" only have 1 transcription, the same as "result.bestTranscription". Are there any solution to get multiple response options?
Here is my Code
if recognitionTask != nil {
recognitionTask?.finish()
recognitionTask = nil
}
do {
try AVAudioSession.sharedInstance().setCategory(.playAndRecord, mode: .default, options: .mixWithOthers)
try AVAudioSession.sharedInstance().overrideOutputAudioPort(AVAudioSession.PortOverride.speaker)
try AVAudioSession.sharedInstance().setActive(true, options: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
let inputNode = audioEngine.inputNode
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { [weak self] result, error in
if let error = error {
debugPrint(error)
}
guard let result = result else { return }
if result.bestTranscription.formattedString.lowercased() == self.searchTerm.lowercased() {
found = true
self.response = result.bestTranscription.formattedString
} else {
result.transcriptions.forEach { transcription in
if transcription.formattedString.lowercased() == self.searchTerm.lowercased() {
self.response = transcription.formattedString
found = true
}
debugPrint("Transcription")
debugPrint(transcription.formattedString)
}
})

Related

How to terminate speech recognition when user stop speaking with swift 5

I'm trying to make user speak and get what user says right. I read like 20 different articles about speech recognition and almost all is same. It keeps listening to user for like 1 minute or more. I want it to stop recognation when user stop speaking. I want to catch a word/few words that user says. Is there something limiting the time that user speak?
My code block :
func recordAndRecognizeSpeech(){
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(.record, mode: .measurement, options: .duckOthers)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
let node = audioEngine.inputNode
guard let request = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
//request.shouldReportPartialResults = true
// Setting requiresOnDeviceRecognition to false would use the Apple Cloud for speech recognition.
if speechRecognizer?.supportsOnDeviceRecognition ?? false{
request.requiresOnDeviceRecognition = true
}
guard let myRecognizer = SFSpeechRecognizer() else {
// A recognizer is not supported for the current locale
return
}
if !myRecognizer.isAvailable {
// A recognizer is not available now
return
}
recognitionTask = speechRecognizer?.recognitionTask(with: request, resultHandler: { result, error in
if let result = result {
DispatchQueue.main.async {
let bestString = result.bestTranscription.formattedString
print(bestString)
}
} else if let error = error {
print(error)
self.audioEngine.stop()
node.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
self.speakButton.isEnabled = true
}
})
let recordingFormat = node.outputFormat(forBus: 0)
node.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat){buffer,_ in
self.recognitionRequest!.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
return print(error)
}
}
You can check the power of the sound input, and if it reach a minimum value, start a timer (like 3 seconds), and stop after the timer fire.
var recorder: AVAudioRecorder?
recorder.updateMeters()
let dB = recorder.averagePower(forChannel: 0)

nonstop iOS speech recognition?

In one of my application I am using the Speech framework for converting user's voice into Text.
Basically I want my application hands-free with some commands It can operate.
So there is a limit of Apple that has only 1000 request per hour and SFSpeechRecognitionTask only last about 1 minute only.
I want SFSpeechRecognitionTask should make alive and keep recognise the voice.
So what is the best way we can do with the code. Is it too much battery gain If I will do restart SFSpeechRecognitionTask in every 1 min?
I have done code like below to start detecting voice and it's going to stop after 1 minute.
Please help me out if there will be a way to achieve it.
func startRecording() {
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
if self.speechTimer != nil
{
if (self.speechTimer?.isValid)!
{
self.speechTimer?.invalidate()
}
self.speechTimer = nil;
}
print(result?.bestTranscription.formattedString as Any)
self.speechTimer = Timer.scheduledTimer(withTimeInterval: 2.0, repeats: false, block: { (timer) in
print("Recognition task restart")
})
isFinal = (result?.isFinal)!
if isFinal {
print("Final String: \(result?.bestTranscription.formattedString ?? "No string")")
}
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
}

Video Not Playing After SpeechRecognizer

Video not playing after SpeechRecognizer. Not getting any error just stuck on AVPlayerViewController. I have stopped speechRecognizer also. then after I am trying to play video. The video perfectly plays before speechRecognizer.
Maybe that possible speechRecognizer is not stopping by this code. So, Maybe the problem is in stopRecording().
#IBAction func btnRecord(_ sender: Any) {
player.pause()
player.seek(to: CMTime.init(value: 0, timescale: player.currentTime().timescale))
if self.audioEngine.isRunning {
self.audioEngine.stop()
self.recognitionRequest?.endAudio()
}
else {
try! self.startRecording()
}
}
private func startRecording() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSession.Category.record, mode: .default, options: [])
try audioSession.setMode(AVAudioSession.Mode.measurement)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
let inputNode = audioEngine.inputNode
//else { fatalError("Audio engine has no input node") }
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
self.text = result.bestTranscription.formattedString
self.lblText.text = self.text
isFinal = result.isFinal
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
}
private func stopRecording() {
audioEngine.stop()
recognitionRequest?.endAudio()
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
}
#IBAction func btnDonePopup(_ sender: Any) {
self.stopRecording()
self.playVideo()
}
Please change audioSession.setCategory to default value:
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
do {
try audioSession.setCategory(.soloAmbient, mode: .measurement, options: [])
} catch { }
}

Speech to text error - Swift 4

I am working on an app to convert the speech to text, for some reasons am getting the following error.
Utility] +[AFAggregator logDictationFailedWithError:] Error Domain=kAFAssistantErrorDomain Code=203 "Corrupt" UserInfo={NSLocalizedDescription=Corrupt, NSUnderlyingError=0x60c000253c50 {Error Domain=SiriSpeechErrorDomain Code=102 "(null)"}}
Please find the below code.
import Foundation
import Speech
protocol SpeechManagerDelegate {
func didReceiveText(text:String)
func didStartedListening(status:Bool)
}
class SpeechManager {
lazy var speechSynthesizer = AVSpeechSynthesizer()
let audioEngine = AVAudioEngine()
let speechRecognizer: SFSpeechRecognizer? = SFSpeechRecognizer()
var request = SFSpeechAudioBufferRecognitionRequest()
var recognitionTask: SFSpeechRecognitionTask?
let audioSession = AVAudioSession.sharedInstance()
var delegate:SpeechManagerDelegate?
static let shared:SpeechManager = {
let instance = SpeechManager()
return instance
}()
func startRecording() {
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
request = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode as? AVAudioInputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = request as? SFSpeechAudioBufferRecognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
self.delegate?.didReceiveText(text: (result?.bestTranscription.formattedString)!)
isFinal = (result?.isFinal)!
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.request.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
delegate?.didStartedListening(status: true)
}
func stopRecording() {
if audioEngine.isRunning {
audioEngine.stop()
request.endAudio()
audioEngine.inputNode.removeTap(onBus: 0)
}
}
func keepRecording() {
if audioEngine.isRunning {
return
} else {
startRecording()
}
}
func isRecoding() -> Bool {
if audioEngine.isRunning {
return true
} else {
return false
}
}
func speak(text: String) {
do {
try audioSession.setCategory(AVAudioSessionCategoryPlayback)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
let speechUtterance = AVSpeechUtterance(string: text)
speechSynthesizer.speak(speechUtterance)
}
}
recognitonTask result returns nil. Any help appreciate.

Swift - Stop speech recognition on no talk [iOS 10]

I am working on an app that uses the new Speech framework in ios 10 to do some speech-to-text stuff. What is the best way of stopping the recognition when the user stops talking?
private func startRecording() {
isRecording = true
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord, mode: AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
return
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
if let result = result {
if error != nil || result.isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
let questionText = result.bestTranscription.formattedString
isRecording = false
self.audioEngine.stop()
recognitionRequest.endAudio()
self.audioEngine.inputNode?.removeTap(onBus: 0)
}
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try! audioEngine.start()
}
I want this code to be called once user does not talk
private func stopRecording() {
isRecording = false
audioEngine.stop()
recognitionRequest?.endAudio()
audioEngine.inputNode?.removeTap(onBus: 0)
}

Resources