I am working on an app to convert speech to text, and for some reason I am getting the following error:
Utility] +[AFAggregator logDictationFailedWithError:] Error Domain=kAFAssistantErrorDomain Code=203 "Corrupt" UserInfo={NSLocalizedDescription=Corrupt, NSUnderlyingError=0x60c000253c50 {Error Domain=SiriSpeechErrorDomain Code=102 "(null)"}}
Please find the code below.
import Foundation
import Speech
protocol SpeechManagerDelegate {
func didReceiveText(text:String)
func didStartedListening(status:Bool)
}
class SpeechManager {
lazy var speechSynthesizer = AVSpeechSynthesizer()
let audioEngine = AVAudioEngine()
let speechRecognizer: SFSpeechRecognizer? = SFSpeechRecognizer()
var request = SFSpeechAudioBufferRecognitionRequest()
var recognitionTask: SFSpeechRecognitionTask?
let audioSession = AVAudioSession.sharedInstance()
var delegate:SpeechManagerDelegate?
static let shared:SpeechManager = {
let instance = SpeechManager()
return instance
}()
func startRecording() {
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
request = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode as? AVAudioInputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = request as? SFSpeechAudioBufferRecognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
self.delegate?.didReceiveText(text: (result?.bestTranscription.formattedString)!)
isFinal = (result?.isFinal)!
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.request.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
delegate?.didStartedListening(status: true)
}
func stopRecording() {
if audioEngine.isRunning {
audioEngine.stop()
request.endAudio()
audioEngine.inputNode.removeTap(onBus: 0)
}
}
func keepRecording() {
if audioEngine.isRunning {
return
} else {
startRecording()
}
}
func isRecoding() -> Bool {
if audioEngine.isRunning {
return true
} else {
return false
}
}
func speak(text: String) {
do {
try audioSession.setCategory(AVAudioSessionCategoryPlayback)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
let speechUtterance = AVSpeechUtterance(string: text)
speechSynthesizer.speak(speechUtterance)
}
}
The recognitionTask result returns nil. Any help is appreciated.
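One thing worth ruling out first (a hedged suggestion, not a confirmed cause of the Code=203 error): the code above never requests speech-recognition or microphone authorization and never checks that the recognizer is available, and an unauthorized or unavailable recognizer typically produces nil results. A minimal sketch, assuming the SpeechManager above plus the usual NSSpeechRecognitionUsageDescription and NSMicrophoneUsageDescription Info.plist keys:

import Speech
import AVFoundation

func startListeningIfAllowed() {
    SFSpeechRecognizer.requestAuthorization { status in
        guard status == .authorized else {
            print("Speech recognition not authorized: \(status)")
            return
        }
        AVAudioSession.sharedInstance().requestRecordPermission { granted in
            guard granted else {
                print("Microphone permission denied")
                return
            }
            DispatchQueue.main.async {
                // The recognizer can be unavailable (e.g. no network or unsupported locale).
                guard SpeechManager.shared.speechRecognizer?.isAvailable == true else {
                    print("Speech recognizer is currently unavailable")
                    return
                }
                SpeechManager.shared.startRecording()
            }
        }
    }
}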
This was working fine until I make a call using the Twilio framework and then start listening again. It crashes with the following error:
required condition is false: format.sampleRate == hwFormat.sampleRate'
It crashes on this line:
inputNode?.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) {[weak self] (buffer, when) in
self?.recognitionRequest?.append(buffer)
}
Here is the full code:
class MySppechRecognizer: NSObject, SFSpeechRecognizerDelegate {
/// Initialize the speech recognizer. It is a shared instance.
static let speechSharedInstance = MySppechRecognizer()
var isSppechRecognisationAvaible = true
var speechRecognizer:SFSpeechRecognizer? = nil
var audioSession = AVAudioSession.sharedInstance()
var audioEngine = AVAudioEngine()
var recognitionTask: SFSpeechRecognitionTask?
var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
var isFinalWord = false
var inputNode:AVAudioInputNode? = nil
var callBack:jimboSpeechCallBack? = nil
var isHotWordDetectedForApp = false
func setSpeechRec() {
if speechRecognizer == nil {
speechRecognizer = SFSpeechRecognizer(locale: kAppLocal)
speechRecognizer?.delegate = self
}
}
//MARK:- Delegate
func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
print("Availibility changes")
}
//MARK:- Audio engine
func startRecording(){
if recognitionTask != nil {
self.recognitionRequest?.endAudio()
recognitionTask?.cancel()
recognitionTask = nil
recognitionRequest = nil
inputNode?.reset()
inputNode?.removeTap(onBus: 0)
inputNode?.reset()
stopRecording()
}
do {
try audioSession.setCategory(AVAudioSession.Category.playAndRecord, mode: .measurement)
try audioSession.setMode(.measurement)
try audioSession.setPreferredSampleRate(44100)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
inputNode = audioEngine.inputNode
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
recognitionRequest?.shouldReportPartialResults = true
recognitionTask = MySppechRecognizer.speechSharedInstance.speechRecognizer?.recognitionTask(with: recognitionRequest!, resultHandler: { (result, error) in
print("Result is===\(String(describing: result?.bestTranscription.formattedString))")
var isFinal = false
if result != nil {
isFinal = (result?.isFinal)!
self.isFinalWord = (result?.isFinal)!
}
if error != nil || isFinal {
self.audioEngine.stop()
self.inputNode?.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
if error != nil {
print("Error === \(String(describing: error?.localizedDescription))")
self.isFinalWord = true
}
guard self.callBack == nil else {
self.callBack!(result,error)
return
}
})
let recordingFormat = inputNode?.outputFormat(forBus: 0)
inputNode?.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) {[weak self] (buffer, when) in
self?.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
}
/// to stop the audio session
func stopRecording() {
DispatchQueue.main.async {
if ((self.audioEngine.isRunning)){
self.recognitionRequest?.endAudio()
self.recognitionTask?.cancel()
self.recognitionTask = nil
self.recognitionRequest = nil
self.inputNode?.reset()
self.inputNode?.removeTap(onBus: 0)
self.inputNode?.reset()
self.audioEngine.inputNode.reset()
}
}
}
}
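The "format.sampleRate == hwFormat.sampleRate" crash usually means the format handed to installTap (or cached inside the engine) no longer matches the hardware format after the Twilio call reconfigured the audio session. A common workaround, sketched below on the assumption that the rest of the recognition setup stays as in startRecording(), is to rebuild the AVAudioEngine after the call and query the input format only once the session has been reactivated:

func prepareEngineAfterCall() throws {
    audioEngine.stop()
    inputNode?.removeTap(onBus: 0)

    // A fresh engine picks up the current (post-call) hardware configuration.
    audioEngine = AVAudioEngine()

    try audioSession.setCategory(.playAndRecord, mode: .measurement, options: [])
    try audioSession.setActive(true, options: .notifyOthersOnDeactivation)

    let node = audioEngine.inputNode
    inputNode = node

    // Query the format *after* activation so it reflects the live sample rate.
    // Passing nil as the format would also make the tap follow the bus format.
    let liveFormat = node.outputFormat(forBus: 0)
    node.installTap(onBus: 0, bufferSize: 1024, format: liveFormat) { [weak self] buffer, _ in
        self?.recognitionRequest?.append(buffer)
    }

    audioEngine.prepare()
    try audioEngine.start()
}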
The video is not playing after using SpeechRecognizer. I am not getting any error; it is just stuck on the AVPlayerViewController. I have also stopped the speech recognizer, and after that I try to play the video. The video plays perfectly before using the speech recognizer.
Maybe it is possible that the speech recognizer is not being stopped by this code, so the problem may be in stopRecording().
@IBAction func btnRecord(_ sender: Any) {
player.pause()
player.seek(to: CMTime.init(value: 0, timescale: player.currentTime().timescale))
if self.audioEngine.isRunning {
self.audioEngine.stop()
self.recognitionRequest?.endAudio()
}
else {
try! self.startRecording()
}
}
private func startRecording() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSession.Category.record, mode: .default, options: [])
try audioSession.setMode(AVAudioSession.Mode.measurement)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
let inputNode = audioEngine.inputNode
//else { fatalError("Audio engine has no input node") }
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
if let result = result {
self.text = result.bestTranscription.formattedString
self.lblText.text = self.text
isFinal = result.isFinal
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
}
private func stopRecording() {
audioEngine.stop()
recognitionRequest?.endAudio()
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
}
@IBAction func btnDonePopup(_ sender: Any) {
self.stopRecording()
self.playVideo()
}
Please change the audioSession.setCategory call back to the default value once recognition finishes:
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
do {
try audioSession.setCategory(.soloAmbient, mode: .measurement, options: [])
} catch { }
}
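A hedged variation on the same idea: restore a playback-friendly category (and deactivate the recording session) right before handing control back to AVPlayer, for example in btnDonePopup. The mode and options below are assumptions, not taken from the question:

@IBAction func btnDonePopup(_ sender: Any) {
    stopRecording()
    do {
        let session = AVAudioSession.sharedInstance()
        // Give up the record-oriented session, then switch to a playback category.
        try session.setActive(false, options: .notifyOthersOnDeactivation)
        try session.setCategory(.playback, mode: .moviePlayback, options: [])
        try session.setActive(true)
    } catch {
        print("Could not reset audio session: \(error)")
    }
    playVideo()
}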
I am using Apple's Speech framework to recognize speech for one hour, but it only recognizes for about one minute. I read that I can make more than one request to recognize speech for longer than a minute, but I don't know how.
Here is my code:
import UIKit
import Speech
public class ViewController: UIViewController, SFSpeechRecognizerDelegate {
// MARK: Properties
private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "ar_SA"))!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
@IBOutlet var textView : UITextView!
@IBOutlet var recordButton : UIButton!
var inString = ""
public override func viewDidLoad() {
super.viewDidLoad()
speechRecognizer.delegate = self
SFSpeechRecognizer.requestAuthorization { authStatus in
/*
The callback may not be called on the main thread. Add an
operation to the main queue to update the record button's state.
*/
OperationQueue.main.addOperation {
switch authStatus {
case .authorized:
print("Dalal")
case .denied:
print("Dalal2")
case .restricted:
print("Dalal3")
case .notDetermined:
print("Dalal4")
}
}
}
// Disable the record buttons until authorization has been granted.
try! startRecording()
}
private func startRecording() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else { fatalError("Audio engine has no input node") }
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
let fileName = "Test"
let dir = try? FileManager.default.url(for: .documentDirectory,
in: .userDomainMask, appropriateFor: nil, create: true)
// If the directory was found, we write a file to it and read it back
if let fileURL = dir?.appendingPathComponent(fileName).appendingPathExtension("txt") {
// Write to the file named Test
do {
if let result = result {
self.textView.text = result.bestTranscription.formattedString
isFinal = result.isFinal
}
try self.textView.text.write(to: fileURL, atomically: true, encoding: .utf8)
} catch {
print("Failed writing to URL: \(fileURL), Error: " + error.localizedDescription)
}
if error != nil || isFinal {
self.audioEngine.stop()
// self.addp()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
do {
self.inString = try String(contentsOf: fileURL)
} catch {
print("Failed reading from URL: \(fileURL), Error: " + error.localizedDescription)
}
print("Read from the file: \(self.inString)")
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
textView.text = "(listening)"
}
public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
print("any text")
}
}//end class
Any suggestions or help? Thank you.
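Apple's speech recognition requests are limited to roughly one minute of audio, so one approach (a minimal sketch, assuming the startRecording() above and a current SDK where audioEngine.inputNode is non-optional) is to tear the request down when it finishes and immediately start a new one, keeping the text recognized so far. The transcriptSoFar property and restartRecognition() helper are hypothetical names added for illustration:

private var transcriptSoFar = ""

// Call this from the resultHandler instead of only stopping the engine, e.g.:
//
//     if let result = result {
//         self.textView.text = self.transcriptSoFar + result.bestTranscription.formattedString
//         isFinal = result.isFinal
//     }
//     if error != nil || isFinal {
//         self.transcriptSoFar = self.textView.text + "\n"
//         self.restartRecognition()
//     }
//
private func restartRecognition() {
    audioEngine.stop()
    audioEngine.inputNode.removeTap(onBus: 0)
    recognitionRequest?.endAudio()
    recognitionRequest = nil
    recognitionTask = nil

    // A short delay lets the previous task release its resources; also consider
    // not restarting when the error was a permission or availability failure.
    DispatchQueue.main.asyncAfter(deadline: .now() + 0.3) {
        do {
            try self.startRecording()
        } catch {
            print("Could not restart recognition: \(error)")
        }
    }
}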
I am working on an app that uses the new Speech framework in iOS 10 to do some speech-to-text work. What is the best way of stopping the recognition when the user stops talking?
private func startRecording() {
isRecording = true
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord, mode: AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
return
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
if let result = result {
if error != nil || result.isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
let questionText = result.bestTranscription.formattedString
isRecording = false
self.audioEngine.stop()
recognitionRequest.endAudio()
self.audioEngine.inputNode?.removeTap(onBus: 0)
}
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try! audioEngine.start()
}
I want this code to be called once the user stops talking:
private func stopRecording() {
isRecording = false
audioEngine.stop()
recognitionRequest?.endAudio()
audioEngine.inputNode?.removeTap(onBus: 0)
}
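There is no built-in "user stopped talking" callback while shouldReportPartialResults is on, so a common approach (a sketch; the silenceTimer name and the 1.5-second threshold are assumptions) is to restart a short timer on every partial result and treat its firing as the end of speech:

private var silenceTimer: Timer?

// Call this on the main queue from the resultHandler whenever a result arrives,
// e.g. if result != nil { DispatchQueue.main.async { self.resetSilenceTimer() } }
private func resetSilenceTimer() {
    silenceTimer?.invalidate()
    silenceTimer = Timer.scheduledTimer(withTimeInterval: 1.5, repeats: false) { [weak self] _ in
        // No new partial result for 1.5 s: assume the user has stopped talking.
        self?.stopRecording()
    }
}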
I am making an application that does text-to-speech and speech-to-text.
The problem I am having right now is that text-to-speech works fine using AVSpeechSynthesizer, but after I record and do speech-to-text using SFSpeechRecognizer, the text-to-speech stops working (i.e., it doesn't talk back).
I am new to Swift, but I got this code from a couple of different tutorials and tried to merge them together.
Here's my code:
private var speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US"))!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var audioEngine = AVAudioEngine()
@objc(speak:location:date:callback:)
func speak(name: String, location: String, date: NSNumber, _ callback: @escaping (NSObject) -> ()) -> Void {
let utterance = AVSpeechUtterance(string: name)
let synthesizer = AVSpeechSynthesizer()
synthesizer.speak(utterance)
}
@available(iOS 10.0, *)
@objc(startListening:location:date:callback:)
func startListening(name: String, location: String, date: NSNumber, _ callback: @escaping (NSObject) -> ()) -> Void {
if audioEngine.isRunning {
audioEngine.stop()
recognitionRequest?.endAudio()
} else {
if recognitionTask != nil { //1
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance() //2
do {
try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest() //3
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
} //4
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
} //5
recognitionRequest.shouldReportPartialResults = true //6
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in //7
var isFinal = false //8
if result != nil {
print(result?.bestTranscription.formattedString) //9
isFinal = (result?.isFinal)!
}
if error != nil || isFinal { //10
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0) //11
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare() //12
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
}
}
They both use an AVAudioSession.
For AVSpeechSynthesizer, I suppose it has to be set to:
_audioSession.SetCategory(AVAudioSessionCategory.Playback,
AVAudioSessionCategoryOptions.MixWithOthers);
and for SFSpeechRecognizer:
_audioSession.SetCategory(AVAudioSessionCategory.PlayAndRecord,
AVAudioSessionCategoryOptions.MixWithOthers);
Hope it helps.
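The snippets above are Xamarin/C#; in Swift the same idea looks roughly like the sketch below (the category, mode, and option choices are assumptions, not taken from the question): use a play-and-record category while listening, then switch back to a playback category before speaking.

let session = AVAudioSession.sharedInstance()

// Before starting SFSpeechRecognizer / installing the input tap.
func configureForRecognition() throws {
    try session.setCategory(.playAndRecord, mode: .measurement, options: .mixWithOthers)
    try session.setActive(true, options: .notifyOthersOnDeactivation)
}

// Before calling AVSpeechSynthesizer.speak(_:).
func configureForSpeech() throws {
    try session.setCategory(.playback, mode: .default, options: .mixWithOthers)
    try session.setActive(true)
}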