I am using Apple speech to recognize voice for one hour but Apple speech just recognize the voice one minute.
I read that I can make more the one request to recognize the voice more than one minute, but I don't know how.
here is my code
import UIKit
import Speech
public class ViewController: UIViewController, SFSpeechRecognizerDelegate {
// MARK: Properties
private let speechRecognizer = SFSpeechRecognizer(locale: Locale(identifier: "ar_SA"))!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
#IBOutlet var textView : UITextView!
#IBOutlet var recordButton : UIButton!
var inString = ""
public override func viewDidLoad() {
super.viewDidLoad()
speechRecognizer.delegate = self
SFSpeechRecognizer.requestAuthorization { authStatus in
/*
The callback may not be called on the main thread. Add an
operation to the main queue to update the record button's state.
*/
OperationQueue.main.addOperation {
switch authStatus {
case .authorized:
print("Dalal")
case .denied:
print("Dalal2")
case .restricted:
print("Dalal3")
case .notDetermined:
print("Dalal4")
}
}
}
// Disable the record buttons until authorization has been granted.
try! startRecording()
}
private func startRecording() throws {
// Cancel the previous task if it's running.
if let recognitionTask = recognitionTask {
recognitionTask.cancel()
self.recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else { fatalError("Audio engine has no input node") }
guard let recognitionRequest = recognitionRequest else { fatalError("Unable to created a SFSpeechAudioBufferRecognitionRequest object") }
// Configure request so that results are returned before audio recording is finished
recognitionRequest.shouldReportPartialResults = true
// A recognition task represents a speech recognition session.
// We keep a reference to the task so that it can be cancelled.
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest) { result, error in
var isFinal = false
let fileName = "Test"
let dir = try? FileManager.default.url(for: .documentDirectory,
in: .userDomainMask, appropriateFor: nil, create: true)
// If the directory was found, we write a file to it and read it back
if let fileURL = dir?.appendingPathComponent(fileName).appendingPathExtension("txt") {
// Write to the file named Test
do {
if let result = result {
self.textView.text = result.bestTranscription.formattedString
isFinal = result.isFinal
}
try self.textView.text.write(to: fileURL, atomically: true, encoding: .utf8)
} catch {
print("Failed writing to URL: \(fileURL), Error: " + error.localizedDescription)
}
if error != nil || isFinal {
self.audioEngine.stop()
// self.addp()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
do {
self.inString = try String(contentsOf: fileURL)
} catch {
print("Failed reading from URL: \(fileURL), Error: " + error.localizedDescription)
}
print("Read from the file: \(self.inString)")
}
}
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer: AVAudioPCMBuffer, when: AVAudioTime) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
try audioEngine.start()
textView.text = "(listening)"
}
public func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
print("any text")
}
}//end class
any suggestions or help ?
thank you.
Related
We are creating an online book reading app in which we are initiating video call (group call:- for video call. we are using agora SDK) and at the join of call we start book reading and highlight words at other members' end also and recording/recognition text we are using SFSpeechRecognizer but whenever call kit start and video call start SFSpeechRecognizer start to record audio at others end it's getting failed always, can you please provide any solution to record audio during the video call.
//
// Speech.swift
// Edsoma
//
// Created by Kapil on 16/02/22.
//
import Foundation
import AVFoundation
import Speech
protocol SpeechRecognizerDelegate {
func didSpoke(speechRecognizer : SpeechRecognizer , word : String?)
}
class SpeechRecognizer: NSObject {
private let speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US")) //1
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
var delegate : SpeechRecognizerDelegate?
static let shared = SpeechRecognizer()
var isOn = false
func setup(){
speechRecognizer?.delegate = self //3
SFSpeechRecognizer.requestAuthorization { (authStatus) in //4
var isButtonEnabled = false
switch authStatus { //5
case .authorized:
isButtonEnabled = true
case .denied:
isButtonEnabled = false
print("User denied access to speech recognition")
case .restricted:
isButtonEnabled = false
print("Speech recognition restricted on this device" )
case .notDetermined:
isButtonEnabled = false
print("Speech recognition not yet authorized")
#unknown default:
break;
}
OperationQueue.main.addOperation() {
// self.microphoneButton.isEnabled = isButtonEnabled
}
}
}
func transcribeAudio(url: URL) {
// create a new recognizer and point it at our audio
let recognizer = SFSpeechRecognizer()
let request = SFSpeechURLRecognitionRequest(url: url)
// start recognition!
recognizer?.recognitionTask(with: request) { [unowned self] (result, error) in
// abort if we didn't get any transcription back
guard let result = result else {
print("There was an error: \(error!)")
return
}
// if we got the final transcription back, print it
if result.isFinal {
// pull out the best transcription...
print(result.bestTranscription.formattedString)
}
}
}
func startRecording() {
isOn = true
let inputNode = audioEngine.inputNode
if recognitionTask != nil {
inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
self.recognitionRequest = nil
self.recognitionTask = nil
DispatchQueue.main.asyncAfter(deadline: DispatchTime.now() + 1) {
self.startRecording()
}
return
debugPrint("****** recognitionTask != nil *************")
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(AVAudioSession.Category.multiRoute)
try audioSession.setMode(AVAudioSession.Mode.measurement)
try audioSession.setActive(true, options: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionRequest.taskHint = .search
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
self.delegate?.didSpoke(speechRecognizer: self, word: result?.bestTranscription.formattedString)
debugPrint(result?.bestTranscription.formattedString)
isFinal = (result?.isFinal)!
}
if error != nil {
debugPrint("Speech Error ====>",error)
inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
self.recognitionRequest = nil
self.recognitionTask = nil
if BookReadingSettings.isSTTEnable{
DispatchQueue.main.asyncAfter(deadline: DispatchTime.now() + 1) {
self.startRecording()
}
}
// self.microphoneButton.isEnabled = true
}
})
// let recordingFormat = AVAudioFormat.init(commonFormat: .pcmFormatFloat32, sampleRate: <#T##Double#>, interleaved: <#T##Bool#>, channelLayout: <#T##AVAudioChannelLayout#>)//inputNode.outputFormat(forBus: 0)
inputNode.removeTap(onBus: 0)
let sampleRate = AVAudioSession.sharedInstance().sampleRate
let recordingFormat = AVAudioFormat(standardFormatWithSampleRate: sampleRate, channels: 1)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
debugPrint("Say something, I'm listening!")
//textView.text = "Say something, I'm listening!"
}
/* func stopRecording(){
isOn = false
debugPrint("Recording stoped")
self.audioEngine.stop()
recognitionTask?.cancel()
let inputNode = audioEngine.inputNode
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}*/
func stopRecording(){
isOn = false
debugPrint("Recording stoped")
let inputNode = audioEngine.inputNode
inputNode.removeTap(onBus: 0)
self.audioEngine.stop()
recognitionTask?.cancel()
self.recognitionRequest = nil
self.recognitionTask = nil
}
}
I am working on an app to convert the speech to text, for some reasons am getting the following error.
Utility] +[AFAggregator logDictationFailedWithError:] Error Domain=kAFAssistantErrorDomain Code=203 "Corrupt" UserInfo={NSLocalizedDescription=Corrupt, NSUnderlyingError=0x60c000253c50 {Error Domain=SiriSpeechErrorDomain Code=102 "(null)"}}
Please find the below code.
import Foundation
import Speech
protocol SpeechManagerDelegate {
func didReceiveText(text:String)
func didStartedListening(status:Bool)
}
class SpeechManager {
lazy var speechSynthesizer = AVSpeechSynthesizer()
let audioEngine = AVAudioEngine()
let speechRecognizer: SFSpeechRecognizer? = SFSpeechRecognizer()
var request = SFSpeechAudioBufferRecognitionRequest()
var recognitionTask: SFSpeechRecognitionTask?
let audioSession = AVAudioSession.sharedInstance()
var delegate:SpeechManagerDelegate?
static let shared:SpeechManager = {
let instance = SpeechManager()
return instance
}()
func startRecording() {
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
request = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode as? AVAudioInputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = request as? SFSpeechAudioBufferRecognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
self.delegate?.didReceiveText(text: (result?.bestTranscription.formattedString)!)
isFinal = (result?.isFinal)!
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.request.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
delegate?.didStartedListening(status: true)
}
func stopRecording() {
if audioEngine.isRunning {
audioEngine.stop()
request.endAudio()
audioEngine.inputNode.removeTap(onBus: 0)
}
}
func keepRecording() {
if audioEngine.isRunning {
return
} else {
startRecording()
}
}
func isRecoding() -> Bool {
if audioEngine.isRunning {
return true
} else {
return false
}
}
func speak(text: String) {
do {
try audioSession.setCategory(AVAudioSessionCategoryPlayback)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
let speechUtterance = AVSpeechUtterance(string: text)
speechSynthesizer.speak(speechUtterance)
}
}
recognitonTask result returns nil. Any help appreciate.
the app I'm currently making in Swift will help blind people navigate the world using this one comprehensive solution. I am looking to make a generic function for the app that when called, will immediately start recording, listen for the user to say something, and once the user stops speaking, it will automatically stop recording, convert the recording to a string, and return it. This function should be usable more than once in a single view controller.
I have tried using the technique from this article and it didn't work: https://medium.com/ios-os-x-development/speech-recognition-with-swift-in-ios-10-50d5f4e59c48
The recorder will be collecting the name of a building or a room in a building, so it doesn't need to be recording for terribly long - even a set length of time of 5 seconds would work. I am hoping to use a framework like Speech or something with Siri, but I am not opposed to using an external framework like Watson if it works better. Please help!
There's a beautiful appcoda tutorial here, which fits this perfectly.
This is the code they used to update a text field with the speech results. It can't be too difficult to channel the text going in their text field into whatever variable/function you use to process the result.
//
// ViewController.swift
// Siri
//
// Created by Sahand Edrisian on 7/14/16.
// Copyright © 2016 Sahand Edrisian. All rights reserved.
//
import UIKit
import Speech
class ViewController: UIViewController, SFSpeechRecognizerDelegate {
#IBOutlet weak var textView: UITextView!
#IBOutlet weak var microphoneButton: UIButton!
private let speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US"))!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private let audioEngine = AVAudioEngine()
override func viewDidLoad() {
super.viewDidLoad()
microphoneButton.isEnabled = false
speechRecognizer.delegate = self
SFSpeechRecognizer.requestAuthorization { (authStatus) in
var isButtonEnabled = false
switch authStatus {
case .authorized:
isButtonEnabled = true
case .denied:
isButtonEnabled = false
print("User denied access to speech recognition")
case .restricted:
isButtonEnabled = false
print("Speech recognition restricted on this device")
case .notDetermined:
isButtonEnabled = false
print("Speech recognition not yet authorized")
}
OperationQueue.main.addOperation() {
self.microphoneButton.isEnabled = isButtonEnabled
}
}
}
#IBAction func microphoneTapped(_ sender: AnyObject) {
if audioEngine.isRunning {
audioEngine.stop()
recognitionRequest?.endAudio()
microphoneButton.isEnabled = false
microphoneButton.setTitle("Start Recording", for: .normal)
} else {
startRecording()
microphoneButton.setTitle("Stop Recording", for: .normal)
}
}
func startRecording() {
if recognitionTask != nil { //1
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance() //2
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest() //3
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
} //4
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
} //5
recognitionRequest.shouldReportPartialResults = true //6
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in //7
var isFinal = false //8
if result != nil {
self.textView.text = result?.bestTranscription.formattedString //9
isFinal = (result?.isFinal)!
}
if error != nil || isFinal { //10
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
self.microphoneButton.isEnabled = true
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0) //11
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare() //12
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
textView.text = "Say something, I'm listening!"
}
func speechRecognizer(_ speechRecognizer: SFSpeechRecognizer, availabilityDidChange available: Bool) {
if available {
microphoneButton.isEnabled = true
} else {
microphoneButton.isEnabled = false
}
}
}
I am making an application that does Text-to-speech and speech-to-text.
The problem i am having right now is that text-to-speech works fine using AVSpeechSynthesizer. But after i record and do speech-to-text using SFSpeechRecognizer, the text-to-speech stops working (ie, doesn't talk back).
I am new to swift too. But i got this code from a couple of different tutorials and tried to merge them together.
Here's my code:
private var speechRecognizer = SFSpeechRecognizer(locale: Locale.init(identifier: "en-US"))!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var audioEngine = AVAudioEngine()
#objc(speak:location:date:callback:)
func speak(name: String, location: String, date: NSNumber,_ callback: #escaping (NSObject) -> ()) -> Void {
let utterance = AVSpeechUtterance(string: name)
let synthesizer = AVSpeechSynthesizer()
synthesizer.speak(utterance)
}
#available(iOS 10.0, *)
#objc(startListening:location:date:callback:)
func startListening(name: String, location: String, date: NSNumber,_ callback: #escaping (NSObject) -> ()) -> Void {
if audioEngine.isRunning {
audioEngine.stop()
recognitionRequest?.endAudio()
} else {
if recognitionTask != nil { //1
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance() //2
do {
try audioSession.setCategory(AVAudioSessionCategoryPlayAndRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest() //3
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
} //4
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
} //5
recognitionRequest.shouldReportPartialResults = true //6
recognitionTask = speechRecognizer.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in //7
var isFinal = false //8
if result != nil {
print(result?.bestTranscription.formattedString) //9
isFinal = (result?.isFinal)!
}
if error != nil || isFinal { //10
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0) //11
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare() //12
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
}
}
They both have an AVAudioSession.
For AVSpeechSynthesizer I suppose it has to be set to:
_audioSession.SetCategory(AVAudioSessionCategory.Playback,
AVAudioSessionCategoryOptions.MixWithOthers);
and For SFSpeechRecognizer:
_audioSession.SetCategory(AVAudioSessionCategory.PlayAndRecord,
AVAudioSessionCategoryOptions.MixWithOthers);
Hope it helps.
I'm trying to run Text To Speech (AVSpeechSynthesizer) along with Speech To Text from Siri Kit, but I'm stuck with it.
My TTS works perfectly until I run the code to execute the STT, after that my TTS doesn't work anymore. I debugged the code and during the executing of the code, no errors happen, but my text is not transforming to speech. I think somehow my STT is disabling the output microphone and that's why the TTS doesn't transform the text to speech anymore, well, that's just a theory. Ops: My TTS stops working, but my STT works perfectly
Any tips?
Here's my viewController's code:
#IBOutlet weak var microphoneButton: UIButton!
//text to speech
let speechSynthesizer = AVSpeechSynthesizer()
//speech to text
private var speechRecognizer: SFSpeechRecognizer!
private var recognitionRequest: SFSpeechAudioBufferRecognitionRequest?
private var recognitionTask: SFSpeechRecognitionTask?
private var audioEngine = AVAudioEngine()
#IBAction func textToSpeech(_ sender: Any) {
if let word = wordTextField.text{
if !speechSynthesizer.isSpeaking {
//get current dictionary
let dictionary = fetchSelectedDictionary()
//get current language
let language = languagesWithCodes[(dictionary?.language)!]
let speechUtterance = AVSpeechUtterance(string: word)
speechUtterance.voice = AVSpeechSynthesisVoice(language: language)
speechUtterance.rate = 0.4
//speechUtterance.pitchMultiplier = pitch
//speechUtterance.volume = volume
speechSynthesizer.speak(speechUtterance)
}
else{
speechSynthesizer.continueSpeaking()
}
}
}
#IBAction func speechToText(_ sender: Any) {
if audioEngine.isRunning {
audioEngine.stop()
recognitionRequest?.endAudio()
microphoneButton.isEnabled = false
microphoneButton.setTitle("Start Recording", for: .normal)
} else {
startRecording()
microphoneButton.setTitle("Stop Recording", for: .normal)
}
}
func startRecording() {
if recognitionTask != nil {
recognitionTask?.cancel()
recognitionTask = nil
}
let audioSession = AVAudioSession.sharedInstance()
do {
try audioSession.setCategory(AVAudioSessionCategoryRecord)
try audioSession.setMode(AVAudioSessionModeMeasurement)
try audioSession.setActive(true, with: .notifyOthersOnDeactivation)
} catch {
print("audioSession properties weren't set because of an error.")
}
recognitionRequest = SFSpeechAudioBufferRecognitionRequest()
guard let inputNode = audioEngine.inputNode else {
fatalError("Audio engine has no input node")
}
guard let recognitionRequest = recognitionRequest else {
fatalError("Unable to create an SFSpeechAudioBufferRecognitionRequest object")
}
recognitionRequest.shouldReportPartialResults = true
recognitionTask = speechRecognizer?.recognitionTask(with: recognitionRequest, resultHandler: { (result, error) in
var isFinal = false
if result != nil {
self.wordTextField.text = result?.bestTranscription.formattedString
isFinal = (result?.isFinal)!
}
if error != nil || isFinal {
self.audioEngine.stop()
inputNode.removeTap(onBus: 0)
self.recognitionRequest = nil
self.recognitionTask = nil
self.microphoneButton.isEnabled = true
}
})
let recordingFormat = inputNode.outputFormat(forBus: 0)
inputNode.installTap(onBus: 0, bufferSize: 1024, format: recordingFormat) { (buffer, when) in
self.recognitionRequest?.append(buffer)
}
audioEngine.prepare()
do {
try audioEngine.start()
} catch {
print("audioEngine couldn't start because of an error.")
}
wordTextField.text = "Say something, I'm listening!"
}
}
This line:
try audioSession.setMode(AVAudioSessionModeMeasurement)
is probably the reason. It can cause the volume to be throttled so low, that it sounds like it is off. Try:
try audioSession.setMode(AVAudioSessionModeDefault)
and see if it works.
Probably because your audiosession is in Record mode, You have 2 solutions, first would be to set your try audioSession.setCategory(AVAudioSessionCategoryRecord) to AVAudioSessionCategoryPlayAndRecord (This will work) but a cleaner way would be to get a separate function for saying something and then set your AVAudioSessionCategory to AVAudioSessionCategoryPlayback
Hope this helped.