I want to use stereo instead of mono. I downloaded Apple's sample app. It says
Because a user can hold an iOS device in a variety of ways, you need to specify the orientation of the right and left channels in the stereo field. Set the built-in microphone’s directionality by configuring:
Polar pattern. The system represents the individual device >microphones, and beamformers that use multiple microphones, as >data sources. Select the front or back data source and set its >polar pattern to stereo.
Input orientation. When recording video, set the input >orientation to match the video orientation. When recording audio >only, set the input orientation to match the user interface >orientation. In both cases, don’t change the orientation during >recording.
I found this YCombinator thread that says there are issues when recording using the stereo speakers and holding the phone in different orientations. In Apple's sample code there is a Orientation enum, but this isn't clearly explained on what the differences are for.
With their app there is a segmented control that lets you choose between which speaker to use but it's not very clear on how to roll this into your own app.
I want to make it seamless so that the user doesn't have to choose, they can simply press record and the AVAudioRecorder takes it from there.
Start/Stop Recording
var micRecorder: AVAudioRecorder?
let audioSettings: [String:Any] = [AVFormatIDKey: kAudioFormatMPEG4AAC,
AVNumberOfChannelsKey: 2,
AVSampleRateKey: 44100.0,
AVEncoderBitRateKey: 64000,
AVEncoderAudioQualityKey: AVAudioQuality.min.rawValue]
func viewDidLoad() {
super.viewDidLoad()
do {
try AVAudioSession.sharedInstance().setCategory(.playAndRecord, mode: .default, options: [ .allowBluetoothA2DP, .defaultToSpeaker]
try AVAudioSession.sharedInstance().setActive(true)
} catch {
}
enableBuiltInMic()
}
#IBAction func startRecording(_ sender: UIButton) {
do {
// How to use the AVAudioRecorder to start recording using the Stereo speaker here?
let fileURL = NSTemporaryDirectory()...
micRecorder = try AVAudioRecorder(url: fileURL, settings: audioSettings)
micRecorder?.delegate = self
micRecorder?.isMeteringEnabled = true
micRecorder?.record()
} catch {
}
}
This is the code that I pulled from Apple's sample app. I stuffed into the same file that the micRecorder is in above.
enum Orientation: Int {
case unknown = 0
case portrait = 1
case portraitUpsideDown = 2
case landscapeLeft = 4
case landscapeRight = 3
}
fileprivate extension Orientation {
// Convenience property to retrieve the AVAudioSession.StereoOrientation.
var inputOrientation: AVAudioSession.StereoOrientation {
return AVAudioSession.StereoOrientation(rawValue: rawValue)!
}
}
var isStereoSupported = false
private var windowOrientation: UIInterfaceOrientation { view.window?.windowScene?.interfaceOrientation ?? .unknown }
struct RecordingOption: Comparable {
let name: String
fileprivate let dataSourceName: String
static func < (lhs: RecordingOption, rhs: RecordingOption) -> Bool {
lhs.name < rhs.name
}
}
var recordingOptions: [RecordingOption] = {
let front = AVAudioSession.Orientation.front
let back = AVAudioSession.Orientation.back
let bottom = AVAudioSession.Orientation.bottom
let session = AVAudioSession.sharedInstance()
guard let dataSources = session.preferredInput?.dataSources else { return [] }
var options = [RecordingOption]()
dataSources.forEach { dataSource in
switch dataSource.orientation {
case front:
options.append(RecordingOption(name: "Front Stereo", dataSourceName: front.rawValue))
case back:
options.append(RecordingOption(name: "Back Stereo", dataSourceName: back.rawValue))
case bottom:
options.append(RecordingOption(name: "Mono", dataSourceName: bottom.rawValue))
default: ()
}
}
// Sort alphabetically
options.sort()
return options
}()
func enableBuiltInMic() {
// Get the shared audio session.
let session = AVAudioSession.sharedInstance()
// Find the built-in microphone input.
guard let availableInputs = session.availableInputs,
let builtInMicInput = availableInputs.first(where: { $0.portType == .builtInMic }) else {
print("The device must have a built-in microphone.")
return
}
// Make the built-in microphone input the preferred input.
do {
try session.setPreferredInput(builtInMicInput)
} catch {
print("Unable to set the built-in mic as the preferred input.")
}
}
func selectRecordingOption(_ option: RecordingOption, orientation: Orientation, completion: (StereoLayout) -> Void) {
// Get the shared audio session.
let session = AVAudioSession.sharedInstance()
// Find the built-in microphone input's data sources,
// and select the one that matches the specified name.
guard let preferredInput = session.preferredInput,
let dataSources = preferredInput.dataSources,
let newDataSource = dataSources.first(where: { $0.dataSourceName == option.dataSourceName }),
let supportedPolarPatterns = newDataSource.supportedPolarPatterns else {
completion(.none)
return
}
do {
isStereoSupported = supportedPolarPatterns.contains(.stereo)
// If the data source supports stereo, set it as the preferred polar pattern.
if isStereoSupported {
// Set the preferred polar pattern to stereo.
try newDataSource.setPreferredPolarPattern(.stereo)
}
// Set the preferred data source and polar pattern.
try preferredInput.setPreferredDataSource(newDataSource)
// Update the input orientation to match the current user interface orientation.
try session.setPreferredInputOrientation(orientation.inputOrientation)
} catch {
print("Unable to select the \(option.dataSourceName) data source.")
}
// Call the completion handler with the updated stereo layout.
completion(StereoLayout(orientation: newDataSource.orientation!,
stereoOrientation: session.inputOrientation))
}
Stero File:
import AVFoundation
enum StereoLayout: String {
case none = "none"
case mono = "Mono"
case frontLandscapeLeft = "Front LandscapeLeft"
case frontLandscapeRight = "Front LandscapeRight"
case frontPortrait = "Front Portrait"
case frontPortraitUpsideDown = "Front PortraitUpsideDown"
case backLandscapeLeft = "Back LandscapeLeft"
case backLandscapeRight = "Back LandscapeRight"
case backPortrait = "Back Portrait"
case backPortraitUpsideDown = "Back PortraitUpsideDown"
init(orientation: AVAudioSession.Orientation, stereoOrientation: AVAudioSession.StereoOrientation) {
let front: AVAudioSession.Orientation = .front
let back: AVAudioSession.Orientation = .back
switch (orientation, stereoOrientation) {
// Front
case (front, .none):
self.init(rawValue: StereoLayout.mono.rawValue)!
case (front, .landscapeLeft):
self.init(rawValue: StereoLayout.frontLandscapeLeft.rawValue)!
case (front, .landscapeRight):
self.init(rawValue: StereoLayout.frontLandscapeRight.rawValue)!
case (front, .portrait):
self.init(rawValue: StereoLayout.frontPortrait.rawValue)!
case (front, .portraitUpsideDown):
self.init(rawValue: StereoLayout.frontPortraitUpsideDown.rawValue)!
// Back
case (back, .none):
self.init(rawValue: StereoLayout.mono.rawValue)!
case (back, .landscapeLeft):
self.init(rawValue: StereoLayout.backLandscapeLeft.rawValue)!
case (back, .landscapeRight):
self.init(rawValue: StereoLayout.backLandscapeRight.rawValue)!
case (back, .portrait):
self.init(rawValue: StereoLayout.backPortrait.rawValue)!
case (back, .portraitUpsideDown):
self.init(rawValue: StereoLayout.backPortraitUpsideDown.rawValue)!
default:
self.init(rawValue: StereoLayout.none.rawValue)!
}
}
}
Related
I am following the apple phone number recognition sample. Normally it creates a red outline around the recognized text. Mine does not seem to do recognizing the text and creating the red outline even though I used their code. The only difference is my view controller class is called "TextScanViewController" where their's is just "ViewController". I went through and made sure that any "ViewControllers" were changed to "TextScanViewController". Am I missing something else that I should change?
Here is what it should look like (when I use the original Apple project) compared to what it is doing (should have red outlines but is not showing them even if the text is perfectly in the center of the rectangle)
Should look like:
Looks like:
There are 5 different swift files I am using (PreviewView, TextScanViewController, VisionViewController, StringUtils, AppDelegate)
TextScanViewController:
import UIKit
import AVFoundation
import Vision
class TextScanViewController: UIViewController {
// MARK: - UI objects
#IBOutlet weak var previewView: PreviewView!
#IBOutlet weak var cutoutView: UIView!
#IBOutlet weak var numberView: UILabel!
var maskLayer = CAShapeLayer()
// Device orientation. Updated whenever the orientation changes to a
// different supported orientation.
var currentOrientation = UIDeviceOrientation.portrait
// MARK: - Capture related objects
private let captureSession = AVCaptureSession()
let captureSessionQueue = DispatchQueue(label: "com.example.apple-samplecode.CaptureSessionQueue")
var captureDevice: AVCaptureDevice?
var videoDataOutput = AVCaptureVideoDataOutput()
let videoDataOutputQueue = DispatchQueue(label: "com.example.apple-samplecode.VideoDataOutputQueue")
// MARK: - Region of interest (ROI) and text orientation
// Region of video data output buffer that recognition should be run on.
// Gets recalculated once the bounds of the preview layer are known.
var regionOfInterest = CGRect(x: 0, y: 0, width: 1, height: 1)
// Orientation of text to search for in the region of interest.
var textOrientation = CGImagePropertyOrientation.up
// MARK: - Coordinate transforms
var bufferAspectRatio: Double!
// Transform from UI orientation to buffer orientation.
var uiRotationTransform = CGAffineTransform.identity
// Transform bottom-left coordinates to top-left.
var bottomToTopTransform = CGAffineTransform(scaleX: 1, y: -1).translatedBy(x: 0, y: -1)
// Transform coordinates in ROI to global coordinates (still normalized).
var roiToGlobalTransform = CGAffineTransform.identity
// Vision -> AVF coordinate transform.
var visionToAVFTransform = CGAffineTransform.identity
// MARK: - View controller methods
override func viewDidLoad() {
super.viewDidLoad()
// Set up preview view.
previewView.session = captureSession
// Set up cutout view.
cutoutView.backgroundColor = UIColor.gray.withAlphaComponent(0.5)
maskLayer.backgroundColor = UIColor.clear.cgColor
maskLayer.fillRule = .evenOdd
cutoutView.layer.mask = maskLayer
// Starting the capture session is a blocking call. Perform setup using
// a dedicated serial dispatch queue to prevent blocking the main thread.
captureSessionQueue.async {
self.setupCamera()
// Calculate region of interest now that the camera is setup.
DispatchQueue.main.async {
// Figure out initial ROI.
self.calculateRegionOfInterest()
}
}
}
override func viewWillTransition(to size: CGSize, with coordinator: UIViewControllerTransitionCoordinator) {
super.viewWillTransition(to: size, with: coordinator)
// Only change the current orientation if the new one is landscape or
// portrait. You can't really do anything about flat or unknown.
let deviceOrientation = UIDevice.current.orientation
if deviceOrientation.isPortrait || deviceOrientation.isLandscape {
currentOrientation = deviceOrientation
}
// Handle device orientation in the preview layer.
if let videoPreviewLayerConnection = previewView.videoPreviewLayer.connection {
if let newVideoOrientation = AVCaptureVideoOrientation(deviceOrientation: deviceOrientation) {
videoPreviewLayerConnection.videoOrientation = newVideoOrientation
}
}
// Orientation changed: figure out new region of interest (ROI).
calculateRegionOfInterest()
}
override func viewDidLayoutSubviews() {
super.viewDidLayoutSubviews()
updateCutout()
}
// MARK: - Setup
func calculateRegionOfInterest() {
// In landscape orientation the desired ROI is specified as the ratio of
// buffer width to height. When the UI is rotated to portrait, keep the
// vertical size the same (in buffer pixels). Also try to keep the
// horizontal size the same up to a maximum ratio.
let desiredHeightRatio = 0.15
let desiredWidthRatio = 0.6
let maxPortraitWidth = 0.8
// Figure out size of ROI.
let size: CGSize
if currentOrientation.isPortrait || currentOrientation == .unknown {
size = CGSize(width: min(desiredWidthRatio * bufferAspectRatio, maxPortraitWidth), height: desiredHeightRatio / bufferAspectRatio)
} else {
size = CGSize(width: desiredWidthRatio, height: desiredHeightRatio)
}
// Make it centered.
regionOfInterest.origin = CGPoint(x: (1 - size.width) / 2, y: (1 - size.height) / 2)
regionOfInterest.size = size
// ROI changed, update transform.
setupOrientationAndTransform()
// Update the cutout to match the new ROI.
DispatchQueue.main.async {
// Wait for the next run cycle before updating the cutout. This
// ensures that the preview layer already has its new orientation.
self.updateCutout()
}
}
func updateCutout() {
// Figure out where the cutout ends up in layer coordinates.
let roiRectTransform = bottomToTopTransform.concatenating(uiRotationTransform)
let cutout = previewView.videoPreviewLayer.layerRectConverted(fromMetadataOutputRect: regionOfInterest.applying(roiRectTransform))
// Create the mask.
let path = UIBezierPath(rect: cutoutView.frame)
path.append(UIBezierPath(rect: cutout))
maskLayer.path = path.cgPath
// Move the number view down to under cutout.
var numFrame = cutout
numFrame.origin.y += numFrame.size.height
numberView.frame = numFrame
}
func setupOrientationAndTransform() {
// Recalculate the affine transform between Vision coordinates and AVF coordinates.
// Compensate for region of interest.
let roi = regionOfInterest
roiToGlobalTransform = CGAffineTransform(translationX: roi.origin.x, y: roi.origin.y).scaledBy(x: roi.width, y: roi.height)
// Compensate for orientation (buffers always come in the same orientation).
switch currentOrientation {
case .landscapeLeft:
textOrientation = CGImagePropertyOrientation.up
uiRotationTransform = CGAffineTransform.identity
case .landscapeRight:
textOrientation = CGImagePropertyOrientation.down
uiRotationTransform = CGAffineTransform(translationX: 1, y: 1).rotated(by: CGFloat.pi)
case .portraitUpsideDown:
textOrientation = CGImagePropertyOrientation.left
uiRotationTransform = CGAffineTransform(translationX: 1, y: 0).rotated(by: CGFloat.pi / 2)
default: // We default everything else to .portraitUp
textOrientation = CGImagePropertyOrientation.right
uiRotationTransform = CGAffineTransform(translationX: 0, y: 1).rotated(by: -CGFloat.pi / 2)
}
// Full Vision ROI to AVF transform.
visionToAVFTransform = roiToGlobalTransform.concatenating(bottomToTopTransform).concatenating(uiRotationTransform)
}
func setupCamera() {
guard let captureDevice = AVCaptureDevice.default(.builtInWideAngleCamera, for: AVMediaType.video, position: .back) else {
print("Could not create capture device.")
return
}
self.captureDevice = captureDevice
// NOTE:
// Requesting 4k buffers allows recognition of smaller text but will
// consume more power. Use the smallest buffer size necessary to keep
// down battery usage.
if captureDevice.supportsSessionPreset(.hd4K3840x2160) {
captureSession.sessionPreset = AVCaptureSession.Preset.hd4K3840x2160
bufferAspectRatio = 3840.0 / 2160.0
} else {
captureSession.sessionPreset = AVCaptureSession.Preset.hd1920x1080
bufferAspectRatio = 1920.0 / 1080.0
}
guard let deviceInput = try? AVCaptureDeviceInput(device: captureDevice) else {
print("Could not create device input.")
return
}
if captureSession.canAddInput(deviceInput) {
captureSession.addInput(deviceInput)
}
// Configure video data output.
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
videoDataOutput.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_420YpCbCr8BiPlanarFullRange]
if captureSession.canAddOutput(videoDataOutput) {
captureSession.addOutput(videoDataOutput)
// NOTE:
// There is a trade-off to be made here. Enabling stabilization will
// give temporally more stable results and should help the recognizer
// converge. But if it's enabled the VideoDataOutput buffers don't
// match what's displayed on screen, which makes drawing bounding
// boxes very hard. Disable it in this app to allow drawing detected
// bounding boxes on screen.
videoDataOutput.connection(with: AVMediaType.video)?.preferredVideoStabilizationMode = .off
} else {
print("Could not add VDO output")
return
}
// Set zoom and autofocus to help focus on very small text.
do {
try captureDevice.lockForConfiguration()
captureDevice.videoZoomFactor = 2
captureDevice.autoFocusRangeRestriction = .near
captureDevice.unlockForConfiguration()
} catch {
print("Could not set zoom level due to error: \(error)")
return
}
captureSession.startRunning()
}
// MARK: - UI drawing and interaction
func showString(string: String) {
// Found a definite number.
// Stop the camera synchronously to ensure that no further buffers are
// received. Then update the number view asynchronously.
captureSessionQueue.sync {
self.captureSession.stopRunning()
DispatchQueue.main.async {
self.numberView.text = string
self.numberView.isHidden = false
}
}
}
#IBAction func handleTap(_ sender: UITapGestureRecognizer) {
captureSessionQueue.async {
if !self.captureSession.isRunning {
self.captureSession.startRunning()
}
DispatchQueue.main.async {
self.numberView.isHidden = true
}
}
}
}
// MARK: - AVCaptureVideoDataOutputSampleBufferDelegate
extension TextScanViewController: AVCaptureVideoDataOutputSampleBufferDelegate {
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
// This is implemented in VisionViewController.
}
}
// MARK: - Utility extensions
extension AVCaptureVideoOrientation {
init?(deviceOrientation: UIDeviceOrientation) {
switch deviceOrientation {
case .portrait: self = .portrait
case .portraitUpsideDown: self = .portraitUpsideDown
case .landscapeLeft: self = .landscapeRight
case .landscapeRight: self = .landscapeLeft
default: return nil
}
}
}
PreviewView:
import Foundation
import UIKit
import AVFoundation
class PreviewView: UIView {
var videoPreviewLayer: AVCaptureVideoPreviewLayer {
guard let layer = layer as? AVCaptureVideoPreviewLayer else {
fatalError("Expected `AVCaptureVideoPreviewLayer` type for layer. Check PreviewView.layerClass implementation.")
}
return layer
}
var session: AVCaptureSession? {
get {
return videoPreviewLayer.session
}
set {
videoPreviewLayer.session = newValue
}
}
// MARK: UIView
override class var layerClass: AnyClass {
return AVCaptureVideoPreviewLayer.self
}
}
VisionViewController:
import UIKit
import AVFoundation
import Vision
class VisionViewController: TextScanViewController {
var request: VNRecognizeTextRequest!
// Temporal string tracker
let numberTracker = StringTracker()
override func viewDidLoad() {
// Set up vision request before letting ViewController set up the camera
// so that it exists when the first buffer is received.
request = VNRecognizeTextRequest(completionHandler: recognizeTextHandler)
super.viewDidLoad()
}
// MARK: - Text recognition
// Vision recognition handler.
func recognizeTextHandler(request: VNRequest, error: Error?) {
var numbers = [String]()
var redBoxes = [CGRect]() // Shows all recognized text lines
var greenBoxes = [CGRect]() // Shows words that might be serials
guard let results = request.results as? [VNRecognizedTextObservation] else {
return
}
let maximumCandidates = 1
for visionResult in results {
guard let candidate = visionResult.topCandidates(maximumCandidates).first else { continue }
// Draw red boxes around any detected text, and green boxes around
// any detected phone numbers. The phone number may be a substring
// of the visionResult. If a substring, draw a green box around the
// number and a red box around the full string. If the number covers
// the full result only draw the green box.
var numberIsSubstring = true
if let result = candidate.string.extractPhoneNumber() {
let (range, number) = result
// Number may not cover full visionResult. Extract bounding box
// of substring.
if let box = try? candidate.boundingBox(for: range)?.boundingBox {
numbers.append(number)
greenBoxes.append(box)
numberIsSubstring = !(range.lowerBound == candidate.string.startIndex && range.upperBound == candidate.string.endIndex)
}
}
if numberIsSubstring {
redBoxes.append(visionResult.boundingBox)
}
}
// Log any found numbers.
numberTracker.logFrame(strings: numbers)
show(boxGroups: [(color: UIColor.red.cgColor, boxes: redBoxes), (color: UIColor.green.cgColor, boxes: greenBoxes)])
// Check if we have any temporally stable numbers.
if let sureNumber = numberTracker.getStableString() {
showString(string: sureNumber)
numberTracker.reset(string: sureNumber)
}
}
override func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
if let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
// Configure for running in real-time.
request.recognitionLevel = .fast
// Language correction won't help recognizing phone numbers. It also
// makes recognition slower.
request.usesLanguageCorrection = false
// Only run on the region of interest for maximum speed.
request.regionOfInterest = regionOfInterest
let requestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: textOrientation, options: [:])
do {
try requestHandler.perform([request])
} catch {
print(error)
}
}
}
// MARK: - Bounding box drawing
// Draw a box on screen. Must be called from main queue.
var boxLayer = [CAShapeLayer]()
func draw(rect: CGRect, color: CGColor) {
let layer = CAShapeLayer()
layer.opacity = 0.5
layer.borderColor = color
layer.borderWidth = 2
layer.frame = rect
boxLayer.append(layer)
previewView.videoPreviewLayer.insertSublayer(layer, at: 1)
}
// Remove all drawn boxes. Must be called on main queue.
func removeBoxes() {
for layer in boxLayer {
layer.removeFromSuperlayer()
}
boxLayer.removeAll()
}
typealias ColoredBoxGroup = (color: CGColor, boxes: [CGRect])
// Draws groups of colored boxes.
func show(boxGroups: [ColoredBoxGroup]) {
DispatchQueue.main.async {
let layer = self.previewView.videoPreviewLayer
self.removeBoxes()
for boxGroup in boxGroups {
let color = boxGroup.color
for box in boxGroup.boxes {
let rect = layer.layerRectConverted(fromMetadataOutputRect: box.applying(self.visionToAVFTransform))
self.draw(rect: rect, color: color)
}
}
}
}
}
StringUtils:
import Foundation
extension Character {
// Given a list of allowed characters, try to convert self to those in list
// if not already in it. This handles some common misclassifications for
// characters that are visually similar and can only be correctly recognized
// with more context and/or domain knowledge. Some examples (should be read
// in Menlo or some other font that has different symbols for all characters):
// 1 and l are the same character in Times New Roman
// I and l are the same character in Helvetica
// 0 and O are extremely similar in many fonts
// oO, wW, cC, sS, pP and others only differ by size in many fonts
func getSimilarCharacterIfNotIn(allowedChars: String) -> Character {
let conversionTable = [
"s": "S",
"S": "5",
"5": "S",
"o": "O",
"Q": "O",
"O": "0",
"0": "O",
"l": "I",
"I": "1",
"1": "I",
"B": "8",
"8": "B"
]
// Allow a maximum of two substitutions to handle 's' -> 'S' -> '5'.
let maxSubstitutions = 2
var current = String(self)
var counter = 0
while !allowedChars.contains(current) && counter < maxSubstitutions {
if let altChar = conversionTable[current] {
current = altChar
counter += 1
} else {
// Doesn't match anything in our table. Give up.
break
}
}
return current.first!
}
}
extension String {
// Extracts the first US-style phone number found in the string, returning
// the range of the number and the number itself as a tuple.
// Returns nil if no number is found.
func extractPhoneNumber() -> (Range<String.Index>, String)? {
// Do a first pass to find any substring that could be a US phone
// number. This will match the following common patterns and more:
// xxx-xxx-xxxx
// xxx xxx xxxx
// (xxx) xxx-xxxx
// (xxx)xxx-xxxx
// xxx.xxx.xxxx
// xxx xxx-xxxx
// xxx/xxx.xxxx
// +1-xxx-xxx-xxxx
// Note that this doesn't only look for digits since some digits look
// very similar to letters. This is handled later.
let pattern = #"""
(?x) # Verbose regex, allows comments
(?:\+1-?)? # Potential international prefix, may have -
[(]? # Potential opening (
\b(\w{3}) # Capture xxx
[)]? # Potential closing )
[\ -./]? # Potential separator
(\w{3}) # Capture xxx
[\ -./]? # Potential separator
(\w{4})\b # Capture xxxx
"""#
guard let range = self.range(of: pattern, options: .regularExpression, range: nil, locale: nil) else {
// No phone number found.
return nil
}
// Potential number found. Strip out punctuation, whitespace and country
// prefix.
var phoneNumberDigits = ""
let substring = String(self[range])
let nsrange = NSRange(substring.startIndex..., in: substring)
do {
// Extract the characters from the substring.
let regex = try NSRegularExpression(pattern: pattern, options: [])
if let match = regex.firstMatch(in: substring, options: [], range: nsrange) {
for rangeInd in 1 ..< match.numberOfRanges {
let range = match.range(at: rangeInd)
let matchString = (substring as NSString).substring(with: range)
phoneNumberDigits += matchString as String
}
}
} catch {
print("Error \(error) when creating pattern")
}
// Must be exactly 10 digits.
guard phoneNumberDigits.count == 10 else {
return nil
}
// Substitute commonly misrecognized characters, for example: 'S' -> '5' or 'l' -> '1'
var result = ""
let allowedChars = "0123456789"
for var char in phoneNumberDigits {
char = char.getSimilarCharacterIfNotIn(allowedChars: allowedChars)
guard allowedChars.contains(char) else {
return nil
}
result.append(char)
}
return (range, result)
}
}
class StringTracker {
var frameIndex: Int64 = 0
typealias StringObservation = (lastSeen: Int64, count: Int64)
// Dictionary of seen strings. Used to get stable recognition before
// displaying anything.
var seenStrings = [String: StringObservation]()
var bestCount = Int64(0)
var bestString = ""
func logFrame(strings: [String]) {
for string in strings {
if seenStrings[string] == nil {
seenStrings[string] = (lastSeen: Int64(0), count: Int64(-1))
}
seenStrings[string]?.lastSeen = frameIndex
seenStrings[string]?.count += 1
print("Seen \(string) \(seenStrings[string]?.count ?? 0) times")
}
var obsoleteStrings = [String]()
// Go through strings and prune any that have not been seen in while.
// Also find the (non-pruned) string with the greatest count.
for (string, obs) in seenStrings {
// Remove previously seen text after 30 frames (~1s).
if obs.lastSeen < frameIndex - 30 {
obsoleteStrings.append(string)
}
// Find the string with the greatest count.
let count = obs.count
if !obsoleteStrings.contains(string) && count > bestCount {
bestCount = Int64(count)
bestString = string
}
}
// Remove old strings.
for string in obsoleteStrings {
seenStrings.removeValue(forKey: string)
}
frameIndex += 1
}
func getStableString() -> String? {
// Require the recognizer to see the same string at least 10 times.
if bestCount >= 10 {
return bestString
} else {
return nil
}
}
func reset(string: String) {
seenStrings.removeValue(forKey: string)
bestCount = 0
bestString = ""
}
}
AppDelegate:
import UIKit
#UIApplicationMain
class AppDelegate: UIResponder, UIApplicationDelegate {
var window: UIWindow?
}
I was using the wrong class on the view controller.. instead of it being TextScanViewController it should have been set to Visionviewcontroller... it was skipping a whole class. I didn't realize how classes are inherited and that there was an important order to them. I have a lot to learn but learning a lot! :)
We are writing an app which analyzes a real world 3D data by using the TrueDepth camera on the front of an iPhone, and an AVCaptureSession configured to produce AVDepthData along with image data. This worked great on iPhone 12, but the same code on iPhone 13 produces an unwanted "smoothing" effect which makes the scene impossible to process and breaks our app. We are unable to find any information on this effect, from Apple or otherwise, much less how to avoid it, so we are asking you experts.
At the bottom of this post (Figure 3) is our code which configures the capture session, using an AVCaptureDataOutputSynchronizer, to produce frames of 640x480 image and depth data. I boiled it down as much as possible, sorry it's so long. The main two parts are the configure function, which sets up our capture session, and the dataOutputSynchronizer function, near the bottom, which fires when a sycned set of data is available. In the latter function I've included my code which extracts the information from the AVDepthData object, including looping through all 640x480 depth data points (in meters). I've excluded further processing for brevity (believe it or not :)).
On an iPhone 12 device, the PNG data and the depth data merge nicely. The front view and side view of the merged pointcloud are below (Figure 1) . The angles visible in the side view are due to the application of the focal length which "de-perspectives" the data and places them in their proper position in xyz space.
The same code on an iPhone 13 produces depth maps that result in point cloud further below (Figure 2 -- straight on view, angled view, and side view). There is no longer any clear distinction between objects and the background becasue the depth data appears to be "smoothed" between the mannequin and the background -- i.e., there are seven or eight points between the subject and background that are not realistic and make it impossible to do any meaningful processing such as segmenting the scene.
Has anyone else encountered this issue, or have any insight into how we might change our code to avoid it? Any help or ideas are MUCH appreciated, since this is a definite showstopper (we can't tell people to only run our App on older phones :)). Thank you!
Figure 1 -- Merged depth data and image into point cloud, from iPhone 12
Figure 2 -- Merged depth data and image into point cloud, from iPhone 13; unwanted smoothing effect visible
Figure 3 -- Our configuration code and capture handler; edited to remove downstream processing of captured data (which was basically formatting it into an XML file and uploading to the cloud)
import Foundation
import Combine
import AVFoundation
import Photos
import UIKit
import FirebaseStorage
public struct AlertError {
public var title: String = ""
public var message: String = ""
public var primaryButtonTitle = "Accept"
public var secondaryButtonTitle: String?
public var primaryAction: (() -> ())?
public var secondaryAction: (() -> ())?
public init(title: String = "", message: String = "", primaryButtonTitle: String = "Accept", secondaryButtonTitle: String? = nil, primaryAction: (() -> ())? = nil, secondaryAction: (() -> ())? = nil) {
self.title = title
self.message = message
self.primaryAction = primaryAction
self.primaryButtonTitle = primaryButtonTitle
self.secondaryAction = secondaryAction
}
}
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
//
//
// this is the CameraService class, which configures and runs a capture session
// which acquires syncronized image and depth data
// using an AVCaptureDataOutputSynchronizer
//
//
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
public class CameraService: NSObject,
AVCaptureVideoDataOutputSampleBufferDelegate,
AVCaptureDepthDataOutputDelegate,
AVCaptureDataOutputSynchronizerDelegate,
MyFirebaseProtocol,
ObservableObject{
#Published public var shouldShowAlertView = false
#Published public var shouldShowSpinner = false
public var labelStatus: String = "Ready"
var images: [UIImage?] = []
public var alertError: AlertError = AlertError()
public let session = AVCaptureSession()
var isSessionRunning = false
var isConfigured = false
var setupResult: SessionSetupResult = .success
private let sessionQueue = DispatchQueue(label: "session queue") // Communicate with the session and other session objects on this queue.
#objc dynamic var videoDeviceInput: AVCaptureDeviceInput!
private let videoDeviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInTrueDepthCamera], mediaType: .video, position: .front)
var videoCaptureDevice : AVCaptureDevice? = nil
let videoDataOutput: AVCaptureVideoDataOutput = AVCaptureVideoDataOutput() // Define frame output.
let depthDataOutput = AVCaptureDepthDataOutput()
var outputSynchronizer: AVCaptureDataOutputSynchronizer? = nil
let dataOutputQueue = DispatchQueue(label: "video data queue", qos: .userInitiated, attributes: [], autoreleaseFrequency: .workItem)
var scanStateCounter: Int = 0
var m_DepthDatasetsToUpload = [AVCaptureSynchronizedDepthData]()
var m_FrameBufferToUpload = [AVCaptureSynchronizedSampleBufferData]()
var firebaseDepthDatasetsArray: [String] = []
#Published var firebaseImageUploadCount = 0
#Published var firebaseTextFileUploadCount = 0
public func configure() {
/*
Setup the capture session.
In general, it's not safe to mutate an AVCaptureSession or any of its
inputs, outputs, or connections from multiple threads at the same time.
Don't perform these tasks on the main queue because
AVCaptureSession.startRunning() is a blocking call, which can
take a long time. Dispatch session setup to the sessionQueue, so
that the main queue isn't blocked, which keeps the UI responsive.
*/
sessionQueue.async {
self.configureSession()
}
}
// MARK: Checks for user's permisions
public func checkForPermissions() {
switch AVCaptureDevice.authorizationStatus(for: .video) {
case .authorized:
// The user has previously granted access to the camera.
break
case .notDetermined:
/*
The user has not yet been presented with the option to grant
video access. Suspend the session queue to delay session
setup until the access request has completed.
*/
sessionQueue.suspend()
AVCaptureDevice.requestAccess(for: .video, completionHandler: { granted in
if !granted {
self.setupResult = .notAuthorized
}
self.sessionQueue.resume()
})
default:
// The user has previously denied access.
setupResult = .notAuthorized
DispatchQueue.main.async {
self.alertError = AlertError(title: "Camera Access", message: "SwiftCamera doesn't have access to use your camera, please update your privacy settings.", primaryButtonTitle: "Settings", secondaryButtonTitle: nil, primaryAction: {
UIApplication.shared.open(URL(string: UIApplication.openSettingsURLString)!,
options: [:], completionHandler: nil)
}, secondaryAction: nil)
self.shouldShowAlertView = true
}
}
}
// MARK: Session Management
// Call this on the session queue.
/// - Tag: ConfigureSession
private func configureSession() {
if setupResult != .success {
return
}
session.beginConfiguration()
session.sessionPreset = AVCaptureSession.Preset.vga640x480
// Add video input.
do {
var defaultVideoDevice: AVCaptureDevice?
let frontCameraDevice = AVCaptureDevice.default(.builtInTrueDepthCamera, for: .video, position: .front)
// If the rear wide angle camera isn't available, default to the front wide angle camera.
defaultVideoDevice = frontCameraDevice
videoCaptureDevice = defaultVideoDevice
guard let videoDevice = defaultVideoDevice else {
print("Default video device is unavailable.")
setupResult = .configurationFailed
session.commitConfiguration()
return
}
let videoDeviceInput = try AVCaptureDeviceInput(device: videoDevice)
if session.canAddInput(videoDeviceInput) {
session.addInput(videoDeviceInput)
self.videoDeviceInput = videoDeviceInput
} else if session.inputs.isEmpty == false {
self.videoDeviceInput = videoDeviceInput
} else {
print("Couldn't add video device input to the session.")
setupResult = .configurationFailed
session.commitConfiguration()
return
}
} catch {
print("Couldn't create video device input: \(error)")
setupResult = .configurationFailed
session.commitConfiguration()
return
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: add video output to session
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
videoDataOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA)] as [String : Any]
videoDataOutput.alwaysDiscardsLateVideoFrames = true
videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera_frame_processing_queue"))
if session.canAddOutput(self.videoDataOutput) {
session.addOutput(self.videoDataOutput)
} else if session.outputs.contains(videoDataOutput) {
} else {
print("Couldn't create video device output")
setupResult = .configurationFailed
session.commitConfiguration()
return
}
guard let connection = self.videoDataOutput.connection(with: AVMediaType.video),
connection.isVideoOrientationSupported else { return }
connection.videoOrientation = .portrait
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: add depth output to session
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Add a depth data output
if session.canAddOutput(depthDataOutput) {
session.addOutput(depthDataOutput)
depthDataOutput.isFilteringEnabled = false
//depthDataOutput.setDelegate(T##delegate: AVCaptureDepthDataOutputDelegate?##AVCaptureDepthDataOutputDelegate?, callbackQueue: <#T##DispatchQueue?#>)
depthDataOutput.setDelegate(self, callbackQueue: DispatchQueue(label: "depth_frame_processing_queue"))
if let connection = depthDataOutput.connection(with: .depthData) {
connection.isEnabled = true
} else {
print("No AVCaptureConnection")
}
} else if session.outputs.contains(depthDataOutput){
} else {
print("Could not add depth data output to the session")
session.commitConfiguration()
return
}
// Search for highest resolution with half-point depth values
let depthFormats = videoCaptureDevice!.activeFormat.supportedDepthDataFormats
let filtered = depthFormats.filter({
CMFormatDescriptionGetMediaSubType($0.formatDescription) == kCVPixelFormatType_DepthFloat16
})
let selectedFormat = filtered.max(by: {
first, second in CMVideoFormatDescriptionGetDimensions(first.formatDescription).width < CMVideoFormatDescriptionGetDimensions(second.formatDescription).width
})
do {
try videoCaptureDevice!.lockForConfiguration()
videoCaptureDevice!.activeDepthDataFormat = selectedFormat
videoCaptureDevice!.unlockForConfiguration()
} catch {
print("Could not lock device for configuration: \(error)")
session.commitConfiguration()
return
}
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
// Use an AVCaptureDataOutputSynchronizer to synchronize the video data and depth data outputs.
// The first output in the dataOutputs array, in this case the AVCaptureVideoDataOutput, is the "master" output.
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
outputSynchronizer = AVCaptureDataOutputSynchronizer(dataOutputs: [videoDataOutput, depthDataOutput])
outputSynchronizer!.setDelegate(self, queue: dataOutputQueue)
session.commitConfiguration()
self.isConfigured = true
//self.start()
}
// MARK: Device Configuration
/// - Tag: Stop capture session
public func stop(completion: (() -> ())? = nil) {
sessionQueue.async {
//print("entered stop")
if self.isSessionRunning {
//print(self.setupResult)
if self.setupResult == .success {
//print("entered success")
DispatchQueue.main.async{
self.session.stopRunning()
self.isSessionRunning = self.session.isRunning
if !self.session.isRunning {
DispatchQueue.main.async {
completion?()
}
}
}
}
}
}
}
/// - Tag: Start capture session
public func start() {
// We use our capture session queue to ensure our UI runs smoothly on the main thread.
sessionQueue.async {
if !self.isSessionRunning && self.isConfigured {
switch self.setupResult {
case .success:
self.session.startRunning()
self.isSessionRunning = self.session.isRunning
if self.session.isRunning {
}
case .configurationFailed, .notAuthorized:
print("Application not authorized to use camera")
DispatchQueue.main.async {
self.alertError = AlertError(title: "Camera Error", message: "Camera configuration failed. Either your device camera is not available or its missing permissions", primaryButtonTitle: "Accept", secondaryButtonTitle: nil, primaryAction: nil, secondaryAction: nil)
self.shouldShowAlertView = true
}
}
}
}
}
// ------------------------------------------------------------------------
// MARK: CAPTURE HANDLERS
// ------------------------------------------------------------------------
public func dataOutputSynchronizer(_ synchronizer: AVCaptureDataOutputSynchronizer, didOutput synchronizedDataCollection: AVCaptureSynchronizedDataCollection) {
//printWithTime("Capture")
guard let syncedDepthData: AVCaptureSynchronizedDepthData =
synchronizedDataCollection.synchronizedData(for: depthDataOutput) as? AVCaptureSynchronizedDepthData else {
return
}
guard let syncedVideoData: AVCaptureSynchronizedSampleBufferData =
synchronizedDataCollection.synchronizedData(for: videoDataOutput) as? AVCaptureSynchronizedSampleBufferData else {
return
}
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
//
//
// Below is the code that extracts the information from depth data
// The depth data is 640x480, which matches the size of the synchronized image
// I save this info to a file, upload it to the cloud, and merge it with the image
// on a PC to create a pointcloud
//
//
///////////////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////////////
let depth_data : AVDepthData = syncedDepthData.depthData
let cvpixelbuffer : CVPixelBuffer = depth_data.depthDataMap
let height : Int = CVPixelBufferGetHeight(cvpixelbuffer)
let width : Int = CVPixelBufferGetWidth(cvpixelbuffer)
let quality : AVDepthData.Quality = depth_data.depthDataQuality
let accuracy : AVDepthData.Accuracy = depth_data.depthDataAccuracy
let pixelsize : Float = depth_data.cameraCalibrationData!.pixelSize
let camcaldata : AVCameraCalibrationData = depth_data.cameraCalibrationData!
let intmat : matrix_float3x3 = camcaldata.intrinsicMatrix
let cal_lensdistort_x : CGFloat = camcaldata.lensDistortionCenter.x
let cal_lensdistort_y : CGFloat = camcaldata.lensDistortionCenter.y
let cal_matrix_width : CGFloat = camcaldata.intrinsicMatrixReferenceDimensions.width
let cal_matrix_height : CGFloat = camcaldata.intrinsicMatrixReferenceDimensions.height
let intrinsics_fx : Float = camcaldata.intrinsicMatrix.columns.0.x
let intrinsics_fy : Float = camcaldata.intrinsicMatrix.columns.1.y
let intrinsics_ox : Float = camcaldata.intrinsicMatrix.columns.2.x
let intrinsics_oy : Float = camcaldata.intrinsicMatrix.columns.2.y
let pixelformattype : OSType = CVPixelBufferGetPixelFormatType(cvpixelbuffer)
CVPixelBufferLockBaseAddress(cvpixelbuffer, CVPixelBufferLockFlags(rawValue: 0))
let int16Buffer = unsafeBitCast(CVPixelBufferGetBaseAddress(cvpixelbuffer), to: UnsafeMutablePointer<Float16>.self)
let int16PerRow = CVPixelBufferGetBytesPerRow(cvpixelbuffer) / 2
for x in 0...height-1
{
for y in 0...width-1
{
let luma = int16Buffer[x * int16PerRow + y]
/////////////////////////
// SAVE DEPTH VALUE 'luma' to FILE FOR PROCESSING
}
}
CVPixelBufferUnlockBaseAddress(cvpixelbuffer, CVPixelBufferLockFlags(rawValue: 0))
}
I am looking into making a toggle switch for the camera input. What I am aiming at is having an enum that I can call like CameraPosition.rear or CameraPosition.front and the camera input changes accordingly.
This is what I got so far:
var currentCameraPosition: CameraPosition?
var frontCameraInput: AVCaptureDeviceInput?
var rearCameraInput: AVCaptureDeviceInput?
var currentCameraInput: AVCaptureDeviceInput?
enum CameraPosition {
case front
case rear
func toggle(){
switch self {
case .front:
currentCameraInput = frontCameraInput
case .rear:
currentCameraInput = rearCameraInput
}
}
}
The problem is that my compiler complains Instance member 'currentCameraInput' cannot be used on type 'Camera'. How should I fix this, or how can I rewrite it?
You can totally use an enum for this if you like. I'd recommend using didSet to toggle rather than having to set your CameraPosition and then separately call toggle. This way, setting your camera position will automatically update your camera input. I'd set it up like this:
var currentCameraPosition: CameraPosition? {
didSet {
if let position = currentCameraPosition {
switch position {
case .front: currentCameraInput = frontCameraInput
case .rear: currentCameraInput = rearCameraInput
}
}
}
}
var frontCameraInput: AVCaptureDeviceInput?
var rearCameraInput: AVCaptureDeviceInput?
var currentCameraInput: AVCaptureDeviceInput?
enum CameraPosition {
case front
case rear
}
You could even ditch the switch and use a ternary operator since you only have 2 options:
var currentCameraPosition: CameraPosition? {
didSet {
if let position = currentCameraPosition {
currentCameraInput = position == .front ? frontCameraInput : rearCameraInput
}
}
}
I want to allow background audio while the app is not in focus. I currently have this code, which should allow that:
do {
try AKSettings.setSession(category: .playback, with: .mixWithOthers)
} catch {
print("error")
}
AKSettings.playbackWhileMuted = true
I also have the setting 'Audio, Airplay and Picture in Picture' enabled in capabilities settings. However, when I press the home button on my device the audio doesn't keep playing. What am I doing wrong? I am using AudioKit to produce sounds if that matters.
I am using a singleton to house all of the AudioKit components which I named AudioPlayer.swift. Here is what I have in my AudioPlayer.swift singleton file:
class AudioPlayer: NSObject {
var currentFrequency = String()
var soundIsPlaying = false
var leftOscillator = AKOscillator()
var rightOscillator = AKOscillator()
var rain = try! AKAudioFile()
var rainPlayer: AKAudioPlayer!
var envelope = AKAmplitudeEnvelope()
override init() {
super.init()
do {
try AKSettings.setSession(category: .playback, with: .mixWithOthers)
} catch {
print("error")
}
AKSettings.playbackWhileMuted = true
AudioKit.output = envelope
AudioKit.start()
}
func setupFrequency(left: AKOscillator, right: AKOscillator, frequency: String) {
currentFrequency = frequency
leftOscillator = left
rightOscillator = right
let leftPanner = AKPanner(leftOscillator)
leftPanner.pan = -1
let rightPanner = AKPanner(rightOscillator)
rightPanner.pan = 1
//Set up rain and rainPlayer
do {
rain = try AKAudioFile(readFileName: "rain.wav")
rainPlayer = try AKAudioPlayer(file: rain, looping: true, deferBuffering: false, completionHandler: nil)
} catch { print(error) }
let mixer = AKMixer(leftPanner, rightPanner, rainPlayer)
//Put mixer in sound envelope
envelope = AKAmplitudeEnvelope(mixer)
envelope.attackDuration = 2.0
envelope.decayDuration = 0
envelope.sustainLevel = 1
envelope.releaseDuration = 0.2
//Start AudioKit stuff
AudioKit.output = envelope
AudioKit.start()
leftOscillator.start()
rightOscillator.start()
rainPlayer.start()
envelope.start()
soundIsPlaying = true
}
}
And here is an example of one of my sound effect view controllers, which reference the AudioKit singleton to send it a certain frequency (I have about a dozen of these view controllers, each with its own frequency settings):
class CalmView: UIViewController {
let leftOscillator = AKOscillator()
let rightOscillator = AKOscillator()
override func viewDidLoad() {
super.viewDidLoad()
leftOscillator.amplitude = 0.3
leftOscillator.frequency = 220
rightOscillator.amplitude = 0.3
rightOscillator.frequency = 230
}
#IBAction func playSound(_ sender: Any) {
if shared.soundIsPlaying == false {
AudioKit.stop()
shared.setupFrequency(left: leftOscillator, right: rightOscillator, frequency: "Calm")
} else if shared.soundIsPlaying == true && shared.currentFrequency != "Calm" {
AudioKit.stop()
shared.leftOscillator.stop()
shared.rightOscillator.stop()
shared.rainPlayer.stop()
shared.envelope.stop()
shared.setupFrequency(left: leftOscillator, right: rightOscillator, frequency: "Calm")
} else {
shared.soundIsPlaying = false
shared.envelope.stop()
}
}
}
I instantiated the AudioPlayer singleton in my ViewController.swift file.
It depends on when you are doing your configuration in relation to when AudioKit is started. If you're using AudioKit you should be using its AKSettings to manage your session category. Basically not only the playback category but also mixWithOthers. By default, does this:
/// Set the audio session type
#objc open static func setSession(category: SessionCategory,
with options: AVAudioSessionCategoryOptions = [.mixWithOthers]) throws {
So you'd do something like this in your ViewController:
do {
if #available(iOS 10.0, *) {
try AKSettings.setSession(category: .playAndRecord, with: [.defaultToSpeaker, .allowBluetooth, .allowBluetoothA2DP])
} else {
// Fallback on earlier versions
}
} catch {
print("Errored setting category.")
}
So I think its a matter of getting that straight. It might also help to have inter-app audio set up. If you still have trouble and provide more information, I can help more, but this is as good an answer as I can muster based on the info you've given so far.
I am working with webrtc mobile(ios). I can't disable audio in webrtc(ios). I have got no flag to disable audio.By changing in framwork/library it can done easily. My purpose is that I have to disable audio without changing in framework/library. Can anyone help me?.
Update your question with code snippet, how you are creating mediaStrem or tracks(audio/video).
Generally with default Native WebRTC Framework,
RTCMediaStream localStream = [_factory mediaStreamWithStreamId:kARDMediaStreamId];
if(audioRequired) {
RTCAudioTrack *aTrack = [_lmStream createLocalAudioTrack];
[localStream addAudioTrack:aTrack];
}
RTCVideoTrack *vTrack = [_lmStream createLocalVideoTrack];
[localStream addVideoTrack:vTrack];
[_peerConnection addStream:localStream];
If you want to mute the Audio during the call, use below function.
- (void)enableAudio:(NSString *)id isAudioEnabled:(BOOL) isAudioEnabled {
NSLog(#"Auido enabled: %d streams count:%d ", id, isAudioEnabled, _peerConnection.localStreams.count);
if(_peerConnection.localStreams.count > 0) {
RTCMediaStream *lStream = _peerConnection.localStreams[0];
if(lStream.audioTracks.count > 0) { // Usually we will have only one track. If you have more than one, need to traverse all.
// isAudioEnabled == 1 -> Unmute
// isAudioEnabled == 0 -> Mute
[lStream.audioTracks[0] setIsEnabled:isAudioEnabled];
}
}
}
in my case I didnt use streams and directly add audio track to peerconnection.
private func createMediaSenders() {
let streamId = "stream"
// Audio
let audioTrack = self.createAudioTrack()
self.pc.add(audioTrack, streamIds: [streamId])
// Video
/* let videoTrack = self.createVideoTrack()
self.localVideoTrack = videoTrack
self.peerConnection.add(videoTrack, streamIds: [streamId])
self.remoteVideoTrack = self.peerConnection.transceivers.first { $0.mediaType == .video }?.receiver.track as? RTCVideoTrack
// Data
if let dataChannel = createDataChannel() {
dataChannel.delegate = self
self.localDataChannel = dataChannel
}*/
}
private func createAudioTrack() -> RTCAudioTrack {
let audioConstrains = RTCMediaConstraints(mandatoryConstraints: nil, optionalConstraints: nil)
let audioSource = sessionFactory.audioSource(with: audioConstrains)
let audioTrack = sessionFactory.audioTrack(with: audioSource, trackId: "audio0")
return audioTrack
}
to mute and unmute microphone I use this function
public func muteMicrophone(_ mute:Bool){
for sender in pc.senders{
if (sender.track?.kind == "audio") {
sender.track?.isEnabled = mute
}
}
}