I need to write an application that can recognize a cat or a dog and measure its number of breaths per minute.
For this, I would like to sample the colors of 100 points from the cat's area every 100 milliseconds. These samples will be fed to a Fourier transform that estimates the cat's breathing rate from the cyclical color changes.
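For the sampling step, one option (a sketch of my own, not code from the project below; it assumes plain brightness is enough to see the breathing motion) is to read a 10 x 10 grid of luma values from the captured frame inside the cat's bounding box. ARKit's capturedImage is a bi-planar YCbCr buffer, so plane 0 holds the luma:
import CoreVideo
import CoreGraphics

// Sample a 10 x 10 grid of luma (brightness) values inside `rect`
// (given in pixel coordinates of the captured image) and average them.
// Plane 0 of ARKit's capturedImage is the Y (luma) plane.
func averageLuma(in pixelBuffer: CVPixelBuffer, rect: CGRect, gridSize: Int = 10) -> Float? {
    guard gridSize > 1 else { return nil }
    CVPixelBufferLockBaseAddress(pixelBuffer, .readOnly)
    defer { CVPixelBufferUnlockBaseAddress(pixelBuffer, .readOnly) }

    guard let base = CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0) else { return nil }
    let bytesPerRow = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0)
    let width = CVPixelBufferGetWidthOfPlane(pixelBuffer, 0)
    let height = CVPixelBufferGetHeightOfPlane(pixelBuffer, 0)
    let luma = base.assumingMemoryBound(to: UInt8.self)

    var total = 0
    var count = 0
    for row in 0..<gridSize {
        for column in 0..<gridSize {
            let x = Int(rect.minX + rect.width * CGFloat(column) / CGFloat(gridSize - 1))
            let y = Int(rect.minY + rect.height * CGFloat(row) / CGFloat(gridSize - 1))
            guard x >= 0, x < width, y >= 0, y < height else { continue }
            total += Int(luma[y * bytesPerRow + x])
            count += 1
        }
    }
    return count > 0 ? Float(total) / Float(count) : nil
}
Storing one such average every 100 ms produces the time series that the Fourier step works on.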
The Vision framework can detect a cat or dog in a camera snapshot, but I'm afraid that every 100 ms the ARAnchor will be placed somewhere else and I won't be able to sample the same 100 points from the cat's area.
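Leaving the anchor question aside, the Fourier step itself could be sketched with Accelerate's vDSP.DFT roughly like this (my own sketch; the 10 Hz sample rate follows from the 100 ms interval, and the function name is made up):
import Accelerate

// Estimate breaths per minute from a series of brightness samples captured
// every 100 ms (a 10 Hz sampling rate). The sample count is assumed to be a
// power of two (e.g. 128 samples, about 13 seconds of data).
func estimatedBreathsPerMinute(from samples: [Float], sampleRate: Float = 10) -> Float? {
    let n = samples.count
    guard n >= 8, n & (n - 1) == 0,
          let dft = vDSP.DFT(count: n,
                             direction: .forward,
                             transformType: .complexComplex,
                             ofType: Float.self) else { return nil }

    // Remove the DC component so the constant fur colour doesn't dominate the spectrum.
    let mean = vDSP.mean(samples)
    let inputReal = vDSP.add(-mean, samples)
    let inputImaginary = [Float](repeating: 0, count: n)
    var outputReal = [Float](repeating: 0, count: n)
    var outputImaginary = [Float](repeating: 0, count: n)

    dft.transform(inputReal: inputReal,
                  inputImaginary: inputImaginary,
                  outputReal: &outputReal,
                  outputImaginary: &outputImaginary)

    // Pick the strongest bin in a plausible breathing band (0.2 to 2 Hz,
    // i.e. 12 to 120 breaths per minute) and convert it back to a rate.
    let magnitudes = zip(outputReal, outputImaginary).map { $0 * $0 + $1 * $1 }
    let hzPerBin = sampleRate / Float(n)
    let lowBin = max(1, Int((0.2 / hzPerBin).rounded(.up)))
    let highBin = min(n / 2, Int((2.0 / hzPerBin).rounded(.down)))
    guard lowBin < highBin,
          let peak = magnitudes[lowBin...highBin].enumerated().max(by: { $0.element < $1.element })
    else { return nil }
    return Float(peak.offset + lowBin) * hzPerBin * 60
}
With 128 samples the frequency resolution is about 0.08 Hz, roughly 5 breaths per minute per bin, which is why a longer sampling window gives a steadier reading.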
Here is my ViewController:
import SceneKit
import ARKit
import Vision
class ViewController: UIViewController, ARSCNViewDelegate, ARSessionDelegate {
var detectionAvailableUntil: Int64 = .zero
@IBOutlet var sceneView: ARSCNView!
private var viewportSize: CGSize!
override func viewDidLoad() {
super.viewDidLoad()
sceneView.delegate = self
viewportSize = sceneView.frame.size
}
override func viewWillAppear(_ animated: Bool) {
super.viewWillAppear(animated)
resetTracking()
}
override func viewWillDisappear(_ animated: Bool) {
super.viewWillDisappear(animated)
sceneView.session.pause()
}
private func resetTracking() {
let configuration = ARWorldTrackingConfiguration()
configuration.planeDetection = []
sceneView.session.run(configuration)
}
lazy var catRequest: VNRecognizeAnimalsRequest = {
// VNRecognizeAnimalsRequest's initializer doesn't throw, so no do/catch is needed here.
let req = VNRecognizeAnimalsRequest { [weak self] request, error in
self?.processCatDetections(for: request as! VNRecognizeAnimalsRequest, error: error)
}
return req
}()
func processCatDetections(for request: VNRecognizeAnimalsRequest, error: Error?) {
guard error == nil else {
print("Object detection error: \(error!.localizedDescription)")
return
}
if let result = request.results?.first as? VNRecognizedObjectObservation {
let cats = result.labels.filter({$0.identifier == "Cat"})
for _ in cats {
guard let currentFrame = self.sceneView.session.currentFrame,
result.confidence > 0.3 else { continue }
let fromCameraImageToViewTransform = currentFrame.displayTransform(for: .portrait, viewportSize: self.viewportSize)
let boundingBox = result.boundingBox
let viewNormalizedBoundingBox = boundingBox.applying(fromCameraImageToViewTransform)
let t = CGAffineTransform(scaleX: self.viewportSize.width, y: self.viewportSize.height)
let viewBoundingBox = viewNormalizedBoundingBox.applying(t)
let midPoint = CGPoint(x: viewBoundingBox.midX, y: viewBoundingBox.midY)
let results = self.sceneView.hitTest(midPoint, types: .featurePoint)
guard let result = results.first else { continue }
let anchor = ARAnchor(name: "catAnchor", transform: result.worldTransform)
self.sceneView.session.add(anchor: anchor)
}
}
}
func renderer(_ renderer: SCNSceneRenderer, willRenderScene scene: SCNScene, atTime time: TimeInterval) {
guard let capturedImage = sceneView.session.currentFrame?.capturedImage else { return }
let requestHandler = VNImageRequestHandler(cvPixelBuffer: capturedImage)
do {
try requestHandler.perform([self.catRequest])
} catch {
print("Error: Vision request failed")
}
}
func renderer(_ renderer: SCNSceneRenderer, didAdd node: SCNNode, for anchor: ARAnchor) {
let date = Int64(NSDate().timeIntervalSince1970 * 1000)
guard anchor.name == "catAnchor",
date > self.detectionAvailableUntil else { return }
print("catAnchor added")
self.detectionAvailableUntil = Int64(NSDate().timeIntervalSince1970 * 1000 + 500)
let sphereNode = SCNNode(geometry: SCNSphere(radius: 0.01))
sphereNode.geometry?.firstMaterial?.diffuse.contents = UIColor.red
sphereNode.simdWorldTransform = anchor.transform
node.addChildNode(sphereNode)
}
}
The problem is that the sphereNode is drawn in different places, not on the cat's area (belly or head).
Apple provides APIs for tracking a human body or face, so I suppose I need something like ARFaceAnchor or ARBodyAnchor, but for a cat.
If the app could track the cat, it would be able to retrieve the same points within the ARAnchor.
Is it possible?
I wrote the code based on Apple's example,
but somehow the app crashes when accessing the setter of the custom entity component (the reveal() method).
I tried initializing the component to see what would happen, but then it crashed at anchor.addChild(card) (ViewController line 89) instead.
It runs without any problem on iPadOS 14, which makes it even weirder.
Does anyone know why it happens and how to fix it?
My code:
// CardEntity.swift
// Card-Game
//
// Created by Ian Liu on 2020/7/21.
//
import Foundation
import RealityKit
class CardEntity: Entity, HasModel, HasCollision {
required init() {
super.init()
self.components[ModelComponent.self] = ModelComponent(
mesh: .generateBox(width: 0.09, height: 0.005, depth: 0.09),
materials: [SimpleMaterial(
color: .orange,
isMetallic: false)
]
)
}
var card: CardComponent {
get { components[CardComponent.self] ?? CardComponent() }
set { components[CardComponent.self] = newValue }
}
}
extension CardEntity {
func reveal() {
card.revealed = true
var transform = self.transform
transform.rotation = simd_quatf(angle: 0, axis: [1, 0, 0])
move(to: transform, relativeTo: parent, duration: 0.25, timingFunction: .easeInOut)
}
func hide() {
card.revealed = false
var transform = self.transform
transform.rotation = simd_quatf(angle: .pi, axis: [1, 0, 0])
move(to: transform, relativeTo: parent, duration: 0.25, timingFunction: .easeInOut)
}
}
// CardComponent.swift
// Card-Game
//
// Created by Ian Liu on 2020/7/21.
//
import Foundation
import RealityKit
struct CardComponent: Component, Codable {
var revealed = false
var id = -1
init() {
self.revealed = false
self.id = -1
}
}
//
// ViewController.swift
// Card-Game
//
// Created by 劉學逸 on 7/16/20.
//
import UIKit
import ARKit
import RealityKit
import Combine
class ViewController: UIViewController, ARSessionDelegate {
@IBOutlet var arView: ARView!
let cardCount = 16
var prevCard: CardEntity? = nil
override func viewDidLoad() {
super.viewDidLoad()
arView.center = self.view.center
arView.centerXAnchor.constraint(equalTo: self.view.centerXAnchor).isActive = true
arView.centerYAnchor.constraint(equalTo: self.view.centerYAnchor).isActive = true
arView.session.delegate = self
let config = ARWorldTrackingConfiguration()
if ARWorldTrackingConfiguration.supportsFrameSemantics(.personSegmentation) {
config.frameSemantics.insert(.personSegmentationWithDepth)
}
config.planeDetection = [.horizontal]
arView.session.run(config)
addCoachingOverlay()
let anchor = AnchorEntity(plane: .horizontal, minimumBounds: [0.2, 0.2])
arView.scene.addAnchor(anchor)
CardComponent.registerComponent()
var cards: [CardEntity] = []
var models: [Entity] = []
for index in 1...cardCount {
let cardModel = CardEntity()
cardModel.name = "card_\((index+1)/2)"
cardModel.card.id = (index+1)/2
cards.append(cardModel)
}
var cancellable: Cancellable? = nil
// will build for a long time if load 8 models
cancellable = ModelEntity.loadModelAsync(named: "memory_card_1")
.append(ModelEntity.loadModelAsync(named: "memory_card_2"))
.append(ModelEntity.loadModelAsync(named: "memory_card_3"))
.append(ModelEntity.loadModelAsync(named: "memory_card_4"))
.append(ModelEntity.loadModelAsync(named: "memory_card_5"))
.append(ModelEntity.loadModelAsync(named: "memory_card_6"))
.append(ModelEntity.loadModelAsync(named: "memory_card_7"))
.append(ModelEntity.loadModelAsync(named: "memory_card_8"))
.collect().sink(receiveCompletion: {error in
print("error: \(error)")
cancellable?.cancel()},
receiveValue: { entities in
for (index, entity) in entities.enumerated() {
entity.setScale(SIMD3<Float>(0.0025,0.0025,0.0025), relativeTo: anchor)
entity.position = entity.position + SIMD3<Float>(0, 0.0025, 0)
entity.name = "memory_card_\(index)"
for _ in 1...2 {
models.append(entity.clone(recursive: true))
}
}
for (index, card) in cards.enumerated() {
card.addChild(models[index])
}
cards.shuffle()
for card in cards {
var flipTransform = card.transform
flipTransform.rotation = simd_quatf(angle: .pi, axis: [1, 0, 0])
card.move(to: flipTransform, relativeTo: card.parent)
}
//attach cards to the anchor
for (index, card) in cards.enumerated() {
card.generateCollisionShapes(recursive: true)
let x = Float(index % Int(sqrt(Double(self.cardCount)))) - 1.5
let z = Float(index / Int(sqrt(Double(self.cardCount)))) - 1.5
card.position = [x * 0.1, 0, z * 0.1]
anchor.addChild(card)
}
cancellable?.cancel()
})
// Adding Occlusion Box
// Create box mesh, 0.7 meters on all sides
let boxSize: Float = 0.7
let boxMesh = MeshResource.generateBox(size: boxSize)
// Create Occlusion Material
let material = OcclusionMaterial()
// Create ModelEntity using mesh and materials
let occlusionBox = ModelEntity(mesh: boxMesh, materials: [material])
// Position box with top slightly below game board
occlusionBox.position.y = -boxSize / 2 - 0.001
// Add to anchor
anchor.addChild(occlusionBox)
}
override func viewWillTransition(to size: CGSize, with coordinator: UIViewControllerTransitionCoordinator) {
super.viewWillTransition(to: size, with: coordinator)
arView.center = view.center
centerCoachingView()
}
override func viewWillAppear(_ animated: Bool) {
super.viewWillAppear(animated)
guard let configuration = arView.session.configuration else { return }
arView.session.run(configuration)
}
override func viewWillDisappear(_ animated: Bool) {
super.viewWillDisappear(animated)
arView.session.pause()
}
@IBAction func onTap(_ sender: UITapGestureRecognizer) {
let tapLocation = sender.location(in: arView)
if let entity = arView.entity(at: tapLocation) {
var card = entity
if entity.name.hasPrefix("memory_card") {
card = entity.parent!
}
guard let cardEntity = card as? CardEntity else { return }
print("tapped card id: \(cardEntity.card.id)")
if cardEntity.card.revealed {
return
} else {
cardEntity.reveal()
}
if let lastCard = prevCard {
print("last card id: \(lastCard.card.id)")
if lastCard.card.id != cardEntity.card.id {
DispatchQueue.main.asyncAfter(deadline: .now() + 1) {
cardEntity.hide()
lastCard.hide()
}
}
self.prevCard = nil
} else {
print("last card id: N\\A")
prevCard = cardEntity
}
}
}
}
extension ViewController: ARCoachingOverlayViewDelegate {
func addCoachingOverlay() {
let coachingOverlay = ARCoachingOverlayView()
coachingOverlay.autoresizingMask = [.flexibleHeight, .flexibleWidth]
coachingOverlay.center = view.center
coachingOverlay.delegate = self
coachingOverlay.session = arView.session
coachingOverlay.goal = .horizontalPlane
arView.addSubview(coachingOverlay)
NSLayoutConstraint.activate([
coachingOverlay.centerXAnchor.constraint(equalTo: view.centerXAnchor),
coachingOverlay.centerYAnchor.constraint(equalTo: view.centerYAnchor),
coachingOverlay.widthAnchor.constraint(equalTo: view.widthAnchor),
coachingOverlay.heightAnchor.constraint(equalTo: view.heightAnchor)
])
coachingOverlay.activatesAutomatically = true
}
//TODO: Fix coachingView won't center when first orientation change
func centerCoachingView() {
for subview in arView.subviews {
if let coachingView = subview as? ARCoachingOverlayView {
coachingView.center = self.view.center
}
}
}
}
I am trying to build a document scanner that can read text off of any document or card. However, it sometimes has trouble identifying text correctly off of a credit card. The accuracy is decent, but there is definitely room for improvement. I used the Vision framework's text recognition (VNRecognizeTextRequest) with the standard settings recommended for setting up text recognition.
This is what I use to set up the text recognition request:
textRecognitionRequest = VNRecognizeTextRequest(completionHandler: { (request, error) in
if let results = request.results, !results.isEmpty {
if let requestResults = request.results as? [VNRecognizedTextObservation] {
var foundText = ""
for observation in requestResults {
guard let candidate = observation.topCandidates(1).first else { continue }
foundText.append(candidate.string + "\n")
}
}
}
})
textRecognitionRequest.recognitionLevel = .accurate
textRecognitionRequest.usesLanguageCorrection = true
Does anyone have any suggestions for improving the identification programmatically by either pre-processing or post-processing the scan at some point?
UPDATE: I've made a fully open source project that may help you do exactly what you need. Check it out: https://github.com/ethanwa/credit-card-scanner-and-validator
You can't do much to improve accuracy beyond adding some preset values to look for specifically, which doesn't make sense with CC numbers, so I won't even bother showing that code. You'll need to rely on Apple to improve their text recognition model as iOS iterates for it to truly improve.
In the meantime, here are two things you can do:
Validate the credit card numbers you think you're receiving. For example, Visa starts with 4, MasterCard starts with 5, Discover with 6, Amex with 3, etc. They have specific lengths and so on. See here: https://www.freeformatter.com/credit-card-number-generator-validator.html
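Besides the prefix and length rules, the Luhn checksum that all major card numbers use will reject most single-digit misreads. A minimal sketch (my own illustration, not code from the project linked above):
// Luhn checksum: double every second digit from the right and check that
// the total is divisible by 10. Rejects most single-digit OCR misreads.
func passesLuhnCheck(_ number: String) -> Bool {
    let digits = number.compactMap { $0.wholeNumberValue }
    guard digits.count == number.count, digits.count >= 12 else { return false }
    var sum = 0
    for (index, digit) in digits.reversed().enumerated() {
        if index % 2 == 1 {
            let doubled = digit * 2
            sum += doubled > 9 ? doubled - 9 : doubled
        } else {
            sum += digit
        }
    }
    return sum % 10 == 0
}

// Example: passesLuhnCheck("4111111111111111") == true
A scanned candidate would only count as a hit once it passes this check.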
Keep iterating over and over on a camera feed until you get a number that validates. I'm not sure if you are currently just taking a picture of the card, and processing that image (which it sounds like you are doing), but you should be processing many images per second until you get a valid CC. This is most likely how Apple does it when adding a card via Apple Pay on your phone, or when depositing checks digitally using banking apps (finding valid routing and account numbers).
Here's an example of what I mean...
I wrote this code, which can pick out and validate ISBN numbers (10- and 13-digit numbers that catalog books and include a check digit for validation) in any given text, and it keeps scanning until it finds and validates the numbers. It works extremely well and is very fast. Check out this Swift 5.3 code:
import UIKit
import Vision
import Photos
import AVFoundation
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
var recognizedText = ""
var finalText = ""
var image: UIImage?
var processing = false
@IBOutlet weak var nameLabel: UILabel!
@IBOutlet weak var setLabel: UILabel!
@IBOutlet weak var numberLabel: UILabel!
lazy var textDetectionRequest: VNRecognizeTextRequest = {
let request = VNRecognizeTextRequest(completionHandler: self.handleDetectedText)
request.recognitionLevel = .accurate
request.usesLanguageCorrection = false
return request
}()
private let videoOutput = AVCaptureVideoDataOutput()
private let captureSession = AVCaptureSession()
private lazy var previewLayer: AVCaptureVideoPreviewLayer = {
let preview = AVCaptureVideoPreviewLayer(session: self.captureSession)
preview.videoGravity = .resizeAspect
return preview
}()
// MARK: AV
override func viewDidLoad() {
super.viewDidLoad()
self.addCameraInput()
self.addVideoOutput()
}
private func addCameraInput() {
let device = AVCaptureDevice.default(for: .video)!
let cameraInput = try! AVCaptureDeviceInput(device: device)
self.captureSession.addInput(cameraInput)
}
override func viewDidLayoutSubviews() {
super.viewDidLayoutSubviews()
self.previewLayer.frame = self.view.bounds
}
private func addVideoOutput() {
self.videoOutput.videoSettings = [(kCVPixelBufferPixelFormatTypeKey as NSString) : NSNumber(value: kCVPixelFormatType_32BGRA)] as [String : Any]
self.videoOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "my.image.handling.queue"))
self.captureSession.addOutput(self.videoOutput)
}
func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection)
{
if !processing
{
guard let frame = CMSampleBufferGetImageBuffer(sampleBuffer) else {
debugPrint("unable to get image from sample buffer")
return
}
print("did receive image frame")
// process image here
self.processing = true
let ciimage : CIImage = CIImage(cvPixelBuffer: frame)
let theimage : UIImage = self.convert(cmage: ciimage)
self.image = theimage
processImage()
}
}
// Convert a CIImage to a UIImage (via an intermediate CGImage)
func convert(cmage: CIImage) -> UIImage
{
let context:CIContext = CIContext.init(options: nil)
let cgImage:CGImage = context.createCGImage(cmage, from: cmage.extent)!
let image:UIImage = UIImage.init(cgImage: cgImage)
return image
}
// AV
func processImage()
{
DispatchQueue.main.async {
self.nameLabel.text = ""
self.setLabel.text = ""
self.numberLabel.text = ""
}
guard let image = image, let cgImage = image.cgImage else { return }
let requests = [textDetectionRequest]
let imageRequestHandler = VNImageRequestHandler(cgImage: cgImage, orientation: .right, options: [:])
DispatchQueue.global(qos: .userInitiated).async {
do {
try imageRequestHandler.perform(requests)
} catch let error {
print("Error: \(error)")
}
}
}
fileprivate func handleDetectedText(request: VNRequest?, error: Error?)
{
self.finalText = ""
if let error = error {
print(error.localizedDescription)
self.processing = false
return
}
guard let results = request?.results, results.count > 0 else {
print("No text was found.")
self.processing = false
return
}
if let requestResults = request?.results as? [VNRecognizedTextObservation] {
self.recognizedText = ""
for observation in requestResults {
guard let candidate = observation.topCandidates(1).first else { return }
self.recognizedText += candidate.string
self.recognizedText += " "
}
var replaced = self.recognizedText.replacingOccurrences(of: "-", with: "")
replaced = String(replaced.filter { !"\n\t\r".contains($0) })
let replacedArr = replaced.components(separatedBy: " ")
for here in replacedArr
{
let final = here.trimmingCharacters(in: CharacterSet.whitespacesAndNewlines)
if (final.count == 10 || final.count == 13) && final.containsISBNnums && Validate.isbn(final) // validate barcode
{
self.finalText += final
print(final)
self.captureSession.stopRunning()
DispatchQueue.main.async {
self.previewLayer.removeFromSuperlayer()
}
break
}
}
DispatchQueue.main.async {
self.numberLabel.text = self.finalText
}
}
self.processing = false
}
// MARK: Buttons
// This is a live camera view that will start a capture session
@IBAction func takePhoto(_ sender: Any) {
self.view.layer.addSublayer(self.previewLayer)
self.captureSession.startRunning()
}
@IBAction func choosePhoto(_ sender: Any) {
presentPhotoPicker(type: .photoLibrary)
}
fileprivate func presentPhotoPicker(type: UIImagePickerController.SourceType) {
let controller = UIImagePickerController()
controller.sourceType = type
controller.delegate = self
present(controller, animated: true, completion: nil)
}
}
extension ViewController: UIImagePickerControllerDelegate, UINavigationControllerDelegate {
func imagePickerControllerDidCancel(_ picker: UIImagePickerController) {
dismiss(animated: true, completion: nil)
}
func imagePickerController(_ picker: UIImagePickerController, didFinishPickingMediaWithInfo info: [UIImagePickerController.InfoKey : Any]) {
dismiss(animated: true, completion: nil)
image = info[.originalImage] as? UIImage
processImage()
}
}
extension String {
var containsISBNnums: Bool {
guard self.count > 0 else { return false }
let nums: Set<Character> = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "X"]
return Set(self).isSubset(of: nums)
}
}
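The Validate.isbn(_:) helper referenced above isn't included in the answer; a check-digit validation along these lines would fit (a hypothetical sketch, not the author's actual implementation):
// Hypothetical Validate helper: verifies the ISBN-10 / ISBN-13 check digit.
enum Validate {
    static func isbn(_ candidate: String) -> Bool {
        let chars = Array(candidate.uppercased())
        switch chars.count {
        case 10:
            // ISBN-10: weighted sum (weights 10 down to 1) must be divisible by 11;
            // the final character may be "X", which stands for 10.
            var sum = 0
            for (index, char) in chars.enumerated() {
                let value: Int
                if char == "X" && index == 9 {
                    value = 10
                } else if let digit = char.wholeNumberValue, (0...9).contains(digit) {
                    value = digit
                } else {
                    return false
                }
                sum += value * (10 - index)
            }
            return sum % 11 == 0
        case 13:
            // ISBN-13: digits weighted 1,3,1,3,... must sum to a multiple of 10.
            var sum = 0
            for (index, char) in chars.enumerated() {
                guard let digit = char.wholeNumberValue, (0...9).contains(digit) else { return false }
                sum += digit * (index % 2 == 0 ? 1 : 3)
            }
            return sum % 10 == 0
        default:
            return false
        }
    }
}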
I want to detect and track faces from the selfie cam feed in real time. I got that working based on this source: https://developer.apple.com/documentation/vision/tracking_the_user_s_face_in_real_time. The following image shows the rectangle that gets placed on the face.
As you can see from the red rectangular part of the screen, I need to capture and save only the part of the image inside that rectangle, not the full screen. How can I do that?
I have tried some other sources, but they only give me the full screen as an image, not the rectangular part.
The source code for live face detection is:
import UIKit
import AVKit
import Vision
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
// Main view for showing camera content.
@IBOutlet weak var previewView: UIView?
// AVCapture variables to hold sequence data
var session: AVCaptureSession?
var previewLayer: AVCaptureVideoPreviewLayer?
var videoDataOutput: AVCaptureVideoDataOutput?
var videoDataOutputQueue: DispatchQueue?
var captureDevice: AVCaptureDevice?
var captureDeviceResolution: CGSize = CGSize()
// Layer UI for drawing Vision results
var rootLayer: CALayer?
var detectionOverlayLayer: CALayer?
var detectedFaceRectangleShapeLayer: CAShapeLayer?
var detectedFaceLandmarksShapeLayer: CAShapeLayer?
// Vision requests
private var detectionRequests: [VNDetectFaceRectanglesRequest]?
private var trackingRequests: [VNTrackObjectRequest]?
lazy var sequenceRequestHandler = VNSequenceRequestHandler()
// MARK: UIViewController overrides
override func viewDidLoad() {
super.viewDidLoad()
self.session = self.setupAVCaptureSession()
self.prepareVisionRequest()
self.session?.startRunning()
}
override func didReceiveMemoryWarning() {
super.didReceiveMemoryWarning()
}
// Ensure that the interface stays locked in Portrait.
override var supportedInterfaceOrientations: UIInterfaceOrientationMask {
return .portrait
}
// Ensure that the interface stays locked in Portrait.
override var preferredInterfaceOrientationForPresentation: UIInterfaceOrientation {
return .portrait
}
// MARK: AVCapture Setup
/// - Tag: CreateCaptureSession
fileprivate func setupAVCaptureSession() -> AVCaptureSession? {
let captureSession = AVCaptureSession()
do {
let inputDevice = try self.configureFrontCamera(for: captureSession)
self.configureVideoDataOutput(for: inputDevice.device, resolution: inputDevice.resolution, captureSession: captureSession)
self.designatePreviewLayer(for: captureSession)
return captureSession
} catch let executionError as NSError {
self.presentError(executionError)
} catch {
self.presentErrorAlert(message: "An unexpected failure has occurred")
}
self.teardownAVCapture()
return nil
}
/// - Tag: ConfigureDeviceResolution
fileprivate func highestResolution420Format(for device: AVCaptureDevice) -> (format: AVCaptureDevice.Format, resolution: CGSize)? {
var highestResolutionFormat: AVCaptureDevice.Format? = nil
var highestResolutionDimensions = CMVideoDimensions(width: 0, height: 0)
for format in device.formats {
let deviceFormat = format as AVCaptureDevice.Format
let deviceFormatDescription = deviceFormat.formatDescription
if CMFormatDescriptionGetMediaSubType(deviceFormatDescription) == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange {
let candidateDimensions = CMVideoFormatDescriptionGetDimensions(deviceFormatDescription)
if (highestResolutionFormat == nil) || (candidateDimensions.width > highestResolutionDimensions.width) {
highestResolutionFormat = deviceFormat
highestResolutionDimensions = candidateDimensions
}
}
}
if highestResolutionFormat != nil {
let resolution = CGSize(width: CGFloat(highestResolutionDimensions.width), height: CGFloat(highestResolutionDimensions.height))
return (highestResolutionFormat!, resolution)
}
return nil
}
fileprivate func configureFrontCamera(for captureSession: AVCaptureSession) throws -> (device: AVCaptureDevice, resolution: CGSize) {
let deviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: .video, position: .front)
if let device = deviceDiscoverySession.devices.first {
if let deviceInput = try? AVCaptureDeviceInput(device: device) {
if captureSession.canAddInput(deviceInput) {
captureSession.addInput(deviceInput)
}
if let highestResolution = self.highestResolution420Format(for: device) {
try device.lockForConfiguration()
device.activeFormat = highestResolution.format
device.unlockForConfiguration()
return (device, highestResolution.resolution)
}
}
}
throw NSError(domain: "ViewController", code: 1, userInfo: nil)
}
/// - Tag: CreateSerialDispatchQueue
fileprivate func configureVideoDataOutput(for inputDevice: AVCaptureDevice, resolution: CGSize, captureSession: AVCaptureSession) {
let videoDataOutput = AVCaptureVideoDataOutput()
videoDataOutput.alwaysDiscardsLateVideoFrames = true
// Create a serial dispatch queue used for the sample buffer delegate as well as when a still image is captured.
// A serial dispatch queue must be used to guarantee that video frames will be delivered in order.
let videoDataOutputQueue = DispatchQueue(label: "com.example.apple-samplecode.VisionFaceTrack")
videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
if captureSession.canAddOutput(videoDataOutput) {
captureSession.addOutput(videoDataOutput)
}
videoDataOutput.connection(with: .video)?.isEnabled = true
if let captureConnection = videoDataOutput.connection(with: AVMediaType.video) {
if captureConnection.isCameraIntrinsicMatrixDeliverySupported {
captureConnection.isCameraIntrinsicMatrixDeliveryEnabled = true
}
}
self.videoDataOutput = videoDataOutput
self.videoDataOutputQueue = videoDataOutputQueue
self.captureDevice = inputDevice
self.captureDeviceResolution = resolution
}
/// - Tag: DesignatePreviewLayer
fileprivate func designatePreviewLayer(for captureSession: AVCaptureSession) {
let videoPreviewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
self.previewLayer = videoPreviewLayer
videoPreviewLayer.name = "CameraPreview"
videoPreviewLayer.backgroundColor = UIColor.black.cgColor
videoPreviewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
if let previewRootLayer = self.previewView?.layer {
self.rootLayer = previewRootLayer
previewRootLayer.masksToBounds = true
videoPreviewLayer.frame = previewRootLayer.bounds
previewRootLayer.addSublayer(videoPreviewLayer)
}
}
// Removes infrastructure for AVCapture as part of cleanup.
fileprivate func teardownAVCapture() {
self.videoDataOutput = nil
self.videoDataOutputQueue = nil
if let previewLayer = self.previewLayer {
previewLayer.removeFromSuperlayer()
self.previewLayer = nil
}
}
// MARK: Helper Methods for Error Presentation
fileprivate func presentErrorAlert(withTitle title: String = "Unexpected Failure", message: String) {
let alertController = UIAlertController(title: title, message: message, preferredStyle: .alert)
self.present(alertController, animated: true)
}
fileprivate func presentError(_ error: NSError) {
self.presentErrorAlert(withTitle: "Failed with error \(error.code)", message: error.localizedDescription)
}
// MARK: Helper Methods for Handling Device Orientation & EXIF
fileprivate func radiansForDegrees(_ degrees: CGFloat) -> CGFloat {
return CGFloat(Double(degrees) * Double.pi / 180.0)
}
func exifOrientationForDeviceOrientation(_ deviceOrientation: UIDeviceOrientation) -> CGImagePropertyOrientation {
switch deviceOrientation {
case .portraitUpsideDown:
return .rightMirrored
case .landscapeLeft:
return .downMirrored
case .landscapeRight:
return .upMirrored
default:
return .leftMirrored
}
}
func exifOrientationForCurrentDeviceOrientation() -> CGImagePropertyOrientation {
return exifOrientationForDeviceOrientation(UIDevice.current.orientation)
}
// MARK: Performing Vision Requests
/// - Tag: WriteCompletionHandler
fileprivate func prepareVisionRequest() {
//self.trackingRequests = []
var requests = [VNTrackObjectRequest]()
let faceDetectionRequest = VNDetectFaceRectanglesRequest(completionHandler: { (request, error) in
if error != nil {
print("FaceDetection error: \(String(describing: error)).")
}
guard let faceDetectionRequest = request as? VNDetectFaceRectanglesRequest,
let results = faceDetectionRequest.results as? [VNFaceObservation] else {
return
}
DispatchQueue.main.async {
// Add the observations to the tracking list
for observation in results {
let faceTrackingRequest = VNTrackObjectRequest(detectedObjectObservation: observation)
requests.append(faceTrackingRequest)
}
self.trackingRequests = requests
}
})
// Start with detection. Find face, then track it.
self.detectionRequests = [faceDetectionRequest]
self.sequenceRequestHandler = VNSequenceRequestHandler()
self.setupVisionDrawingLayers()
}
// MARK: Drawing Vision Observations
fileprivate func setupVisionDrawingLayers() {
let captureDeviceResolution = self.captureDeviceResolution
let captureDeviceBounds = CGRect(x: 0,
y: 0,
width: captureDeviceResolution.width,
height: captureDeviceResolution.height)
let captureDeviceBoundsCenterPoint = CGPoint(x: captureDeviceBounds.midX,
y: captureDeviceBounds.midY)
let normalizedCenterPoint = CGPoint(x: 0.5, y: 0.5)
guard let rootLayer = self.rootLayer else {
self.presentErrorAlert(message: "view was not property initialized")
return
}
let overlayLayer = CALayer()
overlayLayer.name = "DetectionOverlay"
overlayLayer.masksToBounds = true
overlayLayer.anchorPoint = normalizedCenterPoint
overlayLayer.bounds = captureDeviceBounds
overlayLayer.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
let faceRectangleShapeLayer = CAShapeLayer()
faceRectangleShapeLayer.name = "RectangleOutlineLayer"
faceRectangleShapeLayer.bounds = captureDeviceBounds
faceRectangleShapeLayer.anchorPoint = normalizedCenterPoint
faceRectangleShapeLayer.position = captureDeviceBoundsCenterPoint
faceRectangleShapeLayer.fillColor = nil
faceRectangleShapeLayer.strokeColor = UIColor.green.withAlphaComponent(0.7).cgColor
faceRectangleShapeLayer.lineWidth = 5
faceRectangleShapeLayer.shadowOpacity = 0.7
faceRectangleShapeLayer.shadowRadius = 5
let faceLandmarksShapeLayer = CAShapeLayer()
faceLandmarksShapeLayer.name = "FaceLandmarksLayer"
faceLandmarksShapeLayer.bounds = captureDeviceBounds
faceLandmarksShapeLayer.anchorPoint = normalizedCenterPoint
faceLandmarksShapeLayer.position = captureDeviceBoundsCenterPoint
faceLandmarksShapeLayer.fillColor = nil
faceLandmarksShapeLayer.strokeColor = UIColor.yellow.withAlphaComponent(0.7).cgColor
faceLandmarksShapeLayer.lineWidth = 3
faceLandmarksShapeLayer.shadowOpacity = 0.7
faceLandmarksShapeLayer.shadowRadius = 5
overlayLayer.addSublayer(faceRectangleShapeLayer)
faceRectangleShapeLayer.addSublayer(faceLandmarksShapeLayer)
rootLayer.addSublayer(overlayLayer)
self.detectionOverlayLayer = overlayLayer
self.detectedFaceRectangleShapeLayer = faceRectangleShapeLayer
self.detectedFaceLandmarksShapeLayer = faceLandmarksShapeLayer
self.updateLayerGeometry()
}
fileprivate func updateLayerGeometry() {
guard let overlayLayer = self.detectionOverlayLayer,
let rootLayer = self.rootLayer,
let previewLayer = self.previewLayer
else {
return
}
CATransaction.setValue(NSNumber(value: true), forKey: kCATransactionDisableActions)
let videoPreviewRect = previewLayer.layerRectConverted(fromMetadataOutputRect: CGRect(x: 0, y: 0, width: 1, height: 1))
var rotation: CGFloat
var scaleX: CGFloat
var scaleY: CGFloat
// Rotate the layer into screen orientation.
switch UIDevice.current.orientation {
case .portraitUpsideDown:
rotation = 180
scaleX = videoPreviewRect.width / captureDeviceResolution.width
scaleY = videoPreviewRect.height / captureDeviceResolution.height
case .landscapeLeft:
rotation = 90
scaleX = videoPreviewRect.height / captureDeviceResolution.width
scaleY = scaleX
case .landscapeRight:
rotation = -90
scaleX = videoPreviewRect.height / captureDeviceResolution.width
scaleY = scaleX
default:
rotation = 0
scaleX = videoPreviewRect.width / captureDeviceResolution.width
scaleY = videoPreviewRect.height / captureDeviceResolution.height
}
// Scale and mirror the image to ensure upright presentation.
let affineTransform = CGAffineTransform(rotationAngle: radiansForDegrees(rotation))
.scaledBy(x: scaleX, y: -scaleY)
overlayLayer.setAffineTransform(affineTransform)
// Cover entire screen UI.
let rootLayerBounds = rootLayer.bounds
overlayLayer.position = CGPoint(x: rootLayerBounds.midX, y: rootLayerBounds.midY)
}
fileprivate func addPoints(in landmarkRegion: VNFaceLandmarkRegion2D, to path: CGMutablePath, applying affineTransform: CGAffineTransform, closingWhenComplete closePath: Bool) {
let pointCount = landmarkRegion.pointCount
if pointCount > 1 {
let points: [CGPoint] = landmarkRegion.normalizedPoints
path.move(to: points[0], transform: affineTransform)
path.addLines(between: points, transform: affineTransform)
if closePath {
path.addLine(to: points[0], transform: affineTransform)
path.closeSubpath()
}
}
}
fileprivate func addIndicators(to faceRectanglePath: CGMutablePath, faceLandmarksPath: CGMutablePath, for faceObservation: VNFaceObservation) {
let displaySize = self.captureDeviceResolution
let faceBounds = VNImageRectForNormalizedRect(faceObservation.boundingBox, Int(displaySize.width), Int(displaySize.height))
faceRectanglePath.addRect(faceBounds)
if let landmarks = faceObservation.landmarks {
// Landmarks are relative to -- and normalized within -- face bounds
let affineTransform = CGAffineTransform(translationX: faceBounds.origin.x, y: faceBounds.origin.y)
.scaledBy(x: faceBounds.size.width, y: faceBounds.size.height)
// Treat eyebrows and lines as open-ended regions when drawing paths.
let openLandmarkRegions: [VNFaceLandmarkRegion2D?] = [
landmarks.leftEyebrow,
landmarks.rightEyebrow,
landmarks.faceContour,
landmarks.noseCrest,
landmarks.medianLine
]
for openLandmarkRegion in openLandmarkRegions where openLandmarkRegion != nil {
self.addPoints(in: openLandmarkRegion!, to: faceLandmarksPath, applying: affineTransform, closingWhenComplete: false)
}
// Draw eyes, lips, and nose as closed regions.
let closedLandmarkRegions: [VNFaceLandmarkRegion2D?] = [
landmarks.leftEye,
landmarks.rightEye,
landmarks.outerLips,
landmarks.innerLips,
landmarks.nose
]
for closedLandmarkRegion in closedLandmarkRegions where closedLandmarkRegion != nil {
self.addPoints(in: closedLandmarkRegion!, to: faceLandmarksPath, applying: affineTransform, closingWhenComplete: true)
}
}
}
/// - Tag: DrawPaths
fileprivate func drawFaceObservations(_ faceObservations: [VNFaceObservation]) {
guard let faceRectangleShapeLayer = self.detectedFaceRectangleShapeLayer,
let faceLandmarksShapeLayer = self.detectedFaceLandmarksShapeLayer
else {
return
}
CATransaction.begin()
CATransaction.setValue(NSNumber(value: true), forKey: kCATransactionDisableActions)
let faceRectanglePath = CGMutablePath()
let faceLandmarksPath = CGMutablePath()
for faceObservation in faceObservations {
self.addIndicators(to: faceRectanglePath,
faceLandmarksPath: faceLandmarksPath,
for: faceObservation)
}
faceRectangleShapeLayer.path = faceRectanglePath
faceLandmarksShapeLayer.path = faceLandmarksPath
self.updateLayerGeometry()
CATransaction.commit()
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
/// - Tag: PerformRequests
// Handle delegate method callback on receiving a sample buffer.
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
var requestHandlerOptions: [VNImageOption: AnyObject] = [:]
let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil)
if cameraIntrinsicData != nil {
requestHandlerOptions[VNImageOption.cameraIntrinsics] = cameraIntrinsicData
}
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
print("Failed to obtain a CVPixelBuffer for the current output frame.")
return
}
let exifOrientation = self.exifOrientationForCurrentDeviceOrientation()
guard let requests = self.trackingRequests, !requests.isEmpty else {
// No tracking object detected, so perform initial detection
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
orientation: exifOrientation,
options: requestHandlerOptions)
do {
guard let detectRequests = self.detectionRequests else {
return
}
try imageRequestHandler.perform(detectRequests)
} catch let error as NSError {
NSLog("Failed to perform FaceRectangleRequest: %#", error)
}
return
}
do {
try self.sequenceRequestHandler.perform(requests,
on: pixelBuffer,
orientation: exifOrientation)
} catch let error as NSError {
NSLog("Failed to perform SequenceRequest: %#", error)
}
// Setup the next round of tracking.
var newTrackingRequests = [VNTrackObjectRequest]()
for trackingRequest in requests {
guard let results = trackingRequest.results else {
return
}
guard let observation = results[0] as? VNDetectedObjectObservation else {
return
}
if !trackingRequest.isLastFrame {
if observation.confidence > 0.3 {
trackingRequest.inputObservation = observation
} else {
trackingRequest.isLastFrame = true
}
newTrackingRequests.append(trackingRequest)
}
}
self.trackingRequests = newTrackingRequests
if newTrackingRequests.isEmpty {
// Nothing to track, so abort.
return
}
// Perform face landmark tracking on detected faces.
var faceLandmarkRequests = [VNDetectFaceLandmarksRequest]()
// Perform landmark detection on tracked faces.
for trackingRequest in newTrackingRequests {
let faceLandmarksRequest = VNDetectFaceLandmarksRequest(completionHandler: { (request, error) in
if error != nil {
print("FaceLandmarks error: \(String(describing: error)).")
}
guard let landmarksRequest = request as? VNDetectFaceLandmarksRequest,
let results = landmarksRequest.results as? [VNFaceObservation] else {
return
}
// Perform all UI updates (drawing) on the main queue, not the background queue on which this handler is being called.
DispatchQueue.main.async {
self.drawFaceObservations(results)
}
})
guard let trackingResults = trackingRequest.results else {
return
}
guard let observation = trackingResults[0] as? VNDetectedObjectObservation else {
return
}
let faceObservation = VNFaceObservation(boundingBox: observation.boundingBox)
faceLandmarksRequest.inputFaceObservations = [faceObservation]
// Continue to track detected facial landmarks.
faceLandmarkRequests.append(faceLandmarksRequest)
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
orientation: exifOrientation,
options: requestHandlerOptions)
do {
try imageRequestHandler.perform(faceLandmarkRequests)
} catch let error as NSError {
NSLog("Failed to perform FaceLandmarkRequest: %#", error)
}
}
}
}
If you know that rectangle's frame (a CGRect), you can save only that part of the image by cropping the original image to that frame.
let rectFrame = CGRect() // the face rectangle in image coordinates
let image = UIImage() // the full captured image
if let croppedCGImage = image.cgImage?.cropping(to: rectFrame) {
    let croppedImage = UIImage(cgImage: croppedCGImage) // save or display this
}
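Note that a VNFaceObservation bounding box is normalized (0...1, origin in the bottom-left), so it needs to be scaled to pixel coordinates and flipped before cropping. A sketch of that conversion (assuming the CGImage has the same orientation the Vision request was performed with):
import Vision
import UIKit

// Convert a normalized Vision bounding box into a CGImage-space rect and crop.
func cropFace(_ observation: VNFaceObservation, from cgImage: CGImage) -> UIImage? {
    let width = cgImage.width
    let height = cgImage.height

    // VNImageRectForNormalizedRect scales the normalized box to pixel units,
    // but Vision's origin is bottom-left while CGImage cropping uses top-left,
    // so flip the y coordinate.
    var rect = VNImageRectForNormalizedRect(observation.boundingBox, width, height)
    rect.origin.y = CGFloat(height) - rect.origin.y - rect.height

    guard let cropped = cgImage.cropping(to: rect.integral) else { return nil }
    return UIImage(cgImage: cropped)
}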
I have implemented an image recognition system; when it recognizes this image, the plane with the material is superimposed on it.
I want the image to change when I press a button.
This is part of my code:
func renderer(_ renderer: SCNSceneRenderer, nodeFor anchor: ARAnchor) -> SCNNode? {
guard let imageAnchor = anchor as? ARImageAnchor else {return nil}
guard let paintingName = imageAnchor.referenceImage.name else {return nil}
guard let painting = paintings[paintingName] else {return nil}
let plane = SCNPlane(width: imageAnchor.referenceImage.physicalSize.width, height: imageAnchor.referenceImage.physicalSize.height)
let firstImageDetail: UIImage = UIImage(named: namePicture)!
if imageAnchor.referenceImage.name == "BalsaDeMedusa" {
plane.firstMaterial?.diffuse.contents = firstImageDetail
}
The button logic:
@IBAction func nextDetail(_ sender: UIButton) {
if i < detailsPicture.count - 1 {
namePicture = detailsPicture[i+1]
i += 1
}
else {
i = 0
namePicture = detailsPicture[i]
}
}
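For the already-rendered plane to refresh when the button is tapped, one approach (a sketch under the assumption that a single painting plane is on screen at a time; detailPlane is a name I made up, while namePicture, detailsPicture and i come from the question's code) is to keep a reference to the SCNPlane created in renderer(_:nodeFor:) and update its material from the button action:
// Hypothetical: set this inside renderer(_:nodeFor:) right after the plane is created.
var detailPlane: SCNPlane?

@IBAction func nextDetail(_ sender: UIButton) {
    i = (i + 1) % detailsPicture.count
    namePicture = detailsPicture[i]
    // Updating namePicture alone doesn't refresh what is already rendered;
    // the material of the existing plane has to be changed as well.
    detailPlane?.firstMaterial?.diffuse.contents = UIImage(named: namePicture)
}
SceneKit picks up the new diffuse contents on the next frame, so no further calls are needed.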
I am working on an iPhone app that uses CoreML and ARKit simultaneously. CoreML is supposed to recognize a number, and ARKit should detect a vertical plane (i.e. a wall) and add some planes over that same wall, with the content displayed on those planes depending on the recognized number.
So, the CoreML part is working 100%. Every time I "change" the number, the topPrediction variable updates automatically (so far so good). The problem is that my variable in func renderer(_ renderer: SCNSceneRenderer, didAdd node: SCNNode, for anchor: ARAnchor) does not update! The first number recognized by CoreML is correctly passed to the renderer func and it works like a charm, but if I point the camera at another number it still assumes it's the first number. As you can see in the code, I even tried writing a func getGabNum() -> Int and calling it in the renderer func (var num = getGabNum()), but I keep getting the warning "Variable 'num' was never mutated; consider changing to 'let' constant", which tells me something is not right. So here is my code, I hope you can help me, and thank you!
struct Room : Decodable {
let id : Int?
let num : Int?
//Adicionar Schedules
var horario = [Schedule]()
}
struct Schedule : Decodable {
let id : Int?
let hora_ini : Date?
let hora_fim : Date?
let descr : String?
private enum CodingKeys: String, CodingKey {
case id
case hora_ini
case hora_fim
case descr
}
}
class ViewController: UIViewController, ARSCNViewDelegate {
@IBOutlet weak var debugLabel: UILabel!
@IBOutlet weak var debugTextView: UITextView!
@IBOutlet weak var sceneView: ARSCNView!
let dispatchQueueML = DispatchQueue(label: "com.hw.dispatchqueueml") // A Serial Queue
var visionRequests = [VNRequest]()
var room: Room?
var room_array: [[Int]] = [[17, 0], [43, 0], [120,0]]
var teste = 0
var num = -1
override func viewDidLoad() {
super.viewDidLoad()
sceneView.delegate = self
sceneView.showsStatistics = true
let scene = SCNScene()
sceneView.scene = scene
configureLighting()
guard let selectedModel = try? VNCoreMLModel(for: SalasMLv6().model) else {
fatalError("Could not load model.")
}
let classificationRequest = VNCoreMLRequest(model: selectedModel, completionHandler: classificationCompleteHandler)
classificationRequest.imageCropAndScaleOption = VNImageCropAndScaleOption.centerCrop // Crop from centre of images and scale to appropriate size.
visionRequests = [classificationRequest]
loopCoreMLUpdate()
}
override func viewWillAppear(_ animated: Bool) {
super.viewWillAppear(animated)
setUpSceneView()
}
func setUpSceneView()
{
let configuration = ARWorldTrackingConfiguration()
configuration.planeDetection = .vertical
sceneView.session.run(configuration)
sceneView.debugOptions = [ARSCNDebugOptions.showFeaturePoints]
}
override func viewWillDisappear(_ animated: Bool) {
super.viewWillDisappear(animated)
sceneView.session.pause()
}
func configureLighting()
{
sceneView.automaticallyUpdatesLighting = true
sceneView.autoenablesDefaultLighting = true
}
override func didReceiveMemoryWarning() {
super.didReceiveMemoryWarning()
}
// MARK: - ARSCNViewDelegate
func renderer(_ renderer: SCNSceneRenderer, updateAtTime time: TimeInterval) {
DispatchQueue.main.async {
// Do any desired updates to SceneKit here.
}
}
func loopCoreMLUpdate() {
dispatchQueueML.async {
self.updateCoreML()
self.loopCoreMLUpdate()
}
}
func updateCoreML() {
// Get Camera Image as RGB
let pixbuff : CVPixelBuffer? = (sceneView.session.currentFrame?.capturedImage)
if pixbuff == nil { return }
let ciImage = CIImage(cvPixelBuffer: pixbuff!)
// Prepare CoreML/Vision Request
let imageRequestHandler = VNImageRequestHandler(ciImage: ciImage, options: [:])
// Run Vision Image Request
do {
try imageRequestHandler.perform(self.visionRequests)
} catch {
print(error)
}
}
func classificationCompleteHandler(request: VNRequest, error: Error?) {
// Catch Errors
if error != nil {
print("Error: " + (error?.localizedDescription)!)
return
}
guard let observations = request.results else {
print("No results")
return
}
// Get Classifications
let classifications = observations[0...2] // top 3 results
.compactMap({ $0 as? VNClassificationObservation })
.map({ "\($0.identifier) \(String(format:" : %.2f", $0.confidence))" })
.joined(separator: "\n")
// Render Classifications
DispatchQueue.main.async {
// Display Debug Text on screen
self.debugTextView.text = "TOP 3 PROBABILITIES: \n" + classifications
// Display Top Symbol
var symbol = "❌"
var gabNum: Int?
let topPrediction = classifications.components(separatedBy: "\n")[0]
let topPredictionName = topPrediction.components(separatedBy: ":")[0].trimmingCharacters(in: .whitespaces)
// Only display a prediction if confidence is above the threshold (0.05 here)
let topPredictionScore:Float? = Float(topPrediction.components(separatedBy: ":")[1].trimmingCharacters(in: .whitespaces))
if (topPredictionScore != nil && topPredictionScore! > 0.05) {
if (topPredictionName == "120") {
symbol = "1️⃣2️⃣0️⃣"
gabNum = 120
self.teste = gabNum!
}
if (topPredictionName == "43") {
symbol = "4️⃣3️⃣"
gabNum = 43
self.teste = gabNum!
}
if (topPredictionName == "17") {
symbol = "1️⃣7️⃣"
gabNum = 17
self.teste = gabNum!
}
}
if let gn = gabNum {
// get room from REST
let jsonURL = "someURL\(gn)"
guard let url = URL(string: jsonURL) else {
return
}
URLSession.shared.dataTask(with: url) { (data, response, error) in
if error != nil{
print("error)")
return
}
do {
self.room = try JSONDecoder().decode(Room.self, from: data!)
}catch{
print("Decoder Error")
}
}.resume()
}
self.debugLabel.text = symbol
}
}
// MARK: - HIDE STATUS BAR
override var prefersStatusBarHidden : Bool { return true }
func getGabNum() -> Int {
return self.teste
}
func renderer(_ renderer: SCNSceneRenderer, didAdd node: SCNNode, for anchor: ARAnchor)
{
guard room != nil else {
print("room == nil")
return
}
guard let planeAnchor = anchor as? ARPlaneAnchor else {
return
}
num = getGabNum()
if( num == room_array[0][0] && room_array[0][1] == 1 ){
return
}else{
if( num == room_array[1][0] && room_array[1][1] == 1 ){
return
}else{
if( num == room_array[2][0] && room_array[2][1] == 1 ){
return
}else{
var i = 0
for horario in (self.room?.horario)!{
// Planes and Nodes Stuff Right Here
}
switch self.room?.num {
case 17: room_array[0][1] = 1
case 43: room_array[1][1] = 1
case 120: room_array[2][1] = 1
default: break // switching over an optional Int requires a default case
}
}
}
}
}
}