I am actually experimenting with the Vision Framework.
I simply have a UIImageView in my Storyboard and my class is of type UIViewController.
But when I try to override viewDidAppear(_ animated: Bool) I get the error message: Method does not override any method from its superclass
Does anyone know what the issue is? I couldn't find anything that works for me...
So after looking at the code, you are declaring a new class called UIViewController. Maybe you meant extension instead of class.
That's the reason the compiler could not find a method to override: by declaring a new class named UIViewController you shadowed UIKit's UIViewController, so your ViewController no longer inherits from the real one.
replace
class UIViewController {
with
extension UIViewController {
This is my complete code:
import UIKit
import Vision
/// Displays a bundled photo and overlays an emoji on every face that the
/// Vision framework detects in it.
class ViewController: UIViewController {
    // NOTE: restored `@IBOutlet` (the paste garbled `@` into `#`).
    @IBOutlet weak var imageView: UIImageView!

    /// Orientation handed to Vision so detection matches the photo's EXIF rotation.
    var imageOrientation = CGImagePropertyOrientation(.up)

    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        if let image = UIImage(named: "group") {
            imageView.image = image
            imageView.contentMode = .scaleAspectFit
            imageOrientation = CGImagePropertyOrientation(image.imageOrientation)
            guard let cgImage = image.cgImage else { return }
            setupVision(image: cgImage)
        }
    }

    /// Runs a face-rectangles request on `image`; results arrive in
    /// `handleFaceDetectionRequest`.
    private func setupVision(image: CGImage) {
        let faceDetectionRequest = VNDetectFaceRectanglesRequest(completionHandler: self.handleFaceDetectionRequest)
        let imageRequestHandler = VNImageRequestHandler(cgImage: image, orientation: imageOrientation, options: [:])
        do {
            try imageRequestHandler.perform([faceDetectionRequest])
        } catch let error as NSError {
            print(error)
            return
        }
    }

    /// Converts each detected face's normalized bounding box into view
    /// coordinates and covers it with an emoji text layer.
    private func handleFaceDetectionRequest(request: VNRequest?, error: Error?) {
        if let requestError = error as NSError? {
            print(requestError)
            return
        }
        guard let image = imageView.image else { return }
        guard let cgImage = image.cgImage else { return }
        // Rect of the aspect-fit image inside the image view.
        let imageRect = self.determineScale(cgImage: cgImage, imageViewFrame: imageView.frame)
        // Remove overlays from any previous detection pass.
        self.imageView.layer.sublayers = nil
        if let results = request?.results as? [VNFaceObservation] {
            for observation in results {
                let faceRect = convertUnitToPoint(originalImageRect: imageRect, targetRect: observation.boundingBox)
                // Slightly enlarge so the emoji fully covers the face.
                let emojiRect = CGRect(x: faceRect.origin.x, y: faceRect.origin.y - 5, width: faceRect.size.width + 5, height: faceRect.size.height + 5)
                let textLayer = CATextLayer()
                // Restored emoji literal (was mojibake in the paste).
                textLayer.string = "🦸‍♀️"
                textLayer.fontSize = faceRect.width
                textLayer.frame = emojiRect
                textLayer.contentsScale = UIScreen.main.scale
                self.imageView.layer.addSublayer(textLayer)
            }
        }
    }
}
and:
import UIKit
// Declared as an EXTENSION of UIKit's UIViewController (the original
// `class UIViewController {` shadowed UIKit's class, which is exactly why
// `override func viewDidAppear` failed to compile elsewhere).
extension UIViewController {

    /// Converts a Vision normalized rect (origin bottom-left, 0...1) into the
    /// coordinate space of the aspect-fit image rect returned by `determineScale`.
    /// - Parameters:
    ///   - originalImageRect: the displayed image's rect inside the image view.
    ///   - targetRect: Vision's normalized bounding box.
    public func convertUnitToPoint(originalImageRect: CGRect, targetRect: CGRect) -> CGRect {
        var pointRect = targetRect
        pointRect.origin.x = originalImageRect.origin.x + (targetRect.origin.x * originalImageRect.size.width)
        // Bug fix: the y term must also be scaled by the image height
        // (Vision y is normalized and flipped; the original forgot the multiply).
        pointRect.origin.y = originalImageRect.origin.y + ((1 - targetRect.origin.y - targetRect.height) * originalImageRect.size.height)
        pointRect.size.width *= originalImageRect.size.width
        pointRect.size.height *= originalImageRect.size.height
        return pointRect
    }

    /// Computes the rect occupied by `cgImage` when rendered aspect-fit
    /// inside `imageViewFrame` (centered, letterboxed as needed).
    public func determineScale(cgImage: CGImage, imageViewFrame: CGRect) -> CGRect {
        let originalWidth = CGFloat(cgImage.width)
        let originalHeight = CGFloat(cgImage.height)
        let imageFrame = imageViewFrame
        let widthRatio = originalWidth / imageFrame.width
        let heightRatio = originalHeight / imageFrame.height
        // Aspect-fit: shrink by the larger ratio so both dimensions fit.
        let scaleRatio = max(widthRatio, heightRatio)
        let scaledImageWidth = originalWidth / scaleRatio
        let scaledImageHeight = originalHeight / scaleRatio
        let scaledImageX = (imageFrame.width - scaledImageWidth) / 2
        let scaledImageY = (imageFrame.height - scaledImageHeight) / 2
        return CGRect(x: scaledImageX, y: scaledImageY, width: scaledImageWidth, height: scaledImageHeight)
    }
}
extension CGImagePropertyOrientation {
    /// Maps UIKit's image orientation onto the Core Graphics EXIF orientation.
    init(_ orientation: UIImage.Orientation) {
        switch orientation {
        case .up: self = .up
        case .upMirrored: self = .upMirrored
        case .down: self = .down
        case .downMirrored: self = .downMirrored
        // Bug fix: .left/.leftMirrored previously fell through to the
        // default and were silently treated as .up.
        case .left: self = .left
        case .leftMirrored: self = .leftMirrored
        case .right: self = .right
        case .rightMirrored: self = .rightMirrored
        // UIImage.Orientation is not frozen; fall back to .up for any future case.
        @unknown default: self = .up
        }
    }
}
The first code snipped is from the ViewController file
Related
I have an application where a button is pressed, the player is paused and then I want to extract the image that is shown on screen. That is to do some processing and display that result.
This was my first attempt: Extracting bitmap from AVPlayer is very uncertain
There is some problem there; I can't get the correct image back at the moment. So I thought maybe a different approach would be to create a bitmap using the view that contains the video.
import Foundation
import UIKit
import AVKit
import TensorFlowLite
/// Plays a movie via an embedded AVPlayerViewController, offers frame-accurate
/// seek / slow-motion controls, and runs a TensorFlow Lite model on the
/// currently displayed frame to overlay detected line segments.
class VideoPlayerController: UIViewController {
    // NOTE: restored `@IBOutlet` / `@IBAction` / `@objc` (the paste garbled `@` into `#`).
    @IBOutlet weak var videoPlayerView: UIView!
    @IBOutlet weak var playButton: UIButton!
    @IBOutlet weak var forwardButton: UIButton!
    @IBOutlet weak var rewindButton: UIButton!
    @IBOutlet weak var playSlowButton: UIButton!

    var playbackSlider: UISlider?
    var videoDuration: CMTime?
    var videoDurationSeconds: Float64?
    var movieName: String?
    var player: AVPlayer?
    var isPlaying = false
    // Overlay views currently drawn on top of the video.
    var lines = [UIView]()
    var playerViewController: AVPlayerViewController?
    // Seek step, in seconds.
    let delta = 0.02
    let slowRate: Float = 0.03
    var currentRate: Float = 1.0
    // Boundary times used to drive slider updates during playback.
    var times = [NSValue]()
    var timeObserverToken: Any?

    /// Installs the boundary-time observer that keeps the slider in sync.
    /// Bug fix: guarded so repeated play/pause cycles don't register a new
    /// observer each time (the original leaked one per `startPlayer()` call);
    /// also captures `self` weakly so the player doesn't retain the controller.
    func setTimeObserverToken() {
        guard timeObserverToken == nil else { return }
        timeObserverToken = player!.addBoundaryTimeObserver(forTimes: times, queue: .main) { [weak self] in
            guard let self = self, let player = self.player else { return }
            let time = CMTimeGetSeconds(player.currentTime())
            self.updateSlider(time: Float(time))
        }
    }

    /// Builds the list of boundary times (one per 1/10000th of the duration)
    /// and installs the observer.
    func addBoundaryTimeObserver() {
        // Divide the asset's duration into 10,000 slices; this produces a very
        // dense observer list — intentional for smooth slider motion.
        let interval = CMTimeMultiplyByFloat64(videoDuration!, multiplier: 0.0001)
        var currentTime = CMTime.zero
        // Calculate boundary times
        while currentTime < videoDuration! {
            currentTime = currentTime + interval
            times.append(NSValue(time: currentTime))
        }
        setTimeObserverToken()
    }

    /// Moves the slider thumb to `time` (seconds).
    func updateSlider(time: Float) {
        playbackSlider?.setValue(time, animated: true)
    }

    /// Removes the boundary observer, if one is installed.
    func removeBoundaryTimeObserver() {
        if let timeObserverToken = timeObserverToken {
            player!.removeTimeObserver(timeObserverToken)
            self.timeObserverToken = nil
        }
    }

    /// Embeds an AVPlayerViewController (controls hidden) inside `videoPlayerView`.
    func setupPlayer(movieName: String) {
        let movieUrl = fileURL(for: movieName)
        player = AVPlayer(url: movieUrl)
        let playerFrame = CGRect(x: 0, y: 0, width: videoPlayerView.frame.width, height: videoPlayerView.frame.height)
        playerViewController = AVPlayerViewController()
        playerViewController!.player = player
        playerViewController?.videoGravity = .resizeAspectFill
        playerViewController!.view.frame = playerFrame
        playerViewController!.showsPlaybackControls = false
        addChild(playerViewController!)
        videoPlayerView.addSubview(playerViewController!.view)
        playerViewController!.didMove(toParent: self)
    }

    /// Creates the scrubbing slider sized to the asset's duration and wires it up.
    func setupPlaybackSlider() {
        let sliderWidth = 300
        let sliderHeight = 20
        let sliderX = Int((self.view.frame.width - CGFloat(sliderWidth)) / 2.0)
        let sliderY = Int(self.view.frame.height - 2.5 * self.playButton.frame.height)
        playbackSlider = UISlider(frame: CGRect(x: sliderX, y: sliderY, width: sliderWidth, height: sliderHeight))
        playbackSlider!.minimumValue = 0
        videoDuration = (player!.currentItem?.asset.duration)!
        videoDurationSeconds = CMTimeGetSeconds(videoDuration!)
        playbackSlider!.maximumValue = Float(videoDurationSeconds!)
        playbackSlider!.isContinuous = true
        playbackSlider!.tintColor = UIColor.green
        playbackSlider!.addTarget(self, action: #selector(self.playbackSliderValueChanged(_:)), for: .valueChanged)
        self.view.addSubview(playbackSlider!)
        addBoundaryTimeObserver()
    }

    override func viewDidLoad() {
        super.viewDidLoad()
        if let movieName = movieName {
            setupPlayer(movieName: movieName)
            setupPlaybackSlider()
        }
        let rewindText = "- \(delta) s"
        let forwardText = "+ \(delta) s"
        rewindButton.setTitle(rewindText, for: .normal)
        forwardButton.setTitle(forwardText, for: .normal)
    }

    /// Pauses playback and seeks to the slider's position (whole seconds).
    @objc func playbackSliderValueChanged(_ playbackSlider: UISlider) {
        if (isPlaying) {
            pausePlayer()
        }
        let seconds: Int64 = Int64(playbackSlider.value)
        let targetTime: CMTime = CMTimeMake(value: seconds, timescale: 1)
        player!.seek(to: targetTime)
    }

    /// Builds overlay views (one per line segment), scaled from model space
    /// (inputWidth x inputHeight) into the video view's coordinates.
    func getNewLineViews(lines: [LineSegment], color: UIColor) -> [UIView] {
        let widthScale = videoPlayerView.frame.width / CGFloat(inputWidth)
        let heightScale = videoPlayerView.frame.height / CGFloat(inputHeight)
        var lineViews = [UIView]()
        for line in lines {
            let view = UIView()
            let path = UIBezierPath()
            let start = CGPoint(x: Double(widthScale) * line.start.x,
                                y: Double(heightScale) * line.start.y)
            let end = CGPoint(x: Double(widthScale) * line.end.x,
                              y: Double(heightScale) * line.end.y)
            path.move(to: start)
            path.addLine(to: end)
            let shapeLayer = CAShapeLayer()
            shapeLayer.path = path.cgPath
            shapeLayer.strokeColor = color.cgColor
            shapeLayer.lineWidth = 2.0
            view.layer.addSublayer(shapeLayer)
            lineViews.append(view)
        }
        return lineViews
    }

    /// Like `getNewLineViews` but appends green line views to `self.lines`.
    /// NOTE(review): largely duplicates `getNewLineViews`; kept to preserve the
    /// public surface — consider delegating to it.
    func getLines(lines: [LineSegment]) {
        self.lines.append(contentsOf: getNewLineViews(lines: lines, color: UIColor.green))
    }

    /// Drops nil entries from the model's raw output.
    func getValidRectangles(rectangles: [LineSegment?]) -> [LineSegment] {
        // Idiomatic replacement for the manual nil-filtering loop.
        return rectangles.compactMap { $0 }
    }

    /// Replaces the current overlays with red line views for `result`.
    func drawNewResults(result: Result) {
        for view in self.lines {
            view.removeFromSuperview()
        }
        let lines = getValidRectangles(rectangles: result.inferences)
        self.lines = getNewLineViews(lines: lines, color: UIColor.red)
        for lineView in self.lines {
            videoPlayerView!.addSubview(lineView)
        }
    }

    func pausePlayer() {
        player?.pause()
        playButton.setTitle("Play", for: .normal)
        isPlaying = false
    }

    func startPlayer() {
        setTimeObserverToken()
        player!.play()
        // Setting rate after play() applies the slow-motion speed if selected.
        player!.rate = currentRate
        playButton.setTitle("Pause", for: .normal)
        isPlaying = true
    }

    @IBAction func playVideo(_ sender: Any) {
        if (!isPlaying) {
            startPlayer()
        } else {
            pausePlayer()
        }
        let currentTime = player!.currentTime()
        updateSlider(time: Float(CMTimeGetSeconds(currentTime)))
    }

    /// Toggles between normal (1.0) and slow (`slowRate`) playback.
    @IBAction func playSlow(_ sender: Any) {
        if (currentRate > 0.99) {
            currentRate = slowRate
            playSlowButton.setTitle("Normal", for: .normal)
        } else {
            currentRate = 1.0
            playSlowButton.setTitle("Slow", for: .normal)
        }
        if (isPlaying) {
            player!.rate = currentRate
        }
    }

    /// Steps back `delta` seconds (clamped at 0), pausing first.
    @IBAction func rewindPlayer(_ sender: Any) {
        if (isPlaying) {
            pausePlayer()
        }
        let currentTime = player!.currentTime()
        let zeroTime = CMTime(seconds: 0, preferredTimescale: currentTime.timescale)
        let deltaTime = CMTime(seconds: delta, preferredTimescale: currentTime.timescale)
        let seekTime = max(zeroTime, (currentTime - deltaTime))
        player!.seek(to: seekTime, toleranceBefore: deltaTime, toleranceAfter: zeroTime)
        updateSlider(time: Float(CMTimeGetSeconds(seekTime)))
    }

    /// Steps forward `delta` seconds (clamped at the end), pausing first.
    @IBAction func forwardPlayer(_ sender: Any) {
        if (isPlaying) {
            pausePlayer()
        }
        let currentTime = player!.currentTime()
        let endTime = player!.currentItem?.duration
        let deltaTime = CMTime(seconds: delta, preferredTimescale: currentTime.timescale)
        let seekTime = min(endTime!, (currentTime + deltaTime))
        let zeroTime = CMTime(seconds: 0, preferredTimescale: currentTime.timescale)
        player!.seek(to: seekTime, toleranceBefore: zeroTime, toleranceAfter: deltaTime)
        updateSlider(time: Float(CMTimeGetSeconds(seekTime)))
    }

    override func viewWillDisappear(_ animated: Bool) {
        // Bug fix: the original never called super.
        super.viewWillDisappear(animated)
        removeBoundaryTimeObserver()
    }

    /// Pauses, extracts the frame at the current time, runs the model on it,
    /// and draws the detections on the main queue.
    @IBAction func analyzeImage(_ sender: Any) {
        pausePlayer()
        let time = player!.currentTime()
        imageFromVideo(url: fileURL(for: movieName!), at: time.seconds) { image in
            let result = self.detectLines(image: image!)
            if let result = result {
                // Display results by handing off to the InferenceViewController.
                DispatchQueue.main.async {
                    self.drawNewResults(result: result)
                }
            }
        }
    }

    /// Runs the TFLite interpreter on the current frame and returns detections.
    /// NOTE(review): the `image` parameter is ignored — the frame is re-captured
    /// via `videoPlayerView.asImage()` — confirm this is intended.
    func detectLines(image: UIImage) -> Result? {
        let newImage = videoPlayerView!.asImage()
        let outputTensor: Tensor
        guard let rgbData = newImage.scaledData(
            with: CGSize(width: inputWidth, height: inputHeight),
            byteCount: inputWidth * inputHeight * inputChannels * batchSize,
            isQuantized: false
        )
        else {
            print("Failed to convert the image buffer to RGB data.")
            return nil
        }
        do {
            let interpreter = getInterpreter()!
            try interpreter.copy(rgbData, toInputAt: 0)
            try interpreter.invoke()
            outputTensor = try interpreter.output(at: 0)
        } catch {
            print("Failed to invoke the interpreter with error: \(error.localizedDescription)")
            return nil
        }
        let outputArray = outputTensor.data.toArray(type: Float32.self)
        return extractLinesFromPoints(output: outputArray, outputShape: outputTensor.shape.dimensions)
    }

    /// Loads the bundled model and returns a ready-to-use interpreter,
    /// or nil (with a logged reason) on failure.
    func getInterpreter() -> Interpreter? {
        var interpreter: Interpreter
        let modelFilename = MobileNet.modelInfo.name
        guard let modelPath = Bundle.main.path(
            forResource: modelFilename,
            ofType: MobileNet.modelInfo.extension
        ) else {
            print("Failed to load the model with name: \(modelFilename).")
            return nil
        }
        let options = Interpreter.Options()
        do {
            interpreter = try Interpreter(modelPath: modelPath, options: options)
            try interpreter.allocateTensors()
        } catch {
            print("Failed to create the interpreter with error: \(error.localizedDescription)")
            return nil
        }
        return interpreter
    }
}
extension UIView {
    // Using a function since `var image` might conflict with an existing variable
    // (like on `UIImageView`)
    /// Renders the view's layer hierarchy into a UIImage.
    func asImage() -> UIImage {
        if #available(iOS 10.0, *) {
            let renderer = UIGraphicsImageRenderer(bounds: bounds)
            return renderer.image { rendererContext in
                layer.render(in: rendererContext.cgContext)
            }
        } else {
            // Bug fix: render into `bounds.size` (the iOS 10 path uses bounds too)
            // and return the UIImage directly instead of re-wrapping
            // `image!.cgImage!`, which double force-unwrapped and discarded
            // the image's scale/orientation metadata.
            UIGraphicsBeginImageContext(bounds.size)
            defer { UIGraphicsEndImageContext() }
            self.layer.render(in: UIGraphicsGetCurrentContext()!)
            return UIGraphicsGetImageFromCurrentImageContext()!
        }
    }
}
I have tried this and some other things, but all I get is a black image.
I wrote the code based on Apple's example,
somehow the app will crash if accessing the setter of the custom entity component (the reveal() method)
I tried to initialize the component to see what will happen, but it crashed at anchor.addChild(card) (ViewController line 89) instead.
But it runs without any problem on iPadOS 14 which makes it even weirder.
Does anyone know why it happens and how to fix it?
my code
// CardEntity.swift
// Card-Game
//
// Created by Ian Liu on 2020/7/21.
//
import Foundation
import RealityKit
/// A tappable game card: an orange box with a CardComponent holding its
/// pair id and face-up state.
class CardEntity: Entity, HasModel, HasCollision {
    required init() {
        super.init()
        // Component subscripts take a metatype; `.self` is required
        // (the paste dropped it: `components[ModelComponent]`).
        self.components[ModelComponent.self] = ModelComponent(
            mesh: .generateBox(width: 0.09, height: 0.005, depth: 0.09),
            materials: [SimpleMaterial(
                color: .orange,
                isMetallic: false)
            ]
        )
    }

    /// Typed accessor for the card's game state.
    /// NOTE(review): CardComponent.registerComponent() must run before this is
    /// first accessed — the crash reported on newer iOS versions is commonly
    /// caused by using an unregistered custom component; confirm ordering.
    var card: CardComponent {
        get { components[CardComponent.self] ?? CardComponent() }
        set { components[CardComponent.self] = newValue }
    }
}
extension CardEntity {
    /// Flips the card face up and records the revealed state.
    func reveal() {
        setFace(up: true)
    }

    /// Flips the card face down and records the hidden state.
    func hide() {
        setFace(up: false)
    }

    /// Shared flip animation: rotates around the X axis over 0.25 s
    /// (angle 0 = face up, π = face down). Extracted because `reveal`
    /// and `hide` previously duplicated this body.
    private func setFace(up revealed: Bool) {
        card.revealed = revealed
        var transform = self.transform
        transform.rotation = simd_quatf(angle: revealed ? 0 : .pi, axis: [1, 0, 0])
        move(to: transform, relativeTo: parent, duration: 0.25, timingFunction: .easeInOut)
    }
}
// CardComponent.swift
// Card-Game
//
// Created by Ian Liu on 2020/7/21.
//
import Foundation
import RealityKit
/// Game state attached to each CardEntity.
struct CardComponent: Component, Codable {
    // True while the card is face up.
    var revealed = false
    // Pair identifier; -1 until assigned.
    var id = -1

    // Explicit no-argument initializer kept for parity with the original;
    // the property defaults already produce identical values.
    init() {}
}
//
// ViewController.swift
// Card-Game
//
// Created by εεΈιΈ on 7/16/20.
//
import UIKit
import ARKit
import RealityKit
import Combine
/// AR memory game: lays out 16 cards (8 pairs) on a detected horizontal plane,
/// loads one model per pair asynchronously, and flips cards on tap.
class ViewController: UIViewController, ARSessionDelegate {
    // NOTE: restored `@IBOutlet` / `@IBAction` (the paste garbled `@` into `#`).
    @IBOutlet var arView: ARView!

    /// Total cards on the board; every two consecutive indices share a pair id.
    let cardCount = 16
    /// The previously revealed card while waiting for its match, else nil.
    var prevCard: CardEntity? = nil

    override func viewDidLoad() {
        super.viewDidLoad()
        arView.center = self.view.center
        arView.centerXAnchor.constraint(equalTo: self.view.centerXAnchor).isActive = true
        arView.centerYAnchor.constraint(equalTo: self.view.centerYAnchor).isActive = true
        arView.session.delegate = self
        let config = ARWorldTrackingConfiguration()
        if ARWorldTrackingConfiguration.supportsFrameSemantics(.personSegmentation) {
            config.frameSemantics.insert(.personSegmentationWithDepth)
        }
        config.planeDetection = [.horizontal]
        arView.session.run(config)
        addCoachingOverlay()
        let anchor = AnchorEntity(plane: .horizontal, minimumBounds: [0.2, 0.2])
        arView.scene.addAnchor(anchor)
        // Register the custom component before any CardEntity touches it.
        CardComponent.registerComponent()
        var cards: [CardEntity] = []
        var models: [Entity] = []
        for index in 1...cardCount {
            let cardModel = CardEntity()
            // Pairs: indices 1&2 -> id 1, 3&4 -> id 2, ...
            cardModel.name = "card_\((index+1)/2)"
            cardModel.card.id = (index+1)/2
            cards.append(cardModel)
        }
        var cancellable: Cancellable? = nil
        // will build for a long time if load 8 models
        cancellable = ModelEntity.loadModelAsync(named: "memory_card_1")
            .append(ModelEntity.loadModelAsync(named: "memory_card_2"))
            .append(ModelEntity.loadModelAsync(named: "memory_card_3"))
            .append(ModelEntity.loadModelAsync(named: "memory_card_4"))
            .append(ModelEntity.loadModelAsync(named: "memory_card_5"))
            .append(ModelEntity.loadModelAsync(named: "memory_card_6"))
            .append(ModelEntity.loadModelAsync(named: "memory_card_7"))
            .append(ModelEntity.loadModelAsync(named: "memory_card_8"))
            .collect().sink(receiveCompletion: { completion in
                // Renamed from `error`: this closure receives the completion
                // event (finished OR failure), not necessarily an error.
                print("error: \(completion)")
                cancellable?.cancel()
            },
            receiveValue: { entities in
                for (index, entity) in entities.enumerated() {
                    entity.setScale(SIMD3<Float>(0.0025, 0.0025, 0.0025), relativeTo: anchor)
                    entity.position = entity.position + SIMD3<Float>(0, 0.0025, 0)
                    entity.name = "memory_card_\(index)"
                    // Two clones per model: one for each card of the pair.
                    for _ in 1...2 {
                        models.append(entity.clone(recursive: true))
                    }
                }
                for (index, card) in cards.enumerated() {
                    card.addChild(models[index])
                }
                cards.shuffle()
                // Start all cards face down.
                for card in cards {
                    var flipTransform = card.transform
                    flipTransform.rotation = simd_quatf(angle: .pi, axis: [1, 0, 0])
                    card.move(to: flipTransform, relativeTo: card.parent)
                }
                // Attach cards to the anchor in a sqrt(count) x sqrt(count) grid.
                for (index, card) in cards.enumerated() {
                    card.generateCollisionShapes(recursive: true)
                    let x = Float(index % Int(sqrt(Double(self.cardCount)))) - 1.5
                    let z = Float(index / Int(sqrt(Double(self.cardCount)))) - 1.5
                    card.position = [x * 0.1, 0, z * 0.1]
                    anchor.addChild(card)
                }
                cancellable?.cancel()
            })
        // Adding Occlusion Box
        // Create box mesh, 0.7 meters on all sides
        let boxSize: Float = 0.7
        let boxMesh = MeshResource.generateBox(size: boxSize)
        // Create Occlusion Material
        let material = OcclusionMaterial()
        // Create ModelEntity using mesh and materials
        let occlusionBox = ModelEntity(mesh: boxMesh, materials: [material])
        // Position box with top slightly below game board
        occlusionBox.position.y = -boxSize / 2 - 0.001
        // Add to anchor
        anchor.addChild(occlusionBox)
    }

    override func viewWillTransition(to size: CGSize, with coordinator: UIViewControllerTransitionCoordinator) {
        super.viewWillTransition(to: size, with: coordinator)
        arView.center = view.center
        centerCoachingView()
    }

    override func viewWillAppear(_ animated: Bool) {
        super.viewWillAppear(animated)
        guard let configuration = arView.session.configuration else { return }
        arView.session.run(configuration)
    }

    override func viewWillDisappear(_ animated: Bool) {
        super.viewWillDisappear(animated)
        arView.session.pause()
    }

    /// Resolves the tapped entity to its CardEntity (taps may land on the
    /// child model), reveals it, and hides a mismatched pair after 1 second.
    @IBAction func onTap(_ sender: UITapGestureRecognizer) {
        let tapLocation = sender.location(in: arView)
        if let entity = arView.entity(at: tapLocation) {
            var card = entity
            if entity.name.hasPrefix("memory_card") {
                card = entity.parent!
            }
            guard let cardEntity = card as? CardEntity else { return }
            print("tapped card id: \(cardEntity.card.id)")
            if cardEntity.card.revealed {
                return
            } else {
                cardEntity.reveal()
            }
            if let lastCard = prevCard {
                print("last card id: \(lastCard.card.id)")
                if lastCard.card.id != cardEntity.card.id {
                    // No match: flip both back after a short look.
                    DispatchQueue.main.asyncAfter(deadline: .now() + 1) {
                        cardEntity.hide()
                        lastCard.hide()
                    }
                }
                self.prevCard = nil
            } else {
                print("last card id: N\\A")
                prevCard = cardEntity
            }
        }
    }
}
extension ViewController: ARCoachingOverlayViewDelegate {
    /// Adds the standard ARKit coaching overlay, pinned to the view with
    /// Auto Layout.
    func addCoachingOverlay() {
        let coachingOverlay = ARCoachingOverlayView()
        // Bug fix: the original combined `autoresizingMask` + a manual
        // `center` assignment with explicit constraints, without disabling
        // translatesAutoresizingMaskIntoConstraints — producing conflicting
        // auto-generated constraints. Use constraints exclusively.
        coachingOverlay.translatesAutoresizingMaskIntoConstraints = false
        coachingOverlay.delegate = self
        coachingOverlay.session = arView.session
        coachingOverlay.goal = .horizontalPlane
        arView.addSubview(coachingOverlay)
        NSLayoutConstraint.activate([
            coachingOverlay.centerXAnchor.constraint(equalTo: view.centerXAnchor),
            coachingOverlay.centerYAnchor.constraint(equalTo: view.centerYAnchor),
            coachingOverlay.widthAnchor.constraint(equalTo: view.widthAnchor),
            coachingOverlay.heightAnchor.constraint(equalTo: view.heightAnchor)
        ])
        coachingOverlay.activatesAutomatically = true
    }

    //TODO: Fix coachingView won't center when first orientation change
    /// Re-centers the coaching overlay on rotation.
    /// NOTE(review): with the constraint-based layout above this manual
    /// centering should become unnecessary — verify and remove.
    func centerCoachingView() {
        for subview in arView.subviews {
            if let coachingView = subview as? ARCoachingOverlayView {
                coachingView.center = self.view.center
            }
        }
    }
}
I'm a hobby developer and I'm making an app for my job. I need to crop a detected object (a product label) and send it to another view (to later apply OCR). I downloaded an Apple sample that displays a yellow rounded rectangle, replaced the model with one I made, and it successfully recognizes the image (a paper label). Now I want to display it in a SwiftUI view. This is my code.
SwiftUI UIViewControllerRepresentable struct
/// Bridges VisionObjectRecognitionViewController into SwiftUI and funnels the
/// captured image back through a binding.
struct CameraCont: UIViewControllerRepresentable {
    // NOTE: restored `@Binding` (the paste garbled `@` into `#`).
    @Binding var croppedImage: UIImage?

    func makeCoordinator() -> Coordinator {
        return Coordinator(croppedImage: $croppedImage)
    }

    /// Receives images from the view controller via SendImageToSwiftUIDelegate
    /// and writes them into the SwiftUI binding.
    class Coordinator: NSObject, SendImageToSwiftUIDelegate {
        // Bug fix: declared `UIImage?` to match the injected Binding<UIImage?>
        // (the original used the implicitly-unwrapped `UIImage!`).
        @Binding var croppedImage: UIImage?

        init(croppedImage: Binding<UIImage?>) {
            _croppedImage = croppedImage
        }

        func sendImage(image: UIImage) {
            croppedImage = image
            print("Delegate called")
        }
    }

    typealias UIViewControllerType = VisionObjectRecognitionViewController

    func makeUIViewController(context: Context) -> VisionObjectRecognitionViewController {
        let vision = VisionObjectRecognitionViewController()
        vision.sendImageDelegate = context.coordinator
        return vision
    }

    func updateUIViewController(_ uiViewController: VisionObjectRecognitionViewController, context: Context) {
    }
}
Now I have created a protocol to communicate with the UIViewControllerRepresentable, and its delegate gets called successfully. This is my view controller subclass.
/// Hands a captured image from VisionObjectRecognitionViewController back to
/// the SwiftUI layer (implemented by CameraCont.Coordinator).
protocol SendImageToSwiftUIDelegate {
func sendImage(image:UIImage)
}
/// Runs a Core ML object-recognition model on the camera feed, draws
/// bounding-box overlays, and sends a frame to SwiftUI once a detection is
/// wide enough (> 180 pt).
class VisionObjectRecognitionViewController: ViewController {
    /// Delivers captured frames back to the SwiftUI layer.
    var sendImageDelegate: SendImageToSwiftUIDelegate!
    private var detectionOverlay: CALayer! = nil
    private var observationWidthBiggherThan180 = false
    private var rectToCrop = CGRect()
    // Vision parts
    private var requests = [VNRequest]()

    /// Loads the compiled model and builds the Vision request list.
    /// Returns the error on failure, nil on success.
    @discardableResult
    func setupVision() -> NSError? {
        // Setup Vision parts
        guard let modelURL = Bundle.main.url(forResource: "exampleModelFP16", withExtension: "mlmodelc") else {
            return NSError(domain: "VisionObjectRecognitionViewController", code: -1, userInfo: [NSLocalizedDescriptionKey: "Model file is missing"])
        }
        var setupError: NSError? = nil
        do {
            let visionModel = try VNCoreMLModel(for: MLModel(contentsOf: modelURL))
            let objectRecognition = VNCoreMLRequest(model: visionModel, completionHandler: { (request, error) in
                DispatchQueue.main.async(execute: {
                    // perform all the UI updates on the main queue
                    if let results = request.results {
                        self.drawVisionRequestResults(results)
                    }
                })
            })
            self.requests = [objectRecognition]
        } catch let loadError as NSError {
            // Bug fix: the original shadowed a constant `error` that stayed nil,
            // so model-loading failures were silently returned as success.
            print("Model loading went wrong: \(loadError)")
            setupError = loadError
        }
        return setupError
    }

    /// Replaces the overlay sublayers with a box + label per recognized object
    /// and records whether the widest detection exceeds 180 pt.
    func drawVisionRequestResults(_ results: [Any]) {
        CATransaction.begin()
        CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
        detectionOverlay.sublayers = nil // remove all the old recognized objects
        for observation in results where observation is VNRecognizedObjectObservation {
            guard let objectObservation = observation as? VNRecognizedObjectObservation else {
                continue
            }
            // Select only the label with the highest confidence.
            let topLabelObservation = objectObservation.labels[0]
            let objectBounds = VNImageRectForNormalizedRect(objectObservation.boundingBox, Int(bufferSize.width), Int(bufferSize.height))
            let shapeLayer = self.createRoundedRectLayerWithBounds(objectBounds)
            let textLayer = self.createTextSubLayerInBounds(objectBounds,
                                                           identifier: topLabelObservation.identifier,
                                                           confidence: topLabelObservation.confidence)
            shapeLayer.addSublayer(textLayer)
            detectionOverlay.addSublayer(shapeLayer)
            if shapeLayer.bounds.size.width > 180 {
                // perform crop and apply perspective filter
                self.observationWidthBiggherThan180 = true
                self.rectToCrop = shapeLayer.bounds
            } else {
                self.observationWidthBiggherThan180 = false
            }
        }
        self.updateLayerGeometry()
        CATransaction.commit()
    }

    // MARK: - Function to capture the output of the camera and perform the image recognition
    override func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }
        if observationWidthBiggherThan180 {
            // Bug fix: a UIImage built directly from a CIImage has no bitmap
            // backing and renders blank in SwiftUI. Render the pixel buffer
            // through a CIContext into a CGImage first. (The unused
            // CIPerspectiveTransform filter was removed.)
            let ciImage = CIImage(cvPixelBuffer: pixelBuffer)
            let context = CIContext(options: nil)
            if let cgImage = context.createCGImage(ciImage, from: ciImage.extent) {
                sendImageDelegate.sendImage(image: UIImage(cgImage: cgImage))
            }
        }
        let exifOrientation = exifOrientationFromDeviceOrientation()
        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: exifOrientation, options: [:])
        do {
            try imageRequestHandler.perform(self.requests)
        } catch {
            print(error)
        }
    }

    override func setupAVCapture() {
        super.setupAVCapture()
        // setup Vision parts
        setupLayers()
        updateLayerGeometry()
        setupVision()
        // start the capture
        startCaptureSession()
    }

    /// Creates the container layer that holds all detection overlays.
    func setupLayers() {
        detectionOverlay = CALayer() // container layer that has all the renderings of the observations
        detectionOverlay.name = "DetectionOverlay"
        detectionOverlay.bounds = CGRect(x: 0.0,
                                         y: 0.0,
                                         width: bufferSize.width,
                                         height: bufferSize.height)
        detectionOverlay.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
        rootLayer.addSublayer(detectionOverlay)
    }

    /// Rotates/scales/mirrors the overlay from buffer space into screen space.
    func updateLayerGeometry() {
        let bounds = rootLayer.bounds
        var scale: CGFloat
        let xScale: CGFloat = bounds.size.width / bufferSize.height
        let yScale: CGFloat = bounds.size.height / bufferSize.width
        scale = fmax(xScale, yScale)
        if scale.isInfinite {
            scale = 1.0
        }
        CATransaction.begin()
        CATransaction.setValue(kCFBooleanTrue, forKey: kCATransactionDisableActions)
        // rotate the layer into screen orientation and scale and mirror
        detectionOverlay.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: scale, y: -scale))
        // center the layer
        detectionOverlay.position = CGPoint(x: bounds.midX, y: bounds.midY)
        CATransaction.commit()
    }

    /// Builds the "identifier + confidence" label layer for a detection.
    func createTextSubLayerInBounds(_ bounds: CGRect, identifier: String, confidence: VNConfidence) -> CATextLayer {
        let textLayer = CATextLayer()
        textLayer.name = "Object Label"
        let formattedString = NSMutableAttributedString(string: String(format: "\(identifier)\nConfidence: %.2f", confidence))
        let largeFont = UIFont(name: "Helvetica", size: 24.0)!
        formattedString.addAttributes([NSAttributedString.Key.font: largeFont], range: NSRange(location: 0, length: identifier.count))
        textLayer.string = formattedString
        textLayer.bounds = CGRect(x: 0, y: 0, width: bounds.size.height - 10, height: bounds.size.width - 10)
        textLayer.position = CGPoint(x: bounds.midX, y: bounds.midY)
        textLayer.shadowOpacity = 0.7
        textLayer.shadowOffset = CGSize(width: 2, height: 2)
        textLayer.foregroundColor = CGColor(colorSpace: CGColorSpaceCreateDeviceRGB(), components: [0.0, 0.0, 0.0, 1.0])
        textLayer.contentsScale = 2.0 // retina rendering
        // rotate the layer into screen orientation and scale and mirror
        textLayer.setAffineTransform(CGAffineTransform(rotationAngle: CGFloat(.pi / 2.0)).scaledBy(x: 1.0, y: -1.0))
        return textLayer
    }

    /// Builds the translucent yellow rounded-rect highlight for a detection.
    func createRoundedRectLayerWithBounds(_ bounds: CGRect) -> CALayer {
        let shapeLayer = CALayer()
        shapeLayer.bounds = bounds
        shapeLayer.position = CGPoint(x: bounds.midX, y: bounds.midY)
        shapeLayer.name = "Found Object"
        shapeLayer.backgroundColor = CGColor(colorSpace: CGColorSpaceCreateDeviceRGB(), components: [1.0, 1.0, 0.2, 0.4])
        shapeLayer.cornerRadius = 7
        return shapeLayer
    }
}
This runs if the detection is wider than 180 pt (phone close to the label). The problem is that I cannot display anything I crop or even get from the pixel buffer. When I close the presenting view, a white space is displayed instead of any image.
/// Shows the last captured image and presents the camera sheet on demand.
struct ContentView: View {
    // NOTE: restored `@State` (the paste garbled `@` into `#`).
    @State private var image: Image?
    @State private var showingCamera = false
    @State private var inputImage: UIImage?

    var body: some View {
        VStack {
            image?
                .resizable()
                .scaledToFit()
            Button("Show camera") {
                self.showingCamera = true
            }
        }
        .sheet(isPresented: $showingCamera, onDismiss: loadImage) {
            CameraCont(croppedImage: self.$inputImage)
                .edgesIgnoringSafeArea(.top)
        }
    }

    /// Copies the UIImage captured by the camera into the displayable Image.
    func loadImage() {
        guard let inputImage = inputImage else { return }
        image = Image(uiImage: inputImage)
    }
}
If anyone has a clue of where I'm failing please give me a hint.
I think the problem is when I create the image
let imageToCrop = CIImage(cvPixelBuffer: pixelBuffer)
But I don't know the solution.
Thanks a lot.
Also I need to learn about AVFoundation and transforming the coordinates, can anyone recommend me a good place to learn it?
Solved. Yes, indeed the wrong line was where I created the UIImage.
This is the correct way of creating a UIImage:
if observationWidthBiggherThan180 {
let ciimage : CIImage = CIImage(cvPixelBuffer: pixelBuffer)
let context:CIContext = CIContext(options: nil)
let cgImage:CGImage = context.createCGImage(ciimage, from: ciimage.extent)!
let myImage:UIImage = UIImage(cgImage: cgImage)
sendImageDelegate.sendImage(image:myImage)
}
Detect and track faces from the selfie cam feed in real time. I could get that based on source :- https://developer.apple.com/documentation/vision/tracking_the_user_s_face_in_real_time. The following image show that rectangle will be placed in face,
As you can see the red rectangular part of screen, I need to capture only the inside part of rectangle as image and save not the full screen as image. How could I get that?
I have tried with some other source which gives me only the full screen as image but not the rectangular part.
The source code for Live face detection is,
import UIKit
import AVKit
import Vision
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {
// Main view for showing camera content.
#IBOutlet weak var previewView: UIView?
// AVCapture variables to hold sequence data
var session: AVCaptureSession?
var previewLayer: AVCaptureVideoPreviewLayer?
var videoDataOutput: AVCaptureVideoDataOutput?
var videoDataOutputQueue: DispatchQueue?
var captureDevice: AVCaptureDevice?
var captureDeviceResolution: CGSize = CGSize()
// Layer UI for drawing Vision results
var rootLayer: CALayer?
var detectionOverlayLayer: CALayer?
var detectedFaceRectangleShapeLayer: CAShapeLayer?
var detectedFaceLandmarksShapeLayer: CAShapeLayer?
// Vision requests
private var detectionRequests: [VNDetectFaceRectanglesRequest]?
private var trackingRequests: [VNTrackObjectRequest]?
lazy var sequenceRequestHandler = VNSequenceRequestHandler()
// MARK: UIViewController overrides
/// Builds the AVCapture session, prepares the Vision face-tracking requests,
/// and starts streaming frames.
/// NOTE(review): startRunning() is a blocking call on the main thread here —
/// Apple recommends invoking it off the main queue; confirm acceptable.
override func viewDidLoad() {
super.viewDidLoad()
self.session = self.setupAVCaptureSession()
self.prepareVisionRequest()
self.session?.startRunning()
}
// No-op beyond the superclass behavior; kept as a hook.
override func didReceiveMemoryWarning() {
super.didReceiveMemoryWarning()
}
// Ensure that the interface stays locked in Portrait.
override var supportedInterfaceOrientations: UIInterfaceOrientationMask {
return .portrait
}
// Ensure that the interface stays locked in Portrait.
override var preferredInterfaceOrientationForPresentation: UIInterfaceOrientation {
return .portrait
}
// MARK: AVCapture Setup
/// - Tag: CreateCaptureSession
/// Configures a capture session with the front camera, its video data output,
/// and a preview layer. Returns nil (after tearing down any partial setup)
/// if configuration throws.
fileprivate func setupAVCaptureSession() -> AVCaptureSession? {
let captureSession = AVCaptureSession()
do {
let inputDevice = try self.configureFrontCamera(for: captureSession)
self.configureVideoDataOutput(for: inputDevice.device, resolution: inputDevice.resolution, captureSession: captureSession)
self.designatePreviewLayer(for: captureSession)
return captureSession
} catch let executionError as NSError {
self.presentError(executionError)
} catch {
self.presentErrorAlert(message: "An unexpected failure has occured")
}
// Reached only on error: undo whatever was partially configured.
self.teardownAVCapture()
return nil
}
/// - Tag: ConfigureDeviceResolution
fileprivate func highestResolution420Format(for device: AVCaptureDevice) -> (format: AVCaptureDevice.Format, resolution: CGSize)? {
var highestResolutionFormat: AVCaptureDevice.Format? = nil
var highestResolutionDimensions = CMVideoDimensions(width: 0, height: 0)
for format in device.formats {
let deviceFormat = format as AVCaptureDevice.Format
let deviceFormatDescription = deviceFormat.formatDescription
if CMFormatDescriptionGetMediaSubType(deviceFormatDescription) == kCVPixelFormatType_420YpCbCr8BiPlanarFullRange {
let candidateDimensions = CMVideoFormatDescriptionGetDimensions(deviceFormatDescription)
if (highestResolutionFormat == nil) || (candidateDimensions.width > highestResolutionDimensions.width) {
highestResolutionFormat = deviceFormat
highestResolutionDimensions = candidateDimensions
}
}
}
if highestResolutionFormat != nil {
let resolution = CGSize(width: CGFloat(highestResolutionDimensions.width), height: CGFloat(highestResolutionDimensions.height))
return (highestResolutionFormat!, resolution)
}
return nil
}
fileprivate func configureFrontCamera(for captureSession: AVCaptureSession) throws -> (device: AVCaptureDevice, resolution: CGSize) {
let deviceDiscoverySession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: .video, position: .front)
if let device = deviceDiscoverySession.devices.first {
if let deviceInput = try? AVCaptureDeviceInput(device: device) {
if captureSession.canAddInput(deviceInput) {
captureSession.addInput(deviceInput)
}
if let highestResolution = self.highestResolution420Format(for: device) {
try device.lockForConfiguration()
device.activeFormat = highestResolution.format
device.unlockForConfiguration()
return (device, highestResolution.resolution)
}
}
}
throw NSError(domain: "ViewController", code: 1, userInfo: nil)
}
/// - Tag: CreateSerialDispatchQueue
fileprivate func configureVideoDataOutput(for inputDevice: AVCaptureDevice, resolution: CGSize, captureSession: AVCaptureSession) {
let videoDataOutput = AVCaptureVideoDataOutput()
videoDataOutput.alwaysDiscardsLateVideoFrames = true
// Create a serial dispatch queue used for the sample buffer delegate as well as when a still image is captured.
// A serial dispatch queue must be used to guarantee that video frames will be delivered in order.
let videoDataOutputQueue = DispatchQueue(label: "com.example.apple-samplecode.VisionFaceTrack")
videoDataOutput.setSampleBufferDelegate(self, queue: videoDataOutputQueue)
if captureSession.canAddOutput(videoDataOutput) {
captureSession.addOutput(videoDataOutput)
}
videoDataOutput.connection(with: .video)?.isEnabled = true
if let captureConnection = videoDataOutput.connection(with: AVMediaType.video) {
if captureConnection.isCameraIntrinsicMatrixDeliverySupported {
captureConnection.isCameraIntrinsicMatrixDeliveryEnabled = true
}
}
self.videoDataOutput = videoDataOutput
self.videoDataOutputQueue = videoDataOutputQueue
self.captureDevice = inputDevice
self.captureDeviceResolution = resolution
}
/// - Tag: DesignatePreviewLayer
fileprivate func designatePreviewLayer(for captureSession: AVCaptureSession) {
let videoPreviewLayer = AVCaptureVideoPreviewLayer(session: captureSession)
self.previewLayer = videoPreviewLayer
videoPreviewLayer.name = "CameraPreview"
videoPreviewLayer.backgroundColor = UIColor.black.cgColor
videoPreviewLayer.videoGravity = AVLayerVideoGravity.resizeAspectFill
if let previewRootLayer = self.previewView?.layer {
self.rootLayer = previewRootLayer
previewRootLayer.masksToBounds = true
videoPreviewLayer.frame = previewRootLayer.bounds
previewRootLayer.addSublayer(videoPreviewLayer)
}
}
// Removes infrastructure for AVCapture as part of cleanup.
fileprivate func teardownAVCapture() {
self.videoDataOutput = nil
self.videoDataOutputQueue = nil
if let previewLayer = self.previewLayer {
previewLayer.removeFromSuperlayer()
self.previewLayer = nil
}
}
// MARK: Helper Methods for Error Presentation
fileprivate func presentErrorAlert(withTitle title: String = "Unexpected Failure", message: String) {
let alertController = UIAlertController(title: title, message: message, preferredStyle: .alert)
self.present(alertController, animated: true)
}
fileprivate func presentError(_ error: NSError) {
self.presentErrorAlert(withTitle: "Failed with error \(error.code)", message: error.localizedDescription)
}
// MARK: Helper Methods for Handling Device Orientation & EXIF
fileprivate func radiansForDegrees(_ degrees: CGFloat) -> CGFloat {
return CGFloat(Double(degrees) * Double.pi / 180.0)
}
func exifOrientationForDeviceOrientation(_ deviceOrientation: UIDeviceOrientation) -> CGImagePropertyOrientation {
switch deviceOrientation {
case .portraitUpsideDown:
return .rightMirrored
case .landscapeLeft:
return .downMirrored
case .landscapeRight:
return .upMirrored
default:
return .leftMirrored
}
}
func exifOrientationForCurrentDeviceOrientation() -> CGImagePropertyOrientation {
return exifOrientationForDeviceOrientation(UIDevice.current.orientation)
}
// MARK: Performing Vision Requests
/// - Tag: WriteCompletionHandler
fileprivate func prepareVisionRequest() {
//self.trackingRequests = []
var requests = [VNTrackObjectRequest]()
let faceDetectionRequest = VNDetectFaceRectanglesRequest(completionHandler: { (request, error) in
if error != nil {
print("FaceDetection error: \(String(describing: error)).")
}
guard let faceDetectionRequest = request as? VNDetectFaceRectanglesRequest,
let results = faceDetectionRequest.results as? [VNFaceObservation] else {
return
}
DispatchQueue.main.async {
// Add the observations to the tracking list
for observation in results {
let faceTrackingRequest = VNTrackObjectRequest(detectedObjectObservation: observation)
requests.append(faceTrackingRequest)
}
self.trackingRequests = requests
}
})
// Start with detection. Find face, then track it.
self.detectionRequests = [faceDetectionRequest]
self.sequenceRequestHandler = VNSequenceRequestHandler()
self.setupVisionDrawingLayers()
}
// MARK: Drawing Vision Observations
fileprivate func setupVisionDrawingLayers() {
let captureDeviceResolution = self.captureDeviceResolution
let captureDeviceBounds = CGRect(x: 0,
y: 0,
width: captureDeviceResolution.width,
height: captureDeviceResolution.height)
let captureDeviceBoundsCenterPoint = CGPoint(x: captureDeviceBounds.midX,
y: captureDeviceBounds.midY)
let normalizedCenterPoint = CGPoint(x: 0.5, y: 0.5)
guard let rootLayer = self.rootLayer else {
self.presentErrorAlert(message: "view was not property initialized")
return
}
let overlayLayer = CALayer()
overlayLayer.name = "DetectionOverlay"
overlayLayer.masksToBounds = true
overlayLayer.anchorPoint = normalizedCenterPoint
overlayLayer.bounds = captureDeviceBounds
overlayLayer.position = CGPoint(x: rootLayer.bounds.midX, y: rootLayer.bounds.midY)
let faceRectangleShapeLayer = CAShapeLayer()
faceRectangleShapeLayer.name = "RectangleOutlineLayer"
faceRectangleShapeLayer.bounds = captureDeviceBounds
faceRectangleShapeLayer.anchorPoint = normalizedCenterPoint
faceRectangleShapeLayer.position = captureDeviceBoundsCenterPoint
faceRectangleShapeLayer.fillColor = nil
faceRectangleShapeLayer.strokeColor = UIColor.green.withAlphaComponent(0.7).cgColor
faceRectangleShapeLayer.lineWidth = 5
faceRectangleShapeLayer.shadowOpacity = 0.7
faceRectangleShapeLayer.shadowRadius = 5
let faceLandmarksShapeLayer = CAShapeLayer()
faceLandmarksShapeLayer.name = "FaceLandmarksLayer"
faceLandmarksShapeLayer.bounds = captureDeviceBounds
faceLandmarksShapeLayer.anchorPoint = normalizedCenterPoint
faceLandmarksShapeLayer.position = captureDeviceBoundsCenterPoint
faceLandmarksShapeLayer.fillColor = nil
faceLandmarksShapeLayer.strokeColor = UIColor.yellow.withAlphaComponent(0.7).cgColor
faceLandmarksShapeLayer.lineWidth = 3
faceLandmarksShapeLayer.shadowOpacity = 0.7
faceLandmarksShapeLayer.shadowRadius = 5
overlayLayer.addSublayer(faceRectangleShapeLayer)
faceRectangleShapeLayer.addSublayer(faceLandmarksShapeLayer)
rootLayer.addSublayer(overlayLayer)
self.detectionOverlayLayer = overlayLayer
self.detectedFaceRectangleShapeLayer = faceRectangleShapeLayer
self.detectedFaceLandmarksShapeLayer = faceLandmarksShapeLayer
self.updateLayerGeometry()
}
fileprivate func updateLayerGeometry() {
guard let overlayLayer = self.detectionOverlayLayer,
let rootLayer = self.rootLayer,
let previewLayer = self.previewLayer
else {
return
}
CATransaction.setValue(NSNumber(value: true), forKey: kCATransactionDisableActions)
let videoPreviewRect = previewLayer.layerRectConverted(fromMetadataOutputRect: CGRect(x: 0, y: 0, width: 1, height: 1))
var rotation: CGFloat
var scaleX: CGFloat
var scaleY: CGFloat
// Rotate the layer into screen orientation.
switch UIDevice.current.orientation {
case .portraitUpsideDown:
rotation = 180
scaleX = videoPreviewRect.width / captureDeviceResolution.width
scaleY = videoPreviewRect.height / captureDeviceResolution.height
case .landscapeLeft:
rotation = 90
scaleX = videoPreviewRect.height / captureDeviceResolution.width
scaleY = scaleX
case .landscapeRight:
rotation = -90
scaleX = videoPreviewRect.height / captureDeviceResolution.width
scaleY = scaleX
default:
rotation = 0
scaleX = videoPreviewRect.width / captureDeviceResolution.width
scaleY = videoPreviewRect.height / captureDeviceResolution.height
}
// Scale and mirror the image to ensure upright presentation.
let affineTransform = CGAffineTransform(rotationAngle: radiansForDegrees(rotation))
.scaledBy(x: scaleX, y: -scaleY)
overlayLayer.setAffineTransform(affineTransform)
// Cover entire screen UI.
let rootLayerBounds = rootLayer.bounds
overlayLayer.position = CGPoint(x: rootLayerBounds.midX, y: rootLayerBounds.midY)
}
fileprivate func addPoints(in landmarkRegion: VNFaceLandmarkRegion2D, to path: CGMutablePath, applying affineTransform: CGAffineTransform, closingWhenComplete closePath: Bool) {
let pointCount = landmarkRegion.pointCount
if pointCount > 1 {
let points: [CGPoint] = landmarkRegion.normalizedPoints
path.move(to: points[0], transform: affineTransform)
path.addLines(between: points, transform: affineTransform)
if closePath {
path.addLine(to: points[0], transform: affineTransform)
path.closeSubpath()
}
}
}
fileprivate func addIndicators(to faceRectanglePath: CGMutablePath, faceLandmarksPath: CGMutablePath, for faceObservation: VNFaceObservation) {
let displaySize = self.captureDeviceResolution
let faceBounds = VNImageRectForNormalizedRect(faceObservation.boundingBox, Int(displaySize.width), Int(displaySize.height))
faceRectanglePath.addRect(faceBounds)
if let landmarks = faceObservation.landmarks {
// Landmarks are relative to -- and normalized within --- face bounds
let affineTransform = CGAffineTransform(translationX: faceBounds.origin.x, y: faceBounds.origin.y)
.scaledBy(x: faceBounds.size.width, y: faceBounds.size.height)
// Treat eyebrows and lines as open-ended regions when drawing paths.
let openLandmarkRegions: [VNFaceLandmarkRegion2D?] = [
landmarks.leftEyebrow,
landmarks.rightEyebrow,
landmarks.faceContour,
landmarks.noseCrest,
landmarks.medianLine
]
for openLandmarkRegion in openLandmarkRegions where openLandmarkRegion != nil {
self.addPoints(in: openLandmarkRegion!, to: faceLandmarksPath, applying: affineTransform, closingWhenComplete: false)
}
// Draw eyes, lips, and nose as closed regions.
let closedLandmarkRegions: [VNFaceLandmarkRegion2D?] = [
landmarks.leftEye,
landmarks.rightEye,
landmarks.outerLips,
landmarks.innerLips,
landmarks.nose
]
for closedLandmarkRegion in closedLandmarkRegions where closedLandmarkRegion != nil {
self.addPoints(in: closedLandmarkRegion!, to: faceLandmarksPath, applying: affineTransform, closingWhenComplete: true)
}
}
}
/// - Tag: DrawPaths
fileprivate func drawFaceObservations(_ faceObservations: [VNFaceObservation]) {
guard let faceRectangleShapeLayer = self.detectedFaceRectangleShapeLayer,
let faceLandmarksShapeLayer = self.detectedFaceLandmarksShapeLayer
else {
return
}
CATransaction.begin()
CATransaction.setValue(NSNumber(value: true), forKey: kCATransactionDisableActions)
let faceRectanglePath = CGMutablePath()
let faceLandmarksPath = CGMutablePath()
for faceObservation in faceObservations {
self.addIndicators(to: faceRectanglePath,
faceLandmarksPath: faceLandmarksPath,
for: faceObservation)
}
faceRectangleShapeLayer.path = faceRectanglePath
faceLandmarksShapeLayer.path = faceLandmarksPath
self.updateLayerGeometry()
CATransaction.commit()
}
// MARK: AVCaptureVideoDataOutputSampleBufferDelegate
/// - Tag: PerformRequests
// Handle delegate method callback on receiving a sample buffer.
public func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
var requestHandlerOptions: [VNImageOption: AnyObject] = [:]
let cameraIntrinsicData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil)
if cameraIntrinsicData != nil {
requestHandlerOptions[VNImageOption.cameraIntrinsics] = cameraIntrinsicData
}
guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
print("Failed to obtain a CVPixelBuffer for the current output frame.")
return
}
let exifOrientation = self.exifOrientationForCurrentDeviceOrientation()
guard let requests = self.trackingRequests, !requests.isEmpty else {
// No tracking object detected, so perform initial detection
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
orientation: exifOrientation,
options: requestHandlerOptions)
do {
guard let detectRequests = self.detectionRequests else {
return
}
try imageRequestHandler.perform(detectRequests)
} catch let error as NSError {
NSLog("Failed to perform FaceRectangleRequest: %#", error)
}
return
}
do {
try self.sequenceRequestHandler.perform(requests,
on: pixelBuffer,
orientation: exifOrientation)
} catch let error as NSError {
NSLog("Failed to perform SequenceRequest: %#", error)
}
// Setup the next round of tracking.
var newTrackingRequests = [VNTrackObjectRequest]()
for trackingRequest in requests {
guard let results = trackingRequest.results else {
return
}
guard let observation = results[0] as? VNDetectedObjectObservation else {
return
}
if !trackingRequest.isLastFrame {
if observation.confidence > 0.3 {
trackingRequest.inputObservation = observation
} else {
trackingRequest.isLastFrame = true
}
newTrackingRequests.append(trackingRequest)
}
}
self.trackingRequests = newTrackingRequests
if newTrackingRequests.isEmpty {
// Nothing to track, so abort.
return
}
// Perform face landmark tracking on detected faces.
var faceLandmarkRequests = [VNDetectFaceLandmarksRequest]()
// Perform landmark detection on tracked faces.
for trackingRequest in newTrackingRequests {
let faceLandmarksRequest = VNDetectFaceLandmarksRequest(completionHandler: { (request, error) in
if error != nil {
print("FaceLandmarks error: \(String(describing: error)).")
}
guard let landmarksRequest = request as? VNDetectFaceLandmarksRequest,
let results = landmarksRequest.results as? [VNFaceObservation] else {
return
}
// Perform all UI updates (drawing) on the main queue, not the background queue on which this handler is being called.
DispatchQueue.main.async {
self.drawFaceObservations(results)
}
})
guard let trackingResults = trackingRequest.results else {
return
}
guard let observation = trackingResults[0] as? VNDetectedObjectObservation else {
return
}
let faceObservation = VNFaceObservation(boundingBox: observation.boundingBox)
faceLandmarksRequest.inputFaceObservations = [faceObservation]
// Continue to track detected facial landmarks.
faceLandmarkRequests.append(faceLandmarksRequest)
let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer,
orientation: exifOrientation,
options: requestHandlerOptions)
do {
try imageRequestHandler.perform(faceLandmarkRequests)
} catch let error as NSError {
NSLog("Failed to perform FaceLandmarkRequest: %#", error)
}
}
}
}
If you know the frame (CGRect) of that rectangular region, you can save just that part by cropping the original image with the required CGRect value.
// The frame of the region to keep, in the image's pixel coordinate space.
// (Placeholder value — replace CGRect() with the detected face rectangle.)
let rectFrame = CGRect()
// The source image to crop (placeholder — use your captured image).
let image = UIImage()
// NOTE(review): cropping(to:) returns a new CGImage?; the result is discarded
// here — assign it (e.g. `let cropped = image.cgImage?.cropping(to: rectFrame)`)
// to actually use the cropped image.
image.cgImage?.cropping(to: rectFrame)
I am new to Swift and I am trying to implement some projects. I got this code from Github and it is working fine.
When you click on the app, it starts a video on the iPhone screen and it detects letters and characters using 'TesseractOCR'.
The problem is the video is covering all the screen, I am not able to add any buttons. If I add a button, it disappears under the video.
I tried to add session.sessionPreset = .photo to crop the video from top and down but it did not work. I also tried to add preview.sessionPreset = .photo but did not work either
Note: Main.storyboard is empty.
Here is the code:
import AVFoundation
import UIKit
import Vision
import TesseractOCR
/// Streams the back camera, detects text rectangles with Vision, and runs
/// Tesseract OCR on each detected region, overlaying recognized text and
/// red bounding boxes on the root view.
class ViewController: UIViewController, G8TesseractDelegate {

    override func viewDidLoad() {
        super.viewDidLoad()
        // Do any additional setup after loading the view, typically from a nib.
        tesseract?.pageSegmentationMode = .sparseText
        // Recognize only these characters
        // tesseract?.charWhitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890()-+*!/?.,##$%&"
        tesseract?.charWhitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890"
        if isAuthorized() {
            configureTextDetection()
            configureCamera()
        }
    }

    override func didReceiveMemoryWarning() {
        super.didReceiveMemoryWarning()
        // Dispose of any resources that can be recreated.
    }

    /// Creates the Vision text-rectangles request, including per-character boxes.
    private func configureTextDetection() {
        textDetectionRequest = VNDetectTextRectanglesRequest(completionHandler: handleDetection)
        textDetectionRequest?.reportCharacterBoxes = true
    }

    /// Wires up the back camera, the video data output, and starts the session.
    private func configureCamera() {
        preview.session = session
        let cameraDevices = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInWideAngleCamera], mediaType: AVMediaType.video, position: .back)
        var cameraDevice: AVCaptureDevice?
        for device in cameraDevices.devices {
            if device.position == .back {
                cameraDevice = device
                break
            }
        }
        // FIX: was `try AVCaptureDeviceInput(device: cameraDevice!)` — the
        // force unwrap crashes when no back camera exists (e.g. Simulator).
        guard let backCamera = cameraDevice else {
            print("Error occured \(NSError(domain: "ViewController", code: 1, userInfo: nil))")
            return
        }
        do {
            let captureDeviceInput = try AVCaptureDeviceInput(device: backCamera)
            if session.canAddInput(captureDeviceInput) {
                session.addInput(captureDeviceInput)
            }
        }
        catch {
            print("Error occured \(error)")
            return
        }
        session.sessionPreset = .photo // It was .high
        let videoDataOutput = AVCaptureVideoDataOutput()
        videoDataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "Buffer Queue", qos: .userInteractive, attributes: .concurrent, autoreleaseFrequency: .inherit, target: nil))
        if session.canAddOutput(videoDataOutput) {
            session.addOutput(videoDataOutput)
        }
        preview.videoPreviewLayer.videoGravity = .resize
        session.startRunning()
    }

    /// Completion handler for the text-rectangles request: stores the
    /// observations for OCR and draws red boxes around them on the main queue.
    private func handleDetection(request: VNRequest, error: Error?) {
        guard let detectionResults = request.results else {
            print("No detection results")
            return
        }
        // FIX: the original mapped to [VNTextObservation?] and then force-cast
        // with `as! [VNTextObservation]`, which crashes if any element is not
        // a VNTextObservation. compactMap drops non-text observations safely.
        let textResults = detectionResults.compactMap { $0 as? VNTextObservation }
        if textResults.isEmpty {
            return
        }
        textObservations = textResults
        DispatchQueue.main.async {
            guard let sublayers = self.view.layer.sublayers else {
                return
            }
            // Remove previous box layers (keep the first sublayer — the preview).
            for layer in sublayers[1...] {
                if (layer as? CATextLayer) == nil {
                    layer.removeFromSuperlayer()
                }
            }
            let viewWidth = self.view.frame.size.width
            let viewHeight = self.view.frame.size.height
            for textResult in textResults {
                let layer = CALayer()
                // Convert the normalized (bottom-left origin) bounding box to
                // UIKit view coordinates (top-left origin).
                var rect = textResult.boundingBox
                rect.origin.x *= viewWidth
                rect.size.height *= viewHeight
                rect.origin.y = ((1 - rect.origin.y) * viewHeight) - rect.size.height
                rect.size.width *= viewWidth
                layer.frame = rect
                layer.borderWidth = 2
                layer.borderColor = UIColor.red.cgColor
                self.view.layer.addSublayer(layer)
            }
        }
    }

    // The root view is a PreviewView (set via the storyboard's custom class).
    private var preview: PreviewView {
        return view as! PreviewView
    }

    // private var cameraView: CameraView {
    // return view as! CameraView
    // }

    /// Returns whether camera access is available. For `.notDetermined` the
    /// permission prompt is shown and configuration happens in its callback.
    private func isAuthorized() -> Bool {
        let authorizationStatus = AVCaptureDevice.authorizationStatus(for: AVMediaType.video)
        switch authorizationStatus {
        case .notDetermined:
            AVCaptureDevice.requestAccess(for: AVMediaType.video,
                                          completionHandler: { (granted: Bool) -> Void in
                if granted {
                    DispatchQueue.main.async {
                        self.configureTextDetection()
                        self.configureCamera()
                    }
                }
            })
            // FIX: was `return true`, which made viewDidLoad configure the
            // session immediately (before permission was granted) AND again
            // in the callback above — configuring everything twice.
            return false
        case .authorized:
            return true
        case .denied, .restricted: return false
        @unknown default:
            return false
        }
    }

    private var textDetectionRequest: VNDetectTextRectanglesRequest?
    private let session = AVCaptureSession()
    private var textObservations = [VNTextObservation]()
    private var tesseract = G8Tesseract(language: "eng", engineMode: .tesseractOnly)
    private var font = CTFontCreateWithName("Helvetica" as CFString, 18, nil)
}
// MARK: - Camera Delegate and Setup
extension ViewController: AVCaptureVideoDataOutputSampleBufferDelegate {

    /// Per-frame pipeline: run text-rectangle detection, crop each detected
    /// region out of the frame, OCR it with Tesseract, then draw the
    /// recognized strings as CATextLayers on the main queue.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        guard let pixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer) else {
            return
        }
        var imageRequestOptions = [VNImageOption: Any]()
        if let cameraData = CMGetAttachment(sampleBuffer, key: kCMSampleBufferAttachmentKey_CameraIntrinsicMatrix, attachmentModeOut: nil) {
            imageRequestOptions[.cameraIntrinsics] = cameraData
        }
        // FIX: `.right` replaces the force-unwrapped magic number
        // `CGImagePropertyOrientation(rawValue: 6)!` (raw value 6 == .right,
        // i.e. a portrait frame from a landscape-native sensor).
        let orientation = CGImagePropertyOrientation.right
        // FIX: guard instead of force-unwrapping `textDetectionRequest!`,
        // which crashes if a frame arrives before configureTextDetection().
        guard let textDetectionRequest = textDetectionRequest else {
            return
        }
        let imageRequestHandler = VNImageRequestHandler(cvPixelBuffer: pixelBuffer, orientation: orientation, options: imageRequestOptions)
        do {
            try imageRequestHandler.perform([textDetectionRequest])
        }
        catch {
            print("Error occured \(error)")
        }
        var ciImage = CIImage(cvPixelBuffer: pixelBuffer)
        let transform = ciImage.orientationTransform(for: orientation)
        ciImage = ciImage.transformed(by: transform)
        let size = ciImage.extent.size
        // FIX: hoisted out of the per-observation loop — CIContext creation
        // is expensive and the context is loop-invariant.
        let context = CIContext(options: nil)
        var recognizedTextPositionTuples = [(rect: CGRect, text: String)]()
        for textObservation in textObservations {
            guard let rects = textObservation.characterBoxes else {
                continue
            }
            // Union of all character boxes, in normalized coordinates.
            var xMin = CGFloat.greatestFiniteMagnitude
            var xMax: CGFloat = 0
            var yMin = CGFloat.greatestFiniteMagnitude
            var yMax: CGFloat = 0
            for rect in rects {
                xMin = min(xMin, rect.bottomLeft.x)
                xMax = max(xMax, rect.bottomRight.x)
                yMin = min(yMin, rect.bottomRight.y)
                yMax = max(yMax, rect.topRight.y)
            }
            let imageRect = CGRect(x: xMin * size.width, y: yMin * size.height, width: (xMax - xMin) * size.width, height: (yMax - yMin) * size.height)
            guard let cgImage = context.createCGImage(ciImage, from: imageRect) else {
                continue
            }
            let uiImage = UIImage(cgImage: cgImage)
            tesseract?.image = uiImage
            tesseract?.recognize()
            guard var text = tesseract?.recognizedText else {
                continue
            }
            text = text.trimmingCharacters(in: CharacterSet.newlines)
            if !text.isEmpty {
                // Flip y to top-left origin; keep normalized units for now.
                let x = xMin
                let y = 1 - yMax
                let width = xMax - xMin
                let height = yMax - yMin
                recognizedTextPositionTuples.append((rect: CGRect(x: x, y: y, width: width, height: height), text: text))
            }
        }
        textObservations.removeAll()
        DispatchQueue.main.async {
            let viewWidth = self.view.frame.size.width
            let viewHeight = self.view.frame.size.height
            guard let sublayers = self.view.layer.sublayers else {
                return
            }
            // Remove the previous frame's text layers (keep the first sublayer).
            for layer in sublayers[1...] {
                if let _ = layer as? CATextLayer {
                    layer.removeFromSuperlayer()
                }
            }
            for tuple in recognizedTextPositionTuples {
                let textLayer = CATextLayer()
                textLayer.backgroundColor = UIColor.clear.cgColor
                textLayer.font = self.font
                var rect = tuple.rect
                // Scale the normalized rect up to view coordinates.
                rect.origin.x *= viewWidth
                rect.size.width *= viewWidth
                rect.origin.y *= viewHeight
                rect.size.height *= viewHeight
                // Increase the size of text layer to show text of large lengths
                rect.size.width += 100
                rect.size.height += 100
                textLayer.frame = rect
                textLayer.string = tuple.text
                textLayer.foregroundColor = UIColor.green.cgColor
                self.view.layer.addSublayer(textLayer)
            }
        }
    }
}
Basically the CameraView is being set as the root view of ViewController, which is why you cannot change its size. You need to make the CameraView a child view of ViewController's root view in order to change its size.
Something similar to:
Select ViewController.swift
Remove the following
private var cameraView: CameraView {
return view as! CameraView
}
Replace all cameraView with self.cameraView
Add the following line:
#IBOutlet var cameraView: CameraView!
Replace all self.view with self.cameraView
Select Main.storyboard
Select Camera View in Document Outline
Go to Identity Inspector (⌥⌘3) and clear Class, which should contain CameraView. After you cleared it, it should show UIView
Open Library (⇧⌘L) and add a new View inside the original Camera View (Feel free to adjust the size of this new view)
Select this new view and go to Identity Inspector (⌥⌘3) and change the Class to CameraView
Select View Controller in Storyboard and go to Connections Inspector (⌥⌘6)
Connect the outlet CameraView
And if you don't like text going out of bound of cameraView, you can simply add the following in viewDidLoad:
self.cameraView.clipsToBounds = true