Problems with audio recording in the camera (AVFoundation)

swift
ios

#1

Hello everyone.

I'm working on an app that overlays effects on the live camera feed. Video is fine: the effects are applied and the file is written. The trouble starts when I try to wire up audio recording.

Here is the delegate method where everything happens:

func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        
        if let buffer = sampleBuffer.imageBuffer {
            // Hand frames to the camera preview here
            self.renderer?.buffer = buffer
            self.renderer?.mtkView?.draw()
        }
        
        let timestamp = CMSampleBufferGetPresentationTimeStamp(sampleBuffer).seconds
        
        switch captureState {
        case .start:
            // Start recording
            self.filename = UUID().uuidString
            let videoPath = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!.appendingPathComponent("\(filename).mov")
            let writer = try! AVAssetWriter(outputURL: videoPath, fileType: .mov)
            let videoSettings = videoOut.recommendedVideoSettingsForAssetWriter(writingTo: .mov)
            let videoInput = AVAssetWriterInput(mediaType: .video, outputSettings: videoSettings)
            videoInput.mediaTimeScale = CMTimeScale(600)
            videoInput.expectsMediaDataInRealTime = true
            
            switch self.orientation {
            
            case .portraitUpsideDown:
                videoInput.transform = CGAffineTransform(rotationAngle: .pi)
            case .landscapeRight:
                videoInput.transform = CGAffineTransform(rotationAngle: .pi*3/2)
            case .landscapeLeft:
                videoInput.transform = CGAffineTransform(rotationAngle: .pi/2)
            default:
                videoInput.transform = .identity
            }
            
            
            let adaptor = AVAssetWriterInputPixelBufferAdaptor(assetWriterInput: videoInput, sourcePixelBufferAttributes: nil)
            if writer.canAdd(videoInput) { writer.add(videoInput) }
            
            let audioSettings = audioOut.recommendedAudioSettingsForAssetWriter(writingTo: .mov)
            let audioInput = AVAssetWriterInput(mediaType: .audio, outputSettings: audioSettings)
            if writer.canAdd(audioInput) { writer.add(audioInput) }
            
            writer.startWriting()
            let sourceTime = CMTime(seconds: 0.1, preferredTimescale: CMTimeScale(600))
            writer.startSession(atSourceTime: sourceTime)
            assetWriter = writer
            assetWriterVideoInput = videoInput
            assetWriterAudioInput = audioInput
            self.adaptor = adaptor
            DispatchQueue.main.async { self.captureState = .capturing }
            time = timestamp
        case .capturing:
            
            // Recording in progress
            if let buffer = CMSampleBufferGetImageBuffer(sampleBuffer) {
                if assetWriterVideoInput?.isReadyForMoreMediaData == true {
                    DispatchQueue.main.async { self.duration = Int(timestamp - self.time) }
                    let time = CMTime(seconds: timestamp - time, preferredTimescale: CMTimeScale(600))
                    
                    adaptor?.append(buffer, withPresentationTime: time)
                }
            }
            // The problem is here: as soon as I add these five lines, the recorded file
            // gets a broken duration and no audio (see the timing note right after this method)
            if sampleBuffer.formatDescription?.mediaType == .audio {
                if assetWriterAudioInput?.isReadyForMoreMediaData == true {
                    assetWriterAudioInput?.append(sampleBuffer)
                }
            }
            
        case .end:
            guard assetWriterVideoInput?.isReadyForMoreMediaData == true,
                    assetWriterAudioInput?.isReadyForMoreMediaData == true,
                    assetWriter!.status != .failed else { break }
            
            let url = FileManager.default.urls(for: .documentDirectory, in: .userDomainMask).first!.appendingPathComponent("\(filename).mov")
            assetWriterVideoInput?.markAsFinished()
            assetWriterAudioInput?.markAsFinished()
            assetWriter?.finishWriting { [weak self] in
                DispatchQueue.main.async { self?.captureState = .idle }
                self?.assetWriter = nil
                self?.assetWriterVideoInput = nil
                self?.assetWriterAudioInput = nil
                
                //Post-process the result
                self?.imageFromVideo(url: url, at: 0, orientation: connection.videoOrientation) { img in
                    
                    var data = Data()
                    
                    if let img = img {
                        let preview = img.resizeImage(targetSize: CGSize(width: 256, height: 256*img.size.height/img.size.width))
                        data = preview.pngData() ?? Data()
                    }
                
                    let path = url.lastPathComponent
                    
                    if let id = url.lastPathComponent.components(separatedBy: ".").first {
                        let item = LibraryItem(id: id, fileName: path, date: Date(), type: 2, preview: data, duration: Int16(self?.duration ?? 0), zoom: 1.0)
                        PHCameraView.result.send(item)
                        self?.duration = 0
                    }
                }
            }
        default:
            break
        }
        
    }
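
One thing worth noting about the timing: the video frames above are appended with rebased, zero-based presentation times (timestamp - time), while the audio sample buffers keep their original device-clock timestamps. If that mismatch turns out to be the cause, a sketch of shifting an audio buffer onto the same timeline could look like this (untested; offset would be the presentation time captured when recording started):

func retimed(_ sampleBuffer: CMSampleBuffer, by offset: CMTime) -> CMSampleBuffer? {
    // Read the buffer's timing entries (an audio buffer may carry several samples)
    var count: CMItemCount = 0
    CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: 0, arrayToFill: nil, entriesNeededOut: &count)
    var timing = [CMSampleTimingInfo](repeating: CMSampleTimingInfo(), count: count)
    CMSampleBufferGetSampleTimingInfoArray(sampleBuffer, entryCount: count, arrayToFill: &timing, entriesNeededOut: &count)
    
    // Shift every presentation timestamp by the recording start offset
    for i in 0..<count {
        timing[i].presentationTimeStamp = timing[i].presentationTimeStamp - offset
        timing[i].decodeTimeStamp = .invalid
    }
    
    var result: CMSampleBuffer?
    CMSampleBufferCreateCopyWithNewTiming(allocator: kCFAllocatorDefault,
                                          sampleBuffer: sampleBuffer,
                                          sampleTimingEntryCount: count,
                                          sampleTimingArray: &timing,
                                          sampleBufferOut: &result)
    return result
}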

And here is the class that drives the camera. I had to drop the photo-capture functions; they don't fit within the post size limit:

import AVFoundation
import Combine
import SwiftUI

class PHCameraViewModel: NSObject, ObservableObject, AVCapturePhotoCaptureDelegate, AVCaptureVideoDataOutputSampleBufferDelegate, AVCaptureAudioDataOutputSampleBufferDelegate {
    
    // Authorization status
    @Published var cameraAuthStatus = AVCaptureDevice.authorizationStatus(for: .video)
    @Published var micAuthStatus = AVCaptureDevice.authorizationStatus(for: .audio)
    
    //Devices
    @Published var backDevices: [AVCaptureDevice.DeviceType] = []
    @Published var frontDevice: AVCaptureDevice.DeviceType?
    @Published var curBackDevice: AVCaptureDevice.DeviceType?
    // Camera modes
    @Published var mode: CaptureMode = .photo
    @Published var position: AVCaptureDevice.Position = .back
    @Published var flashMode: AVCaptureDevice.FlashMode = .off
    @Published var torchMode: AVCaptureDevice.TorchMode = .off
    @Published var supportedFlashModes: [AVCaptureDevice.FlashMode] = []
    @Published var supportedTorchModes: [AVCaptureDevice.TorchMode] = []
    @Published var orientation = AVCaptureVideoOrientation.portrait
    
    @Published var session = AVCaptureSession()
    
    @Published var photoOut = AVCapturePhotoOutput()
    @Published var videoOut = AVCaptureVideoDataOutput()
    @Published var audioOut = AVCaptureAudioDataOutput()
    
    @Published var isRunning = false
    @Published var rawAvailable = false
    @Published var flashAvailable = false
    @Published var torchAvailable = false
    @Published var showFlashButton = false
    @Published var isCapturing = false
    @Published var captureState = CaptureState.idle
    
    @Published var duration: Int = 0
    
    var cancellable: Set<AnyCancellable> = []
    var renderer: CameraRenderer?
    var vDevice = AVCaptureDevice.default(for: .video)!
    
    private var assetWriter: AVAssetWriter?
    private var assetWriterVideoInput: AVAssetWriterInput?
    private var assetWriterAudioInput: AVAssetWriterInput?
    private var adaptor: AVAssetWriterInputPixelBufferAdaptor?
    private var filename = ""
    private var time: Double = 0
    

    // Sample buffer delegate callbacks must arrive on a serial queue;
    // a concurrent queue can deliver video and audio buffers out of order
    let sessionQueue = DispatchQueue(label: "session", qos: .userInitiated)
    let videoQueue = DispatchQueue(label: "video", qos: .userInitiated, autoreleaseFrequency: .workItem)
  
    override init() {
        super.init()
        $cameraAuthStatus
            .sink { value in
                switch value {
                case .authorized:
                    self.backDevices = self.getAvailableBackDevices()
                    self.frontDevice = self.getAvailableFrontDevice()
                case .notDetermined:
                    AVCaptureDevice.requestAccess(for: .video) { result in
                        DispatchQueue.main.async { self.cameraAuthStatus = AVCaptureDevice.authorizationStatus(for: .video) }
                    }
                default:
                    guard let settings = URL(string: UIApplication.openSettingsURLString) else { return }
                    if UIApplication.shared.canOpenURL(settings) {
                        UIApplication.shared.open(settings, options: [:]) { value in
                            DispatchQueue.main.async { self.cameraAuthStatus = AVCaptureDevice.authorizationStatus(for: .video) }
                        }
                    }
                }
            }
            .store(in: &cancellable)
        $backDevices
            .sink { value in
                if value.isEmpty { return }
                self.curBackDevice = self.setDefaultDevice()
            }
            .store(in: &cancellable)
        $curBackDevice
            .sink { value in
                guard let value = value else { return }
                self.setupMotionManager()
                self.startNewSession(to: self.mode, with: value, in: self.position)
            }
            .store(in: &cancellable)
        $position
            .dropFirst()
            .sink { value in
                var device: AVCaptureDevice.DeviceType?
                
                switch value {
                case .back: device = self.curBackDevice
                case .front: device = self.frontDevice
                default: return
                }
                
                guard let device = device else { return }
                self.startNewSession(to: self.mode, with: device, in: value)
            }
            .store(in: &cancellable)
        $mode
            .dropFirst()
            .sink { value in
                
                var device: AVCaptureDevice.DeviceType?
                
                switch self.position {
                case .back: device = self.curBackDevice
                case .front: device = self.frontDevice
                default: return
                }
                
                guard let device = device else { return }
                self.startNewSession(to: value, with: device, in: self.position)
            }
            .store(in: &cancellable)
        $torchMode
            .dropFirst()
            .sink { value in
                self.applyTorchMode(value)
            }
            .store(in: &cancellable)
    }
    
    enum CaptureState {
        case idle, start, capturing, end
    }
    
    func getAvailableBackDevices() -> [AVCaptureDevice.DeviceType] {
        
        var devices: [AVCaptureDevice.DeviceType] = []
        
        if let device = AVCaptureDevice.default(.builtInUltraWideCamera, for: .video, position: .back) {
            devices.append(device.deviceType)
        }
        if let device = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) {
            devices.append(device.deviceType)
        }
        if let device = AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) {
            devices.append(device.deviceType)
        }

        return devices
    }
    
    func getAvailableFrontDevice() -> AVCaptureDevice.DeviceType? {
        
        let discoverSession = AVCaptureDevice.DiscoverySession(deviceTypes: [.builtInTrueDepthCamera, .builtInDualCamera, .builtInDualWideCamera, .builtInTelephotoCamera, .builtInTripleCamera, .builtInUltraWideCamera, .builtInWideAngleCamera], mediaType: .video, position: .front)
        
        let devices = discoverSession.devices
        
        return devices.first?.deviceType
    }
    
    func setDefaultDevice() -> AVCaptureDevice.DeviceType? {
        
        if AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) != nil {
            return .builtInWideAngleCamera
        }
        if AVCaptureDevice.default(.builtInUltraWideCamera, for: .video, position: .back) != nil {
            return .builtInUltraWideCamera
        }
        if AVCaptureDevice.default(.builtInTelephotoCamera, for: .video, position: .back) != nil {
            return .builtInTelephotoCamera
        }
        
        return nil
    }
    
    func getSupportedTorchModes(for device: AVCaptureDevice) -> [AVCaptureDevice.TorchMode] {
        var supportedModes: [AVCaptureDevice.TorchMode] = []
        
        if device.isTorchModeSupported(.off) { supportedModes.append(.off) }
        if device.isTorchModeSupported(.on) { supportedModes.append(.on) }
        if device.isTorchModeSupported(.auto) { supportedModes.append(.auto) }
        
        return supportedModes
    }
    
    func startNewSession(to mode: CaptureMode, with device: AVCaptureDevice.DeviceType, in position: AVCaptureDevice.Position){
       
        sessionQueue.async {
            self.stopSession()
            
            do {
            
                switch mode {
                case .photo:
                    self.session.beginConfiguration()
                    self.session.sessionPreset = .photo
                    self.vDevice = AVCaptureDevice.default(device, for: .video, position: position)!
                    
                    let vInput = try AVCaptureDeviceInput(device: self.vDevice)
                    if self.session.canAddInput(vInput) { self.session.addInput(vInput) }

                    if self.session.canAddOutput(self.photoOut) {
                        self.session.addOutput(self.photoOut)
                        self.photoOut.isHighResolutionCaptureEnabled = true
                        
                        if position == .front && self.photoOut.isAppleProRAWSupported { self.photoOut.isAppleProRAWEnabled = true }
                    }
                    
                    if self.session.canAddOutput(self.videoOut) {
            
                        self.session.addOutput(self.videoOut)
                        self.videoOut.videoSettings = [kCVPixelBufferPixelFormatTypeKey as String: kCVPixelFormatType_32BGRA]
                        
                        self.videoOut.alwaysDiscardsLateVideoFrames = true
                        
                        let videoConnection = self.videoOut.connection(with: .video)
                        videoConnection?.videoOrientation = .portrait
                        self.videoOut.setSampleBufferDelegate(self, queue: self.videoQueue)
                        
                    }
                    
                    self.session.commitConfiguration()
                    self.session.startRunning()
                    
                    if self.session.isRunning {
                        
                        let rawPixelFormats = self.photoOut.supportedRawPhotoPixelFormatTypes(for: .dng)
                        
                        DispatchQueue.main.async {
                            self.rawAvailable = !rawPixelFormats.isEmpty
                            self.flashAvailable = self.vDevice.isFlashAvailable
                            self.supportedFlashModes = self.photoOut.supportedFlashModes
                            self.supportedTorchModes.removeAll()
                            self.torchAvailable = false
                            self.isRunning = true
                            self.showFlashButton = self.supportedFlashModes.count > 1
                        }
                        
                    }
                    
                case .video:
                    self.session.beginConfiguration()
                    // Only the session preset differs between the back and front cameras
                    self.session.sessionPreset = position == .back ? .hd4K3840x2160 : .hd1920x1080
                    
                    self.vDevice = AVCaptureDevice.default(device, for: .video, position: position)!
                    
                    let vInput = try AVCaptureDeviceInput(device: self.vDevice)
                    if self.session.canAddInput(vInput) { self.session.addInput(vInput) }
                    
                    let settings: [String : Any] = [
                        kCVPixelBufferPixelFormatTypeKey as String: NSNumber(value: kCVPixelFormatType_32BGRA),
                    ]
                    
                    self.videoOut.videoSettings = settings
                    self.videoOut.alwaysDiscardsLateVideoFrames = true
                    self.videoOut.setSampleBufferDelegate(self, queue: self.videoQueue)
                    if self.session.canAddOutput(self.videoOut) {
                        self.session.addOutput(self.videoOut)
                    }
                    
                    self.videoOut.connection(with: .video)?.videoOrientation = .portrait
                    
                    if AVCaptureDevice.authorizationStatus(for: .audio) == .authorized {
                        let aDevice = AVCaptureDevice.default(for: .audio)!
                        let aInput = try AVCaptureDeviceInput(device: aDevice)
                        if self.session.canAddInput(aInput) { self.session.addInput(aInput) }
                    }
                    
                    self.audioOut.setSampleBufferDelegate(self, queue: self.videoQueue)
                    if self.session.canAddOutput(self.audioOut) {
                        self.session.addOutput(self.audioOut)
                    }
                    
                    self.session.commitConfiguration()
                    self.session.startRunning()
                    
                    if self.session.isRunning {
                        
                        DispatchQueue.main.async {
                            self.flashAvailable = false
                            self.torchAvailable = self.vDevice.isTorchAvailable
                            self.supportedTorchModes = self.getSupportedTorchModes(for: self.vDevice)
                            self.rawAvailable = false
                            self.isRunning = true
                            self.showFlashButton = self.supportedTorchModes.count > 1
                        }
                        
                    }
                    
                }
            } catch {
                print(error.localizedDescription)
            }
            
        }
        
    }
    
    func stopSession() {
        
        if self.session.isRunning {
            DispatchQueue.main.async { self.isRunning = false }
            self.session.stopRunning()
        }
        
        self.session.inputs.forEach { self.session.removeInput($0) }
        self.session.outputs.forEach { self.session.removeOutput($0) }
    
    }
    
    func applyTorchMode( _ mode: AVCaptureDevice.TorchMode) {
        if self.vDevice.isTorchAvailable {
            do {
                try self.vDevice.lockForConfiguration()
                self.vDevice.torchMode = mode
                self.vDevice.unlockForConfiguration()
            } catch {
                print(error.localizedDescription)
            }
        }
    }
    
    func capture() {
        switch captureState {
        case .idle:
            captureState = .start
        case .capturing:
            captureState = .end
        default:
            break
        }
    }
    
    // captureOutput(_:didOutput:from:) is the delegate method shown in full above
    
    func imageFromVideo(url: URL, at time: TimeInterval, orientation: AVCaptureVideoOrientation, completion: @escaping (UIImage?) -> Void) {
        DispatchQueue.global(qos: .background).async {
            let asset = AVURLAsset(url: url)

            let assetIG = AVAssetImageGenerator(asset: asset)
            assetIG.appliesPreferredTrackTransform = true
            assetIG.apertureMode = .encodedPixels

            let cmTime = CMTime(seconds: time, preferredTimescale: 60)
            let thumbnailImageRef: CGImage
            do {
                thumbnailImageRef = try assetIG.copyCGImage(at: cmTime, actualTime: nil)
            } catch let error {
                print("Error: \(error)")
                return completion(nil)
            }

            DispatchQueue.main.async {
                
                let initImage = UIImage(cgImage: thumbnailImageRef, scale: 1.0, orientation: .up)
                
                completion(initImage)
            }
        }
    }
    
}
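
For what it's worth, the broken duration is visible directly in the written file; a plain AVAsset check, nothing project-specific:

import AVFoundation

func inspectRecording(at url: URL) {
    let asset = AVURLAsset(url: url)
    print("duration:", asset.duration.seconds)
    // Compare the time ranges of the video and audio tracks
    for track in asset.tracks {
        print(track.mediaType.rawValue,
              "start:", track.timeRange.start.seconds,
              "duration:", track.timeRange.duration.seconds)
    }
}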

Thanks in advance for any help.


#2

You should use AVAudioEngine.
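
A minimal sketch of what that could look like, assuming the AVAudioSession is already configured for recording (converting the delivered AVAudioPCMBuffer into something an AVAssetWriterInput accepts is left out):

import AVFoundation

let engine = AVAudioEngine()
let input = engine.inputNode
let format = input.outputFormat(forBus: 0)

// Deliver microphone audio as PCM buffers with capture-clock timestamps
input.installTap(onBus: 0, bufferSize: 1024, format: format) { buffer, when in
    // buffer: AVAudioPCMBuffer, when: AVAudioTime
    // hand the samples off to the writer here
}

do {
    try engine.start()
} catch {
    print(error.localizedDescription)
}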