问题描述
我有一段代码,可以检测物体并将其名称转换成语音播报。但是,如果在同一帧中检测到多个对象,语音输出就会互相重叠、变得混乱,开始含糊地念出各个对象的名称,这对用户来说毫无意义。我正在为视障人士开发这个应用程序,所以我希望一次只获取一个对象的名称,先播报给用户,然后再继续处理下一个对象。
这是下面的代码。
import UIKit
import AVKit
import Vision
import CoreML
import AVFoundation
/// Detects objects in the live camera feed with a Core ML classifier and
/// announces the top result via text-to-speech. Designed for visually
/// impaired users: only ONE object is spoken at a time — new frames are
/// dropped while the synthesizer is still talking, which prevents the
/// overlapping, jumbled speech the original code produced by creating a
/// brand-new AVSpeechSynthesizer on every frame.
class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {

    @IBOutlet weak var innerView: UIView!
    @IBOutlet weak var viewLable: UILabel!

    var previewLayer: AVCaptureVideoPreviewLayer?

    /// One shared synthesizer for the whole controller. A per-call instance
    /// (the original bug) lets utterances play simultaneously; a single
    /// instance lets us check `isSpeaking` and finish one announcement
    /// before starting the next.
    private let speechSynthesizer = AVSpeechSynthesizer()

    /// Building a VNCoreMLModel is expensive — load it once instead of on
    /// every captured frame.
    /// NOTE(review): `resnet50` is the Xcode-generated model class from the
    /// bundled .mlmodel — confirm its exact generated name in this project.
    private lazy var visionModel: VNCoreMLModel? = try? VNCoreMLModel(for: resnet50().model)

    override func viewDidLoad() {
        super.viewDidLoad()
        updateLable(newLable: "new lable")

        // Configure the capture session.
        let captureSession = AVCaptureSession()
        captureSession.sessionPreset = .photo

        // Back camera as the video capture device.
        guard let captureDevice = AVCaptureDevice.default(for: .video)
        else { self.quickErr(myLine: #line, inputStr: ""); return }

        // Throttle capture to one frame every 2 seconds so Vision + speech
        // can keep up with the frame stream.
        try? captureDevice.lockForConfiguration()
        captureDevice.activeVideoMinFrameDuration = CMTimeMake(value: 1, timescale: 2)
        captureDevice.activeVideoMaxFrameDuration = CMTimeMake(value: 1, timescale: 2)
        captureDevice.unlockForConfiguration()

        guard let input = try? AVCaptureDeviceInput(device: captureDevice)
        else { self.quickErr(myLine: #line, inputStr: ""); return }
        captureSession.addInput(input)

        // Deliver frames to captureOutput(_:didOutput:from:) on a serial queue.
        // (Original had a typo: `dispatchQueue` — the type is `DispatchQueue`.)
        let dataOutput = AVCaptureVideoDataOutput()
        dataOutput.setSampleBufferDelegate(self, queue: DispatchQueue(label: "VideoQueue"))
        captureSession.addOutput(dataOutput)

        // The preview layer retains the session, which keeps it alive after
        // this local variable goes out of scope.
        let layer = AVCaptureVideoPreviewLayer(session: captureSession)
        layer.videoGravity = .resizeAspectFill
        layer.frame = innerView.bounds
        innerView.layer.addSublayer(layer)
        previewLayer = layer

        // startRunning() is blocking — keep it off the main thread, and start
        // only after input AND output are attached.
        DispatchQueue.global(qos: .userInitiated).async {
            captureSession.startRunning()
        }
    }

    override func viewDidAppear(_ animated: Bool) {
        super.viewDidAppear(animated)
        // Re-sync the preview layer with the final laid-out size.
        previewLayer?.frame.size = innerView.frame.size
    }

    /// Per-frame delegate callback: classify the frame and announce the
    /// single top observation.
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        // Drop this frame entirely while the previous announcement is still
        // being spoken — this is the fix for the jumbled speech output.
        if speechSynthesizer.isSpeaking { return }

        // (Original had a typo: CMSampleBufferGetimageBuffer.)
        guard let pixelBuffer: CVPixelBuffer = CMSampleBufferGetImageBuffer(sampleBuffer)
        else { self.quickErr(myLine: #line, inputStr: ""); return }
        guard let model = visionModel
        else { self.quickErr(myLine: #line, inputStr: ""); return }

        let request = VNCoreMLRequest(model: model) { finishedReq, _ in
            guard let results = finishedReq.results as? [VNClassificationObservation]
            else { self.quickErr(myLine: #line, inputStr: ""); return }
            // Only the top-ranked observation is reported per frame.
            guard let firstObservation = results.first
            else { self.quickErr(myLine: #line, inputStr: ""); return }

            var myMessage = ""
            var myConfident = 0
            if firstObservation.confidence > 0.2 {
                myConfident = Int(firstObservation.confidence * 100)
                // Classifier identifiers are comma-separated synonyms; keep
                // the first. `.first ??` avoids the crash `[0]` caused on an
                // empty identifier.
                let myIdentifier = firstObservation.identifier.split(separator: ",").first ?? ""
                myMessage = "I am \(myConfident) % confidence this object is : \(myIdentifier) "
            } else {
                myMessage = "I am not confidence to detect this object"
            }
            print(myMessage)
            self.updateLable(newLable: myMessage)
            // Speak only high-confidence results.
            if myConfident >= 70 {
                // "en-US" is a valid BCP-47 code; the original "en_EN" made
                // AVSpeechSynthesisVoice(language:) return nil.
                self.readyMe(myText: myMessage, myLang: "en-US")
            }
        }

        // Analyze the frame.
        try? VNImageRequestHandler(cvPixelBuffer: pixelBuffer, options: [:]).perform([request])
    }

    /// Speaks `myText` in language `myLang` (BCP-47, e.g. "en-US"), unless an
    /// utterance is already in progress — one object at a time for the user.
    func readyMe(myText: String, myLang: String) {
        guard !speechSynthesizer.isSpeaking else { return }
        let utterance = AVSpeechUtterance(string: myText)
        utterance.voice = AVSpeechSynthesisVoice(language: myLang)
        utterance.rate = 0.5
        speechSynthesizer.speak(utterance)
    }

    /// Logs which guard failed (file / line / function) for quick debugging.
    func quickErr(myLine: Int, inputStr: String = "") {
        print("===> Guard Error \(inputStr) :\n file:\(#file)\n line:\(myLine)\n function:\(#function) ")
    }

    /// Updates the on-screen label; always hops to the main queue because the
    /// Vision callback runs on the video delegate queue.
    func updateLable(newLable: String) {
        DispatchQueue.main.async {
            self.viewLable?.text = "[ " + newLable + " ]"
        }
    }
}
解决方法
暂未找到可以解决该程序问题的有效方法,小编正在努力寻找整理中!
如果你已经找到好的解决方法,欢迎将解决方案带上本链接一起发送给小编。
小编邮箱:dio#foxmail.com (将#修改为@)