这篇是iOS双语字幕软件的开发日志,目标是在iOS端实现在观看视频时,实时对播放的内容进行识别和翻译,显示双语字幕,用于打破外语视频内容观看门槛。
模块流程
iOS音频捕获与数据共享
本文介绍iOS系统音频捕获的实现方案,使用Broadcast Upload Extension捕获系统播放的音频,并通过App Group与主应用共享数据。
目录
- • Broadcast Upload Extension配置
- • 启动与关闭
- • 音频格式转换
- • Extension与主应用数据共享
- • Darwin通知
- • SampleHandler完整示例
一、系统音频捕获
iOS系统出于安全和隐私考虑,不允许应用直接捕获系统音频(如视频播放、音乐等,使用通话模式的APP播放的声音捕获不到)。必须使用Broadcast Upload Extension,通过屏幕录制的形式获取音频数据。
Broadcast Upload Extension配置
要让Extension收到ReplayKit的数据,必须同时满足:
- 1. 工程里有Broadcast Upload Extension target
- 2. Extension的Info.plist / Capabilities / 类继承全部正确
创建步骤:
- 1. 在Xcode中新建Extension类型中选择Broadcast Upload Extension
- 2. 配置Extension的Info.plist:
<key>NSExtension</key><dict> <key>NSExtensionPointIdentifier</key> <string>com.apple.broadcast-services-upload</string> <key>NSExtensionPrincipalClass</key> <string>$(PRODUCT_MODULE_NAME).SampleHandler</string></dict>
- 3. 主App配置UIBackgroundModes:
<key>UIBackgroundModes</key><array> <string>audio</string></array>
注意:未配置audio可能导致音频接收不到或屏幕锁定后Extension被暂停。
启动与关闭
Broadcast upload extension不能在代码中直接启动,只能由系统UI触发。Extension只能自己调用finishBroadcastWithError关闭,主App只能"间接控制"关闭。
let picker = RPSystemBroadcastPickerView( frame: CGRect(x: 0, y: 0, width: 44, height: 44))picker.preferredExtension = "com.xxx.broadcast"picker.showsMicrophoneButton = trueview.addSubview(picker)
音频格式转换
语音识别引擎接收的音频格式需要是16kHz单声道音频,因此这里需要先进行格式转换。这里需要注意的是,并没有官方文档说明ReplayKit回调的数据格式类型,因此这里需要兼容各种格式。
格式检测与提取:
override func processSampleBuffer(_ sampleBuffer: CMSampleBuffer, with sampleBufferType: RPSampleBufferType) { guard case .audioApp = sampleBufferType else { return } guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer), let streamDesc = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)?.pointee else { return } let inputSampleRate = streamDesc.mSampleRate let channelCount = Int(streamDesc.mChannelsPerFrame) let bitsPerChannel = streamDesc.mBitsPerChannel let formatFlags = streamDesc.mFormatFlags let isFloat = (formatFlags & kAudioFormatFlagIsFloat) != 0 let isNonInterleaved = (formatFlags & kAudioFormatFlagIsNonInterleaved) != 0 let isBigEndian = (formatFlags & kAudioFormatFlagIsBigEndian) != 0 // 提取音频数据...}
完整的音频处理实现:
private let targetSampleRate: Double = 16000.0/// 处理音频样本bufferprivate func processAudioBuffer(_ sampleBuffer: CMSampleBuffer, source: String) { guard let formatDescription = CMSampleBufferGetFormatDescription(sampleBuffer), let streamDesc = CMAudioFormatDescriptionGetStreamBasicDescription(formatDescription)?.pointee else { return } let inputSampleRate = streamDesc.mSampleRate let channelCount = Int(streamDesc.mChannelsPerFrame) let bitsPerChannel = streamDesc.mBitsPerChannel let formatFlags = streamDesc.mFormatFlags let isFloat = (formatFlags & kAudioFormatFlagIsFloat) != 0 let isNonInterleaved = (formatFlags & kAudioFormatFlagIsNonInterleaved) != 0 let isBigEndian = (formatFlags & kAudioFormatFlagIsBigEndian) != 0 // 获取 AudioBufferList var audioBufferList = AudioBufferList() var blockBuffer: CMBlockBuffer? let status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer( sampleBuffer, bufferListSizeNeededOut: nil, bufferListOut: &audioBufferList, bufferListSize: MemoryLayout<AudioBufferList>.size, blockBufferAllocator: nil, blockBufferMemoryAllocator: nil, flags: kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment, blockBufferOut: &blockBuffer ) guard status == noErr else { return } let audioBufferListPointer = UnsafeMutableAudioBufferListPointer( UnsafeMutablePointer<AudioBufferList>.allocate(capacity: 1) ) defer { audioBufferListPointer.unsafeMutablePointer.deallocate() } let numBuffers = audioBufferListPointer.count let frameCount = CMSampleBufferGetNumSamples(sampleBuffer) var floatSamples: [Float] = [] // 处理非交错格式 if isNonInterleaved { var channelData: [[Float]] = [] for bufferIndex in 0..<numBuffers { let buffer = audioBufferListPointer[bufferIndex] guard let data = buffer.mData else { continue } let dataByteSize = Int(buffer.mDataByteSize) var channelSamples: [Float] = [] if isFloat && bitsPerChannel == 32 { let floatPtr = data.assumingMemoryBound(to: Float.self) let count = dataByteSize / MemoryLayout<Float>.size for i in 0..<count { var value = 
floatPtr[i] if isBigEndian { value = Float(bitPattern: value.bitPattern.bigEndian) } channelSamples.append(value) } } else if bitsPerChannel == 16 { let int16Ptr = data.assumingMemoryBound(to: Int16.self) let count = dataByteSize / MemoryLayout<Int16>.size for i in 0..<count { var value = int16Ptr[i] if isBigEndian { value = value.bigEndian } channelSamples.append(Float(value) / 32768.0) } } channelData.append(channelSamples) } // 混音为单声道 if let firstChannel = channelData.first { if channelData.count == 1 { floatSamples = firstChannel } else { for i in 0..<firstChannel.count { var sum: Float = 0 for ch in channelData where i < ch.count { sum += ch[i] } floatSamples.append(sum / Float(channelData.count)) } } } } else { // 交错格式处理... } // 重采样到16kHz if inputSampleRate != targetSampleRate { floatSamples = resample(floatSamples, from: inputSampleRate, to: targetSampleRate) } sharedBuffer.writeAudioSamples(floatSamples)}/// 使用AVAudioConverter重采样private func resample(_ samples: [Float], from inputRate: Double, to outputRate: Double) -> [Float] { guard inputRate > 0 && outputRate > 0, inputRate != outputRate, !samples.isEmpty else { return samples } guard let inputFormat = AVAudioFormat( commonFormat: .pcmFormatFloat32, sampleRate: inputRate, channels: 1, interleaved: false ), let outputFormat = AVAudioFormat( commonFormat: .pcmFormatFloat32, sampleRate: outputRate, channels: 1, interleaved: false ), let converter = AVAudioConverter(from: inputFormat, to: outputFormat) else { return samples } guard let inputBuffer = AVAudioPCMBuffer(pcmFormat: inputFormat, frameCapacity: AVAudioFrameCount(samples.count)), let outputBuffer = AVAudioPCMBuffer(pcmFormat: outputFormat) else { return samples } inputBuffer.frameLength = AVAudioFrameCount(samples.count) let inputData = inputBuffer.floatChannelData! 
for i in 0..<samples.count { inputData[0][i] = samples[i] } let ratio = outputRate / inputRate let outputFrameCount = Int(ceil(Double(samples.count) * ratio)) outputBuffer.frameCapacity = AVAudioFrameCount(outputFrameCount) var error: NSError? let status = converter.convert(to: outputBuffer, error: &error) { _, outStatus in outStatus.pointee = .haveData return inputBuffer } guard status == .haveData, error == nil else { return samples } let outputData = outputBuffer.floatChannelData! let outputLength = Int(outputBuffer.frameLength) return (0..<outputLength).map { outputData[0][$0] }}
实现要点:
- 1. 内存安全:使用
UnsafeMutableAudioBufferListPointer 和 defer 确保内存正确释放 - 2. 多格式支持:支持 Float32、Int16、Int32 格式
- 3. 非交错格式:正确处理每个通道独立 buffer 的格式
- 4. 高质量重采样:使用系统 AVAudioConverter
二、Extension与主应用数据共享
Broadcast Extension与主应用运行在不同进程,涉及到进程间通信,这里选择使用实现比较简单的App Group共享容器进行数据交换。
App Group配置
Entitlements配置:
<key>com.apple.security.application-groups</key><array> <string>group.com.xxx.shared</string></array>
需要在Apple Developer Portal创建App Group,并在Xcode中为两个target启用。
数据读写实现
class AudioSharedBuffer { static let appGroupId = "group.com.xxx.shared" private let sharedContainerURL = FileManager.default.containerURL( forSecurityApplicationGroupIdentifier: Self.appGroupId ) // Extension写入处理后的音频 func writeAudioSamples(_ samples: [Float]) { guard let url = sharedContainerURL?.appendingPathComponent("audio.raw") else { return } let data = samples.withUnsafeBufferPointer { Data(buffer: $0) } if FileManager.default.fileExists(atPath: url.path) { let handle = try? FileHandle(forWritingTo: url) handle?.seekToEndOfFile() handle?.write(data) handle?.closeFile() } else { try? data.write(to: url) } postDarwinNotification("com.xxx.newAudioData") } // 主应用读取音频 func readAudioSamples() -> [Float]? { guard let url = sharedContainerURL?.appendingPathComponent("audio.raw"), FileManager.default.fileExists(atPath: url.path) else { return nil } let data = try? Data(contentsOf: url) try? FileManager.default.removeItem(at: url) let floatCount = data?.count ?? 0 / MemoryLayout<Float>.size var samples = [Float](repeating: 0, count: floatCount) data?.copyBytes(to: samples.withUnsafeMutableBufferPointer { $0 }) return samples } private func postDarwinNotification(_ name: String) { let center = CFNotificationCenterGetDarwinNotifyCenter() CFNotificationCenterPostNotification(center, CFNotificationName(name as CFString), nil, nil, true) }}
Darwin通知
Extension写入数据后发送Darwin通知,主应用监听后立即读取:
func startListening() { CFNotificationCenterAddObserver( CFNotificationCenterGetDarwinNotifyCenter(), Unmanaged.passUnretained(self).toOpaque(), { _, observer, _, _, _ in guard let observer = observer else { return } let selfPtr = Unmanaged<YourClass>.fromOpaque(observer).takeUnretainedValue() if let samples = selfPtr.audioBuffer.readAudioSamples() { selfPtr.onAudioReceived?(samples) } }, "com.xxx.newAudioData" as CFString, nil, .deliverImmediately )}
SampleHandler完整示例
class SampleHandler: RPBroadcastSampleHandler { private let sharedBuffer = AudioSharedBuffer() private let targetSampleRate: Double = 16000.0 override func broadcastStarted(withSetupInfo setupInfo: [String : NSObject]?) { sharedBuffer.clearAudioData() } override func broadcastPaused() {} override func broadcastResumed() {} override func broadcastFinished() {} override func processSampleBuffer(_ sampleBuffer: CMSampleBuffer, with sampleBufferType: RPSampleBufferType) { switch sampleBufferType { case .audioApp: // 处理应用音频(系统播放的音频) let samples = convertTo16kMono(sampleBuffer) sharedBuffer.writeAudioSamples(samples) case .audioMic: // 忽略麦克风音频 break case .video: // 忽略视频 break } }}