라이브룸 V2V 번역 자막 기능을 추가한다

라이브룸에서 진행자 언어와 기기 언어가 다를 때 자막 토글을 제공한다.
룸 정보 응답에 V2V 워커 토큰과 진행자 언어 코드를 포함한다.
Agora V2V 에이전트 참여와 종료 API 연동을 추가한다.
This commit is contained in:
Yu Sung
2026-02-09 21:11:17 +09:00
parent 7f703024d8
commit b796f6d9c5
11 changed files with 816 additions and 2 deletions

View File

@@ -0,0 +1,53 @@
//
// V2vApi.swift
// SodaLive
//
// Created by klaus on 2/9/26.
//
import Foundation
import Moya
/// Endpoints of the V2V (speech-to-speech translation) agent API used by the app.
enum V2vApi {
/// Asks the service to have an agent join, using the settings carried in `request`.
case join(request: V2VJoinRequest)
/// Asks the service to have the agent identified by `agentId` leave.
case leave(agentId: String)
}
// MARK: - TargetType

/// Describes how each `V2vApi` case is turned into an HTTP request by Moya.
extension V2vApi: TargetType {
    /// Root URL that every endpoint path is appended to.
    var baseURL: URL {
        // The literal is a well-formed URL, so the force-unwrap cannot fail at runtime.
        return URL(string: "https://api.agora.io/api/speech-to-speech-translation/v2/")!
    }

    /// Path relative to `baseURL`; both endpoints are scoped by `AGORA_APP_ID`.
    var path: String {
        switch self {
        case .join:
            return "projects/\(AGORA_APP_ID)/join"
        case let .leave(agentId):
            return "projects/\(AGORA_APP_ID)/agents/\(agentId)/leave"
        }
    }

    /// Every endpoint is a POST.
    var method: Moya.Method {
        return .post
    }

    /// `join` sends the request model as a JSON body; `leave` sends no body.
    var task: Task {
        switch self {
        case let .join(request):
            return .requestJSONEncodable(request)
        case .leave:
            return .requestPlain
        }
    }

    /// HTTP Basic authentication plus a fresh request ID for every request.
    ///
    /// NOTE(review): the customer ID/secret pair is read from constants compiled
    /// into the app — confirm that shipping these credentials in the client is intended.
    var headers: [String: String]? {
        let credentialPair = "\(AGORA_CUSTOMER_ID):\(AGORA_CUSTOMER_SECRET)"
        let basicToken = Data(credentialPair.utf8).base64EncodedString()

        var fields: [String: String] = [:]
        fields["Authorization"] = "Basic \(basicToken)"
        fields["X-Request-Id"] = UUID().uuidString
        fields["Content-Type"] = "application/json"
        return fields
    }
}

View File

@@ -0,0 +1,109 @@
//
// V2vModels.swift
// SodaLive
//
// Created by klaus on 2/9/26.
//
import Foundation
/// JSON body sent with the V2V `join` request.
struct V2VJoinRequest: Encodable {
    /// Agent name; contains the room ID and a random UUID.
    let name: String
    /// Preset identifier; always `"v2vt_base"` when built through `init(roomInfo:...)`.
    let preset: String
    /// Channel, identity and pipeline settings for the agent.
    let properties: Properties

    /// Builds a caption-only request (TTS disabled) for the given room.
    ///
    /// - Parameters:
    ///   - roomInfo: Supplies the room ID, channel name, V2V worker token and creator ID.
    ///   - sourceLanguage: Language code passed to the `asr` section.
    ///   - targetLanguage: Language code passed to the `translation` section.
    init(roomInfo: GetRoomInfoResponse, sourceLanguage: String, targetLanguage: String) {
        let rtcUid = UserDefaults.int(forKey: .userId)

        let features = AdvancedFeatures(enableRtm: false)
        let channelParameters = Parameters(dataChannel: "datastream")
        let recognition = Asr(language: sourceLanguage)
        let translated = Translation(language: targetLanguage)
        let speech = Tts(enable: false)

        let agentProperties = Properties(
            channel: roomInfo.channelName,
            token: roomInfo.v2vWorkerToken,
            // The agent UID is the stored user ID with the digits "333" appended.
            agentRtcUid: "\(rtcUid)333",
            // Only the room creator is listed as a remote UID.
            remoteRtcUids: ["\(roomInfo.creatorId)"],
            // NOTE(review): presumably seconds — confirm against the API documentation.
            idleTimeout: 300,
            advancedFeatures: features,
            parameters: channelParameters,
            asr: recognition,
            translation: translated,
            tts: speech
        )

        name = "sodalive-v2v-\(roomInfo.roomId)-\(UUID().uuidString)"
        preset = "v2vt_base"
        properties = agentProperties
    }

    /// The `properties` object of the join body; keys are encoded in snake_case.
    struct Properties: Encodable {
        let channel: String
        let token: String
        let agentRtcUid: String
        let remoteRtcUids: [String]
        let idleTimeout: Int
        let advancedFeatures: AdvancedFeatures
        let parameters: Parameters
        let asr: Asr
        let translation: Translation
        let tts: Tts

        enum CodingKeys: String, CodingKey {
            case channel, token
            case agentRtcUid = "agent_rtc_uid"
            case remoteRtcUids = "remote_rtc_uids"
            case idleTimeout = "idle_timeout"
            case advancedFeatures = "advanced_features"
            case parameters, asr, translation, tts
        }
    }

    /// Encoded as `{"enable_rtm": ...}`.
    struct AdvancedFeatures: Encodable {
        let enableRtm: Bool

        enum CodingKeys: String, CodingKey {
            case enableRtm = "enable_rtm"
        }
    }

    /// Encoded as `{"data_channel": ...}`.
    struct Parameters: Encodable {
        let dataChannel: String

        enum CodingKeys: String, CodingKey {
            case dataChannel = "data_channel"
        }
    }

    /// Speech-recognition section; carries the source language.
    struct Asr: Encodable {
        let language: String
    }

    /// Translation section; carries the target language.
    struct Translation: Encodable {
        let language: String
    }

    /// Text-to-speech section; only an on/off flag.
    struct Tts: Encodable {
        let enable: Bool
    }
}
/// Decoded body of a successful V2V `join` response.
///
/// All three fields are required; decoding fails if any of them is missing.
struct V2VJoinResponse: Decodable {
    /// Decoded from `agent_id`.
    let agentId: String
    /// Decoded from `create_ts`. NOTE(review): presumably a creation timestamp — confirm unit.
    let createTs: Int
    /// Decoded from `status`.
    let status: String

    enum CodingKeys: String, CodingKey {
        case status
        case agentId = "agent_id"
        case createTs = "create_ts"
    }
}
/// Decoded body of a V2V `leave` response.
///
/// Both fields are required; decoding fails if either is missing.
struct V2VLeaveResponse: Decodable {
    /// Decoded from `agent_id`.
    let agentId: String
    /// Decoded from `status`.
    let status: String

    enum CodingKeys: String, CodingKey {
        case status
        case agentId = "agent_id"
    }
}

View File

@@ -0,0 +1,128 @@
//
// V2vRepository.swift
// SodaLive
//
// Created by klaus on 2/9/26.
//
import Foundation
import Combine
import Moya
import CombineMoya
/// Abstraction over the V2V agent API; results are delivered as Combine publishers
/// whose failure type is `V2VRepositoryError`.
protocol V2VRepository {
/// Sends a join request. In `V2VRepositoryImpl` the emitted `String` is the agent ID from the response.
func join(request: V2VJoinRequest) -> AnyPublisher<String, V2VRepositoryError>
/// Sends a leave request for the agent identified by `agentId`; emits `Void` on success.
func leave(agentId: String) -> AnyPublisher<Void, V2VRepositoryError>
}
/// Failure categories surfaced by `V2VRepository` publishers.
enum V2VRepositoryError: Error {
    /// Transport-level failure, with a message to display.
    case network(message: String)
    /// The response could not be decoded into the expected shape.
    case decoding
    /// The server answered with an error, with a message to display.
    case business(message: String)

    /// Text suitable for showing to the user.
    ///
    /// Message-carrying cases return their own message; `decoding` falls back
    /// to the shared generic error string.
    var userMessage: String {
        switch self {
        case .network(let message), .business(let message):
            return message
        case .decoding:
            return I18n.Common.commonError
        }
    }
}
/// Moya-backed implementation of `V2VRepository`.
final class V2VRepositoryImpl: V2VRepository {
    private let api: MoyaProvider<V2vApi>

    /// - Parameter api: Provider used to perform requests; defaults to a plain `MoyaProvider`.
    init(api: MoyaProvider<V2vApi> = MoyaProvider<V2vApi>()) {
        self.api = api
    }

    // MARK: - V2VRepository

    /// Sends the join request and emits the agent ID from the response.
    ///
    /// Fails with `.business` on a non-2xx status, and with `.decoding` when the
    /// body cannot be decoded or the agent ID is blank.
    func join(request: V2VJoinRequest) -> AnyPublisher<String, V2VRepositoryError> {
        api.requestPublisher(.join(request: request))
            .tryMap { response -> String in
                try Self.validateStatusCode(response)
                return try Self.decodeAgentId(from: response.data)
            }
            .mapError { failure in Self.mapError(failure) }
            .eraseToAnyPublisher()
    }

    /// Sends the leave request for `agentId`.
    ///
    /// Only the status code decides success; the body is logged when present
    /// and its contents never cause a failure.
    func leave(agentId: String) -> AnyPublisher<Void, V2VRepositoryError> {
        api.requestPublisher(.leave(agentId: agentId))
            .tryMap { response -> Void in
                try Self.validateStatusCode(response)
                guard !response.data.isEmpty else { return }

                if let text = String(data: response.data, encoding: .utf8) {
                    DEBUG_LOG("[V2V] leave response: \(text)")
                }
                // Best-effort decode: the result is discarded and failures are ignored.
                _ = try? JSONDecoder().decode(V2VLeaveResponse.self, from: response.data)
            }
            .mapError { failure in Self.mapError(failure) }
            .eraseToAnyPublisher()
    }

    // MARK: - Helpers

    /// Decodes a join response and returns its agent ID (untrimmed).
    /// Throws `.decoding` for undecodable data or an ID that is blank after trimming.
    private static func decodeAgentId(from data: Data) throws -> String {
        guard let payload = try? JSONDecoder().decode(V2VJoinResponse.self, from: data) else {
            throw V2VRepositoryError.decoding
        }
        let isBlank = payload.agentId.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
        if isBlank {
            throw V2VRepositoryError.decoding
        }
        return payload.agentId
    }

    /// Throws `.business` unless the status code is in 200..<300.
    /// The message comes from the body when one can be parsed, else the generic error text.
    private static func validateStatusCode(_ response: Response) throws {
        if (200..<300).contains(response.statusCode) {
            return
        }
        let message = parseBusinessMessage(from: response.data) ?? I18n.Common.commonError
        throw V2VRepositoryError.business(message: message)
    }

    /// Converts any upstream error into a `V2VRepositoryError`.
    private static func mapError(_ error: Error) -> V2VRepositoryError {
        // Errors thrown by our own `tryMap` closures pass through untouched.
        if let alreadyMapped = error as? V2VRepositoryError {
            return alreadyMapped
        }
        guard let moyaError = error as? MoyaError else {
            return .network(message: I18n.Common.commonError)
        }

        switch moyaError {
        case .statusCode(let response):
            let message = parseBusinessMessage(from: response.data) ?? I18n.Common.commonError
            return .business(message: message)
        case .objectMapping, .jsonMapping, .encodableMapping, .stringMapping, .imageMapping:
            return .decoding
        default:
            return .network(message: I18n.Common.commonError)
        }
    }

    /// Extracts a non-blank `message` from an error body.
    ///
    /// Tries `V2VErrorResponse` first and `ApiResponseWithoutData` second;
    /// returns `nil` when neither yields a non-empty trimmed message.
    private static func parseBusinessMessage(from data: Data) -> String? {
        let nonBlank: (String?) -> String? = { raw in
            guard let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines),
                  !trimmed.isEmpty else {
                return nil
            }
            return trimmed
        }

        if let message = nonBlank((try? JSONDecoder().decode(V2VErrorResponse.self, from: data))?.message) {
            return message
        }
        if let message = nonBlank((try? JSONDecoder().decode(ApiResponseWithoutData.self, from: data))?.message) {
            return message
        }
        return nil
    }
}
/// Minimal error payload: only an optional top-level `message` field is decoded.
private struct V2VErrorResponse: Decodable {
let message: String?
}

View File

@@ -0,0 +1,274 @@
//
// V2vState.swift
// SodaLive
//
// Created by klaus on 2/9/26.
//
import Foundation
/// Snapshot of the V2V caption feature's state.
///
/// Every field has a default, so `V2vState()` is the "feature off, nothing shown" state.
struct V2vState {
    /// Whether the caption feature is available.
    var isAvailable = false
    /// Whether captions are currently switched on.
    var isCaptionOn = false
    /// Caption text; empty by default.
    var captionText = ""
    /// ID of the agent, if any.
    var agentId: String?
    /// Source language code, if any.
    var sourceLanguage: String?
    /// Target language code, if any.
    var targetLanguage: String?
}
/// Maps loosely formatted language codes onto the three region-qualified codes
/// this feature uses.
enum V2vLanguageMapper {
    /// Returns `"ko-KR"`, `"ja-JP"` or `"en-US"` for a code whose trimmed,
    /// lowercased form starts with `ko`, `ja` or `en` respectively.
    ///
    /// - Parameter code: Any language identifier, e.g. `"ko"`, `"en_GB"`, `" JA-jp "`.
    /// - Returns: The mapped code, or `nil` for `nil` input or an unmatched prefix.
    static func mapToAgoraLanguage(_ code: String?) -> String? {
        guard let code else {
            return nil
        }
        let candidate = code.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()

        // Checked in order; the first matching prefix wins.
        let table: [(prefix: String, mapped: String)] = [
            ("ko", "ko-KR"),
            ("ja", "ja-JP"),
            ("en", "en-US")
        ]
        return table.first { candidate.hasPrefix($0.prefix) }?.mapped
    }
}
/// Works out which language the app is currently presented in.
enum V2vAppLanguageResolver {
    /// Resolves the current language code.
    ///
    /// Sources are consulted in this order:
    /// 1. `LanguageHeaderProvider.current`, when it is exactly `ko`, `ja` or `en`
    ///    after trimming and lowercasing.
    /// 2. The `"app.language"` user default, when it names a `LanguageOption`
    ///    other than `.system` (its raw value is returned as is).
    /// 3. The first preferred system language: `ko` or `ja` by prefix, otherwise `en`.
    ///
    /// Returns `"ko"` when the system reports no preferred language at all.
    static func currentLanguageCode() -> String {
        let headerCode = LanguageHeaderProvider.current
            .trimmingCharacters(in: .whitespacesAndNewlines)
            .lowercased()
        switch headerCode {
        case "ko", "ja", "en":
            return headerCode
        default:
            break
        }

        if let storedValue = UserDefaults.standard.string(forKey: "app.language"),
           let option = LanguageOption(rawValue: storedValue),
           option != .system {
            return option.rawValue
        }

        guard let preferred = Locale.preferredLanguages.first?.lowercased() else {
            return "ko"
        }
        for code in ["ko", "ja"] where preferred.hasPrefix(code) {
            return code
        }
        return "en"
    }
}
/// Reassembles caption messages that arrive split into several data chunks and
/// extracts the translated text from the completed payload.
///
/// Chunks are accepted in either `messageId|partIdx|partSum|content` form or as
/// a JSON object with `message_id`, `part_idx`, `part_sum` and `content` keys.
/// Incomplete messages are dropped 10 seconds after their first chunk arrived.
final class V2vMessageAssembler {
    /// One parsed chunk of a message.
    private struct Chunk {
        let messageId: String
        let partIdx: Int
        let partSum: Int
        let content: String
    }

    /// Parts collected so far for one message.
    private struct Buffer {
        var partSum: Int
        var createdAt: Date
        var parts: [Int: String]
    }

    private var buffers: [String: Buffer] = [:]
    private let timeout: TimeInterval = 10

    // MARK: - Public API

    /// Feeds one raw chunk into the assembler.
    ///
    /// - Parameter data: Raw bytes of a single chunk.
    /// - Returns: The translated caption text once the chunk completes a message
    ///   and that message contains one; otherwise `nil`.
    func consume(data: Data) -> String? {
        cleanupExpiredBuffers()

        guard let chunk = parseChunk(data: data) else {
            DEBUG_LOG("[V2V] chunk parsing failed. raw=\(preview(data))")
            return nil
        }
        DEBUG_LOG("[V2V] chunk received messageId=\(chunk.messageId), partIdx=\(chunk.partIdx), partSum=\(chunk.partSum), contentLength=\(chunk.content.count)")

        var buffer = buffers[
            chunk.messageId,
            default: Buffer(partSum: chunk.partSum, createdAt: Date(), parts: [:])
        ]
        // The largest part count announced by any chunk of this message wins.
        buffer.partSum = max(buffer.partSum, chunk.partSum)
        // Keep the first content seen for a part index; later duplicates are ignored.
        buffer.parts[chunk.partIdx] = buffer.parts[chunk.partIdx] ?? chunk.content
        buffers[chunk.messageId] = buffer

        if buffer.parts.count < buffer.partSum {
            DEBUG_LOG("[V2V] chunk buffering \(chunk.messageId): \(buffer.parts.count)/\(buffer.partSum)")
            return nil
        }

        // Join the contents in ascending part-index order.
        let orderedContents = buffer.parts.keys
            .sorted()
            .prefix(buffer.partSum)
            .compactMap { buffer.parts[$0] }
        guard orderedContents.count == buffer.partSum else {
            return nil
        }

        let combined = orderedContents.joined()
        DEBUG_LOG("[V2V] chunk assembled messageId=\(chunk.messageId), assembledLength=\(combined.count)")
        buffers[chunk.messageId] = nil
        return decodeCaptionText(from: combined)
    }

    /// Discards every partially assembled message.
    func reset() {
        buffers.removeAll()
    }

    // MARK: - Buffer maintenance

    /// Removes buffers whose first chunk is older than `timeout`.
    private func cleanupExpiredBuffers() {
        let now = Date()
        for (messageId, pending) in buffers where now.timeIntervalSince(pending.createdAt) > timeout {
            buffers[messageId] = nil
        }
    }

    // MARK: - Chunk parsing

    /// Parses a chunk from UTF-8 data, trying the pipe format before the JSON format.
    private func parseChunk(data: Data) -> Chunk? {
        guard let text = String(data: data, encoding: .utf8) else {
            return nil
        }
        return parsePipeChunk(text) ?? parseJSONChunk(text)
    }

    /// Parses `messageId|partIdx|partSum|content`.
    /// Only the first three pipes split, so the content itself may contain `|`.
    private func parsePipeChunk(_ raw: String) -> Chunk? {
        let fields = raw.split(separator: "|", maxSplits: 3, omittingEmptySubsequences: false)
        guard fields.count == 4 else {
            return nil
        }
        guard let partIdx = Int(fields[1]), partIdx >= 0,
              let partSum = Int(fields[2]), partSum > 0 else {
            return nil
        }
        return Chunk(
            messageId: String(fields[0]),
            partIdx: partIdx,
            partSum: partSum,
            content: String(fields[3])
        )
    }

    /// Parses a JSON object carrying `message_id`, `part_idx`, `part_sum` and `content`.
    private func parseJSONChunk(_ raw: String) -> Chunk? {
        guard let data = raw.data(using: .utf8),
              let object = try? JSONSerialization.jsonObject(with: data) as? [String: Any] else {
            return nil
        }
        guard let messageId = object["message_id"] as? String,
              let content = object["content"] as? String,
              let partIdx = anyToInt(object["part_idx"]), partIdx >= 0,
              let partSum = anyToInt(object["part_sum"]), partSum > 0 else {
            return nil
        }
        return Chunk(messageId: messageId, partIdx: partIdx, partSum: partSum, content: content)
    }

    /// Converts a JSON value (integer, numeric string or `NSNumber`) to `Int`.
    private func anyToInt(_ value: Any?) -> Int? {
        switch value {
        case let intValue as Int:
            return intValue
        case let stringValue as String:
            return Int(stringValue)
        case let number as NSNumber:
            return number.intValue
        default:
            return nil
        }
    }

    // MARK: - Payload decoding

    /// Extracts the caption from an assembled payload.
    ///
    /// The payload is tried as plain JSON first; if that yields no caption it is
    /// treated as Base64-encoded JSON.
    private func decodeCaptionText(from base64String: String) -> String? {
        if let plainData = base64String.data(using: .utf8),
           let plainJson = try? JSONSerialization.jsonObject(with: plainData),
           let text = extractTranslationText(from: plainJson) {
            DEBUG_LOG("[V2V] plain json subtitle parsed")
            return text
        }

        let decodedJson = decodeBase64Payload(base64String).flatMap { payload in
            try? JSONSerialization.jsonObject(with: payload)
        }
        guard let decodedJson else {
            DEBUG_LOG("[V2V] base64 or final json parsing failed. payloadPrefix=\(String(base64String.prefix(80)))")
            return nil
        }

        guard let text = extractTranslationText(from: decodedJson) else {
            DEBUG_LOG("[V2V] final json parsed but translation text not found")
            return nil
        }
        return text
    }

    /// Depth-first search of a JSON value for the first translation event with
    /// non-blank text. Dictionaries are checked themselves before their values.
    private func extractTranslationText(from object: Any) -> String? {
        if let dict = object as? [String: Any] {
            if let text = translationText(inEvent: dict) {
                return text
            }
            for nested in dict.values {
                if let text = extractTranslationText(from: nested) {
                    return text
                }
            }
        }
        if let array = object as? [Any] {
            for element in array {
                if let text = extractTranslationText(from: element) {
                    return text
                }
            }
        }
        return nil
    }

    /// Returns the trimmed `text` of `dict` when its event kind — read from
    /// `object`, then `type`, then `event` — is a user or agent translation.
    private func translationText(inEvent dict: [String: Any]) -> String? {
        let eventType = (dict["object"] as? String) ?? (dict["type"] as? String) ?? (dict["event"] as? String)
        guard let eventType,
              eventType == "user.translation" || eventType == "agent.translation" else {
            return nil
        }
        guard let text = (dict["text"] as? String)?.trimmingCharacters(in: .whitespacesAndNewlines),
              !text.isEmpty else {
            return nil
        }
        return text
    }

    /// Decodes Base64, falling back to the URL-safe alphabet with padding restored.
    private func decodeBase64Payload(_ raw: String) -> Data? {
        let trimmed = raw.trimmingCharacters(in: .whitespacesAndNewlines)
        if let standard = Data(base64Encoded: trimmed) {
            return standard
        }

        let normalized = trimmed
            .replacingOccurrences(of: "-", with: "+")
            .replacingOccurrences(of: "_", with: "/")
        // Pad with "=" up to the next multiple of four characters.
        let missingPadding = (4 - normalized.count % 4) % 4
        let padded = normalized + String(repeating: "=", count: missingPadding)
        return Data(base64Encoded: padded)
    }

    /// Short description of raw data for logs: the first 80 characters when the
    /// data is UTF-8 text, otherwise its byte count.
    private func preview(_ data: Data) -> String {
        guard let text = String(data: data, encoding: .utf8) else {
            return "\(data.count)bytes"
        }
        return String(text.prefix(80))
    }
}