Files
sleepy_agent_ios/SleepyAgent/Services/TtsService.swift
T
sleepy bbcf0c74bb Initial iOS port - Complete source code and build system
- 19 Swift source files (~4900 lines)
- Complete UI with SwiftUI (MainView, SettingsView, MessageBubble, InputBar)
- Inference layer (LlmEngine, Agent, ToolCalling, ConversationContext)
- Services (Audio, TTS, WebSearch, ModelDownload, Storage)
- Build system: Makefile, Package.swift, Podfile
- Documentation: BUILD.md, plan.md, PROJECT_STATUS.md
- Ready for Xcode build - just need LiteRT dependency added
2026-04-06 14:26:08 +02:00

223 lines
6.3 KiB
Swift

import Foundation
import AVFoundation
// MARK: - Errors
/// Failures surfaced by the text-to-speech service.
enum TtsServiceError: LocalizedError {
    /// No usable speech synthesizer is present.
    case notAvailable
    /// Synthesis failed; the associated value carries the reason.
    case synthesisFailed(String)

    /// Human-readable message for each failure.
    var errorDescription: String? {
        let message: String
        switch self {
        case .synthesisFailed(let reason):
            message = "Speech synthesis failed: \(reason)"
        case .notAvailable:
            message = "Text-to-speech is not available on this device"
        }
        return message
    }
}
// MARK: - State
/// Lifecycle state reported by the TTS service.
enum TtsState: Equatable {
    /// `initializing`: the synthesizer has not been set up (also the state after shutdown).
    /// `ready`: set up and idle.
    /// `speaking`: an utterance is currently being spoken.
    case initializing, ready, speaking

    /// Setup failed; the associated value describes why.
    case error(String)
}
// MARK: - TTS Service
/// Text-to-speech service backed by `AVSpeechSynthesizer`.
///
/// The actor serializes access to the synthesizer and to the continuation that
/// `speak(text:language:)` suspends on. Delegate callbacks are `nonisolated`, so each
/// one hops back onto the actor through a `Task` and identifies its utterance by
/// `ObjectIdentifier`; callbacks for an utterance that is no longer the active one
/// are ignored, so a late callback can never resume the wrong caller.
actor TtsService: NSObject {
    // MARK: - Properties

    private var synthesizer: AVSpeechSynthesizer?
    private var currentState: TtsState = .initializing

    /// Continuation of the `speak(text:language:)` call that is currently suspended, if any.
    private var completionContinuation: CheckedContinuation<Void, Never>?

    /// The utterance `completionContinuation` is waiting on. Held strongly so that a
    /// newer utterance cannot be allocated with the same object identity while this
    /// one is still the active one.
    private var activeUtterance: AVSpeechUtterance?

    /// Rate applied to every new utterance; see `setRate(_:)`.
    private var speechRate: Float = AVSpeechUtteranceDefaultSpeechRate

    /// Volume applied to every new utterance; see `setVolume(_:)`.
    private var speechVolume: Float = 1.0

    /// Current TTS state
    var state: TtsState { currentState }

    /// Whether the synthesizer is currently speaking
    var isSpeaking: Bool {
        synthesizer?.isSpeaking ?? false
    }

    /// Whether TTS is available on this device (at least one voice is installed)
    var isAvailable: Bool {
        !AVSpeechSynthesisVoice.speechVoices().isEmpty
    }

    // MARK: - Initialization

    override init() {
        super.init()
    }

    deinit {
        synthesizer?.stopSpeaking(at: .immediate)
    }

    // MARK: - Setup

    /// Initializes the TTS engine.
    ///
    /// Setup runs before this method returns. If a synthesizer already exists it is
    /// kept, so calling this again cannot orphan a `speak` call that is in progress.
    /// - Returns: A stream that yields the state after setup once and then finishes.
    func initialize() async -> AsyncStream<TtsState> {
        if synthesizer == nil {
            setupSynthesizer()
        }
        let stateAfterSetup = currentState
        return AsyncStream { continuation in
            continuation.yield(stateAfterSetup)
            continuation.finish()
        }
    }

    /// Creates the synthesizer and registers this actor as its delegate, or records
    /// an error state when no voices are installed.
    private func setupSynthesizer() {
        guard isAvailable else {
            currentState = .error("No voices available")
            return
        }
        let newSynthesizer = AVSpeechSynthesizer()
        newSynthesizer.delegate = self
        synthesizer = newSynthesizer
        currentState = .ready
    }

    // MARK: - Speech Methods

    /// Speaks the given text and suspends until the utterance finishes or is cancelled.
    ///
    /// Any speech already in progress is interrupted, and the caller that was waiting
    /// on it is resumed.
    /// - Parameters:
    ///   - text: The text to speak
    ///   - language: Optional language code (e.g., "en-US"). Defaults to system language.
    /// - Throws: `TtsServiceError.notAvailable` if the engine has not been initialized.
    func speak(text: String, language: String? = nil) async throws {
        guard let synthesizer = synthesizer else {
            throw TtsServiceError.notAvailable
        }

        // Build the new utterance while the previous one is still retained by
        // `activeUtterance`, so the two are guaranteed to have distinct identities.
        let utterance = makeUtterance(text: text, language: language)

        // Interrupt current speech and release whoever was waiting on it; otherwise
        // installing the new continuation below would leak the old one.
        synthesizer.stopSpeaking(at: .immediate)
        resumePendingContinuation()

        // Speak and wait for the matching didFinish/didCancel callback (or stop/shutdown).
        await withCheckedContinuation { continuation in
            self.activeUtterance = utterance
            self.completionContinuation = continuation
            synthesizer.speak(utterance)
        }
    }

    /// Speaks the given text with completion callback.
    /// - Parameters:
    ///   - text: The text to speak
    ///   - language: Optional language code
    ///   - onComplete: Called when speech completes or fails
    func speak(text: String, language: String? = nil, onComplete: (() -> Void)? = nil) {
        Task {
            do {
                try await speak(text: text, language: language)
            } catch {
                print("TTS error: \(error)")
            }
            onComplete?()
        }
    }

    /// Stops the current speech immediately and resumes any waiting `speak` call.
    ///
    /// Does nothing when the engine has not been initialized, so an `.initializing`
    /// or `.error` state is never overwritten with `.ready`.
    func stop() {
        guard let synthesizer = synthesizer else { return }
        synthesizer.stopSpeaking(at: .immediate)
        resumePendingContinuation()
        currentState = .ready
    }

    /// Stops the current speech at the end of the word.
    ///
    /// The waiting `speak` call is resumed by the resulting delegate callback.
    func stopAtEndOfWord() {
        synthesizer?.stopSpeaking(at: .word)
    }

    /// Shuts down the TTS engine and releases resources.
    ///
    /// The delegate is detached here, so no completion callback will arrive for the
    /// interrupted utterance; the waiting `speak` call is therefore resumed explicitly.
    func shutdown() {
        synthesizer?.stopSpeaking(at: .immediate)
        synthesizer?.delegate = nil
        synthesizer = nil
        resumePendingContinuation()
        currentState = .initializing
    }

    /// Sets the speech rate used for subsequent utterances.
    /// - Parameter rate: Clamped to `AVSpeechUtteranceMinimumSpeechRate...AVSpeechUtteranceMaximumSpeechRate`.
    ///   NaN is ignored.
    func setRate(_ rate: Float) {
        guard !rate.isNaN else { return }
        speechRate = min(max(rate, AVSpeechUtteranceMinimumSpeechRate), AVSpeechUtteranceMaximumSpeechRate)
    }

    /// Sets the speech volume used for subsequent utterances.
    /// - Parameter volume: Clamped to `0.0...1.0`. NaN is ignored.
    func setVolume(_ volume: Float) {
        guard !volume.isNaN else { return }
        speechVolume = min(max(volume, 0.0), 1.0)
    }

    // MARK: - Helpers

    /// Builds an utterance for `text` using the resolved voice and the stored rate/volume.
    private func makeUtterance(text: String, language: String?) -> AVSpeechUtterance {
        let utterance = AVSpeechUtterance(string: text)
        utterance.voice = voice(for: language)
        utterance.rate = speechRate
        utterance.pitchMultiplier = 1.0
        utterance.volume = speechVolume
        return utterance
    }

    /// Resolves the voice for an utterance.
    ///
    /// An explicit `language` is used as given. Otherwise the current locale is tried
    /// first, then US English. A `nil` result leaves the utterance's voice unset.
    private func voice(for language: String?) -> AVSpeechSynthesisVoice? {
        if let language = language {
            return AVSpeechSynthesisVoice(language: language)
        }
        let languageCode = Locale.current.language.languageCode?.identifier ?? "en"
        let regionCode = Locale.current.region?.identifier ?? "US"
        return AVSpeechSynthesisVoice(language: "\(languageCode)-\(regionCode)")
            ?? AVSpeechSynthesisVoice(language: "en-US")
    }

    /// Resumes the suspended `speak` call, if any, and forgets its utterance.
    /// Clearing both together guarantees the continuation is resumed at most once.
    private func resumePendingContinuation() {
        completionContinuation?.resume()
        completionContinuation = nil
        activeUtterance = nil
    }

    /// Whether `id` identifies the utterance currently being waited on.
    private func isActive(_ id: ObjectIdentifier) -> Bool {
        guard let activeUtterance = activeUtterance else { return false }
        return ObjectIdentifier(activeUtterance) == id
    }
}

// MARK: - AVSpeechSynthesizerDelegate

extension TtsService: AVSpeechSynthesizerDelegate {
    nonisolated func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didStart utterance: AVSpeechUtterance) {
        // Only the Sendable identity crosses into the task, not the utterance itself.
        let id = ObjectIdentifier(utterance)
        Task {
            await markSpeaking(id)
        }
    }

    nonisolated func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didFinish utterance: AVSpeechUtterance) {
        let id = ObjectIdentifier(utterance)
        Task {
            await completeSpeech(id)
        }
    }

    nonisolated func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didCancel utterance: AVSpeechUtterance) {
        let id = ObjectIdentifier(utterance)
        Task {
            await completeSpeech(id)
        }
    }

    nonisolated func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didPause utterance: AVSpeechUtterance) {
        // Handle pause if needed
    }

    nonisolated func speechSynthesizer(_ synthesizer: AVSpeechSynthesizer, didContinue utterance: AVSpeechUtterance) {
        let id = ObjectIdentifier(utterance)
        Task {
            await markSpeaking(id)
        }
    }

    /// Marks the service as speaking, unless the callback belongs to an utterance
    /// that has already been stopped or replaced.
    private func markSpeaking(_ id: ObjectIdentifier) {
        guard isActive(id) else { return }
        currentState = .speaking
    }

    /// Resumes the waiting `speak` call and returns to `.ready`, unless the callback
    /// belongs to an utterance that has already been stopped or replaced.
    private func completeSpeech(_ id: ObjectIdentifier) {
        guard isActive(id) else { return }
        resumePendingContinuation()
        currentState = .ready
    }
}