// speech.jsx — TTS with ElevenLabs (natural) → browser fallback (robotic)
// ElevenLabs free tier: 10k chars/month, no credit card.
// Get key at elevenlabs.io → paste in Tweaks panel.

// ── ElevenLabs config ────────────────────────────────────────────────────────
// "Adam" — deep, warm, authoritative male. Works in all languages with
// eleven_multilingual_v2 model.
const ELEVEN_VOICE_ID = 'pNInz6obpgDQGcFmaJgB'; // premade "Adam" voice (free)
const ELEVEN_MODEL = 'eleven_multilingual_v2';
const ELEVEN_API = 'https://api.elevenlabs.io/v1/text-to-speech';

// Mutable playback state shared by the functions in this file.
let _currentAudio = null; // Audio element that is playing right now, if any
let _currentAbort = null; // AbortController of the fetch still in flight, if any
let _speakSeq = 0; // bumped per request; a result whose id is no longer the latest is dropped

/**
 * Synthesize `text` with the ElevenLabs API and play it through an Audio element.
 *
 * Any request still in flight and any audio still playing from an earlier call
 * are cancelled first, so only the most recent call can be heard.
 *
 * @param {string} text - Text to speak.
 * @param {string} key - ElevenLabs API key, sent as the `xi-api-key` header.
 * @param {number} [volume=1] - Playback volume; clamped to the range 0..1.
 * @returns {Promise<boolean|null>} `true` once playback ran to the end,
 *   `false` when the audio element reported an error or `play()` was rejected,
 *   `null` when the request failed, was aborted, or was superseded by a newer call.
 */
async function speakElevenLabs(text, key, volume = 1) {
  // Cancel any in-flight request AND any playing audio
  if (_currentAbort) { try { _currentAbort.abort(); } catch (e) {} }
  if (_currentAudio) {
    try { _currentAudio.pause(); _currentAudio.src = ''; } catch (e) {}
    _currentAudio = null;
  }

  const mySeq = ++_speakSeq;
  const ctrl = new AbortController();
  _currentAbort = ctrl;

  try {
    const res = await fetch(`${ELEVEN_API}/${ELEVEN_VOICE_ID}`, {
      method: 'POST',
      signal: ctrl.signal,
      headers: {
        'xi-api-key': key,
        'Content-Type': 'application/json',
        'Accept': 'audio/mpeg',
      },
      body: JSON.stringify({
        text,
        model_id: ELEVEN_MODEL,
        voice_settings: {
          stability: 0.45,
          similarity_boost: 0.80,
          style: 0.25,
          use_speaker_boost: true,
        },
      }),
    });

    // If a newer speak() was called while we were waiting, drop this one
    if (mySeq !== _speakSeq) return null;

    if (!res.ok) {
      const err = await res.text();
      console.warn('[ElevenLabs] error', res.status, err);
      return null; // caller will use browser TTS fallback
    }

    const blob = await res.blob();
    if (mySeq !== _speakSeq) return null; // check again after blob

    const url = URL.createObjectURL(blob);
    const audio = new Audio(url);
    audio.volume = Math.max(0, Math.min(1, volume));
    _currentAudio = audio;

    return new Promise((resolve) => {
      let settled = false;
      // Single exit path: every outcome releases the blob URL and the
      // module-level reference, so a rejected play() no longer leaks either.
      const finish = (ok) => {
        if (settled) return;
        settled = true;
        audio.onended = null;
        audio.onerror = null;
        audio.oncanplay = null;
        URL.revokeObjectURL(url);
        if (_currentAudio === audio) _currentAudio = null;
        resolve(ok);
      };

      audio.onended = () => finish(true);
      audio.onerror = () => finish(false);
      // Wait for canplay so first word isn't cut off
      audio.oncanplay = () => {
        // canplay may fire more than once; only the first one starts playback
        audio.oncanplay = null;
        audio.play().catch(() => finish(false));
      };
      audio.load();
    });
  } catch (e) {
    if (e.name !== 'AbortError') console.warn('[ElevenLabs] fetch failed:', e);
    return null;
  } finally {
    // The network phase is over; do not keep a finished controller around.
    if (_currentAbort === ctrl) _currentAbort = null;
  }
}

// ── Browser TTS fallback ──────────────────────────────────────────────────────
// App language code → locale tag used for browser speech synthesis.
const SPEECH_LOCALE = {
  es: 'es-ES',
  en: 'en-US',
  fr: 'fr-FR',
  de: 'de-DE',
  it: 'it-IT',
  pt: 'pt-PT',
  zh: 'zh-CN',
  ru: 'ru-RU',
};

// Case-insensitive voice-name heuristics used by _pickVoice to prefer a male voice.
// IS_FEMALE: the gender words match anywhere in the name; the listed given
// names only match as whole words.
const IS_FEMALE = /female|mujer|femme|frau|donna|mulher|woman|girl|\b(paloma|monica|amelie|emma|sofia|maria|alice|samantha|fiona|karen|anna|katja|laura|zira|hazel|eva|julia|sara|nora|luciana|isabella|francisca|camila|lupe|dalia|conchita|elsa)\b/i;
// IS_MALE: every alternative must match as a whole word, so "female" does not
// count as "male".
const IS_MALE   = /\b(male|hombre|mann|uomo|homem|jorge|carlos|diego|enrique|alvaro|pablo|antonio|alex|daniel|aaron|jack|tom|oliver|thomas|luca|marco|ryan|michael|david|james|mark|stefan|dmitry|ivan|andres|paulo|felix|guy|eric|arthur)\b/i;

// Cached copy of speechSynthesis.getVoices(); stays empty when the API is missing.
let _voices = [];

/** Re-read the browser's voice list into the `_voices` cache. */
function _loadVoices() {
  if (!('speechSynthesis' in window)) return;
  _voices = window.speechSynthesis.getVoices() || [];
}

// Fill the cache now, and again whenever the browser reports a changed voice list.
if ('speechSynthesis' in window) {
  _loadVoices();
  window.speechSynthesis.onvoiceschanged = _loadVoices;
}

/**
 * Choose a browser voice for the given app language, preferring male voices.
 *
 * Voices whose tag equals the target locale are used when there are any;
 * otherwise every voice sharing the two-letter language prefix is considered.
 * Tags are compared case-insensitively with "_" treated as "-", so a voice
 * reported as "en_US" still counts as an exact match for "en-US".
 *
 * @param {string} lang - Key into SPEECH_LOCALE; unknown keys fall back to 'en-US'.
 * @returns {SpeechSynthesisVoice|null} The preferred voice, or null when no
 *   voice matches the language at all.
 */
function _pickVoice(lang) {
  if (!_voices.length) _loadVoices();

  const wanted = (SPEECH_LOCALE[lang] || 'en-US').toLowerCase();
  const prefix = wanted.slice(0, 2);
  // Normalize separator and case before comparing tags.
  const tagOf = (v) => String(v.lang || '').replace(/_/g, '-').toLowerCase();

  const exact  = _voices.filter(v => tagOf(v) === wanted);
  const nearby = _voices.filter(v => tagOf(v).startsWith(prefix));
  const pool   = exact.length ? exact : nearby;
  if (!pool.length) return null;

  const notFemale = (v) => !IS_FEMALE.test(v.name);
  const maleOnly  = (v) => IS_MALE.test(v.name) && notFemale(v);

  // Preference order: named-male Microsoft, named-male Google, any non-female
  // Microsoft, any named-male, any non-female, then whatever comes first.
  return (
    pool.find(v => /microsoft/i.test(v.name) && maleOnly(v)) ||
    pool.find(v => /google/i.test(v.name)    && maleOnly(v)) ||
    pool.find(v => /microsoft/i.test(v.name) && notFemale(v)) ||
    pool.find(v => maleOnly(v)) ||
    pool.find(v => notFemale(v)) ||
    pool[0]
  );
}

/**
 * Speak `text` with the browser's built-in speech synthesis.
 * Whatever the synthesizer is currently saying is cancelled first.
 *
 * @param {string} text - Text to speak.
 * @param {string} lang - Key into SPEECH_LOCALE; unknown keys use 'en-US'.
 * @param {number} volume - Clamped to the range 0..1.
 * @returns {SpeechSynthesisUtterance|null} The utterance handed to the
 *   synthesizer, or null when speech synthesis is not available.
 */
function _speakBrowser(text, lang, volume) {
  if (!('speechSynthesis' in window)) return null;

  const synth = window.speechSynthesis;
  synth.cancel();

  const utterance = Object.assign(new SpeechSynthesisUtterance(text), {
    lang: SPEECH_LOCALE[lang] || 'en-US',
    rate: 0.92,
    pitch: 0.75,
    volume: Math.max(0, Math.min(1, volume)),
  });

  // Leave utterance.voice untouched when nothing suitable was found.
  const chosen = _pickVoice(lang);
  if (chosen) {
    utterance.voice = chosen;
    console.log('[Leo voice browser]', chosen.name);
  }

  synth.speak(utterance);
  return utterance;
}

// ── Public API ────────────────────────────────────────────────────────────────
/**
 * Speak `text` aloud: ElevenLabs when an API key is available, otherwise the
 * browser's speech synthesis. A genuine ElevenLabs failure falls back to the
 * browser voice; a request that was superseded, stopped, or muted does not.
 *
 * @param {string} text - Text to speak; empty text is ignored.
 * @param {string} [lang='en'] - Key into SPEECH_LOCALE (used by the browser voice).
 * @param {{volume?: number}} [opts] - `volume` applies only while no
 *   non-negative `window.__rideVolume` is set.
 * @returns {SpeechSynthesisUtterance|Promise<boolean|SpeechSynthesisUtterance|null>|null}
 *   Without a key: the browser utterance (or null when unsupported).
 *   With a key: a promise resolving to `true` after ElevenLabs playback, to the
 *   browser fallback's result after a failure, or to `null` when the request
 *   was superseded, stopped, or muted in the meantime.
 *   `null` directly when there is no text or the volume is 0.
 */
function speak(text, lang = 'en', opts = {}) {
  if (!text) return null;

  // Use ride volume if set, otherwise use opts.volume
  const rideVol = typeof window.__rideVolume === 'number' ? window.__rideVolume : -1;
  const optVol  = opts && typeof opts.volume === 'number' ? opts.volume : 1;
  const volume  = rideVol >= 0 ? rideVol : optVol;
  if (volume === 0) return null; // muted

  // Key lookup: window.__ELEVEN_KEY wins; localStorage is consulted only when
  // that global is empty.
  let key = String(window.__ELEVEN_KEY || '').trim();
  if (!key) {
    // Best effort: if storage cannot be read, carry on without a key.
    try { key = (localStorage.getItem('hola_eleven_key') || '').trim(); } catch (e) {}
  }

  // Browser TTS
  if (!key) return _speakBrowser(text, lang, volume);

  const pending = speakElevenLabs(text, key, volume);
  // speakElevenLabs bumps _speakSeq synchronously before its first await, so
  // the counter now holds the id of the request we just started.
  const mySeq = _speakSeq;

  return pending.then(ok => {
    if (ok) return ok;

    // A newer speak() or stopSpeaking() took over: stay silent rather than
    // reading stale text with the browser voice.
    if (mySeq !== _speakSeq) return null;

    // Re-read the ride volume: it may have changed (or been muted) while the
    // request was in flight.
    const liveRide = typeof window.__rideVolume === 'number' ? window.__rideVolume : -1;
    const liveVol  = liveRide >= 0 ? liveRide : volume;
    if (liveVol === 0) return null;

    // ElevenLabs failed → fall back to browser TTS silently
    return _speakBrowser(text, lang, liveVol);
  });
}

/**
 * Halt all speech: invalidate pending results, abort the in-flight request,
 * silence the playing audio element and cancel browser speech synthesis.
 */
function stopSpeaking() {
  // Advancing the sequence makes any result still on its way look stale.
  _speakSeq += 1;

  const inFlight = _currentAbort;
  if (inFlight) {
    try {
      inFlight.abort();
    } catch (e) {}
    _currentAbort = null;
  }

  const playing = _currentAudio;
  if (playing) {
    try {
      playing.pause();
      playing.src = '';
    } catch (e) {}
    _currentAudio = null;
  }

  if ('speechSynthesis' in window) {
    window.speechSynthesis.cancel();
  }
}

/**
 * Set the ride volume and apply it to whatever is playing (live slider feedback).
 *
 * The clamped value is stored in `window.__rideVolume`, which speak() reads on
 * its next call. A value that is not a number is ignored.
 *
 * @param {number} v - Requested volume; clamped to the range 0..1.
 */
function setPlayingVolume(v) {
  const vol = Math.max(0, Math.min(1, v));
  // NaN (e.g. from undefined) must not be stored as the ride volume or be
  // assigned to the audio element's volume.
  if (Number.isNaN(vol)) return;
  window.__rideVolume = vol;

  if (vol === 0) {
    // MUTE: stop everything immediately. Going through stopSpeaking() also
    // invalidates any result still in flight, which the volume slider's own
    // copy of this teardown used to skip.
    stopSpeaking();
    return;
  }

  // NON-ZERO: ElevenLabs audio changes in real time (no interruption needed)
  if (_currentAudio) _currentAudio.volume = vol;
  // Browser TTS: volume can't change mid-utterance (W3C spec limitation).
  // __rideVolume is already updated above — next speak() will use the new level.
  // We do NOT cancel here: cancelling the current sentence would create a "mute" effect
  // every time the user drags the slider, which is confusing.
}

// Publish the public speech API on window.
window.speak = speak;
window.stopSpeaking = stopSpeaking;
window.SPEECH_LOCALE = SPEECH_LOCALE;
window.setPlayingVolume = setPlayingVolume;
