import {
  StartStreamTranscriptionCommand,
  TranscribeStreamingClient,
} from '@aws-sdk/client-transcribe-streaming'
import { T } from '@tolgee/react'
import axios from 'axios'
import { errorToast } from 'helpers'
import { useCallback, useEffect, useRef, useState } from 'react'
import mic from 'microphone-stream'
import { Buffer } from 'buffer'
import process from 'process'

// Browser shims: publish the imported `Buffer` and `process` on `window`.
// `process` is only installed when the page does not already provide one.
// NOTE(review): presumably needed by dependencies that expect Node-style
// globals — confirm before removing.
if (!('process' in window)) {
  window.process = process
}
window.Buffer = Buffer

/**
 * Concatenates recorded Float32 chunks into one buffer and encodes it as WAV.
 *
 * @param {Float32Array[]} audioChunks - Recorded chunks, in playback order.
 * @param {number} sampleRate - Sample rate handed on to `float32ToWav`.
 * @returns {Blob} Whatever `float32ToWav` returns for the merged samples.
 */
function createAudioBlobFromFloat32(audioChunks, sampleRate) {
  // First pass: size the destination buffer.
  const sampleCount = audioChunks.reduce((count, part) => count + part.length, 0)
  const combined = new Float32Array(sampleCount)

  // Second pass: copy every chunk right behind the previous one.
  let writeIndex = 0
  audioChunks.forEach((part) => {
    combined.set(part, writeIndex)
    writeIndex += part.length
  })

  return float32ToWav(combined, sampleRate)
}

/**
 * Encodes mono Float32 samples as a 16-bit PCM WAV file.
 *
 * Samples are clamped to [-1, 1] before being scaled to the signed 16-bit
 * range. The result is a 44-byte RIFF/WAVE header followed by the
 * little-endian sample data.
 *
 * @param {Float32Array} float32Audio - Mono samples.
 * @param {number} sampleRate - Sample rate written into the header.
 * @returns {Blob} Blob of type `audio/wav`.
 */
function float32ToWav(float32Audio, sampleRate) {
  const HEADER_BYTES = 44
  const BYTES_PER_SAMPLE = 2 // 16-bit
  const channelCount = 1 // mono
  const bitDepth = 16

  const sampleCount = float32Audio.length
  const payloadBytes = sampleCount * BYTES_PER_SAMPLE
  const wav = new DataView(new ArrayBuffer(HEADER_BYTES + payloadBytes))

  // RIFF container
  writeString(wav, 0, 'RIFF')
  wav.setUint32(4, 36 + payloadBytes, true)
  writeString(wav, 8, 'WAVE')

  // "fmt " sub-chunk
  writeString(wav, 12, 'fmt ')
  wav.setUint32(16, 16, true) // sub-chunk size
  wav.setUint16(20, 1, true) // PCM
  wav.setUint16(22, channelCount, true)
  wav.setUint32(24, sampleRate, true)
  wav.setUint32(28, sampleRate * channelCount * BYTES_PER_SAMPLE, true) // byte rate
  wav.setUint16(32, channelCount * BYTES_PER_SAMPLE, true) // block align
  wav.setUint16(34, bitDepth, true)

  // "data" sub-chunk
  writeString(wav, 36, 'data')
  wav.setUint32(40, payloadBytes, true)

  // Clamp, scale and store each sample; setInt16 applies the same integer
  // conversion an Int16Array element assignment would.
  for (let n = 0; n < sampleCount; n += 1) {
    const clamped = Math.max(-1, Math.min(1, float32Audio[n]))
    const scaled = clamped < 0 ? clamped * 0x8000 : clamped * 0x7fff
    wav.setInt16(HEADER_BYTES + n * BYTES_PER_SAMPLE, scaled, true)
  }

  return new Blob([wav.buffer], { type: 'audio/wav' })
}

/**
 * Writes a string into a DataView, one byte per UTF-16 code unit.
 *
 * @param {DataView} view - Destination view.
 * @param {number} offset - Byte offset of the first character.
 * @param {string} string - Text to write; each code unit is stored via `setUint8`.
 */
function writeString(view, offset, string) {
  string.split('').forEach((unit, position) => {
    view.setUint8(offset + position, unit.charCodeAt(0))
  })
}

/**
 * Converts one microphone chunk into little-endian signed 16-bit PCM.
 *
 * The chunk is first decoded with `mic.toRaw`; each resulting sample is
 * clamped to [-1, 1] and scaled to the 16-bit range.
 *
 * @param {*} chunk - Chunk as emitted by the microphone stream.
 * @returns {Buffer} Buffer over the encoded samples (2 bytes per sample).
 */
const encodePCMChunk = (chunk) => {
  const samples = mic.toRaw(chunk)
  const pcm = new DataView(new ArrayBuffer(samples.length * 2))

  for (let index = 0; index < samples.length; index += 1) {
    const clamped = Math.max(-1, Math.min(1, samples[index]))
    const scale = clamped < 0 ? 0x8000 : 0x7fff
    pcm.setInt16(index * 2, clamped * scale, true)
  }

  return Buffer.from(pcm.buffer)
}

/**
 * Streams microphone audio to Amazon Transcribe and reports the results.
 *
 * Opens the microphone, feeds 16-bit PCM chunks into a
 * `StartStreamTranscriptionCommand`, polls an analyser node every 100 ms for
 * silence and volume, and forwards every transcript result until the result
 * stream ends. Everything acquired here is released in `finally`.
 *
 * @param {object} options
 * @param {Function} options.handleTranscribeOutput - Called with `(transcript, isPartial)` for each result.
 * @param {object} options.credentials - Passed as `credentials` to `TranscribeStreamingClient`.
 * @param {Function} options.handleStopTranscribing - Called on errors, after 60 s without results,
 *   and on a detected pause when `isStopOnPause` is set.
 * @param {{ current: any }} options.micRef - Receives the created microphone stream in `current`.
 * @param {Function} options.setIsLoading - Called with `false` once streaming is set up.
 * @param {Function} options.setIsTranscribing - Called with `true` once streaming is set up.
 * @param {boolean} [options.isStopOnPause] - Stop on a detected pause instead of reporting it.
 * @param {Function} [options.handlePause] - Called with a `performance.now()` timestamp on each pause.
 * @param {Function} [options.setSpeechVolume] - Receives a level of at most 0.2 while sound is
 *   detected, then 0 after 100 ms.
 * @param {string} [options.language] - Extra language code added to the identification options.
 * @param {Function} [options.voiceDetectedCallback] - Called with a `performance.now()` timestamp
 *   whenever sound or a transcript result is detected.
 * @returns {Promise<void>} Settles when the transcript result stream ends.
 * @throws Rethrows any setup or streaming error after calling `handleStopTranscribing`.
 */
const startStreaming = async ({
  handleTranscribeOutput,
  credentials,
  handleStopTranscribing,
  micRef,
  setIsLoading,
  setIsTranscribing,
  isStopOnPause,
  handlePause,
  setSpeechVolume,
  language,
  voiceDetectedCallback,
}) => {
  let micStream
  let mediaStream
  let audioContext
  let transcriptionClient
  let silenceInterval
  let silenceRearmTimeout
  let speakingTimeout
  let stopStreamTimeout

  const audioChunks = []

  try {
    mediaStream = await window.navigator.mediaDevices.getUserMedia({ audio: true }).catch((err) => {
      if (err.name === 'NotAllowedError') {
        errorToast(
          <T keyName='eleo-error-microphone-permission'>
            You have to enable the microphone permission
          </T>
        )
      }

      // Keep the original failure reachable for debugging.
      throw new Error('Audio context error', { cause: err })
    })

    audioContext = new window.AudioContext()

    micStream = new mic({ bufferSize: 2048, context: audioContext })
    micStream.setStream(mediaStream)

    micRef.current = micStream

    console.log(micStream)
    micStream.on('format', function (format) {
      console.log(format)
    })

    // TESTING save the recorded audio
    micStream.on('data', function (chunk) {
      audioChunks.push(new Float32Array(chunk.buffer))
    })

    console.log('AudioContext sample rate:', audioContext.sampleRate)

    // Setup analyser for pause detection
    const source = audioContext.createMediaStreamSource(mediaStream)
    const analyser = audioContext.createAnalyser()
    source.connect(analyser)
    analyser.minDecibels = -60
    const analyserData = new Uint8Array(analyser.frequencyBinCount)

    const getAudioStream = async function* () {
      for await (const chunk of micStream) {
        // Chunks larger than 44100 are skipped (original note: chunk length 4096)
        if (chunk.length <= 44100) {
          yield {
            AudioEvent: {
              AudioChunk: encodePCMChunk(chunk),
            },
          }
        }
      }
    }

    transcriptionClient = new TranscribeStreamingClient({
      region: 'eu-central-1',
      credentials: credentials,
    })

    const languages = ['en-US', 'pl-PL', 'fr-FR', 'es-ES']
    if (language && !languages.includes(language)) languages.push(language)

    const command = new StartStreamTranscriptionCommand({
      MediaEncoding: 'pcm',
      MediaSampleRateHertz: audioContext.sampleRate,
      AudioStream: getAudioStream(),
      IdentifyLanguage: true,
      LanguageOptions: languages.join(','),
    })
    const data = await transcriptionClient.send(command)

    // Handle pause detection
    let silence_start = performance.now()
    let started = false

    const detectSilence = () => {
      analyser.getByteFrequencyData(analyserData) // get current data
      if (analyserData.some((v) => v)) {
        // if there is data above the given db limit
        if (!started) {
          console.log('speech detected. Waiting for silence...')
          started = true
        }
        silence_start = performance.now() // set it to now
        voiceDetectedCallback?.(silence_start)
      } else if (started && performance.now() - silence_start > 3000) {
        console.log('Silence detected.')
        if (silenceInterval) clearInterval(silenceInterval)

        if (isStopOnPause) {
          handleStopTranscribing()
          return
        }
        started = false
        handlePause?.(performance.now())

        // Resume polling after a short break. The handle is kept so cleanup can
        // cancel it; otherwise the interval could be recreated after `finally`
        // has run and would then never be cleared.
        silenceRearmTimeout = setTimeout(() => {
          silenceInterval = setInterval(detectSilence, 100)
        }, 500)
      }

      // Volume detection
      if (setSpeechVolume) {
        let sum = 0
        for (let i = 0; i < analyserData.length; i++) {
          sum += analyserData[i]
        }
        const average = sum / analyserData.length
        const threshold = 0.1

        if (average > threshold) {
          if (speakingTimeout) clearTimeout(speakingTimeout)
          setSpeechVolume(Math.min(average / 10, 0.2))
          speakingTimeout = setTimeout(() => {
            setSpeechVolume(0)
          }, 100)
        }
      }
    }

    silenceInterval = setInterval(detectSilence, 100)

    setIsLoading(false)
    setIsTranscribing(true)

    // Abandon the stream after 1min of inactivity
    stopStreamTimeout = setTimeout(handleStopTranscribing, 60000)

    if (data.TranscriptResultStream) {
      for await (const event of data.TranscriptResultStream) {
        if (event?.TranscriptEvent?.Transcript) {
          for (const result of event.TranscriptEvent.Transcript.Results || []) {
            // `Alternatives` may be present but empty, so guard the first entry too.
            const alternative = result?.Alternatives?.[0]
            if (alternative?.Items) {
              silence_start = performance.now()
              voiceDetectedCallback?.(silence_start)

              console.log(alternative)
              handleTranscribeOutput(alternative.Transcript, result.IsPartial || false)

              // Any result counts as activity: restart the inactivity timer.
              if (stopStreamTimeout) clearTimeout(stopStreamTimeout)
              stopStreamTimeout = setTimeout(handleStopTranscribing, 60000)
            }
          }
        }
      }
    }
  } catch (err) {
    console.log(err)
    handleStopTranscribing()

    throw err
  } finally {
    // Cleanup runs before the debug preview so a failure while building the
    // preview cannot leave the microphone or timers alive.
    // `speakingTimeout` is left pending so the last reported volume still
    // falls back to 0.
    if (stopStreamTimeout) clearTimeout(stopStreamTimeout)
    if (silenceInterval) clearInterval(silenceInterval)
    if (silenceRearmTimeout) clearTimeout(silenceRearmTimeout)

    // `mediaStream` is undefined when getUserMedia itself failed; guarding it
    // keeps the original error from being replaced by a TypeError.
    mediaStream?.getAudioTracks().forEach(function (track) {
      track.stop()
    })

    const recordedSampleRate = audioContext?.sampleRate

    if (audioContext) {
      audioContext.close().catch((err) => console.log(err))
    }

    if (transcriptionClient) transcriptionClient.destroy()

    // TESTING display the recorded audio
    if (audioChunks.length > 0) {
      const audioBlob = createAudioBlobFromFloat32(audioChunks, recordedSampleRate)

      const existingElement = document.getElementById('preview-audio')
      const audioElement = existingElement ?? document.createElement('audio')
      const previousSrc = audioElement.src

      audioElement.controls = true
      audioElement.style.position = 'absolute'
      audioElement.style.top = '70px'
      audioElement.style.right = '20px'
      audioElement.style['z-index'] = '9999'
      audioElement.id = 'preview-audio'
      audioElement.src = URL.createObjectURL(audioBlob)

      // Release the previous recording instead of keeping every blob alive.
      if (typeof previousSrc === 'string' && previousSrc.startsWith('blob:')) {
        URL.revokeObjectURL(previousSrc)
      }

      if (!existingElement) document.body.appendChild(audioElement)
    }
  }
}

/**
 * Stops and destroys the microphone stream held by a ref, then clears the ref.
 * Does nothing when the ref is missing or currently empty.
 *
 * @param {{ current: any }} [micRef] - Ref whose `current` holds the microphone stream.
 */
const stopStreaming = (micRef) => {
  const activeMic = micRef?.current
  if (!activeMic) return

  activeMic.stop()
  activeMic.destroy()
  micRef.current = undefined

  console.log('Mic cleanup')
}

/**
 * React hook that drives a microphone-to-text transcription session.
 *
 * @param {object} options
 * @param {Function} [options.callback] - Called with the joined text of all finished lines plus
 *   the current partial line whenever either changes (and is non-empty).
 * @param {Function} [options.pauseCallback] - Called (without arguments) after each reported pause.
 * @param {Function} [options.voiceDetectedCallback] - Forwarded to `startStreaming`.
 * @param {Function} [options.setSpeechVolume] - Forwarded to `startStreaming`.
 * @param {string} [options.language] - Forwarded to `startStreaming`.
 * @param {boolean} [options.isStopOnPause=true] - Forwarded to `startStreaming`.
 * @param {boolean} [options.isChatbot=false] - Selects the chat-bot token endpoint instead of the
 *   utility one.
 * @returns {{
 *   startTranscribe: Function,
 *   stopTranscribe: Function,
 *   toggleTranscribe: Function,
 *   isLoading: boolean,
 *   isTranscribing: boolean,
 *   lines: string[],
 *   currentLine: string,
 * }}
 */
const useTranscribe = ({
  callback,
  pauseCallback,
  voiceDetectedCallback,
  setSpeechVolume,
  language,
  isStopOnPause = true,
  isChatbot = false,
}) => {
  const [isTranscribing, setIsTranscribing] = useState(false)
  const [isLoading, setIsLoading] = useState(false)

  const [lines, setLines] = useState([])
  const [currentLine, setCurrentLine] = useState('')
  const [pauseTimestamp, setPauseTimestamp] = useState()

  const micRef = useRef()

  // Publish the accumulated text whenever a line or the partial line changes.
  useEffect(() => {
    if (currentLine.length || lines.length) {
      const text = [...lines, currentLine].join(' ')
      callback?.(text)
    }
  }, [lines, currentLine, callback])

  // Fire once per pause. `pauseCallback` is deliberately not a dependency so a
  // new callback identity alone does not re-trigger it.
  useEffect(() => {
    if (pauseTimestamp) pauseCallback?.()
  }, [pauseTimestamp])

  // Force-stop on unmount. The first-render `stopTranscribe` is sufficient
  // because a forced stop ignores `isLoading` and only touches `micRef`.
  useEffect(() => {
    return () => {
      stopTranscribe(true)
    }
  }, [])

  const handleTranscribeOutput = useCallback((data, isPartial) => {
    if (isPartial) {
      setCurrentLine(data)
    } else {
      setCurrentLine('')
      setLines((prev) => [...prev, data])
    }
  }, [])

  const handlePause = useCallback((timestamp) => {
    setLines([])
    setCurrentLine('')
    setPauseTimestamp(timestamp)
  }, [])

  const stopTranscribe = useCallback(
    async (isForced = false) => {
      if (isLoading && !isForced) return // don't allow stopping before setup is complete as this prevents proper cleanup

      setLines([])
      setCurrentLine('')
      if (isLoading) setIsLoading(false)

      setIsTranscribing(false)
      stopStreaming(micRef)
    },
    [isLoading]
  )

  const startTranscribe = useCallback(async () => {
    setIsLoading(true)

    try {
      const res = await axios.get(
        isChatbot ? '/api/chat-bot/transcribe/token' : '/api/utility/transcribe/token'
      )
      const credentials = {
        accessKeyId: res.data.Credentials.AccessKeyId,
        secretAccessKey: res.data.Credentials.SecretAccessKey,
        sessionToken: res.data.Credentials.SessionToken,
      }

      await startStreaming({
        handleTranscribeOutput,
        handleStopTranscribing: stopTranscribe,
        setIsLoading,
        setIsTranscribing,
        credentials,
        micRef,
        isStopOnPause,
        handlePause,
        setSpeechVolume,
        language,
        voiceDetectedCallback,
      })
    } catch (error) {
      console.error(error)
      setIsLoading(false)
      await stopTranscribe(true)
    }
    // `isChatbot` is listed so a changed flag is not served from a stale closure.
    // `setSpeechVolume` / `voiceDetectedCallback` stay out of the list so that
    // inline callbacks from the caller do not change this function's identity.
  }, [handleTranscribeOutput, handlePause, stopTranscribe, isStopOnPause, isChatbot, language])

  const toggleTranscribe = useCallback(async () => {
    // While setup is in flight `isTranscribing` is still false, so toggling
    // would start a second session and overwrite `micRef`, leaking the first
    // microphone. Stopping is refused by `stopTranscribe` in that phase anyway.
    if (isLoading) return

    if (!isTranscribing) {
      console.log('startRecording')
      await startTranscribe()
    } else {
      console.log('stopRecording')
      await stopTranscribe()
    }
  }, [isLoading, isTranscribing, startTranscribe, stopTranscribe])

  return {
    startTranscribe,
    stopTranscribe,
    toggleTranscribe,
    isLoading,
    isTranscribing,
    lines,
    currentLine,
  }
}

export default useTranscribe
