import React, {
	createContext,
	useContext,
	useEffect,
	useState,
	ReactNode,
	useRef,
} from "react";
import * as sdk from "microsoft-cognitiveservices-speech-sdk";
import { useAuth } from "./useAuth";
import { SERVER_URL } from "../Constants";
import _ from "lodash";
import { calculateOverallPronunciationScore } from "../utils/Helpers";
import { AudioRecorder } from "../utils/AudioRecorder";
import { RecognitionCallbacks } from "../types";
import { useBlendShapes } from "./BlendShapeProvider";

/** Speech-service credentials cached by the provider. */
interface Auth {
	/** Authorization token taken from the `token` field of the `/token` response. */
	token: string;
	/** Value of the `region` field of the same response. */
	region: string;
	/** `Date.now()` value recorded when the token was fetched; used to decide when to refresh. */
	lastTokenFetch: number;
}


/** Shape of the value exposed through `AzureContext` / `useAzure()`. */
interface AzureContextType {
	/** Currently cached credentials, or null when none have been fetched yet. */
	auth: Auth | null;
	/**
	 * Returns the cached credentials, fetching new ones when none are cached or
	 * the cached ones are older than nine minutes. Resolves to null if the fetch fails.
	 */
	ensureToken: () => Promise<Auth | null>;
	/** Synthesizes and plays the given text; see `SpeakTextParams`. */
	speakTextAsync: (params: SpeakTextParams) => Promise<void>;
	/** Pauses the current player and releases the playback resources. */
	stopTextAsync: () => Promise<void>;
	/**
	 * Starts audio recording and continuous recognition with pronunciation
	 * assessment against `referenceText`. `language` defaults to "en-US".
	 */
	startPronunciationAssessment: (
		referenceText: string,
		callbacks: RecognitionCallbacks,
		language?: string
	) => Promise<void>;
	/** Stops the recorder (if recording) and the active recognizer (if any). */
	stopPronunciationAssessment: () => Promise<void>;
}

/** Options accepted by `speakTextAsync`. */
interface SpeakTextParams {
	/** Text placed inside the SSML `<prosody>` element. */
	text: string;
	/** Synthesis language; `speakTextAsync` defaults it to "en-US". */
	language?: string;
	/** Voice name; `speakTextAsync` defaults it to "en-US-AriaNeural". */
	voice?: string;
	/** Prosody rate as a percentage, between -100% and 100%; defaults to 0. */
	rate?: number; // between -100% and 100%
	/** Invoked by the playback poller after synthesis has finished and playback has passed the last word boundary. */
	cb: () => void;
	/** When true, the blend-shape queue is cleared and viseme frames are queued as they arrive. */
	updateBlendShapes?: boolean;
	/**
	 * Optional callback invoked on every word boundary with
	 * (audioOffset / 10,000,000, boundary text).
	 * NOTE(review): typed `any`; presumably `(time: number, text: string) => void` — confirm against callers.
	 */
	handleWordBoundary?: any;
}



// Context carrying the Azure speech helpers. The null default is what lets
// useAzure() detect that it is being called outside of an AzureProvider.
const AzureContext = createContext<AzureContextType | null>(null);

/** Props of `AzureProvider`. */
interface AzureProviderProps {
	/** Subtree that receives the Azure context value. */
	children: ReactNode;
}

/**
 * Estimates how long it takes to speak `text`, in seconds.
 *
 * The estimate assumes 150 words per minute, is rounded up to two decimal
 * places and is then scaled linearly by `rate`.
 *
 * @param text - Text to be spoken. Blank or whitespace-only text yields 0.
 * @param rate - Prosody rate in percent (expected between -100 and 100).
 * @returns Estimated duration in seconds.
 */
function estimateSpeakingTime(text: string, rate: number): number {
	// Average speaking rate (words per minute) assumed for the synthesized voice
	const averageWordsPerMinute = 150;

	// Split on runs of whitespace and drop empty tokens: "".split(...) yields
	// [""], which would otherwise make blank text count as one word.
	const wordCount = text.split(/\s+/).filter((word) => word.length > 0).length;
	if (wordCount === 0) {
		return 0;
	}

	// Calculate the estimated time in seconds
	const estimatedTimeInSeconds = (wordCount / averageWordsPerMinute) * 60;

	// Round the result up to two decimal places
	const rounded = Math.ceil(estimatedTimeInSeconds * 100) / 100;

	// Linear rate adjustment: the factor is 1.75 at -100% and 0.25 at +100%.
	// NOTE(review): the original comment described this as "approx 2x to 0.5x";
	// confirm which scaling is actually intended.
	return rounded * (1 - 0.0075 * rate);
}

export const AzureProvider: React.FC<AzureProviderProps> = ({ children }) => {
	const { addToBlendShapeQueue, clearBlendShapeQueue, addEndBlendShapeFrame } = useBlendShapes();

	// Cached speech-service credentials; null until fetchToken() first succeeds.
	const [auth, setAuth] = useState<Auth | null>(null);
	// Active continuous recognizer; a non-null value is treated as "a session is running".
	const recognizerRef = useRef<sdk.SpeechRecognizer | null>(null);
	// Speaker destination used for the utterance being played, if any.
	const playerRef = useRef<sdk.SpeakerAudioDestination | null>(null);
	// Closed and cleared by cleanUp() when set.
	// NOTE(review): nothing in the visible code assigns this ref — confirm whether it is still needed.
	const synthesizerRef = useRef<sdk.SpeechSynthesizer | undefined>(undefined);
	// Handle of the 100 ms polling interval that detects the end of playback.
	const callbackIntervalRef = useRef<any>(null);

	// Microphone recorder (AudioRecorder), created lazily by the first assessment
	const recorderRef = useRef<any>(null);

	// Per-session accumulators for pronunciation assessment. They are reset when
	// an assessment starts and filled by the recognizer's `recognized` handler.
	const allWords = useRef<any[]>([]);
	const currentText = useRef<string[]>([]);
	const startOffset = useRef(0);
	const recognizedWords = useRef<any[]>([]);
	const fluencyScores = useRef<number[]>([]);
	const prosodyScores = useRef<number[]>([]);
	const durations = useRef<number[]>([]);
	// NOTE(review): reset at the start of each assessment but not otherwise read in the visible code.
	const jsonResult = useRef<any>({});


	// Supplies the app access token used to authorize the /token request.
	const { getAccessToken } = useAuth();


	// Requests fresh speech credentials from the backend, caches them in state
	// and returns them. Any failure is logged and reported as null.
	const fetchToken = async (): Promise<Auth | null> => {
		try {
			const accessToken = await getAccessToken();
			if (!accessToken) {
				throw new Error("User not loaded yet.");
			}

			const tokenResponse = await fetch(`${SERVER_URL}/token`, {
				method: "GET",
				headers: { Authorization: `Bearer ${accessToken}` },
			});
			if (!tokenResponse.ok) {
				throw new Error("Something went wrong");
			}

			const payload = await tokenResponse.json();
			if (!payload.token) {
				throw new Error("Token not available");
			}

			// Stamp the credentials so their age can be checked later.
			const freshAuth: Auth = {
				token: payload.token,
				region: payload.region,
				lastTokenFetch: Date.now(),
			};
			setAuth(freshAuth);
			return freshAuth;
		} catch (err) {
			console.error(err);
			return null;
		}
	};

	// Returns usable credentials: the cached ones while they are at most nine
	// minutes old, otherwise freshly fetched ones (null if the fetch fails).
	const ensureToken = async (): Promise<Auth | null> => {
		const maxTokenAgeMs = 9 * 60 * 1000;
		const cachedIsFresh =
			auth !== null && Date.now() - auth.lastTokenFetch <= maxTokenAgeMs;

		if (cachedIsFresh) {
			return auth;
		}

		console.log("Fetching new token");
		return await fetchToken();
	};

	// Reports whether the cached credentials are younger than nine minutes.
	// Yields null (not false) when nothing is cached.
	const checkToken = () => {
		if (!auth) {
			return auth;
		}
		const tokenAgeMs = Date.now() - auth.lastTokenFetch;
		return tokenAgeMs < 9 * 60 * 1000;
	}

	// Token refresh logic
	// Keeps the speech token warm: re-checks it whenever `auth` changes and
	// then every 9 minutes and 50 seconds until the effect is torn down.
	useEffect(() => {
		const refreshEveryMs = 590000; // 9 minutes and 50 seconds
		const timerId = setInterval(() => {
			void ensureToken();
		}, refreshEveryMs);

		// Immediate check so a token is available without waiting for the timer.
		void ensureToken();

		return () => {
			clearInterval(timerId);
		};
	}, [auth]);


	// Releases everything tied to the current utterance: the end-of-playback
	// poller, the speaker destination and the synthesizer (when present).
	const cleanUp = () => {
		const pollTimer = callbackIntervalRef.current;
		if (pollTimer) {
			clearInterval(pollTimer);
		}

		const activePlayer = playerRef.current;
		if (activePlayer) {
			activePlayer.close();
			playerRef.current = null;
		}

		const activeSynthesizer = synthesizerRef.current;
		if (activeSynthesizer) {
			activeSynthesizer.close();
			synthesizerRef.current = undefined;
		}
	};

	// https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/633b8fd841a98deeb8b22319271487dffd518755/samples/js/browser/public/synthesis.html#L341
	// Synthesizes `text` with the given voice/rate and plays it through the
	// default speaker.
	//
	// - `cb` is invoked once synthesis has finished and playback has moved past
	//   the last reported word boundary. It is NOT invoked when no token is
	//   available, when synthesis fails or is canceled, or when the utterance
	//   is superseded by a later speakTextAsync()/stopTextAsync() call.
	// - `handleWordBoundary`, when provided, receives (time in seconds, word text).
	// - `updateBlendShapes` clears the blend-shape queue and queues every
	//   received viseme animation frame.
	//
	// NOTE(review): `text` is interpolated into the SSML unescaped, so characters
	// such as "&" or "<" produce invalid SSML unless callers pass markup on purpose.
	const speakTextAsync = async ({
		text,
		language = "en-US",
		voice = "en-US-AriaNeural",
		rate = 0,
		cb,
		updateBlendShapes = false,
		handleWordBoundary = null,
	}: SpeakTextParams): Promise<void> => {
		const currentAuth = await ensureToken();
		if (!currentAuth) {
			console.error("Token not available");
			return;
		}

		// Interrupt any utterance still in flight. Without this the previous
		// polling interval is orphaned (its id gets overwritten below) and keeps
		// calling cleanUp()/cb() every 100 ms against the new player.
		playerRef.current?.pause();
		cleanUp();

		const speechConfig = sdk.SpeechConfig.fromAuthorizationToken(
			currentAuth.token,
			currentAuth.region
		);
		speechConfig.speechSynthesisLanguage = language;
		speechConfig.speechSynthesisVoiceName = voice;

		const player = new sdk.SpeakerAudioDestination();
		playerRef.current = player;
		const audioConfig = sdk.AudioConfig.fromSpeakerOutput(player);
		const synthesizer = new sdk.SpeechSynthesizer(speechConfig, audioConfig);

		// xml:lang follows the requested language instead of a fixed "en-US".
		const ssml = `<speak xmlns='http://www.w3.org/2001/10/synthesis' xmlns:mstts='http://www.w3.org/2001/mstts' version='1.0' xml:lang='${language}'><voice name='${voice}'><prosody rate="${rate}%"> <mstts:viseme type="FacialExpression"/>${text}</prosody></voice></speak>`;

		let lastWordBoundaryTime = 0;
		let speechEnded = false;

		// True while the shared refs still belong to this utterance, i.e. it has
		// not been stopped or replaced in the meantime.
		const ownsPlayback = () => playerRef.current === player;

		// Track word boundaries
		synthesizer.wordBoundary = (s: any, e: any) => {
			// audioOffset is reported in 100 ns ticks: 10,000,000 ticks = 1 second,
			// the same unit as the player's currentTime compared against below.
			lastWordBoundaryTime = e.audioOffset / 10000000;
			if (handleWordBoundary) {
				handleWordBoundary(lastWordBoundaryTime, e.text);
			}
		};

		// Poll for the end of speech: synthesis done and playback past the last word.
		callbackIntervalRef.current = setInterval(() => {
			const currentTime = player.currentTime || 0;

			if (speechEnded && currentTime > lastWordBoundaryTime) {
				cleanUp();
				cb();
				addEndBlendShapeFrame();
			}
		}, 100);

		if (updateBlendShapes) {
			clearBlendShapeQueue();

			synthesizer.visemeReceived = function (s: any, e: any) {
				// Skip events without an animation payload; JSON.parse("") would throw.
				if (!e.animation) {
					return;
				}
				const blendShapeData = JSON.parse(e.animation);

				// Add each frame to the queue with its timestamp
				addToBlendShapeQueue({
					frameIndex: blendShapeData.FrameIndex,
					blendShapes: blendShapeData.BlendShapes, // This is now expected to be number[][]
					timestamp: e.audioOffset / 10000 // Convert ticks to milliseconds
				});
			}
		}

		synthesizer.speakSsmlAsync(
			ssml,
			(result) => {
				// All audio has been handed to the player, so the synthesizer
				// (and its service connection) can be released right away.
				synthesizer.close();

				if (result.reason === sdk.ResultReason.Canceled) {
					console.error("Speech synthesis canceled:", result.errorDetails);
					// Nothing will play, so stop polling instead of spinning forever.
					if (ownsPlayback()) {
						cleanUp();
					}
					return;
				}

				speechEnded = true;
			},
			(error) => {
				console.error("Speech synthesis error:", error);
				synthesizer.close();
				if (ownsPlayback()) {
					cleanUp();
				}
			},
		);
	};

	// Stops the current utterance: pauses audio output, then releases the
	// poller, player and synthesizer.
	const stopTextAsync = async (): Promise<void> => {
		if (playerRef.current) {
			playerRef.current.pause();
		}
		// cleanUp() already closes and clears synthesizerRef; closing it here as
		// well would call close() twice on the same synthesizer instance.
		cleanUp();
	};

	// Starts microphone recording plus continuous speech recognition with
	// pronunciation assessment against `referenceText`.
	//
	// - `callbacks.recognizing` is forwarded every interim recognition event.
	// - `callbacks.results(words, scores, recordedBlob)` is called once every
	//   word of the reference text has been recognized; the recorder is stopped
	//   just before that call.
	// - `callbacks.canceled` (optional) is called when the recognizer reports a
	//   cancellation; the recognizer is then released so a new session can start.
	// - `language` is the recognition language (defaults to "en-US").
	//
	// Does nothing beyond resetting state / starting the recorder when a
	// recognizer is already running, and aborts when no token can be obtained.
	const startPronunciationAssessment = async (
		referenceText: string,
		callbacks: RecognitionCallbacks,
		language: string = "en-US"
	): Promise<void> => {

		if (!recorderRef.current) {
			recorderRef.current = new AudioRecorder();
			await recorderRef.current.init();
		}

		// Reset all refs
		// NOTE(review): this (and the recorder start below) happens before the
		// "already running" check, so calling start during a live session wipes
		// that session's accumulators — confirm callers rely on this as a retry.
		allWords.current = [];
		currentText.current = [];
		startOffset.current = 0;
		recognizedWords.current = [];
		fluencyScores.current = [];
		prosodyScores.current = [];
		durations.current = [];
		jsonResult.current = {};

		if (recorderRef.current && !recorderRef.current.isRecording()) {
			await recorderRef.current.start();
		}

		console.log("Starting continuous recognition");

		const currentAuth = await ensureToken();
		if (!currentAuth) {
			console.error(
				"Token not available or reference text not provided."
			);
			// No session could be started: don't leave the microphone recording
			// (unless an already-running session still owns the recorder).
			if (!recognizerRef.current && recorderRef.current?.isRecording()) {
				await recorderRef.current.stop();
			}
			return;
		}

		// if already running, then return
		if (recognizerRef.current) {
			console.log("Already running");
			return;
		}

		// Words the speaker is expected to say, normalized once per session:
		// lower-cased, every whitespace run treated as one separator, only
		// alphanumerics and hyphens kept, and tokens without any alphanumeric
		// (empty strings, a lone "-") dropped because they can never be recognized.
		const expectedWords = referenceText
			.toLowerCase()
			.replace(/\s+/g, " ")
			.replace(/[^a-zA-Z0-9- ]/g, "")
			.split(" ")
			.filter((word: string) => /[a-z0-9]/.test(word));

		const speechConfig = sdk.SpeechConfig.fromAuthorizationToken(
			currentAuth.token,
			currentAuth.region
		);
		speechConfig.speechRecognitionLanguage = language;
		const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput();
		const recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig);

		const pronunciationAssessmentConfig =
			new sdk.PronunciationAssessmentConfig(
				referenceText,
				sdk.PronunciationAssessmentGradingSystem.HundredMark,
				sdk.PronunciationAssessmentGranularity.Phoneme,
				true
			);
		pronunciationAssessmentConfig.phonemeAlphabet = "IPA";
		pronunciationAssessmentConfig.enableProsodyAssessment = true;
		pronunciationAssessmentConfig.enableMiscue = false;
		pronunciationAssessmentConfig.applyTo(recognizer);

		// Closes this recognizer at most once and frees the "running" slot if it
		// still belongs to this session.
		let recognizerReleased = false;
		const releaseRecognizer = () => {
			if (recognizerReleased) {
				return;
			}
			recognizerReleased = true;
			if (recognizerRef.current === recognizer) {
				recognizerRef.current = null;
			}
			try {
				recognizer.close();
			} catch (error) {
				// May already have been closed by stopPronunciationAssessment().
				console.error("Error closing recognizer", error);
			}
		};

		recognizer.recognizing = (s: any, e: any) => {
			console.log(`RECOGNIZING: Text=${e.result.text}`);
			callbacks.recognizing(s, e);
		}

		recognizer.recognized = async (s: any, e: any) => {
			if (e.result.reason === sdk.ResultReason.RecognizedSpeech) {
				console.log(`RECOGNIZED: Text=${e.result.text}`);
			} else if (e.result.reason === sdk.ResultReason.NoMatch) {
				console.log("No speech could be recognized.");
				return;
			}

			try {
				// Parsing lives inside the try so a missing or malformed payload is
				// logged instead of surfacing as an unhandled promise rejection.
				const rawJson = e.result.properties.getProperty(
					sdk.PropertyId.SpeechServiceResponse_JsonResult
				);
				if (!rawJson) {
					return;
				}
				const recognitionJson = JSON.parse(rawJson);
				const nBest = recognitionJson?.NBest;
				if (!nBest || nBest.length === 0) {
					return;
				}

				// Fold this phrase into the session accumulators.
				const best = nBest[0];
				const phraseWords = best.Words;
				startOffset.current = phraseWords[0].Offset;
				const spokenWords = _.map(phraseWords, (item: any) =>
					item.Word.toLowerCase()
				);
				currentText.current = currentText.current.concat(spokenWords);
				fluencyScores.current.push(
					best.PronunciationAssessment.FluencyScore
				);
				prosodyScores.current.push(
					best.PronunciationAssessment.ProsodyScore
				);
				const isSucceeded = recognitionJson.RecognitionStatus === "Success";
				const durationList = [] as any;
				_.forEach(phraseWords, (word: any) => {
					recognizedWords.current.push(word);
					durationList.push(word.Duration);
				});
				durations.current.push(_.sum(durationList));

				if (isSucceeded && phraseWords) {
					allWords.current.push(...phraseWords);
				}

				// Check if all words of the reference text have been said so far.
				// Recognized words get the same character filtering as the reference.
				const processedWords = currentText.current.map((word: any) => {
					return word.replace(/[^a-zA-Z0-9-]/g, "");
				});
				const recognizedSet = new Set(processedWords);

				// A reference without any usable word never completes.
				const allWordsSaid =
					expectedWords.length > 0 &&
					expectedWords.every((word: string) => recognizedSet.has(word));

				console.log(allWordsSaid, expectedWords, recognizedSet);

				if (!allWordsSaid) {
					console.log("Not all words said");
					return;
				}

				const [words, scores] =
					calculateOverallPronunciationScore(
						referenceText,
						allWords.current,
						currentText.current as [],
						startOffset.current,
						recognizedWords.current,
						fluencyScores.current,
						prosodyScores.current,
						durations.current,
						recognitionJson
					);

				// NOTE(review): recognition keeps running after this point, so
				// further speech can deliver results again — confirm callers stop
				// the session from their results callback.
				const recordedBlob = await recorderRef.current.stop();

				callbacks.results(
					words,
					scores,
					recordedBlob
				)
			} catch (error) {
				console.error("Error processing speech", error);
				return;
			}
		}

		recognizer.canceled = (s, e) => {
			console.log(`Canceled: ${e.errorDetails}`);
			if (!recognizerReleased) {
				// Release the recognizer whether or not stopping succeeds, so a
				// later start is not rejected as "Already running".
				recognizer.stopContinuousRecognitionAsync(
					releaseRecognizer,
					(error) => {
						console.error("Error stopping canceled recognition:", error);
						releaseRecognizer();
					}
				);
			}
			callbacks.canceled?.(e);
		};

		recognizerRef.current = recognizer;
		recognizer.startContinuousRecognitionAsync(
			undefined,
			(error) => {
				// e.g. microphone unavailable: free the slot instead of staying
				// stuck in the "running" state.
				console.error("Failed to start continuous recognition:", error);
				releaseRecognizer();
			}
		);
	};

	// Stops the recorder (if recording) and the active recognizer (if any).
	// The returned promise always settles: failures while stopping or closing
	// the recognizer are logged rather than left pending or rejected.
	const stopPronunciationAssessment = async (): Promise<void> => {
		if (recorderRef.current && recorderRef.current.isRecording()) {
			await recorderRef.current.stop();
		}

		// Capture the instance so the callbacks below act on the recognizer that
		// was actually asked to stop, even if the ref changes in the meantime.
		const recognizer = recognizerRef.current;
		if (!recognizer) {
			return;
		}

		await new Promise<void>((resolve) => {
			let settled = false;
			const finish = (error?: unknown) => {
				if (settled) {
					return;
				}
				settled = true;
				if (error !== undefined) {
					console.error("Error stopping recognition:", error);
				}
				try {
					recognizer.close();
				} catch (closeError) {
					// The recognizer may already have been closed elsewhere.
					console.error("Error closing recognizer", closeError);
				}
				if (recognizerRef.current === recognizer) {
					recognizerRef.current = null;
				}
				resolve();
			};

			try {
				recognizer.stopContinuousRecognitionAsync(
					() => finish(),
					(error) => finish(error)
				);
			} catch (error) {
				// Stopping an already-disposed recognizer can throw synchronously.
				finish(error);
			}
		});
	};

	// Context value handed to consumers of useAzure(). It is rebuilt on every
	// render, so the functions it carries close over the latest `auth` state.
	const value: AzureContextType = {
		auth,
		ensureToken,
		speakTextAsync,
		stopTextAsync,
		startPronunciationAssessment,
		stopPronunciationAssessment,
	};

	return (
		<AzureContext.Provider value={value}>{children}</AzureContext.Provider>
	);
};

/**
 * Hook giving access to the Azure speech helpers.
 *
 * @returns The value provided by the nearest `AzureProvider`.
 * @throws Error when called outside of an `AzureProvider`.
 */
export const useAzure = (): AzureContextType => {
	const azure = useContext(AzureContext);
	if (azure !== null) {
		return azure;
	}
	throw new Error("useAzure must be used within an AzureProvider");
};
