# Date and time manipulation
import time
from datetime import timedelta

# Data handling and configuration
import pandas as pd
pd.set_option("future.no_silent_downcasting", True)

# Data processing and persistence
from processData.loadData import loadData

__all__ = ["getLiveData"]

def formatUTC(series):
    """
    Returns the given datetimes as a timezone-aware Series expressed in UTC.

    Parameters:
        series (pd.Series): A Series of datetime strings or objects.

    Returns:
        pd.Series: Naive values are labelled as UTC, aware values are converted to UTC.
    """

    parsed = pd.to_datetime(series)
    accessor = parsed.dt

    # Aware values already carry an offset, so they are converted rather than relabelled
    if accessor.tz is not None:
        return accessor.tz_convert("UTC")

    return accessor.tz_localize("UTC")

def buildUrl(session, time, previousTime, driverNumber, table, extraData=""):
    """
    Builds the OpenF1 API URL for a table, adding the filters that apply to it.

    Tables "stints", "drivers" and "race_control" never get a date filter.
    Tables "weather" and "position" only get the upper bound. Every other
    table gets both bounds, and only when both are provided.

    Parameters:
        session (str): Session key for the race.
        time (str or None): Upper bound of the time filter.
        previousTime (str or None): Lower bound of the time filter.
        driverNumber (str or None): Driver number to filter.
        table (str): API table to query.
        extraData (str): Additional query parameters, appended as-is.

    Returns:
        str: The full URL to call the OpenF1 API.
    """

    unfilteredTables = ("stints", "drivers", "race_control")
    upperBoundTables = ("weather", "position")

    # The laps table is filtered on the lap start timestamp instead of "date"
    dateField = "date_start" if table == "laps" else "date"

    params = [f"session_key={session}"]

    # %3E= and %3C= are the URL-encoded forms of >= and <=
    if table in upperBoundTables:
        if time:
            params.append(f"{dateField}%3C={time}")
    elif table not in unfilteredTables and previousTime and time:
        params.append(f"{dateField}%3E={previousTime}")
        params.append(f"{dateField}%3C={time}")

    if driverNumber is not None:
        params.append(f"driver_number={driverNumber}")

    if extraData:
        params.append(extraData)

    return "https://api.openf1.org/v1/" + f"{table}?" + "&".join(params)

def readJSON(url, columns, retries=5, delay=0.5):
    """
    Reads a JSON url and returns only the specified columns as a DataFrame.

    An attempt counts as failed when the read raises, the result is empty,
    a requested column is missing, or any requested column contains a null.
    Failed attempts are retried after waiting `delay` seconds; no wait happens
    after the last attempt.

    Parameters:
        url (str): The url returning JSON data.
        columns (list): List of column names to extract.
        retries (int): Number of attempts before giving up (default: 5).
        delay (float): Seconds to wait between attempts (default: 0.5).

    Returns:
        pd.DataFrame: Independent DataFrame with only the requested columns, or an
        empty DataFrame with those columns when every attempt failed.
    """

    for attempt in range(1, retries + 1):
        try:
            df = pd.read_json(url)
        except Exception:
            # Best effort: any fetch or parse failure is handled like an empty response
            df = pd.DataFrame()

        # Accept only a non-empty result that contains all requested columns
        if not df.empty and all(col in df.columns for col in columns):
            sub = df[columns]

            # Null values in the selected columns are treated as a failed attempt
            if not sub.isnull().values.any():
                # Return a copy so callers can rename or assign columns on it
                # without pandas warning about writing to a slice of `df`
                return sub.copy()

        # Waiting is only useful when another attempt will follow
        if attempt < retries:
            time.sleep(delay)

    # All attempts exhausted: return empty DataFrame with correct columns
    return pd.DataFrame(columns=columns)

def getChange(session, driverNumber, time, previousTime, lapsData):
    """
    Fetches newly started laps and reports, per driver, a lap that was just completed.

    The new laps are merged into the cached laps. For every driver whose two
    latest cached laps have consecutive numbers, a change is emitted for the
    earlier lap and that lap is removed from the cache.

    Parameters:
        session (str): Session key.
        driverNumber (str): Target driver's number.
        time (str): Current time.
        previousTime (str): Previous check time.
        lapsData (pd.DataFrame): Existing laps data.

    Returns:
        tuple: Updated laps data and list of detected changes. Each change is a
        dict with "DriverNumber", "LapNumber", "previousTime" (start of the
        finished lap) and "time" (start of the next lap minus one second).
    """

    # A single attempt is requested here (retries=1)
    fetched = readJSON(
        buildUrl(session, time, previousTime, driverNumber, "laps"),
        ["driver_number", "date_start", "lap_number"],
        1,
    )

    if fetched.empty:
        return lapsData, []

    # Align names and types with the cached laps
    fetched = fetched.rename(columns={
        "driver_number": "DriverNumber",
        "date_start": "Time",
        "lap_number": "LapNumber",
    })
    fetched["Time"] = pd.to_datetime(fetched["Time"])
    fetched["DriverNumber"] = fetched["DriverNumber"].astype(str)

    # Cached rows come first, so they win over re-fetched duplicates
    lapKey = ["DriverNumber", "LapNumber"]
    combined = pd.concat([lapsData, fetched], ignore_index=True)
    combined = combined.drop_duplicates(subset=lapKey).sort_values(by=lapKey)

    changes = []
    remaining = combined.copy()

    for driver, driverLaps in combined.groupby("DriverNumber"):
        # A lap can only be closed once the following lap has started
        if len(driverLaps) < 2:
            continue

        # Rows are already ordered by LapNumber within each driver
        finishedLap = driverLaps.iloc[-2]
        startedLap = driverLaps.iloc[-1]

        if finishedLap["LapNumber"] + 1 != startedLap["LapNumber"]:
            continue

        changes.append({
            "DriverNumber": driver,
            "LapNumber": finishedLap["LapNumber"],
            "previousTime": finishedLap["Time"].isoformat(),
            "time": (startedLap["Time"] - timedelta(seconds=1)).isoformat(),
        })

        # The finished lap has been reported, so it leaves the cache
        reported = (
            (remaining["DriverNumber"] == driver) &
            (remaining["LapNumber"] == finishedLap["LapNumber"])
        )
        remaining = remaining[~reported]

    return remaining, changes

def getLaps(session, time, previousTime, driverNumber):
    """
    Loads the laps, pit and drivers tables from the OpenF1 API and outer-merges them.

    Tables are merged on driver_number, plus lap_number when both sides have it.
    Missing pit durations are filled with 0.

    Parameters:
        session (str): Session key.
        time (str): Current time.
        previousTime (str): Previous check time.
        driverNumber (str): Driver number.

    Returns:
        pd.DataFrame: Merged data with the columns renamed to the pipeline's names.
    """

    # Columns requested from each table, in merge order
    tableColumns = {
        "laps": ["driver_number", "date_start", "duration_sector_1", "duration_sector_2", "duration_sector_3", "lap_duration", "lap_number"],
        "pit": ["driver_number", "lap_number", "pit_duration"],
        "drivers": ["driver_number", "name_acronym", "team_name"],
    }

    # Extra query filters per table; only laps has one
    extraFilters = {"laps": "lap_number>1"}

    columnNames = {
        "date_start": "Time",
        "driver_number": "DriverNumber",
        "duration_sector_1": "Sector1Time",
        "duration_sector_2": "Sector2Time",
        "duration_sector_3": "Sector3Time",
        "lap_duration": "LapTime",
        "lap_number": "LapNumber",
        "pit_duration": "PitTime",
        "name_acronym": "Driver",
        "team_name": "Team",
        "gap_to_leader": "GapToLeader",
        "interval": "GapToNext"
    }

    merged = None

    for table, cols in tableColumns.items():
        url = buildUrl(session, time, previousTime, driverNumber, table, extraFilters.get(table, ""))
        tableDf = readJSON(url, columns=cols)

        # The first table seeds the result
        if merged is None:
            merged = tableDf
            continue

        # Lap-level tables are also matched on the lap number
        mergeKeys = ["driver_number"]
        if "lap_number" in tableDf.columns and "lap_number" in merged.columns:
            mergeKeys.append("lap_number")

        merged = pd.merge(merged, tableDf, on=mergeKeys, how="outer")

    merged.rename(columns=columnNames, inplace=True)

    # Rows without a pit stop get a pit time of 0
    merged["PitTime"] = merged["PitTime"].fillna(0).infer_objects(copy=False)
    return merged

def loadGaps(session, time, previousTime, driverNumber, df):
    """
    Loads interval data and writes the average gaps into the lap data.

    The mean of gap_to_leader and of interval over the returned rows is
    assigned to every row of `df`. Non-numeric gap values are ignored when
    averaging. If no interval data is returned, both columns are set to None.

    Parameters:
        session (str): Session key.
        time (str): Current time.
        previousTime (str): Previous check time.
        driverNumber (str): Driver number.
        df (pd.DataFrame): Lap data (modified in place).

    Returns:
        pd.DataFrame: Updated with GapToLeader and GapToNext.
    """

    # Load interval data
    intervalUrl = buildUrl(session, time, previousTime, driverNumber, "intervals")
    interval = readJSON(intervalUrl, ["driver_number", "date", "gap_to_leader", "interval"])

    gapToLeader = None
    gapToNext = None

    if not interval.empty:
        # Gaps can arrive as text (OpenF1 documents values such as "+1 LAP" for
        # lapped cars); coercing them to NaN keeps .mean() from raising
        gapToLeader = pd.to_numeric(interval["gap_to_leader"], errors="coerce").mean()
        gapToNext = pd.to_numeric(interval["interval"], errors="coerce").mean()

    df["GapToLeader"] = gapToLeader
    df["GapToNext"] = gapToNext

    return df

def loadStint(session, time, previousTime, driverNumber, df):
    """
    Fills in stint number, compound, tyre life and fresh-tyre flag for each lap.

    A lap belongs to a stint when the driver matches and the lap number lies
    between the stint's lap_start and lap_end (both inclusive). Laps that match
    no stint keep None in the four columns.

    Parameters:
        session (str): Session key.
        time (str): Current time.
        previousTime (str): Previous check time.
        driverNumber (str): Driver number.
        df (pd.DataFrame): Lap-level data (modified in place).

    Returns:
        pd.DataFrame: Updated DataFrame with stint-related fields.
    """

    stintColumns = ["driver_number", "compound", "lap_end", "lap_start", "stint_number", "tyre_age_at_start"]
    stint = readJSON(buildUrl(session, time, previousTime, driverNumber, "stints"), stintColumns)

    # Order stints per driver so the compound can be carried forward
    stint = stint.sort_values(by=["driver_number", "stint_number"])
    stint["compound"] = stint.groupby("driver_number")["compound"].ffill()

    # Start every stint column empty
    for column in ("Compound", "Stint", "TyreLife", "FreshTyre"):
        df[column] = None

    for _, stintRow in stint.iterrows():
        firstLap = stintRow["lap_start"]
        startAge = stintRow["tyre_age_at_start"]

        sameDriver = df["DriverNumber"] == stintRow["driver_number"]
        withinStint = (df["LapNumber"] >= firstLap) & (df["LapNumber"] <= stintRow["lap_end"])
        laps = sameDriver & withinStint

        df.loc[laps, "Compound"] = stintRow["compound"]
        df.loc[laps, "Stint"] = stintRow["stint_number"]

        # Tyre age grows by one for each lap since the stint started
        df.loc[laps, "TyreLife"] = startAge + (df.loc[laps, "LapNumber"] - firstLap)

        # A set counts as fresh when it started the stint with age 0
        df.loc[laps, "FreshTyre"] = 1 if startAge == 0 else 0

    return df

def getCurrentTrackStatus(session, lapNumber):
    """
    Works out the track status for a lap from the race control messages.

    For the safety car and the virtual safety car, the latest deployment lap
    and the latest ending lap (both up to `lapNumber`) are compared. The phase
    counts as active when it was deployed and no ending message exists on or
    after the deployment lap. The safety car is checked first.

    Parameters:
        session (str): Session key.
        lapNumber (int): Current lap number.

    Returns:
        int: Track status code (1 = green, 6 = VSC, 8 = SC).
    """

    def latestLap(message):
        # Latest lap (up to lapNumber) carrying this message, or None if there is none
        extraData = f"message={message}&lap_number<={lapNumber}"
        found = readJSON(buildUrl(session, None, None, None, "race_control", extraData), ["lap_number"])
        return None if found.empty else found["lap_number"].max()

    def stillActive(deployedLap, endedLap):
        # Deployed, not ended since that deployment, and already in effect
        if pd.isna(deployedLap):
            return False
        if pd.notna(endedLap) and not endedLap < deployedLap:
            return False
        return lapNumber >= deployedLap

    # All four queries run before any decision is taken
    safetyCarDeployed = latestLap("SAFETY%20CAR%20DEPLOYED")
    safetyCarEnded = latestLap("SAFETY%20CAR%20IN%20THIS%20LAP")
    virtualDeployed = latestLap("VIRTUAL%20SAFETY%20CAR%20DEPLOYED")
    virtualEnded = latestLap("VIRTUAL%20SAFETY%20CAR%20ENDING")

    phases = (
        (8, safetyCarDeployed, safetyCarEnded),
        (6, virtualDeployed, virtualEnded),
    )

    for statusCode, deployedLap, endedLap in phases:
        if stillActive(deployedLap, endedLap):
            return statusCode

    # No SC/VSC active
    return 1

def loadTrackStatus(session, lapNumber, df):
    """
    Writes the track status code into the rows of one lap.

    Parameters:
        session (str): Session key.
        lapNumber (int): Current lap number.
        df (pd.DataFrame): Lap data (modified in place).

    Returns:
        pd.DataFrame: Updated DataFrame with 'TrackStatus' column.
    """

    statusCode = getCurrentTrackStatus(session, lapNumber)

    # Only the rows of the requested lap receive the status
    isRequestedLap = df["LapNumber"] == lapNumber
    df.loc[isRequestedLap, "TrackStatus"] = statusCode

    return df

def loadWeather(session, time, previousTime, df):
    """
    Joins each lap with the latest weather sample taken at or before its timestamp.

    Rows without a Time value are dropped. The Time column of the passed
    DataFrame is converted to datetime in place.

    Parameters:
        session (str): Session key.
        time (str): Current time.
        previousTime (str): Previous check time.
        df (pd.DataFrame): Lap data.

    Returns:
        pd.DataFrame: DataFrame with weather metrics joined.
    """

    # API field -> pipeline column name, in the order the fields are requested
    weatherNames = {
        "air_temperature": "AirTemp",
        "humidity": "Humidity",
        "pressure": "Pressure",
        "rainfall": "Rainfall",
        "track_temperature": "TrackTemp",
        "wind_direction": "WindDirection",
        "wind_speed": "WindSpeed"
    }

    df["Time"] = pd.to_datetime(df["Time"])

    # merge_asof cannot handle missing keys
    laps = df.dropna(subset=["Time"])

    weather = readJSON(
        buildUrl(session, time, previousTime, None, "weather"),
        ["date", *weatherNames],
    )

    # merge_asof needs both sides sorted on their key
    laps = laps.sort_values("Time")
    weather = weather.sort_values("date")

    # Both keys must be UTC-aware to be comparable
    laps["Time"] = formatUTC(laps["Time"])
    weather["date"] = formatUTC(weather["date"])

    # direction="backward" picks the last sample at or before each lap
    joined = pd.merge_asof(
        laps,
        weather,
        left_on="Time",
        right_on="date",
        direction="backward"
    )

    return joined.drop(columns=["date"]).rename(columns=weatherNames)

def loadPosition(session, time, previousTime, driverNumber, df):
    """
    Adds to each lap the driver's latest position recorded at or before the lap's timestamp.

    The DriverNumber and Time columns of the passed DataFrame are converted in
    place (to string and to UTC respectively).

    Parameters:
        session (str): Session key.
        time (str): Current time.
        previousTime (str): Previous check time.
        driverNumber (str): Driver number.
        df (pd.DataFrame): Lap data.

    Returns:
        pd.DataFrame: Lap data with position column added.
    """

    position = readJSON(
        buildUrl(session, time, previousTime, driverNumber, "position"),
        ["date", "driver_number", "position"],
    )

    # formatUTC parses the values itself, so no separate to_datetime is needed
    position = position.rename(columns={"driver_number": "DriverNumber"})
    position["date"] = formatUTC(position["date"])

    # Both sides must use the same type for the per-driver match
    position["DriverNumber"] = position["DriverNumber"].astype(str)
    df["DriverNumber"] = df["DriverNumber"].astype(str)

    # merge_asof needs both sides sorted on their key
    position = position.sort_values("date")
    df["Time"] = formatUTC(df["Time"])
    laps = df.sort_values("Time")

    # Latest position at or before each lap, matched within the same driver
    joined = pd.merge_asof(
        laps,
        position,
        by="DriverNumber",
        left_on="Time",
        right_on="date",
        direction="backward"
    )

    return joined.drop(columns=["date"]).rename(columns={"position": "Position"})

def splitData(df, qualifyingData):
    """
    Joins the qualifying data onto the laps and returns race and qualifying frames.

    The qualifying dict is keyed by driver number; its values provide Q1, Q2,
    Q3 and GridPosition. The DriverNumber column of the passed DataFrame is
    converted to string in place.

    Parameters:
        df (pd.DataFrame): Lap data.
        qualifyingData (dict): Preloaded qualifying data.

    Returns:
        tuple: (race_df, qualifying_df). race_df is the lap data without the
        qualifying columns; qualifying_df has one row per distinct combination
        of driver and qualifying values, with GridPosition renamed to Position.
    """

    qualifyingColumns = ["Q1", "Q2", "Q3", "GridPosition"]

    # One row per driver, with the dict key exposed as DriverNumber
    qualifyingTable = (
        pd.DataFrame.from_dict(qualifyingData, orient="index")
        .reset_index()
        .rename(columns={"index": "DriverNumber"})
    )

    # Both sides must use string driver numbers to match
    qualifyingTable["DriverNumber"] = qualifyingTable["DriverNumber"].astype(str)
    df["DriverNumber"] = df["DriverNumber"].astype(str)

    merged = df.merge(qualifyingTable, on="DriverNumber", how="left")

    race = merged.drop(columns=qualifyingColumns)
    qualifying = (
        merged[["DriverNumber", *qualifyingColumns]]
        .drop_duplicates()
        .rename(columns={"GridPosition": "Position"})
    )

    return race, qualifying

def loadTelemetry(session, time, previousTime, driverNumber, lapNumber, driver):
    """
    Samples car telemetry for one driver and lap at 10-second steps.

    Starting from previousTime, every 10 seconds a 0.3-second window ending at
    that step is queried, and the first record of each non-empty window is kept.
    Any error raised along the way results in an empty DataFrame.

    Parameters:
        session (str): Session key.
        time (datetime): End time.
        previousTime (datetime): Start time.
        driverNumber (str): Driver number.
        lapNumber (int): Lap number.
        driver (str): Driver code or name.

    Returns:
        pd.DataFrame: Telemetry snapshot DataFrame (one row per 10s), or an
        empty DataFrame when nothing was loaded.
    """

    stepSize = timedelta(seconds=10)
    windowSize = timedelta(seconds=0.3)
    telemetryNames = {
        "driver_number": "DriverNumber",
        "rpm": "RPM",
        "speed": "Speed",
        "throttle": "Throttle",
        "date": "Time"
    }

    try:
        # Strings are accepted as well as datetime objects
        cursor = pd.to_datetime(previousTime)
        endTime = pd.to_datetime(time)

        snapshots = []

        while cursor < endTime:
            cursor = cursor + stepSize

            url = buildUrl(session, cursor.isoformat(), (cursor - windowSize).isoformat(), driverNumber, "car_data")
            records = readJSON(url, ["date", "driver_number", "rpm", "speed", "throttle"])

            if records.empty:
                continue

            records["date"] = pd.to_datetime(records["date"], format="mixed")
            records = records.rename(columns=telemetryNames)

            records["DriverNumber"] = records["DriverNumber"].astype(str)
            records["LapNumber"] = lapNumber
            records["Driver"] = driver

            # One record per window is enough for the snapshot
            snapshots.append(records.iloc[[0]])

        if not snapshots:
            return pd.DataFrame()

        return pd.concat(snapshots, ignore_index=True)
    except Exception:
        return pd.DataFrame()

def processChanges(change, session, qualifyingData):
    """
    Loads every data source for one detected lap change.

    Lap data is loaded first and then enriched, in order, with gaps, stints,
    weather, position and track status, before being split into race and
    qualifying data. Telemetry is loaded last, using the driver name found in
    the first race row.

    Parameters:
        change (dict): Change dict with time window and driver info.
        session (str): Session key.
        qualifyingData (dict): Static qualifying data.

    Returns:
        tuple: race_df, qualifying_df, telemetry_df
    """

    time = change["time"]
    previousTime = change["previousTime"]
    driverNumber = change["DriverNumber"]
    lapNumber = int(change["LapNumber"])

    window = (session, time, previousTime)

    laps = getLaps(*window, driverNumber)

    # Enrichment steps that are filtered by driver
    for enrich in (loadGaps, loadStint):
        laps = enrich(*window, driverNumber, laps)

    laps = loadWeather(*window, laps)
    laps = loadPosition(*window, driverNumber, laps)
    laps = loadTrackStatus(session, lapNumber, laps)

    raceSplit, qualifyingSplit = splitData(laps, qualifyingData)

    driverName = raceSplit["Driver"].iloc[0]
    telemetry = loadTelemetry(*window, driverNumber, lapNumber, driverName)

    return raceSplit, qualifyingSplit, telemetry

def returnData(dataList):
    """
    Stacks a list of DataFrames into one, or returns an empty frame for an empty list.

    Parameters:
        dataList (list): List of pd.DataFrames.

    Returns:
        pd.DataFrame: Concatenated DataFrame with a fresh 0..n-1 index.
    """

    # pd.concat raises on an empty list, so that case is handled first
    if not dataList:
        return pd.DataFrame()

    return pd.concat(dataList, ignore_index=True)

def getLiveData(outputFileName, dataTable, lapsTable, telemetryTable, year, roundNumber, city, session, qualifyingData, raceStartTime, driverNumber, time, previousTime, lapsData):
    """
    Checks for new lap completions and loads fresh data into the pipeline.

    When no lap change is detected, nothing is loaded. Otherwise every change
    is processed independently: a change whose processing raises is logged and
    skipped, and the remaining results are passed to loadData.

    Parameters:
        outputFileName (str): Base name for TFRecord export.
        dataTable (str): Final data table name.
        lapsTable (str): Staging laps table name.
        telemetryTable (str): Staging telemetry table name.
        year (int): Race year.
        roundNumber (int): Race round number.
        city (str): Host city.
        session (str): OpenF1 session key.
        qualifyingData (dict): Preloaded qualifying data.
        raceStartTime (datetime): Start timestamp of the race.
        driverNumber (str): Driver to monitor for changes.
        time (str): Current time.
        previousTime (str): Previous check time.
        lapsData (pd.DataFrame): Cached lap state for change detection.

    Returns:
        tuple: (changeDetected (bool), updatedLapsData (pd.DataFrame), firstNewLapNumber (int)).
        firstNewLapNumber is 0 when no change was detected.
    """

    # Imported here so the block does not depend on the module's import section
    import logging

    logger = logging.getLogger(__name__)

    # Check if there's a lap change
    lapsData, changes = getChange(session, driverNumber, time, previousTime, lapsData)

    if not changes:
        return False, lapsData, 0

    # Lowest lap number among the detected changes
    lapNumber = min(change["LapNumber"] for change in changes)

    allRaces = []
    allQualifying = []
    allTelemetry = []

    for change in changes:
        try:
            raceResult, qualifyingResult, telemetryResult = processChanges(change, session, qualifyingData)
        except Exception:
            # Best effort: one failing change must not block the others, but the
            # failure is recorded with its traceback instead of being hidden
            logger.exception(
                "Skipping lap change for driver %s, lap %s",
                change.get("DriverNumber"),
                change.get("LapNumber"),
            )
            continue

        if raceResult is not None:
            allRaces.append(raceResult)
        if qualifyingResult is not None:
            allQualifying.append(qualifyingResult)
        if telemetryResult is not None:
            allTelemetry.append(telemetryResult)

    raceDf = returnData(allRaces)
    qualifyingDf = returnData(allQualifying)
    telemetryDf = returnData(allTelemetry)

    loadData(outputFileName, dataTable, lapsTable, telemetryTable, year, roundNumber, city, True, raceStartTime, raceDf, qualifyingDf, telemetryDf)

    return True, lapsData, lapNumber