#!/usr/bin/env python3
"""Recolector recurrente de metricas YouTube.

Publico (API key):
- Seguidores (subscriberCount)
- Vistas totales del canal (viewCount)
- Contenidos recientes subidos
- Viewers en vivo por stream activo (concurrentViewers)

Privado (OAuth, opcional):
- Vistas en un periodo (Analytics API)
- Monetizacion general en un periodo
- Monetizacion por video
"""

from __future__ import annotations

import argparse
import json
import logging
import os
import sys
import time
from dataclasses import dataclass
from datetime import date, datetime, timedelta, timezone
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence

from dotenv import load_dotenv
from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import mysql.connector

DATA_API_SERVICE = "youtube"
DATA_API_VERSION = "v3"
ANALYTICS_API_SERVICE = "youtubeAnalytics"
ANALYTICS_API_VERSION = "v2"

ANALYTICS_SCOPES = [
    "https://www.googleapis.com/auth/yt-analytics.readonly",
    "https://www.googleapis.com/auth/yt-analytics-monetary.readonly",
    "https://www.googleapis.com/auth/youtube.readonly",
]


@dataclass
class CollectorConfig:
    """Runtime settings for the collector, resolved from environment variables.

    Built by ``parse_env_config``; consumed by the collector, the private
    analytics helper, and the MySQL store.
    """

    api_key: str  # YouTube Data API key used for all public endpoints
    channel_ids: List[str]  # channel IDs to poll (comma-separated in the env var)
    output_dir: Path  # directory where JSON/JSONL snapshot files are written
    uploads_limit: int  # max recent uploads reported per channel (>= 1)
    live_scan_limit: int  # max recent videos scanned for active live streams (>= 1)
    interval_seconds: int  # clamped to >= 30; presumably the sleep between cycles — loop not shown here
    oauth_client_secrets: Optional[Path]  # OAuth client-secrets file; None disables private metrics
    oauth_token_file: Optional[Path]  # stored OAuth token; None disables private metrics
    private_lookback_days: int  # NOTE(review): analytics window length — used outside this view, confirm
    private_recent_videos_limit: int  # NOTE(review): per-video analytics cap — used outside this view, confirm
    mysql_host: Optional[str]  # MySQL settings; all four must be set to enable persistence
    mysql_user: Optional[str]
    mysql_password: Optional[str]
    mysql_database: Optional[str]


def utc_now_iso() -> str:
    """Return the current UTC time as an ISO-8601 string truncated to seconds."""
    moment = datetime.now(timezone.utc)
    return moment.replace(microsecond=0).isoformat()


def iso_to_mysql_datetime(value: Optional[str]) -> Optional[str]:
    """Convert an ISO-8601 UTC timestamp into MySQL DATETIME text.

    The ``T`` separator becomes a space and a trailing ``Z`` / ``+00:00``
    UTC marker is dropped. Falsy input yields None.
    """
    if not value:
        return None

    text = value.replace("T", " ")
    for utc_suffix in ("Z", "+00:00"):
        if text.endswith(utc_suffix):
            return text[: -len(utc_suffix)]
    return text


def append_jsonl(path: Path, payload: Dict[str, Any]) -> None:
    """Append *payload* as one JSON line to *path*, creating parent dirs."""
    path.parent.mkdir(parents=True, exist_ok=True)
    record = json.dumps(payload, ensure_ascii=False)
    with path.open("a", encoding="utf-8") as handle:
        handle.write(record + "\n")


def write_json(path: Path, payload: Dict[str, Any]) -> None:
    """Overwrite *path* with *payload* serialized as 2-space-indented JSON."""
    path.parent.mkdir(parents=True, exist_ok=True)
    text = json.dumps(payload, ensure_ascii=False, indent=2)
    with path.open("w", encoding="utf-8") as handle:
        handle.write(text)


def parse_env_config() -> CollectorConfig:
    """Build a CollectorConfig from environment variables (a .env file is honored).

    Raises:
        ValueError: when YOUTUBE_API_KEY or YOUTUBE_CHANNEL_IDS is missing.
    """
    load_dotenv()

    def text(name: str) -> str:
        # Stripped env value; empty string when the variable is unset.
        return os.getenv(name, "").strip()

    def number(name: str, default: str) -> int:
        return int(os.getenv(name, default))

    api_key = text("YOUTUBE_API_KEY")
    raw_channel_ids = text("YOUTUBE_CHANNEL_IDS")
    output_dir = Path(os.getenv("YT_OUTPUT_DIR", "./data")).resolve()
    uploads_limit = number("YT_UPLOADS_LIMIT", "8")
    live_scan_limit = number("YT_LIVE_SCAN_LIMIT", "12")
    interval_seconds = number("YT_INTERVAL_SECONDS", "300")
    private_lookback_days = number("YT_PRIVATE_LOOKBACK_DAYS", "7")
    private_recent_videos_limit = number("YT_PRIVATE_RECENT_VIDEOS_LIMIT", "5")
    mysql_host = text("MYSQL_HOST") or None
    mysql_user = text("MYSQL_USER") or None
    mysql_password = text("MYSQL_PASSWORD") or None
    mysql_database = text("MYSQL_DATABASE") or None

    oauth_client_secrets = text("YT_OAUTH_CLIENT_SECRETS") or None
    oauth_token_file = text("YT_OAUTH_TOKEN") or None

    channel_ids = [part.strip() for part in raw_channel_ids.split(",") if part.strip()]

    if not api_key:
        raise ValueError("Falta YOUTUBE_API_KEY en el entorno.")
    if not channel_ids:
        raise ValueError("Falta YOUTUBE_CHANNEL_IDS en el entorno.")

    return CollectorConfig(
        api_key=api_key,
        channel_ids=channel_ids,
        output_dir=output_dir,
        uploads_limit=max(1, uploads_limit),
        live_scan_limit=max(1, live_scan_limit),
        interval_seconds=max(30, interval_seconds),
        oauth_client_secrets=Path(oauth_client_secrets).resolve() if oauth_client_secrets else None,
        oauth_token_file=Path(oauth_token_file).resolve() if oauth_token_file else None,
        private_lookback_days=max(1, private_lookback_days),
        private_recent_videos_limit=max(0, private_recent_videos_limit),
        mysql_host=mysql_host,
        mysql_user=mysql_user,
        mysql_password=mysql_password,
        mysql_database=mysql_database,
    )


class MySQLSnapshotStore:
    """Optional MySQL persistence for collector snapshots.

    Each metric family is written twice: an append-only history table (one
    row per observation) and a ``*_current`` table that is upserted so it
    always holds the latest observation per key. Enabled only when all four
    MYSQL_* settings are present in the configuration.
    """

    def __init__(self, config: CollectorConfig):
        self.config = config

    def is_enabled(self) -> bool:
        """Return True when host, user, password and database are all set."""
        return all(
            [
                self.config.mysql_host,
                self.config.mysql_user,
                self.config.mysql_password,
                self.config.mysql_database,
            ]
        )

    def connect(self):
        """Open and return a fresh MySQL connection.

        Raises:
            RuntimeError: when the MySQL settings are incomplete.
        """
        if not self.is_enabled():
            raise RuntimeError("Persistencia MySQL no configurada en entorno.")

        return mysql.connector.connect(
            host=self.config.mysql_host,
            user=self.config.mysql_user,
            password=self.config.mysql_password,
            database=self.config.mysql_database,
        )

    def ensure_schema(self) -> None:
        """Create every table this store writes to, if it does not exist yet."""
        statements = [
            """
            CREATE TABLE IF NOT EXISTS youtube_channel_metrics (
                id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
                collected_at_utc DATETIME NOT NULL,
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                subscriber_count BIGINT UNSIGNED NOT NULL,
                total_view_count BIGINT UNSIGNED NOT NULL,
                video_count BIGINT UNSIGNED NOT NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (id),
                KEY idx_channel_metrics_channel_time (channel_id, collected_at_utc)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_live_metrics (
                id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
                collected_at_utc DATETIME NOT NULL,
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                video_id VARCHAR(32) NOT NULL,
                title VARCHAR(255) NOT NULL,
                concurrent_viewers BIGINT UNSIGNED NOT NULL,
                video_view_count BIGINT UNSIGNED NOT NULL,
                started_at DATETIME NULL,
                video_url VARCHAR(255) NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (id),
                KEY idx_live_metrics_video_time (video_id, collected_at_utc),
                KEY idx_live_metrics_channel_time (channel_id, collected_at_utc)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_uploads (
                video_id VARCHAR(32) NOT NULL,
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                title VARCHAR(255) NOT NULL,
                published_at DATETIME NULL,
                video_url VARCHAR(255) NULL,
                first_seen_utc DATETIME NOT NULL,
                last_seen_utc DATETIME NOT NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                PRIMARY KEY (video_id),
                KEY idx_uploads_channel_published (channel_id, published_at)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_channel_metrics_current (
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                collected_at_utc DATETIME NOT NULL,
                subscriber_count BIGINT UNSIGNED NOT NULL,
                total_view_count BIGINT UNSIGNED NOT NULL,
                video_count BIGINT UNSIGNED NOT NULL,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                PRIMARY KEY (channel_id)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_live_metrics_current (
                video_id VARCHAR(32) NOT NULL,
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                collected_at_utc DATETIME NOT NULL,
                title VARCHAR(255) NOT NULL,
                concurrent_viewers BIGINT UNSIGNED NOT NULL,
                video_view_count BIGINT UNSIGNED NOT NULL,
                started_at DATETIME NULL,
                video_url VARCHAR(255) NULL,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                PRIMARY KEY (video_id),
                KEY idx_live_current_channel (channel_id)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_private_channel_analytics (
                id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
                collected_at_utc DATETIME NOT NULL,
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                start_date DATE NOT NULL,
                end_date DATE NOT NULL,
                views BIGINT UNSIGNED NOT NULL,
                estimated_revenue DECIMAL(18,6) NOT NULL,
                monetized_playbacks DECIMAL(18,2) NOT NULL,
                cpm DECIMAL(18,6) NOT NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (id),
                KEY idx_private_channel_time (channel_id, collected_at_utc),
                KEY idx_private_channel_period (channel_id, start_date, end_date)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_private_channel_analytics_current (
                channel_id VARCHAR(64) NOT NULL,
                start_date DATE NOT NULL,
                end_date DATE NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                collected_at_utc DATETIME NOT NULL,
                views BIGINT UNSIGNED NOT NULL,
                estimated_revenue DECIMAL(18,6) NOT NULL,
                monetized_playbacks DECIMAL(18,2) NOT NULL,
                cpm DECIMAL(18,6) NOT NULL,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                PRIMARY KEY (channel_id, start_date, end_date)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_private_video_analytics (
                id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT,
                collected_at_utc DATETIME NOT NULL,
                channel_id VARCHAR(64) NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                video_id VARCHAR(32) NOT NULL,
                start_date DATE NOT NULL,
                end_date DATE NOT NULL,
                views BIGINT UNSIGNED NOT NULL,
                estimated_revenue DECIMAL(18,6) NOT NULL,
                cpm DECIMAL(18,6) NOT NULL,
                created_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP,
                PRIMARY KEY (id),
                KEY idx_private_video_time (video_id, collected_at_utc),
                KEY idx_private_video_period (video_id, start_date, end_date),
                KEY idx_private_video_channel (channel_id, start_date, end_date)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
            """
            CREATE TABLE IF NOT EXISTS youtube_private_video_analytics_current (
                channel_id VARCHAR(64) NOT NULL,
                video_id VARCHAR(32) NOT NULL,
                start_date DATE NOT NULL,
                end_date DATE NOT NULL,
                channel_title VARCHAR(255) NOT NULL,
                collected_at_utc DATETIME NOT NULL,
                views BIGINT UNSIGNED NOT NULL,
                estimated_revenue DECIMAL(18,6) NOT NULL,
                cpm DECIMAL(18,6) NOT NULL,
                updated_at TIMESTAMP NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
                PRIMARY KEY (channel_id, video_id, start_date, end_date)
            ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci
            """,
        ]

        conn = self.connect()
        cursor = conn.cursor()
        try:
            for statement in statements:
                cursor.execute(statement)
            conn.commit()
        finally:
            # FIX: close the cursor/connection even when a statement raises;
            # previously both leaked on error.
            cursor.close()
            conn.close()

    def save_public_snapshot(self, snapshot: Dict[str, Any]) -> None:
        """Persist one public snapshot: channel stats, live streams, uploads.

        History tables receive one row per observation; *_current tables and
        youtube_uploads are upserted via ON DUPLICATE KEY UPDATE.
        """
        channel_metrics_sql = """
            INSERT INTO youtube_channel_metrics (
                collected_at_utc, channel_id, channel_title,
                subscriber_count, total_view_count, video_count
            ) VALUES (%s, %s, %s, %s, %s, %s)
        """
        channel_current_sql = """
            INSERT INTO youtube_channel_metrics_current (
                channel_id, channel_title, collected_at_utc,
                subscriber_count, total_view_count, video_count
            ) VALUES (%s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                channel_title = VALUES(channel_title),
                collected_at_utc = VALUES(collected_at_utc),
                subscriber_count = VALUES(subscriber_count),
                total_view_count = VALUES(total_view_count),
                video_count = VALUES(video_count)
        """
        live_metrics_sql = """
            INSERT INTO youtube_live_metrics (
                collected_at_utc, channel_id, channel_title, video_id,
                title, concurrent_viewers, video_view_count, started_at, video_url
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        live_current_sql = """
            INSERT INTO youtube_live_metrics_current (
                video_id, channel_id, channel_title, collected_at_utc,
                title, concurrent_viewers, video_view_count, started_at, video_url
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                channel_id = VALUES(channel_id),
                channel_title = VALUES(channel_title),
                collected_at_utc = VALUES(collected_at_utc),
                title = VALUES(title),
                concurrent_viewers = VALUES(concurrent_viewers),
                video_view_count = VALUES(video_view_count),
                started_at = VALUES(started_at),
                video_url = VALUES(video_url)
        """
        uploads_sql = """
            INSERT INTO youtube_uploads (
                video_id, channel_id, channel_title, title,
                published_at, video_url, first_seen_utc, last_seen_utc
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                channel_id = VALUES(channel_id),
                channel_title = VALUES(channel_title),
                title = VALUES(title),
                published_at = VALUES(published_at),
                video_url = VALUES(video_url),
                last_seen_utc = VALUES(last_seen_utc)
        """

        collected_at = iso_to_mysql_datetime(snapshot["collected_at_utc"])
        conn = self.connect()
        cursor = conn.cursor()
        try:
            for channel in snapshot.get("channels", []):
                channel_values = (
                    collected_at,
                    channel["channel_id"],
                    channel["channel_title"],
                    channel["subscriber_count"],
                    channel["total_view_count"],
                    channel["video_count"],
                )
                cursor.execute(channel_metrics_sql, channel_values)
                cursor.execute(
                    channel_current_sql,
                    (
                        channel["channel_id"],
                        channel["channel_title"],
                        collected_at,
                        channel["subscriber_count"],
                        channel["total_view_count"],
                        channel["video_count"],
                    ),
                )

                for live_item in channel.get("live_now", []):
                    started_at = iso_to_mysql_datetime(live_item.get("started_at"))
                    live_values = (
                        collected_at,
                        channel["channel_id"],
                        channel["channel_title"],
                        live_item["video_id"],
                        live_item["title"],
                        live_item["concurrent_viewers"],
                        live_item["video_view_count"],
                        started_at,
                        live_item.get("video_url"),
                    )
                    cursor.execute(live_metrics_sql, live_values)
                    cursor.execute(
                        live_current_sql,
                        (
                            live_item["video_id"],
                            channel["channel_id"],
                            channel["channel_title"],
                            collected_at,
                            live_item["title"],
                            live_item["concurrent_viewers"],
                            live_item["video_view_count"],
                            started_at,
                            live_item.get("video_url"),
                        ),
                    )

                for upload in channel.get("latest_uploads", []):
                    published_at = iso_to_mysql_datetime(upload.get("published_at"))
                    # first_seen_utc only sticks on the first insert; the
                    # upsert updates last_seen_utc afterwards.
                    cursor.execute(
                        uploads_sql,
                        (
                            upload["video_id"],
                            channel["channel_id"],
                            channel["channel_title"],
                            upload["title"],
                            published_at,
                            upload.get("video_url"),
                            collected_at,
                            collected_at,
                        ),
                    )

            conn.commit()
        finally:
            # FIX: previously the connection/cursor leaked if any insert failed.
            cursor.close()
            conn.close()

    def save_private_snapshot(self, payload: Dict[str, Any]) -> None:
        """Persist one private (OAuth) analytics snapshot.

        Expects ``period_views`` with start_date/end_date/views and
        ``general_monetization`` with revenue figures; a KeyError surfaces
        when those keys are absent (unchanged behavior).
        """
        channel_history_sql = """
            INSERT INTO youtube_private_channel_analytics (
                collected_at_utc, channel_id, channel_title, start_date, end_date,
                views, estimated_revenue, monetized_playbacks, cpm
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        channel_current_sql = """
            INSERT INTO youtube_private_channel_analytics_current (
                channel_id, start_date, end_date, channel_title, collected_at_utc,
                views, estimated_revenue, monetized_playbacks, cpm
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                channel_title = VALUES(channel_title),
                collected_at_utc = VALUES(collected_at_utc),
                views = VALUES(views),
                estimated_revenue = VALUES(estimated_revenue),
                monetized_playbacks = VALUES(monetized_playbacks),
                cpm = VALUES(cpm)
        """
        video_history_sql = """
            INSERT INTO youtube_private_video_analytics (
                collected_at_utc, channel_id, channel_title, video_id,
                start_date, end_date, views, estimated_revenue, cpm
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
        """
        video_current_sql = """
            INSERT INTO youtube_private_video_analytics_current (
                channel_id, video_id, start_date, end_date, channel_title,
                collected_at_utc, views, estimated_revenue, cpm
            ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            ON DUPLICATE KEY UPDATE
                channel_title = VALUES(channel_title),
                collected_at_utc = VALUES(collected_at_utc),
                views = VALUES(views),
                estimated_revenue = VALUES(estimated_revenue),
                cpm = VALUES(cpm)
        """

        collected_at = iso_to_mysql_datetime(payload["collected_at_utc"])
        channel_info = payload.get("channel", {})
        channel_id = channel_info.get("channel_id") or "MINE"
        channel_title = channel_info.get("channel_title") or "Canal autenticado"
        period_views = payload.get("period_views", {})
        monetization = payload.get("general_monetization", {})

        conn = self.connect()
        cursor = conn.cursor()
        try:
            channel_values = (
                collected_at,
                channel_id,
                channel_title,
                period_views["start_date"],
                period_views["end_date"],
                period_views["views"],
                monetization["estimated_revenue"],
                monetization["monetized_playbacks"],
                monetization["cpm"],
            )
            cursor.execute(channel_history_sql, channel_values)
            cursor.execute(
                channel_current_sql,
                (
                    channel_id,
                    period_views["start_date"],
                    period_views["end_date"],
                    channel_title,
                    collected_at,
                    period_views["views"],
                    monetization["estimated_revenue"],
                    monetization["monetized_playbacks"],
                    monetization["cpm"],
                ),
            )

            for video in payload.get("video_monetization", {}).get("videos", []):
                video_values = (
                    collected_at,
                    channel_id,
                    channel_title,
                    video["video_id"],
                    period_views["start_date"],
                    period_views["end_date"],
                    video["views"],
                    video["estimated_revenue"],
                    video["cpm"],
                )
                cursor.execute(video_history_sql, video_values)
                cursor.execute(
                    video_current_sql,
                    (
                        channel_id,
                        video["video_id"],
                        period_views["start_date"],
                        period_views["end_date"],
                        channel_title,
                        collected_at,
                        video["views"],
                        video["estimated_revenue"],
                        video["cpm"],
                    ),
                )

            conn.commit()
        finally:
            # FIX: previously the connection/cursor leaked if any insert failed.
            cursor.close()
            conn.close()


class YouTubeMetricsCollector:
    """Collects public channel stats, recent uploads and live viewer counts
    via the YouTube Data API (API-key authentication only)."""

    def __init__(self, config: CollectorConfig):
        self.config = config
        self.youtube = build(DATA_API_SERVICE, DATA_API_VERSION, developerKey=config.api_key)

    def get_recent_playlist_items(
        self, uploads_playlist_id: str, max_results: int
    ) -> List[Dict[str, Any]]:
        """Return up to *max_results* entries from a channel's uploads playlist."""
        request = self.youtube.playlistItems().list(
            part="snippet,contentDetails",
            playlistId=uploads_playlist_id,
            maxResults=max_results,
        )
        return request.execute().get("items", [])

    def get_channel_public_metrics(self, channel_id: str) -> Dict[str, Any]:
        """Fetch subscriber/view/video counts plus the uploads playlist id.

        Raises:
            ValueError: when the API returns no channel for *channel_id*.
        """
        response = self.youtube.channels().list(
            part="snippet,statistics,contentDetails",
            id=channel_id,
            maxResults=1,
        ).execute()

        found = response.get("items", [])
        if not found:
            raise ValueError(f"No se encontro el canal: {channel_id}")

        entry = found[0]
        stats = entry.get("statistics", {})
        related = entry.get("contentDetails", {}).get("relatedPlaylists", {})

        return {
            "channel_id": channel_id,
            "channel_title": entry.get("snippet", {}).get("title"),
            "subscriber_count": int(stats.get("subscriberCount", 0)),
            "total_view_count": int(stats.get("viewCount", 0)),
            "video_count": int(stats.get("videoCount", 0)),
            "uploads_playlist_id": related.get("uploads"),
        }

    def get_latest_uploads(self, playlist_items: List[Dict[str, Any]], max_results: int) -> List[Dict[str, Any]]:
        """Map the first *max_results* playlist entries to upload records."""

        def as_record(entry: Dict[str, Any]) -> Dict[str, Any]:
            details = entry.get("contentDetails", {})
            video_id = details.get("videoId")
            return {
                "video_id": video_id,
                "title": entry.get("snippet", {}).get("title"),
                "published_at": details.get("videoPublishedAt"),
                "video_url": f"https://www.youtube.com/watch?v={video_id}" if video_id else None,
            }

        return [as_record(entry) for entry in playlist_items[:max_results]]

    def get_live_realtime_views(self, playlist_items: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """Return concurrent-viewer data for playlist entries that are live now."""
        candidate_ids = [
            entry.get("contentDetails", {}).get("videoId") for entry in playlist_items
        ]
        candidate_ids = [vid for vid in candidate_ids if vid]
        if not candidate_ids:
            return []

        response = self.youtube.videos().list(
            part="snippet,liveStreamingDetails,statistics",
            id=",".join(candidate_ids),
        ).execute()

        active: List[Dict[str, Any]] = []
        for video in response.get("items", []):
            details = video.get("liveStreamingDetails", {})
            snippet = video.get("snippet", {})
            stats = video.get("statistics", {})

            # Keep only streams flagged live that have not already ended.
            if snippet.get("liveBroadcastContent") != "live" or "actualEndTime" in details:
                continue

            video_id = video.get("id")
            active.append(
                {
                    "video_id": video_id,
                    "title": snippet.get("title"),
                    "concurrent_viewers": int(details.get("concurrentViewers", 0)),
                    "video_view_count": int(stats.get("viewCount", 0)),
                    "started_at": details.get("actualStartTime"),
                    "video_url": f"https://www.youtube.com/watch?v={video_id}",
                }
            )

        return active

    def _build_channel_payload(self, channel_id: str) -> Dict[str, Any]:
        """Assemble one channel's public metrics, uploads and live streams."""
        metrics = self.get_channel_public_metrics(channel_id)
        playlist_id = metrics.get("uploads_playlist_id")
        scan_size = max(self.config.uploads_limit, self.config.live_scan_limit)
        items = (
            self.get_recent_playlist_items(playlist_id, scan_size) if playlist_id else []
        )
        uploads = self.get_latest_uploads(items, self.config.uploads_limit)
        live_streams = self.get_live_realtime_views(items[: self.config.live_scan_limit])
        return {**metrics, "live_now": live_streams, "latest_uploads": uploads}

    def _append_channel_files(
        self, channel_id: str, collected_at: str, payload: Dict[str, Any]
    ) -> None:
        """Append the channel's metrics, live streams and uploads to JSONL logs."""
        out_dir = self.config.output_dir
        append_jsonl(
            out_dir / "public_channel_metrics.jsonl",
            {
                "collected_at_utc": collected_at,
                "channel_id": channel_id,
                "subscriber_count": payload["subscriber_count"],
                "total_view_count": payload["total_view_count"],
                "video_count": payload["video_count"],
            },
        )
        for stream in payload["live_now"]:
            append_jsonl(
                out_dir / "realtime_live_views.jsonl",
                {"collected_at_utc": collected_at, "channel_id": channel_id, **stream},
            )
        for upload in payload["latest_uploads"]:
            append_jsonl(
                out_dir / "latest_uploads.jsonl",
                {"collected_at_utc": collected_at, "channel_id": channel_id, **upload},
            )

    def collect_public_snapshot(self) -> Dict[str, Any]:
        """Collect one snapshot for every configured channel and write it out.

        Per-channel failures are logged and skipped; the snapshot JSON is
        written regardless.
        """
        snapshot: Dict[str, Any] = {"collected_at_utc": utc_now_iso(), "channels": []}

        for channel_id in self.config.channel_ids:
            try:
                payload = self._build_channel_payload(channel_id)
                snapshot["channels"].append(payload)
                self._append_channel_files(channel_id, snapshot["collected_at_utc"], payload)
            except HttpError as err:
                logging.error("Error API publico para %s: %s", channel_id, err)
            except Exception as err:  # noqa: BLE001
                logging.error("Error inesperado canal %s: %s", channel_id, err)

        write_json(self.config.output_dir / "latest_public_snapshot.json", snapshot)
        return snapshot


class YouTubePrivateAnalytics:
    """OAuth-backed access to private YouTube Analytics data.

    Wraps the installed-app OAuth flow, token persistence/refresh, and the
    Analytics v2 report queries (period views, channel monetization, and
    per-video monetization).
    """

    def __init__(self, oauth_client_secrets: Path, oauth_token_file: Path):
        # Path to the OAuth client secrets JSON (installed-app type expected).
        self.oauth_client_secrets = oauth_client_secrets
        # Path where the authorized-user token is stored and refreshed.
        self.oauth_token_file = oauth_token_file

    def init_oauth(self) -> None:
        """Run the local-server OAuth flow and persist the granted token."""
        if not self.oauth_client_secrets.exists():
            raise FileNotFoundError(
                f"No existe archivo de credenciales OAuth: {self.oauth_client_secrets}"
            )

        client_config = json.loads(self.oauth_client_secrets.read_text(encoding="utf-8"))
        # A "web"-type client cannot complete the local-server flow used here.
        if "web" in client_config:
            raise RuntimeError(
                "El client_secret configurado es de tipo web. Usa /yt/oauth2start.php para generar oauth_token.json."
            )

        oauth_flow = InstalledAppFlow.from_client_secrets_file(
            str(self.oauth_client_secrets), ANALYTICS_SCOPES
        )
        granted = oauth_flow.run_local_server(port=0)

        self.oauth_token_file.parent.mkdir(parents=True, exist_ok=True)
        self.oauth_token_file.write_text(granted.to_json(), encoding="utf-8")
        logging.info("Token OAuth guardado en %s", self.oauth_token_file)

    def _load_credentials(self) -> Credentials:
        """Load the stored token, refreshing (and re-saving) it when expired."""
        if not self.oauth_token_file.exists():
            raise FileNotFoundError(
                f"No existe token OAuth en {self.oauth_token_file}. Ejecuta --oauth-init"
            )

        # Load with the scopes actually granted in the token file (no explicit
        # scope list) to avoid invalid_scope errors when the refresh token was
        # issued with a different scope set.
        stored = Credentials.from_authorized_user_file(str(self.oauth_token_file))
        if stored.expired and stored.refresh_token:
            stored.refresh(Request())
            self.oauth_token_file.write_text(stored.to_json(), encoding="utf-8")

        if not stored.valid:
            raise RuntimeError("Credenciales OAuth invalidas.")

        return stored

    def build_service(self):
        """Return an authenticated YouTube Analytics v2 service client."""
        return build(
            ANALYTICS_API_SERVICE, ANALYTICS_API_VERSION, credentials=self._load_credentials()
        )

    def get_authenticated_channel(self) -> Dict[str, str]:
        """Resolve the channel id/title that the OAuth identity belongs to."""
        youtube = build(
            DATA_API_SERVICE, DATA_API_VERSION, credentials=self._load_credentials()
        )
        reply = youtube.channels().list(part="id,snippet", mine=True, maxResults=1).execute()
        found = reply.get("items", [])
        if not found:
            raise RuntimeError("No fue posible resolver el canal autenticado con OAuth.")

        first = found[0]
        return {
            "channel_id": first.get("id", "MINE"),
            "channel_title": first.get("snippet", {}).get("title", "Canal autenticado"),
        }

    def get_views_in_period(self, start_date: str, end_date: str) -> Dict[str, Any]:
        """Return total channel views over the inclusive [start_date, end_date] range."""
        report = self.build_service().reports().query(
            ids="channel==MINE",
            startDate=start_date,
            endDate=end_date,
            metrics="views",
        ).execute()
        data_rows = report.get("rows", [])
        return {
            "start_date": start_date,
            "end_date": end_date,
            "views": int(data_rows[0][0]) if data_rows else 0,
        }

    def get_general_monetization(self, start_date: str, end_date: str) -> Dict[str, Any]:
        """Return channel-level revenue metrics for the period; zeros when unavailable."""
        service = self.build_service()
        try:
            report = service.reports().query(
                ids="channel==MINE",
                startDate=start_date,
                endDate=end_date,
                metrics="estimatedRevenue,monetizedPlaybacks,cpm",
            ).execute()
            data_rows = report.get("rows", [])
            revenue, monetized_playbacks, cpm = (data_rows[0] if data_rows else [0, 0, 0])
        except HttpError as err:
            # Tokens without the monetary scope can't read revenue; keep the
            # pipeline alive with zeroed monetization instead of failing.
            logging.warning(
                "Monetizacion no disponible para el token actual (%s). Se guardan ceros.",
                err,
            )
            revenue, monetized_playbacks, cpm = (0, 0, 0)

        return {
            "start_date": start_date,
            "end_date": end_date,
            "estimated_revenue": float(revenue),
            "monetized_playbacks": float(monetized_playbacks),
            "cpm": float(cpm),
        }

    def get_video_monetization(
        self, start_date: str, end_date: str, video_ids: List[str]
    ) -> Dict[str, Any]:
        """Return per-video revenue metrics for the period; zeros per video on error."""
        service = self.build_service()
        per_video: List[Dict[str, Any]] = []

        for vid in video_ids:
            try:
                report = service.reports().query(
                    ids="channel==MINE",
                    startDate=start_date,
                    endDate=end_date,
                    metrics="estimatedRevenue,views,cpm",
                    filters=f"video=={vid}",
                ).execute()
                data_rows = report.get("rows", [])
                revenue, views, cpm = (data_rows[0] if data_rows else [0, 0, 0])
            except HttpError as err:
                logging.warning(
                    "Monetizacion por video no disponible para %s (%s). Se guardan ceros.",
                    vid,
                    err,
                )
                revenue, views, cpm = (0, 0, 0)

            per_video.append(
                {
                    "video_id": vid,
                    "estimated_revenue": float(revenue),
                    "views": int(views),
                    "cpm": float(cpm),
                }
            )

        return {
            "start_date": start_date,
            "end_date": end_date,
            "videos": per_video,
        }


def resolve_private_date_range(
    args: argparse.Namespace, config: CollectorConfig
) -> tuple[str, str]:
    """Compute the ISO (start, end) date window for private analytics queries.

    Explicit CLI dates win; otherwise the window spans the configured lookback
    ending today. Raises ValueError on malformed or inverted dates.
    """
    default_start = date.today() - timedelta(days=config.private_lookback_days - 1)
    start_date = args.start_date if args.start_date else default_start.isoformat()
    end_date = args.end_date if args.end_date else date.today().isoformat()

    validate_date_str(start_date, "start-date")
    validate_date_str(end_date, "end-date")

    # ISO dates compare correctly as strings (lexicographic == chronological).
    if start_date > end_date:
        raise ValueError("start-date no puede ser mayor que end-date.")

    return start_date, end_date


def resolve_private_video_ids(
    args: argparse.Namespace, config: CollectorConfig, snapshot: Dict[str, Any]
) -> List[str]:
    """Pick the video ids used for per-video monetization queries.

    Priority: explicit --video-ids (comma-separated), else the most recent
    uploads found in the public snapshot, deduplicated and capped at
    config.private_recent_videos_limit. Returns [] when the limit is <= 0.
    """
    requested = [token.strip() for token in args.video_ids.split(",") if token.strip()]
    if requested:
        return requested

    limit = config.private_recent_videos_limit
    if limit <= 0:
        return []

    collected: List[str] = []
    already = set()
    for channel_entry in snapshot.get("channels", []):
        for upload_entry in channel_entry.get("latest_uploads", []):
            vid = upload_entry.get("video_id")
            if vid and vid not in already:
                already.add(vid)
                collected.append(vid)
                if len(collected) == limit:
                    return collected

    return collected


def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Parse command-line options for the collector.

    Args:
        argv: Optional argument list. Defaults to None, which makes argparse
            read sys.argv[1:] exactly as before — existing callers are
            unaffected, while tests can pass explicit argument lists.

    Returns:
        The parsed argparse.Namespace.
    """
    parser = argparse.ArgumentParser(description="Recolector YouTube publico y privado")
    parser.add_argument(
        "--interval-seconds",
        type=int,
        default=None,
        help="Intervalo entre ejecuciones en modo loop (default: YT_INTERVAL_SECONDS).",
    )
    parser.add_argument(
        "--runs",
        type=int,
        default=1,
        help="Cantidad de iteraciones. Usa 0 para infinito.",
    )
    parser.add_argument(
        "--oauth-init",
        action="store_true",
        help="Inicializa el token OAuth para Analytics API.",
    )
    parser.add_argument(
        "--start-date",
        type=str,
        help="Inicio de periodo YYYY-MM-DD para analytics privados.",
    )
    parser.add_argument(
        "--end-date",
        type=str,
        help="Fin de periodo YYYY-MM-DD para analytics privados.",
    )
    parser.add_argument(
        "--video-ids",
        type=str,
        default="",
        help="IDs de video separados por coma para monetizacion por video.",
    )
    parser.add_argument(
        "--include-private",
        action="store_true",
        help="Intenta extraer datos privados (views periodo y monetizacion).",
    )
    parser.add_argument(
        "--private-lookback-days",
        type=int,
        default=None,
        help="Si no indicas fechas, usa esta ventana de dias hacia atras para analytics privados.",
    )
    parser.add_argument(
        "--private-recent-videos-limit",
        type=int,
        default=None,
        help="Si no indicas video IDs, consulta monetizacion para los uploads mas recientes hasta este limite.",
    )
    return parser.parse_args(argv)


def validate_date_str(value: str, field_name: str) -> None:
    """Ensure *value* is a valid ISO date, raising ValueError naming *field_name*."""
    try:
        date.fromisoformat(value)
    except ValueError as parse_error:
        message = f"Fecha invalida para {field_name}: {value}"
        raise ValueError(message) from parse_error


def _collect_private_analytics(
    args: argparse.Namespace,
    config: CollectorConfig,
    private_analytics: YouTubePrivateAnalytics,
    mysql_store: MySQLSnapshotStore,
    snapshot: Dict[str, Any],
) -> None:
    """Fetch and persist private Analytics data for one collection cycle.

    Any failure propagates to the caller, which logs it and keeps the loop
    alive (private data is best-effort).
    """
    # Best-effort resolution of the OAuth channel; fall back to the public
    # snapshot's channel for persistence if channels.mine is unavailable.
    authenticated_channel: Optional[Dict[str, str]] = None
    try:
        authenticated_channel = private_analytics.get_authenticated_channel()
    except Exception as auth_err:  # noqa: BLE001
        logging.warning(
            "No fue posible validar canal OAuth con channels.mine (%s). "
            "Se usa canal del snapshot publico para persistencia.",
            auth_err,
        )

    # Refuse to mix private data into the store when the token belongs to a
    # channel that is not in the configured channel list.
    if (
        authenticated_channel
        and authenticated_channel.get("channel_id")
        and authenticated_channel.get("channel_id") not in config.channel_ids
    ):
        logging.error(
            "El token OAuth esta autenticado para el canal %s (%s), "
            "pero YOUTUBE_CHANNEL_IDS=%s. Se omite guardado de datos privados.",
            authenticated_channel.get("channel_title", "Canal"),
            authenticated_channel.get("channel_id", "desconocido"),
            ",".join(config.channel_ids),
        )
        raise RuntimeError(
            "Canal OAuth no coincide con YOUTUBE_CHANNEL_IDS. Reautoriza con la cuenta correcta."
        )

    start_date, end_date = resolve_private_date_range(args, config)
    period_views = private_analytics.get_views_in_period(start_date, end_date)
    general_monetization = private_analytics.get_general_monetization(start_date, end_date)

    first_channel = snapshot["channels"][0] if snapshot.get("channels") else {}
    channel_info = {
        "channel_id": (
            (authenticated_channel or {}).get("channel_id")
            or first_channel.get("channel_id")
            or config.channel_ids[0]
        ),
        "channel_title": (
            (authenticated_channel or {}).get("channel_title")
            or first_channel.get("channel_title")
            or "Canal autenticado"
        ),
    }

    private_payload: Dict[str, Any] = {
        "collected_at_utc": utc_now_iso(),
        "channel": channel_info,
        "period_views": period_views,
        "general_monetization": general_monetization,
    }

    video_ids = resolve_private_video_ids(args, config, snapshot)
    if video_ids:
        private_payload["video_monetization"] = private_analytics.get_video_monetization(
            start_date, end_date, video_ids
        )

    append_jsonl(config.output_dir / "private_analytics.jsonl", private_payload)
    write_json(config.output_dir / "latest_private_analytics.json", private_payload)
    if mysql_store.is_enabled():
        mysql_store.ensure_schema()
        mysql_store.save_private_snapshot(private_payload)
    logging.info("Datos privados actualizados")


def run() -> int:
    """Entry point: run collection cycles until the requested run count is done.

    Each cycle collects the public snapshot, optionally persists it to MySQL,
    and (with --include-private) collects private Analytics data.

    Returns:
        Process exit code: 0 on success, 2 on configuration errors.
    """
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s [%(levelname)s] %(message)s",
        handlers=[logging.StreamHandler(sys.stdout)],
    )

    args = parse_args()

    try:
        config = parse_env_config()
    except ValueError as err:
        logging.error("Configuracion invalida: %s", err)
        return 2

    # CLI overrides take precedence over env config. Use explicit `is not None`
    # checks (consistent across all three overrides) so an explicit
    # `--interval-seconds 0` is clamped to the 30s floor instead of being
    # silently ignored.
    if args.interval_seconds is not None:
        config.interval_seconds = max(30, args.interval_seconds)
    if args.private_lookback_days is not None:
        config.private_lookback_days = max(1, args.private_lookback_days)
    if args.private_recent_videos_limit is not None:
        config.private_recent_videos_limit = max(0, args.private_recent_videos_limit)

    collector = YouTubeMetricsCollector(config)
    mysql_store = MySQLSnapshotStore(config)

    private_analytics: Optional[YouTubePrivateAnalytics] = None
    if config.oauth_client_secrets and config.oauth_token_file:
        private_analytics = YouTubePrivateAnalytics(
            config.oauth_client_secrets, config.oauth_token_file
        )

    # --oauth-init only performs the OAuth bootstrap and exits.
    if args.oauth_init:
        if not private_analytics:
            logging.error(
                "Faltan YT_OAUTH_CLIENT_SECRETS y/o YT_OAUTH_TOKEN para iniciar OAuth."
            )
            return 2
        private_analytics.init_oauth()
        return 0

    run_forever = args.runs == 0
    remaining_runs = args.runs

    while run_forever or remaining_runs > 0:
        started = utc_now_iso()
        logging.info("Inicio de ciclo %s", started)

        snapshot = collector.collect_public_snapshot()
        logging.info("Snapshot publico generado con %d canales", len(snapshot["channels"]))

        # MySQL persistence is best-effort: a DB outage must not stop the loop.
        if mysql_store.is_enabled():
            try:
                mysql_store.ensure_schema()
                mysql_store.save_public_snapshot(snapshot)
                logging.info("Snapshot publico persistido en MySQL")
            except Exception as err:  # noqa: BLE001
                logging.error("No fue posible persistir snapshot en MySQL: %s", err)
        else:
            logging.info("Persistencia MySQL no configurada; se omite guardado en DB")

        if args.include_private:
            if not private_analytics:
                logging.warning(
                    "Se solicito include-private, pero faltan variables OAuth; se omite."
                )
            else:
                try:
                    _collect_private_analytics(
                        args, config, private_analytics, mysql_store, snapshot
                    )
                except Exception as err:  # noqa: BLE001
                    logging.error("No fue posible extraer datos privados: %s", err)

        if not run_forever:
            remaining_runs -= 1
            if remaining_runs <= 0:
                break

        logging.info("Esperando %s segundos", config.interval_seconds)
        time.sleep(config.interval_seconds)

    return 0


if __name__ == "__main__":
    # Propagate run()'s exit code to the shell (sys.exit raises SystemExit).
    sys.exit(run())