"""
APEX V16 — News filter (ForexFactory High-impact gating).

Port of the V14 filter. The V14 original had two bugs, fixed here:
  1. The `.json` URL was parsed as XML via ElementTree → V16 uses
     json.loads against the actual JSON endpoint.
  2. The hardcoded `+timedelta(hours=6)` "EST→IT" conversion drifts
     during DST mismatch periods (≈2 weeks/year). V16 parses the
     ISO 8601 offset embedded in the JSON (`-04:00` / `-05:00`) and
     converts to UTC via `.astimezone(timezone.utc)`, correct year-round.
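
Concretely, the offset-aware conversion (plain stdlib, doctest-style):

    >>> from datetime import datetime, timezone
    >>> dt = datetime.fromisoformat("2026-04-30T08:30:00-04:00")
    >>> dt.astimezone(timezone.utc).isoformat()
    '2026-04-30T12:30:00+00:00'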

Window (V14 fidelity): block from 45 min BEFORE to 15 min AFTER each
High event. Only HIGH impact gates entry; MEDIUM is cached but never blocks.

Fail-open policy (V14 fidelity):
  - Sync HTTP/parse failure → log warning, keep the last successful
    cache, last_sync_at NOT advanced. is_blocked keeps answering from
    the cached events.
  - Never-synced state → is_blocked returns None (no blocking).
  - Unknown symbol (not in ASSET_CURRENCIES) → returns None (onboarding-friendly).

Pure I/O at the edges (urllib via asyncio.to_thread to avoid blocking
the event loop). No broker, no AI.
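
Usage sketch (illustrative; the endpoint URL, symbol, and cache path
are placeholders, not values shipped by this module):

    nf = NewsFilter(
        enabled=True, before_min=45, after_min=15,
        source_url="https://example.com/ff_calendar_thisweek.json",
        cache_dir=Path("~/.apex/news_cache"),
    )
    await nf.sync()                    # inside a coroutine
    if nf.is_blocked("MNQ", datetime.now(timezone.utc)) is not None:
        ...  # High-impact event inside the window: skip entry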
"""

from __future__ import annotations

import asyncio
import json
import os
import tempfile
import time as _time
import urllib.error
import urllib.request
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from pathlib import Path
from typing import Optional

from core import config_futures as cfg_fut


# Minimum interval between fetch attempts after a failure. ForexFactory
# rate-limits aggressively (HTTP 429) so we back off for an hour and keep
# serving the cached events in the meantime.
_FAILURE_RETRY_INTERVAL = timedelta(hours=1)


# ============================================================
# DATACLASSES
# ============================================================

@dataclass(frozen=True)
class NewsEvent:
    """Single calendar event, post-parse, in UTC."""
    dt: datetime          # tz-aware UTC
    country: str          # "USD", "EUR", "GBP", "JPY", ...
    impact: str           # "High" | "Medium" | "Low"
    title: str


@dataclass(frozen=True)
class BlockingEvent:
    """Wrapper around the NewsEvent that triggered the block."""
    event: NewsEvent
    minutes_to_event: float   # signed: + future, - past
    reason: str               # "BEFORE" | "AFTER"


# ============================================================
# NEWS FILTER
# ============================================================

class NewsFilter:
    """
    Stateful filter: holds the last successfully parsed event list and
    answers `is_blocked(symbol, now_utc)` synchronously.

    Sync is async (network I/O wrapped via asyncio.to_thread). Caller
    schedules sync periodically; this class doesn't own a background task.
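
    A minimal scheduling sketch (illustrative; the 15-minute interval
    is an assumption, not a V14 value):

        async def news_loop(nf: NewsFilter) -> None:
            while True:
                await nf.sync()
                await asyncio.sleep(15 * 60)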
    """

    def __init__(
        self,
        *,
        enabled: bool,
        before_min: int,
        after_min: int,
        source_url: str,
        http_timeout: int = 10,
        logger=None,
        cache_dir: Optional[Path] = None,
    ) -> None:
        self.enabled = bool(enabled)
        self._before_min = int(before_min)
        self._after_min = int(after_min)
        self._source_url = source_url
        self._http_timeout = int(http_timeout)
        self._logger = logger
        self._upcoming: list[NewsEvent] = []
        self._last_sync_at: Optional[datetime] = None
        # Daily-cache state. _last_fetch_date is the UTC date (YYYY-MM-DD)
        # of the most recent successful fetch (either via HTTP or disk
        # reload); used to short-circuit sync() within the same day.
        # _last_failed_fetch_at throttles retries after an HTTP failure.
        self._cache_dir: Optional[Path] = (
            Path(cache_dir).expanduser() if cache_dir is not None else None
        )
        self._last_fetch_date: Optional[str] = None
        self._last_failed_fetch_at: Optional[datetime] = None

    # ------------------------------------------------------------
    # Public properties
    # ------------------------------------------------------------

    @property
    def last_sync_at(self) -> Optional[datetime]:
        return self._last_sync_at

    @property
    def upcoming_count(self) -> int:
        return len(self._upcoming)

    @property
    def high_impact_count(self) -> int:
        return sum(1 for e in self._upcoming if e.impact == "High")

    # ------------------------------------------------------------
    # SYNC
    # ------------------------------------------------------------

    async def sync(self) -> int:
        """
        Daily-cached calendar refresh. Returns the number of events
        served from the in-memory cache after this call.

        Strategy:
          1. Same UTC day already fetched (memory hit) → no I/O.
          2. Different UTC day but a disk cache exists for today
             (e.g. bot restart mid-session) → reload from disk, no HTTP.
          3. Need a fresh fetch. If the previous attempt failed less
             than 1h ago → skip (fail-open on whatever's in memory).
          4. Otherwise fetch + parse + persist disk cache.

        Fail-open at every step: HTTP error (incl. 429), parse error
        and disk-I/O error never raise to the caller and never clear
        the in-memory cache.
        """
        if not self.enabled:
            return 0

        now = datetime.now(timezone.utc)
        today = now.date().isoformat()

        # (1) Memory cache hit — already fetched today.
        if self._last_fetch_date == today and self._upcoming:
            self._log_cache_hit(source="memory", date=today,
                                events_cached=len(self._upcoming))
            return len(self._upcoming)

        # (2) Disk cache hit — covers bot restarts within the same day.
        disk_events = self._load_disk_cache(today)
        if disk_events is not None:
            self._upcoming = disk_events
            self._last_fetch_date = today
            self._last_sync_at = now
            self._log_cache_hit(source="disk", date=today,
                                events_cached=len(disk_events))
            return len(disk_events)

        # (3) Failure-retry throttle.
        if (
            self._last_failed_fetch_at is not None
            and (now - self._last_failed_fetch_at) < _FAILURE_RETRY_INTERVAL
        ):
            wait_s = int(
                (_FAILURE_RETRY_INTERVAL
                 - (now - self._last_failed_fetch_at)).total_seconds()
            )
            self._log_sync_failed(
                error="retry_throttled", error_kind="RetryThrottle",
                next_retry_in_seconds=wait_s,
            )
            return len(self._upcoming)

        # (4) Fresh fetch.
        t0 = _time.monotonic()
        try:
            raw = await asyncio.to_thread(self._fetch_blocking)
            events = self._parse(raw)
        except urllib.error.HTTPError as e:
            self._last_failed_fetch_at = now
            self._log_sync_failed(
                error=f"HTTP {getattr(e, 'code', '?')}",
                error_kind="HTTPError",
                http_status=int(getattr(e, "code", 0) or 0),
            )
            return len(self._upcoming)
        except Exception as e:
            self._last_failed_fetch_at = now
            self._log_sync_failed(error=str(e), error_kind=type(e).__name__)
            return len(self._upcoming)

        self._upcoming = events
        self._last_sync_at = now
        self._last_fetch_date = today
        self._last_failed_fetch_at = None
        self._save_disk_cache(today, events)
        latency_ms = int((_time.monotonic() - t0) * 1000)
        self._log_sync_ok(
            cache_source="fresh",
            date=today,
            events_loaded=len(events),
            high_impact_count=self.high_impact_count,
            latency_ms=latency_ms,
        )
        return len(events)

    def _fetch_blocking(self) -> bytes:
        """Synchronous urllib GET — runs inside asyncio.to_thread."""
        req = urllib.request.Request(
            self._source_url,
            headers={"User-Agent": "Mozilla/5.0 (apex-v16)"},
        )
        with urllib.request.urlopen(req, timeout=self._http_timeout) as r:
            return r.read()

    # ------------------------------------------------------------
    # DISK CACHE (best-effort, fail-open)
    # ------------------------------------------------------------

    def _cache_file_path(self, date: str) -> Optional[Path]:
        if self._cache_dir is None:
            return None
        return self._cache_dir / f"ff_calendar_{date}.json"

    def _load_disk_cache(self, date: str) -> Optional[list[NewsEvent]]:
        path = self._cache_file_path(date)
        if path is None or not path.exists():
            return None
        try:
            with path.open("r", encoding="utf-8") as f:
                payload = json.load(f)
            raw_events = payload.get("events", [])
            events: list[NewsEvent] = []
            for it in raw_events:
                dt = datetime.fromisoformat(it["dt"])
                if dt.tzinfo is None:
                    continue
                events.append(NewsEvent(
                    dt=dt.astimezone(timezone.utc),
                    country=str(it.get("country", "")),
                    impact=str(it.get("impact", "")),
                    title=str(it.get("title", ""))[:120],
                ))
            events.sort(key=lambda e: e.dt)
            return events
        except (OSError, ValueError, KeyError, TypeError, AttributeError):
            # AttributeError covers a corrupted cache whose top level
            # isn't a dict (payload.get would raise and escape the
            # fail-open contract otherwise).
            return None

    def _save_disk_cache(self, date: str, events: list[NewsEvent]) -> None:
        path = self._cache_file_path(date)
        if path is None:
            return
        try:
            self._cache_dir.mkdir(parents=True, exist_ok=True)  # type: ignore[union-attr]
        except OSError:
            return
        payload = {
            "fetched_at": datetime.now(timezone.utc).isoformat(),
            "date": date,
            "source_url": self._source_url,
            "events": [
                {
                    "dt": e.dt.isoformat(),
                    "country": e.country,
                    "impact": e.impact,
                    "title": e.title,
                }
                for e in events
            ],
        }
        tmp: Optional[str] = None
        try:
            fd, tmp = tempfile.mkstemp(
                prefix=path.stem + ".", suffix=".tmp", dir=str(path.parent),
            )
            with open(fd, "w", encoding="utf-8") as f:
                json.dump(payload, f, ensure_ascii=False)
                f.flush()
                try:
                    os.fsync(f.fileno())
                except OSError:
                    pass
            # Atomic publish: Path.replace is atomic on POSIX within one
            # filesystem, so readers never observe a torn file.
            Path(tmp).replace(path)
        except OSError:
            # mkstemp may fail before `tmp` is assigned; only clean up
            # a temp file that was actually created.
            if tmp is not None:
                try:
                    Path(tmp).unlink(missing_ok=True)
                except OSError:
                    pass

    @staticmethod
    def _parse(raw: bytes) -> list[NewsEvent]:
        """
        Parse FF JSON. Schema (one event):
            {"title": "...", "country": "USD", "impact": "High",
             "date": "2026-04-30T08:30:00-04:00", ...}

        - Drop events with impact not in {High, Medium}.
        - Drop events outside the next-24h window (V14 behavior).
        - Drop events with unparseable date.
        - All datetimes converted to UTC.
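
        Illustrative item that survives the filter (assuming its date
        falls inside the next-24h window at parse time):

            {"title": "Non-Farm Payrolls", "country": "USD",
             "impact": "High", "date": "2026-04-30T08:30:00-04:00"}

        becomes NewsEvent(dt=2026-04-30 12:30 UTC, country="USD",
        impact="High", title="Non-Farm Payrolls").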
        """
        data = json.loads(raw)
        if not isinstance(data, list):
            raise ValueError(f"FF JSON not a list: got {type(data).__name__}")

        now_utc = datetime.now(timezone.utc)
        cutoff = now_utc + timedelta(days=1)
        events: list[NewsEvent] = []
        for item in data:
            try:
                impact = str(item.get("impact", ""))
                if impact not in ("High", "Medium"):
                    continue
                date_str = item.get("date")
                if not date_str:
                    continue
                # ISO 8601 with offset, e.g. "2026-04-30T08:30:00-04:00"
                dt = datetime.fromisoformat(date_str)
                if dt.tzinfo is None:
                    # Defensive: FF should always carry offset; if not, skip.
                    continue
                dt_utc = dt.astimezone(timezone.utc)
                if not (now_utc <= dt_utc <= cutoff):
                    continue
                events.append(NewsEvent(
                    dt=dt_utc,
                    country=str(item.get("country", "")),
                    impact=impact,
                    title=str(item.get("title", ""))[:120],
                ))
            except (ValueError, TypeError, KeyError, AttributeError):
                # Skip malformed items (incl. non-dict entries) without
                # aborting the whole parse.
                continue

        events.sort(key=lambda e: e.dt)
        return events

    # ------------------------------------------------------------
    # GATE
    # ------------------------------------------------------------

    def is_blocked(
        self, symbol: str, now_utc: datetime,
    ) -> Optional[BlockingEvent]:
        """
        Return the BlockingEvent triggering a block, or None.

        Block iff (V14 fidelity):
          - filter enabled
          - any HIGH-impact event whose country is in symbol's currency
            exposure (ASSET_CURRENCIES) is within
            [now - after_min, now + before_min].

        MEDIUM events are kept in cache but never block.
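
        Worked example (before_min=45, after_min=15): a High USD event
        at 14:30 UTC blocks a USD-exposed symbol from 13:45:00 UTC
        (delta_min=+45, "BEFORE") through 14:45:00 UTC (delta_min=-15,
        "AFTER"); both endpoints inclusive.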

        Fail-open conditions (return None):
          - filter disabled
          - cache empty (never synced or all syncs failed)
          - symbol absent from ASSET_CURRENCIES
        """
        if not self.enabled:
            return None
        if not self._upcoming:
            return None

        currencies = cfg_fut.ASSET_CURRENCIES.get(symbol)
        if not currencies:
            return None

        currency_set = set(currencies)
        for ev in self._upcoming:
            if ev.impact != "High":
                continue
            if ev.country not in currency_set:
                continue
            delta_min = (ev.dt - now_utc).total_seconds() / 60.0
            # delta_min > 0 -> event is in the future (we're BEFORE)
            # delta_min < 0 -> event is in the past   (we're AFTER)
            if 0 <= delta_min <= self._before_min:
                return BlockingEvent(
                    event=ev,
                    minutes_to_event=delta_min,
                    reason="BEFORE",
                )
            if -self._after_min <= delta_min < 0:
                return BlockingEvent(
                    event=ev,
                    minutes_to_event=delta_min,
                    reason="AFTER",
                )
        return None

    # ------------------------------------------------------------
    # LOGGING
    # ------------------------------------------------------------

    def _log_sync_ok(self, **fields) -> None:
        if self._logger is None:
            return
        try:
            self._logger.brain_log.write(
                "news_sync",
                source_url=self._source_url,
                **fields,
            )
        except Exception:
            pass

    def _log_cache_hit(self, **fields) -> None:
        """Emit a 'news_cache_hit' event so memory/disk hits are
        distinguishable from fresh fetches in brain_log forensics."""
        if self._logger is None:
            return
        try:
            self._logger.brain_log.write(
                "news_cache_hit",
                source_url=self._source_url,
                **fields,
            )
        except Exception:
            pass

    def _log_sync_failed(self, **fields) -> None:
        if self._logger is None:
            return
        try:
            self._logger.brain_log.write(
                "news_sync_failed",
                source_url=self._source_url,
                events_in_cache=len(self._upcoming),
                **fields,
            )
        except Exception:
            pass
