"""
APEX V16 — Anthropic Claude AI client.

Wraps the Anthropic Python SDK with:
  - Async-native interface (no time.sleep in async loops)
  - Structured error categorization (credit / overload / invalid / unknown)
  - Retry policy with explicit logging (retries/errors via LoggerBundle;
    ai_call audit events to brain_log.jsonl)
  - Configurable model, max_tokens, temperature, retries

Replaces V15's MarketRouter.call_ai() which was sync and called
from async code via asyncio.to_thread(). V16 is async-first.

Environment:
  ANTHROPIC_API_KEY: required, read from env (typically loaded from .env
                    by main.py before instantiating AIClient).

Single-key design: V16 uses ONE Anthropic key (paid plan).
No round-robin (which V15 had only as a Gemini compat shim anyway).
"""

from __future__ import annotations

import asyncio
import os
import time as _time
from dataclasses import dataclass
from typing import Optional

try:
    import anthropic
except ImportError as _e:
    anthropic = None
    _import_error = _e
else:
    _import_error = None


# ============================================================
# RESULT TYPE
# ============================================================

@dataclass(frozen=True)
class AIResponse:
    """Result of an AI call. Either text is set, or error is set."""
    text: Optional[str]
    error_kind: Optional[str] = None    # "credit" | "overload" | "invalid" | "unknown" | None
    attempts: int = 0
    error_detail: str = ""
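
# Illustrative reading of the fields (sketch; `resp` is any AIResponse):
#     resp.text is not None  -> success (error_kind is None)
#     resp.text is None      -> failure; inspect resp.error_kind and
#                               resp.error_detail for the category / diagnostics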


# ============================================================
# SYSTEM PROMPTS
# ============================================================

# Universal Brain-decision guard. Anthropic's `system=` parameter takes
# precedence over per-turn user instructions, so this defends against
# prompt drift where a Brain prompt forgets the "JSON only" clause
# (V16 incident 2026-04-29: TF entry prompt missing the clause that
# MR/bias both had -> AI emitted markdown CoT, parser failed). System
# prompt does not consume `max_tokens` (output budget).
SYSTEM_PROMPT_BRAIN_DECISIONS: str = (
    "All Brain decisions must be JSON only. "
    "No markdown, no prose outside the JSON object. "
    "Output a single valid JSON document."
)
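
# ask_for_decision() passes this verbatim as the `system=` argument; plain
# ask() callers may supply their own `system` string instead.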


# ============================================================
# AI CLIENT
# ============================================================

class AIClient:
    """
    Async Anthropic Claude client.

    Usage:
        client = AIClient(model="claude-haiku-4-5-20251001")
        await client.connect()
        resp = await client.ask("What is 2+2?")
        if resp.text:
            print(resp.text)
    """

    DEFAULT_MODEL = "claude-haiku-4-5-20251001"
    DEFAULT_MAX_TOKENS = 2048
    DEFAULT_TEMPERATURE = 0.2
    DEFAULT_MAX_RETRIES = 3
    DEFAULT_RETRY_SLEEP_SEC = 2.0
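
    # Per-call overrides: ask() accepts temperature= and max_tokens= arguments
    # that take precedence over these defaults for a single request.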

    def __init__(
        self,
        model: Optional[str] = None,
        api_key: Optional[str] = None,
        max_tokens: int = DEFAULT_MAX_TOKENS,
        max_retries: int = DEFAULT_MAX_RETRIES,
        retry_sleep_sec: float = DEFAULT_RETRY_SLEEP_SEC,
        logger=None,
    ) -> None:
        """
        Args:
            model: Claude model string (default: claude-haiku-4-5-20251001).
            api_key: Anthropic key. If None, read from ANTHROPIC_API_KEY env.
            max_tokens: per-call cap.
            max_retries: number of attempts on retryable errors.
            retry_sleep_sec: seconds to wait between retries (async).
            logger: LoggerBundle (optional). If provided, errors logged
                    to error_log.jsonl and system log.
        """
        if anthropic is None:
            raise RuntimeError(
                f"anthropic SDK not installed: {_import_error}. "
                f"Run: pip install anthropic"
            )

        self.model = model or self.DEFAULT_MODEL
        self.api_key = api_key or os.getenv("ANTHROPIC_API_KEY")
        if not self.api_key:
            raise RuntimeError(
                "ANTHROPIC_API_KEY not set. Either pass api_key= or "
                "set the environment variable (typically via .env)."
            )

        self.max_tokens = max_tokens
        self.max_retries = max_retries
        self.retry_sleep_sec = retry_sleep_sec
        self.logger = logger

        self._client: Optional[anthropic.Anthropic] = None
        self._connected = False

    # ============================================================
    # CONNECTION
    # ============================================================

    async def connect(self) -> bool:
        """
        Initialize the Anthropic client. Cheap operation (no network call).
        Returns True on success.
        """
        try:
            self._client = anthropic.Anthropic(api_key=self.api_key)
            self._connected = True
            return True
        except Exception as e:
            self._log_error("ai.connect", str(e))
            return False

    def is_connected(self) -> bool:
        return self._connected and self._client is not None

    # ============================================================
    # ASK
    # ============================================================

    async def ask(
        self,
        prompt: str,
        temperature: float = DEFAULT_TEMPERATURE,
        max_tokens: Optional[int] = None,
        system: Optional[str] = None,
    ) -> AIResponse:
        """
        Send a prompt to Claude and return the response.

        Handles:
          - Empty prompt: returns error_kind='invalid'
          - Credit exhausted: NO retry (429 on /v1/messages with
            credit_balance_too_low or similar) - returns error_kind='credit'
          - Overload (529): retry with backoff up to max_retries
          - Invalid request (400): NO retry - returns error_kind='invalid'
          - Network/transient: retry up to max_retries

        Returns:
            AIResponse. Caller checks resp.text (str on success, None on failure)
            and resp.error_kind for diagnostics.
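
        Example (illustrative sketch; `client` and `handle_ai_failure` are
        the caller's names, not part of this module):

            resp = await client.ask("Summarize the 1h structure.", temperature=0.0)
            if resp.text is None:
                handle_ai_failure(resp.error_kind, resp.error_detail)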
        """
        if not self.is_connected():
            return AIResponse(
                text=None,
                error_kind="unknown",
                attempts=0,
                error_detail="client not connected; call connect() first",
            )

        clean = self._sanitize_prompt(prompt)
        if not clean:
            return AIResponse(
                text=None,
                error_kind="invalid",
                attempts=0,
                error_detail="empty prompt after sanitization",
            )

        mt = max_tokens if max_tokens is not None else self.max_tokens
        last_error = ""

        # Anthropic SDK rejects system="" but accepts a missing key,
        # so we only inject when caller actually supplied one.
        create_kwargs: dict = {
            "model": self.model,
            "max_tokens": mt,
            "temperature": temperature,
            "messages": [{"role": "user", "content": clean}],
        }
        if system:
            create_kwargs["system"] = system

        for attempt in range(1, self.max_retries + 1):
            try:
                # SDK call is sync; run in thread to keep loop responsive
                msg = await asyncio.to_thread(
                    self._client.messages.create,
                    **create_kwargs,
                )
                # Extract text from content blocks
                text = self._extract_text(msg)
                return AIResponse(text=text, attempts=attempt)

            except Exception as e:
                err = str(e)
                last_error = err
                lower = err.lower()

                # Credit/billing -> no retry
                if any(s in lower for s in ("credit", "billing", "balance")):
                    self._log_error("ai.ask", f"credit exhausted: {err[:120]}")
                    return AIResponse(
                        text=None,
                        error_kind="credit",
                        attempts=attempt,
                        error_detail=err[:200],
                    )

                # 400 invalid request -> no retry
                if "400" in err or "invalid_request" in lower:
                    self._log_error("ai.ask", f"invalid request: {err[:120]}")
                    return AIResponse(
                        text=None,
                        error_kind="invalid",
                        attempts=attempt,
                        error_detail=err[:200],
                    )

                # 529 overloaded -> retry
                if "529" in err or "overloaded" in lower:
                    self._log_warning("ai.ask", f"overloaded, retry {attempt}/{self.max_retries}")
                    if attempt < self.max_retries:
                        await asyncio.sleep(self.retry_sleep_sec)
                    continue

                # Generic transient -> retry
                self._log_warning("ai.ask", f"{err[:80]}, retry {attempt}/{self.max_retries}")
                if attempt < self.max_retries:
                    await asyncio.sleep(self.retry_sleep_sec)

        # All retries exhausted; report "overload" if that is what kept failing,
        # otherwise "unknown".
        kind = (
            "overload"
            if ("529" in last_error or "overloaded" in last_error.lower())
            else "unknown"
        )
        self._log_error("ai.ask", f"all {self.max_retries} retries failed: {last_error[:120]}")
        return AIResponse(
            text=None,
            error_kind=kind,
            attempts=self.max_retries,
            error_detail=last_error[:200],
        )

    # ============================================================
    # ASK FOR DECISION (specialized for Brain trading decisions)
    # ============================================================

    async def ask_for_decision(
        self,
        prompt: str,
        max_tokens: int = 1200,
        where: Optional[str] = None,
    ) -> AIResponse:
        """
        Specialized ask() for Brain trading decisions.

        - temperature=0.0 (deterministic).
        - system=SYSTEM_PROMPT_BRAIN_DECISIONS forces JSON-only output
          regardless of per-turn prompt phrasing (V16 incident 2026-04-29
          TF entry: missing per-turn JSON guard caused markdown CoT
          replies, parser failure, lost trades). System prompt does not
          consume the output token budget.
        - max_tokens=1200: TF entry CoT (step_1/2/3 + 8 fields) plus
          MR setup descriptions can legitimately reach ~500 tokens.
          The previous 600-cap risked truncation before the closing
          brace.

        Brain modules (brain_tf, brain_mr) MUST use this method, not
        the general ask(), for trade open/exit decisions.

        When `where` is provided (e.g. "manage_exit"), an `ai_call`
        event is emitted to brain_log with latency_ms / ok / attempts /
        error_kind so the operator can audit AI usage post-trade.
        Default None preserves existing behaviour for entry-prompt
        callers that do not yet pass `where`.
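
        Example (illustrative sketch; `ai` and `exit_prompt` are Brain-side
        names, extract_json_from_response is re-exported at the bottom of
        this module):

            resp = await ai.ask_for_decision(exit_prompt, where="manage_exit")
            decision = extract_json_from_response(resp.text) if resp.text else None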
        """
        started = _time.monotonic()
        resp = await self.ask(
            prompt,
            temperature=0.0,
            max_tokens=max_tokens,
            system=SYSTEM_PROMPT_BRAIN_DECISIONS,
        )
        if where is not None and self.logger is not None:
            elapsed_ms = (_time.monotonic() - started) * 1000.0
            try:
                self.logger.brain_log.write(
                    "ai_call",
                    where=where,
                    latency_ms=round(elapsed_ms, 1),
                    ok=(resp.text is not None),
                    attempts=resp.attempts,
                    error_kind=resp.error_kind,
                )
            except Exception:
                pass
        return resp

    # ============================================================
    # INTERNAL
    # ============================================================

    @staticmethod
    def _extract_text(msg) -> str:
        """
        Extract concatenated text from a Claude messages response.
        Claude responses are a list of content blocks; usually we want
        only text blocks (other types: tool_use, etc.).
        """
        parts = []
        for block in getattr(msg, "content", []):
            block_type = getattr(block, "type", None)
            if block_type == "text":
                parts.append(getattr(block, "text", ""))
        return "\n".join(parts).strip()

    @staticmethod
    def _sanitize_prompt(prompt: str) -> str:
        """
        Light sanitization: strip whitespace, remove NUL bytes,
        ensure non-empty. Heavy filtering should happen upstream
        (Brain logic) where context is known.
        """
        if not prompt:
            return ""
        cleaned = prompt.replace("\x00", "").strip()
        return cleaned

    def _log_error(self, where: str, detail: str) -> None:
        if self.logger is not None:
            self.logger.log_error(where=where, error=detail)
        else:
            # Stdout fallback (so errors are visible even without a LoggerBundle)
            print(f"[AI ERROR] {where}: {detail}")

    def _log_warning(self, where: str, detail: str) -> None:
        if self.logger is not None:
            self.logger.system.warning(f"[{where}] {detail}")


# ============================================================
# JSON HELPERS (for Brain JSON parsing)
# ============================================================
# Re-export from core.json_parsing so existing call sites
# (brain_tf._parse_ai_json, brain_mr._parse_ai_json) keep working
# without importing from core directly. Single source of truth in core/.
from core.json_parsing import extract_json_from_response  # noqa: F401
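

# ============================================================
# SMOKE TEST (illustrative sketch; not part of the V16 runtime)
# ============================================================
# Requires the anthropic SDK installed and ANTHROPIC_API_KEY set; the prompt
# and printout below are placeholders, not production behaviour.
if __name__ == "__main__":
    async def _smoke() -> None:
        client = AIClient()
        if not await client.connect():
            print("connect() failed")
            return
        resp = await client.ask("Reply with the single word: pong")
        print(resp.text or f"error: {resp.error_kind} ({resp.error_detail})")

    asyncio.run(_smoke())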
