# Source code for impact_engine_evaluate.review.engine

"""ReviewEngine: orchestrates a single artifact review."""

from __future__ import annotations

import logging
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

import jinja2
import litellm
import yaml

from impact_engine_evaluate.config import load_config
from impact_engine_evaluate.review.models import (
    ArtifactPayload,
    PromptSpec,
    ReviewDimension,
    ReviewResponse,
    ReviewResult,
)

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# PromptBuilder
# ---------------------------------------------------------------------------


class PromptBuilder:
    """Load prompt specs and knowledge, then render chat messages.

    Encapsulates the Jinja2 template rendering and knowledge loading
    steps shared across all method reviewers. This is the shared entry
    layer inside the Evaluation Engine that runs before any LLM call.
    """

    def load_spec(self, path: Path) -> PromptSpec:
        """Load a PromptSpec from a YAML file.

        Parameters
        ----------
        path : Path
            Path to a YAML prompt template file.

        Returns
        -------
        PromptSpec

        Raises
        ------
        FileNotFoundError
            If *path* does not exist.
        """
        if not path.exists():
            msg = f"Prompt template not found: {path}"
            raise FileNotFoundError(msg)
        raw = _load_yaml(path)
        # "dimensions" may be either a YAML list or a comma-separated string.
        dimensions = raw.get("dimensions", [])
        if isinstance(dimensions, str):
            dimensions = [item.strip() for item in dimensions.split(",")]
        return PromptSpec(
            name=raw.get("name", "unknown"),
            version=str(raw.get("version", "0.0")),
            description=raw.get("description", ""),
            dimensions=dimensions,
            system_template=raw.get("system", ""),
            user_template=raw.get("user", ""),
        )

    def load_knowledge(self, directory: Path) -> str:
        """Concatenate all ``.md`` and ``.txt`` files in a directory.

        Parameters
        ----------
        directory : Path
            Directory containing knowledge files.

        Returns
        -------
        str
            Combined content separated by section dividers; empty string
            when *directory* is not a directory.
        """
        if not directory.is_dir():
            return ""
        sections: list[str] = []
        # Sorted per extension so the combined output is deterministic.
        for pattern in ("*.md", "*.txt"):
            for knowledge_file in sorted(directory.glob(pattern)):
                text = knowledge_file.read_text(encoding="utf-8")
                sections.append(text)
                logger.debug("Loaded knowledge file: %s (%d chars)", knowledge_file, len(text))
        return "\n\n---\n\n".join(sections)

    def build(self, spec: PromptSpec, variables: dict[str, Any]) -> list[dict[str, str]]:
        """Render a prompt spec into chat messages.

        Parameters
        ----------
        spec : PromptSpec
            The prompt template to render.
        variables : dict[str, Any]
            Template variables.

        Returns
        -------
        list[dict[str, str]]
            Chat messages suitable for LLM completion; roles whose
            rendered text is empty are omitted.
        """
        rendered = (
            ("system", _render_template(spec.system_template, variables)),
            ("user", _render_template(spec.user_template, variables)),
        )
        return [{"role": role, "content": text} for role, text in rendered if text]
# --------------------------------------------------------------------------- # ResultsBuilder # ---------------------------------------------------------------------------
class ResultsBuilder:
    """Parse structured LLM output into a ReviewResult.

    Translates the raw Pydantic ``ReviewResponse`` from LiteLLM into the
    ``ReviewResult`` dataclass used downstream. This is the shared exit
    layer inside the Evaluation Engine that runs after every LLM call.
    """

    def parse(
        self,
        artifact: ArtifactPayload,
        spec: PromptSpec,
        model: str,
        response: Any,
    ) -> ReviewResult:
        """Parse a LiteLLM structured response into a ReviewResult.

        Parameters
        ----------
        artifact : ArtifactPayload
            The artifact that was reviewed.
        spec : PromptSpec
            Prompt spec used for the review.
        model : str
            Model identifier that produced the response.
        response : Any
            Raw LiteLLM completion response. Either
            ``choices[0].message.parsed`` (OpenAI-style structured
            output) or ``choices[0].message.content`` (JSON string, as
            returned by ollama and other backends) is accepted.

        Returns
        -------
        ReviewResult
        """
        message = response.choices[0].message
        structured = getattr(message, "parsed", None)
        if structured is None:
            # Backends such as ollama return JSON in .content rather than
            # populating .parsed (litellm structured-output path).
            structured = ReviewResponse.model_validate_json(message.content or "{}")

        scored_dimensions = [
            ReviewDimension(name=item.name, score=item.score, justification=item.justification)
            for item in structured.dimensions
        ]

        result = ReviewResult(
            initiative_id=artifact.initiative_id,
            prompt_name=spec.name,
            prompt_version=spec.version,
            backend_name="litellm",
            model=model,
            dimensions=scored_dimensions,
            overall_score=structured.overall,
            raw_response=structured.model_dump_json(),
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
        logger.info(
            "Reviewed initiative=%s prompt=%s overall=%.3f",
            result.initiative_id,
            result.prompt_name,
            result.overall_score,
        )
        return result
# --------------------------------------------------------------------------- # ReviewEngine # ---------------------------------------------------------------------------
class ReviewEngine:
    """Execute an artifact review via LiteLLM.

    Parameters
    ----------
    default_model : str
        Default model identifier for completions.
    default_temperature : float
        Default temperature for completions.
    default_max_tokens : int
        Default max tokens for completions.
    litellm_extra : dict[str, Any] | None
        Additional kwargs forwarded to ``litellm.completion()``.
    """

    def __init__(
        self,
        *,
        default_model: str = "claude-sonnet-4-5-20250929",
        default_temperature: float = 0.0,
        default_max_tokens: int = 4096,
        litellm_extra: dict[str, Any] | None = None,
    ) -> None:
        self._default_model = default_model
        self._default_temperature = default_temperature
        self._default_max_tokens = default_max_tokens
        self._litellm_extra = litellm_extra or {}
        self._prompt_builder = PromptBuilder()
        self._results_builder = ResultsBuilder()

    @classmethod
    def from_config(cls, config: dict | str | None = None) -> ReviewEngine:
        """Construct a ReviewEngine from a config dict or raw source.

        Parameters
        ----------
        config : dict | str | None
            A config dict, a YAML file path, or ``None`` for defaults.

        Returns
        -------
        ReviewEngine
        """
        config = load_config(config)
        backend = config["backend"]
        return cls(
            default_model=backend["model"],
            default_temperature=backend["temperature"],
            default_max_tokens=backend["max_tokens"],
            litellm_extra=backend.get("extra", {}),
        )

    def review(
        self,
        artifact: ArtifactPayload,
        spec: PromptSpec,
        knowledge_context: str = "",
        *,
        model: str | None = None,
        temperature: float | None = None,
        max_tokens: int | None = None,
    ) -> ReviewResult:
        """Execute a review of the given artifact.

        Parameters
        ----------
        artifact : ArtifactPayload
            The artifact to review.
        spec : PromptSpec
            Prompt template specification.
        knowledge_context : str
            Pre-loaded domain knowledge text.
        model : str | None
            Model override for this call.
        temperature : float | None
            Temperature override for this call.
        max_tokens : int | None
            Max tokens override for this call.

        Returns
        -------
        ReviewResult
        """
        variables: dict[str, Any] = {
            "artifact": artifact.artifact_text,
            "model_type": artifact.model_type,
            "sample_size": artifact.sample_size,
            "knowledge_context": knowledge_context,
            **artifact.metadata,
        }
        messages = self._prompt_builder.build(spec, variables)
        used_model = model or self._default_model
        kwargs: dict[str, Any] = {
            "model": used_model,
            "messages": messages,
            "temperature": temperature if temperature is not None else self._default_temperature,
            # Explicit None check (was `max_tokens or default`, which silently
            # discarded a falsy override) — now consistent with temperature.
            "max_tokens": max_tokens if max_tokens is not None else self._default_max_tokens,
            "response_format": ReviewResponse,
            **self._litellm_extra,
        }
        logger.debug("litellm request model=%s messages=%d", kwargs["model"], len(messages))
        response = litellm.completion(**kwargs)
        return self._results_builder.parse(artifact, spec, used_model, response)
# --------------------------------------------------------------------------- # Module-level shims (backward compatibility) # --------------------------------------------------------------------------- _prompt_builder = PromptBuilder()
def load_prompt_spec(path: Path) -> PromptSpec:
    """Load a PromptSpec from a YAML file.

    Backward-compatible shim delegating to the shared
    :class:`PromptBuilder` instance.

    Parameters
    ----------
    path : Path
        Path to a YAML prompt template file.

    Returns
    -------
    PromptSpec
    """
    spec = _prompt_builder.load_spec(path)
    return spec
def render(spec: PromptSpec, variables: dict[str, Any]) -> list[dict[str, str]]:
    """Render a prompt spec into chat messages.

    Backward-compatible shim delegating to the shared
    :class:`PromptBuilder` instance.

    Parameters
    ----------
    spec : PromptSpec
        The prompt template to render.
    variables : dict[str, Any]
        Template variables.

    Returns
    -------
    list[dict[str, str]]
    """
    messages = _prompt_builder.build(spec, variables)
    return messages
def load_knowledge(directory: Path) -> str:
    """Concatenate all ``.md`` and ``.txt`` files in a directory.

    Backward-compatible shim delegating to the shared
    :class:`PromptBuilder` instance.

    Parameters
    ----------
    directory : Path
        Directory containing knowledge files.

    Returns
    -------
    str
    """
    combined = _prompt_builder.load_knowledge(directory)
    return combined
# ---------------------------------------------------------------------------
# Internal utilities
# ---------------------------------------------------------------------------

# Hoisted out of _render_template: the Environment is stateless configuration,
# so building a fresh one per render call was pure overhead.
_JINJA_ENV = jinja2.Environment(undefined=jinja2.Undefined)


def _load_yaml(path: Path) -> dict[str, Any]:
    """Load a YAML file, returning ``{}`` for an empty document."""
    with open(path, encoding="utf-8") as fh:
        return yaml.safe_load(fh) or {}


def _render_template(template: str, variables: dict[str, Any]) -> str:
    """Render a Jinja2 template string.

    Parameters
    ----------
    template : str
        Jinja2 template source; an empty string short-circuits to ``""``.
    variables : dict[str, Any]
        Template variables. Missing variables render as empty text
        (``jinja2.Undefined``) rather than raising.

    Returns
    -------
    str
    """
    if not template:
        return ""
    return _JINJA_ENV.from_string(template).render(**variables)