"""Job directory manifest: load, validate, and update."""
from __future__ import annotations
import json
import logging
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
logger = logging.getLogger(__name__)
MANIFEST_FILENAME = "manifest.json"
[docs]
@dataclass
class FileEntry:
    """Reference to one file listed in a job manifest.

    Parameters
    ----------
    path : str
        Location of the file, relative to the job directory.
    format : str
        Identifier of the file's format, for example ``"json"``,
        ``"yaml"`` or ``"csv"``.
    """

    path: str
    format: str
[docs]
@dataclass
class Manifest:
    """In-memory representation of a job directory's manifest.

    Only ``model_type`` is mandatory; every other attribute has a default.

    Parameters
    ----------
    model_type : str
        Label of the causal inference methodology.
    created_at : str
        Creation timestamp in ISO-8601 form. Empty string by default.
    files : dict[str, FileEntry]
        File entries keyed by their logical name. Each instance gets its
        own empty dict by default.
    initiative_id : str
        Identifier of the initiative. Empty string by default on the
        dataclass itself.
    evaluate_strategy : str
        How the job is evaluated: ``"review"`` (LLM review) or
        ``"score"`` (deterministic confidence). ``"review"`` by default.
    """

    model_type: str
    created_at: str = ""
    # default_factory gives every Manifest its own dict instead of a shared one.
    files: dict[str, FileEntry] = field(default_factory=dict)
    initiative_id: str = ""
    evaluate_strategy: str = "review"
[docs]
def load_manifest(job_dir: str | Path) -> Manifest:
    """Load and validate the manifest stored in a job directory.

    Parameters
    ----------
    job_dir : str | Path
        Path to the job directory containing ``manifest.json``.

    Returns
    -------
    Manifest
        The parsed manifest. ``initiative_id`` falls back to the job
        directory name when the manifest omits it or leaves it empty;
        ``created_at`` defaults to ``""`` and ``evaluate_strategy`` to
        ``"review"``.

    Raises
    ------
    FileNotFoundError
        If ``manifest.json`` does not exist.
    ValueError
        If the manifest is not a JSON object, if ``model_type`` is
        missing, or if ``files`` or one of its entries is malformed
        (not an object, or lacking ``path`` / ``format``). Invalid JSON
        also surfaces as ``ValueError`` because ``json.JSONDecodeError``
        is a subclass of it.
    """
    job_dir = Path(job_dir)
    manifest_path = job_dir / MANIFEST_FILENAME
    if not manifest_path.exists():
        msg = f"Manifest not found: {manifest_path}"
        raise FileNotFoundError(msg)

    with open(manifest_path, encoding="utf-8") as fh:
        data: Any = json.load(fh)

    # json.load may return any JSON value; everything below needs a mapping.
    if not isinstance(data, dict):
        msg = f"Manifest must be a JSON object, got {type(data).__name__}"
        raise ValueError(msg)

    # Validate required fields
    if "model_type" not in data:
        msg = "Manifest missing required field: 'model_type'"
        raise ValueError(msg)

    # An absent or null "files" means "no files"; any other non-object is an error.
    raw_files = data.get("files")
    if raw_files is None:
        raw_files = {}
    elif not isinstance(raw_files, dict):
        msg = f"Manifest field 'files' must be an object, got {type(raw_files).__name__}"
        raise ValueError(msg)

    files: dict[str, FileEntry] = {}
    for name, entry in raw_files.items():
        if not isinstance(entry, dict):
            msg = f"Manifest file entry {name!r} must be an object, got {type(entry).__name__}"
            raise ValueError(msg)
        # Report missing keys as ValueError, matching the documented contract,
        # instead of leaking a bare KeyError from the lookups below.
        missing = [key for key in ("path", "format") if key not in entry]
        if missing:
            msg = f"Manifest file entry {name!r} missing required field(s): {', '.join(missing)}"
            raise ValueError(msg)
        files[name] = FileEntry(path=entry["path"], format=entry["format"])

    # Path(".").name is "", so resolve the directory when the plain name is
    # empty to still obtain a usable directory-name fallback.
    initiative_id = data.get("initiative_id", "") or job_dir.name or job_dir.resolve().name

    logger.debug("Loaded manifest from %s: model_type=%s", manifest_path, data["model_type"])
    return Manifest(
        model_type=data["model_type"],
        created_at=data.get("created_at", ""),
        files=files,
        initiative_id=initiative_id,
        evaluate_strategy=data.get("evaluate_strategy", "review"),
    )