"""
Synthesizer-based simulation backend for metrics.
Takes products DataFrame and config path.
No error handling, hard failures only.
"""
from typing import Dict
import numpy as np
import pandas as pd
[docs]
def simulate_metrics_synthesizer_based(products: pd.DataFrame, config: Dict) -> pd.DataFrame:
"""
Generate synthetic product metrics using Gaussian Copula synthesizer.
Args:
products: DataFrame of products (unused in current implementation)
config: Complete configuration dictionary
Returns:
DataFrame of synthetic metrics
"""
try:
from sdv.metadata import SingleTableMetadata
from sdv.single_table import GaussianCopulaSynthesizer
except ImportError:
raise ImportError(
"SDV is required for synthesizer-based simulation. "
"Install with: pip install online-retail-simulator[synthesizer]"
)
params = config["SYNTHESIZER"]["METRICS"]["PARAMS"]
training_data_path, num_rows, seed = (
params["training_data_path"],
params["num_rows"],
params["seed"],
)
# Load training data
training_data = pd.read_csv(training_data_path)
# Step 1: Create metadata and synthesizer
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(training_data)
synthesizer = GaussianCopulaSynthesizer(metadata)
# Step 2: Train the synthesizer
synthesizer.fit(training_data)
# Step 3: Generate synthetic data with seed (legacy API required by SDV internals)
np.random.seed(seed)
synthetic_metrics = synthesizer.sample(num_rows=num_rows)
return synthetic_metrics