"""JSON IO Manager for storing asset outputs as JSON files""" import json from pathlib import Path from typing import Any from dagster import IOManager, io_manager, OutputContext, InputContext class JSONIOManager(IOManager): """IO Manager that stores outputs as JSON files in data/{run_id}/ directory""" def __init__(self, base_path: str = "data"): self.base_path = Path(base_path) self.base_path.mkdir(parents=True, exist_ok=True) def _get_path(self, context) -> Path: """Get file path for asset with run_id subdirectory""" asset_name = context.asset_key.path[-1] # For InputContext, try upstream run_id first, fallback to finding latest if isinstance(context, InputContext): try: run_id = context.upstream_output.run_id except: # If upstream run_id not available, find the most recent run directory # that contains this asset (for partial re-runs) run_dirs = sorted([d for d in self.base_path.iterdir() if d.is_dir()], key=lambda d: d.stat().st_mtime, reverse=True) for run_dir in run_dirs: potential_path = run_dir / f"{asset_name}.json" if potential_path.exists(): return potential_path # If not found, use the latest run_dir run_id = run_dirs[0].name if run_dirs else "unknown" else: run_id = context.run_id # Create run-specific directory run_dir = self.base_path / run_id run_dir.mkdir(parents=True, exist_ok=True) return run_dir / f"{asset_name}.json" def handle_output(self, context: OutputContext, obj: Any): """Save asset output to JSON file""" file_path = self._get_path(context) with open(file_path, 'w') as f: json.dump(obj, f, indent=2) context.log.info(f"Saved {context.asset_key.path[-1]} to {file_path}") def load_input(self, context: InputContext) -> Any: """Load asset input from JSON file""" file_path = self._get_path(context) if not file_path.exists(): raise FileNotFoundError(f"Asset output not found: {file_path}") with open(file_path, 'r') as f: obj = json.load(f) context.log.info(f"Loaded {context.asset_key.path[-1]} from {file_path}") return obj @io_manager def json_io_manager(): """Factory for JSON IO Manager""" return JSONIOManager(base_path="data")