Files
pickle_vision/dagster_project/io_managers/json_io_manager.py
Ruslan Bakiev 549fd1da9d Initial commit
2026-03-06 09:43:52 +07:00

71 lines
2.5 KiB
Python

"""JSON IO Manager for storing asset outputs as JSON files"""
import json
from pathlib import Path
from typing import Any
from dagster import IOManager, io_manager, OutputContext, InputContext
class JSONIOManager(IOManager):
    """IO manager that stores asset outputs as JSON files.

    Each output is written to ``{base_path}/{run_id}/{asset_name}.json``,
    where ``asset_name`` is the last component of the asset key path.
    """

    def __init__(self, base_path: str = "data"):
        """Remember the base directory and create it if it does not exist.

        Args:
            base_path: Directory under which per-run subdirectories are kept.
        """
        self.base_path = Path(base_path)
        self.base_path.mkdir(parents=True, exist_ok=True)

    def _get_path(self, context) -> Path:
        """Return the JSON file path for the asset referenced by ``context``.

        For an ``InputContext`` the run id of the upstream output is used.
        If it cannot be obtained, the run directories under ``base_path`` are
        scanned from most to least recently modified and the first one that
        already contains the asset file wins (supports partial re-runs). If
        none contains it, the newest run directory is used, or ``"unknown"``
        when there are no run directories at all.

        For any other context the path lives under ``context.run_id`` and the
        run directory is created if needed.

        Args:
            context: The Dagster input or output context.

        Returns:
            Path of the asset's JSON file. The file itself may not exist.
        """
        asset_name = context.asset_key.path[-1]
        file_name = f"{asset_name}.json"

        if isinstance(context, InputContext):
            try:
                run_id = context.upstream_output.run_id
            except Exception:
                # Deliberate best-effort: whatever the reason the upstream
                # run id is unavailable, fall back to searching on disk.
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # and SystemExit still propagate.
                run_dirs = sorted(
                    (d for d in self.base_path.iterdir() if d.is_dir()),
                    key=lambda d: d.stat().st_mtime,
                    reverse=True,
                )
                for run_dir in run_dirs:
                    candidate = run_dir / file_name
                    if candidate.exists():
                        return candidate
                # Not found anywhere: point at the latest run directory so the
                # caller reports a meaningful missing-file path.
                run_id = run_dirs[0].name if run_dirs else "unknown"

            # Reads must not create directories: an empty directory left by a
            # failed load would later be picked up as the "latest run".
            return self.base_path / run_id / file_name

        # Output path: make sure the run-specific directory exists.
        run_dir = self.base_path / context.run_id
        run_dir.mkdir(parents=True, exist_ok=True)
        return run_dir / file_name

    def handle_output(self, context: OutputContext, obj: Any):
        """Serialize ``obj`` as indented JSON into the asset's file.

        Args:
            context: Output context identifying the asset and run.
            obj: JSON-serializable value produced by the asset.
        """
        file_path = self._get_path(context)
        # Explicit encoding so the result does not depend on the platform
        # default locale.
        with open(file_path, 'w', encoding="utf-8") as f:
            json.dump(obj, f, indent=2)
        context.log.info(f"Saved {context.asset_key.path[-1]} to {file_path}")

    def load_input(self, context: InputContext) -> Any:
        """Load and return the JSON value previously stored for the asset.

        Args:
            context: Input context identifying the upstream asset.

        Returns:
            The deserialized JSON content of the asset file.

        Raises:
            FileNotFoundError: If the resolved asset file does not exist.
        """
        file_path = self._get_path(context)
        if not file_path.exists():
            raise FileNotFoundError(f"Asset output not found: {file_path}")
        with open(file_path, 'r', encoding="utf-8") as f:
            obj = json.load(f)
        context.log.info(f"Loaded {context.asset_key.path[-1]} from {file_path}")
        return obj
@io_manager
def json_io_manager():
    """Build the JSON IO manager rooted at the ``data`` directory."""
    manager = JSONIOManager(base_path="data")
    return manager