Initial commit
This commit is contained in:
70
dagster_project/io_managers/json_io_manager.py
Normal file
70
dagster_project/io_managers/json_io_manager.py
Normal file
@@ -0,0 +1,70 @@
|
||||
"""JSON IO Manager for storing asset outputs as JSON files"""
|
||||
|
||||
import json
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from dagster import IOManager, io_manager, OutputContext, InputContext
|
||||
|
||||
|
||||
class JSONIOManager(IOManager):
|
||||
"""IO Manager that stores outputs as JSON files in data/{run_id}/ directory"""
|
||||
|
||||
def __init__(self, base_path: str = "data"):
|
||||
self.base_path = Path(base_path)
|
||||
self.base_path.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
def _get_path(self, context) -> Path:
|
||||
"""Get file path for asset with run_id subdirectory"""
|
||||
asset_name = context.asset_key.path[-1]
|
||||
|
||||
# For InputContext, try upstream run_id first, fallback to finding latest
|
||||
if isinstance(context, InputContext):
|
||||
try:
|
||||
run_id = context.upstream_output.run_id
|
||||
except:
|
||||
# If upstream run_id not available, find the most recent run directory
|
||||
# that contains this asset (for partial re-runs)
|
||||
run_dirs = sorted([d for d in self.base_path.iterdir() if d.is_dir()],
|
||||
key=lambda d: d.stat().st_mtime, reverse=True)
|
||||
for run_dir in run_dirs:
|
||||
potential_path = run_dir / f"{asset_name}.json"
|
||||
if potential_path.exists():
|
||||
return potential_path
|
||||
# If not found, use the latest run_dir
|
||||
run_id = run_dirs[0].name if run_dirs else "unknown"
|
||||
else:
|
||||
run_id = context.run_id
|
||||
|
||||
# Create run-specific directory
|
||||
run_dir = self.base_path / run_id
|
||||
run_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
return run_dir / f"{asset_name}.json"
|
||||
|
||||
def handle_output(self, context: OutputContext, obj: Any):
|
||||
"""Save asset output to JSON file"""
|
||||
file_path = self._get_path(context)
|
||||
|
||||
with open(file_path, 'w') as f:
|
||||
json.dump(obj, f, indent=2)
|
||||
|
||||
context.log.info(f"Saved {context.asset_key.path[-1]} to {file_path}")
|
||||
|
||||
def load_input(self, context: InputContext) -> Any:
|
||||
"""Load asset input from JSON file"""
|
||||
file_path = self._get_path(context)
|
||||
|
||||
if not file_path.exists():
|
||||
raise FileNotFoundError(f"Asset output not found: {file_path}")
|
||||
|
||||
with open(file_path, 'r') as f:
|
||||
obj = json.load(f)
|
||||
|
||||
context.log.info(f"Loaded {context.asset_key.path[-1]} from {file_path}")
|
||||
return obj
|
||||
|
||||
|
||||
@io_manager
|
||||
def json_io_manager():
|
||||
"""Factory for JSON IO Manager"""
|
||||
return JSONIOManager(base_path="data")
|
||||
Reference in New Issue
Block a user