Compare commits
4 Commits
c654d25230
...
codex/seed
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e8f11116a2 | ||
|
|
4a8e69f3c3 | ||
|
|
0f857192d4 | ||
|
|
1e8b5ad859 |
@@ -2,9 +2,13 @@
|
||||
Seed Suppliers and Offers for African cocoa belt.
|
||||
Creates offers via Temporal workflow so they sync to the graph.
|
||||
"""
|
||||
import csv
|
||||
import os
|
||||
import random
|
||||
import uuid
|
||||
from pathlib import Path
|
||||
from decimal import Decimal
|
||||
import time
|
||||
|
||||
import requests
|
||||
from django.core.management.base import BaseCommand
|
||||
@@ -64,6 +68,9 @@ SUPPLIER_NAMES = [
|
||||
"Dar Coast Commodities", "Maputo Export House",
|
||||
]
|
||||
|
||||
# Default GLEIF Africa LEI dataset path (repo-local)
|
||||
DEFAULT_GLEIF_PATH = "datasets/gleif/africa_lei_companies.csv"
|
||||
|
||||
# Fixed product catalog (10 items) with realistic prices per ton (USD)
|
||||
PRODUCT_CATALOG = [
|
||||
{"name": "Cocoa Beans", "category": "Cocoa", "price": Decimal("2450.00")},
|
||||
@@ -128,11 +135,17 @@ class Command(BaseCommand):
|
||||
default=200,
|
||||
help="Batch size for bulk_create (default: 200)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--sleep-ms",
|
||||
type=int,
|
||||
default=0,
|
||||
help="Sleep between offer creations in milliseconds (default: 0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--geo-url",
|
||||
type=str,
|
||||
default="http://geo:8000/graphql/public/",
|
||||
help="Geo service GraphQL URL (default: http://geo:8000/graphql/public/)",
|
||||
default=None,
|
||||
help="Geo service GraphQL URL (defaults to GEO_INTERNAL_URL env var)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--odoo-url",
|
||||
@@ -169,6 +182,12 @@ class Command(BaseCommand):
|
||||
default=None,
|
||||
help="Filter offers by product name (e.g., 'Cocoa Beans')",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--company-csv",
|
||||
type=str,
|
||||
default=None,
|
||||
help="Path to CSV with real company names (default: datasets/gleif/africa_lei_companies.csv)",
|
||||
)
|
||||
|
||||
def handle(self, *args, **options):
|
||||
if options["clear"]:
|
||||
@@ -186,13 +205,25 @@ class Command(BaseCommand):
|
||||
use_workflow = not options["no_workflow"]
|
||||
use_bulk = options["bulk"]
|
||||
bulk_size = max(1, options["bulk_size"])
|
||||
geo_url = options["geo_url"]
|
||||
# Enforce fixed 1s delay to protect infra regardless of CLI flags
|
||||
sleep_ms = 1000
|
||||
geo_url = (
|
||||
options["geo_url"]
|
||||
or os.getenv("GEO_INTERNAL_URL")
|
||||
or os.getenv("GEO_EXTERNAL_URL")
|
||||
or os.getenv("GEO_URL")
|
||||
)
|
||||
if not geo_url:
|
||||
self.stdout.write(self.style.ERROR("Geo URL is not set. Provide --geo-url or GEO_INTERNAL_URL."))
|
||||
return
|
||||
geo_url = self._normalize_geo_url(geo_url)
|
||||
odoo_url = options["odoo_url"]
|
||||
product_filter = options["product"]
|
||||
ensure_products = options["ensure_products"]
|
||||
odoo_db = options["odoo_db"]
|
||||
odoo_user = options["odoo_user"]
|
||||
odoo_password = options["odoo_password"]
|
||||
company_csv = options["company_csv"]
|
||||
|
||||
# Fetch products from Odoo
|
||||
self.stdout.write("Fetching products from Odoo...")
|
||||
@@ -225,14 +256,13 @@ class Command(BaseCommand):
|
||||
hubs = self._fetch_african_hubs(geo_url)
|
||||
|
||||
if not hubs:
|
||||
self.stdout.write(self.style.WARNING(
|
||||
"No African hubs found. Using default locations."
|
||||
))
|
||||
hubs = self._default_african_hubs()
|
||||
self.stdout.write(self.style.ERROR("No African hubs found from geo service. Aborting seed."))
|
||||
return
|
||||
|
||||
self.stdout.write(f"Found {len(hubs)} African hubs")
|
||||
|
||||
# Create suppliers
|
||||
self._company_pool = self._load_company_pool(company_csv)
|
||||
self.stdout.write(f"Creating {suppliers_count} suppliers...")
|
||||
new_suppliers = self._create_suppliers(suppliers_count, hubs)
|
||||
self.stdout.write(self.style.SUCCESS(f"Created {len(new_suppliers)} suppliers"))
|
||||
@@ -244,7 +274,7 @@ class Command(BaseCommand):
|
||||
return
|
||||
if use_workflow:
|
||||
created_offers = self._create_offers_via_workflow(
|
||||
offers_count, hubs, products, supplier_location_ratio
|
||||
offers_count, hubs, products, supplier_location_ratio, sleep_ms
|
||||
)
|
||||
elif use_bulk:
|
||||
created_offers = self._create_offers_direct_bulk(
|
||||
@@ -252,7 +282,7 @@ class Command(BaseCommand):
|
||||
)
|
||||
else:
|
||||
created_offers = self._create_offers_direct(
|
||||
offers_count, hubs, products, supplier_location_ratio
|
||||
offers_count, hubs, products, supplier_location_ratio, sleep_ms
|
||||
)
|
||||
self.stdout.write(self.style.SUCCESS(f"Created {len(created_offers)} offers"))
|
||||
|
||||
@@ -564,15 +594,28 @@ class Command(BaseCommand):
|
||||
lat += random.uniform(-0.5, 0.5)
|
||||
lng += random.uniform(-0.5, 0.5)
|
||||
|
||||
name = self._generate_supplier_name(idx)
|
||||
company = self._pick_company(idx)
|
||||
if company:
|
||||
name = company["name"]
|
||||
company_code = company.get("country_code")
|
||||
mapped_name = self._country_name_from_code(company_code)
|
||||
if mapped_name:
|
||||
country = mapped_name
|
||||
country_code = company_code
|
||||
supplier_uuid = self._stable_uuid("supplier", company.get("lei") or name)
|
||||
team_uuid = self._stable_uuid("team", company.get("lei") or name)
|
||||
else:
|
||||
name = self._generate_supplier_name(idx)
|
||||
supplier_uuid = str(uuid.uuid4())
|
||||
team_uuid = str(uuid.uuid4())
|
||||
description = (
|
||||
f"{name} is a reliable supplier based in {country}, "
|
||||
"focused on consistent quality and transparent logistics."
|
||||
)
|
||||
|
||||
profile = SupplierProfile.objects.create(
|
||||
uuid=str(uuid.uuid4()),
|
||||
team_uuid=str(uuid.uuid4()),
|
||||
uuid=supplier_uuid,
|
||||
team_uuid=team_uuid,
|
||||
name=name,
|
||||
description=description,
|
||||
country=country,
|
||||
@@ -592,6 +635,77 @@ class Command(BaseCommand):
|
||||
return SUPPLIER_NAMES[index]
|
||||
return f"{random.choice(SUPPLIER_NAMES)} Group"
|
||||
|
||||
def _find_default_company_csv(self) -> str | None:
|
||||
"""Locate default company CSV in repo (datasets/gleif/africa_lei_companies.csv)."""
|
||||
here = Path(__file__).resolve()
|
||||
for parent in here.parents:
|
||||
candidate = parent / DEFAULT_GLEIF_PATH
|
||||
if candidate.exists():
|
||||
return str(candidate)
|
||||
return None
|
||||
|
||||
def _load_company_pool(self, csv_path: str | None) -> list[dict]:
|
||||
"""Load real company names from CSV; returns list of dicts."""
|
||||
path = csv_path or self._find_default_company_csv()
|
||||
if not path or not os.path.exists(path):
|
||||
self.stdout.write(self.style.WARNING("Company CSV not found; using fallback names."))
|
||||
return []
|
||||
|
||||
companies = []
|
||||
seen = set()
|
||||
try:
|
||||
with open(path, newline="", encoding="utf-8") as f:
|
||||
reader = csv.DictReader(f)
|
||||
for row in reader:
|
||||
name = (row.get("entity_name") or "").strip()
|
||||
if not name:
|
||||
continue
|
||||
if name in seen:
|
||||
continue
|
||||
seen.add(name)
|
||||
companies.append(
|
||||
{
|
||||
"name": name,
|
||||
"lei": (row.get("lei") or "").strip(),
|
||||
"country_code": (row.get("legal_address_country") or row.get("headquarters_country") or "").strip(),
|
||||
"city": (row.get("legal_address_city") or row.get("headquarters_city") or "").strip(),
|
||||
}
|
||||
)
|
||||
except Exception as e:
|
||||
self.stdout.write(self.style.WARNING(f"Failed to read company CSV: {e}"))
|
||||
return []
|
||||
|
||||
random.shuffle(companies)
|
||||
self.stdout.write(f"Loaded {len(companies)} company names from CSV")
|
||||
return companies
|
||||
|
||||
def _pick_company(self, index: int) -> dict | None:
|
||||
if not getattr(self, "_company_pool", None):
|
||||
return None
|
||||
if index < len(self._company_pool):
|
||||
return self._company_pool[index]
|
||||
return random.choice(self._company_pool)
|
||||
|
||||
def _stable_uuid(self, prefix: str, value: str) -> str:
|
||||
return str(uuid.uuid5(uuid.NAMESPACE_DNS, f"{prefix}:{value}"))
|
||||
|
||||
def _country_name_from_code(self, code: str | None) -> str | None:
|
||||
if not code:
|
||||
return None
|
||||
for name, country_code, _, _ in AFRICAN_COUNTRIES:
|
||||
if country_code == code:
|
||||
return name
|
||||
return None
|
||||
|
||||
def _normalize_geo_url(self, url: str) -> str:
|
||||
"""Ensure geo URL has scheme and GraphQL path."""
|
||||
value = url.strip()
|
||||
if not value.startswith(("http://", "https://")):
|
||||
value = f"http://{value}"
|
||||
if "/graphql" not in value:
|
||||
value = value.rstrip("/") + "/graphql/public/"
|
||||
return value
|
||||
|
||||
def _price_for_product(self, product_name: str) -> Decimal:
|
||||
for item in PRODUCT_CATALOG:
|
||||
if item["name"].lower() == product_name.lower():
|
||||
@@ -620,7 +734,9 @@ class Command(BaseCommand):
|
||||
"longitude": supplier.longitude,
|
||||
}
|
||||
|
||||
def _create_offers_via_workflow(self, count: int, hubs: list, products: list, supplier_ratio: float) -> list:
|
||||
def _create_offers_via_workflow(
|
||||
self, count: int, hubs: list, products: list, supplier_ratio: float, sleep_ms: int
|
||||
) -> list:
|
||||
"""Create offers via Temporal workflow (syncs to graph)"""
|
||||
created = []
|
||||
suppliers = list(SupplierProfile.objects.all())
|
||||
@@ -658,10 +774,14 @@ class Command(BaseCommand):
|
||||
created.append(offer_uuid)
|
||||
except Exception as e:
|
||||
self.stdout.write(self.style.ERROR(f" [{idx+1}/{count}] Failed: {e}"))
|
||||
if sleep_ms:
|
||||
time.sleep(sleep_ms / 1000.0)
|
||||
|
||||
return created
|
||||
|
||||
def _create_offers_direct(self, count: int, hubs: list, products: list, supplier_ratio: float) -> list:
|
||||
def _create_offers_direct(
|
||||
self, count: int, hubs: list, products: list, supplier_ratio: float, sleep_ms: int
|
||||
) -> list:
|
||||
"""Create offers directly in DB (no workflow, no graph sync)"""
|
||||
created = []
|
||||
suppliers = list(SupplierProfile.objects.all())
|
||||
@@ -696,6 +816,8 @@ class Command(BaseCommand):
|
||||
description=f"{product_name} available from {hub['name']} in {hub['country']}",
|
||||
)
|
||||
created.append(offer)
|
||||
if sleep_ms:
|
||||
time.sleep(sleep_ms / 1000.0)
|
||||
|
||||
return created
|
||||
|
||||
|
||||
Reference in New Issue
Block a user