From 569924cabbc203de2c79be1e3d4269cfc98ebbd9 Mon Sep 17 00:00:00 2001 From: Ruslan Bakiev <572431+veikab@users.noreply.github.com> Date: Thu, 5 Feb 2026 18:42:55 +0700 Subject: [PATCH] Seed from HS CSV and require real data --- offers/management/commands/seed_exchange.py | 105 ++++++++++++-------- 1 file changed, 64 insertions(+), 41 deletions(-) diff --git a/offers/management/commands/seed_exchange.py b/offers/management/commands/seed_exchange.py index 5f1817f..162ac9f 100644 --- a/offers/management/commands/seed_exchange.py +++ b/offers/management/commands/seed_exchange.py @@ -11,7 +11,7 @@ from decimal import Decimal import time import requests -from django.core.management.base import BaseCommand +from django.core.management.base import BaseCommand, CommandError from django.db import transaction from offers.models import Offer @@ -71,19 +71,8 @@ SUPPLIER_NAMES = [ # Default GLEIF Africa LEI dataset path (repo-local) DEFAULT_GLEIF_PATH = "datasets/gleif/africa_lei_companies.csv" -# Fixed product catalog (10 items) with realistic prices per ton (USD) -PRODUCT_CATALOG = [ - {"name": "Cocoa Beans", "category": "Cocoa", "price": Decimal("2450.00")}, - {"name": "Shea Butter", "category": "Oils & Fats", "price": Decimal("1800.00")}, - {"name": "Cashew Nuts", "category": "Nuts", "price": Decimal("5200.00")}, - {"name": "Palm Oil", "category": "Oils & Fats", "price": Decimal("980.00")}, - {"name": "Coffee Beans", "category": "Coffee", "price": Decimal("3800.00")}, - {"name": "Sesame Seeds", "category": "Seeds", "price": Decimal("2100.00")}, - {"name": "Cotton", "category": "Fiber", "price": Decimal("1650.00")}, - {"name": "Maize", "category": "Grains", "price": Decimal("260.00")}, - {"name": "Sorghum", "category": "Grains", "price": Decimal("230.00")}, - {"name": "Natural Rubber", "category": "Industrial", "price": Decimal("1750.00")}, -] +# Default HS product mapping CSV (repo-local) +DEFAULT_HS_PRODUCTS_PATH = "datasets/hs/exchange_seed_product_hs_map.csv" class Command(BaseCommand): @@ -108,6 +97,12 @@ class Command(BaseCommand): default=10, help="How many distinct products to use (default: 10)", ) + parser.add_argument( + "--product-csv", + type=str, + default="", + help="Path to HS product CSV (defaults to datasets/hs/exchange_seed_product_hs_map.csv)", + ) parser.add_argument( "--supplier-location-ratio", type=float, @@ -217,25 +212,13 @@ class Command(BaseCommand): self.stdout.write(self.style.ERROR("Geo URL is not set. Provide --geo-url or GEO_INTERNAL_URL.")) return geo_url = self._normalize_geo_url(geo_url) - odoo_url = options["odoo_url"] product_filter = options["product"] - ensure_products = options["ensure_products"] - odoo_db = options["odoo_db"] - odoo_user = options["odoo_user"] - odoo_password = options["odoo_password"] + product_csv = options["product_csv"] company_csv = options["company_csv"] - # Fetch products from Odoo - self.stdout.write("Fetching products from Odoo...") - products = self._fetch_products_from_odoo(odoo_url, odoo_db, odoo_user, odoo_password) - if ensure_products: - self.stdout.write("Ensuring product catalog exists in Odoo...") - products = self._ensure_products_in_odoo( - odoo_url, odoo_db, odoo_user, odoo_password, products - ) - if not products: - self.stdout.write(self.style.WARNING("No products found in Odoo. Falling back to catalog only.")) - products = self._catalog_products() + # Load products from HS CSV + self.stdout.write("Loading products from HS CSV...") + products = self._load_product_pool(product_csv) self.stdout.write(f"Found {len(products)} products") # Filter by product name if specified @@ -286,8 +269,50 @@ class Command(BaseCommand): ) self.stdout.write(self.style.SUCCESS(f"Created {len(created_offers)} offers")) - def _catalog_products(self) -> list: - return [(p["name"], p["category"], str(uuid.uuid4()), p["price"]) for p in PRODUCT_CATALOG] + def _find_default_product_csv(self) -> str | None: + """Locate default HS product CSV in repo (datasets/hs/exchange_seed_product_hs_map.csv).""" + here = Path(__file__).resolve() + for parent in here.parents: + candidate = parent / DEFAULT_HS_PRODUCTS_PATH + if candidate.exists(): + return str(candidate) + return None + + def _load_product_pool(self, csv_path: str | None) -> list[tuple]: + """Load real product names from HS CSV; returns list of tuples.""" + path = csv_path or self._find_default_product_csv() + if not path or not os.path.exists(path): + raise CommandError( + "HS product CSV not found. Seed requires real product data; " + "ensure datasets/hs/exchange_seed_product_hs_map.csv is available." + ) + + products: list[tuple] = [] + seen = set() + try: + with open(path, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + for row in reader: + name = (row.get("product") or "").strip() + hs6 = (row.get("hs6") or "").strip() + label = (row.get("label") or "").strip() + if not name or not hs6: + continue + key = (name, hs6) + if key in seen: + continue + seen.add(key) + product_uuid = self._stable_uuid("hs6", hs6) + category = label or name + products.append((name, category, product_uuid, Decimal("1000.00"))) + except Exception as e: + raise CommandError(f"Failed to read HS product CSV: {e}") + + if not products: + raise CommandError("HS product CSV is empty. Seed requires real product data.") + + random.shuffle(products) + return products def _fetch_products_from_odoo(self, odoo_url: str, odoo_db: str, odoo_user: int, odoo_password: str) -> list: """Fetch products from Odoo via JSON-RPC""" @@ -630,10 +655,7 @@ class Command(BaseCommand): return created def _generate_supplier_name(self, index: int) -> str: - """Pick a realistic supplier name; fall back if list is exhausted.""" - if index < len(SUPPLIER_NAMES): - return SUPPLIER_NAMES[index] - return f"{random.choice(SUPPLIER_NAMES)} Group" + raise CommandError("Supplier name fallback is disabled. Provide real company CSV.") def _find_default_company_csv(self) -> str | None: """Locate default company CSV in repo (datasets/gleif/africa_lei_companies.csv).""" @@ -648,8 +670,10 @@ class Command(BaseCommand): """Load real company names from CSV; returns list of dicts.""" path = csv_path or self._find_default_company_csv() if not path or not os.path.exists(path): - self.stdout.write(self.style.WARNING("Company CSV not found; using fallback names.")) - return [] + raise CommandError( + "Company CSV not found. Seed requires real company names; " + "ensure datasets/gleif/africa_lei_companies.csv is available." + ) companies = [] seen = set() @@ -672,8 +696,7 @@ class Command(BaseCommand): } ) except Exception as e: - self.stdout.write(self.style.WARNING(f"Failed to read company CSV: {e}")) - return [] + raise CommandError(f"Failed to read company CSV: {e}") random.shuffle(companies) self.stdout.write(f"Loaded {len(companies)} company names from CSV") @@ -681,7 +704,7 @@ class Command(BaseCommand): def _pick_company(self, index: int) -> dict | None: if not getattr(self, "_company_pool", None): - return None + raise CommandError("Company pool is empty. Seed requires real company CSV.") if index < len(self._company_pool): return self._company_pool[index] return random.choice(self._company_pool)