Seed from HS CSV and require real data
All checks were successful
Build Docker Image / build (push) Successful in 2m58s

This commit is contained in:
Ruslan Bakiev
2026-02-05 18:42:55 +07:00
parent e8f11116a2
commit 569924cabb

View File

@@ -11,7 +11,7 @@ from decimal import Decimal
import time import time
import requests import requests
from django.core.management.base import BaseCommand from django.core.management.base import BaseCommand, CommandError
from django.db import transaction from django.db import transaction
from offers.models import Offer from offers.models import Offer
@@ -71,19 +71,8 @@ SUPPLIER_NAMES = [
# Default GLEIF Africa LEI dataset path (repo-local) # Default GLEIF Africa LEI dataset path (repo-local)
DEFAULT_GLEIF_PATH = "datasets/gleif/africa_lei_companies.csv" DEFAULT_GLEIF_PATH = "datasets/gleif/africa_lei_companies.csv"
# Fixed product catalog (10 items) with realistic prices per ton (USD) # Default HS product mapping CSV (repo-local)
PRODUCT_CATALOG = [ DEFAULT_HS_PRODUCTS_PATH = "datasets/hs/exchange_seed_product_hs_map.csv"
{"name": "Cocoa Beans", "category": "Cocoa", "price": Decimal("2450.00")},
{"name": "Shea Butter", "category": "Oils & Fats", "price": Decimal("1800.00")},
{"name": "Cashew Nuts", "category": "Nuts", "price": Decimal("5200.00")},
{"name": "Palm Oil", "category": "Oils & Fats", "price": Decimal("980.00")},
{"name": "Coffee Beans", "category": "Coffee", "price": Decimal("3800.00")},
{"name": "Sesame Seeds", "category": "Seeds", "price": Decimal("2100.00")},
{"name": "Cotton", "category": "Fiber", "price": Decimal("1650.00")},
{"name": "Maize", "category": "Grains", "price": Decimal("260.00")},
{"name": "Sorghum", "category": "Grains", "price": Decimal("230.00")},
{"name": "Natural Rubber", "category": "Industrial", "price": Decimal("1750.00")},
]
class Command(BaseCommand): class Command(BaseCommand):
@@ -108,6 +97,12 @@ class Command(BaseCommand):
default=10, default=10,
help="How many distinct products to use (default: 10)", help="How many distinct products to use (default: 10)",
) )
parser.add_argument(
"--product-csv",
type=str,
default="",
help="Path to HS product CSV (defaults to datasets/hs/exchange_seed_product_hs_map.csv)",
)
parser.add_argument( parser.add_argument(
"--supplier-location-ratio", "--supplier-location-ratio",
type=float, type=float,
@@ -217,25 +212,13 @@ class Command(BaseCommand):
self.stdout.write(self.style.ERROR("Geo URL is not set. Provide --geo-url or GEO_INTERNAL_URL.")) self.stdout.write(self.style.ERROR("Geo URL is not set. Provide --geo-url or GEO_INTERNAL_URL."))
return return
geo_url = self._normalize_geo_url(geo_url) geo_url = self._normalize_geo_url(geo_url)
odoo_url = options["odoo_url"]
product_filter = options["product"] product_filter = options["product"]
ensure_products = options["ensure_products"] product_csv = options["product_csv"]
odoo_db = options["odoo_db"]
odoo_user = options["odoo_user"]
odoo_password = options["odoo_password"]
company_csv = options["company_csv"] company_csv = options["company_csv"]
# Fetch products from Odoo # Load products from HS CSV
self.stdout.write("Fetching products from Odoo...") self.stdout.write("Loading products from HS CSV...")
products = self._fetch_products_from_odoo(odoo_url, odoo_db, odoo_user, odoo_password) products = self._load_product_pool(product_csv)
if ensure_products:
self.stdout.write("Ensuring product catalog exists in Odoo...")
products = self._ensure_products_in_odoo(
odoo_url, odoo_db, odoo_user, odoo_password, products
)
if not products:
self.stdout.write(self.style.WARNING("No products found in Odoo. Falling back to catalog only."))
products = self._catalog_products()
self.stdout.write(f"Found {len(products)} products") self.stdout.write(f"Found {len(products)} products")
# Filter by product name if specified # Filter by product name if specified
@@ -286,8 +269,50 @@ class Command(BaseCommand):
) )
self.stdout.write(self.style.SUCCESS(f"Created {len(created_offers)} offers")) self.stdout.write(self.style.SUCCESS(f"Created {len(created_offers)} offers"))
def _catalog_products(self) -> list: def _find_default_product_csv(self) -> str | None:
return [(p["name"], p["category"], str(uuid.uuid4()), p["price"]) for p in PRODUCT_CATALOG] """Locate default HS product CSV in repo (datasets/hs/exchange_seed_product_hs_map.csv)."""
here = Path(__file__).resolve()
for parent in here.parents:
candidate = parent / DEFAULT_HS_PRODUCTS_PATH
if candidate.exists():
return str(candidate)
return None
def _load_product_pool(self, csv_path: str | None) -> list[tuple]:
    """Load the product pool from the HS product CSV.

    Reads the ``product``, ``hs6`` and ``label`` columns, skips rows that
    lack a product name or an HS6 code, drops repeated ``(product, hs6)``
    pairs and returns the remaining rows in random order.

    Args:
        csv_path: Explicit path to the CSV. When empty or ``None`` the
            repo-local default is looked up via
            ``_find_default_product_csv``.

    Returns:
        A shuffled list of ``(name, category, product_uuid, price)``
        tuples. ``category`` is the row's label, or the product name when
        the label is blank. ``price`` is the fixed placeholder
        ``Decimal("1000.00")`` because only the three columns above are
        read from the CSV.

    Raises:
        CommandError: If the CSV cannot be located, cannot be read or
            parsed, or yields no usable rows.
    """
    path = csv_path or self._find_default_product_csv()
    if not path or not os.path.exists(path):
        raise CommandError(
            "HS product CSV not found. Seed requires real product data; "
            "ensure datasets/hs/exchange_seed_product_hs_map.csv is available."
        )

    products: list[tuple] = []
    seen = set()
    try:
        with open(path, newline="", encoding="utf-8") as f:
            for row in csv.DictReader(f):
                # DictReader yields None for fields missing from a short
                # row, hence the `or ""` before stripping.
                name = (row.get("product") or "").strip()
                hs6 = (row.get("hs6") or "").strip()
                label = (row.get("label") or "").strip()
                if not name or not hs6:
                    continue
                key = (name, hs6)
                if key in seen:
                    continue
                seen.add(key)
                # NOTE(review): the identifier is derived from hs6 alone
                # while de-duplication keys on (name, hs6); two product
                # names sharing one HS6 code would presumably receive the
                # same identifier - confirm this is intended.
                product_uuid = self._stable_uuid("hs6", hs6)
                category = label or name
                products.append((name, category, product_uuid, Decimal("1000.00")))
    except (OSError, UnicodeError, csv.Error) as e:
        # Only I/O, decoding and CSV-parsing failures are "failed to read";
        # anything else is a programming error and should surface as-is.
        raise CommandError(f"Failed to read HS product CSV: {e}") from e

    if not products:
        raise CommandError("HS product CSV is empty. Seed requires real product data.")

    random.shuffle(products)
    return products
def _fetch_products_from_odoo(self, odoo_url: str, odoo_db: str, odoo_user: int, odoo_password: str) -> list: def _fetch_products_from_odoo(self, odoo_url: str, odoo_db: str, odoo_user: int, odoo_password: str) -> list:
"""Fetch products from Odoo via JSON-RPC""" """Fetch products from Odoo via JSON-RPC"""
@@ -630,10 +655,7 @@ class Command(BaseCommand):
return created return created
def _generate_supplier_name(self, index: int) -> str: def _generate_supplier_name(self, index: int) -> str:
"""Pick a realistic supplier name; fall back if list is exhausted.""" raise CommandError("Supplier name fallback is disabled. Provide real company CSV.")
if index < len(SUPPLIER_NAMES):
return SUPPLIER_NAMES[index]
return f"{random.choice(SUPPLIER_NAMES)} Group"
def _find_default_company_csv(self) -> str | None: def _find_default_company_csv(self) -> str | None:
"""Locate default company CSV in repo (datasets/gleif/africa_lei_companies.csv).""" """Locate default company CSV in repo (datasets/gleif/africa_lei_companies.csv)."""
@@ -648,8 +670,10 @@ class Command(BaseCommand):
"""Load real company names from CSV; returns list of dicts.""" """Load real company names from CSV; returns list of dicts."""
path = csv_path or self._find_default_company_csv() path = csv_path or self._find_default_company_csv()
if not path or not os.path.exists(path): if not path or not os.path.exists(path):
self.stdout.write(self.style.WARNING("Company CSV not found; using fallback names.")) raise CommandError(
return [] "Company CSV not found. Seed requires real company names; "
"ensure datasets/gleif/africa_lei_companies.csv is available."
)
companies = [] companies = []
seen = set() seen = set()
@@ -672,8 +696,7 @@ class Command(BaseCommand):
} }
) )
except Exception as e: except Exception as e:
self.stdout.write(self.style.WARNING(f"Failed to read company CSV: {e}")) raise CommandError(f"Failed to read company CSV: {e}")
return []
random.shuffle(companies) random.shuffle(companies)
self.stdout.write(f"Loaded {len(companies)} company names from CSV") self.stdout.write(f"Loaded {len(companies)} company names from CSV")
@@ -681,7 +704,7 @@ class Command(BaseCommand):
def _pick_company(self, index: int) -> dict | None: def _pick_company(self, index: int) -> dict | None:
if not getattr(self, "_company_pool", None): if not getattr(self, "_company_pool", None):
return None raise CommandError("Company pool is empty. Seed requires real company CSV.")
if index < len(self._company_pool): if index < len(self._company_pool):
return self._company_pool[index] return self._company_pool[index]
return random.choice(self._company_pool) return random.choice(self._company_pool)