Seed from HS CSV and require real data
All checks were successful
Build Docker Image / build (push) Successful in 2m58s

This commit is contained in:
Ruslan Bakiev
2026-02-05 18:42:55 +07:00
parent e8f11116a2
commit 569924cabb

View File

@@ -11,7 +11,7 @@ from decimal import Decimal
import time
import requests
from django.core.management.base import BaseCommand
from django.core.management.base import BaseCommand, CommandError
from django.db import transaction
from offers.models import Offer
@@ -71,19 +71,8 @@ SUPPLIER_NAMES = [
# Default GLEIF Africa LEI dataset path (repo-local)
DEFAULT_GLEIF_PATH = "datasets/gleif/africa_lei_companies.csv"
# Fixed product catalog (10 items) with realistic prices per ton (USD)
PRODUCT_CATALOG = [
{"name": "Cocoa Beans", "category": "Cocoa", "price": Decimal("2450.00")},
{"name": "Shea Butter", "category": "Oils & Fats", "price": Decimal("1800.00")},
{"name": "Cashew Nuts", "category": "Nuts", "price": Decimal("5200.00")},
{"name": "Palm Oil", "category": "Oils & Fats", "price": Decimal("980.00")},
{"name": "Coffee Beans", "category": "Coffee", "price": Decimal("3800.00")},
{"name": "Sesame Seeds", "category": "Seeds", "price": Decimal("2100.00")},
{"name": "Cotton", "category": "Fiber", "price": Decimal("1650.00")},
{"name": "Maize", "category": "Grains", "price": Decimal("260.00")},
{"name": "Sorghum", "category": "Grains", "price": Decimal("230.00")},
{"name": "Natural Rubber", "category": "Industrial", "price": Decimal("1750.00")},
]
# Default HS product mapping CSV (repo-local)
DEFAULT_HS_PRODUCTS_PATH = "datasets/hs/exchange_seed_product_hs_map.csv"
class Command(BaseCommand):
@@ -108,6 +97,12 @@ class Command(BaseCommand):
default=10,
help="How many distinct products to use (default: 10)",
)
parser.add_argument(
"--product-csv",
type=str,
default="",
help="Path to HS product CSV (defaults to datasets/hs/exchange_seed_product_hs_map.csv)",
)
parser.add_argument(
"--supplier-location-ratio",
type=float,
@@ -217,25 +212,13 @@ class Command(BaseCommand):
self.stdout.write(self.style.ERROR("Geo URL is not set. Provide --geo-url or GEO_INTERNAL_URL."))
return
geo_url = self._normalize_geo_url(geo_url)
odoo_url = options["odoo_url"]
product_filter = options["product"]
ensure_products = options["ensure_products"]
odoo_db = options["odoo_db"]
odoo_user = options["odoo_user"]
odoo_password = options["odoo_password"]
product_csv = options["product_csv"]
company_csv = options["company_csv"]
# Fetch products from Odoo
self.stdout.write("Fetching products from Odoo...")
products = self._fetch_products_from_odoo(odoo_url, odoo_db, odoo_user, odoo_password)
if ensure_products:
self.stdout.write("Ensuring product catalog exists in Odoo...")
products = self._ensure_products_in_odoo(
odoo_url, odoo_db, odoo_user, odoo_password, products
)
if not products:
self.stdout.write(self.style.WARNING("No products found in Odoo. Falling back to catalog only."))
products = self._catalog_products()
# Load products from HS CSV
self.stdout.write("Loading products from HS CSV...")
products = self._load_product_pool(product_csv)
self.stdout.write(f"Found {len(products)} products")
# Filter by product name if specified
@@ -286,8 +269,50 @@ class Command(BaseCommand):
)
self.stdout.write(self.style.SUCCESS(f"Created {len(created_offers)} offers"))
def _catalog_products(self) -> list:
    """Convert every ``PRODUCT_CATALOG`` entry into a product tuple.

    Returns:
        One ``(name, category, uuid, price)`` tuple per catalog entry, in
        catalog order. The UUID is a freshly generated ``uuid4`` rendered
        as a string, so it differs on every call.
    """
    catalog_rows = []
    for entry in PRODUCT_CATALOG:
        catalog_rows.append(
            (entry["name"], entry["category"], str(uuid.uuid4()), entry["price"])
        )
    return catalog_rows
def _find_default_product_csv(self) -> str | None:
    """Search upward from this module for the default HS product CSV.

    Every ancestor directory of this file's resolved path is joined with
    ``DEFAULT_HS_PRODUCTS_PATH``; ancestors are tried nearest-first.

    Returns:
        The first candidate path that exists, as a string, or ``None`` when
        no ancestor directory contains the file.
    """
    module_path = Path(__file__).resolve()
    existing = (
        str(ancestor / DEFAULT_HS_PRODUCTS_PATH)
        for ancestor in module_path.parents
        if (ancestor / DEFAULT_HS_PRODUCTS_PATH).exists()
    )
    # Lazy generator: stops probing the filesystem at the first hit.
    return next(existing, None)
def _load_product_pool(self, csv_path: str | None) -> list[tuple]:
"""Load real product names from HS CSV; returns list of tuples."""
path = csv_path or self._find_default_product_csv()
if not path or not os.path.exists(path):
raise CommandError(
"HS product CSV not found. Seed requires real product data; "
"ensure datasets/hs/exchange_seed_product_hs_map.csv is available."
)
products: list[tuple] = []
seen = set()
try:
with open(path, newline="", encoding="utf-8") as f:
reader = csv.DictReader(f)
for row in reader:
name = (row.get("product") or "").strip()
hs6 = (row.get("hs6") or "").strip()
label = (row.get("label") or "").strip()
if not name or not hs6:
continue
key = (name, hs6)
if key in seen:
continue
seen.add(key)
product_uuid = self._stable_uuid("hs6", hs6)
category = label or name
products.append((name, category, product_uuid, Decimal("1000.00")))
except Exception as e:
raise CommandError(f"Failed to read HS product CSV: {e}")
if not products:
raise CommandError("HS product CSV is empty. Seed requires real product data.")
random.shuffle(products)
return products
def _fetch_products_from_odoo(self, odoo_url: str, odoo_db: str, odoo_user: int, odoo_password: str) -> list:
"""Fetch products from Odoo via JSON-RPC"""
@@ -630,10 +655,7 @@ class Command(BaseCommand):
return created
def _generate_supplier_name(self, index: int) -> str:
"""Pick a realistic supplier name; fall back if list is exhausted."""
if index < len(SUPPLIER_NAMES):
return SUPPLIER_NAMES[index]
return f"{random.choice(SUPPLIER_NAMES)} Group"
raise CommandError("Supplier name fallback is disabled. Provide real company CSV.")
def _find_default_company_csv(self) -> str | None:
"""Locate default company CSV in repo (datasets/gleif/africa_lei_companies.csv)."""
@@ -648,8 +670,10 @@ class Command(BaseCommand):
"""Load real company names from CSV; returns list of dicts."""
path = csv_path or self._find_default_company_csv()
if not path or not os.path.exists(path):
self.stdout.write(self.style.WARNING("Company CSV not found; using fallback names."))
return []
raise CommandError(
"Company CSV not found. Seed requires real company names; "
"ensure datasets/gleif/africa_lei_companies.csv is available."
)
companies = []
seen = set()
@@ -672,8 +696,7 @@ class Command(BaseCommand):
}
)
except Exception as e:
self.stdout.write(self.style.WARNING(f"Failed to read company CSV: {e}"))
return []
raise CommandError(f"Failed to read company CSV: {e}")
random.shuffle(companies)
self.stdout.write(f"Loaded {len(companies)} company names from CSV")
@@ -681,7 +704,7 @@ class Command(BaseCommand):
def _pick_company(self, index: int) -> dict | None:
if not getattr(self, "_company_pool", None):
return None
raise CommandError("Company pool is empty. Seed requires real company CSV.")
if index < len(self._company_pool):
return self._company_pool[index]
return random.choice(self._company_pool)