Add dual CSI camera web detection stream for Jetson
GStreamer hardware ISP capture, YOLOv8n CUDA inference, JPEG snapshot-based web UI for both cameras simultaneously. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
325
jetson/web_detection_stream.py
Normal file
325
jetson/web_detection_stream.py
Normal file
@@ -0,0 +1,325 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Web-based real-time ball detection stream for Jetson Orin Nano.
|
||||
Dual CSI cameras via GStreamer nvarguscamerasrc → hardware ISP → YOLO → Web.
|
||||
|
||||
Single detection loop alternates between cameras to avoid GIL issues.
|
||||
|
||||
Usage:
|
||||
python3 web_detection_stream.py [--port 8080]
|
||||
"""
|
||||
|
||||
import cv2
|
||||
import time
|
||||
import argparse
|
||||
import threading
|
||||
import subprocess
|
||||
import numpy as np
|
||||
from flask import Flask, Response, render_template_string, jsonify
|
||||
from ultralytics import YOLO
|
||||
|
||||
# COCO class index for "sports ball" — the only class we ask YOLO to report.
BALL_CLASS_ID = 32  # sports ball in COCO


app = Flask(__name__)


# Per-camera shared state
# Maps sensor_id -> {"frame": latest JPEG bytes, "lock": threading.Lock,
#                    "fps": float, "detections": int}; populated in main(),
# written by detection_loop(), read by the Flask routes.
cameras = {}
|
||||
|
||||
|
||||
HTML_PAGE = """
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<title>Pickle Vision - Live Detection</title>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
background: #1a1a2e;
|
||||
color: #eee;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
align-items: center;
|
||||
min-height: 100vh;
|
||||
}
|
||||
header { padding: 20px; text-align: center; }
|
||||
h1 { font-size: 24px; color: #4ecca3; }
|
||||
.subtitle { color: #888; font-size: 14px; margin-top: 4px; }
|
||||
.cameras {
|
||||
display: flex;
|
||||
gap: 16px;
|
||||
max-width: 1920px;
|
||||
width: 95%;
|
||||
flex-wrap: wrap;
|
||||
justify-content: center;
|
||||
}
|
||||
.cam-box { flex: 1; min-width: 400px; max-width: 960px; }
|
||||
.cam-box img {
|
||||
width: 100%;
|
||||
border-radius: 8px;
|
||||
border: 2px solid #333;
|
||||
}
|
||||
.cam-label {
|
||||
text-align: center;
|
||||
padding: 8px;
|
||||
color: #4ecca3;
|
||||
font-weight: bold;
|
||||
}
|
||||
.stats {
|
||||
display: flex;
|
||||
gap: 20px;
|
||||
margin-top: 16px;
|
||||
padding: 12px 20px;
|
||||
background: #16213e;
|
||||
border-radius: 8px;
|
||||
font-size: 14px;
|
||||
}
|
||||
.stat-label { color: #888; }
|
||||
.stat-value { color: #4ecca3; font-weight: bold; }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<header>
|
||||
<h1>🏓 Pickle Vision</h1>
|
||||
<div class="subtitle">Dual camera real-time ball detection — Jetson Orin Nano Super</div>
|
||||
</header>
|
||||
<div class="cameras">
|
||||
<div class="cam-box">
|
||||
<div class="cam-label">CAM 0</div>
|
||||
<img id="cam0" alt="Camera 0" />
|
||||
</div>
|
||||
<div class="cam-box">
|
||||
<div class="cam-label">CAM 1</div>
|
||||
<img id="cam1" alt="Camera 1" />
|
||||
</div>
|
||||
</div>
|
||||
<div class="stats">
|
||||
<div><span class="stat-label">FPS: </span><span class="stat-value" id="fps">--</span></div>
|
||||
<div><span class="stat-label">Model: </span><span class="stat-value">YOLOv8n CUDA</span></div>
|
||||
<div><span class="stat-label">Cameras: </span><span class="stat-value">2x CSI IMX219</span></div>
|
||||
</div>
|
||||
<script>
|
||||
function refreshCam(id) {
|
||||
var img = document.getElementById('cam' + id);
|
||||
var newImg = new Image();
|
||||
newImg.onload = function() {
|
||||
img.src = newImg.src;
|
||||
setTimeout(function() { refreshCam(id); }, 30);
|
||||
};
|
||||
newImg.onerror = function() {
|
||||
setTimeout(function() { refreshCam(id); }, 500);
|
||||
};
|
||||
newImg.src = '/frame/' + id + '?' + Date.now();
|
||||
}
|
||||
refreshCam(0);
|
||||
refreshCam(1);
|
||||
|
||||
setInterval(function() {
|
||||
fetch('/api/stats').then(r => r.json()).then(d => {
|
||||
var fps0 = d['0'] ? d['0'].fps.toFixed(1) : '--';
|
||||
var fps1 = d['1'] ? d['1'].fps.toFixed(1) : '--';
|
||||
document.getElementById('fps').textContent =
|
||||
'CAM0: ' + fps0 + ' | CAM1: ' + fps1;
|
||||
});
|
||||
}, 2000);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
|
||||
|
||||
class CameraReader:
    """Non-blocking camera frame reader using a background thread.

    Captures BGRx frames from a CSI sensor through a GStreamer
    ``nvarguscamerasrc`` pipeline (hardware ISP path, raw frames over a
    pipe via ``fdsink``).  If the pipeline process dies within the first
    two seconds, falls back to a V4L2 ``cv2.VideoCapture``.  The newest
    frame is kept in ``self.frame`` and protected by ``self.lock``.
    """

    def __init__(self, sensor_id, width, height, fps):
        self.sensor_id = sensor_id
        self.width = width
        self.height = height
        self.frame = None
        self.lock = threading.Lock()
        self.running = True

        # sensor_id/width/height/fps are internal ints, so the shell=True
        # f-string interpolation cannot inject anything here.
        gst_pipeline = (
            f"gst-launch-1.0 --quiet -e "
            f"nvarguscamerasrc sensor-id={sensor_id} ! "
            f"'video/x-raw(memory:NVMM),width={width},height={height},framerate={fps}/1' ! "
            f"nvvidconv ! 'video/x-raw,format=BGRx,width={width},height={height}' ! "
            f"fdsink"
        )
        print(f"[CAM {sensor_id}] Starting GStreamer: {width}x{height}@{fps}")
        self.proc = subprocess.Popen(
            gst_pipeline, shell=True,
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
        )
        # BGRx is 4 bytes per pixel.
        self.frame_bytes = width * height * 4

        # Give nvargus time to negotiate the sensor mode (or fail fast).
        time.sleep(2)
        if self.proc.poll() is not None:
            stderr = self.proc.stderr.read().decode()
            print(f"[CAM {sensor_id}] GStreamer failed: {stderr[:200]}")
            print(f"[CAM {sensor_id}] Falling back to V4L2")
            self.use_gst = False
            self.cap = cv2.VideoCapture(f"/dev/video{sensor_id}", cv2.CAP_V4L2)
            # BUGFIX: request the desired geometry/rate from the driver; the
            # original left the device at its default mode.  The driver may
            # still override these, so _read_loop keeps its resize safety net.
            self.cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
            self.cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
            self.cap.set(cv2.CAP_PROP_FPS, fps)
        else:
            self.use_gst = True
            self.cap = None
            # BUGFIX: gst-launch writes progress/warnings to stderr.  With
            # stderr=PIPE and nobody reading it, the OS pipe buffer fills and
            # the whole pipeline eventually blocks.  Drain it continuously.
            self._stderr_thread = threading.Thread(
                target=self._drain_stderr, daemon=True)
            self._stderr_thread.start()

        # Start reader thread
        self.thread = threading.Thread(target=self._read_loop, daemon=True)
        self.thread.start()

    def _drain_stderr(self):
        """Discard GStreamer stderr output so its pipe never fills up."""
        try:
            while self.running:
                if not self.proc.stderr.read(4096):
                    break  # EOF: process exited
        except (ValueError, OSError):
            # Pipe closed underneath us during shutdown — nothing to do.
            pass

    def _read_loop(self):
        """Continuously read frames into self.frame (daemon thread)."""
        while self.running:
            if self.use_gst:
                # BufferedReader.read(n) blocks until exactly n bytes or EOF,
                # so a short read means the pipeline terminated.
                raw = self.proc.stdout.read(self.frame_bytes)
                if len(raw) != self.frame_bytes:
                    print(f"[CAM {self.sensor_id}] Pipe broken")
                    break
                # BGRx -> BGR; .copy() detaches from the read-only buffer.
                f = np.frombuffer(raw, dtype=np.uint8).reshape(
                    self.height, self.width, 4)[:, :, :3].copy()
            else:
                ret, f = self.cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                if f.shape[1] != self.width or f.shape[0] != self.height:
                    f = cv2.resize(f, (self.width, self.height))

            with self.lock:
                self.frame = f

    def grab(self):
        """Get latest frame (non-blocking).

        Returns a copy of the most recent frame, or None if no frame has
        been captured yet.
        """
        with self.lock:
            return self.frame.copy() if self.frame is not None else None

    def stop(self):
        """Stop the reader thread and release the capture resources.

        New, backward-compatible addition: the original class had no way
        to shut a camera down cleanly.
        """
        self.running = False
        if self.use_gst:
            if self.proc.poll() is None:
                self.proc.terminate()
        elif self.cap is not None:
            self.cap.release()
|
||||
|
||||
|
||||
def detection_loop(cam_readers, model, conf_threshold):
    """Single loop: alternate cameras, run YOLO, update JPEG snapshots.

    Args:
        cam_readers: dict mapping sensor_id -> CameraReader.
        model: loaded ultralytics YOLO model (already moved to its device).
        conf_threshold: minimum detection confidence passed to the model.

    Runs forever in a background thread.  For each camera in turn it grabs
    the newest frame, runs inference restricted to BALL_CLASS_ID, draws the
    overlay, and publishes the encoded JPEG plus fps/detection stats into
    the module-level ``cameras`` dict under that camera's lock.
    """
    frame_counts = {sid: 0 for sid in cam_readers}
    start_times = {sid: time.time() for sid in cam_readers}

    while True:
        got_frame = False
        for sensor_id, reader in cam_readers.items():
            cam = cameras[sensor_id]

            frame = reader.grab()
            if frame is None:
                continue
            got_frame = True

            # Restrict inference to the single COCO "sports ball" class.
            results = model(frame, verbose=False, classes=[BALL_CLASS_ID], conf=conf_threshold)

            det_count = 0
            for r in results:
                for box in r.boxes:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    conf = float(box.conf[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
                    label = f"Ball {conf:.0%}"
                    # Filled background behind the label for readability.
                    (tw, th), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                    cv2.rectangle(frame, (x1, y1 - th - 10), (x1 + tw, y1), (0, 255, 0), -1)
                    cv2.putText(frame, label, (x1, y1 - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                    det_count += 1

            # FPS here is a running average since startup, not instantaneous.
            frame_counts[sensor_id] += 1
            elapsed = time.time() - start_times[sensor_id]
            fps_actual = frame_counts[sensor_id] / elapsed if elapsed > 0 else 0

            cv2.putText(frame, f"CAM {sensor_id} | FPS: {fps_actual:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
            if det_count > 0:
                cv2.putText(frame, f"Balls: {det_count}", (10, 60),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)

            _, jpeg = cv2.imencode('.jpg', frame, [cv2.IMWRITE_JPEG_QUALITY, 80])
            with cam["lock"]:
                cam["frame"] = jpeg.tobytes()
                cam["fps"] = fps_actual
                cam["detections"] = det_count

            if frame_counts[sensor_id] % 150 == 0:
                print(f"[CAM {sensor_id}] Frame {frame_counts[sensor_id]}, "
                      f"FPS: {fps_actual:.1f}, Det: {det_count}")

        # BUGFIX: if no camera has produced a frame yet (or all readers have
        # died) the original loop busy-spun at 100% CPU.  Back off briefly.
        if not got_frame:
            time.sleep(0.05)
|
||||
|
||||
|
||||
@app.route('/')
def index():
    """Serve the single-page dashboard UI."""
    page = HTML_PAGE
    return render_template_string(page)
|
||||
|
||||
|
||||
@app.route('/frame/<int:sensor_id>')
def frame(sensor_id):
    """Return the latest JPEG snapshot for one camera.

    404 for an unknown sensor id; 503 while no frame has been produced yet.
    """
    cam = cameras.get(sensor_id)
    if cam is None:
        return "Camera not found", 404
    with cam["lock"]:
        jpg = cam["frame"]
    if jpg is None:
        return "No frame yet", 503
    # no-cache so the browser's timestamped polling always fetches fresh data
    return Response(
        jpg,
        mimetype='image/jpeg',
        headers={'Cache-Control': 'no-cache, no-store'},
    )
|
||||
|
||||
|
||||
@app.route('/api/stats')
def api_stats():
    """Return per-camera stats as JSON: {"<sensor_id>": {"fps", "detections"}}.

    Flask serializes the returned dict to a JSON response automatically.
    """
    stats = {}
    for sid, cam in cameras.items():
        # BUGFIX: read under the camera's lock so fps and detections come
        # from the same detection pass (the original read them unlocked and
        # could mix values from two different frames).
        with cam["lock"]:
            stats[str(sid)] = {"fps": cam["fps"], "detections": cam["detections"]}
    return stats
|
||||
|
||||
|
||||
def main():
    """Parse CLI args, load the model, start both cameras, serve the web UI."""
    parser = argparse.ArgumentParser(description='Pickle Vision Dual Camera Detection')
    parser.add_argument('--width', type=int, default=1280)
    parser.add_argument('--height', type=int, default=720)
    parser.add_argument('--fps', type=int, default=30)
    parser.add_argument('--model', type=str, default='yolov8n.pt')
    parser.add_argument('--conf', type=float, default=0.25)
    parser.add_argument('--port', type=int, default=8080)
    args = parser.parse_args()

    print(f"Loading YOLO model: {args.model}")
    model = YOLO(args.model)
    try:
        model.to("cuda")
        print("Inference on CUDA")
    except Exception:
        # No CUDA device (or torch built without it) — stay on CPU.
        print("CUDA unavailable, using CPU")

    # Start camera readers
    cam_readers = {}
    for sensor_id in [0, 1]:
        cameras[sensor_id] = {
            "frame": None, "lock": threading.Lock(), "fps": 0, "detections": 0
        }
        cam_readers[sensor_id] = CameraReader(sensor_id, args.width, args.height, args.fps)

    # Wait (up to ~10 s) for at least one camera to produce a frame.
    print("Waiting for cameras...")
    for _ in range(100):
        if any(r.grab() is not None for r in cam_readers.values()):
            break
        time.sleep(0.1)

    # Start detection loop in background
    det_thread = threading.Thread(
        target=detection_loop,
        args=(cam_readers, model, args.conf),
        daemon=True
    )
    det_thread.start()

    # Give the loop a moment to publish first annotated frames.
    time.sleep(2)

    # BUGFIX: the banner previously printed a hard-coded LAN address
    # (192.168.1.253), which is wrong on any other network.  Discover the
    # outbound local IP instead; connect() on a UDP socket sends no packets,
    # it only selects a route.
    import socket
    try:
        with socket.socket(socket.AF_INET, socket.SOCK_DGRAM) as s:
            s.connect(("8.8.8.8", 80))
            host_ip = s.getsockname()[0]
    except OSError:
        host_ip = "localhost"

    print(f"\n{'=' * 50}")
    print(f" Open in browser: http://{host_ip}:{args.port}")
    print(f"{'=' * 50}\n")

    app.run(host='0.0.0.0', port=args.port, threaded=True)
|
||||
|
||||
|
||||
# Script entry point: only run the server when executed directly.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user