Initial commit

This commit is contained in:
Ruslan Bakiev
2026-03-06 09:43:52 +07:00
commit 549fd1da9d
250 changed files with 9114 additions and 0 deletions

View File

@@ -0,0 +1,202 @@
#!/usr/bin/env python3
"""
Pickleball detection using YOLOv8 on Jetson.
Works with video file or camera input.
Outputs RTSP stream with bounding boxes around detected balls.
"""
import cv2
import time
import argparse
import gi
gi.require_version('Gst', '1.0')
gi.require_version('GstRtspServer', '1.0')
from gi.repository import Gst, GstRtspServer, GLib
from ultralytics import YOLO
import threading
# COCO dataset class index for "sports ball" (pickleballs detect as this class).
BALL_CLASS_ID = 32
# Output stream settings: resolution and frame rate for the RTSP feed.
STREAM_WIDTH = 1280
STREAM_HEIGHT = 720
FPS = 30
class RTSPServer:
    """Simple RTSP server using GStreamer.

    Exposes an appsrc-fed H.264 stream at rtsp://<host>:<port>/live.
    Raw BGR frames matching the configured width/height/framerate must
    be pushed into the appsrc element named "source".
    """

    def __init__(self, port=8554, width=1280, height=720, fps=30, bitrate=2000):
        """Create and attach the RTSP server.

        Args:
            port: TCP port the RTSP server listens on.
            width: frame width advertised in the appsrc caps.
            height: frame height advertised in the appsrc caps.
            fps: frame rate advertised in the appsrc caps.
            bitrate: x264 target bitrate in kbit/s.
        """
        Gst.init(None)
        self.server = GstRtspServer.RTSPServer()
        self.server.set_service(str(port))
        self.factory = GstRtspServer.RTSPMediaFactory()
        # appsrc -> software H.264 encode (zero-latency) -> RTP payload.
        # Geometry was previously hard-coded to 1280x720@30; it now follows
        # the constructor arguments (defaults keep the old behavior).
        self.factory.set_launch(
            f'( appsrc name=source is-live=true block=true format=GST_FORMAT_TIME '
            f'caps=video/x-raw,format=BGR,width={width},height={height},framerate={fps}/1 ! '
            f'videoconvert ! x264enc tune=zerolatency bitrate={bitrate} speed-preset=ultrafast ! '
            'rtph264pay name=pay0 pt=96 )'
        )
        # Shared factory: multiple clients reuse one encoding pipeline.
        self.factory.set_shared(True)
        self.server.get_mount_points().add_factory("/live", self.factory)
        self.server.attach(None)
        print(f"RTSP server started at rtsp://pickle:{port}/live")
def detect_ball(frame, model, class_ids=(32,), conf=0.3):
    """Run YOLO detection on a single frame.

    Args:
        frame: BGR image (numpy array) passed to the model.
        model: callable YOLO model following the ultralytics API
            (``model(frame, ...)`` returns results with ``.boxes``).
        class_ids: COCO class ids to keep; default ``(32,)`` is
            "sports ball" (previously hard-coded).
        conf: minimum confidence threshold (previously hard-coded 0.3).

    Returns:
        List of ``(x1, y1, x2, y2, confidence)`` tuples with integer
        box coordinates and float confidence.
    """
    results = model(frame, verbose=False, classes=list(class_ids), conf=conf)
    detections = []
    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            detections.append((x1, y1, x2, y2, float(box.conf[0])))
    return detections
def draw_detections(frame, detections):
    """Annotate *frame* in place with one green box + label per detection.

    Each detection is a (x1, y1, x2, y2, confidence) tuple. Returns the
    same frame for convenient chaining.
    """
    green = (0, 255, 0)
    for det in detections:
        x1, y1, x2, y2, conf = det
        # Box around the ball, label just above its top-left corner.
        cv2.rectangle(frame, (x1, y1), (x2, y2), green, 2)
        cv2.putText(frame, f"Ball {conf:.2f}", (x1, y1 - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6, green, 2)
    return frame
def main():
    """Entry point: parse CLI args, open the source, run the detect/draw loop.

    Optionally displays a local window (--display) and/or records annotated
    output to a video file (--save). Runs until the source ends (camera),
    'q' is pressed in the display window, or Ctrl+C.
    """
    parser = argparse.ArgumentParser(description='Pickleball Detection Stream')
    parser.add_argument('--source', type=str, default='0',
                        help='Video source: 0 for camera, or path to video file')
    parser.add_argument('--rtsp-port', type=int, default=8554,
                        help='RTSP server port')
    parser.add_argument('--model', type=str, default='yolov8n.pt',
                        help='YOLO model to use')
    parser.add_argument('--display', action='store_true',
                        help='Show local display window')
    parser.add_argument('--save', type=str, default=None,
                        help='Save output to video file')
    args = parser.parse_args()

    print(f"Loading YOLO model: {args.model}")
    model = YOLO(args.model)
    # Prefer CUDA; fall back to CPU on any failure.
    # (Was a bare `except:`, which also swallowed KeyboardInterrupt/SystemExit.)
    try:
        model.to("cuda")
        print("Using CUDA for inference")
    except Exception:
        print("CUDA not available, using CPU")

    # Open video source: numeric string = camera index, otherwise file path.
    print(f"Opening video source: {args.source}")
    if args.source.isdigit():
        # Camera
        cap = cv2.VideoCapture(int(args.source))
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, STREAM_WIDTH)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, STREAM_HEIGHT)
        cap.set(cv2.CAP_PROP_FPS, FPS)
    else:
        # Video file
        cap = cv2.VideoCapture(args.source)
    if not cap.isOpened():
        # Fall back to a Jetson CSI camera via a GStreamer pipeline.
        print("Trying CSI camera via GStreamer...")
        cap = cv2.VideoCapture(
            "nvarguscamerasrc ! "
            "video/x-raw(memory:NVMM),width=1280,height=720,framerate=30/1 ! "
            "nvvidconv ! video/x-raw,format=BGRx ! "
            "videoconvert ! video/x-raw,format=BGR ! appsink drop=1",
            cv2.CAP_GSTREAMER
        )
    if not cap.isOpened():
        print("ERROR: Cannot open video source!")
        return

    # Get video properties (fps may be 0 for some sources, hence the `or 30`).
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS) or 30
    print(f"Video: {width}x{height} @ {fps}fps")

    # Setup video writer if saving
    out = None
    if args.save:
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(args.save, fourcc, fps, (width, height))
        print(f"Saving output to: {args.save}")

    frame_count = 0
    start_time = time.time()
    total_detections = 0
    print("Starting detection loop... Press Ctrl+C to stop")
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                if not args.source.isdigit():
                    # Video file ended: rewind and loop forever.
                    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                    continue
                print("Failed to grab frame")
                break
            # Resize to the fixed stream geometry if the source differs.
            if frame.shape[1] != STREAM_WIDTH or frame.shape[0] != STREAM_HEIGHT:
                frame = cv2.resize(frame, (STREAM_WIDTH, STREAM_HEIGHT))
            # Run detection
            detections = detect_ball(frame, model)
            total_detections += len(detections)
            # Draw detections
            frame = draw_detections(frame, detections)
            # Periodic console FPS report
            frame_count += 1
            if frame_count % 30 == 0:
                elapsed = time.time() - start_time
                current_fps = frame_count / elapsed
                print(f"FPS: {current_fps:.1f}, Frame: {frame_count}, "
                      f"Detections this frame: {len(detections)}")
            # On-frame FPS overlay
            cv2.putText(frame, f"FPS: {frame_count / (time.time() - start_time):.1f}",
                        (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            # Save if requested
            if out:
                out.write(frame)
            # Display if requested; 'q' quits.
            if args.display:
                cv2.imshow("Pickleball Detection", frame)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break
    except KeyboardInterrupt:
        print("\nStopping...")
    finally:
        # Summary + resource cleanup always runs.
        elapsed = time.time() - start_time
        print(f"\nProcessed {frame_count} frames in {elapsed:.1f}s")
        print(f"Average FPS: {frame_count / elapsed:.1f}")
        print(f"Total ball detections: {total_detections}")
        cap.release()
        if out:
            out.release()
        cv2.destroyAllWindows()
# Script entry point.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
"""
RTSP server with YOLOv8 ball detection for Jetson.
Streams video with detections over RTSP.
"""
import cv2
import time
import argparse
import threading
import gi
gi.require_version('Gst', '1.0')
gi.require_version('GstRtspServer', '1.0')
from gi.repository import Gst, GstRtspServer, GLib
from ultralytics import YOLO
import numpy as np
# COCO dataset class index for "sports ball".
BALL_CLASS_ID = 32
class DetectionRTSPServer:
    """RTSP server that streams video with YOLO detections.

    A background thread grabs frames from the source, runs ball detection,
    and keeps the latest annotated frame under a lock; the GStreamer RTSP
    media factory pulls that frame on demand via an appsrc element.
    """

    def __init__(self, source, model_path='yolov8n.pt', port=8554, width=1280, height=720, fps=30):
        """Load the YOLO model and initialise GStreamer.

        Args:
            source: 'csi' for a Jetson CSI camera, a digit string for a
                USB camera index, or a video file path.
            model_path: YOLO weights file.
            port: RTSP listen port.
            width: output frame width.
            height: output frame height.
            fps: advertised stream frame rate.
        """
        self.source = source
        self.width = width
        self.height = height
        self.fps = fps
        self.port = port
        self.running = False
        self.frame = None  # latest annotated frame; guarded by self.lock
        self.lock = threading.Lock()
        # Load YOLO model
        print(f"Loading YOLO model: {model_path}")
        self.model = YOLO(model_path)
        # Prefer CUDA; fall back to CPU on any failure.
        # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
        try:
            self.model.to("cuda")
            print("Using CUDA")
        except Exception:
            print("Using CPU")
        # Init GStreamer
        Gst.init(None)

    def detect_and_draw(self, frame):
        """Run detection on *frame* and draw green boxes + labels in place.

        Returns the annotated frame.
        """
        results = self.model(frame, verbose=False, classes=[BALL_CLASS_ID], conf=0.25)
        for result in results:
            for box in result.boxes:
                x1, y1, x2, y2 = map(int, box.xyxy[0])
                conf = float(box.conf[0])
                cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
                cv2.putText(frame, f"Ball {conf:.2f}", (x1, y1 - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 0), 2)
        return frame

    def capture_loop(self):
        """Capture frames, run detection, and publish the latest frame.

        Runs in a background thread until self.running is cleared or the
        (non-looping) source ends. File sources loop forever.
        """
        print(f"Opening source: {self.source}")
        if self.source.isdigit():
            cap = cv2.VideoCapture(int(self.source))
        elif self.source == 'csi':
            # CSI camera on Jetson via nvarguscamerasrc
            cap = cv2.VideoCapture(
                f"nvarguscamerasrc ! video/x-raw(memory:NVMM),width={self.width},height={self.height},"
                f"framerate={self.fps}/1 ! nvvidconv ! video/x-raw,format=BGRx ! "
                f"videoconvert ! video/x-raw,format=BGR ! appsink drop=1",
                cv2.CAP_GSTREAMER
            )
        else:
            cap = cv2.VideoCapture(self.source)
        if not cap.isOpened():
            print("ERROR: Cannot open video source!")
            return
        frame_count = 0
        start_time = time.time()
        while self.running:
            ret, frame = cap.read()
            if not ret:
                if not self.source.isdigit() and self.source != 'csi':
                    # Video file ended: rewind and keep streaming.
                    cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                    continue
                break
            # Resize to the advertised stream geometry
            frame = cv2.resize(frame, (self.width, self.height))
            # Detect and annotate
            frame = self.detect_and_draw(frame)
            # FPS overlay
            frame_count += 1
            fps = frame_count / (time.time() - start_time)
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            # Publish a copy so the RTSP callback never sees a half-drawn frame.
            with self.lock:
                self.frame = frame.copy()
            if frame_count % 100 == 0:
                print(f"FPS: {fps:.1f}")
        cap.release()

    def start(self):
        """Start the capture thread and RTSP server, then block in the GLib loop."""
        self.running = True
        # Start capture thread
        self.capture_thread = threading.Thread(target=self.capture_loop)
        self.capture_thread.start()
        # Wait for first frame so clients never receive an empty stream.
        print("Waiting for first frame...")
        while self.frame is None and self.running:
            time.sleep(0.1)
        # Create RTSP server
        self.server = GstRtspServer.RTSPServer.new()
        self.server.set_service(str(self.port))
        # Factory whose pipeline accepts raw BGR via appsrc and encodes H.264
        self.factory = GstRtspServer.RTSPMediaFactory.new()
        launch_str = (
            f'( appsrc name=mysrc is-live=true block=false format=GST_FORMAT_TIME '
            f'caps=video/x-raw,format=BGR,width={self.width},height={self.height},framerate={self.fps}/1 ! '
            f'queue ! videoconvert ! video/x-raw,format=I420 ! '
            f'x264enc tune=zerolatency bitrate=4000 speed-preset=ultrafast ! '
            f'rtph264pay config-interval=1 name=pay0 pt=96 )'
        )
        self.factory.set_launch(launch_str)
        self.factory.set_shared(True)
        self.factory.connect('media-configure', self.on_media_configure)
        mounts = self.server.get_mount_points()
        mounts.add_factory('/live', self.factory)
        self.server.attach(None)
        print(f"\n{'='*50}")
        print(f"RTSP stream ready at: rtsp://pickle:{self.port}/live")
        print(f"{'='*50}\n")
        # Run GLib main loop until Ctrl+C, then shut down.
        self.loop = GLib.MainLoop()
        try:
            self.loop.run()
        except KeyboardInterrupt:
            pass
        self.stop()

    def on_media_configure(self, factory, media):
        """Hook the appsrc of a newly-connected client to our frame feed."""
        print("Client connected!")
        appsrc = media.get_element().get_child_by_name('mysrc')
        appsrc.connect('need-data', self.on_need_data)

    def on_need_data(self, src, length):
        """Push the latest annotated frame into *src* when GStreamer asks."""
        with self.lock:
            if self.frame is None:
                return
            frame = self.frame.copy()
        # Wrap raw BGR bytes in a Gst buffer
        data = frame.tobytes()
        buf = Gst.Buffer.new_allocate(None, len(data), None)
        buf.fill(0, data)
        # Timestamp from wall clock; duration from the nominal frame rate
        timestamp = int(time.time() * Gst.SECOND)
        buf.pts = timestamp
        buf.duration = int(Gst.SECOND / self.fps)
        src.emit('push-buffer', buf)

    def stop(self):
        """Stop the capture thread and wait for it to finish."""
        self.running = False
        if hasattr(self, 'capture_thread'):
            self.capture_thread.join()
        print("Server stopped")
def main():
    """Parse CLI arguments and launch the detection RTSP server (blocks)."""
    parser = argparse.ArgumentParser(description='RTSP Detection Server')
    parser.add_argument('--source', type=str, default='csi',
                        help='Video source: csi, 0 (USB cam), or video file path')
    parser.add_argument('--model', type=str, default='yolov8n.pt',
                        help='YOLO model')
    parser.add_argument('--port', type=int, default=8554,
                        help='RTSP port')
    parser.add_argument('--width', type=int, default=1280)
    parser.add_argument('--height', type=int, default=720)
    parser.add_argument('--fps', type=int, default=30)
    opts = parser.parse_args()
    # Build the server from the parsed options and run until interrupted.
    DetectionRTSPServer(
        source=opts.source,
        model_path=opts.model,
        port=opts.port,
        width=opts.width,
        height=opts.height,
        fps=opts.fps,
    ).start()
# Script entry point.
if __name__ == '__main__':
    main()

133
jetson/rtsp_yolo_stream.py Normal file
View File

@@ -0,0 +1,133 @@
#!/usr/bin/env python3
"""
RTSP stream with YOLOv8 ball detection using GStreamer pipeline.
"""
import cv2
import time
import sys
from ultralytics import YOLO
# COCO dataset class index for "sports ball".
BALL_CLASS_ID = 32  # sports ball
# Output geometry, frame rate, and local RTSP port.
WIDTH = 1280
HEIGHT = 720
FPS = 25
PORT = 8554
def main():
    """Run ball detection on a video source and record ~10s of annotated output.

    Takes an optional source path as argv[1] (defaults to a DeepStream sample
    clip), writes the annotated video to /tmp/output_detection.mp4, and
    returns a process exit code (0 success, 1 if the source cannot open).
    """
    source = sys.argv[1] if len(sys.argv) > 1 else "/opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4"
    print("Loading YOLOv8n model...")
    model = YOLO("yolov8n.pt")
    # Prefer CUDA; fall back to CPU on any failure.
    # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
    try:
        model.to("cuda")
        print("Using CUDA")
    except Exception:
        print("Using CPU")
    print(f"Opening: {source}")
    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        print("ERROR: Cannot open source")
        return 1
    # GStreamer pipeline that H.264-encodes raw BGR frames and muxes to MP4.
    # (Unused RTSP and UDP-multicast pipeline variants removed as dead code.)
    gst_file = (
        f"appsrc ! "
        f"video/x-raw,format=BGR,width={WIDTH},height={HEIGHT},framerate={FPS}/1 ! "
        f"videoconvert ! video/x-raw,format=I420 ! "
        f"x264enc tune=zerolatency ! "
        f"mp4mux ! "
        f"filesink location=/tmp/output_detection.mp4"
    )
    print(f"\nStarting detection stream...")
    print(f"Output: /tmp/output_detection.mp4")
    print("Press Ctrl+C to stop\n")
    out = cv2.VideoWriter(gst_file, cv2.CAP_GSTREAMER, 0, FPS, (WIDTH, HEIGHT), True)
    if not out.isOpened():
        # Fall back to OpenCV's built-in mp4v writer if GStreamer is unavailable.
        print("GStreamer writer failed, using regular file output")
        out = cv2.VideoWriter('/tmp/output_detection.mp4',
                              cv2.VideoWriter_fourcc(*'mp4v'), FPS, (WIDTH, HEIGHT))
    frame_count = 0
    start = time.time()
    total_detections = 0
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Source ended: rewind and keep going until the time cutoff.
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                continue
            frame = cv2.resize(frame, (WIDTH, HEIGHT))
            # Detection + annotation
            results = model(frame, verbose=False, classes=[BALL_CLASS_ID], conf=0.25)
            for r in results:
                for box in r.boxes:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    conf = float(box.conf[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
                    cv2.putText(frame, f"Ball {conf:.2f}", (x1, y1-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
                    total_detections += 1
            # FPS overlay
            frame_count += 1
            elapsed = time.time() - start
            fps = frame_count / elapsed
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            out.write(frame)
            if frame_count % 50 == 0:
                print(f"Frame {frame_count}, FPS: {fps:.1f}, Detections: {total_detections}")
            # Stop after 10 seconds for test
            if elapsed > 10:
                print("\n10 second test complete")
                break
    except KeyboardInterrupt:
        print("\nStopping...")
    finally:
        # Summary + resource cleanup always runs.
        elapsed = time.time() - start
        print(f"\nProcessed {frame_count} frames in {elapsed:.1f}s")
        print(f"Average FPS: {frame_count/elapsed:.1f}")
        print(f"Total detections: {total_detections}")
        print(f"Output saved to: /tmp/output_detection.mp4")
        cap.release()
        out.release()
    return 0
# Script entry point; propagate main()'s return value as the exit code.
if __name__ == "__main__":
    sys.exit(main())

View File

@@ -0,0 +1,100 @@
#!/usr/bin/env python3
"""
Simple RTSP stream with YOLOv8 detection using OpenCV + subprocess for RTSP.
"""
import cv2
import subprocess
import time
from ultralytics import YOLO
# COCO dataset class index for "sports ball".
BALL_CLASS_ID = 32  # sports ball in COCO
# Output geometry and frame rate fed to ffmpeg.
WIDTH = 1280
HEIGHT = 720
FPS = 25
def main():
    """Stream YOLO ball detections over RTSP by piping raw frames to ffmpeg.

    Opens a sample video, annotates each frame, and writes raw BGR bytes to
    an ffmpeg subprocess that republishes them as an RTSP/TCP stream.
    Runs until Ctrl+C or the ffmpeg pipe breaks.
    """
    print("Loading YOLOv8...")
    model = YOLO("yolov8n.pt")
    # Prefer CUDA; fall back to CPU instead of crashing on CPU-only hosts
    # (the sibling scripts already guard this call the same way).
    try:
        model.to("cuda")
        print("Model loaded on CUDA")
    except Exception:
        print("CUDA not available, using CPU")
    # Open video source
    source = "/opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4"
    cap = cv2.VideoCapture(source)
    if not cap.isOpened():
        print("ERROR: Cannot open video")
        return
    # ffmpeg reads raw BGR24 frames on stdin and publishes RTSP over TCP.
    ffmpeg_cmd = [
        'ffmpeg',
        '-y',
        '-f', 'rawvideo',
        '-vcodec', 'rawvideo',
        '-pix_fmt', 'bgr24',
        '-s', f'{WIDTH}x{HEIGHT}',
        '-r', str(FPS),
        '-i', '-',
        '-c:v', 'libx264',
        '-preset', 'ultrafast',
        '-tune', 'zerolatency',
        '-f', 'rtsp',
        '-rtsp_transport', 'tcp',
        'rtsp://localhost:8554/live'
    ]
    print("Starting ffmpeg...")
    proc = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)
    print(f"\nRTSP stream: rtsp://pickle:8554/live")
    print("Press Ctrl+C to stop\n")
    frame_count = 0
    start = time.time()
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Video ended: rewind and loop forever.
                cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
                continue
            frame = cv2.resize(frame, (WIDTH, HEIGHT))
            # Detection + annotation
            results = model(frame, verbose=False, classes=[BALL_CLASS_ID], conf=0.3)
            for r in results:
                for box in r.boxes:
                    x1, y1, x2, y2 = map(int, box.xyxy[0])
                    conf = float(box.conf[0])
                    cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 2)
                    cv2.putText(frame, f"Ball {conf:.2f}", (x1, y1-10),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
            # FPS overlay
            frame_count += 1
            fps = frame_count / (time.time() - start)
            cv2.putText(frame, f"FPS: {fps:.1f}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 255), 2)
            # Write raw frame bytes to ffmpeg's stdin
            proc.stdin.write(frame.tobytes())
            if frame_count % 100 == 0:
                print(f"Frame {frame_count}, FPS: {fps:.1f}")
    except KeyboardInterrupt:
        print("\nStopping...")
    except BrokenPipeError:
        # ffmpeg exited (e.g. RTSP server unreachable); stop cleanly.
        print("ffmpeg pipe broken")
    finally:
        cap.release()
        proc.stdin.close()
        proc.wait()
# Script entry point.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,140 @@
#!/usr/bin/env python3
"""
RTSP stream with YOLOv8 ball detection.
Pushes to mediamtx RTSP server using ffmpeg.
Usage:
1. Start mediamtx: /tmp/mediamtx &
2. Run this script: python3 stream_with_detection.py [source]
3. View stream: vlc rtsp://pickle:8554/live
"""
import cv2
import subprocess
import time
import sys
from ultralytics import YOLO
BALL_CLASS_ID = 32 # sports ball in COCO
WIDTH = 1280
HEIGHT = 720
FPS = 15 # Lower FPS for stable streaming
RTSP_URL = "rtsp://localhost:8554/live"
def main():
source = sys.argv[1] if len(sys.argv) > 1 else "/opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4"
print("Loading YOLOv8n...")
model = YOLO("yolov8n.pt")
try:
model.to("cuda")
print("Using CUDA")
except:
print("Using CPU")
print(f"Opening: {source}")
cap = cv2.VideoCapture(source)
if not cap.isOpened():
print("ERROR: Cannot open source")
return 1
# FFmpeg command to push RTSP
ffmpeg_cmd = [
'ffmpeg',
'-y',
'-f', 'rawvideo',
'-vcodec', 'rawvideo',
'-pix_fmt', 'bgr24',
'-s', f'{WIDTH}x{HEIGHT}',
'-r', str(FPS),
'-i', '-',
'-c:v', 'libx264',
'-preset', 'ultrafast',
'-tune', 'zerolatency',
'-g', str(FPS * 2),
'-f', 'rtsp',
'-rtsp_transport', 'tcp',
RTSP_URL
]
print("Starting ffmpeg RTSP publisher...")
ffmpeg = subprocess.Popen(
ffmpeg_cmd,
stdin=subprocess.PIPE,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL
)
print(f"\n{'='*50}")
print(f"RTSP STREAM: rtsp://pickle:8554/live")
print(f"{'='*50}")
print("Press Ctrl+C to stop\n")
frame_count = 0
start = time.time()
total_detections = 0
frame_time = 1.0 / FPS
try:
while True:
loop_start = time.time()
ret, frame = cap.read()
if not ret:
cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
continue
frame = cv2.resize(frame, (WIDTH, HEIGHT))
# Detection
results = model(frame, verbose=False, classes=[BALL_CLASS_ID], conf=0.25)
det_count = 0
for r in results:
for box in r.boxes:
x1, y1, x2, y2 = map(int, box.xyxy[0])
conf = float(box.conf[0])
cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 255, 0), 3)
cv2.putText(frame, f"Ball {conf:.2f}", (x1, y1-10),
cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 0), 2)
det_count += 1
total_detections += 1
# FPS overlay
frame_count += 1
elapsed = time.time() - start
fps = frame_count / elapsed
cv2.putText(frame, f"FPS: {fps:.1f} | Det: {det_count}", (10, 30),
cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
# Push to ffmpeg
try:
ffmpeg.stdin.write(frame.tobytes())
except BrokenPipeError:
print("FFmpeg pipe broken, restarting...")
break
if frame_count % 100 == 0:
print(f"Frame {frame_count}, FPS: {fps:.1f}, Total Det: {total_detections}")
# Rate limiting
proc_time = time.time() - loop_start
if proc_time < frame_time:
time.sleep(frame_time - proc_time)
except KeyboardInterrupt:
print("\nStopping...")
finally:
elapsed = time.time() - start
print(f"\nProcessed {frame_count} frames in {elapsed:.1f}s")
print(f"Average FPS: {frame_count/elapsed:.1f}")
print(f"Total detections: {total_detections}")
cap.release()
ffmpeg.stdin.close()
ffmpeg.wait()
return 0
if __name__ == "__main__":
sys.exit(main())