在实际项目中,经常需要从 RTSP 视频流中抓取一帧,利用深度学习模型检测画面中的人员,并绘制检测框与关键点,最终输出带标注的图片。本文基于 C# 和 .NET WinForms,将其核心逻辑抽取成一个独立、可复用的方法:接收视频流地址和模型路径,输出带人员框选的图片,完全抛弃世界坐标转换,只关注视觉标注结果。

传入 RTSP 地址(或本地视频文件)和 ONNX 模型路径,程序自动从视频流抓取一帧,运行 YOLOv8l 行人检测,在图片上绘制矩形框、脚点、置信度,保存标注后的图片到本地,不需要任何坐标计算,即拿即用。

## 核心依赖

| 包名 | 作用 |
| --- | --- |
| OpenCvSharp4 | 图像处理、绘制、视频帧读取 |
| Microsoft.ML.OnnxRuntime | 运行 ONNX 模型 |
| System.Drawing.Common | 辅助(实际可用 OpenCV 的 Point) |

安装命令(NuGet):

```
Install-Package OpenCvSharp4
Install-Package Microsoft.ML.OnnxRuntime
Install-Package System.Drawing.Common
```

建议同时安装 OpenCvSharp4.runtime.win,以避免本机依赖问题。

## 实现思路

该方法内部封装:

1. 使用 OpenCvSharp 的 `VideoCapture` 读取一帧(替代原始代码中的 ffmpeg 进程,更简洁)
2. 加载 ONNX 模型并执行推理
3. 后处理(缩放、Padding、NMS)
4. 绘制矩形框 / 脚点 / 置信度文字
5. 保存

## 完整实现代码

```csharp
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
using OpenCvSharp;

public class YoloPersonDetector
{
    private const int YoloInputSize = 1280; // 可改为模型输入尺寸
    private const int PersonClassId = 0;    // COCO 数据集中 person 类别 id

    /// <summary>
    /// 从视频流检测行人并保存带框的图片
    /// </summary>
    public static bool DetectAndSaveImage(string videoUrl, string modelPath,
        string outputImagePath, float confThreshold = 0.5f, float nmsThreshold = 0.45f)
    {
        if (!File.Exists(modelPath))
        {
            Console.WriteLine($"模型文件不存在: {modelPath}");
            return false;
        }

        // 1. 捕获一帧
        using (var cap = new VideoCapture(videoUrl))
        {
            if (!cap.IsOpened())
            {
                Console.WriteLine($"无法打开视频流: {videoUrl}");
                return false;
            }

            using (Mat frame = new Mat())
            {
                if (!cap.Read(frame) || frame.Empty())
                {
                    Console.WriteLine("读取视频帧失败");
                    return false;
                }

                // 2. 加载 ONNX 模型(建议只加载一次,多次调用时外部缓存 session)
                using (var session = new InferenceSession(modelPath))
                {
                    // 3. 执行检测
                    var detections = DetectPersons(session, frame, confThreshold, nmsThreshold);
                    if (detections.Count == 0)
                    {
                        Console.WriteLine("未检测到人员");
                        return false;
                    }

                    // 4. 绘制标注
                    Mat output = frame.Clone();
                    DrawDetections(output, detections);

                    // 5. 保存图片
                    Cv2.ImWrite(outputImagePath, output);
                    Console.WriteLine($"标注图片已保存: {outputImagePath}");
                    return true;
                }
            }
        }
    }

    /// <summary>
    /// 检测人员,返回边界框和置信度
    /// </summary>
    private static List<(Rect Box, float Confidence)> DetectPersons(
        InferenceSession session, Mat frame, float confThreshold, float nmsThreshold)
    {
        int origW = frame.Width;
        int origH = frame.Height;

        // 1. 预处理:缩放 + 填充至 YoloInputSize 正方形
        float scale = Math.Min((float)YoloInputSize / origW, (float)YoloInputSize / origH);
        int newW = (int)(origW * scale);
        int newH = (int)(origH * scale);
        int padX = (YoloInputSize - newW) / 2;
        int padY = (YoloInputSize - newH) / 2;

        Mat resized = new Mat();
        Cv2.Resize(frame, resized, new Size(newW, newH));
        Mat padded = new Mat(new Size(YoloInputSize, YoloInputSize), MatType.CV_8UC3, new Scalar(114, 114, 114));
        resized.CopyTo(padded[new Rect(padX, padY, newW, newH)]);

        // 2. 转为 RGB 并归一化
        Mat rgb = new Mat();
        Cv2.CvtColor(padded, rgb, ColorConversionCodes.BGR2RGB);
        float[] inputData = new float[3 * YoloInputSize * YoloInputSize];
        for (int y = 0; y < YoloInputSize; y++)
        {
            for (int x = 0; x < YoloInputSize; x++)
            {
                Vec3b pixel = rgb.At<Vec3b>(y, x);
                inputData[0 * YoloInputSize * YoloInputSize + y * YoloInputSize + x] = pixel[0] / 255f;
                inputData[1 * YoloInputSize * YoloInputSize + y * YoloInputSize + x] = pixel[1] / 255f;
                inputData[2 * YoloInputSize * YoloInputSize + y * YoloInputSize + x] = pixel[2] / 255f;
            }
        }

        var inputTensor = new DenseTensor<float>(inputData, new[] { 1, 3, YoloInputSize, YoloInputSize });
        var inputs = new List<NamedOnnxValue> { NamedOnnxValue.CreateFromTensor("images", inputTensor) };

        // 3. 推理
        using (var results = session.Run(inputs))
        {
            var outputTensor = results.First().AsTensor<float>();
            var output = outputTensor.ToArray();

            // 解析预测框:YOLOv8 格式,84 个通道 = 4 bbox + 80 class probs
            int numPredictions = output.Length / 84; // 例如 8400
            List<YoloPrediction> predictions = new List<YoloPrediction>();

            for (int i = 0; i < numPredictions; i++)
            {
                float xCenter = output[i];
                float yCenter = output[numPredictions + i];
                float width = output[2 * numPredictions + i];
                float height = output[3 * numPredictions + i];

                // 获取最高类别概率(仅 person)
                float maxProb = 0;
                int classId = -1;
                for (int c = 0; c < 80; c++)
                {
                    float prob = output[(4 + c) * numPredictions + i];
                    if (prob > maxProb)
                    {
                        maxProb = prob;
                        classId = c;
                    }
                }

                if (classId == PersonClassId && maxProb >= confThreshold)
                {
                    // 将预测坐标从 Padding 缩放空间映射回原始图像
                    float x1_pad = xCenter - width / 2;
                    float y1_pad = yCenter - height / 2;
                    float x2_pad = xCenter + width / 2;
                    float y2_pad = yCenter + height / 2;

                    float x1_orig = (x1_pad - padX) / scale;
                    float y1_orig = (y1_pad - padY) / scale;
                    float x2_orig = (x2_pad - padX) / scale;
                    float y2_orig = (y2_pad - padY) / scale;

                    x1_orig = Math.Clamp(x1_orig, 0, origW);
                    y1_orig = Math.Clamp(y1_orig, 0, origH);
                    x2_orig = Math.Clamp(x2_orig, 0, origW);
                    y2_orig = Math.Clamp(y2_orig, 0, origH);

                    predictions.Add(new YoloPrediction
                    {
                        Box = new Rect((int)x1_orig, (int)y1_orig,
                            (int)(x2_orig - x1_orig), (int)(y2_orig - y1_orig)),
                        Confidence = maxProb
                    });
                }
            }

            // NMS 过滤
            var nmsResult = Nms(predictions, nmsThreshold);
            return nmsResult.Select(p => (p.Box, p.Confidence)).ToList();
        }
    }

    private static List<YoloPrediction> Nms(List<YoloPrediction> predictions, float iouThreshold)
    {
        if (predictions.Count == 0) return new List<YoloPrediction>();

        predictions = predictions.OrderByDescending(p => p.Confidence).ToList();
        List<YoloPrediction> result = new List<YoloPrediction>();

        while (predictions.Count > 0)
        {
            var best = predictions[0];
            result.Add(best);
            predictions.RemoveAt(0);

            for (int i = predictions.Count - 1; i >= 0; i--)
            {
                if (CalculateIou(best.Box, predictions[i].Box) > iouThreshold)
                    predictions.RemoveAt(i);
            }
        }
        return result;
    }

    private static float CalculateIou(Rect a, Rect b)
    {
        int x1 = Math.Max(a.X, b.X);
        int y1 = Math.Max(a.Y, b.Y);
        int x2 = Math.Min(a.X + a.Width, b.X + b.Width);
        int y2 = Math.Min(a.Y + a.Height, b.Y + b.Height);
        int interArea = Math.Max(0, x2 - x1) * Math.Max(0, y2 - y1);
        int areaA = a.Width * a.Height;
        int areaB = b.Width * b.Height;
        return (float)interArea / (areaA + areaB - interArea);
    }

    private static void DrawDetections(Mat image, List<(Rect Box, float Confidence)> detections)
    {
        foreach (var det in detections)
        {
            // 绘制矩形框(绿色)
            Cv2.Rectangle(image, det.Box, new Scalar(0, 255, 0), 2);

            // 绘制脚点(红色圆点)
            int footX = det.Box.X + det.Box.Width / 2;
            int footY = det.Box.Y + det.Box.Height;
            Cv2.Circle(image, new Point(footX, footY), 5, new Scalar(0, 0, 255), -1);

            // 显示置信度文字
            string label = $"person: {det.Confidence:F2}";
            Cv2.PutText(image, label, new Point(det.Box.X, det.Box.Y - 5),
                HersheyFonts.HersheySimplex, 0.6, new Scalar(0, 255, 255), 1);
        }
    }

    private class YoloPrediction
    {
        public Rect Box { get; set; }
        public float Confidence { get; set; }
    }
}
```

## 使用示例(控制台或 WinForms)

```csharp
string rtspUrl = "rtsp://admin:password@192.168.1.100:554/stream1";
string modelFile = @"C:\models\yolov8n.onnx"; // 请使用自己的 ONNX 模型
string outputPic = @"D:\detected_person.jpg";

bool ok = YoloPersonDetector.DetectAndSaveImage(rtspUrl, modelFile, outputPic, 0.5f, 0.45f);
if (ok)
    Console.WriteLine("成功生成带框图片");
else
    Console.WriteLine("检测失败或无人员");
```

如果需要适应 WinForms 中的 button Click 逻辑,只需将上面方法放入项目,并在按钮点击事件中调用:

```csharp
private void button3_Click(object sender, EventArgs e)
{
    string videoUrl = comboBox1.SelectedItem?.ToString(); // 或者手动输入 RTSP
    string modelPath = Path.Combine(Application.StartupPath, "models", "yolov8l.onnx");
    string outputPath = Path.Combine(Application.StartupPath, "TestImages",
        $"result_{DateTime.Now:yyyyMMdd_HHmmss}.jpg");
    YoloPersonDetector.DetectAndSaveImage(videoUrl, modelPath, outputPath);
}
```

本示例采用 `VideoCapture`,更加轻量。如果你的 RTSP 流需要指定传输协议(如 TCP),可以在 `VideoCapture` 前设置环境变量,或使用 `Cv2.CapProp`;一般默认自动协商也足够。若遇到连接问题,可回退到原始 ffmpeg 方案。

## ⚠️ 注意事项

- **模型输入尺寸**:本文默认 `YoloInputSize = 1280`;如果使用 yolov8l 原始模型为其他尺寸,你可根据模型实际输入修改常量。
- **ONNX 模型导出**:确保模型输出为 `[1, 84, 8400]` 这种形状(YOLOv8 标准格式)。
- **RTSP 稳定性**:生产环境建议增加重连机制,或使用 ffmpeg 解码(如原文那样)以应对复杂流。
- **性能**:每调用一次都会加载 ONNX 模型(较慢),如需频繁处理,应将 `InferenceSession` 缓存为静态或单例。