配置环境

项目地址

1
2
3
4
5
conda create -n yolov10 python=3.9
conda activate yolov10
pip install -r requirements.txt
# Comment out torch and torchvision in requirements.txt when installing the
# dependencies; install them manually afterwards (pick the build matching
# your CUDA version).
pip install -e .

torch不同版本安装

制作训练集

自动截图:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import time
import os
import pygetwindow as gw
import pyautogui

# Title of the window to capture — replace with the title of the window
# you want to screenshot (pygetwindow matches by substring).
window_title = "您的窗口标题"

# Directory containing this script.
script_dir = os.path.dirname(os.path.abspath(__file__))

# Folder where screenshots are stored.
pictures_folder = os.path.join(script_dir, "Pictures")

# Create the folder if it does not exist yet.
os.makedirs(pictures_folder, exist_ok=True)

# Main capture loop: grab the target window roughly 10 times per second
# until the user interrupts with Ctrl-C.
try:
    while True:
        # Look the window up on every iteration so a window that is
        # moved or resized keeps being tracked.
        windows = gw.getWindowsWithTitle(window_title)

        if windows:
            # Use the first matching window.
            window = windows[0]
            x, y, width, height = window.left, window.top, window.width, window.height

            # Capture only the window's on-screen region.
            screenshot = pyautogui.screenshot(region=(x, y, width, height))

            # Build a collision-free filename. strftime only has second
            # resolution, and at ~10 captures per second files would
            # otherwise overwrite each other — append milliseconds too.
            timestamp = time.strftime("%Y%m%d_%H%M%S")
            millis = int(time.time() * 1000) % 1000
            screenshot_path = os.path.join(
                pictures_folder, f"screenshot_{timestamp}_{millis:03d}.png"
            )
            screenshot.save(screenshot_path)
            print(f"Saved screenshot: {screenshot_path}")

        # Capture roughly every 0.1 seconds.
        time.sleep(0.1)

except KeyboardInterrupt:
    print("截图程序已停止。")

将图片上传roboflow做标记

训练

1
yolo detect train data=data/data.yaml model=yolov10n/s/m/b/l/x.yaml epochs=500 batch=8 imgsz=640 device=0

或者

1
2
3
4
from ultralytics import YOLOv10

# Train a YOLOv10 model on the dataset described by data/data.yaml.
train_args = {"data": 'data/data.yaml', "epochs": 500, "batch": 8, "imgsz": 640}
model = YOLOv10()
model.train(**train_args)

预测

1
2
3
4
5
6
import torch  # was missing: the snippet uses torch.device below
from ultralytics import YOLOv10

# Load the trained weights and move the model to the first GPU.
model = YOLOv10("best.pt")
device = torch.device("cuda:0")
model.to(device)

# Inference source — was previously an undefined name (NameError).
# Set this to a file path, URL, or numpy image accepted by predict().
image_det = "path/to/image.jpg"
result = model.predict(source=image_det, imgsz=640, conf=0.1, save=True)

自动化脚本示例

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from ultralytics import YOLOv10
import time
import pyautogui as pt
import pygetwindow
import numpy as np
import cv2 as cv
import torch
from PIL import ImageGrab

# Load the trained detector weights.
model = YOLOv10("best.pt")

window_title = ""
# Fail fast with a clear message instead of an opaque IndexError when no
# window matches (the original `[0]` made the later `if window:` guard
# dead code: it could never be reached with an empty result).
matches = pygetwindow.getWindowsWithTitle(window_title)
if not matches:
    raise RuntimeError(f"No window found with title: {window_title!r}")
window = matches[0]

device = torch.device("cuda:0")
model.to(device)

# Main automation loop: grab the window, run detection, click each target
# left-to-right.
while True:
    # window.left/.top/... are read each iteration, so a moved window
    # keeps being tracked.
    x, y, w, h = window.left, window.top, window.width, window.height
    screenshot = ImageGrab.grab(bbox=[x, y, x + w, y + h])

    # PIL returns RGB; OpenCV expects BGR.
    image_src = cv.cvtColor(np.array(screenshot), cv.COLOR_RGB2BGR)
    size_x, size_y = image_src.shape[1], image_src.shape[0]

    # Resize to the model's input resolution.
    image_det = cv.resize(image_src, (640, 640))
    result = model.predict(source=image_det, imgsz=640, conf=0.1, save=False)

    # xywhn boxes are normalized (cx, cy, w, h); sort left-to-right by cx.
    boxes = sorted(result[0].boxes.xywhn, key=lambda b: float(b[0]))
    for box in boxes:
        cx, cy, bw, bh = (float(v) for v in box[:4])
        # Draw the detection on the original-resolution frame (debug aid;
        # normalized coords are scaled back to the source image size).
        cv.rectangle(
            image_src,
            (int((cx - bw / 2) * size_x), int((cy - bh / 2) * size_y)),
            (int((cx + bw / 2) * size_x), int((cy + bh / 2) * size_y)),
            color=(255, 255, 0),
            thickness=2,
        )
        # Click the detection's center, converted to screen coordinates.
        pt.click(x=x + cx * size_x, y=y + cy * size_y)
        time.sleep(0.8)