zhi做网站,湖北网站推广方案,贵阳制作,毕业了智慧团建密码忘了RTDETR系列绘制热力图指路：
RT-DETR系列对指定图片绘制模型热力图-CSDN博客 https://blog.csdn.net/qq_54708219/article/details/148657372?spm=1001.2014.3001.5502
任务：实现类激活映射（Class Activation Mapping, CAM）可视化，生成热力图以展示模型在图像中关注的关键区域。
核心功能：
- 加载YOLOv8模型：使用预训练权重和配置文件初始化检测模型
- 图像预处理：通过 letterbox 函数调整图像尺寸并添加填充
- 热力图生成：使用 Grad-CAM 系列算法（GradCAM/GradCAM++/XGradCAM）可视化模型关注区域
- 结果保存：为每张输入图像生成多个检测目标的热力图
注意：在运行以下代码前，需要先安装 grad-cam 库：
pip install grad-cam
然后准备以下参数
def get_params():
    """Return the keyword arguments used to build the heatmap generator.

    Returns:
        dict: Configuration with the keys ``weight``, ``cfg``, ``device``,
        ``method``, ``layer``, ``backward_type``, ``conf_threshold`` and
        ``ratio``.
    """
    params = {
        "weight": "/data/ctc/yolov11/YOLO11m.pt",  # trained weight file
        "cfg": "/data/ctc/yolov11/ultralytics/cfg/models/11/yolo11m.yaml",  # yaml config matching the weights
        "device": "cuda:1",  # CPU/GPU
        "method": "GradCAM",  # GradCAMPlusPlus, GradCAM, XGradCAM; results differ, so try several
        "layer": "model.model[9]",  # layer to visualise
        "backward_type": "all",  # class, box, all
        # Confidence threshold (e.g. 0.6). If the progress bar stops halfway,
        # no remaining prediction scored above this value.
        "conf_threshold": 0.01,
        "ratio": 0.02,  # 0.02-0.1
    }
    return params


if __name__ == "__main__":
    # Folder holding the images to draw heatmaps for.
    img_folder_path = "/data/ctc/yolov11/jitan_images"
    # Output folder; each sub-folder is named after an image in img_folder_path.
    output_path = "result"
注意：cfg 的 yaml 文件请严格指定模型大小，即在 yolo[版本号] 后面添加 n、s、m、l、x 之一。
除 img_folder_path 外，其余的都是 YOLO 目标检测框架常用文件，这里不作过多解释。
完整代码如下
import warnings

# The filters are installed before the third-party imports below, so any
# warnings those imports emit are silenced as well.
warnings.filterwarnings("ignore")
warnings.simplefilter("ignore")
import torch
import yaml
import cv2
import os
import shutil
import numpy as np

# Fixed seed: np.random is used later to draw the per-class box colours.
np.random.seed(0)
import matplotlib.pyplot as plt
from tqdm import trange
from PIL import Image
from ultralytics.nn.tasks import DetectionModel as Model
from ultralytics.utils.torch_utils import intersect_dicts
from ultralytics.utils.ops import xywh2xyxy
from pytorch_grad_cam import GradCAMPlusPlus, GradCAM, XGradCAM
from pytorch_grad_cam.utils.image import show_cam_on_image
from pytorch_grad_cam.activations_and_gradients import ActivationsAndGradients


def letterbox(im, new_shape=(1024, 1024), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize and pad an image while meeting stride-multiple constraints.

    Args:
        im: Image array; ``im.shape[:2]`` is read as (height, width).
        new_shape: Target (height, width), or a single int for a square.
        color: Border colour passed to ``cv2.copyMakeBorder``.
        auto: If true, pad only up to the next multiple of ``stride``
            (minimum rectangle) instead of the full ``new_shape``.
        scaleFill: If true (and ``auto`` is false), stretch to ``new_shape``
            with no padding.
        scaleup: If false, only scale down, never up.
        stride: Stride the padded size must be compatible with.

    Returns:
        tuple: ``(im, ratio, (dw, dh))`` - the resized and padded image, the
        (width, height) scale ratios, and the per-side padding.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    # The -0.1 / +0.1 offsets send an odd total padding to bottom/right.
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


class yolov8_heatmap:
    """Generate class-activation heatmaps for an Ultralytics detection model.

    An instance loads the checkpoint once; calling it with an image path and
    an output directory writes one heatmap PNG per selected prediction.
    """

    def __init__(self, weight, cfg, device, method, layer, backward_type, conf_threshold, ratio):
        """Load the model and resolve the CAM method and target layer.

        Args:
            weight: Path to the trained checkpoint.
            cfg: Path to the model yaml that matches the checkpoint.
            device: Torch device string, e.g. ``"cuda:1"`` or ``"cpu"``.
            method: Name of the CAM class to use (``"GradCAM"``,
                ``"GradCAMPlusPlus"`` or ``"XGradCAM"``).
            layer: Python expression selecting the target layer, evaluated
                with the local ``model`` in scope, e.g. ``"model.model[9]"``.
            backward_type: ``"class"``, ``"box"`` or ``"all"``.
            conf_threshold: Predictions scoring below this stop the loop.
            ratio: Fraction of all predictions that are considered.
        """
        device = torch.device(device)
        ckpt = torch.load(weight)
        model_names = ckpt["model"].names
        csd = ckpt["model"].float().state_dict()  # checkpoint state_dict as FP32
        model = Model(cfg, ch=3, nc=len(model_names)).to(device)
        csd = intersect_dicts(csd, model.state_dict(), exclude=["anchor"])  # intersect
        model.load_state_dict(csd, strict=False)  # load
        model.eval()
        print(f"Transferred {len(csd)}/{len(model.state_dict())} items")

        # NOTE(review): eval() executes arbitrary code. It is acceptable only
        # because `layer` and `method` come from the local get_params();
        # never feed these from untrusted input.
        target_layers = [eval(layer)]
        method = eval(method)

        colors = np.random.uniform(0, 255, size=(len(model_names), 3)).astype(np.int32)
        # Expose every local above (arguments included) as an attribute.
        self.__dict__.update(locals())

    def post_process(self, result):
        """Sort the raw predictions by their best class score, descending.

        Args:
            result: Raw model output; the first 4 entries along dim 1 are
                treated as the box and the remaining entries as class scores.

        Returns:
            tuple: ``(scores, boxes, boxes_xyxy)`` - sorted class scores,
            sorted raw boxes (both still attached to the autograd graph),
            and the boxes converted with ``xywh2xyxy`` as a NumPy array.
        """
        logits_ = result[:, 4:]
        boxes_ = result[:, :4]
        _, indices = torch.sort(logits_.max(1)[0], descending=True)
        order = indices[0]
        scores = torch.transpose(logits_[0], dim0=0, dim1=1)[order]
        boxes = torch.transpose(boxes_[0], dim0=0, dim1=1)[order]
        return scores, boxes, xywh2xyxy(boxes).cpu().detach().numpy()

    def draw_detections(self, box, color, name, img):
        """Draw one labelled bounding box on ``img`` and return the image.

        Args:
            box: Four numbers (xmin, ymin, xmax, ymax); truncated to int.
            color: Three-component colour for the box and the label.
            name: Label text drawn just above the box.
            img: Image to draw on (modified in place by OpenCV).
        """
        xmin, ymin, xmax, ymax = list(map(int, list(box)))
        bgr = tuple(int(x) for x in color)
        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), bgr, 2)
        cv2.putText(img, str(name), (xmin, ymin - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.8, bgr, 2, lineType=cv2.LINE_AA)
        return img

    def __call__(self, img_path, save_path):
        """Write heatmaps for the top predictions of one image.

        ``save_path`` is deleted if it already exists and then recreated;
        heatmaps are saved there as ``{i}.png``.

        Args:
            img_path: Path of the image to analyse.
            save_path: Directory that receives the heatmap images.

        Raises:
            FileNotFoundError: If the image cannot be read.
        """
        # remove dir if exist
        if os.path.exists(save_path):
            shutil.rmtree(save_path)
        # make dir if not exist
        os.makedirs(save_path, exist_ok=True)

        # img process
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = letterbox(img)[0]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.float32(img) / 255.0
        tensor = torch.from_numpy(np.transpose(img, axes=[2, 0, 1])).unsqueeze(0).to(self.device)

        # init ActivationsAndGradients
        grads = ActivationsAndGradients(self.model, self.target_layers, reshape_transform=None)
        try:
            # get ActivationsAndResult
            result = grads(tensor)
            # Shapes noted by the original author for a 1024x1024 input:
            # len(result): 2
            # result[0].shape: torch.Size([1, 8, 21504])
            # len(result[1]): 3
            # result[1][0].shape: torch.Size([1, 68, 128, 128])
            # result[1][1].shape: torch.Size([1, 68, 64, 64])
            # result[1][2].shape: torch.Size([1, 68, 32, 32])
            activations = grads.activations[0].cpu().detach().numpy()

            # postprocess to yolo output
            post_result, pre_post_boxes, post_boxes = self.post_process(result[0])
            for i in trange(int(post_result.size(0) * self.ratio)):
                try:
                    # Predictions are sorted, so nothing later can pass either.
                    if float(post_result[i].max()) < self.conf_threshold:
                        break

                    self.model.zero_grad()
                    # get max probability for this prediction
                    if self.backward_type == "class" or self.backward_type == "all":
                        score = post_result[i].max()
                        score.backward(retain_graph=True)

                    if self.backward_type == "box" or self.backward_type == "all":
                        for j in range(4):
                            score = pre_post_boxes[i, j]
                            score.backward(retain_graph=True)

                    # process heatmap: sum the gradients of every backward
                    # pass made above (1 for class, 4 for box, 5 for all)
                    if self.backward_type == "class":
                        gradients = grads.gradients[0]
                    elif self.backward_type == "box":
                        gradients = grads.gradients[0] + grads.gradients[1] + grads.gradients[2] + grads.gradients[3]
                    else:
                        gradients = (
                            grads.gradients[0]
                            + grads.gradients[1]
                            + grads.gradients[2]
                            + grads.gradients[3]
                            + grads.gradients[4]
                        )
                    b, k, _, _ = gradients.size()
                    weights = self.method.get_cam_weights(
                        self.method, None, None, None, activations, gradients.detach().numpy()
                    )
                    weights = weights.reshape((b, k, 1, 1))
                    saliency_map = np.sum(weights * activations, axis=1)
                    saliency_map = np.squeeze(np.maximum(saliency_map, 0))
                    saliency_map = cv2.resize(saliency_map, (tensor.size(3), tensor.size(2)))
                    saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
                    if (saliency_map_max - saliency_map_min) == 0:
                        continue  # flat map: nothing to normalise or show
                    saliency_map = (saliency_map - saliency_map_min) / (saliency_map_max - saliency_map_min)

                    # add heatmap and box to image
                    cam_image = show_cam_on_image(img.copy(), saliency_map, use_rgb=True)
                    # Uncomment the next statement to also draw the bounding
                    # box and confidence label on the heatmap.
                    # cam_image = self.draw_detections(
                    #     post_boxes[i],
                    #     self.colors[int(post_result[i, :].argmax())],
                    #     f"{self.model_names[int(post_result[i, :].argmax())]} {float(post_result[i].max()):.2f}",
                    #     cam_image,
                    # )
                    cam_image = Image.fromarray(cam_image)
                    cam_image.save(f"{save_path}/{i}.png")
                finally:
                    # free cached GPU memory held for the current target
                    torch.cuda.empty_cache()
        finally:
            # Remove the hooks registered on the target layer so they do not
            # pile up when the same model instance is called again.
            grads.release()


def get_params():
    """Return the keyword arguments used to build ``yolov8_heatmap``.

    Returns:
        dict: Configuration with the keys ``weight``, ``cfg``, ``device``,
        ``method``, ``layer``, ``backward_type``, ``conf_threshold`` and
        ``ratio``.
    """
    params = {
        "weight": "/data/ctc/yolov11/YOLO11m.pt",  # trained weight file
        "cfg": "/data/ctc/yolov11/ultralytics/cfg/models/11/yolo11m.yaml",  # yaml config matching the weights
        "device": "cuda:1",  # CPU/GPU
        "method": "GradCAM",  # GradCAMPlusPlus, GradCAM, XGradCAM; results differ, so try several
        "layer": "model.model[9]",  # layer to visualise
        "backward_type": "all",  # class, box, all
        # Confidence threshold (e.g. 0.6). If the progress bar stops halfway,
        # no remaining prediction scored above this value.
        "conf_threshold": 0.01,
        "ratio": 0.02,  # 0.02-0.1
    }
    return params


if __name__ == "__main__":
    # Folder holding the images to draw heatmaps for.
    img_folder_path = "/data/ctc/yolov11/jitan_images"
    # Output folder; each sub-folder is named after an image in img_folder_path.
    output_path = "result"

    # Build the model once and list the folder once, not once per image.
    model = yolov8_heatmap(**get_params())
    filenames = sorted(os.listdir(img_folder_path))
    for i, filename in enumerate(filenames):
        print(f"({i + 1}/{len(filenames)}):{filename}")
        base_name = os.path.splitext(filename)[0]
        img_path = os.path.join(img_folder_path, filename)
        result_heatmap_path = os.path.join(output_path, base_name)
        model(img_path, result_heatmap_path)  # first arg: input image, second: output directory
看到以下界面即表示运行成功。批注：在 YOLOv11 以及类似架构中，post_result.size(0) 的值 21504 表示模型在单张图像上生成的预测框总数。这个数字是由模型的架构设计和输入分辨率决定的，具体计算方式如下：
我的模型输入是 1024×1024 大小的图片，模型使用三个不同尺度的特征图进行预测，各特征图分辨率分别为 128×128（步长 8）、64×64（步长 16）、32×32（步长 32）。在 YOLOv11 中，每个特征图位置生成 1 个预测框，因此预测总数是：
$(128 \times 128) + (64 \times 64) + (32 \times 32) = 16384 + 4096 + 1024 = 21504$
如果改变输入分辨率，预测总数会相应变化：
- 输入 512×512：预测总数 = (64×64) + (32×32) + (16×16) = 4096 + 1024 + 256 = 5376
- 输入 1280×1280：预测总数 = (160×160) + (80×80) + (40×40) = 25600 + 6400 + 1600 = 33600
跑出来的结果保存在 output_path 文件夹中，读者可以挑选效果较好的图片进行展示。