# YOLOv5 🚀 by Ultralytics, AGPL-3.0 license
"""Loss functions."""
import torch
import torch.nn as nn
import torchvision
import torch.nn.functional as F
from utils.metrics import bbox_iou
from utils.torch_utils import de_parallel
from torch.nn.functional import cosine_similarity
import numpy as np
from utils.general import (
LOGGER,
TQDM_BAR_FORMAT,
Profile,
xywh2xyxy,
xyxy2xywh,
non_max_suppression_ps,
scale_boxes,
get_fixed_xyxy,
)
def smooth_BCE(eps=0.1):
"""Returns label smoothing BCE targets for reducing overfitting; pos: `1.0 - 0.5*eps`, neg: `0.5*eps`. For details see https://github.com/ultralytics/yolov3/issues/238#issuecomment-598028441"""
return 1.0 - 0.5 * eps, 0.5 * eps
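# Quick check (values follow directly from the formula above): smooth_BCE(0.1)
# returns (0.95, 0.05), so positive targets become 0.95 and negatives 0.05
# instead of hard 1/0 labels.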
class BCEBlurWithLogitsLoss(nn.Module):
# BCEwithLogitLoss() with reduced missing label effects.
def __init__(self, alpha=0.05):
"""Initializes a modified BCEWithLogitsLoss with reduced missing label effects, taking optional alpha smoothing
parameter.
"""
super().__init__()
self.loss_fcn = nn.BCEWithLogitsLoss(reduction="none") # must be nn.BCEWithLogitsLoss()
self.alpha = alpha
def forward(self, pred, true):
"""Computes modified BCE loss for YOLOv5 with reduced missing label effects, taking pred and true tensors,
returns mean loss.
"""
loss = self.loss_fcn(pred, true)
pred = torch.sigmoid(pred) # prob from logits
dx = pred - true # reduce only missing label effects
# dx = (pred - true).abs() # reduce missing label and false label effects
alpha_factor = 1 - torch.exp((dx - 1) / (self.alpha + 1e-4))
loss *= alpha_factor
return loss.mean()
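# Minimal usage sketch (shapes illustrative, not the training loop's tensors):
#   criterion = BCEBlurWithLogitsLoss(alpha=0.05)
#   loss = criterion(torch.randn(4, 14), torch.randint(0, 2, (4, 14)).float())
# The alpha factor down-weights elements where pred greatly exceeds true,
# i.e. confident predictions on possibly missing labels.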
class FocalLoss(nn.Module):
# Wraps focal loss around existing loss_fcn(), i.e. criteria = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
"""Initializes FocalLoss with specified loss function, gamma, and alpha values; modifies loss reduction to
'none'.
"""
super().__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
self.gamma = gamma
self.alpha = alpha
self.reduction = loss_fcn.reduction
self.loss_fcn.reduction = "none" # required to apply FL to each element
    def forward(self, pred, true, mask, object_loss, patch):
        """Calculates the focal loss between predicted and true labels; when `object_loss` is True the
        element-wise loss is gated by `mask`, and `patch` selects a mask-normalized mean reduction.
        """
        loss = self.loss_fcn(pred, true)
        if object_loss:
            loss = loss * mask
# p_t = torch.exp(-loss)
# loss *= self.alpha * (1.000001 - p_t) ** self.gamma # non-zero power for gradient stability
# TF implementation https://github.com/tensorflow/addons/blob/v0.7.1/tensorflow_addons/losses/focal_loss.py
pred_prob = torch.sigmoid(pred) # prob from logits
p_t = true * pred_prob + (1 - true) * (1 - pred_prob)
alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
modulating_factor = (1.0 - p_t) ** self.gamma
loss *= alpha_factor * modulating_factor
        if self.reduction == "mean":
            if object_loss and patch:
                # normalize by the number of active mask cells rather than all elements
                return loss.sum() / (mask > 0).sum().item()
            return loss.mean()
        elif self.reduction == "sum":
            return loss.sum()
        else:  # 'none'
            return loss
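# Usage sketch for this repo's modified FocalLoss; its forward takes extra
# mask/object_loss/patch arguments, unlike upstream YOLOv5 (tensors illustrative):
#   fl = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
#   pred, true = torch.randn(2, 3, 8, 8), torch.zeros(2, 3, 8, 8)
#   loss = fl(pred, true, torch.ones_like(pred), object_loss=True, patch=True)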
class QFocalLoss(nn.Module):
    # Wraps Quality focal loss around existing loss_fcn(), i.e. criteria = QFocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
def __init__(self, loss_fcn, gamma=1.5, alpha=0.25):
"""Initializes Quality Focal Loss with given loss function, gamma, alpha; modifies reduction to 'none'."""
super().__init__()
self.loss_fcn = loss_fcn # must be nn.BCEWithLogitsLoss()
self.gamma = gamma
self.alpha = alpha
self.reduction = loss_fcn.reduction
self.loss_fcn.reduction = "none" # required to apply FL to each element
    def forward(self, pred, true):
        """Computes the quality focal loss between `pred` and `true` using BCEWithLogitsLoss, adjusting
        for imbalance with `gamma` and `alpha`.
        """
loss = self.loss_fcn(pred, true)
pred_prob = torch.sigmoid(pred) # prob from logits
alpha_factor = true * self.alpha + (1 - true) * (1 - self.alpha)
modulating_factor = torch.abs(true - pred_prob) ** self.gamma
loss *= alpha_factor * modulating_factor
if self.reduction == "mean":
return loss.mean()
elif self.reduction == "sum":
return loss.sum()
else: # 'none'
return loss
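# Usage sketch (illustrative): QFocalLoss keeps the standard two-argument forward,
# unlike the modified FocalLoss above, and accepts soft quality targets:
#   qfl = QFocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
#   loss = qfl(torch.randn(8, 14), torch.rand(8, 14))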
class ComputeLoss:
sort_obj_iou = False
# Compute losses
def __init__(self, model, autobalance=False):
"""Initializes ComputeLoss with model and autobalance option, autobalances losses if True."""
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters
# Define criteria
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["cls_pw"]], device=device))
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h["obj_pw"]], device=device))
# Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
self.cp, self.cn = smooth_BCE(eps=h.get("label_smoothing", 0.0)) # positive, negative BCE targets
# Focal loss
g = h["fl_gamma"] # focal loss gamma
        if g > 0:
            # NOTE: downstream calls pass (pred, true, mask, object_loss, patch), so this fork effectively
            # requires fl_gamma > 0, i.e. both criteria wrapped in the 5-argument FocalLoss above
            BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
m = de_parallel(model).model[-1] # Detect() module
self.balance = {3: [4.0, 1.0, 0.4]}.get(m.nl, [4.0, 1.0, 0.25, 0.06, 0.02]) # P3-P7
self.ssi = list(m.stride).index(16) if autobalance else 0 # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
self.na = m.na # number of anchors
self.nc = m.nc # number of classes
self.nl = m.nl # number of layers
self.anchors = m.anchors
self.stride = m.stride
self.device = device
self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid
self.anchor_grid = [torch.empty(0) for _ in range(self.nl)]
    def __call__(self, p, targets, feat, epoch):  # predictions, targets, intermediate features, epoch
"""Performs forward pass, calculating class, box, and object loss for given predictions and targets."""
        p_lcls = torch.zeros(1, device=self.device)  # patch class loss
        p_lbox = torch.zeros(1, device=self.device)  # patch box loss
        p_lobj = torch.zeros(1, device=self.device)  # patch object loss
        f_lcls = torch.zeros(1, device=self.device)  # pseudo-label class loss
        f_lbox = torch.zeros(1, device=self.device)  # pseudo-label box loss
        f_lobj = torch.zeros(1, device=self.device)  # pseudo-label object loss
# target= targets[:,:6]
# masked_values_patch = torch.zeros(p, dtype=p.dtype, device=self.device)
        p_region = torch.cat((targets[:, 0:1], targets[:, 6:10]), dim=1)  # (image index, patch x1 y1 x2 y2)
        p_n = []
        complete_patch_mask = []
for i in range(self.nl):
            selected_ratio = [8, 16, 32]  # downsampling stride of each detection layer (P3, P4, P5)
# size_ratio=[(32,32),(16,16),(8,8)]
unique_boxes, _ = torch.unique(p_region, dim=0, return_inverse=True)
extracted_slices= []
patch_mask=[]
for l in range(p[i].shape[0]):
                _, x1, y1, x2, y2 = unique_boxes[l].int()
                # map the image-space patch box onto this layer's grid resolution
                x1, x2 = int(x1 / selected_ratio[i]), int(x2 / selected_ratio[i])
                y1, y2 = int(y1 / selected_ratio[i]), int(y2 / selected_ratio[i])
# Create a mask of zeros
mask = torch.zeros_like(p[i][l])
# Set the specified region to 1
mask[:, y1:y2, x1:x2, :] = 1
# Apply the mask to p[i][l]
masked_p = p[i][l] * mask
# import cv2
# import numpy as np
# roi_aligned_features= masked_p[:,:,:,-1].detach().cpu().numpy()
# roi_aligned_features_numpy = roi_aligned_features
# roi_aligned_features_numpy[roi_aligned_features_numpy < 0] *= -1
# roi_aligned_features_numpy = np.transpose(roi_aligned_features_numpy, (1, 2, 0))
# # Write the NumPy array to an image file using OpenCV
# processed_array = ((roi_aligned_features_numpy) * 255).astype(np.uint8)
# #processed_array = np.squeeze(processed_array)
# # Write the processed image using OpenCV
# save_path = f"{l}_output.jpg"
# cv2.imwrite(save_path, processed_array)
# Extract the masked slice
# extracted_slice = masked_p[:, y1:y2, x1:x2, :]
extracted_slices.append(masked_p.squeeze(0))
patch_mask.append(mask.squeeze(0))
p_n.append(torch.stack(extracted_slices, dim=0))
complete_patch_mask.append(torch.stack(patch_mask, dim=0))
        p_tcls, p_tbox, p_indices, p_anchors = self.build_targets(p_n, targets)
        p_lbox, p_lobj, p_lcls, p_bs, t_class_all, p_classes_path = self.loss_com_patch(
            p_tcls, p_tbox, p_indices, p_anchors, p_n, complete_patch_mask
        )
        pseudo_targets, patch_targets, orignal_targets, pseudo_targets_60_90 = self.pseudo_targets(p, targets)
        # Feature-similarity loss on mid-confidence pseudo boxes, applied only during the first 20 epochs
        if epoch < 20:
            similarity_loss_patch = self.similarity(feat, pseudo_targets_60_90, patch_targets, orignal_targets, 1)
            if similarity_loss_patch.item() != 0:
                similarity_loss_patch = similarity_loss_patch.unsqueeze(0)
        else:
            similarity_loss_patch = torch.zeros(1, device=self.device)
        if len(pseudo_targets) > 0:
            f_tcls, f_tbox, f_indices, f_anchors = self.build_targets(p, pseudo_targets)
            f_lbox, f_lobj, f_lcls, p_classes = self.loss_com_background(f_tcls, f_tbox, f_indices, f_anchors, p)
            # pseudo-label losses are down-weighted by 0.1 relative to the patch losses
            lbox = p_lbox + f_lbox * 0.1
            lobj = p_lobj + f_lobj * 0.1
            lcls = p_lcls + f_lcls * 0.1 + similarity_loss_patch * 0.1
        else:
            lbox = p_lbox
            lobj = p_lobj
            lcls = p_lcls + similarity_loss_patch * 0.1
        if epoch == 29:  # late in training, fall back to the patch losses alone
            lbox = p_lbox
            lobj = p_lobj
            lcls = p_lcls
        return (lbox + lobj + lcls) * p_bs, torch.cat((p_lbox, p_lobj, p_lcls)).detach()
    def loss_com_patch(self, tcls, tbox, indices, anchors, p_n, complete_patch_masks):
        """Computes box, objectness, and class losses over the patch-masked predictions `p_n`."""
        lcls = torch.zeros(1, device=self.device)  # class loss
        lbox = torch.zeros(1, device=self.device)  # box loss
        lobj = torch.zeros(1, device=self.device)  # object loss
        t_classes_path = []
        p_classes_path = []
# Losses
        for i, (pi, pq) in enumerate(zip(p_n, complete_patch_masks)):  # layer index, (masked predictions, patch mask)
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
# pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # faster, requires torch 1.8.0
pxy, pwh, _, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1) # target-subset of predictions
# Regression
pxy = pxy.sigmoid() * 2 - 0.5
pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
# Objectness
iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
j = iou.argsort()
b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
if self.gr < 1:
iou = (1.0 - self.gr) + self.gr * iou
tobj[b, a, gj, gi] = iou # iou ratio
# Classification
if self.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(pcls, self.cn, device=self.device) # targets
t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(pcls, t, pcls, object_loss=False, patch=False)  # BCE
# t_classes_path.append(t)
# p_classes_path.append(pcls)
# Append targets to text file
# with open('targets.txt', 'a') as file:
# [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
            obji = self.BCEobj(pi[..., 4], tobj, pq[..., 4], object_loss=True, patch=True)
lobj += obji * self.balance[i] # obj loss
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
lbox *= self.hyp["box"]
lobj *= self.hyp["obj"]
lcls *= self.hyp["cls"]
bs = tobj.shape[0] # batch size
# sim_loss= torch.tensor(0.00, dtype=torch.float16, device='cuda:0')
# kl_loss= torch.tensor(0.00, dtype=torch.float16, device='cuda:0')
# torch.tensor(0.001, dtype=torch.float16, device='cuda:0')
# t_classes_path= torch.cat(t_classes_path)
# p_classes_path=torch.cat(p_classes_path)
        return lbox, lobj, lcls, bs, t_classes_path, p_classes_path
    def loss_com_background(self, tcls, tbox, indices, anchors, p):
        """Computes box, objectness, and class losses for background pseudo-labels over the full prediction maps."""
        lcls = torch.zeros(1, device=self.device)  # class loss
        lbox = torch.zeros(1, device=self.device)  # box loss
        lobj = torch.zeros(1, device=self.device)  # object loss
        p_classes_all = []
# targets
# gt_path= paths[0].split("/")[-1].split(".")[0]
# gt_part=gt_path.split("_")
# gt_part[3]="1000"
# del gt_part[2]
# gt_path= '_'.join(gt_part)
# x_prediction=np.load(f"100x_GT/{gt_path}.npy", allow_pickle=True)
# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
# pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # faster, requires torch 1.8.0
                pxy, pwh, pobj, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1)  # target-subset of predictions
# Regression
pxy = pxy.sigmoid() * 2 - 0.5
pwh = (pwh.sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox, tbox[i], CIoU=True).squeeze() # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss
# Objectness
iou = iou.detach().clamp(0).type(tobj.dtype)
if self.sort_obj_iou:
j = iou.argsort()
b, a, gj, gi, iou = b[j], a[j], gj[j], gi[j], iou[j]
if self.gr < 1:
iou = (1.0 - self.gr) + self.gr * iou
tobj[b, a, gj, gi] = iou # iou ratio
# Classification
if self.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(pcls, self.cn, device=self.device) # targets
t[range(n), tcls[i]] = self.cp
                    lcls += self.BCEcls(pcls, t, pcls, object_loss=False, patch=False)  # BCE
                p_classes_all.append(pcls)
# Append targets to text file
# with open('targets.txt', 'a') as file:
# [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]
# tobj[b, a, gj, gi] = iou # iou ratio
            mask = torch.zeros_like(pi[..., 4], dtype=torch.bool)
            mask[b, a, gj, gi] = True
            # Keep objectness logits only at matched cells; everywhere else stays zero
            masked_values = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device)
            masked_values[mask] = pi[..., 4][mask]
            obji = self.BCEobj(masked_values, tobj, mask, object_loss=True, patch=False)
# obji*=
lobj += obji * self.balance[i] # obj loss
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()
if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
# lbox *= self.hyp["box"]
# lobj *= self.hyp["obj"]
# lcls *= self.hyp["cls"]
bs = tobj.shape[0] # batch size
# sim_loss= torch.tensor(0.00, dtype=torch.float16, device='cuda:0')
# kl_loss= torch.tensor(0.00, dtype=torch.float16, device='cuda:0')
# torch.tensor(0.001, dtype=torch.float16, device='cuda:0')
        p_classes_all = torch.cat(p_classes_all, dim=0)
        return lbox, lobj, lcls, p_classes_all
    def loss_com_image(self, tcls, tbox, indices, anchors, p):
        """Collects the class-logit subset of predictions for all matched targets across layers."""
        image_classes_all = []
# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
# tobj = torch.zeros(pi.shape[:4], dtype=pi.dtype, device=self.device) # target obj
n = b.shape[0] # number of targets
if n:
# pxy, pwh, _, pcls = pi[b, a, gj, gi].tensor_split((2, 4, 5), dim=1) # faster, requires torch 1.8.0
                pxy, pwh, pobj, pcls = pi[b, a, gj, gi].split((2, 2, 1, self.nc), 1)  # target-subset of predictions
                image_classes_all.append(pcls)
        image_classes_all = torch.cat(image_classes_all)
        return image_classes_all
def build_targets(self, p, target):
"""Prepares model targets from input targets (image,class,x,y,w,h) for loss computation, returning class, box,
indices, and anchors.
"""
        targets = target[:, :6]  # (image, class, x, y, w, h); drop the extra patch-region columns
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch = [], [], [], []
gain = torch.ones(7, device=self.device) # normalized to gridspace gain
ai = torch.arange(na, device=self.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
targets = torch.cat((targets.repeat(na, 1, 1), ai[..., None]), 2) # append anchor indices
g = 0.5 # bias
off = (
torch.tensor(
[
[0, 0],
[1, 0],
[0, 1],
[-1, 0],
[0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
],
device=self.device,
).float()
* g
) # offsets
for i in range(self.nl):
anchors, shape = self.anchors[i], p[i].shape
gain[2:6] = torch.tensor(shape)[[3, 2, 3, 2]] # xyxy gain
# Match targets to anchors
            t = targets * gain  # shape(3,n,7)
if nt:
# Matches
r = t[..., 4:6] / anchors[:, None] # wh ratio
j = torch.max(r, 1 / r).max(2)[0] < self.hyp["anchor_t"] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter
# Offsets
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1 < g) & (gxy > 1)).T
l, m = ((gxi % 1 < g) & (gxi > 1)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
else:
t = targets[0]
offsets = 0
# Define
bc, gxy, gwh, a = t.chunk(4, 1) # (image, class), grid xy, grid wh, anchors
a, (b, c) = a.long().view(-1), bc.long().T # anchors, image, class
gij = (gxy - offsets).long()
gi, gj = gij.T # grid indices
# Append
indices.append((b, a, gj.clamp_(0, shape[2] - 1), gi.clamp_(0, shape[3] - 1))) # image, anchor, grid
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class
return tcls, tbox, indices, anch
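    # Worked example of the offset logic above (numbers illustrative): a target at
    # grid xy (3.2, 5.7) has gxy % 1 = (0.2, 0.7), so with g = 0.5 the cell to its
    # left (offset j) and, via the inverse coordinate gxi, the cell below (offset m)
    # are also taken as positives; each target matches up to 3 of the 5 rows of `off`.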
    def compute_kl_loss(self, tcls, pred):
        """KL divergence between the empirical target class distribution and the Gumbel-softmax of the
        predicted class logits.
        """
        data2 = self.gumbel_softmax(pred, tau=1, hard=False)
        data1 = [torch.tensor(lst) for lst in tcls]
        stacked_data1 = torch.stack(data1)
        data1_counts = torch.sum(stacked_data1, dim=0).detach()  # per-class target counts
        data2_counts = torch.sum(data2, dim=0)  # per-class predicted probability mass
        epsilon = 1e-12
        # Normalize counts into probability distributions
        data1_probs = (data1_counts + epsilon) / sum(data1_counts)
        data2_probs = (data2_counts + epsilon) / sum(data2_counts)
        data1_probs = data1_probs.to(device=self.device, dtype=torch.float).requires_grad_(True)
        # F.kl_div(input_log, target) computes KL(target || input), here KL(data2 || data1)
        kl_div = F.kl_div(data1_probs.log(), data2_probs, reduction='batchmean')
        return kl_div
    def gumbel_softmax(self, logits, tau=1, hard=False, eps=1e-10):
        """Differentiable sampling from a categorical distribution via the Gumbel-softmax trick."""
        gumbels = -torch.empty_like(logits).exponential_().log()  # ~Gumbel(0,1)
gumbels = (logits + gumbels) / tau # ~Gumbel(logits,tau)
y_soft = gumbels.softmax(dim=-1)
if hard:
# Straight through
index = y_soft.max(dim=-1, keepdim=True)[1]
y_hard = torch.zeros_like(logits).scatter_(-1, index, 1.0)
ret = y_hard - y_soft.detach() + y_soft
else:
# Reparameterization trick
ret = y_soft
return ret
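    # Sanity sketch (illustrative): with hard=True the forward output is one-hot
    # while gradients flow through the soft sample (straight-through estimator):
    #   logits = torch.randn(4, 14, requires_grad=True)
    #   y = compute_loss.gumbel_softmax(logits, tau=1, hard=True)  # rows are one-hot
    #   y.sum().backward()  # logits.grad is populated despite the hard argmax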
    def make_grid(self, nx=20, ny=20, i=0):
        """Builds the cell-offset grid and stride-scaled anchor grid for detection layer `i`."""
        d = self.anchors[i].device
        t = self.anchors[i].dtype
        shape = 1, self.na, ny, nx, 2  # grid shape
y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t)
yv, xv = torch.meshgrid(y, x, indexing='ij') #if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility
grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. y = 2.0 * x - 0.5
anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape)
return grid, anchor_grid
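    # Shape sketch (illustrative): with nx = ny = 20 and na = 3, make_grid returns
    # grid and anchor_grid of shape (1, 3, 20, 20, 2); grid holds the cell offsets
    # shifted by -0.5 and anchor_grid the stride-scaled anchor sizes in pixels.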
    def calculate_overlap(self, box1, box2):
        """Calculates the intersection area, in pixels, between two xywh boxes normalized to a 640 px image."""
        # Scale normalized coordinates back to the 640x640 input resolution
        box1 = box1 * 640
        box2 = box2 * 640
x_left = torch.max(box1[0] - box1[2] / 2, box2[0] - box2[2] / 2)
y_top = torch.max(box1[1] - box1[3] / 2, box2[1] - box2[3] / 2)
x_right = torch.min(box1[0] + box1[2] / 2, box2[0] + box2[2] / 2)
y_bottom = torch.min(box1[1] + box1[3] / 2, box2[1] + box2[3] / 2)
# Calculate width and height of the intersection rectangle
width = torch.clamp(x_right - x_left, min=0)
height = torch.clamp(y_bottom - y_top, min=0)
# If the intersection is valid (non-negative area), return the area
intersection_area = width * height
return intersection_area
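    # Example (illustrative, using the 640 px scaling above): normalized xywh boxes
    # (0.5, 0.5, 0.2, 0.2) and (0.55, 0.5, 0.2, 0.2) intersect over a 96 x 128 px
    # region, i.e. 12288 px^2, far above the 100 px^2 threshold used in merge_tensors.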
    def merge_tensors(self, tensor1, tensor_9):
        """Splits the boxes in `tensor_9` by overlap with the ground-truth boxes in `tensor1`: boxes that
        overlap a same-image GT box by more than 100 px^2 go to `patch_tensor`, the rest to
        `without_patch_tensor`.
        """
        without_patch_tensor = []
        patch_tensor = []
        tensor2 = tensor_9[:, :6]
        # Compare each candidate box against every ground-truth box from the same image
        for box2 in tensor2:
overlap = False
# Check for overlap with each bounding box in tensor1
for box1 in tensor1:
if box1[0] == box2[0]:
if self.calculate_overlap(box1[2:], box2[2:]) > 100:
overlap = True
break
# If there's no overlap, add the bounding box from tensor2
if overlap:
patch_tensor.append(box2)
if not overlap:
without_patch_tensor.append(box2)
        if without_patch_tensor:
            without_patch_tensor = torch.stack(without_patch_tensor)
            without_patch_tensor = without_patch_tensor[without_patch_tensor[:, 0].argsort()]  # sort by image index
        return without_patch_tensor, patch_tensor
    def pseudo_targets(self, p, target):
        """Decodes raw predictions, applies NMS, and filters the detections by confidence and class
        entropy into pseudo-targets; returns (non-patch pseudo-targets, patch pseudo-targets,
        original targets, mid-confidence pseudo-targets).
        """
# targets= target[:,:6]
# p_region= torch.cat((target[:,0:1],target[:,6:10]),dim=1)
# p_n=[]
# for i in range(self.nl):
# selected_ratio=[8,16,32]
# size_ratio=[(32,32),(16,16),(8,8)]
# unique_boxes, _ = torch.unique(p_region, dim=0, return_inverse=True)
# extracted_slices= []
# for l in range(p[i].shape[0]):
# _, x1, y1, x2, y2 = unique_boxes[l].int()
# x1, x2, y1, y2 = int(x1 / selected_ratio[i]), int(x2 / selected_ratio[i]), int(y1 / selected_ratio[i]), int(y2 / selected_ratio[i])
# # Create a mask of zeros
# mask = torch.ones_like(p[i][l])
# # Set the specified region to 1
# mask[:, y1:y2, x1:x2, :] = 0
# # Apply the mask to p[i][l]
# masked_p = p[i][l] * mask
# # Extract the masked slice
# # extracted_slice = masked_p[:, y1:y2, x1:x2, :]
# extracted_slices.append(masked_p.squeeze(0))
# p_n.append(torch.stack(extracted_slices, dim=0))
        z = []
        p_clone = p.copy()
        for i in range(self.nl):
            bs, self.na, ny, nx, self.no = p_clone[i].shape
            xy, wh, conf = p_clone[i].sigmoid().split((2, 2, self.nc + 1), 4)
            self.grid[i], self.anchor_grid[i] = self.make_grid(nx, ny, i)
            xy = (xy * 2 + self.grid[i]) * self.stride[i]  # xy
            wh = (wh * 2) ** 2 * self.anchor_grid[i]  # wh
            y = torch.cat((xy, wh, conf), 4)
            z.append(y.view(bs, self.na * nx * ny, self.no))
        z_new = torch.cat(z, 1)
        lb = []  # autolabelling is disabled here
        train_preds = non_max_suppression_ps(
            z_new.detach().cpu(), 0.5, 0.2, labels=lb, multi_label=False, agnostic=True, max_det=300
        )
        train_pseudo_labels = [torch.tensor([]) for _ in range(len(train_preds))]  # conf > 0.95 set
        train_pseudo_labels_60_90 = [torch.tensor([]) for _ in range(len(train_preds))]  # 0.60 < conf <= 0.95 set
        for num, preds in enumerate(train_preds):
            for bbox in preds:
                if ((bbox[2] - bbox[0]) * (bbox[3] - bbox[1])) > 10:  # skip tiny boxes
                    if bbox[4] > 0.95:  # high-confidence candidates
                        last_14_values = bbox[-14:].softmax(dim=0)  # last 14 values (class scores in this setup)
                        probs = last_14_values / last_14_values.sum()  # normalize into probabilities
                        dist = torch.distributions.Categorical(probs)
                        entropy = dist.entropy()
                        if entropy.item() < 2.7:  # keep only low-entropy (confident) class distributions
                            bbox_tensor = bbox.unsqueeze(0)  # add a batch dimension
                            train_pseudo_labels[num] = torch.cat((train_pseudo_labels[num], bbox_tensor), dim=0)
                    elif 0.60 < bbox[4] <= 0.95:  # mid-confidence candidates
                        bbox_tensor = bbox.unsqueeze(0)
                        train_pseudo_labels_60_90[num] = torch.cat((train_pseudo_labels_60_90[num], bbox_tensor), dim=0)
        train_pseudo_box = [[] for _ in range(len(train_pseudo_labels))]
        train_pseudo_box_60_90 = [[] for _ in range(len(train_pseudo_labels_60_90))]
        for num2, pred_box in enumerate(train_pseudo_labels):
            for bbox in pred_box:
                train_pseudo_box[num2].append(bbox.detach().cpu().numpy())
        for num2, pred_box in enumerate(train_pseudo_labels_60_90):
            for bbox in pred_box:
                train_pseudo_box_60_90[num2].append(bbox.detach().cpu().numpy())
pesudo_target_list = []
pesudo_target_list_60_90 = []
        for i, tensor in enumerate(train_pseudo_labels):
            for t in tensor:
                tensor_values = [
                    i,  # image index within the batch
                    t[5].item(),  # predicted class
                    torch.tensor(int(t[0].item())),  # x1
                    torch.tensor(int(t[1].item())),  # y1
                    torch.tensor(int(t[2].item())),  # x2
                    torch.tensor(int(t[3].item())),  # y2
                ]
                pesudo_target_list.append(tensor_values)
        for i, tensor in enumerate(train_pseudo_labels_60_90):
            for t in tensor:
                tensor_values = [
                    i,  # image index within the batch
                    t[5].item(),  # predicted class
                    torch.tensor(int(t[0].item())),  # x1
                    torch.tensor(int(t[1].item())),  # y1
                    torch.tensor(int(t[2].item())),  # x2
                    torch.tensor(int(t[3].item())),  # y2
                ]
                pesudo_target_list_60_90.append(tensor_values)
# Convert the inner lists to tensors
pesudo_target_list = [torch.tensor(tensor_values) for tensor_values in pesudo_target_list]
pesudo_target_list_60_90 = [torch.tensor(tensor_values) for tensor_values in pesudo_target_list_60_90]
        targets_ps = target[:, :6]
        wp_target = []
        p_target = []
        wp_target_60_90 = []
        if pesudo_target_list:
            pesudo_target_list_concatenated = torch.stack(pesudo_target_list, dim=0)
            # normalize pixel xyxy by the 640x640 network input size
            pesudo_target_list_concatenated = pesudo_target_list_concatenated / torch.tensor([1, 1, 640, 640, 640, 640])
            x_min, y_min = pesudo_target_list_concatenated[:, 2], pesudo_target_list_concatenated[:, 3]
            x_max, y_max = pesudo_target_list_concatenated[:, 4], pesudo_target_list_concatenated[:, 5]
            # convert xyxy -> xywh (center, size); compute all four before writing back
            x_center = (x_min + x_max) / 2
            y_center = (y_min + y_max) / 2
            width = x_max - x_min
            height = y_max - y_min
            pesudo_target_list_concatenated[:, 2] = x_center
            pesudo_target_list_concatenated[:, 3] = y_center
            pesudo_target_list_concatenated[:, 4] = width
            pesudo_target_list_concatenated[:, 5] = height
            pesudo_target_list_concatenated = pesudo_target_list_concatenated.to(target.device)
            wp_target, p_target = self.merge_tensors(targets_ps, pesudo_target_list_concatenated)
        if pesudo_target_list_60_90:
            pesudo_target_list_concatenated_60_90 = torch.stack(pesudo_target_list_60_90, dim=0)
            pesudo_target_list_concatenated_60_90 = pesudo_target_list_concatenated_60_90 / torch.tensor([1, 1, 640, 640, 640, 640])
            x_min, y_min = pesudo_target_list_concatenated_60_90[:, 2], pesudo_target_list_concatenated_60_90[:, 3]
            x_max, y_max = pesudo_target_list_concatenated_60_90[:, 4], pesudo_target_list_concatenated_60_90[:, 5]
            # convert xyxy -> xywh; compute all four before writing back
            x_center = (x_min + x_max) / 2
            y_center = (y_min + y_max) / 2
            width = x_max - x_min
            height = y_max - y_min
            pesudo_target_list_concatenated_60_90[:, 2] = x_center
            pesudo_target_list_concatenated_60_90[:, 3] = y_center
            pesudo_target_list_concatenated_60_90[:, 4] = width
            pesudo_target_list_concatenated_60_90[:, 5] = height
            pesudo_target_list_concatenated_60_90 = pesudo_target_list_concatenated_60_90.to(target.device)
            wp_target_60_90, p_target_60_90 = self.merge_tensors(targets_ps, pesudo_target_list_concatenated_60_90)
        return wp_target, p_target, targets_ps, wp_target_60_90
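    # Pipeline summary (thresholds as coded above): decoded boxes pass
    # non_max_suppression_ps with thresholds 0.5 and 0.2, then boxes with
    # conf > 0.95 and class entropy < 2.7 become hard pseudo-targets, while
    # 0.60 < conf <= 0.95 boxes form the mid-confidence set fed to similarity().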
    def calculate_iou(self, bbox1, bbox2):
"""
Calculate Intersection over Union (IoU) between two bounding boxes.
Arguments:
bbox1 (tuple): Coordinates of the first bounding box in the format (x1, y1, x2, y2).
bbox2 (tuple): Coordinates of the second bounding box in the format (x1, y1, x2, y2).
Returns:
float: Intersection over Union (IoU) between the two bounding boxes.
"""
# Extract coordinates of the bounding boxes
x1_1, y1_1, x2_1, y2_1 = bbox1
x1_2, y1_2, x2_2, y2_2 = bbox2
# Calculate the coordinates of the intersection rectangle
x_left = max(x1_1, x1_2)
y_top = max(y1_1, y1_2)
x_right = min(x2_1, x2_2)
y_bottom = min(y2_1, y2_2)
# If there's no intersection, return 0
if x_right < x_left or y_bottom < y_top:
return 0.0
# Calculate the area of intersection rectangle
intersection_area = (x_right - x_left) * (y_bottom - y_top)
# Calculate the area of both bounding boxes
bbox1_area = (x2_1 - x1_1) * (y2_1 - y1_1)
bbox2_area = (x2_2 - x1_2) * (y2_2 - y1_2)
# Calculate the area of union
union_area = bbox1_area + bbox2_area - intersection_area
# Calculate IoU
iou = intersection_area / union_area
return iou
    def xywh_to_xyxy(self, xywh):
        """Converts a single (x_center, y_center, w, h) box to (x1, y1, x2, y2) corner format."""
        x_center, y_center, width, height = xywh
x_min = x_center - width / 2
y_min = y_center - height / 2
x_max = x_center + width / 2
y_max = y_center + height / 2
return torch.tensor([x_min, y_min, x_max, y_max])
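    # Example (values follow from the arithmetic above): xywh_to_xyxy((0.5, 0.5, 0.2, 0.4))
    # returns tensor([0.4, 0.3, 0.6, 0.7]), converting center/size to corner coordinates.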
    def compute_similarity(self, feature_maps):
        """Builds an N x N cosine-similarity matrix over average-pooled feature maps."""
        num_samples = len(feature_maps)
similarity_matrix = torch.zeros((num_samples, num_samples))
# Apply average pooling to each feature map
pooled_feature_maps = [F.avg_pool2d(fm, fm.shape[2]) for fm in feature_maps]
for i in range(num_samples):
for j in range(num_samples):
# Calculate cosine similarity between pooled feature maps
similarity_matrix[i, j] = F.cosine_similarity(pooled_feature_maps[i].view(-1), pooled_feature_maps[j].view(-1), dim=0)
return similarity_matrix
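    # Usage sketch (illustrative, assumes square (C, H, W) feature maps):
    #   sim = compute_loss.compute_similarity([fm1, fm2, fm3])
    # yields a 3 x 3 matrix with ones on the diagonal; the O(N^2) pair loop is
    # fine for the handful of ROI features used here.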
    def feat_box(self, pseudo_targets, int_feat, Num_targets, p_layer):
        """ROI-aligns a fixed 4x4 feature crop for each target box from feature level `p_layer`."""
        pooled_feature_map_pseudo = []
        is_bbox = []
for i in range(Num_targets):
img_num = int(pseudo_targets[i,0].item())
            p2_feature_map = int_feat[p_layer][img_num]
x_center = pseudo_targets[i, 2]
y_center = pseudo_targets[i, 3]
width = pseudo_targets[i, 4]
height = pseudo_targets[i, 5]
bb = [round(x_center.item(),4), round(y_center.item(),4), round(width.item(),4), round(height.item(),4)]
            # (w, h, w, h) of this feature level, used to scale the normalized box into feature coordinates
            p2_feature_shape_tensor = torch.tensor(
                [p2_feature_map.shape[2], p2_feature_map.shape[1], p2_feature_map.shape[2], p2_feature_map.shape[1]]
            )
            p2_normalized_xyxy = self.xywh_to_xyxy(bb) * p2_feature_shape_tensor
            p2_x_min, p2_y_min, p2_x_max, p2_y_max = get_fixed_xyxy(p2_normalized_xyxy, p2_feature_map)
batch_index = torch.tensor([0], dtype=torch.float32).to(self.device)
p2_roi = torch.tensor([p2_x_min, p2_y_min, p2_x_max, p2_y_max], device=self.device).float()
is_bbox.append(p2_roi)
# Concatenate the batch index to the bounding box coordinates
p2_roi_with_batch_index = torch.cat([batch_index, p2_roi])
# relevant_feature_map = p3_feature_map.unsqueeze(0)[:, :, y_min:y_max, x_min:x_max]
p2_resized_object = torchvision.ops.roi_align(p2_feature_map.unsqueeze(0), p2_roi_with_batch_index.unsqueeze(0).to(self.device), output_size=(4, 4))
pooled_feature_map_pseudo.append(p2_resized_object)
return pooled_feature_map_pseudo, is_bbox
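    # Note on the design above: every box, whatever its size, is pooled by
    # torchvision.ops.roi_align into a fixed 4 x 4 window, so each target yields
    # a (1, C, 4, 4) feature tensor that downstream cosine comparisons can align.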
    def similarity(self, int_feat, pseudo_targets, patch_targets, orignal_targets, p_layer):
        """Hinge loss on cosine similarity between pooled ROI features: for each pseudo box, the lowest
        same-class similarity to a ground-truth box is pushed below the highest different-class
        similarity plus a 0.05 margin.
        """
        losses = []
        Num_targets = len(pseudo_targets)
        Num_targets_label = len(orignal_targets)
        target_featuers, target_box = self.feat_box(orignal_targets, int_feat, Num_targets_label, p_layer)
# for feat2, label2, is_bb in zip(target_featuers, orignal_targets[:,1],target_box):
# similarity=0
# for feat1, label1 , ps_bb in zip( target_featuers, orignal_targets[:,1],target_box ):
# iou= self.calculate_iou(ps_bb,is_bb)
# if iou < 0.3:
# feat2 = F.avg_pool2d(feat2, feat2.shape[2])
# if label1 == label2:
# feat1 = F.avg_pool2d(feat1, feat1.shape[2])
# similarity = cosine_similarity(feat1,feat2).mean()
# # similarity=(similarity+1)/2
# losses.append(1-similarity)
# if label1 != label2:
# feat1 = F.avg_pool2d(feat1, feat1.shape[2])
# similarity = cosine_similarity(feat1,feat2).mean()
# similarity=(similarity+1)/2
# alpha= 0.2
# torch.max(torch.tensor(0.0), similarity )
# losses.append(similarity)
# # Calculate similarity between features (e.g., cosine similari
        if Num_targets:
            pseudo_featuers, pseudo_box = self.feat_box(pseudo_targets, int_feat, Num_targets, p_layer)
            for feat2, label2, is_bb in zip(pseudo_featuers, pseudo_targets[:, 1], pseudo_box):
                min_same_class_sim = 100  # sentinel: lowest similarity to a same-class GT box
                max_diff_class_sim = 0  # sentinel: highest similarity to a different-class GT box
                for feat1, label1, ps_bb in zip(target_featuers, orignal_targets[:, 1], target_box):
                    iou = self.calculate_iou(ps_bb, is_bb)
                    feat2 = F.avg_pool2d(feat2, feat2.shape[2])
                    feat1 = F.avg_pool2d(feat1, feat1.shape[2])
                    if iou < 0.3:  # compare only spatially distinct boxes
                        similarity = cosine_similarity(feat1, feat2).mean()
                        similarity = similarity + 1  # shift the cosine range [-1, 1] to [0, 2]
                        if label1 == label2:
                            if min_same_class_sim > similarity:
                                min_same_class_sim = similarity
                        else:
                            if max_diff_class_sim < similarity:
                                max_diff_class_sim = similarity
                # Hinge only counts when both a same-class and a different-class pair were seen
                if min_same_class_sim < 100 and max_diff_class_sim > 0:
                    # clamp keeps the hinge at zero without a device-mismatched CPU constant
                    loss_value = torch.clamp(min_same_class_sim - (max_diff_class_sim + 0.05), min=0.0)
                    losses.append(loss_value)
        if losses:
            total_loss = torch.sum(torch.stack(losses)) / len(losses)
        else:
            total_loss = torch.zeros(1, device=self.device)
        return total_loss
# for feat2, label2, is_bb in zip(pseudo_featuers, pseudo_targets[:,1],pseudo_box):
# sim=-10
# for feat1_gr, label1 , ps_bb in zip( target_featuers, orignal_targets[:,1],target_box ):
# feat1 = feat1_gr.clone().detach()
# iou= self.calculate_iou(ps_bb,is_bb)
# feat2 = F.avg_pool2d(feat2, feat2.shape[2])
# feat1 = F.avg_pool2d(feat1, feat1.shape[2])
# if iou < 0.3:
# # Calculate similarity between features (e.g., cosine similarity)
# similarity = cosine_similarity(feat1,feat2).mean()
# if similarity > sim:
# sim=similarity
# max_similarity = similarity
# max_label1 =label1
# max_label2= label2
# # max_similarity=(max_similarity+1)/2
# # Compare labels
# if ((max_similarity > 0.95) and (max_label1 != max_label2)):
# # Compute loss (e.g., squared difference)
# # loss_sim = torch.mean(torch.abs(torch.tensor(feat1) - torch.tensor(feat2)))
# losses.append(max_similarity)
# elif ((max_similarity < 0.50) and (max_label1 == max_label2.item())):
# # Compute loss (e.g., squared difference)
# # loss_sim = torch.mean(torch.abs(torch.tensor(feat1) - torch.tensor(feat2)))
# max_similarity=(max_similarity+1)/2
# losses.append(1-max_similarity)
# if losses:
# total_loss = (torch.sum(torch.stack(losses))/len(losses) )
# else:
# total_loss= torch.zeros(1, device=self.device)
# return(total_loss)
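if __name__ == "__main__":
    # Minimal smoke test (illustrative shapes only; not the real training tensors).
    torch.manual_seed(0)
    pred = torch.randn(2, 3, 8, 8)  # fake objectness logits
    true = torch.randint(0, 2, (2, 3, 8, 8)).float()
    mask = torch.ones_like(pred)  # keep every cell active
    fl = FocalLoss(nn.BCEWithLogitsLoss(), gamma=1.5)
    print("focal loss:", fl(pred, true, mask, object_loss=True, patch=True).item())
    print("smooth_BCE(0.1):", smooth_BCE(0.1))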