YOLOv4目标检测算法

  YOLOv4目标检测算法

  本文主要介绍Python目标检测YOLOv4中的Mosaic(马赛克)数据增强方法。有需要的朋友可以参考一下,希望能有所帮助。祝大家多多进步,早日升职加薪。

  目录:什么是Mosaic数据增强方法 / 实现思路 / 全部代码

  

目录

  YOLOv4的Mosaic(马赛克)数据增强参考了CutMix数据增强方法,二者在原理上比较相似。

  CutMix数据增强方法使用两张图片进行拼接,

  而Mosaic使用了四张图片。根据论文的说法,它有一个巨大的优点:丰富了检测物体的背景。而且在计算BN(Batch Normalization)的时候,一次会同时计算四张图片的数据。如下图所示:

  

什么是Mosaic数据增强方法

  1. 每次读取四张图片。

  2. 分别对四张图片进行翻转、缩放和色域变换,并按照四个方向的位置摆放好。

  3. 进行图片的组合和标注框的组合。

  

实现思路

  全部代码如下:

  from PIL import Image, ImageDraw

  import numpy as np

  from matplotlib.colors import rgb_to_hsv, hsv_to_rgb

  import math

  def rand(a=0, b=1):
      """Return a random float drawn uniformly from the half-open interval [a, b)."""
      # np.random.rand() is uniform on [0, 1): scale by the interval width,
      # then shift by the lower bound.
      return np.random.rand() * (b - a) + a

  def merge_bboxes(bboxes, cutx, cuty):
      """Clip the boxes of four mosaic tiles to the quadrant each tile occupies.

      The mosaic is split at the point ``(cutx, cuty)``. Tile ``i`` keeps only
      the part of its boxes that lies inside its own quadrant:

      * ``i == 0`` -> top-left     (``x <= cutx``, ``y <= cuty``)
      * ``i == 1`` -> bottom-left  (``x <= cutx``, ``y >= cuty``)
      * ``i == 2`` -> bottom-right (``x >= cutx``, ``y >= cuty``)
      * ``i == 3`` -> top-right    (``x >= cutx``, ``y <= cuty``)

      Args:
          bboxes: Sequence of four box collections, one per tile. Every box is
              indexable as ``[x1, y1, x2, y2, ..., last]``; the last element is
              copied through unchanged.
          cutx: x coordinate of the vertical cut line.
          cuty: y coordinate of the horizontal cut line.

      Returns:
          A flat list of ``[x1, y1, x2, y2, last]`` lists. Boxes lying entirely
          outside their quadrant are dropped, and so are boxes whose clipped
          side becomes shorter than 5 pixels.
      """
      merge_bbox = []
      for i, boxes in enumerate(bboxes):
          for box in boxes:
              x1, y1, x2, y2 = box[0], box[1], box[2], box[3]

              if i == 0:
                  # Top-left quadrant: drop boxes entirely below/right of the cut.
                  if y1 > cuty or x1 > cutx:
                      continue
                  if y2 >= cuty and y1 <= cuty:
                      y2 = cuty
                      if y2 - y1 < 5:
                          continue
                  if x2 >= cutx and x1 <= cutx:
                      x2 = cutx
                      if x2 - x1 < 5:
                          continue

              if i == 1:
                  # Bottom-left quadrant: drop boxes entirely above/right of the cut.
                  if y2 < cuty or x1 > cutx:
                      continue
                  if y2 >= cuty and y1 <= cuty:
                      y1 = cuty
                      if y2 - y1 < 5:
                          continue
                  if x2 >= cutx and x1 <= cutx:
                      x2 = cutx
                      if x2 - x1 < 5:
                          continue

              if i == 2:
                  # Bottom-right quadrant: drop boxes entirely above/left of the cut.
                  if y2 < cuty or x2 < cutx:
                      continue
                  if y2 >= cuty and y1 <= cuty:
                      y1 = cuty
                      if y2 - y1 < 5:
                          continue
                  if x2 >= cutx and x1 <= cutx:
                      x1 = cutx
                      if x2 - x1 < 5:
                          continue

              if i == 3:
                  # Top-right quadrant: drop boxes entirely below/left of the cut.
                  if y1 > cuty or x2 < cutx:
                      continue
                  if y2 >= cuty and y1 <= cuty:
                      y2 = cuty
                      if y2 - y1 < 5:
                          continue
                  if x2 >= cutx and x1 <= cutx:
                      x1 = cutx
                      if x2 - x1 < 5:
                          continue

              merge_bbox.append([x1, y1, x2, y2, box[-1]])
      return merge_bbox

  def get_random_data(annotation_line, input_shape, random=True, hue=.1, sat=1.5, val=1.5, proc_img=True):
      """Build one mosaic sample from four annotated images.

      Each image is optionally mirrored, resized by a random scale, jittered in
      HSV space and pasted onto its own grey ``(h, w)`` canvas at a fixed
      offset. The four canvases are then cut at a random point and stitched
      together quadrant by quadrant, and the boxes are clipped accordingly by
      ``merge_bboxes``.

      Args:
          annotation_line: Sequence of exactly four annotation strings. Each is
              whitespace separated: an image path followed by zero or more
              boxes written as five comma-separated integers. The first four
              are used as ``x1,y1,x2,y2``; the fifth is passed through
              unchanged (presumably a class id - confirm against the dataset).
          input_shape: ``(h, w)`` of the output image.
          random: Unused; kept for interface compatibility.
          hue: Maximum absolute shift added to the hue channel.
          sat: Upper bound of the saturation factor; the factor is drawn from
              ``[1, sat)`` or the reciprocal of such a draw, with equal odds.
          val: Same as ``sat`` but for the value (brightness) channel.
          proc_img: Unused; kept for interface compatibility.

      Returns:
          ``(new_image, new_boxes)`` where ``new_image`` is a float array of
          shape ``(h, w, 3)`` with values in ``[0, 1]`` and ``new_boxes`` is
          the list returned by ``merge_bboxes``.

      Raises:
          ValueError: If ``annotation_line`` does not hold exactly four entries.

      Note:
          Every intermediate tile is displayed with ``img.show()``; this is a
          visual debugging aid kept from the original listing.
      """
      if len(annotation_line) != 4:
          raise ValueError("mosaic augmentation needs exactly 4 annotation lines")

      h, w = input_shape
      min_offset_x = 0.4
      min_offset_y = 0.4
      scale_low = 1 - min(min_offset_x, min_offset_y)
      scale_high = scale_low + 0.2

      image_datas = []
      box_datas = []
      # Paste offsets for the four tiles, in the order top-left, bottom-left,
      # bottom-right, top-right. Vertical offsets are derived from the height
      # (the original listing used the width for the third entry).
      place_x = [0, 0, int(w*min_offset_x), int(w*min_offset_x)]
      place_y = [0, int(h*min_offset_y), int(h*min_offset_y), 0]

      for index, line in enumerate(annotation_line):
          # Split the annotation into the image path and the box strings.
          line_content = line.split()
          # Open the image.
          image = Image.open(line_content[0])
          image = image.convert("RGB")
          # Original image size.
          iw, ih = image.size
          # Parse the boxes.
          box = np.array([np.array(list(map(int, item.split(',')))) for item in line_content[1:]])

          # Randomly mirror the image (only images that have boxes are mirrored).
          flip = rand() < .5
          if flip and len(box) > 0:
              image = image.transpose(Image.FLIP_LEFT_RIGHT)
              box[:, [0, 2]] = iw - box[:, [2, 0]]

          # Resize to a random fraction of the output size, using the aspect
          # ratio of the output rather than that of the source image.
          new_ar = w/h
          scale = rand(scale_low, scale_high)
          if new_ar < 1:
              nh = int(scale*h)
              nw = int(nh*new_ar)
          else:
              nw = int(scale*w)
              nh = int(nw/new_ar)
          image = image.resize((nw, nh), Image.BICUBIC)

          # Colour jitter in HSV space. The draws go into fresh local names so
          # that the hue/sat/val parameters stay intact for the next image.
          hue_shift = rand(-hue, hue)
          sat_scale = rand(1, sat) if rand() < .5 else 1/rand(1, sat)
          val_scale = rand(1, val) if rand() < .5 else 1/rand(1, val)
          x = rgb_to_hsv(np.array(image)/255.)
          x[..., 0] += hue_shift
          # Hue is cyclic: wrap values back into [0, 1].
          x[..., 0][x[..., 0] > 1] -= 1
          x[..., 0][x[..., 0] < 0] += 1
          x[..., 1] *= sat_scale
          x[..., 2] *= val_scale
          x[x > 1] = 1
          x[x < 0] = 0
          image = hsv_to_rgb(x)
          image = Image.fromarray((image*255).astype(np.uint8))

          # Paste the tile onto a grey canvas at the offset of its quadrant.
          dx = place_x[index]
          dy = place_y[index]
          new_image = Image.new('RGB', (w, h), (128, 128, 128))
          new_image.paste(image, (dx, dy))
          image_data = np.array(new_image)/255

          box_data = []
          # Map the boxes into canvas coordinates and clamp them to the canvas.
          if len(box) > 0:
              np.random.shuffle(box)
              box[:, [0, 2]] = box[:, [0, 2]]*nw/iw + dx
              box[:, [1, 3]] = box[:, [1, 3]]*nh/ih + dy
              box[:, 0:2][box[:, 0:2] < 0] = 0
              box[:, 2][box[:, 2] > w] = w
              box[:, 3][box[:, 3] > h] = h
              box_w = box[:, 2] - box[:, 0]
              box_h = box[:, 3] - box[:, 1]
              # Keep only boxes that are still wider and taller than one pixel.
              box = box[np.logical_and(box_w > 1, box_h > 1)]
              box_data = np.zeros((len(box), 5))
              box_data[:len(box)] = box

          image_datas.append(image_data)
          box_datas.append(box_data)

          # Debug preview: draw this tile's boxes and display the tile.
          img = Image.fromarray((image_data*255).astype(np.uint8))
          draw = ImageDraw.Draw(img)
          thickness = 3
          for tile_box in box_data:
              left, top, right, bottom = tile_box[0:4]
              for i in range(thickness):
                  draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
          img.show()

      # Pick the cut point and stitch the four tiles together.
      cutx = np.random.randint(int(w*min_offset_x), int(w*(1 - min_offset_x)))
      cuty = np.random.randint(int(h*min_offset_y), int(h*(1 - min_offset_y)))

      new_image = np.zeros([h, w, 3])
      new_image[:cuty, :cutx, :] = image_datas[0][:cuty, :cutx, :]
      new_image[cuty:, :cutx, :] = image_datas[1][cuty:, :cutx, :]
      new_image[cuty:, cutx:, :] = image_datas[2][cuty:, cutx:, :]
      new_image[:cuty, cutx:, :] = image_datas[3][:cuty, cutx:, :]

      # Clip the boxes of every tile to its quadrant.
      new_boxes = merge_bboxes(box_datas, cutx, cuty)

      return new_image, new_boxes

  def normal_(annotation_line, input_shape):
      """Load one annotated image and mirror it horizontally with its boxes.

      Args:
          annotation_line: Whitespace-separated string: an image path followed
              by zero or more boxes written as comma-separated integers whose
              first and third values are the left and right x coordinates.
          input_shape: Unused; kept for interface compatibility.

      Returns:
          ``(image, box)``: the mirrored PIL image and the integer box array
          with its x coordinates mirrored to match.
      """
      line = annotation_line.split()
      image = Image.open(line[0])
      box = np.array([np.array(list(map(int, item.split(',')))) for item in line[1:]])
      iw, _ = image.size
      image = image.transpose(Image.FLIP_LEFT_RIGHT)
      # An image without boxes yields an empty 1-D array, which cannot be
      # indexed by column, so only mirror when there is at least one box.
      if len(box) > 0:
          # Mirroring swaps the roles of the left and right edges.
          box[:, [0, 2]] = iw - box[:, [2, 0]]
      return image, box

  if __name__ == "__main__":
      # Demo: build one mosaic from four consecutive annotation lines and
      # display it with the merged boxes drawn on top.
      with open("2007_train.txt") as f:
          lines = f.readlines()

      if len(lines) < 4:
          raise SystemExit("2007_train.txt must contain at least 4 annotation lines")
      # The upper bound of randint is exclusive, so a + 4 never runs past the
      # end of the file and the slice below always holds four lines.
      a = np.random.randint(0, len(lines) - 3)

      # index = 0
      # line_all = lines[a:a+4]
      # for line in line_all:
      #     image_data, box_data = normal_(line,[416,416])
      #     img = image_data
      #     for j in range(len(box_data)):
      #         thickness = 3
      #         left, top, right, bottom = box_data[j][0:4]
      #         draw = ImageDraw.Draw(img)
      #         for i in range(thickness):
      #             draw.rectangle([left + i, top + i, right - i, bottom - i],outline=(255,255,255))
      #     img.show()
      #     # img.save(str(index)+"box.jpg")
      #     index = index+1

      line = lines[a:a+4]
      image_data, box_data = get_random_data(line, [416, 416])
      img = Image.fromarray((image_data*255).astype(np.uint8))
      draw = ImageDraw.Draw(img)
      thickness = 3
      for box in box_data:
          left, top, right, bottom = box[0:4]
          # Nested one-pixel rectangles give an outline `thickness` pixels wide.
          for i in range(thickness):
              draw.rectangle([left + i, top + i, right - i, bottom - i], outline=(255, 255, 255))
      img.show()
      # img.save("box_all.jpg")

  

  以上就是python目标检测YoloV4当中的Mosaic数据增强方法的详细内容,更多关于YoloV4Mosaic数据增强的资料请关注盛行IT软件开发工作室其它相关文章!

郑重声明:本文由网友发布,不代表盛行IT的观点,版权归原作者所有,仅为传播更多信息之目的,如有侵权请联系,我们将第一时间修改或删除,多谢。

留言与评论(共有 条评论)
   
验证码: