使用深度學習進行圖像類任務時,網絡的輸入大小通常是固定的。最近在進行涉及文字檢測的工作時,由於預處理的resize縮小了原圖,導致字體變模糊,從而檢測失敗。後來想到使用帶overlap的方式對圖像進行縮放裁剪,即先將原圖縮放到一定尺寸,再裁剪成多個互相重疊的子圖作為網絡的輸入。
好了,來說正題。使用yolov3,網絡的輸入是352x352x3,而輸入圖像的大小從幾百到上千不等,因此需要對原圖進行resize。起初直接進行縮放 + 填充,檢測的mAP很低;後來分析發現,有些352x352的輸入圖像中的文字已經很模糊,因此直接縮放的方案不可行,改進後的方案如下:
- 原圖最大尺寸大於1000,則resize到800x800,再裁剪為9個352x352,overlap為128個像素(3 x 352 - 2 x 128 = 800)
- 原圖最大尺寸大於500且不超過1000,則resize到608x608,再裁剪為4個352x352,overlap為96個像素(2 x 352 - 96 = 608)
- 原圖最大尺寸不超過500,則resize到352x352。
python實現代碼如下,使用了PIL、opencv庫,將整個目錄下的圖像全部做縮放裁剪處理,代碼包含如下功能:
- 遍歷某一目錄的文件
- opencv進行圖像載入及保存
- opencv進行縮放裁剪
- PIL進行圖像顯示
import numpy as np
from PIL import Image
import cv2
import os
# Show a BGR image (the channel order OpenCV loads) in a viewer window.
def img_show(img):
    """Convert a BGR pixel array to RGB and display it through PIL.

    Args:
        img: Pixel array in BGR channel order, as accepted by
            ``cv2.cvtColor`` with ``cv2.COLOR_BGR2RGB``.

    Prints the type and shape of the converted array before showing it.
    Returns nothing.
    """
    # PIL expects RGB ordering, so swap the channels first.
    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    print(type(rgb_pixels), rgb_pixels.shape)
    Image.fromarray(rgb_pixels).show()
# Aspect-preserving resize onto a square canvas with black padding.
def img_resize(cvImageSrc, net_size, width, height):
    """Fit an image into a ``net_size`` x ``net_size`` square.

    The image is scaled so that its longer side becomes ``net_size`` while
    the aspect ratio is kept; the scaled image is then centred on a black
    canvas. If the horizontal and vertical scale factors differ by less
    than 0.001 the image is resized straight to the square instead, because
    the distortion is negligible.

    Args:
        cvImageSrc: Source pixels as a NumPy array indexed
            ``[row, column, channel]``. The letterbox path writes into a
            3-channel canvas, so a 3-channel input is expected there.
        net_size: Side length, in pixels, of the square output.
        width: Width of ``cvImageSrc`` in pixels.
        height: Height of ``cvImageSrc`` in pixels.

    Returns:
        ``cvImageSrc`` itself when it already measures ``net_size`` x
        ``net_size``; otherwise a new array of that size (``uint8`` on the
        letterbox path).
    """
    # Already the requested size: hand the input back untouched.
    if width == net_size and height == net_size:
        return cvImageSrc

    scale_w = net_size / width
    scale_h = net_size / height

    # Compare the two *different* scale factors. The earlier code compared
    # the vertical factor with itself, which is always zero, so every image
    # was stretched to a square and the letterbox path was never reached.
    if abs(scale_w - scale_h) < 0.001:
        return cv2.resize(cvImageSrc, (net_size, net_size))

    # The smaller factor belongs to the longer side; that side fills the
    # canvas and the other one is scaled by the same factor.
    if scale_w < scale_h:
        new_w = net_size
        new_h = max(1, (height * net_size) / width)
    else:
        new_h = net_size
        new_w = max(1, (width * net_size) / height)

    if new_w == width and new_h == height:
        # The longer side already equals net_size: only padding is needed.
        fitted = cvImageSrc
        net_w, net_h = width, height
    else:
        net_w = int(new_w + 0.5)
        net_h = int(new_h + 0.5)
        # Round odd sizes down to even so the padding splits evenly on both
        # sides, but never let a side collapse to zero pixels.
        net_w = max(1, net_w - net_w % 2)
        net_h = max(1, net_h - net_h % 2)
        # cv2.resize takes the target size as (width, height).
        fitted = cv2.resize(cvImageSrc, (net_w, net_h))

    det_mat = np.zeros((net_size, net_size, 3), dtype="uint8")
    # Centre using the size actually pasted, and copy with one slice
    # assignment. The earlier per-pixel loop used a -1 offset that wrapped
    # the last row/column around to the opposite edge.
    base_w = (net_size - net_w) // 2
    base_h = (net_size - net_h) // 2
    det_mat[base_h:base_h + net_h, base_w:base_w + net_w, :] = fitted
    return det_mat
# Batch driver: resize every image found directly under ``rootdir`` and write
# 352x352 crops of it into the ``test1`` directory next to the input folder.
baseRoot = "/Users/lemonhe/Documents/CNN/dataset/01-data/"
rootdir = baseRoot + "dataset_test"
out_dir = baseRoot + "test1"
crop_size = 352  # side length of every crop that is written out
threshold1 = 1000  # longest side above this: 800x800 canvas, 3x3 crops
threshold2 = 500  # longest side above this: 608x608 canvas, 2x2 crops


def _save_tiles(det_mat, starts, stem):
    """Write one crop_size x crop_size crop per (row, column) start offset.

    Files are named ``<stem>_<row><col>.jpg`` with 1-based row and column
    numbers, e.g. ``img0_11.jpg`` for the top-left crop.
    """
    for row, top in enumerate(starts, start=1):
        for col, left in enumerate(starts, start=1):
            tile = np.uint8(det_mat[top:top + crop_size, left:left + crop_size, :])
            tile_path = stem + "_" + str(row) + str(col) + ".jpg"
            # cv2.imwrite reports failure through its return value only.
            if not cv2.imwrite(tile_path, tile):
                print("failed to write " + tile_path)


# cv2.imwrite does not create missing directories, so make sure it exists.
os.makedirs(out_dir, exist_ok=True)

# Sorted so that the count-based output names are reproducible between runs.
entries = sorted(os.listdir(rootdir))
print(len(entries))
count = 0
for entry in entries:
    path = os.path.join(rootdir, entry)
    print(path)
    if not os.path.isfile(path):
        continue
    img = cv2.imread(path)
    if img is None:
        # cv2.imread returns None for files it cannot decode.
        print("this is nonetype")
        continue

    height, width, channel = img.shape
    print(height, width, channel)
    max_dim = max(height, width)
    stem = out_dir + "/img" + str(count)

    if max_dim > threshold1:
        # 3 * 352 - 2 * 128 = 800, so the crops start at 0, 224 and 448 and
        # the last one ends exactly on the canvas edge.
        det_mat = img_resize(img, 800, width, height)
        _save_tiles(det_mat, (0, 224, 448), stem)
    elif max_dim > threshold2:
        # 2 * 352 - 96 = 608, so the crops start at 0 and 256.
        det_mat = img_resize(img, 608, width, height)
        _save_tiles(det_mat, (0, 256), stem)
    else:
        det_mat = img_resize(img, crop_size, width, height)
        # Opens a viewer window for each small image; comment this out for
        # unattended batch runs.
        img_show(det_mat)
        path_352 = stem + ".jpg"
        if not cv2.imwrite(path_352, np.uint8(det_mat)):
            print("failed to write " + path_352)

    # Only successfully decoded images consume an output index.
    count = count + 1
    print(count)
執行python腳本,結果如下:
輸入圖片如下,分辨率為719x1280,裁剪後輸出9幅352x352的子圖,這樣就完成了圖像的預處理。
更多文章、技術交流、商務合作、聯系博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯系: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
