學習後的整體感覺:內容安排非常緊湊,課件內容很準確,作業有針對性,比賽題目比較難。
下面從內容上回顧一下課程內容:
首先,小白需要自學預習課(不過這部分內容在day5才發出來,稍微有點晚)。預習課主要熟悉Python語言,Paddle基本使用,NoteBook使用及深度學習基礎等。這一部分重點是熟悉Python語法和工具使用。學習內容中提供的深度學習基本概念和數學概念,基本上可以略過(數學部分只是把涉及的數學公式做了列舉,沒有展開;深度學習在day1和day2都有講解)。通過MNIST和波士頓房價案例,可以熟悉Paddle一般過程,不過該過程屬於靜態模型,大概瀏覽即可。
Day01 新冠疫情可視化
主要學習數據爬取和PyEchart的使用。該部分沒有涉及深度學習內容。
Day02 手勢識別
熟悉數據處理和DNN書寫訓練,目標是手勢識別分類問題。這一天算是正式開始接觸深度學習,基本是使用全連接,還不能算是卷積網絡。這部分我覺得應該掌握動態圖下設計的基本程序結構,大體上分為6步。
(1)準備訓練和測試數據
該部分主要學習訓練數據和測試數據的劃分方法。
# Generate the image list files: walk the dataset folders and split samples
# 9:1 into train/test lists, one "path\tlabel" line per image.
data_path = '/home/aistudio/data/data23668/Dataset'
character_folders = os.listdir(data_path)
# Remove stale list files from a previous run.
if (os.path.exists('./train_data.txt')):
    os.remove('./train_data.txt')
if (os.path.exists('./test_data.txt')):
    os.remove('./test_data.txt')
for character_folder in character_folders:
    with open('./train_data.txt', 'a') as f_train:
        with open('./test_data.txt', 'a') as f_test:
            if character_folder == '.DS_Store':
                continue
            character_imgs = os.listdir(os.path.join(data_path, character_folder))
            count = 0
            for img in character_imgs:
                if img == '.DS_Store':
                    continue
                # Every 10th image goes to the test list ("path\tlabel" format).
                if count % 10 == 0:
                    f_test.write(os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n')
                else:
                    f_train.write(os.path.join(data_path, character_folder, img) + '\t' + character_folder + '\n')
                count += 1
# NOTE(review): the scrape lost the `print(` here; restored.
print('列表已生成')
(2)數據讀取器
def data_mapper(sample):
    """Map a (path, label) sample to a normalized CHW float32 image and its label."""
    img, label = sample
    img = Image.open(img)
    img = img.resize((100, 100), Image.ANTIALIAS)
    img = np.array(img).astype('float32')
    img = img.transpose((2, 0, 1))  # HWC -> CHW
    img = img / 255.0               # scale pixels to [0, 1]
    return img, label


def data_reader(data_list_path):
    """Return a multiprocess reader over a "path\tlabel" list file."""
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                img, label = line.split('\t')
                yield img, int(label)
    # xmap_readers runs data_mapper in cpu_count() worker processes, buffer 512.
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 512)
(3)定義網絡
class MyDNN(fluid.dygraph.Layer):
    """Fully-connected network for 10-class gesture classification (dygraph)."""

    def __init__(self):
        super(MyDNN, self).__init__()
        # Hidden layers operate on the last dimension (width=100) of the input.
        self.hidden1 = Linear(100, 100, act='relu')
        self.hidden2 = Linear(100, 100, act='relu')
        self.hidden3 = Linear(100, 100, act='relu')
        # Flattened 3x100x100 image -> 10 class probabilities.
        self.hidden4 = Linear(3 * 100 * 100, 10, act='softmax')

    def forward(self, input):
        """Forward pass: three hidden layers, flatten, softmax output."""
        x = self.hidden1(input)
        x = self.hidden2(x)
        x = self.hidden3(x)
        x = fluid.layers.reshape(x, shape=[-1, 3 * 100 * 100])
        y = self.hidden4(x)
        return y
(4)訓練
# Train MyDNN with SGD; save parameters when done.
# NOTE(review): the scrape merged the optimizer call with `epochs_num = 5`;
# closing parentheses restored here.
with fluid.dygraph.guard():
    model = MyDNN()
    model.train()
    opt = fluid.optimizer.SGDOptimizer(learning_rate=0.001,
                                       parameter_list=model.parameters())
    epochs_num = 5
    for pass_num in range(epochs_num):
        for batch_id, data in enumerate(train_reader()):
            images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
            labels = np.array([x[1] for x in data]).astype('int64')
            labels = labels[:, np.newaxis]  # cross_entropy expects shape (N, 1)
            image = fluid.dygraph.to_variable(images)
            label = fluid.dygraph.to_variable(labels)
            predict = model(image)
            loss = fluid.layers.cross_entropy(predict, label)
            avg_loss = fluid.layers.mean(loss)
            acc = fluid.layers.accuracy(predict, label)
            if batch_id != 0 and batch_id % 50 == 0:
                print("train_pass:{},batch_id:{},train_loss:{},train_acc:{}".format(pass_num, batch_id, avg_loss.numpy(), acc.numpy()))
            avg_loss.backward()
            opt.minimize(avg_loss)
            model.clear_gradients()
    fluid.save_dygraph(model.state_dict(), 'MyDNN')
(5)模型測試
# Evaluate the saved model on the test set and print the mean accuracy.
# NOTE(review): the scrape kept only the comments for loading parameters and
# switching to eval mode; the corresponding calls are restored below — confirm
# against the original notebook.
with fluid.dygraph.guard():
    accs = []
    model_dict, _ = fluid.load_dygraph('MyDNN')
    model = MyDNN()
    model.load_dict(model_dict)  # load model parameters
    model.eval()                 # switch to evaluation mode
    for batch_id, data in enumerate(test_reader()):  # test set
        images = np.array([x[0].reshape(3, 100, 100) for x in data], np.float32)
        labels = np.array([x[1] for x in data]).astype('int64')
        labels = labels[:, np.newaxis]
        image = fluid.dygraph.to_variable(images)
        label = fluid.dygraph.to_variable(labels)
        predict = model(image)
        acc = fluid.layers.accuracy(predict, label)
        accs.append(acc.numpy()[0])
    avg_acc = np.mean(accs)
    print(avg_acc)
(6)預測
# Predict a single gesture image with the trained model.
with fluid.dygraph.guard():
    infer_path = '手勢.JPG'
    model = MyDNN()
    model_dict, _ = fluid.load_dygraph('MyDNN')
    model.load_dict(model_dict)
    model.eval()
    infer_img = load_image(infer_path)
    infer_img = np.array(infer_img).astype('float32')
    infer_img = infer_img[np.newaxis, :, :, :]  # add batch dimension
    infer_img = fluid.dygraph.to_variable(infer_img)
    result = model(infer_img)
    display(Image.open('手勢.JPG'))
    print(np.argmax(result.numpy()))
Day03 車牌識別
主要學習卷積網絡LeNet,生成字符圖像列表,目標車牌分類問題 ,另外使用了CV來分割圖像存儲,進而預測結果。
對於該部分數據準備過程,與day2基本相似,不過這邊種類更多一些。數據讀取器的buffer也要相應增加。不過源圖為灰度圖,所以不再有轉置處理。
def data_mapper(sample):
    """Load a grayscale image, flatten and normalize it to [0, 1]."""
    img, label = sample
    img = paddle.dataset.image.load_image(file=img, is_color=False)
    img = img.flatten().astype('float32') / 255.0
    return img, label


def data_reader(data_list_path):
    """Return a multiprocess reader over a "path\tlabel" list file (buffer 1024)."""
    def reader():
        with open(data_list_path, 'r') as f:
            lines = f.readlines()
            for line in lines:
                img, label = line.split('\t')
                yield img, int(label)
    return paddle.reader.xmap_readers(data_mapper, reader, cpu_count(), 1024)
在實現網絡代碼時,與LeNet稍有差別,主要在Pooling部分
class MyLeNet(fluid.dygraph.Layer):
    """LeNet-style CNN for 65-class license-plate character classification.

    Differs from the classic LeNet mainly in the pooling stride (1 instead of 2).
    """

    def __init__(self):
        super(MyLeNet, self).__init__()
        self.hidden1_1 = Conv2D(1, 28, 5, 1)   # 1 input channel (grayscale)
        self.hidden1_2 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        self.hidden2_1 = Conv2D(28, 32, 3, 1)
        self.hidden2_2 = Pool2D(pool_size=2, pool_type='max', pool_stride=1)
        self.hidden3 = Conv2D(32, 32, 3, 1)
        # Flattened 32x10x10 feature map -> 65 character classes.
        self.hidden4 = Linear(32 * 10 * 10, 65, act='softmax')

    def forward(self, input):
        """Forward pass: conv/pool stack, flatten, softmax output."""
        x = self.hidden1_1(input)
        x = self.hidden1_2(x)
        x = self.hidden2_1(x)
        x = self.hidden2_2(x)
        x = self.hidden3(x)
        x = fluid.layers.reshape(x, shape=[-1, 32 * 10 * 10])
        y = self.hidden4(x)
        return y
訓練和驗證過程與day2基本類似。
預測過程由於要分割圖片,使用opencv。利用cv2.threshold提取文字。
# Segment the license-plate image into single characters with OpenCV:
# binarize, sum each column's white pixels, and cut at all-zero columns.
# NOTE(review): the scrape lost part of the outer column loop; reconstructed
# from the surrounding fragments — confirm against the original notebook.
license_plate = cv2.imread('./車牌.png')
gray_plate = cv2.cvtColor(license_plate, cv2.COLOR_RGB2GRAY)
ret, binary_plate = cv2.threshold(gray_plate, 175, 255, cv2.THRESH_BINARY)
result = []
for col in range(binary_plate.shape[1]):
    result.append(0)
    for row in range(binary_plate.shape[0]):
        result[col] = result[col] + binary_plate[row][col] / 255
# Find runs of non-zero columns: each run is one character's [start, end].
character_dict = {}
num = 0
i = 0
while i < len(result):
    if result[i] == 0:
        i += 1
    else:
        index = i + 1
        while result[index] != 0:
            index += 1
        character_dict[num] = [i, index - 1]
        num += 1
        i = index
# Pad each character to a fixed width, resize to 20x20 and save; index 2 is
# the separator dot on the plate and is skipped.
for i in range(8):
    if i == 2:
        continue
    padding = (170 - (character_dict[i][1] - character_dict[i][0])) / 2
    ndarray = np.pad(binary_plate[:, character_dict[i][0]:character_dict[i][1]],
                     ((0, 0), (int(padding), int(padding))),
                     'constant', constant_values=(0, 0))
    ndarray = cv2.resize(ndarray, (20, 20))
    cv2.imwrite('./' + str(i) + '.png', ndarray)


def load_image(path):
    """Load a grayscale character image, add a batch axis, scale to [0, 1]."""
    img = paddle.dataset.image.load_image(file=path, is_color=False)
    img = img.astype('float32')
    img = img[np.newaxis, ] / 255.0
    return img
構建標籤字典,用於映射顯示車牌
# Map dataset folder names (pinyin/letters/digits) to display characters.
match = {
    'A': 'A', 'B': 'B', 'C': 'C', 'D': 'D', 'E': 'E', 'F': 'F', 'G': 'G',
    'H': 'H', 'I': 'I', 'J': 'J', 'K': 'K', 'L': 'L', 'M': 'M', 'N': 'N',
    'O': 'O', 'P': 'P', 'Q': 'Q', 'R': 'R', 'S': 'S', 'T': 'T', 'U': 'U',
    'V': 'V', 'W': 'W', 'X': 'X', 'Y': 'Y', 'Z': 'Z',
    'yun': '雲', 'cuan': '川', 'hei': '黑', 'zhe': '浙', 'ning': '寧',
    'jin': '津', 'gan': '贛', 'hu': '滬', 'liao': '遼', 'jl': '吉',
    'qing': '青', 'zang': '藏', 'e1': '鄂', 'meng': '蒙', 'gan1': '甘',
    'qiong': '瓊', 'shan': '陝', 'min': '閩', 'su': '蘇', 'xin': '新',
    'wan': '皖', 'jing': '京', 'xiang': '湘', 'gui': '貴', 'yu1': '渝',
    'yu': '豫', 'ji': '冀', 'yue': '粵', 'gui1': '桂', 'sx': '晉', 'lu': '魯',
    '0': '0', '1': '1', '2': '2', '3': '3', '4': '4',
    '5': '5', '6': '6', '7': '7', '8': '8', '9': '9',
}
# Build LABEL: class index (as str) -> display character.
# NOTE(review): LABEL_temp is not defined in this excerpt — presumably built
# earlier from the label list file; confirm against the original notebook.
L = 0
LABEL = {}
for V in LABEL_temp.values():
    LABEL[str(L)] = match[V]
    L += 1
預測過程
# Build the inference pass in dygraph mode: classify each segmented character
# image (index 2, the plate dot, is skipped) and print the decoded plate.
with fluid.dygraph.guard():
    model = MyLeNet()
    model_dict, _ = fluid.load_dygraph('MyLeNet')
    model.load_dict(model_dict)
    model.eval()
    lab = []
    for i in range(8):
        if i == 2:
            continue
        infer_imgs = []
        infer_imgs.append(load_image('./' + str(i) + '.png'))
        infer_imgs = np.array(infer_imgs)
        infer_imgs = fluid.dygraph.to_variable(infer_imgs)
        result = model(infer_imgs)
        lab.append(np.argmax(result.numpy()))
    print(lab)
display(Image.open('./車牌.png'))
print('\n車牌識別結果為:', end='')
for i in range(len(lab)):
    print(LABEL[str(lab[i])], end='')
Day04 口罩分類
主要學習構造VGG網絡實現分類處理。
代碼結構較之前有所變化,基本結構如下:
(1)參數字典
# Global training configuration for the day-4 mask-classification task.
train_parameters = {
    "input_size": [3, 224, 224],                          # CHW input shape
    "class_dim": -1,                                      # filled in by get_data_list
    "src_path": "/home/aistudio/work/maskDetect.zip",
    "target_path": "/home/aistudio/data/",
    "train_list_path": "/home/aistudio/data/train.txt",
    "eval_list_path": "/home/aistudio/data/eval.txt",
    "readme_path": "/home/aistudio/data/readme.json",
    "label_dict": {},                                     # filled in by get_data_list
    "num_epochs": 1,
    "train_batch_size": 8,
    "learning_strategy": {
        "lr": 0.001
    }
}
(2)數據準備
最大變化是亂序部分
def get_data_list(target_path, train_list_path, eval_list_path):
    '''
    Generate the train/eval data lists: walk the class folders, send every
    10th image to the eval list, shuffle both lists, and record per-class
    statistics into readme.json and train_parameters.
    '''
    class_detail = []
    data_list_path = target_path + "maskDetect/"
    class_dirs = os.listdir(data_list_path)
    all_class_images = 0
    class_label = 0
    class_dim = 0
    trainer_list = []
    eval_list = []
    for class_dir in class_dirs:
        if class_dir != ".DS_Store":
            class_dim += 1
            class_detail_list = {}
            eval_sum = 0
            trainer_sum = 0
            class_sum = 0
            path = data_list_path + class_dir
            img_paths = os.listdir(path)
            for img_path in img_paths:
                name_path = path + '/' + img_path
                if class_sum % 10 == 0:   # every 10th image -> eval set
                    eval_sum += 1
                    eval_list.append(name_path + "\t%d" % class_label + "\n")
                else:
                    trainer_sum += 1
                    trainer_list.append(name_path + "\t%d" % class_label + "\n")
                class_sum += 1
                all_class_images += 1
            class_detail_list['class_name'] = class_dir
            class_detail_list['class_label'] = class_label
            class_detail_list['class_eval_images'] = eval_sum
            class_detail_list['class_trainer_images'] = trainer_sum
            class_detail.append(class_detail_list)
            train_parameters['label_dict'][str(class_label)] = class_dir
            class_label += 1
    train_parameters['class_dim'] = class_dim
    # Shuffle, then append to the list files (caller truncates them first).
    random.shuffle(eval_list)
    with open(eval_list_path, 'a') as f:
        for eval_image in eval_list:
            f.write(eval_image)
    random.shuffle(trainer_list)
    with open(train_list_path, 'a') as f2:
        for train_image in trainer_list:
            f2.write(train_image)
    # Persist dataset statistics.
    readjson = {}
    readjson['all_class_name'] = data_list_path
    readjson['all_class_images'] = all_class_images
    readjson['class_detail'] = class_detail
    jsons = json.dumps(readjson, sort_keys=True, indent=4, separators=(',', ': '))
    with open(train_parameters['readme_path'], 'w') as f:
        f.write(jsons)
    # NOTE(review): the scrape lost the `print(` here; restored.
    print('生成數據列表完成!')
def custom_reader(file_list):
    '''
    Custom reader over a "path\tlabel" list file: yields normalized
    224x224 CHW float32 RGB images with their int labels.
    '''
    def reader():
        with open(file_list, 'r') as f:
            lines = [line.strip() for line in f]
            for line in lines:
                img_path, lab = line.strip().split('\t')
                img = Image.open(img_path)
                if img.mode != 'RGB':
                    img = img.convert('RGB')
                img = img.resize((224, 224), Image.BILINEAR)
                img = np.array(img).astype('float32')
                img = img.transpose((2, 0, 1))  # HWC -> CHW
                img = img / 255                 # scale to [0, 1]
                yield img, int(lab)
    return reader
''' 參數初始化 '''
src_path = train_parameters['src_path']
target_path = train_parameters['target_path']
train_list_path = train_parameters['train_list_path']
eval_list_path = train_parameters['eval_list_path']
batch_size = train_parameters['train_batch_size']

''' 解壓原始數據到指定路徑 '''
unzip_data(src_path, target_path)

''' 劃分訓練集與驗證集,亂序,生成數據列表 '''
# Truncate both list files first, since get_data_list appends to them.
with open(train_list_path, 'w') as f:
    f.seek(0)
    f.truncate()
with open(eval_list_path, 'w') as f:
    f.seek(0)
    f.truncate()
get_data_list(target_path, train_list_path, eval_list_path)

''' 構造數據提供器 '''
train_reader = paddle.batch(custom_reader(train_list_path), batch_size=batch_size, drop_last=True)
eval_reader = paddle.batch(custom_reader(eval_list_path), batch_size=batch_size, drop_last=True)
(2)定義模型
目標實現一個簡化的VGG
class ConvPool(fluid.dygraph.Layer):
    '''Conv block: `groups` stacked Conv2D layers followed by one Pool2D.'''

    def __init__(self, num_channels, num_filters, filter_size, pool_size,
                 pool_stride, groups, pool_padding=0, pool_type='max',
                 conv_stride=1, conv_padding=1, act=None):
        super(ConvPool, self).__init__()
        self._conv2d_list = []
        for i in range(groups):
            conv2d = self.add_sublayer(
                'bb_%d' % i,
                fluid.dygraph.Conv2D(
                    num_channels=num_channels,
                    num_filters=num_filters,
                    filter_size=filter_size,
                    stride=conv_stride,
                    padding=conv_padding,
                    act=act))
            self._conv2d_list.append(conv2d)
            # FIX(review): after the first conv the tensor has num_filters
            # channels, so subsequent convs in the group must accept that;
            # the scraped version kept num_channels fixed, which would fail
            # for groups > 1.
            num_channels = num_filters
        self._pool2d = fluid.dygraph.Pool2D(
            pool_size=pool_size,
            pool_type=pool_type,
            pool_stride=pool_stride,
            pool_padding=pool_padding)

    def forward(self, inputs):
        x = inputs
        for conv in self._conv2d_list:
            x = conv(x)
        x = self._pool2d(x)
        return x


class VGGNet(fluid.dygraph.Layer):
    ''' Simplified VGG: five conv/pool stages then three FC layers (2 classes). '''

    def __init__(self):
        super(VGGNet, self).__init__()
        # ConvPool(in_ch, out_ch, filter, pool_size, pool_stride, groups)
        self.convpool01 = ConvPool(3, 64, 3, 2, 2, 2, act='relu')
        self.convpool02 = ConvPool(64, 128, 3, 2, 2, 2, act='relu')
        self.convpool03 = ConvPool(128, 256, 3, 2, 2, 3, act='relu')
        self.convpool04 = ConvPool(256, 512, 3, 2, 2, 3, act='relu')
        self.convpool05 = ConvPool(512, 512, 3, 2, 2, 3, act='relu')
        self.pool_5_shape = 512 * 7 * 7  # feature size for 224x224 input
        self.fc01 = fluid.dygraph.Linear(self.pool_5_shape, 4096, act='relu')
        self.fc02 = fluid.dygraph.Linear(4096, 4096, act='relu')
        self.fc03 = fluid.dygraph.Linear(4096, 2, act='softmax')

    def forward(self, inputs, label=None):
        """Forward pass; returns (out, acc) when a label is given, else out."""
        print(inputs.shape)  # shape debug prints kept from the original
        out = self.convpool01(inputs)
        print(out.shape)
        out = self.convpool02(out)
        print(out.shape)
        out = self.convpool03(out)
        print(out.shape)
        out = self.convpool04(out)
        print(out.shape)
        out = self.convpool05(out)
        print(out.shape)
        out = fluid.layers.reshape(out, shape=[-1, 512 * 7 * 7])
        out = self.fc01(out)
        out = self.fc02(out)
        out = self.fc03(out)
        if label is not None:
            acc = fluid.layers.accuracy(input=out, label=label)
            return out, acc
        else:
            return out
(3)模型訓練
主要使用Adam優化器,較day3有所變化
''' 模型訓練 '''
# Train the simplified VGG with Adam and plot the training curves.
with fluid.dygraph.guard():
    print(train_parameters['class_dim'])
    print(train_parameters['label_dict'])
    vgg = VGGNet()
    optimizer = fluid.optimizer.AdamOptimizer(
        learning_rate=train_parameters['learning_strategy']['lr'],
        parameter_list=vgg.parameters())
    # FIX(review): these accumulators were used without initialization in the
    # scraped excerpt (defined in another notebook cell); initialized here.
    all_train_iter = 0
    all_train_iters = []
    all_train_costs = []
    all_train_accs = []
    for epoch_num in range(train_parameters['num_epochs']):
        for batch_id, data in enumerate(train_reader()):
            dy_x_data = np.array([x[0] for x in data]).astype('float32')
            y_data = np.array([x[1] for x in data]).astype('int64')
            y_data = y_data[:, np.newaxis]
            img = fluid.dygraph.to_variable(dy_x_data)
            label = fluid.dygraph.to_variable(y_data)
            out, acc = vgg(img, label)
            loss = fluid.layers.cross_entropy(out, label)
            avg_loss = fluid.layers.mean(loss)
            avg_loss.backward()
            optimizer.minimize(avg_loss)
            vgg.clear_gradients()
            all_train_iter = all_train_iter + train_parameters['train_batch_size']
            all_train_iters.append(all_train_iter)
            all_train_costs.append(loss.numpy()[0])
            all_train_accs.append(acc.numpy()[0])
            if batch_id % 1 == 0:
                print("Loss at epoch {} step {}: {}, acc: {}".format(epoch_num, batch_id, avg_loss.numpy(), acc.numpy()))
    draw_train_process("training", all_train_iters, all_train_costs, all_train_accs, "trainning cost", "trainning acc")
    draw_process("trainning loss", "red", all_train_iters, all_train_costs, "trainning loss")
    draw_process("trainning acc", "green", all_train_iters, all_train_accs, "trainning acc")
    fluid.save_dygraph(vgg.state_dict(), "vgg")
    print("Final loss: {}".format(avg_loss.numpy()))
Day05 PaddleHub體驗
主要學習模型遷移的使用方法。
遷移使用過程大體如下:
(1)查詢結構
# Search PaddleHub for available resnet models (command line).
hub search resnet
(2)加載預訓練模型
# Load the pretrained module (FIX: the scrape dropped the leading 'i' of `import`).
import paddlehub as hub
module = hub.Module(name="resnet_v2_50_imagenet")
input_dict, output_dict, program = module.context(trainable=True)
(3)數據準備
# Prepare the dataset and an image-classification reader matching the module's
# expected input size and normalization.
dataset = hub.dataset.DogCat()
data_reader = hub.reader.ImageClassificationReader(
    image_width=module.get_expected_image_width(),
    image_height=module.get_expected_image_height(),
    images_mean=module.get_pretrained_images_mean(),
    images_std=module.get_pretrained_images_std(),
    dataset=dataset)
(4)配置策略
# Fine-tuning run configuration.
config = hub.RunConfig(
    use_cuda=False,
    num_epoch=1,
    checkpoint_dir="cv_finetune_turtorial_demo",
    batch_size=32,
    eval_interval=50,
    strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
(5)遷移組網
# Assemble the transfer-learning task on top of the module's feature map.
feature_map = output_dict["feature_map"]
feed_list = [input_dict["image"].name]
task = hub.ImageClassifierTask(
    data_reader=data_reader,
    feed_list=feed_list,
    feature=feature_map,
    num_classes=dataset.num_labels,
    config=config)
(6)Fine-tune
# Run fine-tuning with periodic evaluation.
run_states = task.finetune_and_eval()
(7)預測
# Predict labels for new images with the fine-tuned task.
# NOTE(review): the scrape lost the argmax line and the `print(` call; both
# restored from the stranded fragments — confirm against the original notebook.
import numpy as np
data = ["test_img_dog.jpg"]
label_map = dataset.label_dict()
index = 0
run_states = task.predict(data=data)
results = [run_state.run_results for run_state in run_states]
for batch_result in results:
    batch_result = np.argmax(batch_result, axis=2)[0]
    for result in batch_result:
        index += 1
        result = label_map[result]
        print("input %i is %s, and the predict result is %s" % (index, data[index - 1], result))
Day06-PaddleSlim模型壓縮
主要學習模型壓縮優化方法,主要了解了四種主流方法,並練習了模型量化。
比賽主要使用迴歸方法,注重數據增強(對比度、亮度、padding等)。
本講作業關於模型量化,其一般處理過程:
(1)構建模型
# Build the MobileNet classification model from PaddleSlim's model zoo.
use_gpu = fluid.is_compiled_with_cuda()
exe, train_program, val_program, inputs, outputs = slim.models.image_classification(
    "MobileNet", [1, 28, 28], 10, use_gpu=use_gpu)
place = fluid.CUDAPlace(0) if fluid.is_compiled_with_cuda() else fluid.CPUPlace()
(2)定義輸入數據
# Define MNIST input readers.
# NOTE(review): the scrape dropped the first argument of both paddle.batch
# calls; restored as reader.train()/reader.test() per the PaddleSlim
# quantization tutorial — confirm against the original notebook.
import paddle.dataset.mnist as reader
train_reader = paddle.batch(reader.train(), batch_size=128, drop_last=True)
test_reader = paddle.batch(reader.test(), batch_size=128, drop_last=True)
data_feeder = fluid.DataFeeder(inputs, place)
(3)訓練和測試
def train(prog):
    """Run one pass over the training set, logging metrics every 100 iters."""
    iter = 0
    for data in train_reader():
        acc1, acc5, loss = exe.run(prog, feed=data_feeder.feed(data), fetch_list=outputs)
        if iter % 100 == 0:
            # NOTE(review): the scrape lost the `print(` here; restored.
            print('train iter={}, top1={}, top5={}, loss={}'.format(iter, acc1.mean(), acc5.mean(), loss.mean()))
        iter += 1


def test(prog):
    """Evaluate over the test set and print the averaged top-1/top-5 accuracy."""
    iter = 0
    res = [[], []]
    for data in test_reader():
        acc1, acc5, loss = exe.run(prog, feed=data_feeder.feed(data), fetch_list=outputs)
        if iter % 100 == 0:
            print('test iter={}, top1={}, top5={}, loss={}'.format(iter, acc1.mean(), acc5.mean(), loss.mean()))
        res[0].append(acc1.mean())
        res[1].append(acc5.mean())
        iter += 1
    print('final test result top1={}, top5={}'.format(np.array(res[0]).mean(), np.array(res[1]).mean()))
(4)量化模型
place = exe.place
import paddleslim.quant as quant
# Full set of supported quantization options, shown for reference:
config = {
    'weight_quantize_type': 'abs_max',
    'activation_quantize_type': 'moving_average_abs_max',
    'weight_bits': 8,
    'activation_bits': 8,
    'not_quant_pattern': ['skip_quant'],
    'quantize_op_types': ['conv2d', 'depthwise_conv2d', 'mul'],
    'dtype': 'int8',
    'window_size': 10000,
    'moving_rate': 0.9}
# NOTE(review): the dict above is immediately overwritten — only the two keys
# below actually take effect (the rest fall back to defaults).
config = {'weight_quantize_type': 'abs_max', 'activation_quantize_type': 'moving_average_abs_max'}
quant_program = quant.quant_aware(train_program, place, config, for_test=False)
val_quant_program = quant.quant_aware(val_program, place, config, for_test=True)
(5)訓練和測試量化後的模型
# Train the quantization-aware program.
train(quant_program)
比賽 人流密度檢測
主要學習人流密度檢測的基本方法:根據提供的base版本開發網絡,實現對人流的識別。講解中明確強調了樣本增強的內容,這部分很重要。
''' 加載相關類庫 '''
import zipfile
import paddle
import paddle.fluid as fluid
import matplotlib.pyplot as plt
import matplotlib.image as mping
import json
import numpy as np
import cv2
import sys
import time
import h5py
from matplotlib import pyplot as plt
from scipy.ndimage.filters import gaussian_filter
import scipy
from matplotlib import cm as CM
from paddle.utils.plot import Ploter
from PIL import Image
from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

''' 查看train.json相關信息,重點關注annotations中的標註信息 '''
f = open('/home/aistudio/data/data1917/train.json', encoding='utf-8')
content = json.load(f)

''' 將上面的到的content中的name中的“stage1/”去掉 '''
for j in range(len(content['annotations'])):
    content['annotations'][j]['name'] = content['annotations'][j]['name'].lstrip('stage1').lstrip('/')

''' 使用高斯濾波變換生成密度圖 '''
def gaussian_filter_density(gt):
    """Turn a binary head-position map into a density map via Gaussian blur."""
    density = np.zeros(gt.shape, dtype=np.float32)
    gt_count = np.count_nonzero(gt)
    if gt_count == 0:
        return density
    pts = np.array(list(zip(np.nonzero(gt)[1].ravel(), np.nonzero(gt)[0].ravel())))
    for i, pt in enumerate(pts):
        pt2d = np.zeros(gt.shape, dtype=np.float32)
        pt2d[pt[1], pt[0]] = 1.
        if gt_count > 1:
            sigma = 25
        else:
            sigma = np.average(np.array(gt.shape)) / 2. / 2.
        density += scipy.ndimage.filters.gaussian_filter(pt2d, sigma, mode='constant')
    return density

''' 圖片操作:對圖片進行resize、歸一化,將方框標註變為點標註 返回:resize後的圖片 和 gt '''
def picture_opt(img, ann):
    """Resize/normalize an image; convert box annotations to point annotations."""
    size_x, size_y = img.size
    train_img_size = (640, 480)
    img = img.resize(train_img_size, Image.ANTIALIAS)
    img = np.array(img)
    img = img / 255.0
    gt = []
    for b_l in range(len(ann)):
        if 'w' in ann[b_l].keys():
            # Box annotation: use the horizontal center, offset y by 20.
            x = (ann[b_l]['x'] + (ann[b_l]['x'] + ann[b_l]['w'])) / 2
            y = ann[b_l]['y'] + 20
            x = (x * 640 / size_x) / 8
            y = (y * 480 / size_y) / 8
            gt.append((x, y))
        else:
            # Point annotation: scale to the resized, 8x-downsampled grid.
            x = ann[b_l]['x']
            y = ann[b_l]['y']
            x = (x * 640 / size_x) / 8
            y = (y * 480 / size_y) / 8
            gt.append((x, y))
    return img, gt

''' 密度圖處理 '''
def ground(img, gt):
    """Build the ground-truth density map (1/8 resolution) from point labels."""
    imgs = img
    x = imgs.shape[0] / 8
    y = imgs.shape[1] / 8
    k = np.zeros((int(x), int(y)))
    for i in range(0, len(gt)):
        if int(gt[i][1]) < int(x) and int(gt[i][0]) < int(y):
            k[int(gt[i][1]), int(gt[i][0])] = 1
    k = gaussian_filter_density(k)
    return k

''' 定義數據生成器 '''
def train_set():
    """Yield (image, density-map) pairs, masking out ignore regions."""
    def inner():
        for ig_index in range(2000):
            # Skip malformed annotation entries.
            if len(content['annotations'][ig_index]['annotation']) == 2:
                continue
            if len(content['annotations'][ig_index]['annotation']) == 3:
                continue
            if content['annotations'][ig_index]['ignore_region']:
                ig_list = []
                ig_list1 = []
                # One ignore region: fill the polygon with black before use.
                if len(content['annotations'][ig_index]['ignore_region']) == 1:
                    ign_rge = content['annotations'][ig_index]['ignore_region'][0]
                    for ig_len in range(len(ign_rge)):
                        ig_list.append([ign_rge[ig_len]['x'], ign_rge[ig_len]['y']])
                    ig_cv_img = cv2.imread(content['annotations'][ig_index]['name'])
                    pts = np.array(ig_list, np.int32)
                    cv2.fillPoly(ig_cv_img, [pts], (0, 0, 0), cv2.LINE_AA)
                    ig_img = Image.fromarray(cv2.cvtColor(ig_cv_img, cv2.COLOR_BGR2RGB))
                    ann = content['annotations'][ig_index]['annotation']
                    ig_im, gt = picture_opt(ig_img, ann)
                    k = ground(ig_im, gt)
                    groundtruth = np.asarray(k)
                    groundtruth = groundtruth.T.astype('float32')
                    ig_im = ig_im.transpose().astype('float32')
                    yield ig_im, groundtruth
                # Two ignore regions: fill both polygons.
                if len(content['annotations'][ig_index]['ignore_region']) == 2:
                    ign_rge = content['annotations'][ig_index]['ignore_region'][0]
                    ign_rge1 = content['annotations'][ig_index]['ignore_region'][1]
                    for ig_len in range(len(ign_rge)):
                        ig_list.append([ign_rge[ig_len]['x'], ign_rge[ig_len]['y']])
                    for ig_len1 in range(len(ign_rge1)):
                        ig_list1.append([ign_rge1[ig_len1]['x'], ign_rge1[ig_len1]['y']])
                    ig_cv_img2 = cv2.imread(content['annotations'][ig_index]['name'])
                    pts = np.array(ig_list, np.int32)
                    pts1 = np.array(ig_list1, np.int32)
                    cv2.fillPoly(ig_cv_img2, [pts], (0, 0, 0), cv2.LINE_AA)
                    cv2.fillPoly(ig_cv_img2, [pts1], (0, 0, 0), cv2.LINE_AA)
                    ig_img2 = Image.fromarray(cv2.cvtColor(ig_cv_img2, cv2.COLOR_BGR2RGB))
                    ann = content['annotations'][ig_index]['annotation']
                    ig_im, gt = picture_opt(ig_img2, ann)
                    k = ground(ig_im, gt)
                    # NOTE(review): this overwrites the density map with zeros,
                    # which looks like a bug in the original excerpt — kept
                    # as-is pending confirmation against the source notebook.
                    k = np.zeros((int(ig_im.shape[0] / 8), int(ig_im.shape[1] / 8)))
                    groundtruth = np.asarray(k)
                    groundtruth = groundtruth.T.astype('float32')
                    ig_im = ig_im.transpose().astype('float32')
                    yield ig_im, groundtruth
            else:
                img = Image.open(content['annotations'][ig_index]['name'])
                ann = content['annotations'][ig_index]['annotation']
                im, gt = picture_opt(img, ann)
                k = ground(im, gt)
                groundtruth = np.asarray(k)
                groundtruth = groundtruth.T.astype('float32')
                im = im.transpose().astype('float32')
                yield im, groundtruth
    return inner

# FIX(review): the scraped excerpt created the shuffled batch reader twice
# back-to-back; the duplicate is removed.
BATCH_SIZE = 3
train_reader = paddle.batch(
    paddle.reader.shuffle(train_set(), buf_size=512),
    batch_size=BATCH_SIZE)
總體還是有很大收穫,希望以後繼續努力。