Professional Documents
Culture Documents
导入所需库
导入所需库
采集物品图像数据
定义图像分类函数
执行程序
1. 了解产品智能分拣系统的意义
2. 了解智能分拣的行业应用案例
3. 了解图像分类的常用方法
4. 熟悉图像分类的实现原理
5. 掌握基于图像分类的产品识别系统的实现方法
一、所需库
OpenCV
NumPy
PaddlePaddle
二、采集图像,不同类别的图像按以下方式组织
此处使用了百度 AI 开放平台中的数据集,链接:https://aistudio.baidu.com/aistudio/datasetdetail/77996
编写脚本,划分训练集、验证集、测试集,下图为训练集:
三、图像分类实现
class Basicblock(paddle.nn.Layer):
# ResNet18 或 34 所使用的网络模块
super(Basicblock, self).__init__()
self.stride = stride
padding=1)
padding=1)
self.bn0 = BatchNorm2D(out_channel)
self.bn1 = BatchNorm2D(out_channel)
self.bn2 = BatchNorm2D(out_channel)
def forward(self, inputs):
    """Run the residual block on ``inputs`` and return the activated sum.

    The main path applies conv0 -> bn0 -> ReLU -> conv1 -> bn1. The
    shortcut is the unmodified input unless ``self.stride == 2``, in which
    case it is passed through conv2 and bn2 before the addition.
    """
    # Main path: two conv/batch-norm stages with a ReLU in between.
    main = F.relu(self.bn0(self.conv0(inputs)))
    main = self.bn1(self.conv1(main))

    # Shortcut path: projected only when the block uses stride 2.
    shortcut = inputs
    if self.stride == 2:
        shortcut = self.bn2(self.conv2(shortcut))

    # Residual addition followed by the final activation.
    return F.relu(main + shortcut)
class Bottleneckblock(paddle.nn.Layer):
# ResNet50 或以上所使用的网络模块
start=False):
super(Bottleneckblock, self).__init__()
self.stride = stride
self.start = start
self.conv0 = Conv2D(in_channel, inplane, 1, stride=stride)
self.bn0 = BatchNorm2D(inplane)
self.bn1 = BatchNorm2D(inplane)
self.bn2 = BatchNorm2D(out_channel)
self.bn3 = BatchNorm2D(out_channel)
y = inputs
x = self.conv0(inputs)
x = self.bn0(x)
x = F.relu(x)
x = self.conv1(x)
x = self.bn1(x)
x = F.relu(x)
x = self.conv2(x)
x = self.bn2(x)
if self.start:
y = self.conv3(y)
y = self.bn3(y)
z = F.relu(x + y)
return z
class Resnet(paddle.nn.Layer):
‘’’
[3, 4, 6, 3]
class_num(int): 需要分类的类别数量
‘’’
super(Resnet, self).__init__()
self.bn = BatchNorm2D(64)
if bottlenet:
x = self.conv0(inputs)
x = self.bn(x)
x = self.pool1(x)
x = self.layer0(x)
x = self.layer1(x)
x = self.layer2(x)
x = self.layer3(x)
x = self.pool2(x)
x = paddle.squeeze(x)
x = self.fc(x)
return x
def add_basic_layer(self, num, inplane, start=False):
layer = []
if start:
layer.append(Basicblock(inplane, inplane))
else:
layer.append(Basicblock(inplane, inplane))
return nn.Sequential(*layer)
layer = []
if start:
start=True))
else:
stride=2, start=True))
return nn.Sequential(*layer)
定义数据集 MyDataset 类,继承自 paddle.io.Dataset 类,代码如下:
class MyDataset(paddle.io.Dataset):
txt_path,
check=False,
input_size=(224, 224),
data_cut=1.0,
preprocessing=True,
mode='train'):
"""
super(MyDataset, self).__init__()
self.data = list()
self.input_size = input_size
self.path = path
self.preprocessing = preprocessing
random.shuffle(self.txt)
self.total_num = len(self.txt)
# 数据集缩小
if not data_cut == 1:
self.txt = self.txt[:self.total_num + 1]
if check:
self.check()
else:
self.data.append([data_path, label])
# 进行图片的缩放及归一化
if mode == "train":
self.transforms = Compose([
Resize(input_size),
])
else:
self.transforms = Compose([
def check(self):
"""检查图片合法性"""
available_num = 0
try:
cv2.imread(os.path.join(self.path, data_path))
available_num += 1
self.data.append([data_path, label])
except:
pass
{available_num}")
def get_mean(self):
R_sum = 0
G_sum = 0
B_sum = 0
count = 0
count += 1
self.total_num):
print(f"CHECKING DATA {count} / {self.total_num}")
B_mean))
return RGB_mean
R_squared_mean = 0
G_squared_mean = 0
B_squared_mean = 0
count = 0
image_mean = np.array(mean)
count += 1
self.total_num):
return RGB_std
"""根据索引读取图片并进行处理"""
if self.preprocessing:
image = self.transforms(image)
image = np.reshape(image, [3, image.shape[0], image.shape[1]])
def __len__(self):
    """Return the sample count stored in ``self.total_num``."""
    # NOTE(review): this reports total_num rather than len(self.data);
    # confirm the two stay equal when check() skips unreadable entries.
    sample_count = self.total_num
    return sample_count
定义训练及验证函数,代码如下:
def train(model,
train_dataset, val_dataset,
pretrained=False,
model_path='', opt_path='',
pretrained_ep=0):
opt = paddle.optimizer.Adam(learning_rate=lr,
parameters=model.parameters())
"""
训练函数
epoch(int): 数据集遍历次数
batch_size(int): 每次向网络传入的数据数量,根据电脑显存设定
lr(float): 学习率
pretrained(bool): 是否有预训练参数
pretrained_ep(int): 预训练的遍历次数
"""
# 加载预训练参数
if pretrained:
epoch -= pretrained_ep
model_dict = paddle.load(model_path)
model.set_state_dict(model_dict)
if opt_path != '':
opt_dict = paddle.load(opt_path)
opt.set_state_dict(opt_dict)
else:
pretrained_ep = 0
model.train()
train_loader = paddle.io.DataLoader(train_dataset,
batch_size=batch_size,
shuffle=True)
total_batch_val = int(len(val_dataset) / batch_size) + 1
# 开始训练
for ep in range(epoch):
total_loss = 0
bra = printBar.ProgressBar(total=total_batch)
if data[0].shape[0] != batch_size:
break
pre = model(images)
32))
total_loss += avg_loss
avg_loss.backward() # 反向传播
# 优化器优化
opt.step()
opt.clear_grad()
# 训练信息输出
100) * total_batch):
+ 1, total_batch,
round(float(avg_loss.numpy()[0]), 6),
bra.move(s, count=batch_id + 1)
bra.end(s)
# 每隔 5 个 epoch 保存一次模型
if (ep + 1) % 5 == 0:
paddle.save(model.state_dict(), f'./model/resnet50_ep{ep + 1 +
pretrained_ep}.pdparams')
total_batch_val)
"""
验证函数
model: 需要验证的模型
val_dataset(paddle.io.Dataset): 数据集加载类
epoch(int): 数据集遍历次数
batch_size(int): 每次向网络传入的数据数量,根据电脑显存设定
"""
model.eval()
total_num = 0
acc_num = 0
bra = printBar.ProgressBar(total=total_batch)
val_loader = paddle.io.DataLoader(val_dataset,
batch_size=batch_size,
shuffle=True)
if data[0].shape[0] != batch_size:
break
pre = model(images).numpy()
total_num += batch_size
* total_batch):
bra.move(s, count=batch_id + 1)
bra.end(s)
1000) / 10))
主函数如下:
if __name__ == '__main__':
resnet = resnet.resnet50(class_num=158)
train_data =
dataset.MyDataset('C:/Users/Administrator/Desktop/paddlepaddle/Mydat
a/',
val_data =
dataset.MyDataset('C:/Users/Administrator/Desktop/paddlepaddle/Mydat
a/',
定义完毕即可开始训练,输出如图: