Download as docx, pdf, or txt
Download as docx, pdf, or txt
You are on page 1of 20

导入所需库

采集物品图像数据
定义图像分类函数
执行程序

1. 了解产品智能分拣系统的意义
2. 了解智能分拣的行业应用案例
3. 了解图像分类的常用方法
4. 熟悉图像分类的实现原理
5. 掌握基于图像分类的产品识别系统的实现方法

一、所需库
OpenCV
NumPy
PaddlePaddle

二、采集图像,不同类别的图像按以下方式组织
此处使用了百度 AI 开放平台中的数据集,
链接:https://aistudio.baidu.com/aistudio/datasetdetail/77996

编写脚本,划分训练集、验证集、测试集,下图为训练集:

三、图像分类实现

我们选用了经典的网络 ResNet50 来实现基于图像分类的产品识别系统


使用 PaddlePaddle 来定义网络, 代码如下:

class Basicblock(paddle.nn.Layer):
    """Residual building block used by ResNet-18 / ResNet-34.

    Two 3x3 conv+BN stages plus a skip connection. The skip path is
    passed through a 1x1 projection conv whenever the spatial size or
    the channel count changes, so the element-wise addition in
    ``forward`` is always shape-compatible.
    """

    def __init__(self, in_channel, out_channel, stride=1):
        """
        in_channel(int): number of input feature channels
        out_channel(int): number of output feature channels
        stride(int): stride of the first conv; 2 halves the feature map
        """
        super(Basicblock, self).__init__()
        self.stride = stride
        # FIX: project the shortcut whenever shapes change, not only when
        # stride == 2 -- a stride-1 block with in_channel != out_channel
        # would otherwise crash on the addition in forward().
        self.use_projection = (stride != 1) or (in_channel != out_channel)
        self.conv0 = Conv2D(in_channel, out_channel, 3, stride=stride,
                            padding=1)
        self.conv1 = Conv2D(out_channel, out_channel, 3, stride=1,
                            padding=1)
        # 1x1 projection for the shortcut branch. Created unconditionally
        # to keep the state_dict layout identical to the original code.
        self.conv2 = Conv2D(in_channel, out_channel, 1, stride=stride)
        self.bn0 = BatchNorm2D(out_channel)
        self.bn1 = BatchNorm2D(out_channel)
        self.bn2 = BatchNorm2D(out_channel)

    def forward(self, inputs):
        """conv-bn-relu -> conv-bn, add the (projected) shortcut, relu."""
        y = inputs
        x = self.conv0(inputs)
        x = self.bn0(x)
        x = F.relu(x)
        x = self.conv1(x)
        x = self.bn1(x)
        if self.use_projection:
            y = self.conv2(y)
            y = self.bn2(y)
        z = F.relu(x + y)
        return z

class Bottleneckblock(paddle.nn.Layer):
    """Bottleneck residual block used by ResNet-50 and deeper variants.

    Main branch: 1x1 reduce -> 3x3 -> 1x1 expand, each followed by batch
    norm. When ``start`` is True the identity branch is projected through
    a 1x1 conv so its shape matches the expanded output.
    """

    def __init__(self, inplane, in_channel, out_channel, stride=1,
                 start=False):
        super(Bottleneckblock, self).__init__()
        self.stride = stride
        self.start = start
        # Main branch convolutions: reduce -> transform -> expand.
        self.conv0 = Conv2D(in_channel, inplane, 1, stride=stride)
        self.conv1 = Conv2D(inplane, inplane, 3, stride=1, padding=1)
        self.conv2 = Conv2D(inplane, out_channel, 1, stride=1)
        # Shortcut projection, only applied when start=True.
        self.conv3 = Conv2D(in_channel, out_channel, 1, stride=stride)
        self.bn0 = BatchNorm2D(inplane)
        self.bn1 = BatchNorm2D(inplane)
        self.bn2 = BatchNorm2D(out_channel)
        self.bn3 = BatchNorm2D(out_channel)

    def forward(self, inputs):
        shortcut = inputs
        out = F.relu(self.bn0(self.conv0(inputs)))
        out = F.relu(self.bn1(self.conv1(out)))
        out = self.bn2(self.conv2(out))
        if self.start:
            shortcut = self.bn3(self.conv3(shortcut))
        return F.relu(out + shortcut)

class Resnet(paddle.nn.Layer):
    """Configurable ResNet: Basicblock stages for 18/34, Bottleneckblock
    stages for 50 and deeper."""

    def __init__(self, num, bottlenet, class_num=1000):
        """
        num(list): blocks per stage; ResNet50 uses [3, 4, 6, 3]
        bottlenet(bool): True when depth >= 50 (bottleneck blocks)
        class_num(int): number of output classes

        FIX: the original docstring was delimited with non-ASCII curly
        quotes, which is a Python syntax error.
        """
        super(Resnet, self).__init__()
        self.conv0 = Conv2D(3, 64, 7, stride=2)
        self.bn = BatchNorm2D(64)
        self.pool1 = MaxPool2D(3, stride=2)
        if bottlenet:
            self.layer0 = self.add_bottleneck_layer(num[0], 64, start=True)
            self.layer1 = self.add_bottleneck_layer(num[1], 128)
            self.layer2 = self.add_bottleneck_layer(num[2], 256)
            self.layer3 = self.add_bottleneck_layer(num[3], 512)
            # Bottleneck stages expand channels x4, hence 512 * 4.
            self.fc = Linear(in_features=2048, out_features=class_num)
        else:
            self.layer0 = self.add_basic_layer(num[0], 64, start=True)
            self.layer1 = self.add_basic_layer(num[1], 128)
            self.layer2 = self.add_basic_layer(num[2], 256)
            self.layer3 = self.add_basic_layer(num[3], 512)
            self.fc = Linear(in_features=512, out_features=class_num)
        self.pool2 = AdaptiveMaxPool2D(output_size=(1, 1))

    def forward(self, inputs):
        x = self.conv0(inputs)
        x = self.bn(x)
        x = self.pool1(x)
        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.pool2(x)
        # FIX: squeeze only the pooled spatial axes. A bare squeeze()
        # also dropped the batch axis when batch_size == 1, breaking
        # the fc layer for single-sample inference.
        x = paddle.squeeze(x, axis=[2, 3])
        x = self.fc(x)
        return x

    def add_basic_layer(self, num, inplane, start=False):
        """Stack `num` Basicblocks; the first downsamples (stride 2)
        unless this is the first stage (`start`)."""
        layer = []
        if start:
            layer.append(Basicblock(inplane, inplane))
        else:
            layer.append(Basicblock(inplane // 2, inplane, stride=2))
        for i in range(num - 1):
            layer.append(Basicblock(inplane, inplane))
        return nn.Sequential(*layer)

    def add_bottleneck_layer(self, num, inplane, start=False):
        """Stack `num` Bottleneckblocks; the first block projects the
        shortcut (and downsamples unless `start`)."""
        layer = []
        if start:
            layer.append(Bottleneckblock(inplane, inplane, inplane * 4,
                                         start=True))
        else:
            layer.append(Bottleneckblock(inplane, inplane * 2, inplane * 4,
                                         stride=2, start=True))
        for i in range(num - 1):
            layer.append(Bottleneckblock(inplane, inplane * 4, inplane * 4))
        return nn.Sequential(*layer)
定义数据集 MyDataset 类,继承自 paddle.io.Dataset 类,代码如下:

class MyDataset(paddle.io.Dataset):
    """Image-classification dataset driven by a dataset-split txt file.

    Each line of the txt file is "<relative_image_path> <integer_label>".
    """

    def __init__(self, path,
                 txt_path,
                 check=False,
                 input_size=(224, 224),
                 data_cut=1.0,
                 preprocessing=True,
                 mode='train'):
        """
        :param path(str): root directory of the image files
        :param txt_path(str): path of the split txt file, relative to `path`
        :param check(bool): whether to verify that every image is readable
        :param input_size(list|tuple): network input size; images are
            resized to this size in 'train' mode
        :param data_cut(float): fraction in (0, 1] used to shrink the dataset
        :param preprocessing(bool): whether to apply the transforms
        :param mode(str): 'train', 'val' or 'test'; only 'train' resizes
        """
        super(MyDataset, self).__init__()
        self.data = list()
        self.input_size = input_size
        self.path = path
        self.preprocessing = preprocessing
        # Read every "<path> <label>" line of the split file.
        # FIX: use a context manager -- the original leaked the handle.
        with open(os.path.join(path, txt_path), 'r') as f:
            self.txt = f.readlines()
        random.shuffle(self.txt)
        self.total_num = len(self.txt)
        # Optionally shrink the dataset.
        if not data_cut == 1:
            self.total_num = int(self.total_num * data_cut)
            # FIX: keep exactly total_num entries. The original sliced
            # [:total_num + 1], leaving one sample __len__ never exposed.
            self.txt = self.txt[:self.total_num]
        if check:
            self.check()
        else:
            for data_text in self.txt:
                data_path, label = data_text.split()
                self.data.append([data_path, label])
        # Resize + per-channel normalization (dataset RGB statistics).
        if mode == "train":
            self.transforms = Compose([
                Resize(input_size),
                Normalize(mean=[160.5658, 149.9414, 139.2106], std=[80.3001,
                          78.0017, 85.9304], data_format='HWC')
            ])
        else:
            self.transforms = Compose([
                Normalize(mean=[160.5658, 149.9414, 139.2106], std=[80.3001,
                          78.0017, 85.9304], data_format='HWC')])

    def check(self):
        """Keep only images that OpenCV can actually decode."""
        available_num = 0
        for data_text in self.txt:
            data_path, label = data_text.split()
            # FIX: cv2.imread does not raise on a broken file -- it
            # returns None -- so the original try/except was dead code
            # and every unreadable image slipped through. Test the
            # return value instead.
            image = cv2.imread(os.path.join(self.path, data_path))
            if image is not None:
                available_num += 1
                self.data.append([data_path, label])
            # Progress output roughly every 5% of the files.
            if available_num == int((round((available_num / self.total_num) *
                                            20) / 20) * self.total_num):
                print(f"CHECKING DATA {available_num} / {self.total_num}")
        print(f"CHECK DATA DONE TOTAL:{self.total_num} AVAILABLE:"
              f"{available_num}")

    def get_mean(self):
        """Compute and print the per-channel (RGB) mean over the dataset."""
        R_sum = 0
        G_sum = 0
        B_sum = 0
        count = 0
        for data_text in self.txt:
            data_path, label = data_text.split()
            image = cv2.imread(os.path.join(self.path, data_path))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, self.input_size)
            R_sum += image[:, :, 0].mean()
            G_sum += image[:, :, 1].mean()
            B_sum += image[:, :, 2].mean()
            count += 1
            # Progress output roughly every 1% of the files.
            if count == int((round((count / self.total_num) * 100) / 100) *
                            self.total_num):
                print(f"CHECKING DATA {count} / {self.total_num}")
        R_mean = R_sum / count
        G_mean = G_sum / count
        B_mean = B_sum / count
        print('R_mean:{}, G_mean:{}, B_mean:{}'.format(R_mean, G_mean,
                                                       B_mean))
        RGB_mean = [R_mean, G_mean, B_mean]
        return RGB_mean

    def get_std(self, mean):
        """Compute and print the per-channel (RGB) standard deviation,
        given the per-channel mean returned by get_mean()."""
        R_squared_mean = 0
        G_squared_mean = 0
        B_squared_mean = 0
        count = 0
        image_mean = np.array(mean)
        for data_text in self.txt:
            data_path, label = data_text.split()
            image = cv2.imread(os.path.join(self.path, data_path))
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = cv2.resize(image, self.input_size)
            image = image - image_mean  # zero-center
            # Accumulate the per-image mean squared deviation.
            R_squared_mean += np.mean(np.square(image[:, :, 0]).flatten())
            G_squared_mean += np.mean(np.square(image[:, :, 1]).flatten())
            B_squared_mean += np.mean(np.square(image[:, :, 2]).flatten())
            count += 1
            if count == int((round((count / self.total_num) * 100) / 100) *
                            self.total_num):
                print(f"CHECKING DATA {count} / {self.total_num}")
        R_std = math.sqrt(R_squared_mean / count)
        G_std = math.sqrt(G_squared_mean / count)
        B_std = math.sqrt(B_squared_mean / count)
        print('R_std:{}, G_std:{}, B_std:{}'.format(R_std, G_std, B_std))
        RGB_std = [R_std, G_std, B_std]
        return RGB_std

    def __getitem__(self, item):
        """Load, preprocess and return one (image, label) sample."""
        image_path, label = self.data[item]
        image = cv2.imread(os.path.join(self.path, image_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.preprocessing:
            image = self.transforms(image)
        # NOTE(review): reshape reinterprets the HWC buffer rather than
        # permuting it; a true HWC->CHW conversion would be
        # image.transpose(2, 0, 1). Kept as-is because already-trained
        # .pdparams checkpoints expect this layout -- confirm before
        # changing.
        image = np.reshape(image, [3, image.shape[0], image.shape[1]])
        label = np.array([int(label)], dtype='int64')
        return image, label

    def __len__(self):
        return self.total_num

定义训练及验证函数,代码如下:

def train(model,
          train_dataset, val_dataset,
          epoch=50, batch_size=16, lr=0.01,
          pretrained=False,
          model_path='', opt_path='',
          pretrained_ep=0):
    """Train `model`, validating once per epoch and checkpointing every 5.

    train_dataset, val_dataset(paddle.io.Dataset): dataset loaders
    epoch(int): total number of passes over the training set
    batch_size(int): samples per step, limited by GPU memory
    lr(float): learning rate
    pretrained(bool): resume from previously saved parameters
    model_path, opt_path(str): parameter files for model and optimizer
    pretrained_ep(int): number of epochs already trained (for resuming)
    """
    opt = paddle.optimizer.Adam(learning_rate=lr,
                                parameters=model.parameters())
    # Resume from checkpoints if requested.
    if pretrained:
        epoch -= pretrained_ep
        model_dict = paddle.load(model_path)
        model.set_state_dict(model_dict)
        if opt_path != '':
            opt_dict = paddle.load(opt_path)
            opt.set_state_dict(opt_dict)
    else:
        pretrained_ep = 0
    train_loader = paddle.io.DataLoader(train_dataset,
                                        batch_size=batch_size,
                                        shuffle=True)
    total_batch_val = int(len(val_dataset) / batch_size) + 1
    total_batch = int(len(train_dataset) / batch_size) + 1
    for ep in range(epoch):
        # FIX: re-enable train mode every epoch. val() switches the model
        # to eval mode at the end of each epoch and the original called
        # model.train() only once before the loop, so every epoch after
        # the first trained with BatchNorm in inference mode.
        model.train()
        total_loss = 0
        bra = printBar.ProgressBar(total=total_batch)
        for batch_id, data in enumerate(train_loader):
            # Skip the trailing partial batch.
            if data[0].shape[0] != batch_size:
                break
            images, labels = data
            pre = model(images)
            loss = paddle.nn.functional.cross_entropy(pre, labels)
            avg_loss = paddle.mean(loss)
            # FIX: accumulate a python float. Summing live tensors kept
            # every step's autograd graph reachable, growing memory over
            # the epoch.
            total_loss += float(avg_loss)
            avg_loss.backward()  # back-propagation
            opt.step()
            opt.clear_grad()
            # Progress output roughly every 1% of the epoch.
            if batch_id + 1 == int((round(((batch_id + 1) / total_batch) * 100) /
                                    100) * total_batch):
                s = 'TRAINING epoch:{} / {} batch:{} / {} loss:{} avg loss: {}'. \
                    format(ep + 1 + pretrained_ep, epoch + pretrained_ep, batch_id
                           + 1, total_batch,
                           round(float(avg_loss.numpy()[0]), 6),
                           round(float(total_loss) / (batch_id + 1), 6))
                s += (80 - len(s)) * ' '
                bra.move(s, count=batch_id + 1)
        bra.end(s)
        # Save a checkpoint every 5 epochs.
        if (ep + 1) % 5 == 0:
            paddle.save(model.state_dict(),
                        f'./model/resnet50_ep{ep + 1 + pretrained_ep}.pdparams')
            print(f"MODEL SAVED IN:'./model/resnet50_ep{ep + 1 + pretrained_ep}'")
        val(model, val_dataset, batch_size, ep + pretrained_ep,
            total_batch_val)

def val(model, val_dataset, batch_size, epoch, total_batch):
    """Run one validation pass and print the top-1 accuracy.

    model: the model to evaluate (left in eval mode on return)
    val_dataset(paddle.io.Dataset): validation dataset loader
    batch_size(int): samples per step, limited by GPU memory
    epoch(int): epoch index, used only in the final report line
    total_batch(int): batch count, used to drive the progress bar
    """
    model.eval()
    total_num = 0
    acc_num = 0
    bra = printBar.ProgressBar(total=total_batch)
    val_loader = paddle.io.DataLoader(val_dataset,
                                      batch_size=batch_size,
                                      shuffle=True)
    for batch_id, data in enumerate(val_loader):
        # Ignore the trailing partial batch.
        if data[0].shape[0] != batch_size:
            break
        images, labels = data
        pre = model(images).numpy()
        total_num += batch_size
        # Count predictions whose argmax matches the ground-truth label.
        hits = np.equal(np.argmax(pre, axis=1),
                        np.reshape(labels.numpy(), (1, batch_size)))
        acc_num += np.sum(hits)
        # Progress output roughly every 1% of the batches.
        if batch_id + 1 == int((round(((batch_id + 1) / total_batch) * 100) / 100)
                               * total_batch):
            s = "VERIFYING batch:{} / {}".format(batch_id + 1, total_batch)
            s += (80 - len(s)) * ' '
            bra.move(s, count=batch_id + 1)
    bra.end(s)
    print('VERIFYING DONE EPOCH:{} ACC:{} / {} ACC RATE: {}%'.
          format(epoch + 1, acc_num, total_num, int((acc_num / total_num) *
                                                    1000) / 10))
主函数如下:

if __name__ == '__main__':
    # FIX: do not rebind the imported module name `resnet` to the model
    # instance -- the original `resnet = resnet.resnet50(...)` shadowed
    # the module, breaking any later use of it.
    model = resnet.resnet50(class_num=158)
    input_size = (224, 224)
    train_data = dataset.MyDataset(
        'C:/Users/Administrator/Desktop/paddlepaddle/Mydata/',
        './train.txt', input_size=input_size, data_cut=1)
    val_data = dataset.MyDataset(
        'C:/Users/Administrator/Desktop/paddlepaddle/Mydata/',
        './val.txt', data_cut=1, mode='val')
    train(model, train_data, val_data, epoch=300, batch_size=8, lr=1e-4)

定义完毕即可开始训练,输出如图:

在 model 目录下可以获得保存的.pdparams 模型参数文件

You might also like