背景简介

前段时间看到了百度新出的一篇论文，提出了一种基于MKLDNN加速策略的轻量级CPU网络，即PP-LCNet，它提高了轻量级模型在多任务上的性能，对于计算机视觉的下游任务，如目标检测、语义分割等，也有很好的表现。以下是论文链接和开源的基于PaddlePaddle的实现。

arXiv: https://arxiv.org/pdf/2109.15099.pdf

code: https://github.com/PaddlePaddle/PaddleClas

论文很短，模型结构也十分简洁，没有特别创新的部分，应该是属于深挖技术细节并细心整理的工程应用梳理性质的文章，里面有一些极其实用的工程细节，非常值得一读。

Pytorch实现PP-LCNet

简单浏览了一下网上对该文章的解读。

快到飞起的轻量级网络怎能不让人心动？可惜原版PP-LCNet只有PaddlePaddle的实现，对于我这样的PyTorch玩家没法直接白嫖，不过好在PaddlePaddle和PyTorch的动态图机制极其相似，参考相关代码，实现起来也并不难，下面贴一下我用PyTorch的实现：

import os
import torch
import torch.nn as nn

# Stage layout of the PP-LCNet backbone. Each row describes one
# depthwise-separable block as
#   [kernel_size, in_channels, out_channels, stride, use_se]
# Channel counts are the unscaled values; the model multiplies them by its
# width factor and rounds them with make_divisible().
NET_CONFIG = {
    "blocks2": [
        [3, 16, 32, 1, False],
    ],
    "blocks3": [
        [3, 32, 64, 2, False],
        [3, 64, 64, 1, False],
    ],
    "blocks4": [
        [3, 64, 128, 2, False],
        [3, 128, 128, 1, False],
    ],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
    ],
}


def autopad(k, p=None):
    """Return the padding to use for kernel size ``k``.

    Args:
        k: Kernel size, either an int or an iterable of ints.
        p: Explicit padding. When given (not ``None``) it is returned as is.

    Returns:
        ``p`` if it was supplied; otherwise half the kernel size (integer
        division), as an int for an int ``k`` or as a list for an iterable.
    """
    if p is not None:
        return p
    if isinstance(k, int):
        return k // 2
    return [size // 2 for size in k]


def make_divisible(v, divisor=8, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor``.

    Args:
        v: Value to round (typically a scaled channel count).
        divisor: The result is a multiple of this number.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value, bumped up by one ``divisor`` step if rounding
        would otherwise leave it below 90% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    rounded = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, rounded)
    # Never let rounding shrink the value by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result


class HardSwish(nn.Module):
    """Hard-swish activation: ``x * ReLU6(x + 3) / 6``.

    Args:
        inplace: Forwarded to the internal ``nn.ReLU6``. It acts on the
            temporary ``x + 3``, so the caller's tensor is not modified.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        gate = self.relu6(x + 3)
        return x * gate / 6


class HardSigmoid(nn.Module):
    """Hard-sigmoid activation: ``ReLU6(x + 3) / 6``.

    Args:
        inplace: Forwarded to the internal ``nn.ReLU6``. It acts on the
            temporary ``x + 3``, so the caller's tensor is not modified.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        shifted = x + 3
        return self.relu6(shifted) / 6


class SELayer(nn.Module):
    """Squeeze-and-excitation channel gate.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP ending in ``HardSigmoid``, and the
    resulting per-channel weights rescale the input.

    Args:
        channel: Number of input (and output) channels.
        reduction: Bottleneck ratio; the hidden width is
            ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        hidden = channel // reduction
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            HardSigmoid(),
        )

    def forward(self, x):
        # Unpacking four dims mirrors the (batch, channel, height, width)
        # layout this layer is written for.
        batch, channels, _, _ = x.size()
        squeezed = self.avgpool(x).view(batch, channels)
        weights = self.fc(squeezed).view(batch, channels, 1, 1)
        return x * weights.expand_as(x)


class DepthwiseSeparable(nn.Module):
    """Depthwise-separable convolution block with optional SE gating.

    Layout: depthwise ``dw_size`` x ``dw_size`` conv -> BN -> hard-swish ->
    pointwise 1x1 conv -> BN -> hard-swish, then an ``SELayer`` on the output
    when ``use_se`` is true.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        dw_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        use_se: Whether to apply squeeze-and-excitation to the block output.
    """

    def __init__(self, inp, oup, dw_size, stride, use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        self.stride = stride
        self.inp = inp
        self.oup = oup
        self.dw_size = dw_size
        self.dw_sp = nn.Sequential(
            # Depthwise: groups == channels, "same"-style padding from autopad.
            nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
                      padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
            nn.BatchNorm2d(self.inp),
            HardSwish(),

            # Pointwise: 1x1 conv that mixes channels and changes the width.
            nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(self.oup),
            HardSwish(),
        )
        # Build the SE branch only when it is actually used. An always-present
        # SELayer adds parameters that never receive gradients, which inflates
        # the model and trips DistributedDataParallel's unused-parameter check.
        # Checkpoints saved with the old layout carry extra ``se.*`` keys for
        # non-SE blocks; load those with ``strict=False``.
        # NOTE(review): SE is applied after the pointwise conv here; confirm
        # against the reference implementation, which may place it between
        # the depthwise and pointwise convs.
        self.se = SELayer(self.oup) if self.use_se else nn.Identity()

    def forward(self, x):
        x = self.dw_sp(x)
        if self.use_se:
            x = self.se(x)
        return x


class PP_LCNet(nn.Module):
    """PP-LCNet image classifier.

    Pipeline: strided 3x3 stem conv -> five stages of depthwise-separable
    blocks described by ``NET_CONFIG`` -> global average pooling -> 1x1 conv
    to ``class_expand`` channels -> hard-swish -> dropout -> linear classifier.

    Args:
        scale: Width multiplier applied to every channel count in
            ``NET_CONFIG`` (results are rounded with ``make_divisible``).
        class_num: Number of output classes.
        class_expand: Channels produced by the 1x1 conv before the classifier.
        dropout_prob: Dropout probability applied before the classifier.
    """

    def __init__(self, scale=1.0, class_num=10, class_expand=1280, dropout_prob=0.2):
        super(PP_LCNet, self).__init__()
        self.scale = scale
        # NOTE(review): the stem is a bare convolution with no normalisation
        # or activation; confirm this matches the reference model.
        self.conv1 = nn.Conv2d(3, out_channels=make_divisible(16 * self.scale),
                               kernel_size=3, stride=2, padding=1, bias=False)

        # Stages are created in the same order and under the same attribute
        # names as before, so state_dict keys are unaffected.
        self.blocks2 = self._make_stage("blocks2")
        self.blocks3 = self._make_stage("blocks3")
        self.blocks4 = self._make_stage("blocks4")
        self.blocks5 = self._make_stage("blocks5")
        self.blocks6 = self._make_stage("blocks6")

        self.GAP = nn.AdaptiveAvgPool2d(1)

        # Input width of the head = scaled output width of the last block.
        last_stage_out = NET_CONFIG["blocks6"][-1][2]
        self.last_conv = nn.Conv2d(in_channels=make_divisible(last_stage_out * self.scale),
                                   out_channels=class_expand,
                                   kernel_size=1, stride=1, padding=0, bias=False)

        self.hardswish = HardSwish()
        self.dropout = nn.Dropout(p=dropout_prob)

        self.fc = nn.Linear(class_expand, class_num)

    def _make_stage(self, name):
        """Build the ``nn.Sequential`` of blocks listed under ``NET_CONFIG[name]``."""
        # Each config row is (k, in_c, out_c, s, use_se).
        return nn.Sequential(*[
            DepthwiseSeparable(inp=make_divisible(in_c * self.scale),
                               oup=make_divisible(out_c * self.scale),
                               dw_size=k, stride=s, use_se=use_se)
            for k, in_c, out_c, s, use_se in NET_CONFIG[name]
        ])

    def forward(self, x):
        """Run the classifier and return a ``(batch, class_num)`` tensor."""
        x = self.conv1(x)

        x = self.blocks2(x)
        x = self.blocks3(x)
        x = self.blocks4(x)
        x = self.blocks5(x)
        x = self.blocks6(x)

        x = self.GAP(x)
        x = self.last_conv(x)
        x = self.hardswish(x)
        x = self.dropout(x)
        # Collapse the 1x1 spatial dims left by global pooling.
        x = torch.flatten(x, start_dim=1, end_dim=-1)
        x = self.fc(x)
        return x


def PPLCNET_x0_25(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 0.25; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=0.25, **kwargs)


def PPLCNET_x0_35(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 0.35; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=0.35, **kwargs)


def PPLCNET_x0_5(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 0.5; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=0.5, **kwargs)


def PPLCNET_x0_75(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 0.75; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=0.75, **kwargs)


def PPLCNET_x1_0(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 1.0; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=1.0, **kwargs)


def PPLCNET_x1_5(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 1.5; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=1.5, **kwargs)


def PPLCNET_x2_0(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 2.0; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=2.0, **kwargs)


def PPLCNET_x2_5(**kwargs):
    """Build a ``PP_LCNet`` with width multiplier 2.5; ``kwargs`` are forwarded."""
    return PP_LCNet(scale=2.5, **kwargs)


if __name__ == '__main__':
    # Smoke test: push one random 224x224 RGB image through the x1.5 model
    # and print the input and output shapes.
    model = PPLCNET_x1_5()
    # Inference mode: keeps BatchNorm statistics untouched and disables
    # dropout for this shape check.
    model.eval()
    dummy = torch.randn(1, 3, 224, 224)  # avoid shadowing the builtin input()
    print(dummy.shape)
    with torch.no_grad():  # no gradients needed for a shape check
        output = model(dummy)
    print(output.shape)

PP-LCNet-YoloV5

既然已经实现了PyTorch版的PP-LCNet，接下来就是实际应用环节了。因为我的工作主要以检测、追踪为主，首先想到的自然就是目标检测的经典模型——YoloV5了。PP-LCNet有0.25、0.35、0.5、0.75、1.0、1.5、2.0、2.5一共八种模型，这里以PPLCNET_x1_0为例，在原版YoloV5基础上修改以下三个文件：

common.py

# 增加如下代码
#-------------------------------------PP_LCNet------------------------------------------------------
# Stage layout of the PP-LCNet backbone. Each row describes one
# depthwise-separable block as
#   [kernel_size, in_channels, out_channels, stride, use_se]
# Channel counts are the unscaled values; PPLC_Block multiplies them by its
# scale and rounds them with make_divisible_LC().
NET_CONFIG = {
    "blocks2": [
        [3, 16, 32, 1, False],
    ],
    "blocks3": [
        [3, 32, 64, 2, False],
        [3, 64, 64, 1, False],
    ],
    "blocks4": [
        [3, 64, 128, 2, False],
        [3, 128, 128, 1, False],
    ],
    "blocks5": [
        [3, 128, 256, 2, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
        [5, 256, 256, 1, False],
    ],
    "blocks6": [
        [5, 256, 512, 2, True],
        [5, 512, 512, 1, True],
    ],
}
# Stage names in network order; PPLC_Block receives an index into this list.
BLOCK_LIST = [
    "blocks2",
    "blocks3",
    "blocks4",
    "blocks5",
    "blocks6",
]

def make_divisible_LC(v, divisor=8, min_value=None):
    """Round ``v`` to a nearby multiple of ``divisor`` (PP-LCNet channel rule).

    Args:
        v: Value to round (typically a scaled channel count).
        divisor: The result is a multiple of this number.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The rounded value, bumped up by one ``divisor`` step if rounding
        would otherwise leave it below 90% of ``v``.
    """
    lower_bound = divisor if min_value is None else min_value
    rounded = int(v + divisor / 2) // divisor * divisor
    result = max(lower_bound, rounded)
    # Never let rounding shrink the value by more than 10%.
    if result < 0.9 * v:
        result += divisor
    return result


class HardSwish(nn.Module):
    """Hard-swish activation: ``x * ReLU6(x + 3) / 6``.

    Args:
        inplace: Forwarded to the internal ``nn.ReLU6``. It acts on the
            temporary ``x + 3``, so the caller's tensor is not modified.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        gate = self.relu6(x + 3)
        return x * gate / 6


class HardSigmoid(nn.Module):
    """Hard-sigmoid activation: ``ReLU6(x + 3) / 6``.

    Args:
        inplace: Forwarded to the internal ``nn.ReLU6``. It acts on the
            temporary ``x + 3``, so the caller's tensor is not modified.
    """

    def __init__(self, inplace=True):
        super().__init__()
        self.relu6 = nn.ReLU6(inplace=inplace)

    def forward(self, x):
        shifted = x + 3
        return self.relu6(shifted) / 6


class SELayer(nn.Module):
    """Squeeze-and-excitation channel gate.

    The input is globally average-pooled to one value per channel, passed
    through a two-layer bottleneck MLP ending in ``HardSigmoid``, and the
    resulting per-channel weights rescale the input.

    Args:
        channel: Number of input (and output) channels.
        reduction: Bottleneck ratio; the hidden width is
            ``channel // reduction``.
    """

    def __init__(self, channel, reduction=16):
        super().__init__()
        hidden = channel // reduction
        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(channel, hidden, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(hidden, channel, bias=False),
            HardSigmoid(),
        )

    def forward(self, x):
        # Unpacking four dims mirrors the (batch, channel, height, width)
        # layout this layer is written for.
        batch, channels, _, _ = x.size()
        squeezed = self.avgpool(x).view(batch, channels)
        weights = self.fc(squeezed).view(batch, channels, 1, 1)
        return x * weights.expand_as(x)


class DepthwiseSeparable(nn.Module):
    """Depthwise-separable convolution block with optional SE gating.

    Layout: depthwise ``dw_size`` x ``dw_size`` conv -> BN -> hard-swish ->
    pointwise 1x1 conv -> BN -> hard-swish, then an ``SELayer`` on the output
    when ``use_se`` is true.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        dw_size: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution.
        use_se: Whether to apply squeeze-and-excitation to the block output.
    """

    def __init__(self, inp, oup, dw_size, stride, use_se=False):
        super(DepthwiseSeparable, self).__init__()
        self.use_se = use_se
        self.stride = stride
        self.inp = inp
        self.oup = oup
        self.dw_size = dw_size
        self.dw_sp = nn.Sequential(
            # Depthwise: groups == channels; padding comes from the autopad
            # helper that this snippet expects to find in the host module.
            nn.Conv2d(self.inp, self.inp, kernel_size=self.dw_size, stride=self.stride,
                      padding=autopad(self.dw_size, None), groups=self.inp, bias=False),
            nn.BatchNorm2d(self.inp),
            HardSwish(),

            # Pointwise: 1x1 conv that mixes channels and changes the width.
            nn.Conv2d(self.inp, self.oup, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(self.oup),
            HardSwish(),
        )
        # Build the SE branch only when it is actually used. An always-present
        # SELayer adds parameters that never receive gradients, which inflates
        # the reported parameter count and trips DistributedDataParallel's
        # unused-parameter check during multi-GPU training.
        # Checkpoints saved with the old layout carry extra ``se.*`` keys for
        # non-SE blocks; load those with ``strict=False``.
        self.se = SELayer(self.oup) if self.use_se else nn.Identity()

    def forward(self, x):
        x = self.dw_sp(x)
        if self.use_se:
            x = self.se(x)
        return x

class PPLC_Conv(nn.Module):
    """PP-LCNet stem: a single strided 3x3 convolution on a 3-channel input.

    Args:
        scale: Width multiplier; the output width is
            ``make_divisible_LC(16 * scale)``.
    """

    def __init__(self, scale):
        super().__init__()
        self.scale = scale
        stem_channels = make_divisible_LC(16 * scale)
        self.conv = nn.Conv2d(
            3,
            out_channels=stem_channels,
            kernel_size=3,
            stride=2,
            padding=1,
            bias=False,
        )

    def forward(self, x):
        return self.conv(x)

class PPLC_Block(nn.Module):
    """One PP-LCNet stage, built from the matching ``NET_CONFIG`` entry.

    Args:
        scale: Width multiplier applied to the configured channel counts.
        block_num: Index into ``BLOCK_LIST`` selecting the stage. Note that
            ``self.block_num`` stores the resolved stage *name*, not the index.
    """

    def __init__(self, scale, block_num):
        super().__init__()
        self.scale = scale
        self.block_num = BLOCK_LIST[block_num]

        stage_layers = []
        # Each config row is (k, in_c, out_c, s, use_se).
        for k, in_c, out_c, s, use_se in NET_CONFIG[self.block_num]:
            scaled_in = make_divisible_LC(in_c * self.scale)
            scaled_out = make_divisible_LC(out_c * self.scale)
            stage_layers.append(
                DepthwiseSeparable(inp=scaled_in, oup=scaled_out,
                                   dw_size=k, stride=s, use_se=use_se)
            )
        self.block = nn.Sequential(*stage_layers)

    def forward(self, x):
        return self.block(x)

yolo.py

# 修改parse_model函数
def parse_model(d, ch):  # model_dict, input_channels(3)
    """Build the network described by a YOLOv5-style model dict.

    Args:
        d: Model dictionary providing ``anchors``, ``nc``, ``depth_multiple``,
            ``width_multiple``, ``backbone`` and ``head``. String entries in
            each layer's ``args`` list are evaluated and written back in place.
        ch: List of channel counts; its last entry is the network input width.
            The list is reset after layer 0 and then holds one output width
            per layer.

    Returns:
        ``(model, save)``: an ``nn.Sequential`` of all layers, and the sorted
        list of layer indices that later layers read from explicitly (every
        ``from`` entry other than ``-1``).
    """
    LOGGER.info('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)

    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
        # NOTE(security): eval() executes expressions taken from the model
        # config; only load configuration files from trusted sources.
        m = eval(m) if isinstance(m, str) else m  # eval strings
        for j, a in enumerate(args):
            try:
                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
            except (NameError, SyntaxError):
                # Plain string arguments such as 'nearest' are not Python
                # expressions; keep them unchanged. Other errors (and Ctrl-C)
                # are no longer swallowed.
                pass

        n = n_ = max(round(n * gd), 1) if n > 1 else n  # depth gain
        if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, SPPF, DWConv, MixConv2d, Focus, CrossConv,
                 BottleneckCSP, C3, C3TR, C3SPP, C3Ghost]:
            c1, c2 = ch[f], args[0]
            if c2 != no:  # if not output
                c2 = make_divisible(c2 * gw, 8)

            args = [c1, c2, *args[1:]]

            if m in [BottleneckCSP, C3, C3TR, C3Ghost]:
                args.insert(2, n)  # number of repeats
                n = 1
        elif m is nn.BatchNorm2d:
            args = [ch[f]]
        elif m is Concat:
            c2 = sum([ch[x] for x in f])
        elif m is Detect:
            args.append([ch[x] for x in f])
            if isinstance(args[1], int):  # number of anchors
                args[1] = [list(range(args[1] * 2))] * len(f)
        elif m is Contract:
            c2 = ch[f] * args[0] ** 2
        elif m is Expand:
            c2 = ch[f] // args[0] ** 2
        # ---- PP-LCNet additions ------------------------------------------
        elif m in (PPLC_Conv, PPLC_Block):
            # args[0] is the layer's output width, used only for channel
            # bookkeeping; the remaining args go to the module constructor.
            # width_multiple (gw) is not applied here, and the module derives
            # its real width from its own scale argument, so the YAML value
            # must equal that width.
            c2 = args[0]
            args = args[1:]
        # -------------------------------------------------------------------
        else:
            c2 = ch[f]

        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
        t = str(m)[8:-2].replace('__main__.', '')  # module type
        np = sum([x.numel() for x in m_.parameters()])  # number params
        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
        LOGGER.info('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n_, np, t, args))  # print
        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
        layers.append(m_)
        if i == 0:
            ch = []
        ch.append(c2)
    return nn.Sequential(*layers), sorted(save)

yolov5_LCNet.yaml

# YOLOv5 🚀 by Ultralytics, GPL-3.0 license

# Parameters
nc: 80  # number of classes
depth_multiple: 0.33  # model depth multiple: parse_model scales repeat counts > 1 by this factor
width_multiple: 0.50  # layer channel multiple: applied to the stock YOLOv5 modules only, not to PPLC_Conv / PPLC_Block
# One row per detection layer; each row is a flat list of anchor values
# (parse_model takes the anchor count as len(row) // 2).
anchors:
  - [10,13, 16,30, 33,23]  # P3/8
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

# YOLOv5 backbone (PP-LCNet stages)
# Row format: [from, number, module, args]
#   PPLC_Conv  args: [out_channels, scale]
#   PPLC_Block args: [out_channels, scale, index into BLOCK_LIST]
# out_channels is used for channel bookkeeping only and must match the width
# the module really produces for the given scale.
backbone:
  [[-1, 1, PPLC_Conv, [16, 1]],  # 0  stem, stride 2
   [-1, 1, PPLC_Block, [32, 1, 0]],  # 1  blocks2
   [-1, 1, PPLC_Block, [64, 1, 1]],  # 2  blocks3
   [-1, 1, PPLC_Block, [128, 1, 2]],  # 3  blocks4 (P3/8)
   [-1, 1, PPLC_Block, [256, 1, 3]],  # 4  blocks5 (P4/16)
   [-1, 1, PPLC_Block, [512, 1, 4]],  # 5  blocks6 (P5/32)
  ]

# YOLOv5 head
# Layer indices continue from the 6-layer backbone above (0-5), so the head
# starts at index 6.
head:
  [[-1, 1, Conv, [512, 1, 1]],  # 6
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P4
   [-1, 3, C3, [512, False]],  # 9

   [-1, 1, Conv, [256, 1, 1]],  # 10
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 3], 1, Concat, [1]],  # cat backbone P3
   [-1, 3, C3, [256, False]],  # 13 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P4
   [-1, 3, C3, [512, False]],  # 16 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 6], 1, Concat, [1]],  # cat head P5
   [-1, 3, C3, [1024, False]],  # 19 (P5/32-large)

   [[13, 16, 19], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]

完整项目见Github

https://github.com/OutBreak-hui/Yolov5-PP-LCNet