您的位置：首页 > 理论基础 > 计算机网络

【深度学习】使用tensorflow实现VGG19网络

2017-10-14 19:14 579 查看

转载出处：http://blog.csdn.net/accepthjp/article/details/70170217

本文讲述使用tensorflow实现VGG19网络。

VGG网络与AlexNet类似，也是一种CNN，VGG在2014年的 ILSVRC localization and classification 两个问题上分别取得了第一名和第二名。VGG网络非常深，通常有16－19层，卷积核大小为 3 x 3，16和19层的区别主要在于后面三个卷积部分卷积层的数量。第二个用tensorflow独立完成的小玩意儿......

同样先放上我的代码，由AlexNet的代码改过来的：https://github.com/hjptriplebee/VGG19_with_tensorflow

如果想运行代码，详细的配置要求都在上面链接的readme文件中了。本文建立在一定的tensorflow基础上，不会对太细的点进行说明。

模型结构

可以看到VGG的前几层为卷积和maxpool的交替，每个卷积部分包含多个卷积层，后面紧跟三个全连接层。激活函数采用ReLU，训练采用了dropout，但并没有像AlexNet一样采用LRN（论文给出的理由是加LRN实验效果不好）。

模型定义

[python] view
plain copy

def maxPoolLayer(x, kHeight, kWidth, strideX, strideY, name, padding = "SAME"):
    """Apply 2-D max pooling to ``x``.

    Args:
        x: input tensor; the ksize/strides lists built here are 4 elements
            long with the spatial entries in positions 1 and 2
            (presumably NHWC, the tf.nn.max_pool default -- confirm).
        kHeight: height of the pooling window.
        kWidth: width of the pooling window.
        strideX: stride along the width (horizontal) axis.
        strideY: stride along the height (vertical) axis.
        name: name given to the pooling op.
        padding: padding mode forwarded to tf.nn.max_pool.

    Returns:
        The pooled tensor.
    """
    # Strides are listed height-first, in the same order as ksize and as the
    # strides list in convLayer, so strideY pairs with kHeight and strideX
    # with kWidth.
    return tf.nn.max_pool(x, ksize = [1, kHeight, kWidth, 1],
                          strides = [1, strideY, strideX, 1], padding = padding, name = name)



def dropout(x, keepPro, name = None):
    """Apply dropout to ``x``.

    Args:
        x: input tensor.
        keepPro: keep probability forwarded to tf.nn.dropout.
        name: optional name for the dropout op.

    Returns:
        The tensor produced by tf.nn.dropout.
    """
    # name must be passed by keyword: the third positional parameter of
    # tf.nn.dropout is noise_shape, so a positional name would be
    # misinterpreted as a noise shape.
    return tf.nn.dropout(x, keepPro, name = name)



def fcLayer(x, inputD, outputD, reluFlag, name):
    """Fully connected layer: ``x * w + b``, optionally followed by ReLU.

    Args:
        x: input tensor multiplied by a [inputD, outputD] weight matrix.
        inputD: size of the input feature dimension.
        outputD: size of the output feature dimension.
        reluFlag: when truthy, ReLU is applied to the affine output.
        name: variable scope that holds the "w" and "b" variables.

    Returns:
        The affine output, passed through ReLU when ``reluFlag`` is set.
    """
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("w", shape = [inputD, outputD], dtype = "float")
        biases = tf.get_variable("b", [outputD], dtype = "float")
        affine = tf.nn.xw_plus_b(x, weights, biases, name = scope.name)
        # The ReLU op is only created when requested.
        return tf.nn.relu(affine) if reluFlag else affine



def convLayer(x, kHeight, kWidth, strideX, strideY,
              featureNum, name, padding = "SAME"):
    """Convolutional layer: conv2d, bias add, then ReLU.

    Args:
        x: input tensor; its last dimension is read as the channel count and
            must be statically known.
        kHeight: kernel height.
        kWidth: kernel width.
        strideX: stride along the width (horizontal) axis.
        strideY: stride along the height (vertical) axis.
        featureNum: number of output feature maps.
        name: variable scope that holds the "w" and "b" variables; also used
            as the name of the final ReLU op.
        padding: padding mode forwarded to tf.nn.conv2d.

    Returns:
        The ReLU-activated feature map.
    """
    channel = int(x.get_shape()[-1])  # number of input channels
    with tf.variable_scope(name) as scope:
        w = tf.get_variable("w", shape = [kHeight, kWidth, channel, featureNum])
        b = tf.get_variable("b", shape = [featureNum])
        featureMap = tf.nn.conv2d(x, w, strides = [1, strideY, strideX, 1], padding = padding)
        out = tf.nn.bias_add(featureMap, b)
        # No reshape to featureMap's static shape: adding the bias does not
        # change the shape, and a static-shape reshape fails as soon as any
        # dimension (e.g. the batch size) is unknown.
        return tf.nn.relu(out, name = scope.name)

定义了卷积、pooling、dropout、全连接四个模块，使用了上一篇AlexNet中的代码，其中卷积模块去除了group参数，因为网络没有像AlexNet一样分成两部分。接下来定义VGG19。

[python] view
plain copy

class VGG19(object):
    """VGG19 network: five conv/max-pool stages followed by three FC layers.

    The graph is built in the constructor; the unnormalised class scores are
    exposed as ``self.fc8``.
    """

    # One entry per convolutional stage:
    # (stage index, number of conv layers, number of output feature maps).
    _CONV_BLOCKS = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 4, 256),
        (4, 4, 512),
        (5, 4, 512),
    )

    def __init__(self, x, keepPro, classNum, skip, modelPath = "vgg19.npy"):
        """Store the configuration and build the network graph.

        Args:
            x: input image tensor fed to the first conv layer.
            keepPro: keep probability used by both dropout layers.
            classNum: number of output classes (width of fc8).
            skip: collection of layer names that loadModel must not load.
            modelPath: path of the .npy file holding the pretrained weights.
        """
        self.X = x
        self.KEEPPRO = keepPro
        self.CLASSNUM = classNum
        self.SKIP = skip
        self.MODELPATH = modelPath
        # build CNN
        self.buildCNN()

    def buildCNN(self):
        """Build the network graph and store the class scores in self.fc8."""
        net = self.X
        # Each stage is a run of 3x3, stride-1 convolutions followed by a
        # 2x2, stride-2 max pool. Scope names are conv<stage>_<i> / pool<stage>.
        for block, convNum, featureNum in self._CONV_BLOCKS:
            for i in range(1, convNum + 1):
                net = convLayer(net, 3, 3, 1, 1, featureNum,
                                "conv{}_{}".format(block, i))
            net = maxPoolLayer(net, 2, 2, 2, 2, "pool{}".format(block))

        # The flatten size is hard-coded to 7*7*512; this presumably assumes
        # 224x224 input (five stride-2 pools) -- confirm before feeding
        # other input sizes.
        fcIn = tf.reshape(net, [-1, 7*7*512])
        fc6 = fcLayer(fcIn, 7*7*512, 4096, True, "fc6")
        dropout1 = dropout(fc6, self.KEEPPRO)

        fc7 = fcLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = dropout(fc7, self.KEEPPRO)

        # fc8 produces the class scores, so no ReLU is applied here: clipping
        # negative scores to zero would distort any softmax computed on them.
        self.fc8 = fcLayer(dropout2, 4096, self.CLASSNUM, False, "fc8")

    def loadModel(self, sess):
        """Assign pretrained parameters from self.MODELPATH to the variables.

        Layers whose name appears in self.SKIP are left untouched.

        Args:
            sess: session used to run the assign ops.
        """
        # NOTE(review): the file stores a pickled dict, so allow_pickle=True
        # is needed on NumPy releases where it defaults to False. Unpickling
        # can execute arbitrary code -- only load model files from a trusted
        # source.
        wDict = np.load(self.MODELPATH, encoding = "bytes", allow_pickle = True).item()
        # for layers in model
        for name in wDict:
            if name in self.SKIP:
                continue
            with tf.variable_scope(name, reuse = True):
                for p in wDict[name]:
                    # A 1-D array is treated as the bias; anything else as
                    # the weights.
                    varName = 'b' if len(p.shape) == 1 else 'w'
                    sess.run(tf.get_variable(varName, trainable = False).assign(p))

buildCNN函数完全按照VGG的结构搭建网络。

loadModel函数从模型文件中读取参数，采用的模型文件见github上的readme说明。

至此，我们定义了完整的模型，下面开始测试模型。

模型测试

ImageNet训练的VGG有很多类，几乎包含所有常见的物体，因此我们随便从网上找几张图片测试。比如我直接用了之前做项目的图片，为了避免审美疲劳，我们不只用渣土车，还要用挖掘机、采沙船：

然后编写测试代码：

[python] view
plain copy

# Command-line test script: classify the images in a local folder or a single
# image fetched from a URL, and display each image with its predicted label.
parser = argparse.ArgumentParser(description='Classify some images.')
# nargs='?' is what makes the default effective: a plain positional argument
# is always required, so its default would never be used.
parser.add_argument('mode', nargs='?', choices=['folder', 'url'], default='folder')
parser.add_argument('path', help='Specify a path [e.g. testModel]')
args = parser.parse_args(sys.argv[1:])

if args.mode == 'folder':  # test images come from a local folder
    # get testImage: map file name -> decoded image
    testImg = {}
    for f in os.listdir(args.path):
        fullPath = os.path.join(args.path, f)
        if not os.path.isfile(fullPath):
            continue
        image = cv2.imread(fullPath)
        if image is None:
            # cv2.imread returns None for files it cannot decode
            # (e.g. non-image files); skip them instead of crashing later.
            print("{}: not a readable image, skipped".format(f))
            continue
        testImg[f] = image
elif args.mode == 'url':  # test image comes from a URL
    def url2img(url):
        """Download ``url`` and decode it; returns None if decoding fails."""
        # The context manager closes the HTTP response once it has been read.
        with urllib.request.urlopen(url) as resp:
            data = np.asarray(bytearray(resp.read()), dtype="uint8")
        return cv2.imdecode(data, cv2.IMREAD_COLOR)

    image = url2img(args.path)
    if image is None:
        print("{}: not a readable image, skipped".format(args.path))
        testImg = {}
    else:
        testImg = {args.path: image}

if testImg:
    # some params
    dropoutPro = 1  # keep probability 1: dropout disabled at test time
    classNum = 1000
    skip = []

    # np.float was removed from NumPy; float64 is the type it aliased.
    imgMean = np.array([104, 117, 124], np.float64)
    x = tf.placeholder("float", [1, 224, 224, 3])

    model = vgg19.VGG19(x, dropoutPro, classNum, skip)
    score = model.fc8
    softmax = tf.nn.softmax(score)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        model.loadModel(sess)  # load the pretrained parameters

        for key, img in testImg.items():
            # img preprocess: resize to the 224x224 network input and
            # subtract the per-channel mean
            resized = cv2.resize(img.astype(np.float64), (224, 224)) - imgMean
            maxx = np.argmax(sess.run(softmax, feed_dict = {x: resized.reshape((1, 224, 224, 3))}))
            res = caffe_classes.class_names[maxx]

            font = cv2.FONT_HERSHEY_SIMPLEX
            # putText expects the origin as (x, y): x is derived from the
            # width (shape[1]) and y from the height (shape[0]).
            cv2.putText(img, res, (int(img.shape[1]/3), int(img.shape[0]/3)), font, 1, (0, 255, 0), 2)
            print("{}: {}\n----".format(key,res))  # print the prediction
            cv2.imshow("demo", img)
            cv2.waitKey(0)

如果你看完了我AlexNet的博客，那么一定会发现我这里的测试代码做了一些小的修改，增加了URL测试的功能，可以测试网上的图像，测试结果如下：

内容来自用户分享和网络整理，不保证内容的准确性，如有侵权内容，可联系管理员处理

标签：

相关文章推荐

新的分享

章节导航