基于vgg16的半监督视频单目标分割网络+ Dense-crf
one-shot 半监督视频单目标分割
网络实现
采用keras实现,网络结构如下。 类似于unet,但没有unet那么多的参数。使用dense-crf做后处理
# coding=utf-8
from keras import Input, Model
from keras.applications.vgg16 import VGG16
from keras.layers import Concatenate, Conv2D, UpSampling2D, BatchNormalization
class Seg:
def __init__(self):
self.input_img = Input(name='input_img',
shape=(512, 512, 3),
dtype='float32')
self.out_channel = 1
vgg16 = VGG16(input_tensor=self.input_img,
weights='imagenet',
include_top=False)
self.locked_layers = False
if self.locked_layers:
# locked first two conv layers
locked_layers = [vgg16.get_layer('block1_conv1'),
vgg16.get_layer('block1_conv2')]
for layer in locked_layers:
layer.trainable = False
self.vgg_pools = [vgg16.get_layer('block%d_pool' % i).output
for i in range(1, 6)]
def seg_network(self):
def decoder(layer_input, skip_input, channel, last_block=False):
if not last_block:
concat = Concatenate(axis=-1)([layer_input, skip_input])
bn1 = BatchNormalization()(concat)
else:
bn1 = BatchNormalization()(layer_input)
conv_1 = Conv2D(channel, 1,
activation='relu', padding='same')(bn1)
bn2 = BatchNormalization()(conv_1)
conv_2 = Conv2D(channel, 3,
activation='relu', padding='same')(bn2)
return conv_2
d1 = decoder(UpSampling2D((2, 2))(self.vgg_pools[4]), self.vgg_pools[3], 128)
d2 = decoder(UpSampling2D((2, 2))(d1), self.vgg_pools[2], 64)
d3 = decoder(UpSampling2D((2, 2))(d2), self.vgg_pools[1], 32)
d4 = decoder(UpSampling2D((2, 2))(d3), self.vgg_pools[0], 32)
d5 = decoder(UpSampling2D((2, 2))(d4), None, 32, True)
output = Conv2D(self.out_channel, 3, padding='same')(d5)
model = Model(inputs=self.input_img, outputs=[output])
model.summary()
return model
在DAVIS2016部分训练集上训练了网络。对于测试序列,使用第一帧的mask对网络进行微调。分割结果如下: