Dlib 人脸相关实践

Dlib由C++编写,提供了和机器学习、数值计算、图模型算法、图像处理等领域相关的一系列功能, 对Python 也提供了便利的接口, 但C++ 版功能更完善, 本文主要针对Python下的dlib使用.


dlib 官网, 提供完整的文档 dlib

DLIB 源码安装

本人使用的是 macOS 系统, 以下安装步骤只针对 macOS. 尝试了很多方案, 只有下面这个方案(来自一篇 Medium 博客)安装成功; 由于访问该博客需要翻墙, 故将命令贴在下面:

# Install the dependencies. If the --with-* options are rejected by your
# Homebrew version, use `brew search` to list the available formulae and
# install a specific version instead.
brew install openblas
brew install opencv3 --with-contrib --with-python3 --without-python
brew install cmake
brew install gtk+3 boost
brew install boost-python --with-python3

# Download and install XQuartz from:
#   https://www.xquartz.org

# Create the symlink
sudo ln -s /opt/X11 /usr/local/opt/X11

# Build and install dlib
git clone https://github.com/davisking/dlib.git
cd dlib
mkdir build
cd build
cmake .. -DUSE_SSE4_INSTRUCTIONS=ON
cmake --build . --config Release
cd ..
sudo python3 setup.py install

# Check that dlib can be imported and print its version
python3 -c "import dlib; print(dlib.__version__)"

dlib 基于CNN 的人脸检测器

import dlib


# Input image and detector weights. The model has to be downloaded manually
# from http://dlib.net/files/mmod_human_face_detector.dat.bz2 and unpacked.
img_path = "0_Parade_marchingband_1_5.jpg"
model_path = "./model/mmod_human_face_detector.dat"

# Load the CNN detector first, then open the display window and read the image.
cnn_face_detector = dlib.cnn_face_detection_model_v1(model_path)
win = dlib.image_window()
img = dlib.load_rgb_image(img_path)

# The second argument (1) upsamples the image once, which may reveal more
# small faces. The result is an mmod_rectangles collection; every entry
# carries a dlib.rectangle (`rect`) and a confidence score (`confidence`).
dets = cnn_face_detector(img, 1)

report = "Detection {}: Left: {} Top: {} Right: {} Bottom: {} Confidence: {}"
rects = dlib.rectangles()
for index, hit in enumerate(dets):
    box = hit.rect
    # Detections could be filtered here with a confidence threshold.
    print(report.format(
        index, box.left(), box.top(), box.right(), box.bottom(), hit.confidence))
    # Collect the plain rectangles so they can be drawn as an overlay.
    rects.append(box)

win.clear_overlay()
win.set_image(img)
win.add_overlay(rects)
dlib.hit_enter_to_continue()

如果是第一次使用, 应该会报错 dlib.gui_core: Unable to connect to the X display; 这是因为安装 xquartz 后需要重启电脑. 测试结果如下, 图片来自 widerface, 未进行后处理, 左一人脸出了两个框.

dlib 之人脸对齐

import dlib

# Landmark model: http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2
predictor_path = "./model/shape_predictor_5_face_landmarks.dat"
face_file_path = "2.jpg"

# HOG-based detector: fast, but detects less reliably than the CNN detector.
detector = dlib.get_frontal_face_detector()
# Face landmark predictor.
sp = dlib.shape_predictor(predictor_path)

window = dlib.image_window()
# Load the image with dlib.
img = dlib.load_rgb_image(face_file_path)
# The second argument (1) upsamples the image once. The detections are plain
# rectangles (they are passed straight to the shape predictor below), not
# mmod_rectangles.
dets = detector(img, 1)
num_faces = len(dets)
if num_faces == 0:
    print("Sorry, there were no faces found in '{}'".format(face_file_path))
    # `exit()` is a helper injected by the `site` module and may be missing
    # (e.g. `python -S`); raising SystemExit ends the script with the same
    # zero exit status and is always available.
    raise SystemExit

# Locate the five landmarks used for alignment on every detected face.
faces = dlib.full_object_detections()
for detection in dets:
    landmark = sp(img, detection)
    faces.append(landmark)

# Extract the aligned face chips and show them one at a time.
images = dlib.get_face_chips(img, faces, size=320)
for image in images:
    window.set_image(image)
    dlib.hit_enter_to_continue()

结果如下:

dlib 之人脸聚类

import os
import dlib
import glob

# Model files and working directories.
predictor_path = "./model/shape_predictor_5_face_landmarks.dat"
# Recognition model: http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2
face_rec_model_path = "./model/dlib_face_recognition_resnet_model_v1.dat"
faces_folder_path = "./face"
output_folder_path = "./output"

# Three stages: find faces, locate landmarks to align them, and turn every
# aligned face into a 128-dimensional descriptor.
detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor(predictor_path)
facerec = dlib.face_recognition_model_v1(face_rec_model_path)

descriptors = []
images = []

for image_file in glob.glob(os.path.join(faces_folder_path, "*.jpg")):
    img = dlib.load_rgb_image(image_file)

    # Bounding boxes first, then landmarks and a descriptor per face.
    for face_box in detector(img, 1):
        shape = sp(img, face_box)
        descriptors.append(facerec.compute_face_descriptor(img, shape))
        # Remember the source image and landmarks so the face can be saved later.
        images.append((img, shape))

# Cluster the descriptors.
labels = dlib.chinese_whispers_clustering(descriptors, 0.5)
num_classes = len(set(labels))
print("Number of clusters: {}".format(num_classes))

# Find the cluster with the most members; on a tie the lowest id wins
# because only a strictly larger size replaces the current best.
biggest_class = None
biggest_class_length = 0
for cluster_id in range(num_classes):
    member_count = sum(1 for label in labels if label == cluster_id)
    if member_count > biggest_class_length:
        biggest_class, biggest_class_length = cluster_id, member_count

print("Biggest cluster id number: {}".format(biggest_class))
print("Number of faces in biggest cluster: {}".format(biggest_class_length))

# Positions (into `images`) of the faces that belong to the biggest cluster.
indices = [pos for pos, label in enumerate(labels) if label == biggest_class]

print("Indices of images in the biggest cluster: {}".format(str(indices)))

# Create the output directory when it is not there yet.
output_dir_present = os.path.isdir(output_folder_path)
if not output_dir_present:
    os.makedirs(output_folder_path)

# Save the extracted face chips of the biggest cluster.
print("Saving faces in largest cluster to output folder...")
members = (images[pos] for pos in indices)
for seq, (img, shape) in enumerate(members):
    file_path = os.path.join(output_folder_path, "face_" + str(seq))
    dlib.save_face_chip(img, shape, file_path, size=150, padding=0.25)

dlib 之人脸landmark检测(五点)

import dlib


# Face detector, five-point landmark predictor and a window for the result.
detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("./model/shape_predictor_5_face_landmarks.dat")
win = dlib.image_window()

img = dlib.load_rgb_image("13_Interview_Interview_2_People_Visible_13_14.jpg")
win.set_image(img)

box_report = "Detection {}: Left: {} Top: {} Right: {} Bottom: {}"
part_report = "Part {}, coord {}"

dets = detector(img, 1)
for face_no, face_box in enumerate(dets):
    print(box_report.format(
        face_no, face_box.left(), face_box.top(), face_box.right(), face_box.bottom()))
    # Predict the landmarks inside this box and list every point.
    shape = predictor(img, face_box)
    part_no = 0
    while part_no < shape.num_parts:
        print(part_report.format(part_no, shape.part(part_no)))
        part_no += 1
    win.add_overlay(shape)

# Draw the face boxes on top of the landmarks and wait for the user.
win.add_overlay(dets)
dlib.hit_enter_to_continue()

结果:

dlib 之人脸识别

import dlib
import numpy as np


# Model files used by the recognition example.
LANDMARK_MODEL_PATH = "./model/shape_predictor_5_face_landmarks.dat"
RECOGNITION_MODEL_PATH = "./model/dlib_face_recognition_resnet_model_v1.dat"

# Shared by get_face_descriptor(): face detector, landmark predictor and
# the face recognition network.
detector = dlib.get_frontal_face_detector()
sp = dlib.shape_predictor(LANDMARK_MODEL_PATH)
facerec = dlib.face_recognition_model_v1(RECOGNITION_MODEL_PATH)


def distance(a, b):
    """Return the 2-norm of the element-wise difference between *a* and *b*.

    Both arguments are converted with ``np.array`` before subtracting, so any
    array-like of matching shape is accepted.
    """
    first = np.array(a)
    second = np.array(b)
    difference = first - second
    return np.linalg.norm(difference, ord=2)


def get_face_descriptor(face_path):
    """Return the descriptor of the first face detected in an image file.

    Args:
        face_path: Path of the image, loaded with ``dlib.load_rgb_image``.

    Returns:
        The value of ``facerec.compute_face_descriptor`` for the first
        detection returned by the module-level ``detector``.

    Raises:
        ValueError: If the detector finds no face in the image.
    """
    img = dlib.load_rgb_image(face_path)
    # The second argument (1) upsamples the image once before detecting.
    dets = detector(img, 1)
    # Fail with a descriptive message instead of an opaque IndexError from
    # indexing an empty detection list.
    if len(dets) == 0:
        raise ValueError("no face detected in '{}'".format(face_path))
    # Only the first detection is used, even if several faces were found.
    shape = sp(img, dets[0])
    return facerec.compute_face_descriptor(img, shape)


# Two photos of person A and one photo of person B.
person_a1 = "persona1.jpg"
person_a2 = "persona2.jpg"
person_b = "personb.jpg"

# One descriptor per photo, computed in the order a1, a2, b.
person_a1_features = get_face_descriptor(person_a1)
person_a2_features = get_face_descriptor(person_a2)
person_b_features = get_face_descriptor(person_b)

# Same-person distance first, then the distance between different people.
same_person_gap = distance(person_a1_features, person_a2_features)
print("distance between person a1 and person a2: {}".format(same_person_gap))
other_person_gap = distance(person_a1_features, person_b_features)
print("distance between person a1 and person b: {}".format(other_person_gap))

测试了两个人: a1 与 a2 的距离为 0.08, a1 与 b 的距离为 0.55; 距离越小, 两张人脸越相似.

dlib 之目标跟踪

import os
import glob
import dlib


# Folder holding the video frames as individual .jpg files.
video_folder = "./face"

# Correlation-filter based tracker; see the DSST paper "Accurate scale
# estimation for robust visual tracking".
tracker = dlib.correlation_tracker()
win = dlib.image_window()

# Frames are processed in sorted filename order.
frame_paths = sorted(glob.glob(os.path.join(video_folder, "*.jpg")))
for frame_no, frame_path in enumerate(frame_paths):
    frame = dlib.load_rgb_image(frame_path)

    if frame_no > 0:
        # Later frames: let the tracker follow the target.
        tracker.update(frame)
    else:
        # First frame: the target box has to be supplied by hand.
        tracker.start_track(frame, dlib.rectangle(60, 67, 250, 350))

    # Show the frame with the current tracker position and wait for Enter.
    win.clear_overlay()
    win.set_image(frame)
    win.add_overlay(tracker.get_position())
    dlib.hit_enter_to_continue()

ref

dlib 官网