导入需求库¶

from sklearn.svm import SVC #分类
from sklearn.datasets import fetch_lfw_people #名人人脸数据
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline
from sklearn.decomposition import PCA #降低维度
from sklearn.model_selection import GridSearchCV #调节参数
import logging

一、配置日志并且导入人脸数据¶

# Show INFO-level log records (presumably so the dataset download progress is visible).
logging.basicConfig(level= logging.INFO)

data= fetch_lfw_people(min_faces_per_person= 70, # keep only people with at least 70 images
                       resize= 1, # resize ratio of 1
                       slice_= (slice(0, 250, None),
                                slice(0, 250, None))) # fetch the face data, slicing 0..250 on both image axes

友情提示：导入人脸数据，需要一定时间，请耐心等待。¶

二、展示脸图与数据¶

plt.imshow(data.images[10])

<matplotlib.image.AxesImage at 0x7f95fb659550>

data.images[10]

array([[1.6666666, 1.6666666, 1.6666666, ..., 0.0, 0.0, 0.0],
       [0.33333334, 0.33333334, 0.33333334, ..., 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       ...,
       [61.333332, 32.333332, 15.0, ..., 11.0, 11.0, 11.0],
       [76.666664, 34.333332, 15.0, ..., 11.0, 11.0, 11.0],
       [78.666664, 36.333332, 16.0, ..., 11.0, 11.0, 11.0]], dtype=float32)

三、人脸数据其他相关参数¶

images= data.images # image array (1288 images according to the original author's note)

target_names= data.target_names # names of the people in the dataset

target_names

array([Ariel Sharon, Colin Powell, Donald Rumsfeld, George W Bush,
       Gerhard Schroeder, Hugo Chavez, Tony Blair], dtype='<U17')

x= data.data # feature matrix passed to the classifiers below
x

array([[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       [1.3333334, 1.3333334, 1.3333334, ..., 0.0, 0.0, 0.0],
       [2.3333333, 2.3333333, 1.6666666, ..., 0.0, 0.0, 0.0],
       ...,
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       [0.6666667, 0.6666667, 0.6666667, ..., 0.6666667, 0.6666667,
        0.6666667],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0]], dtype=float32)

y= data.target # integer labels; used below as indices into target_names
y

array([5, 6, 3, ..., 5, 3, 5])

四、训练数据切割¶

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.12) # split the data: 0.88 for training, 0.12 held out for testing

五、简单SVC分类¶

import time
start= time.time() # start time
svc= SVC() # SVM classifier with default parameters
svc.fit(x_train, y_train) # train on the un-projected features
print(svc.score(x_test, y_test)) # test-set accuracy; unsatisfactory (author noted 0.38, the recorded run printed about 0.445)
end= time.time() # end time
print(end-start) # elapsed seconds, covering both fitting and scoring

0.44516129032258067
286.4365243911743

从上面的输出可以看到，简单SVC训练的得分只有约0.45，非常不理想，接下来通过降维来改进。¶

六、优化版SVC+降维-训练与预测¶

一、PCA降维处理¶

# Baseline on the un-projected features: a default SVC trained on the first
# 500 training samples only.
newsvc= SVC()
newsvc.fit(x_train[:500], y_train[:500])

# Reduce every sample to 150 principal components.
pca= PCA(n_components= 150, # number of principal components to keep
         svd_solver= 'randomized', # randomized (approximate) SVD solver
         whiten= True) # whiten the projected components
# Fit on the training split only: fitting on all of x would let the held-out
# test images shape the projection and bias the test score.
pca.fit(x_train)

PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,
  svd_solver='randomized', tol=0.0, whiten=True)

友情提示：请勿在晚上运行。由于服务器不稳定，训练数据量限制为前500条；另外，笔者的研究环境已经升级（CPU：2，内存：2，硬盘：3）。¶

二、训练与预测数据预处理¶

x_train_pca= pca.transform(x_train) # project x_train with the fitted PCA

x_test_pca= pca.transform(x_test) # project x_test with the same PCA

# Accuracy of newsvc, which is scored here on the un-projected x_test.
newsvc.score(x_test, y_test)

0.3741935483870968

三、直接降维预测¶

svc_best= SVC() # default-parameter SVC trained on the PCA-projected features
svc_best.fit(x_train_pca, y_train)
svc_best.score(x_test_pca, y_test)

0.6580645161290323

四、SVC调优预测¶

g_svc= SVC() # base estimator for the grid search
param_grid= {'C': [0.2, 0.5, 0.8, 1, 3, 5, 7, 9],
             'gamma': [0.001, 0.002, 0.0033, 0.0066, 0.01, 0.03, 0.05, 0.1]} # candidate C / gamma values to search over

gcv= GridSearchCV(g_svc, param_grid= param_grid) # build the grid search over param_grid
gcv.fit(x_train_pca, y_train) # run the search on the PCA-projected training data

GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.2, 0.5, 0.8, 1, 3, 5, 7, 9], 'gamma': [0.001, 0.002, 0.0033, 0.0066, 0.01, 0.03, 0.05, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

# NOTE(review): svc_last is only constructed and displayed here; no later visible cell fits or scores it.
svc_last= SVC(C= 1.0, gamma= 0.001)
svc_last

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

一、训练数据¶

# C and gamma are set by hand; the visible cells never read the search result from gcv.
svc_last_1= SVC(C= 5.0, gamma= 0.001)
svc_last_1.fit(x_train_pca[:500], y_train[:500]) # fit on the first 500 PCA-projected training samples

SVC(C=5.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

由于SVC自动调优（GridSearchCV）出现问题，这里将惩罚系数C手动设为5。

二、预测数据¶

svc_last_1.score(x_test_pca, y_test) # accuracy on the PCA-projected test set

0.7354838709677419

y_new= svc_last_1.predict(x_test_pca) # predict labels for the PCA-projected test set
y_new

array([3, 3, 3, 3, 3, 1, 6, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 1, 6, 2, 3, 3,
       6, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 6, 3, 3, 1, 3,
       3, 0, 4, 6, 3, 2, 3, 2, 3, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 4, 3, 3,
       1, 3, 3, 3, 3, 3, 1, 6, 3, 3, 1, 4, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3,
       3, 1, 3, 3, 0, 3, 6, 3, 4, 3, 5, 3, 2, 3, 6, 1, 3, 6, 1, 3, 3, 3,
       1, 3, 2, 3, 3, 6, 3, 1, 3, 3, 3, 3, 5, 1, 3, 2, 3, 3, 4, 3, 1, 2,
       3, 3, 3, 3, 3, 1, 3, 6, 3, 6, 3, 3, 1, 3, 3, 5, 3, 3, 1, 3, 1, 1,
       3])

五、真实数据与预测展示¶

一、数据展示¶

# Print a header, then the predicted person's name for every test sample,
# separated by spaces on a single line.
print("预测数据")
for i in y_new:
    print(target_names[i], end= ' ')

预测数据
George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair George W Bush Tony Blair George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush Colin Powell George W Bush George W Bush Ariel Sharon Gerhard Schroeder Tony Blair George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair George W Bush George W Bush Colin Powell Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush Ariel Sharon George W Bush Tony Blair George W Bush Gerhard Schroeder George W Bush Hugo Chavez George W Bush Donald Rumsfeld George W Bush Tony Blair Colin Powell George W Bush Tony Blair Colin Powell George W Bush George W Bush George W Bush Colin Powell George W Bush Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Hugo Chavez Colin Powell George W Bush Donald Rumsfeld George W Bush George W Bush Gerhard Schroeder George W Bush Colin Powell Donald Rumsfeld George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell George W Bush Tony Blair George W Bush Tony Blair George W Bush George W Bush Colin Powell 
George W Bush George W Bush Hugo Chavez George W Bush George W Bush Colin Powell George W Bush Colin Powell Colin Powell George W Bush

# Print a header, then the true person's name for every test sample,
# separated by spaces on a single line.
print("真实数据")
for i in y_test:
    print(target_names[i], end= ' ')

真实数据
George W Bush Donald Rumsfeld George W Bush George W Bush George W Bush Colin Powell Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Ariel Sharon Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush Hugo Chavez Gerhard Schroeder Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush Colin Powell Hugo Chavez Colin Powell George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush Colin Powell Tony Blair George W Bush Ariel Sharon Gerhard Schroeder Colin Powell George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld George W Bush George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld Tony Blair George W Bush Donald Rumsfeld George W Bush George W Bush Donald Rumsfeld Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush Donald Rumsfeld George W Bush Colin Powell Gerhard Schroeder Colin Powell George W Bush Colin Powell Tony Blair George W Bush Colin Powell George W Bush George W Bush Donald Rumsfeld George W Bush George W Bush Gerhard Schroeder Gerhard Schroeder Tony Blair George W Bush Ariel Sharon Donald Rumsfeld George W Bush Ariel Sharon Donald Rumsfeld Tony Blair Hugo Chavez Hugo Chavez George W Bush Hugo Chavez George W Bush Donald Rumsfeld George W Bush Tony Blair Colin Powell George W Bush Tony Blair Colin Powell Donald Rumsfeld Hugo Chavez George W Bush Colin Powell George W Bush Donald Rumsfeld George W Bush Hugo Chavez Donald Rumsfeld George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Hugo Chavez Colin Powell Gerhard Schroeder Donald Rumsfeld Hugo Chavez George W Bush Gerhard Schroeder Hugo Chavez Colin Powell Donald Rumsfeld George W Bush George W Bush George W Bush George W Bush Tony Blair Colin Powell Colin Powell Tony Blair George W Bush Tony Blair George W Bush 
George W Bush Colin Powell Donald Rumsfeld George W Bush Hugo Chavez George W Bush Gerhard Schroeder George W Bush Colin Powell Colin Powell Colin Powell George W Bush

y_last = svc_last_1.predict(x_test_pca[: 20]) # predictions for the first 20 test samples only
y_last

array([3, 3, 3, 3, 3, 1, 6, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 1, 6, 2])

y_test[: 20]

array([3, 2, 3, 3, 3, 1, 4, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 1, 0, 2])

二、预测与真实数据对比¶

def get_names(y_last, y_test, target_names, i):
    """Build a two-line "predict / true" caption for sample ``i``.

    Looks up the predicted label ``y_last[i]`` and the true label
    ``y_test[i]`` in ``target_names`` and keeps only the text after the
    last space of each name.
    """
    def _last_word(label):
        # Text following the final space; the whole name when there is none.
        return target_names[label].rpartition(' ')[2]

    surnames = (_last_word(y_last[i]), _last_word(y_test[i]))
    return 'predict: %s \n true: %s' % surnames
get_names(y_last, y_test, target_names, 1)

'predict: Bush \n true: Rumsfeld'

names= [get_names(y_last, y_test, target_names, i) for i in range(20)] # captions for the first 20 test samples
names

['predict: Bush \n true: Bush',
 'predict: Bush \n true: Rumsfeld',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Powell \n true: Powell',
 'predict: Blair \n true: Schroeder',
 'predict: Bush \n true: Bush',
 'predict: Blair \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Blair \n true: Blair',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Powell \n true: Powell',
 'predict: Blair \n true: Sharon',
 'predict: Rumsfeld \n true: Rumsfeld']

def show_predict_result(names, row, columns, x_test, image_shape=(250, 250)):
    """Draw a grid of images, each titled with its caption.

    Args:
        names: Captions indexed like ``x_test``; ``names[i]`` titles image ``i``.
        row: Number of subplot rows.
        columns: Number of subplot columns.
        x_test: Iterable of flattened images to draw.
        image_shape: Shape each flattened image is reshaped to before
            drawing. Defaults to (250, 250), the value that used to be
            hard-coded.
    """
    # plt.figure takes the size through the ``figsize`` keyword argument.
    plt.figure(figsize=(columns * 2, row * 2.4))
    for i, image in enumerate(x_test):
        plt.subplot(row, columns, i + 1)  # subplot positions are 1-based
        plt.imshow(image.reshape(image_shape))
        plt.axis('off')
        plt.title(names[i])
show_predict_result(names, 5, 4, x_test[: 20])

show_predict_result(names, 3, 4, x_test[: 12])

量化交易吧 / 数理科学 帖子：3369513 新帖：3

机器学习-人脸识别

Tango发表于：9 月 22 日 12：00回复(1)

导入需求库¶

一、配置日志并且导入人脸数据¶

友情提示：导入人脸数据，需要一定时间，请耐心等待。¶

二、展示脸图与数据¶

三、人脸数据其他相关参数¶

四、训练数据切割¶

五、简单SVC分类¶

从上面简单SVC训练获取得分为0.38，非常不理想，接下来要加强学习-降维。¶

五、优化版SVC+降维-训练与预测¶

一、PCA降维处理¶

友情提示：请勿于晚上运行，因为服务器不稳定，所以数据训练以500为界限，而且本笔者研究环境是有升级。（CPU：2，内存：2，硬盘：3）¶

二、训练与预测数据预处理¶

三、直接降维预测¶

四、SVC调优预测¶

一、训练数据¶

二、预测数据¶

四、真实数据与预测展示¶

一、数据展示¶

二、预测与真实数据对比¶

全部回复

0/140

粉丝:473

帖子数:0

粉丝:555

帖子数:3

粉丝:676

帖子数:391

量化课程

热门标签

删除回复

确认要删除这篇文章么？

举报用户

信息提示

该文章已删除

设置置顶

完成设置【置顶】！

设置置顶

已取消设置【置顶】！

设置精华

完成设置【精华】！

设置精华

已取消设置【精华】！

审核信息

该文章已审核通过

审核信息

您已设置该文章审核不通过

举报成功

您已举报成功

用户登录

移动帖子

创建私信

屏蔽提示

确认要屏蔽该用户么？

屏蔽回复

您已对该用户实现屏蔽

信息回复

已发送成功

量化交易吧 / 数理科学帖子：3369513 新帖：3