请 [注册] 或 [登录]  | 返回主站

量化交易吧 /  数理科学 帖子:3366063 新帖:0

机器学习-人脸识别

Tango发表于:9 月 22 日 12:00回复(1)

Img

导入需求库¶

from sklearn.svm import SVC #分类
from sklearn.datasets import fetch_lfw_people #名人人脸数据
import numpy as np
import matplotlib.pyplot as plt 
%matplotlib inline
from sklearn.decomposition import PCA #降低维度
from sklearn.model_selection import GridSearchCV #调节参数
import logging

一、配置日志并且导入人脸数据¶

# Set the root logger threshold to INFO.
logging.basicConfig(level= logging.INFO)
# Fetch the Labeled Faces in the Wild (LFW) people dataset.
data= fetch_lfw_people(min_faces_per_person= 70, # keep only people with at least 70 pictures
                       resize= 1, # resize ratio of 1
                       slice_= (slice(0, 250, None), # (height, width) region extracted from each picture
                                slice(0, 250, None))) # rows 0-250 and columns 0-250

友情提示:导入人脸数据,需要一定时间,请耐心等待。¶

二、展示脸图与数据¶

plt.imshow(data.images[10])
<matplotlib.image.AxesImage at 0x7f95fb659550>
data.images[10]
array([[1.6666666, 1.6666666, 1.6666666, ..., 0.0, 0.0, 0.0],
       [0.33333334, 0.33333334, 0.33333334, ..., 0.0, 0.0, 0.0],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       ...,
       [61.333332, 32.333332, 15.0, ..., 11.0, 11.0, 11.0],
       [76.666664, 34.333332, 15.0, ..., 11.0, 11.0, 11.0],
       [78.666664, 36.333332, 16.0, ..., 11.0, 11.0, 11.0]], dtype=float32)

三、人脸数据其他相关参数¶

images= data.images # 1288张图片
target_names= data.target_names #图片的人名
target_names
array([Ariel Sharon, Colin Powell, Donald Rumsfeld, George W Bush,
       Gerhard Schroeder, Hugo Chavez, Tony Blair], dtype='<U17')
x= data.data
x
array([[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       [1.3333334, 1.3333334, 1.3333334, ..., 0.0, 0.0, 0.0],
       [2.3333333, 2.3333333, 1.6666666, ..., 0.0, 0.0, 0.0],
       ...,
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0],
       [0.6666667, 0.6666667, 0.6666667, ..., 0.6666667, 0.6666667,
        0.6666667],
       [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0]], dtype=float32)
y= data.target
y
array([5, 6, 3, ..., 5, 3, 5])

四、训练数据切割¶

from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test= train_test_split(x, y, test_size= 0.12) # split the data: 0.88 for training, 0.12 for testing (no random_state is passed)

五、简单SVC分类¶

import time
start= time.time() # start time
svc= SVC() # SVM classifier with default parameters
svc.fit(x_train, y_train) # train on the un-reduced features
print(svc.score(x_test, y_test)) # test score; unsatisfactory without dimensionality reduction
end= time.time() # end time
print(end-start) # elapsed time in seconds
0.44516129032258067
286.4365243911743
从上面的输出可以看到,简单SVC训练得到的分数约为0.45,很不理想,接下来通过降维来改进。¶

六、优化版SVC+降维-训练与预测¶

一、PCA降维处理¶

# Baseline for comparison: an SVC trained on the first 500 un-reduced training samples.
newsvc= SVC()
newsvc.fit(x_train[:500], y_train[:500])
# Dimensionality reduction with PCA.
pca= PCA(n_components= 150, # number of principal components to keep
         svd_solver= 'randomized', # randomized (approximate) SVD solver
         whiten= True) # whitening
# Fit on the training split only, so that no information from the test split
# leaks into the projection used to evaluate the classifiers.
pca.fit(x_train)
PCA(copy=True, iterated_power='auto', n_components=150, random_state=None,
  svd_solver='randomized', tol=0.0, whiten=True)
友情提示:由于服务器不稳定(晚上尤甚,请勿于晚上运行),训练数据以前500条为限;另外,笔者的研究环境已经升级(CPU:2,内存:2,硬盘:3)。¶

二、训练与预测数据预处理¶

x_train_pca= pca.transform(x_train) # project the training features onto the PCA components
x_test_pca= pca.transform(x_test) # project the test features onto the PCA components
newsvc.score(x_test, y_test) # score of the baseline SVC, which was trained without PCA
0.3741935483870968

三、直接降维预测¶

svc_best= SVC() # default-parameter SVC for the PCA-reduced features
svc_best.fit(x_train_pca, y_train)
svc_best.score(x_test_pca, y_test)
0.6580645161290323

四、SVC调优预测¶

g_svc= SVC() # base estimator for the parameter search
param_grid= {'C': [0.2, 0.5, 0.8, 1, 3, 5, 7, 9],
             'gamma': [0.001, 0.002, 0.0033, 0.0066, 0.01, 0.03, 0.05, 0.1]} # candidate values; the search picks the best combination
gcv= GridSearchCV(g_svc, param_grid= param_grid) # grid search over param_grid
gcv.fit(x_train_pca, y_train) # run the search on the PCA-reduced training data
GridSearchCV(cv=None, error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.2, 0.5, 0.8, 1, 3, 5, 7, 9], 'gamma': [0.001, 0.002, 0.0033, 0.0066, 0.01, 0.03, 0.05, 0.1]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)
svc_last= SVC(C= 1.0, gamma= 0.001)
svc_last
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

一、训练数据¶

svc_last_1= SVC(C= 5.0, gamma= 0.001)
svc_last_1.fit(x_train_pca[:500], y_train[:500]) # train on the first 500 PCA-reduced samples
SVC(C=5.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

由于SVC自动调优出现bug,这里将惩罚系数C手动设为5。

二、预测数据¶

svc_last_1.score(x_test_pca, y_test) #评分
0.7354838709677419
y_new= svc_last_1.predict(x_test_pca) # predict labels for the test set
y_new
array([3, 3, 3, 3, 3, 1, 6, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 1, 6, 2, 3, 3,
       6, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 6, 3, 3, 1, 3,
       3, 0, 4, 6, 3, 2, 3, 2, 3, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 4, 3, 3,
       1, 3, 3, 3, 3, 3, 1, 6, 3, 3, 1, 4, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3,
       3, 1, 3, 3, 0, 3, 6, 3, 4, 3, 5, 3, 2, 3, 6, 1, 3, 6, 1, 3, 3, 3,
       1, 3, 2, 3, 3, 6, 3, 1, 3, 3, 3, 3, 5, 1, 3, 2, 3, 3, 4, 3, 1, 2,
       3, 3, 3, 3, 3, 1, 3, 6, 3, 6, 3, 3, 1, 3, 3, 5, 3, 3, 1, 3, 1, 1,
       3])

七、真实数据与预测展示¶

一、数据展示¶

print("预测数据")
for i in y_new:
    print(target_names[i], end= ' ')
预测数据
George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair George W Bush Tony Blair George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush Colin Powell George W Bush George W Bush Ariel Sharon Gerhard Schroeder Tony Blair George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair George W Bush George W Bush Colin Powell Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush Ariel Sharon George W Bush Tony Blair George W Bush Gerhard Schroeder George W Bush Hugo Chavez George W Bush Donald Rumsfeld George W Bush Tony Blair Colin Powell George W Bush Tony Blair Colin Powell George W Bush George W Bush George W Bush Colin Powell George W Bush Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Hugo Chavez Colin Powell George W Bush Donald Rumsfeld George W Bush George W Bush Gerhard Schroeder George W Bush Colin Powell Donald Rumsfeld George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell George W Bush Tony Blair George W Bush Tony Blair George W Bush George W Bush Colin Powell 
George W Bush George W Bush Hugo Chavez George W Bush George W Bush Colin Powell George W Bush Colin Powell Colin Powell George W Bush 
print("真实数据")
for i in y_test:
    print(target_names[i], end= ' ')
真实数据
George W Bush Donald Rumsfeld George W Bush George W Bush George W Bush Colin Powell Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Ariel Sharon Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush Hugo Chavez Gerhard Schroeder Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush Colin Powell Hugo Chavez Colin Powell George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush Colin Powell Tony Blair George W Bush Ariel Sharon Gerhard Schroeder Colin Powell George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld George W Bush George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld Tony Blair George W Bush Donald Rumsfeld George W Bush George W Bush Donald Rumsfeld Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush Donald Rumsfeld George W Bush Colin Powell Gerhard Schroeder Colin Powell George W Bush Colin Powell Tony Blair George W Bush Colin Powell George W Bush George W Bush Donald Rumsfeld George W Bush George W Bush Gerhard Schroeder Gerhard Schroeder Tony Blair George W Bush Ariel Sharon Donald Rumsfeld George W Bush Ariel Sharon Donald Rumsfeld Tony Blair Hugo Chavez Hugo Chavez George W Bush Hugo Chavez George W Bush Donald Rumsfeld George W Bush Tony Blair Colin Powell George W Bush Tony Blair Colin Powell Donald Rumsfeld Hugo Chavez George W Bush Colin Powell George W Bush Donald Rumsfeld George W Bush Hugo Chavez Donald Rumsfeld George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Hugo Chavez Colin Powell Gerhard Schroeder Donald Rumsfeld Hugo Chavez George W Bush Gerhard Schroeder Hugo Chavez Colin Powell Donald Rumsfeld George W Bush George W Bush George W Bush George W Bush Tony Blair Colin Powell Colin Powell Tony Blair George W Bush Tony Blair George W Bush 
George W Bush Colin Powell Donald Rumsfeld George W Bush Hugo Chavez George W Bush Gerhard Schroeder George W Bush Colin Powell Colin Powell Colin Powell George W Bush 
y_last = svc_last_1.predict(x_test_pca[: 20])
y_last
array([3, 3, 3, 3, 3, 1, 6, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 1, 6, 2])
y_test[: 20]
array([3, 2, 3, 3, 3, 1, 4, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 1, 0, 2])

二、预测与真实数据对比¶

def get_names(y_last, y_test, target_names, i):
    """Build a two-line caption comparing the predicted and true name for sample ``i``.

    Args:
        y_last: Predicted label indices.
        y_test: True label indices.
        target_names: Sequence of full names, indexed by label.
        i: Position of the sample in ``y_last`` / ``y_test``.

    Returns:
        A string of the form ``'predict: <name> \\n true: <name>'`` where each
        name is the text after the last space of the full name.
    """
    def _last_word(label):
        # Text after the final space; the whole name when it contains no space.
        return target_names[label].rpartition(' ')[2]

    caption = 'predict: %s \n true: %s'
    return caption % (_last_word(y_last[i]), _last_word(y_test[i]))
get_names(y_last, y_test, target_names, 1)
'predict: Bush \n true: Rumsfeld'
names= [get_names(y_last, y_test, target_names, i) for i in range(20)]
names
['predict: Bush \n true: Bush',
 'predict: Bush \n true: Rumsfeld',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Powell \n true: Powell',
 'predict: Blair \n true: Schroeder',
 'predict: Bush \n true: Bush',
 'predict: Blair \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Blair \n true: Blair',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Bush \n true: Bush',
 'predict: Powell \n true: Powell',
 'predict: Blair \n true: Sharon',
 'predict: Rumsfeld \n true: Rumsfeld']
def show_predict_result(names, row, columns, x_test):
    """Plot a grid of face images, each titled with its caption.

    Args:
        names: Captions indexed like ``x_test``; ``names[i]`` titles image ``i``.
        row: Number of subplot rows.
        columns: Number of subplot columns.
        x_test: Iterable of flattened images, each reshaped to 250x250 for display.
    """
    # The figure size has to be passed through the ``figsize`` keyword;
    # ``figsize(...)`` as a call refers to a name the notebook never defines.
    plt.figure(figsize=(columns * 2, row * 2.4))
    for i, image in enumerate(x_test):
        plt.subplot(row, columns, i + 1)  # subplot positions are 1-based
        plt.imshow(image.reshape((250, 250)))  # restore the 2-D image shape
        plt.axis('off')
        plt.title(names[i])
show_predict_result(names, 5, 4, x_test[: 20])
show_predict_result(names, 3, 4, x_test[: 12])
 

全部回复

0/140

量化课程

    移动端课程