from sklearn.svm import SVC #分类
from sklearn.datasets import fetch_lfw_people #名人人脸数据
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.decomposition import PCA #降低维度
from sklearn.model_selection import GridSearchCV #调节参数
import logging
# Enable INFO-level logging (presumably to surface progress messages while the
# data set is being fetched).
logging.basicConfig(level=logging.INFO)

# Fetch the "Labeled Faces in the Wild" people data set, keeping only people
# that have at least 70 pictures.  resize=1 together with the 0..250 slices on
# both axes asks for the pictures without rescaling or cropping.
data = fetch_lfw_people(
    min_faces_per_person=70,
    resize=1,
    slice_=(slice(0, 250), slice(0, 250)),
)

# Preview one sample picture.
plt.imshow(data.images[10])
<matplotlib.image.AxesImage at 0x7f95fb659550>
data.images[10]
array([[1.6666666, 1.6666666, 1.6666666, ..., 0.0, 0.0, 0.0], [0.33333334, 0.33333334, 0.33333334, ..., 0.0, 0.0, 0.0], [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0], ..., [61.333332, 32.333332, 15.0, ..., 11.0, 11.0, 11.0], [76.666664, 34.333332, 15.0, ..., 11.0, 11.0, 11.0], [78.666664, 36.333332, 16.0, ..., 11.0, 11.0, 11.0]], dtype=float32)
# Keep handles on the image arrays and on the list of person names.
images= data.images # image arrays (1288 pictures)
target_names= data.target_names # names of the people shown in the pictures
target_names
array([Ariel Sharon, Colin Powell, Donald Rumsfeld, George W Bush, Gerhard Schroeder, Hugo Chavez, Tony Blair], dtype='<U17')
# Feature matrix: one row per picture (rows are later reshaped to 250x250 for plotting).
x= data.data
x
array([[0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0], [1.3333334, 1.3333334, 1.3333334, ..., 0.0, 0.0, 0.0], [2.3333333, 2.3333333, 1.6666666, ..., 0.0, 0.0, 0.0], ..., [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0], [0.6666667, 0.6666667, 0.6666667, ..., 0.6666667, 0.6666667, 0.6666667], [0.0, 0.0, 0.0, ..., 0.0, 0.0, 0.0]], dtype=float32)
# Labels: integer class ids that are used as indices into target_names.
y= data.target
y
array([5, 6, 3, ..., 5, 3, 5])
from sklearn.model_selection import train_test_split

# Hold out 12% of the samples for testing and train on the remaining 88%.
# Seeding the split keeps the scores below reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.12, random_state=0)

import time

# Baseline: an SVC with default parameters trained on the raw pixel values.
start = time.time()  # start of the timed section
svc = SVC()
svc.fit(x_train, y_train)
# Test accuracy on raw pixels; the recorded run printed about 0.45, which is poor.
print(svc.score(x_test, y_test))
end = time.time()  # end of the timed section
print(end - start)  # elapsed seconds for training plus scoring
0.44516129032258067 286.4365243911743
# Second baseline: a default SVC trained on the raw pixels of only the first
# 500 training samples.
newsvc = SVC()
newsvc.fit(x_train[:500], y_train[:500])

# Reduce every picture to 150 whitened principal components.
# The projection is learned from the training rows only, so that nothing about
# the held-out test pictures leaks into the features used for evaluation.
pca = PCA(
    n_components=150,         # number of principal components to keep
    svd_solver='randomized',  # randomized SVD solver
    whiten=True,              # whiten the projected components
)
pca.fit(x_train)
PCA(copy=True, iterated_power='auto', n_components=150, random_state=None, svd_solver='randomized', tol=0.0, whiten=True)
# Project both partitions onto the fitted principal components.
x_train_pca, x_test_pca = (pca.transform(part) for part in (x_train, x_test))

# Accuracy of newsvc, evaluated on the untransformed test pixels.
newsvc.score(x_test, y_test)
0.3741935483870968
# Default-parameter SVC trained on the PCA features instead of the raw pixels
# (fit() returns the estimator itself, so construction and training chain).
svc_best = SVC().fit(x_train_pca, y_train)
svc_best.score(x_test_pca, y_test)
0.6580645161290323
# Candidate values for the SVC hyper-parameters C and gamma; the grid search
# tries every combination and keeps the best-scoring one.
param_grid = dict(
    C=[0.2, 0.5, 0.8, 1, 3, 5, 7, 9],
    gamma=[0.001, 0.002, 0.0033, 0.0066, 0.01, 0.03, 0.05, 0.1],
)

g_svc = SVC()  # estimator to be tuned
gcv = GridSearchCV(g_svc, param_grid=param_grid)  # automatic tuner
gcv.fit(x_train_pca, y_train)  # run the search on the PCA features
GridSearchCV(cv=None, error_score='raise', estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False), fit_params=None, iid=True, n_jobs=1, param_grid={'C': [0.2, 0.5, 0.8, 1, 3, 5, 7, 9], 'gamma': [0.001, 0.002, 0.0033, 0.0066, 0.01, 0.03, 0.05, 0.1]}, pre_dispatch='2*n_jobs', refit=True, return_train_score='warn', scoring=None, verbose=0)
# SVC configured with C=1.0 and gamma=0.001.
# NOTE(review): in the visible code this estimator is only displayed; it is
# never fitted or used for scoring.
svc_last= SVC(C= 1.0, gamma= 0.001)
svc_last
SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# SVC with C=5.0 and gamma=0.001, trained on the PCA features.
# NOTE(review): only the first 500 training rows are used here - confirm that
# this subset is intentional.
svc_last_1= SVC(C= 5.0, gamma= 0.001)
svc_last_1.fit(x_train_pca[:500], y_train[:500]) # fit on the training data
SVC(C=5.0, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
SVC自动调优出现bug,所以手动将惩罚系数C设为5。
svc_last_1.score(x_test_pca, y_test) # accuracy on the PCA-transformed test set
0.7354838709677419
y_new= svc_last_1.predict(x_test_pca) # predicted class ids for the test set
y_new
array([3, 3, 3, 3, 3, 1, 6, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 1, 6, 2, 3, 3, 6, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 3, 1, 3, 3, 3, 3, 6, 3, 3, 1, 3, 3, 0, 4, 6, 3, 2, 3, 2, 3, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 4, 3, 3, 1, 3, 3, 3, 3, 3, 1, 6, 3, 3, 1, 4, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 1, 3, 3, 0, 3, 6, 3, 4, 3, 5, 3, 2, 3, 6, 1, 3, 6, 1, 3, 3, 3, 1, 3, 2, 3, 3, 6, 3, 1, 3, 3, 3, 3, 5, 1, 3, 2, 3, 3, 4, 3, 1, 2, 3, 3, 3, 3, 3, 1, 3, 6, 3, 6, 3, 3, 1, 3, 3, 5, 3, 3, 1, 3, 1, 1, 3])
# Print a header, then the predicted person name of every test sample,
# separated by spaces on a single line.
print("预测数据")
for i in y_new:
    print(target_names[i], end=' ')
预测数据 George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair George W Bush Tony Blair George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush Colin Powell George W Bush George W Bush Ariel Sharon Gerhard Schroeder Tony Blair George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Tony Blair George W Bush George W Bush Colin Powell Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush George W Bush Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush Ariel Sharon George W Bush Tony Blair George W Bush Gerhard Schroeder George W Bush Hugo Chavez George W Bush Donald Rumsfeld George W Bush Tony Blair Colin Powell George W Bush Tony Blair Colin Powell George W Bush George W Bush George W Bush Colin Powell George W Bush Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Hugo Chavez Colin Powell George W Bush Donald Rumsfeld George W Bush George W Bush Gerhard Schroeder George W Bush Colin Powell Donald Rumsfeld George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell George W Bush Tony Blair George W Bush Tony Blair George W Bush George W Bush Colin Powell 
George W Bush George W Bush Hugo Chavez George W Bush George W Bush Colin Powell George W Bush Colin Powell Colin Powell George W Bush
# Print a header, then the true person name of every test sample,
# separated by spaces on a single line.
print("真实数据")
for i in y_test:
    print(target_names[i], end=' ')
真实数据 George W Bush Donald Rumsfeld George W Bush George W Bush George W Bush Colin Powell Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush George W Bush George W Bush George W Bush Colin Powell Ariel Sharon Donald Rumsfeld George W Bush George W Bush Tony Blair George W Bush George W Bush Hugo Chavez Gerhard Schroeder Gerhard Schroeder George W Bush George W Bush George W Bush George W Bush Colin Powell Hugo Chavez Colin Powell George W Bush George W Bush George W Bush George W Bush Tony Blair George W Bush George W Bush Colin Powell Tony Blair George W Bush Ariel Sharon Gerhard Schroeder Colin Powell George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld George W Bush George W Bush Donald Rumsfeld George W Bush Donald Rumsfeld Tony Blair George W Bush Donald Rumsfeld George W Bush George W Bush Donald Rumsfeld Gerhard Schroeder George W Bush George W Bush Colin Powell George W Bush George W Bush George W Bush Donald Rumsfeld George W Bush Colin Powell Gerhard Schroeder Colin Powell George W Bush Colin Powell Tony Blair George W Bush Colin Powell George W Bush George W Bush Donald Rumsfeld George W Bush George W Bush Gerhard Schroeder Gerhard Schroeder Tony Blair George W Bush Ariel Sharon Donald Rumsfeld George W Bush Ariel Sharon Donald Rumsfeld Tony Blair Hugo Chavez Hugo Chavez George W Bush Hugo Chavez George W Bush Donald Rumsfeld George W Bush Tony Blair Colin Powell George W Bush Tony Blair Colin Powell Donald Rumsfeld Hugo Chavez George W Bush Colin Powell George W Bush Donald Rumsfeld George W Bush Hugo Chavez Donald Rumsfeld George W Bush Colin Powell George W Bush George W Bush George W Bush George W Bush Hugo Chavez Colin Powell Gerhard Schroeder Donald Rumsfeld Hugo Chavez George W Bush Gerhard Schroeder Hugo Chavez Colin Powell Donald Rumsfeld George W Bush George W Bush George W Bush George W Bush Tony Blair Colin Powell Colin Powell Tony Blair George W Bush Tony Blair George W 
Bush George W Bush Colin Powell Donald Rumsfeld George W Bush Hugo Chavez George W Bush Gerhard Schroeder George W Bush Colin Powell Colin Powell Colin Powell George W Bush
# Predicted class ids for the first 20 test samples only.
y_last = svc_last_1.predict(x_test_pca[: 20])
y_last
array([3, 3, 3, 3, 3, 1, 6, 3, 6, 3, 3, 6, 3, 3, 3, 3, 3, 1, 6, 2])
y_test[: 20]
array([3, 2, 3, 3, 3, 1, 4, 3, 3, 3, 3, 6, 3, 3, 3, 3, 3, 1, 0, 2])
def get_names(y_last, y_test, target_names, i):
    """Build a caption comparing the predicted and the true name of sample ``i``.

    Args:
        y_last: Sequence of predicted class ids.
        y_test: Sequence of true class ids, aligned with ``y_last``.
        target_names: Sequence mapping a class id to a person's full name.
        i: Position of the sample in ``y_last`` and ``y_test``.

    Returns:
        A string of the form ``predict: <name> `` / `` true: <name>`` with a
        line break between the two parts, where ``<name>`` is the last
        space-separated word of the corresponding full name.
    """
    # Keep only the text after the last space, i.e. the final word of the name.
    predictname = target_names[y_last[i]].rsplit(' ', 1)[-1]
    truename = target_names[y_test[i]].rsplit(' ', 1)[-1]
    return 'predict: %s \n true: %s' % (predictname, truename)
get_names(y_last, y_test, target_names, 1)
'predict: Bush \n true: Rumsfeld'
# Captions (predicted vs. true name) for samples 0..19.
names= [get_names(y_last, y_test, target_names, i) for i in range(20)]
names
['predict: Bush \n true: Bush', 'predict: Bush \n true: Rumsfeld', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Powell \n true: Powell', 'predict: Blair \n true: Schroeder', 'predict: Bush \n true: Bush', 'predict: Blair \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Blair \n true: Blair', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Bush \n true: Bush', 'predict: Powell \n true: Powell', 'predict: Blair \n true: Sharon', 'predict: Rumsfeld \n true: Rumsfeld']
def show_predict_result(names, row, columns, x_test, image_shape=(250, 250)):
    """Draw a grid of pictures, each titled with its caption.

    Args:
        names: Captions; ``names[i]`` becomes the title of ``x_test[i]``.
        row: Number of rows in the subplot grid.
        columns: Number of columns in the subplot grid.
        x_test: Iterable of flattened pictures.  Each picture takes one cell
            of the grid, so it should hold at most ``row * columns`` items.
        image_shape: Shape every flattened picture is reshaped to before it
            is drawn.  Defaults to ``(250, 250)``.
    """
    # The figure size has to be passed through the ``figsize`` keyword.
    plt.figure(figsize=(columns * 2, row * 2.4))
    for i, image in enumerate(x_test):
        plt.subplot(row, columns, i + 1)  # subplot positions start at 1
        plt.imshow(image.reshape(image_shape))
        plt.axis('off')  # hide the axes around the picture
        plt.title(names[i])
# Plot the first 20 test pictures on a 5x4 grid, then the first 12 on a 3x4 grid.
show_predict_result(names, 5, 4, x_test[: 20])
show_predict_result(names, 3, 4, x_test[: 12])
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程