请 [注册] 或 [登录]  | 返回主站

量化交易吧 /  数理科学 帖子:3364716 新帖:3

【量化课堂】scikit-learn 之 kNN 分类

特朗普对头发表于:6 月 7 日 16:11回复(1)
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import neighbors
from matplotlib.colors import ListedColormap
# Enlarge the default figure to 15x10.  The size is assigned through
# rcParams (instead of mutating the list that rcParams hands back) so that
# matplotlib validates the value; `fig_size` is still bound afterwards.
plt.rcParams["figure.figsize"] = [15, 10]
fig_size = plt.rcParams["figure.figsize"]

# Training set: three clusters of 200 normally distributed points each.
# `np.random.normal` is used because the only numpy import in this file is
# `import numpy as np`; no bare `random` name is bound by the imports above.

# Cluster 1 (blue squares): x ~ N(50, 6), y ~ N(5, 0.5).
x1 = np.random.normal(50, 6, 200)
y1 = np.random.normal(5, 0.5, 200)
plt.scatter(x1, y1, c='b', marker='s', s=50, alpha=0.8)

# Cluster 2 (red triangles): x ~ N(30, 6), y ~ N(4, 0.5).
x2 = np.random.normal(30, 6, 200)
y2 = np.random.normal(4, 0.5, 200)
plt.scatter(x2, y2, c='r', marker='^', s=50, alpha=0.8)

# Cluster 3 (green, default marker): x ~ N(45, 6), y ~ N(2.5, 0.5).
x3 = np.random.normal(45, 6, 200)
y3 = np.random.normal(2.5, 0.5, 200)
plt.scatter(x3, y3, c='g', s=50, alpha=0.8)

# Fix the visible window to x in [10, 70], y in [1, 7].
plt.axis((10, 70, 1, 7))
(10, 70, 1, 7)
# Pool the three clusters into one coordinate array per axis
# (cluster 1 first, then cluster 2, then cluster 3).
x_val = np.concatenate((x1, x2, x3))
y_val = np.concatenate((y1, y2, y3))

# Divide each axis by its observed range (scaling only, no shift);
# presumably so that the much wider x axis does not dominate distances.
x_diff = x_val.max() - x_val.min()
y_diff = y_val.max() - y_val.min()
x_normalized = x_val / x_diff
y_normalized = y_val / y_diff

# Build a concrete (n_samples, 2) array.  zip() would yield a one-shot
# iterator on Python 3, which the classifier cannot read as a 2-D array and
# which could not be reused for a second fit.
xy_normalized = np.column_stack((x_normalized, y_normalized))

# Class labels in the same order as the concatenation above.
labels = [1] * 200 + [2] * 200 + [3] * 200

# k-nearest-neighbours classifier with k = 30.
clf = neighbors.KNeighborsClassifier(30)
clf.fit(xy_normalized, labels)
KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=30, p=2,
           weights='uniform')
# Two query points, scaled with the same per-axis factors as the training data.
query_points = [(50 / x_diff, 5 / y_diff), (30 / x_diff, 3 / y_diff)]

# Indices of the 5 nearest training samples for each query point;
# distances are not requested.
nearests = clf.kneighbors(query_points, n_neighbors=5, return_distance=False)
nearests
array([[131, 144,  75,  61, 115],
       [281, 236, 375, 291, 234]])
# Predicted class label for each of the two scaled query points.
query_points = [(50 / x_diff, 5 / y_diff), (30 / x_diff, 3 / y_diff)]
prediction = clf.predict(query_points)
prediction
array([1, 2])
# Per-class probability estimates for the same two scaled query points
# (one row per point, one column per class).
query_points = [(50 / x_diff, 5 / y_diff), (30 / x_diff, 3 / y_diff)]
prediction_proba = clf.predict_proba(query_points)
prediction_proba
array([[ 1.        ,  0.        ,  0.        ],
       [ 0.        ,  0.93333333,  0.06666667]])
# Held-out test set: 100 fresh points per cluster, drawn with the same
# distribution parameters as the training clusters.  `np.random.normal` is
# used because no bare `random` name is bound by this file's imports.
x1_test = np.random.normal(50, 6, 100)
y1_test = np.random.normal(5, 0.5, 100)

x2_test = np.random.normal(30, 6, 100)
y2_test = np.random.normal(4, 0.5, 100)

x3_test = np.random.normal(45, 6, 100)
y3_test = np.random.normal(2.5, 0.5, 100)

# Scale with the TRAINING ranges (x_diff, y_diff) and build a concrete
# (n_samples, 2) array.  zip() would give a one-shot iterator on Python 3,
# and this matrix is scored more than once.
xy_test_normalized = np.column_stack((
    np.concatenate((x1_test, x2_test, x3_test)) / x_diff,
    np.concatenate((y1_test, y2_test, y3_test)) / y_diff,
))

# True labels, in the same order as the concatenation above.
labels_test = [1] * 100 + [2] * 100 + [3] * 100

# Mean accuracy of the 30-NN classifier on the test set.
score = clf.score(xy_test_normalized, labels_test)
score
0.93666666666666665
# Same experiment with k = 1, for comparison with the 30-NN classifier:
# train on the same data and report mean accuracy on the same test set.
# fit() returns the estimator itself, so construction and training chain.
clf1 = neighbors.KNeighborsClassifier(n_neighbors=1).fit(xy_normalized, labels)
clf1.score(xy_test_normalized, labels_test)
0.93333333333333335
 
# Dense evaluation grid: x from 1 in steps of 0.1 (stop value 70.1),
# y from 1 in steps of 0.01 (stop value 7.01).
grid_x = np.arange(1, 70.1, 0.1)
grid_y = np.arange(1, 7.01, 0.01)
xx, yy = np.meshgrid(grid_x, grid_y)

# Scale the grid with the same factors as the training data and flatten it
# into one (x, y) row per grid node.
xx_normalized = xx / x_diff
yy_normalized = yy / y_diff
coords = np.column_stack((xx_normalized.ravel(), yy_normalized.ravel()))

# Predicted label at every node, folded back into the grid's shape.
Z = clf.predict(coords).reshape(xx.shape)

# Pale blue / red / green background showing the predicted class regions.
light_rgb = ListedColormap(['#AAAAFF', '#FFAAAA', '#AAFFAA'])
plt.pcolormesh(xx, yy, Z, cmap=light_rgb)

# Overlay the training points with the same styling as the first scatter plot.
for xs, ys, marker_style in (
    (x1, y1, {'c': 'b', 'marker': 's'}),
    (x2, y2, {'c': 'r', 'marker': '^'}),
    (x3, y3, {'c': 'g'}),
):
    plt.scatter(xs, ys, s=50, alpha=0.8, **marker_style)

plt.axis((10, 70, 1, 7))
(10, 70, 1, 7)
 
# Class-probability estimates at every grid node (one column per class).
Z_proba = clf.predict_proba(coords)

# Column 1 is shaded in red; with labels 1, 2, 3 this is presumably the
# probability of label 2, the cluster drawn as red triangles.
Z_proba_reds = Z_proba[:, 1].reshape(xx.shape)
plt.pcolormesh(xx, yy, Z_proba_reds, cmap='Reds')

# Overlay the training points with the same styling as the earlier plots.
for xs, ys, marker_style in (
    (x1, y1, {'c': 'b', 'marker': 's'}),
    (x2, y2, {'c': 'r', 'marker': '^'}),
    (x3, y3, {'c': 'g'}),
):
    plt.scatter(xs, ys, s=50, alpha=0.8, **marker_style)

plt.axis((10, 70, 1, 7))
(10, 70, 1, 7)
 

全部回复

0/140

量化课程

    移动端课程