This notebook runs through the mainstream machine learning methods and takes a quick look at how well they predict the broad market index, though the factors used here are still fairly simple. Readers can build on this by adding new factors, or simply use it to get familiar with the basic machine learning calls.
import numpy as np
from sklearn.linear_model import LogisticRegression
data = attribute_history('000001.XSHG', count=3400, unit='1d',
                         fields=('open', 'close', 'high', 'low', 'volume', 'money'),
                         skip_paused=True, df=True, fq='pre')
data = data.dropna()
data.head()
date | open | close | high | low | volume | money
---|---|---|---|---|---|---
2005-01-04 | 1260.780 | 1242.770 | 1260.780 | 1238.180 | 816177000.0 | 4.418452e+09
2005-01-05 | 1241.682 | 1251.937 | 1258.580 | 1235.746 | 867865100.0 | 4.916589e+09
2005-01-06 | 1252.493 | 1239.430 | 1252.735 | 1234.236 | 792225400.0 | 4.381370e+09
2005-01-07 | 1239.323 | 1244.746 | 1256.313 | 1235.508 | 894087100.0 | 5.040042e+09
2005-01-10 | 1243.576 | 1252.401 | 1252.723 | 1236.089 | 723468300.0 | 4.118292e+09
# Candlestick shape factors
data['k_type1'] = (data['low'] - data['open']) / data['close']
data['k_type2'] = (data['high'] - data['open']) / data['close']
data['k_type3'] = (data['close'] - data['open']) / (data['high'] - data['low'])
# Volume ratio versus the previous day; shift() avoids the
# chained-assignment SettingWithCopyWarning of the original version
data['vol_time'] = (data['volume'] / data['volume'].shift(1)).fillna(1)
# Up/down label: did the bar close above its open
data['up_down'] = data['close'] > data['open']
data.head()
date | open | close | high | low | volume | money | k_type1 | k_type2 | k_type3 | vol_time | up_down
---|---|---|---|---|---|---|---|---|---|---|---
2005-01-04 | 1260.780 | 1242.770 | 1260.780 | 1238.180 | 816177000.0 | 4.418452e+09 | -0.018185 | 0.000000 | -0.796903 | 1.000000 | False
2005-01-05 | 1241.682 | 1251.937 | 1258.580 | 1235.746 | 867865100.0 | 4.916589e+09 | -0.004741 | 0.013497 | 0.449111 | 1.063330 | True
2005-01-06 | 1252.493 | 1239.430 | 1252.735 | 1234.236 | 792225400.0 | 4.381370e+09 | -0.014730 | 0.000195 | -0.706146 | 0.912844 | False
2005-01-07 | 1239.323 | 1244.746 | 1256.313 | 1235.508 | 894087100.0 | 5.040042e+09 | -0.003065 | 0.013649 | 0.260658 | 1.128577 | True
2005-01-10 | 1243.576 | 1252.401 | 1252.723 | 1236.089 | 723468300.0 | 4.118292e+09 | -0.005978 | 0.007304 | 0.530540 | 0.809170 | True
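As the introduction suggests, readers can extend this factor set. A minimal sketch of two extra factors, assuming the same `data` DataFrame; the names `mom5` and `range_pct` are my own illustrative choices, not part of the original notebook, and would need to be added before the train/test split below to be used:

# Hypothetical extra factors (illustrative names, not from the original)
data['mom5'] = data['close'] / data['close'].shift(5) - 1         # 5-day momentum
data['range_pct'] = (data['high'] - data['low']) / data['close']  # daily range
data = data.dropna()  # shift(5) leaves NaN in the first 5 rows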
# Chronological split: the last 200 bars are held out for testing.
# Features on day t predict the up/down label on day t+1,
# hence the one-bar offset between X and Y.
data_train = data[:-200]
data_test = data[-200:]
X = data_train[['k_type1', 'k_type2', 'k_type3', 'vol_time']][:-1]
Y = data_train['up_down'][1:]
X_test = data_test[['k_type1', 'k_type2', 'k_type3', 'vol_time']][:-1]
Y_test = data_test['up_down'][1:]
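A single chronological split is the simplest choice; for a more robust estimate, scikit-learn's TimeSeriesSplit gives walk-forward folds that never train on future data. A sketch under the assumption that X and Y are built as above (this is my addition, not part of the original notebook):

from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(n_splits=5)
scores = []
for train_idx, test_idx in tscv.split(X):
    m = LogisticRegression()
    m.fit(X.iloc[train_idx], Y.iloc[train_idx])
    # accuracy on the out-of-sample fold
    scores.append(np.mean(m.predict(X.iloc[test_idx]) == Y.iloc[test_idx]))
scores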
from sklearn.linear_model import LogisticRegression
# Assumes X (predictors) and Y (target) for the training set
# and X_test / Y_test for the held-out set
# Create the logistic regression object, train it, and score both sets
model = LogisticRegression()
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.5599747474747475, 0.49748743718592964)
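Accuracies near 0.5 only mean something relative to the class balance: if the index rises on, say, 53% of days, a constant "up" guess already scores 0.53. A quick check, assuming Y and Y_test as built above:

# Base rate of "up" days; a constant majority-class guess achieves
# max(p, 1 - p), the bar any classifier must clear
Y.mean(), Y_test.mean()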
from sklearn import tree
# Create the decision tree object (Gini impurity criterion), train, and score
model = tree.DecisionTreeClassifier(criterion='gini')
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(1.0, 0.4824120603015075)
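The unconstrained tree memorizes the training set (accuracy 1.0) yet falls below chance out of sample, a classic sign of overfitting. A hedged sketch of one remedy: capping tree depth trades training fit for generalization (max_depth=3 is an arbitrary illustrative value, not tuned):

from sklearn import tree

# Shallow tree: less capacity to memorize noise
model = tree.DecisionTreeClassifier(criterion='gini', max_depth=3)
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)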
from sklearn import svm
# Create the SVM classification object, train, and score
model = svm.SVC()
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.5599747474747475, 0.49748743718592964)
from sklearn.naive_bayes import GaussianNB
# Gaussian naive Bayes classifier
model = GaussianNB()
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.5394570707070707, 0.48743718592964824)
from sklearn.neighbors import KNeighborsClassifier
# Create the k-nearest-neighbors classifier object, train, and score
model = KNeighborsClassifier(n_neighbors=6)
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.6641414141414141, 0.5125628140703518)
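k-NN is distance based, so features on different scales (vol_time sits near 1 while the k_type ratios sit near 0) contribute unevenly to the distance. A sketch that standardizes the features first; the pipeline is my addition, not part of the original:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Scale each feature to zero mean / unit variance before the distance search
model = make_pipeline(StandardScaler(), KNeighborsClassifier(n_neighbors=6))
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)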
from sklearn.cluster import KMeans
# Create the KMeans object; clustering is unsupervised, so the labels Y
# play no role in fitting
model = KMeans(n_clusters=2, random_state=0)
model.fit(X)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.5205176767676768, 0.49748743718592964)
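Because KMeans never sees Y, its cluster ids 0/1 are arbitrary: raw agreement with Y can understate the fit by exactly a label swap. A sketch scoring both labelings, assuming `model` is the fitted KMeans from the cell above:

pred = model.predict(X_test)
acc = np.mean(pred == Y_test)
max(acc, 1 - acc)  # cluster ids are arbitrary; keep the better labeling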
from sklearn.ensemble import RandomForestClassifier
# Create the random forest object, train, and score
model = RandomForestClassifier()
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.9816919191919192, 0.5527638190954773)
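With the best test accuracy so far (~0.55), it is worth asking which of the four factors the forest actually leans on. feature_importances_ is an attribute of the fitted RandomForestClassifier; assuming `model` is the forest trained above:

# Impurity-based importance of each factor
for name, imp in zip(X.columns, model.feature_importances_):
    print(name, round(imp, 3))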
from sklearn.ensemble import GradientBoostingClassifier
# Create the gradient boosting classifier object, train, and score
model = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,
                                   max_depth=1, random_state=0)
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.61489898989899, 0.49748743718592964)
from xgboost import XGBClassifier
# Gradient-boosted trees via xgboost's scikit-learn wrapper
model = XGBClassifier()
model.fit(X, Y)
np.mean(model.predict(X) == Y), np.mean(model.predict(X_test) == Y_test)
(0.6609848484848485, 0.5376884422110553)
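To line the supervised models up side by side, a small loop can refit each on the same split and print its out-of-sample accuracy. A sketch with my own choice of model list and settings; the max_depth=3 cap on the tree is an illustrative assumption:

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier

models = {
    'logistic': LogisticRegression(),
    'tree': DecisionTreeClassifier(max_depth=3),
    'svm': SVC(),
    'naive_bayes': GaussianNB(),
    'knn': KNeighborsClassifier(n_neighbors=6),
    'random_forest': RandomForestClassifier(),
    'gbdt': GradientBoostingClassifier(),
}
for name, m in models.items():
    m.fit(X, Y)
    print(name, np.mean(m.predict(X_test) == Y_test))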