请 [注册] 或 [登录]  | 返回主站

量化交易吧 /  量化平台 帖子:3366801 新帖:11

深度学习——基于tensorflow LSTM进行策略开发

Tango发表于:5 月 10 日 03:22回复(1)

最近在尝试使用深度学习探索市场规律,寻找盈利策略。
本文尝试使用tensorflow框架下的LSTM进行策略开发,搭建了数据因子获取,数据处理和LSTM预测框架。遗憾的是最终的结果不理想,没能形成盈利策略,不过没有关系,好的策略必然是反复锤炼才能找到,本文便是用于探索的工具。
本文策略不理想的主要原因是输入的数据质量不高,数据并不具备预测能力,或者模型选择不当,后续还会尝试利用深度神经网络预测,对输出数据也会再进一步研究。
虽然本文结果不理想,但是基本流程和框架已经写好了,有兴趣的小伙伴可以在数据上做处理,根据自己的理解获取数据,然后走一下模型,也许会有效果。其中模型中的参数需要调整,可能会调整的参数我在代码中标识了。深度学习的功夫应该是在数据获取和数据处理上,然后就是模型调参了。

本文思路:
利用过去n天(本文选了5天)的技术因子,对标的(本文为中证500指数)下一个交易日的涨跌进行预测。
1.获取技术因子:从聚宽平台提供的技术指标中选取一部分,并加入收盘价和成交量
2.去极值和标准化处理
3.PCA降维
4.获取LSTM数据,时间长度为5天
5.利用LSTM进行预测

from jqlib.technical_analysis import *
from jqdata import *
import pandas as pd
import numpy as np
import datetime
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import tensorflow as tf
/opt/conda/envs/python3new/lib/python3.6/importlib/_bootstrap.py:219: RuntimeWarning: compiletime version 3.5 of module 'tensorflow.python.framework.fast_tensor_util' does not match runtime version 3.6
  return f(*args, **kwds)
# Sample period used for every download below.
start_date = '2010-01-01'
end_date = '2018-11-01'
trade_days = get_trade_days(start_date=start_date,end_date=end_date).tolist()
# First trading day of the period; only used for the single-day smoke test.
date = trade_days[0]

lookback = 5 # length of the LSTM time axis (days per input window)
stocks = '000905.XSHG' # CSI 500 index
# Technical factor data preparation

def get_factors_one_stock(stocks,date):
    '''
    Build the one-day factor row for a single security.

    input:
    stocks: code of one security
    date: the day to evaluate; a 'YYYY-MM-DD' string, or a date-like object
          that is converted to that string form with strftime
    output:
    df: DataFrame with a single row indexed by the date string. Columns are
        close and volume followed by the indicators ACCER, BIAS, ADTM, CCI,
        DKX, KBJ, CYE, EMV, BR and AR.
    '''
    if not isinstance(date, str):
        date = date.strftime('%Y-%m-%d')

    def as_column(indicator, name):
        # Each indicator call returns a mapping (presumably keyed by security
        # code -- verify against jqlib); its values become one named column.
        return pd.DataFrame(list(indicator.values()), columns=[name])

    price = get_price(stocks, end_date=date, count=1, fields=['close', 'volume'])
    price.index = [date]

    accer = ACCER(stocks, check_date=date, N=5)

    # Bias ratio
    bias, _ = BIAS_QL(stocks, date, M=6)

    # ADTM (dynamic buying/selling momentum)
    adtm, _ = ADTM(stocks, check_date=date, N=23, M=8)

    # CCI (commodity channel index)
    cci = CCI(stocks, date, N=14)

    # DKX (bull/bear line)
    dkx, _ = DKX(stocks, date, M=10)

    # Stochastic oscillator; only the K line is kept
    k, _ = SKDJ(stocks, date, N=9, M=3)

    # Market trend
    cye, _ = CYE(stocks, date)

    # Ease-of-movement indicator
    emv, _ = EMV(stocks, date, N=14, M=9)

    # Relative strength (BR / AR)
    br, ar = BRAR(stocks, date, N=26)

    columns = [
        as_column(accer, 'ACCER'),
        as_column(bias, 'BIAS'),
        as_column(adtm, 'ADTM'),
        as_column(cci, 'CCI'),
        as_column(dkx, 'DKX'),
        as_column(k, 'KBJ'),
        as_column(cye, 'CYE'),
        as_column(emv, 'EMV'),
        as_column(br, 'BR'),
        as_column(ar, 'AR'),
    ]
    df = pd.concat(columns, axis=1)
    # Re-index with the date so the indicators line up with the price row.
    df.index = [date]
    df = pd.concat([price, df], axis=1)

    return df
# Smoke test: fetch the factor row of '000300.XSHG' for the first trading day.
df = get_factors_one_stock('000300.XSHG',date)
def get_data_from_date(start_date,end_date,stocks):
    '''
    Collect the daily factor rows of one security over a date range.

    input:
    start_date: first day of the range
    end_date: last day of the range
    stocks: code of one security
    output:
    DataFrame with one row per trading day returned by get_trade_days, in
    that order, as produced by get_factors_one_stock.
    '''
    trade_date = get_trade_days(start_date=start_date,end_date=end_date)
    # Gather all rows first and concatenate once; concatenating inside the
    # loop copies the accumulated frame on every iteration.
    daily_rows = [get_factors_one_stock(stocks, day) for day in trade_date]
    return pd.concat(daily_rows)
# Factor table for `stocks` over the whole sample period, one row per trading day.
data = get_data_from_date(start_date,end_date,stocks)
# Winsorize (clip extremes) and standardize the data
def winsorize_and_standarlize(data,qrange=(0.05,0.95),axis=0):
    '''
    Clip extreme values to quantile bounds, then z-score the result.

    input:
    data: DataFrame or Series; it is not modified, a new object is returned
    qrange: two-element sequence; qrange[0] is the lower quantile and
            qrange[1] the upper quantile. Values outside the bounds are
            replaced by the corresponding quantile value.
    axis: DataFrame only. 0 processes every column independently, any other
          value processes every row independently.
    output:
    Object shaped like the input holding the clipped, standardized values.
    NaN results (for example when the standard deviation is zero) are
    replaced by 0. Input that is neither a DataFrame nor a Series is
    returned unchanged.
    '''
    lower_q, upper_q = qrange[0], qrange[1]

    if isinstance(data, pd.DataFrame):
        if axis == 0:
            # Per-column bounds, aligned with the column labels.
            clipped = data.clip(lower=data.quantile(lower_q),
                                upper=data.quantile(upper_q),
                                axis=1)
            scaled = (clipped - clipped.mean()) / clipped.std()
        else:
            # Per-row bounds, aligned with the row labels.
            clipped = data.clip(lower=data.quantile(lower_q, axis=1),
                                upper=data.quantile(upper_q, axis=1),
                                axis=0)
            centered = clipped.sub(clipped.mean(axis=1), axis=0)
            scaled = centered.div(clipped.std(axis=1), axis=0)
        return scaled.fillna(0)

    if isinstance(data, pd.Series):
        clipped = data.clip(lower=data.quantile(lower_q),
                            upper=data.quantile(upper_q))
        scaled = (clipped - clipped.mean()) / clipped.std()
        return scaled.fillna(0)

    return data
# Clip and z-score every factor column (default qrange, axis=0).
data_pro = winsorize_and_standarlize(data)
# PCA dimensionality reduction
def pca_analysis(data,n_components='mle'):
    '''
    Fit a PCA on the given table and project the same table onto it.

    input:
    data: DataFrame of features, one sample per row
    n_components: forwarded to sklearn's PCA
    output:
    DataFrame of the transformed samples, carrying the index of the input
    '''
    reducer = PCA(n_components=n_components)
    reducer.fit(data)
    projected = reducer.transform(data)
    return pd.DataFrame(projected, index=data.index)
# Keep 5 principal components of the standardized factors.
data_pca = pca_analysis(data_pro,n_components=5)
def get_day_profit(stocks,end_date,start_date=None,count=-1,pre_num=1):
    '''
    Label each day by the sign of its close-to-close return.

    input:
    stocks: list or Series, security code(s)
    start_date: first day of the range
    end_date: last day of the range
    count: alternative to start_date, number of observations to fetch
           backwards from end_date; -1 means "use start_date"
    pre_num: int, number of days the return is computed over
    output:
    profit: close-price returns turned into labels, indexed by date;
            1 where the return is greater than 0, otherwise 0
    '''
    fetch_by_range = count == -1
    if fetch_by_range:
        quotes = get_price(stocks,start_date,end_date,fields=['close'])
    else:
        quotes = get_price(stocks,end_date=end_date,count=count,fields=['close'])
    close = quotes['close']

    # The leading rows have no earlier price to compare with and are dropped.
    returns = close.pct_change(periods=pre_num).dropna()

    labels = returns.mask(returns > 0, 1)
    labels = labels.mask(labels < 0, 0)
    return labels
# Up/down labels for `stocks` over the sample period.
profit = get_day_profit(stocks,start_date=start_date,end_date=end_date)
# Label first, PCA features after it; rows with any missing value are dropped.
pca_profit = pd.concat([profit,data_pca],axis=1).dropna()
def get_lstm_data(data,lookback=lookback):
    '''
    Cut the table into sliding windows for the LSTM.

    input:
    data: DataFrame, first column is the label, remaining columns are the
          reduced factors
    lookback: number of consecutive rows in every window
    output:
    x_output: list of arrays, one window of `lookback` feature rows each
    y_output: Series of labels starting at row `lookback`; each label is the
              row right after its window, i.e. the next day's up/down move
    '''
    labels = data.iloc[:,0]
    features = data.iloc[:,1:]
    x_output = [
        features.iloc[stop-lookback:stop,:].values
        for stop in range(lookback,len(data))
    ]
    y_output = labels[lookback:]
    return x_output,y_output
x_data,y_data = get_lstm_data(pca_profit)
# Split chronologically (shuffle=False): consecutive windows share
# lookback-1 rows, so a shuffled split would place near-duplicate samples
# in both the training and the test set and make the test loss misleading.
x_train,x_test,y_train,y_test = train_test_split(x_data,y_data,test_size=0.2,shuffle=False)
len_train = len(x_train)
len_test = len(y_test)
n_input = len(data_pca.columns)
# Features become [samples, lookback, n_input]; labels become a column vector.
x_train = np.reshape(x_train,[len_train,lookback,n_input])
# np.reshape on a pandas Series is deprecated; reshape the underlying array.
y_train = y_train.values.reshape([len_train,1])

x_test = np.reshape(x_test,[len_test,lookback,n_input])
y_test = y_test.values.reshape([len_test,1])
/opt/conda/envs/python3new/lib/python3.6/site-packages/numpy/core/fromnumeric.py:57: FutureWarning: reshape is deprecated and will raise in a subsequent release. Please use .values.reshape(...) instead
  return getattr(obj, method)(*args, **kwds)
# LSTM
lstm_size = 50  # number of LSTM units; adjust to the amount of data
epoch_num = 10  # number of loss printouts; times n_batch gives the total training iterations
n_batch = 50

tf.reset_default_graph()

# Inputs: windows of `lookback` steps with `n_input` features, and one label per window.
x = tf.placeholder(tf.float32,[None,lookback,n_input])
y = tf.placeholder(tf.float32,[None,1])

# Output layer mapping the lstm_size-wide state to a single value.
weights = tf.Variable(tf.truncated_normal([lstm_size,1],stddev=0.1))
biases = tf.Variable(tf.constant(0.1,shape=[1]))

# Define the LSTM network
def LSTM_net(x,weights,biases):
    '''
    Run x through one BasicLSTMCell of lstm_size units and a ReLU output layer.

    input:
    x: input tensor fed to tf.nn.dynamic_rnn
    weights, biases: parameters of the output layer
    output:
    relu(state @ weights + biases), where state is element 1 of the final
    RNN state
    '''
    cell = tf.contrib.rnn.BasicLSTMCell(lstm_size)
    _, last_state = tf.nn.dynamic_rnn(cell,x,dtype=tf.float32)
    # Element 1 of the final state -- presumably the hidden output h of the
    # (c, h) state tuple; confirm against the TensorFlow version in use.
    hidden = last_state[1]
    pre_activation = tf.matmul(hidden,weights) + biases
    return tf.nn.relu(pre_activation)

prediction = LSTM_net(x,weights,biases)
# Loss function: mean squared error
loss = tf.reduce_mean(tf.square(y-prediction))

# Gradient descent; other optimizers can be used instead.
# AdamOptimizer converges quickly but overfits badly.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01) # learning_rate can be tuned
train_step = optimizer.minimize(loss)

init = tf.global_variables_initializer()
train_feed = {x:x_train,y:y_train}
test_feed = {x:x_test,y:y_test}
with tf.Session() as sess:
    sess.run(init)
    # Each outer round runs n_batch full-dataset updates, then reports both losses.
    for report_round in range(epoch_num):
        for _ in range(n_batch):
            sess.run(train_step,feed_dict=train_feed)
        train_loss = sess.run(loss,feed_dict=train_feed)
        print('train loss is'+ str(train_loss))
        test_loss = sess.run(loss,feed_dict=test_feed)
        print('test loss is' + str(test_loss))
    prediction_res = sess.run(prediction,feed_dict={x:x_test})
    # Print the last few predictions next to their labels for inspection
    print(prediction_res[-5:])
    print(y_test[-5:])
train loss is0.275276
test loss is0.275262
train loss is0.251983
test loss is0.251416
train loss is0.248579
test loss is0.24808
train loss is0.247827
test loss is0.247526
train loss is0.247551
test loss is0.24743
train loss is0.247413
test loss is0.247424
train loss is0.247332
test loss is0.247434
train loss is0.24728
test loss is0.247445
train loss is0.247244
test loss is0.247452
train loss is0.247216
test loss is0.247456
[[ 0.50962383]
 [ 0.55818146]
 [ 0.56433469]
 [ 0.54548335]
 [ 0.57804573]]
[[ 1.]
 [ 1.]
 [ 1.]
 [ 0.]
 [ 1.]]

全部回复

0/140

达人推荐

量化课程

    移动端课程