传统的因子指标挖掘主要集中于财务报表、个股中低频率的价量等相关的数据维度,而这部分数据维度的增量价值的挖掘已逐渐饱和,需从其他新的数据维度中挖掘新的因子指标,本篇报告从个股日内高频数据出发尝试挖掘出新的因子指标。
基于个股日内高频数据,构建了已实现波动(Realized Volatility)RVol、已实现偏度(Realized Skewness)RSkew、已实现峰度(Realized Kurtosis)RKurt 三个因子指标,考察这三个因子在回测区间内对个股收益率的区分度。按照这个思路,展开研究:
1)因子数据获取:
2)因子数据处理:
3)因子统计分析:
4)分组回测分析:
(一)因子IC统计:研报中涉及到的三个基础因子和构造因子,我们分年统计IC结果,发现RDVar_t因子、 RDSkew_t 和 RSkew_t IC均值绝对值超过了0.03,初步判断因子有一定的预测效果
(二)分组回测:将以上三个因子分别进行分组回测,各组区分度比较明显,且尾部偏离更显著,初步估计空头组合效果比多头更为显著
(三)多头组合:为了更加贴合A股的实际操作,我们取多头部分构建组合,这里取了IC值高的 RSkew_t 因子,在无交易成本的情况下,多头部分能够取到12.9%的年化超额收益,在加入千二交易成本之后,超额收益到4.88%,平均每次交易换仓在0.7左右
综合来看,该研报所构造因子是有效的,因子贡献的超额收益在周度调仓下能够抵抗交易成本,但是所剩下的超额收益已经不多。
#导入需要的库、定义需要用到的工具函数
#工具函数
import time
from datetime import datetime, timedelta
from jqdata import *
import numpy as np
import pandas as pd
import math
from statsmodels import regression
import statsmodels.api as sm
import matplotlib.pyplot as plt
import datetime
from scipy import stats
from jqfactor import *
import warnings
warnings.filterwarnings('ignore')
plt.style.use('ggplot')
# Given a start and an end date, return every calendar day in between.
def get_date_list(begin_date, end_date):
    """Return every calendar day from begin_date to end_date, inclusive.

    Args:
        begin_date: First day, as a "YYYY-MM-DD" string.
        end_date: Last day, as a "YYYY-MM-DD" string.

    Returns:
        List of zero-padded "YYYY-MM-DD" strings in ascending order; an empty
        list when begin_date is later than end_date.
    """
    # Import under a private alias: this file binds the name `datetime` both to
    # the class and (later) to the module, so the bare name cannot be trusted.
    import datetime as _dt

    fmt = "%Y-%m-%d"
    first = _dt.datetime.strptime(begin_date, fmt).date()
    last = _dt.datetime.strptime(end_date, fmt).date()
    # Work on date objects instead of comparing strings, so the range is right
    # even when the inputs are not zero-padded.
    span = (last - first).days
    return [(first + _dt.timedelta(days=k)).strftime(fmt) for k in range(span + 1)]
# Build a list of trading days.
def get_tradeday_list(start,end,frequency=None,count=None):
    """Return trading days taken from the index of '000001.XSHG' price data.

    Args:
        start: Window start, used only when count is None.
        end: Window end.
        frequency: None or 'day' returns every row of the price index;
            'month' returns the first row of each year-month; 'quarter' does
            the same restricted to months 01/04/07/10; 'halfyear' restricted
            to months 01/06. Any other value returns None.
        count: When given, the last `count` rows ending at `end` are used
            instead of the start/end window.

    Returns:
        The index of the selected rows, or None for an unknown frequency.
    """
    benchmark = '000001.XSHG'
    if count is None:
        bars = get_price(benchmark, start_date=start, end_date=end)
    else:
        bars = get_price(benchmark, end_date=end, count=count)

    if frequency in (None, 'day'):
        return bars.index

    # "YYYY-MM" key used to keep only the first trading day of each month.
    bars['year-month'] = [str(stamp)[0:7] for stamp in bars.index]
    if frequency == 'month':
        return bars.drop_duplicates('year-month').index

    # NOTE(review): 'halfyear' keeps January and June; confirm June (rather
    # than July) is the intended second half-year month.
    if frequency == 'quarter':
        wanted_months = ['01', '04', '07', '10']
    elif frequency == 'halfyear':
        wanted_months = ['01', '06']
    else:
        return None

    bars['month'] = [str(stamp)[5:7] for stamp in bars.index]
    bars = bars[bars['month'].isin(wanted_months)]
    return bars.drop_duplicates('year-month').index
# Given a start and an end date, return the first trading day of each week.
def get_weekday(s_date,d_date):
    """Return the first trading day of every week inside the window.

    Consecutive rows of the '000001.XSHG' price index are compared pairwise;
    the later day of a pair starts a new week when its weekday number
    (Sunday = 0) is not greater than the earlier day's, or when the two days
    are more than seven calendar days apart. The very first row of the window
    is never reported because it has no predecessor.

    Args:
        s_date: Window start passed to get_price.
        d_date: Window end passed to get_price.

    Returns:
        List of "YYYY-MM-DD" strings.
    """
    def _as_datetime(stamp):
        # Parse the leading "YYYY-MM-DD" of the stamp's string form.
        text = str(stamp)
        return datetime.datetime(int(text[:4]), int(text[5:7]), int(text[8:10]))

    sessions = get_price('000001.XSHG', start_date=s_date, end_date=d_date).index
    week_starts = []
    for earlier, later in zip(sessions[:-1], sessions[1:]):
        prev_day = _as_datetime(earlier)
        next_day = _as_datetime(later)
        # isoweekday() % 7 maps Sunday to 0 and Monday..Saturday to 1..6.
        wrapped = prev_day.isoweekday() % 7 >= next_day.isoweekday() % 7
        long_gap = (next_day - prev_day).days > 7
        if wrapped or long_gap:
            week_starts.append(str(later)[:10])
    return week_starts
def ret_se(start_date='2018-6-1',end_date='2018-7-1',stock_pool=None,weight=0):
    """Return the per-day gross return (1 + daily change) of a stock basket.

    Args:
        start_date: First day of the close-price window.
        end_date: Last day of the close-price window.
        stock_pool: Iterable of security codes. ``None`` or an empty pool
            yields a constant series of 1 on the trading days of
            '000001.XSHG'.
        weight: 0 averages the stocks with equal weights; any other value
            weights each stock by the natural log of its circulating market
            cap.

    Returns:
        Series indexed like the price data; each value is the basket average
        of close[t] / close[t-1], with the first row fixed at 1.
    """
    # The default None used to crash on len(None); treat it as an empty pool.
    pool = [] if stock_pool is None else list(stock_pool)
    if pool:
        closes = get_price(pool, start_date=start_date, end_date=end_date, fields=['close']).close
        # Drop every stock with a missing close anywhere in the window.
        closes = closes.dropna(axis=1)
        # NOTE(review): no date is passed to get_fundamentals, so the market
        # caps are whatever the API returns by default rather than the caps
        # as of start_date; confirm this is acceptable.
        caps = get_fundamentals(
            query(valuation.code, valuation.circulating_market_cap)
            .filter(valuation.code.in_(closes.columns))
        )
        fact_se = np.log(pd.Series(caps['circulating_market_cap'].values, index=caps['code'].values))
    else:
        # Placeholder basket: a single constant column on the index calendar.
        closes = get_price('000001.XSHG', start_date=start_date, end_date=end_date, fields=['close'])
        closes['v'] = [1]*len(closes)
        del closes['close']
        # Previously undefined in this branch, which broke weight != 0.
        fact_se = pd.Series([1.0], index=['v'])

    pct = closes.pct_change()+1
    pct.iloc[0,:] = 1  # no previous close on the first day

    if weight == 0:
        return pct.sum(axis=1)/pct.shape[1]

    # Align weights to the stocks that survived dropna so a stock without a
    # market-cap record is skipped instead of turning the whole row into NaN.
    weights = fact_se.reindex(pct.columns)
    return pct.mul(weights, axis=1).sum(axis=1)/weights.sum()
# Collect the per-period returns of every factor group.
def get_all_pct(pool_dict,trade_list,groups=5):
    """Stack the equal-weight returns of each factor group over all periods.

    For every consecutive pair (s, e) in trade_list the ranked stock list
    pool_dict[s] is cut into `groups` equally sized consecutive slices
    (the remainder at the tail is left out) and ret_se is evaluated on each
    slice between s and e.

    Args:
        pool_dict: Mapping from a period start to its ranked stock sequence.
        trade_list: Ordered period boundaries; needs at least two entries to
            produce any rows.
        groups: Number of slices.

    Returns:
        DataFrame with columns 0..groups-1, the periods concatenated
        vertically in order. An empty frame with those columns is returned
        when trade_list defines no period (this used to raise because the
        result variable was never bound).
    """
    period_frames = []
    for s, e in zip(trade_list[:-1], trade_list[1:]):
        stock_list = pool_dict[s]
        stock_num = len(stock_list)//groups
        group_returns = [
            ret_se(start_date=s, end_date=e, stock_pool=stock_list[i*stock_num:(i+1)*stock_num])
            for i in range(groups)
        ]
        period_df = pd.concat(group_returns, axis=1)
        period_df.columns = range(groups)
        period_frames.append(period_df)

    if not period_frames:
        return pd.DataFrame(columns=range(groups))
    # A single concat replaces the repeated pairwise concat of the original.
    return pd.concat(period_frames, axis=0)
def tradedays_before(date,count):
    """Return the trading day `count` sessions before `date`.

    Fetches the last count+1 rows of '000001.XSHG' ending at `date` and
    returns the first index entry of that window.
    """
    window = get_price('000001.XSHG', end_date=date, count=count+1)
    earliest = window.index[0]
    return earliest
def ShiftTradingDay(date,shift):
    """Return the trading day located `shift` sessions away from `date`.

    Args:
        date: Anything whose str() starts with a zero-padded "YYYY-MM-DD";
            it must itself be present in the trading calendar.
        shift: Signed number of sessions to move (positive = later).

    Returns:
        The calendar entry at the shifted position.
    """
    # Full trading calendar as returned by the data API.
    calendar = get_all_trade_days()
    text = str(date)
    anchor = datetime.date(int(text[:4]), int(text[5:7]), int(text[8:10]))
    # Position of the anchor day in the calendar, moved by `shift`.
    target = list(calendar).index(anchor) + shift
    return calendar[target]
# Filter out ST stocks, paused stocks, recent listings and (optionally) stocks
# that open at a price limit; return the surviving stocks.
def filter_stock(stockList,date,days=21*3,skip_paused=1,limit=0):# the open-limit filter is meant for daily-frequency strategies
    """Filter a stock list as of `date`.

    Steps, in order: drop stocks flagged 'is_st'; if skip_paused == 1 keep
    only stocks whose 'paused' field equals 0; drop stocks listed less than
    `days` calendar days before `date`; if limit == 1 drop stocks whose open
    equals the upper or lower price limit.

    Args:
        stockList: Security codes to filter.
        date: Reference day as a "%Y-%m-%d" string (it is parsed with
            strptime inside the listing-age check).
        days: Minimum listing age in calendar days (default 21*3).
        skip_paused: 1 enables the paused-stock filter.
        limit: 1 enables the open-at-limit filter.

    Returns:
        List of the codes that pass every enabled filter.
    """
    # Keep only stocks listed more than n calendar days before beginDate.
    def delect_stop(stocks,beginDate,n=days):
        stockList=[]
        beginDate = datetime.datetime.strptime(beginDate, "%Y-%m-%d")
        for stock in stocks:
            start_date=get_security_info(stock).start_date
            if start_date<(beginDate-datetime.timedelta(days=n)).date():
                stockList.append(stock)
        return stockList
    # Remove ST stocks (single most recent 'is_st' row ending at `date`).
    st_data=get_extras('is_st',stockList, count = 1,end_date=date)
    stockList = [stock for stock in stockList if not st_data[stock][0]]
    # Remove stocks that are paused on `date`.
    if skip_paused == 1:
        paused_df = get_price(stockList,end_date=date,count=1,fields=['paused'])['paused'].T
        paused_df.columns = ['paused']
        paused_df = paused_df[paused_df['paused']==0]
        stockList = paused_df.index
    # Remove recently listed stocks (original comment also mentions delisted ones).
    stockList=delect_stop(stockList,date)
    # Remove stocks that open at a price limit.
    if limit == 1:
        # To filter on the close instead, change the price field used below.
        # NOTE(review): the three-axis .iloc suggests get_price returns a
        # pandas Panel here - confirm against the data API version in use.
        df = get_price(stockList,end_date=date,fields=['open','high_limit','low_limit'],count=1).iloc[:,0,:]
        df['h_limit']=(df['open']==df['high_limit'])
        df['l_limit']=(df['open']==df['low_limit'])
        stockList = [df.index[i] for i in range(len(df)) if not (df.h_limit[i] or df.l_limit[i])] # drop limit-up / limit-down stocks
    return stockList
# Stock universe: the constituents of an index.
index = '000905.XSHG' # universe and benchmark; the original note says this is the CSI 500
# Start and end of the statistics window.
date_start = '2014-06-01'
date_end = '2019-06-16'
# Trading days inside the window; factor values are computed for each of them.
date_list = get_tradeday_list(start=date_start,end=date_end,count=None)# every trading day in the window
date_list
DatetimeIndex(['2014-06-03', '2014-06-04', '2014-06-05', '2014-06-06', '2014-06-09', '2014-06-10', '2014-06-11', '2014-06-12', '2014-06-13', '2014-06-16', ... '2019-05-31', '2019-06-03', '2019-06-04', '2019-06-05', '2019-06-06', '2019-06-10', '2019-06-11', '2019-06-12', '2019-06-13', '2019-06-14'], dtype='datetime64[ns]', length=1229, freq=None)
因子构建方法参考研报说明,计算统计期内因子值
# Define and compute the daily realized-moment factors.
def get_factor(pool,date,freq = 5):
    """Compute realized variance, skewness and kurtosis for each stock.

    For every stock, 240 one-minute bars ending at `date` are fetched, every
    `freq`-th close is kept (bars freq, 2*freq, ...), and log returns r_i
    between consecutive kept closes are formed. With N valid returns:

        RDVar_t  = sum(r_i**2)
        RDSkew_t = sqrt(N) * sum(r_i**3) / sqrt(RDVar_t**3)
        RDKurt_t = N * sum(r_i**4) / RDVar_t**2

    Args:
        pool: Iterable of security codes.
        date: End of the minute-bar window, passed to get_price as end_date.
        freq: Sampling step in minutes.

    Returns:
        DataFrame whose index is ['RDVar_t', 'RDSkew_t', 'RDKurt_t'] and whose
        columns are the stock codes (transpose it for a stock-by-factor
        layout). Skewness and kurtosis are NaN when there are no valid
        returns or the realized variance is zero.
    """
    # NOTE(review): `date` is a date-only value; confirm how the data API
    # interprets it for minute bars (whether that day's bars are included).
    rows = ['RDVar_t','RDSkew_t','RDKurt_t']
    per_stock = {}
    for stock in pool:
        price = get_price(stock,end_date=date,count=240,frequency='1m')
        closes = np.asarray(price['close'], dtype=float)
        sampled = closes[freq-1::freq]
        # np.log is used explicitly; the bare `log` of the original is not
        # imported by any visible statement.
        r = np.diff(np.log(sampled))
        r = r[~np.isnan(r)]
        n_obs = len(r)
        RDVar_t = np.sum(r**2)
        if n_obs == 0 or RDVar_t == 0:
            # Flat or missing prices: higher moments are undefined.
            RDSkew_t = RDKurt_t = np.nan
        else:
            RDSkew_t = np.sqrt(n_obs)*np.sum(r**3)/np.sqrt(RDVar_t**3)
            RDKurt_t = n_obs*np.sum(r**4)/RDVar_t**2
        per_stock[stock] = [RDVar_t,RDSkew_t,RDKurt_t]
    # Build the frame once instead of inserting one column per stock.
    return pd.DataFrame(per_stock, index=rows)
# Define the factors; the first bar's open is used as the starting price.
def get_factor1(pool,date,freq = 5):
    """Compute realized variance, skewness and kurtosis, anchored at the open.

    Same as a close-only computation, except that the open of the first of
    the 240 one-minute bars is prepended to the close series, so sampling
    every `freq`-th point starts at that open (points 0, freq, 2*freq, ...).
    With N valid log returns r_i between consecutive sampled points:

        RDVar_t  = sum(r_i**2)
        RDSkew_t = sqrt(N) * sum(r_i**3) / sqrt(RDVar_t**3)
        RDKurt_t = N * sum(r_i**4) / RDVar_t**2

    Args:
        pool: Iterable of security codes.
        date: End of the minute-bar window, passed to get_price as end_date.
        freq: Sampling step in minutes.

    Returns:
        DataFrame whose index is ['RDVar_t', 'RDSkew_t', 'RDKurt_t'] and whose
        columns are the stock codes. Skewness and kurtosis are NaN when there
        are no valid returns or the realized variance is zero.
    """
    # NOTE(review): `date` is a date-only value; confirm how the data API
    # interprets it for minute bars (whether that day's bars are included).
    rows = ['RDVar_t','RDSkew_t','RDKurt_t']
    per_stock = {}
    for stock in pool:
        price = get_price(stock,end_date=date,count=240,frequency='1m',fields=['open','close'])
        # Select the open by name rather than by column position.
        first_open = price['open'].iloc[0]
        path = np.concatenate(([first_open], np.asarray(price['close'], dtype=float)))
        sampled = path[::freq]
        # np.log is used explicitly; the bare `log` of the original is not
        # imported by any visible statement.
        r = np.diff(np.log(sampled))
        r = r[~np.isnan(r)]
        n_obs = len(r)
        RDVar_t = np.sum(r**2)
        if n_obs == 0 or RDVar_t == 0:
            # Flat or missing prices: higher moments are undefined.
            RDSkew_t = RDKurt_t = np.nan
        else:
            RDSkew_t = np.sqrt(n_obs)*np.sum(r**3)/np.sqrt(RDVar_t**3)
            RDKurt_t = n_obs*np.sum(r**4)/RDVar_t**2
        per_stock[stock] = [RDVar_t,RDSkew_t,RDKurt_t]
    # Build the frame once instead of inserting one column per stock.
    return pd.DataFrame(per_stock, index=rows)
# Walk the trading-day list and store each day's raw factor frame in a dict
# keyed by the "YYYY-MM-DD" string of that day.
factor_dict = {}
for trade_day in date_list:
    end_date = str(trade_day)[:10]
    print('正在计算 {} 因子数据......'.format(end_date))
    # Index constituents as of that day, then the stock filters.
    constituents = get_index_stocks(index, date=end_date)
    stocks_list = filter_stock(constituents, end_date, days=365, limit=1)
    # Raw daily factor values for the filtered universe.
    factor_dict[end_date] = get_factor1(pool=stocks_list, date=end_date, freq=5)
# Derive the smoothed factors: for every day, average the daily factor values
# over that day and the n-1 preceding trading days.
factor1_dict = {}
n = 5
for end_date in date_list[5:]:
    date = str(end_date)[:10]
    factor_df = factor_dict[date].T
    # Stock-by-factor frames for the current day and the n-1 days before it.
    window = [factor_dict[str(ShiftTradingDay(date, -i))[:10]].T for i in range(n)]
    df_rva = pd.concat([daily['RDVar_t'] for daily in window], axis=1)
    df_rsk = pd.concat([daily['RDSkew_t'] for daily in window], axis=1)
    df_rku = pd.concat([daily['RDKurt_t'] for daily in window], axis=1)
    # Row means skip days on which a stock has no value.
    factor_df['RVol_t'] = np.sqrt(240*df_rva.mean(axis=1))
    factor_df['RSkew_t'] = df_rsk.mean(axis=1)
    factor_df['RKur_t'] = df_rku.mean(axis=1)
    factor1_dict[date] = factor_df
factor1_dict[date].head(3)
# Load previously computed factor values from disk; this replaces whatever
# factor1_dict currently holds in memory.
import pickle
# NOTE: unpickling can execute arbitrary code - only load files you produced
# yourself.
# The with-block closes the file even when loading fails.
with open('task1_1_factor.pkl', 'rb') as pkl_file:
    factor1_dict = pickle.load(pkl_file)
# Parameters.
# Whether to neutralize the factor values.
neu = 0 # 1 = neutralize; 0 = leave the factors as they are
how_=['sw_l1', 'market_cap'] # neutralization arguments: industry and market cap
# Rebalancing calendar / list of trade dates.
s_date = '2014-6-10'
d_date = '2019-6-13'
trade_list = get_weekday(s_date,d_date)# first trading day of every week
trade_list[-5:]
['2019-05-13', '2019-05-20', '2019-05-27', '2019-06-03', '2019-06-10']
# Data preparation: optional neutralization (winsorizing and standardizing are
# present but commented out), then attach the forward return as the y value.
import time
t1 = time.time()
# Maps each rebalancing date to its factor frame plus a 'pct_' return column.
factor_y_dict = {}
for date_1,date_2 in zip(trade_list[:-1],trade_list[1:]):
    d1 = ShiftTradingDay(date_1,1) # one trading day after the rebalancing date
    d2 = ShiftTradingDay(date_2,1)
    #print('开始整理 {} 数据...'.format(str(date_1)[:10]))
    factor_df = factor1_dict[str(date_1)[:10]] # the key format must match how the dict was stored
    pool = list(factor_df.index)
    # Index return between the opens of d1 and d2.
    df_1 = get_price(index,end_date=d1,fields=['open'],count = 1)['open']
    df_2 = get_price(index,end_date=d2,fields=['open'],count = 1)['open']
    index_pct = df_2.values[0]/df_1.values[0] - 1# a scalar
    # Per-stock return between the opens of d1 and d2.
    df_1 = get_price(pool,end_date=d1,fields=['open'],count = 1)['open']
    df_2 = get_price(pool,end_date=d2,fields=['open'],count = 1)['open']
    df_3 = pd.concat([df_1,df_2],axis=0).T # stack both rows, then one row per stock
    stock_pct = df_3.iloc[:,1]/df_3.iloc[:,0] - 1 # return per stock, a Series
    # Cleaning steps: winsorize, standardize, neutralize.
    #factor_df = winsorize_med(factor_df, scale=3, inclusive=True, inf2nan=True, axis=0) # median winsorization (disabled)
    #factor_df = standardlize(factor_df, inf2nan=True, axis=0) # column-wise standardization (disabled)
    if neu == 1:
        factor_df = neutralize(factor_df, how=how_, date=date_1, axis=0,fillna='sw_l1')# neutralize
    #factor_df['pct_alpha'] = stock_pct-index_pct
    # When the neutralize branch is skipped, factor_df is the very object held
    # in factor1_dict, so this column is added to that stored frame as well.
    factor_df['pct_'] = stock_pct
    factor_y_dict[date_1] = factor_df
t2 = time.time()
print('计算数据耗时:{0}'.format(t2-t1))
print(factor_y_dict[date_1].shape)
计算数据耗时:58.069838523864746 (482, 7)
# Record the IC of every factor on every rebalancing date: the correlation of
# each of the first six columns with the last column of that date's frame.
ic_df = pd.DataFrame()
for rebalance_day in trade_list[:-1]:
    d = str(rebalance_day)[:10]
    corr_matrix = factor_y_dict[d].corr()
    ic_df[d] = corr_matrix.iloc[:6, -1]
ic_df
2014-06-16 | 2014-06-23 | 2014-06-30 | 2014-07-07 | 2014-07-14 | 2014-07-21 | 2014-07-28 | 2014-08-04 | 2014-08-11 | 2014-08-18 | 2014-08-25 | 2014-09-01 | 2014-09-09 | 2014-09-15 | 2014-09-22 | 2014-09-29 | 2014-10-08 | 2014-10-13 | 2014-10-20 | 2014-10-27 | 2014-11-03 | 2014-11-10 | 2014-11-17 | 2014-11-24 | 2014-12-01 | 2014-12-08 | 2014-12-15 | 2014-12-22 | 2014-12-29 | 2015-01-05 | 2015-01-12 | 2015-01-19 | 2015-01-26 | 2015-02-02 | 2015-02-09 | 2015-02-16 | 2015-02-25 | 2015-03-02 | 2015-03-09 | 2015-03-16 | ... | 2018-08-20 | 2018-08-27 | 2018-09-03 | 2018-09-10 | 2018-09-17 | 2018-09-25 | 2018-10-08 | 2018-10-15 | 2018-10-22 | 2018-10-29 | 2018-11-05 | 2018-11-12 | 2018-11-19 | 2018-11-26 | 2018-12-03 | 2018-12-10 | 2018-12-17 | 2018-12-24 | 2019-01-02 | 2019-01-07 | 2019-01-14 | 2019-01-21 | 2019-01-28 | 2019-02-11 | 2019-02-18 | 2019-02-25 | 2019-03-04 | 2019-03-11 | 2019-03-18 | 2019-03-25 | 2019-04-01 | 2019-04-08 | 2019-04-15 | 2019-04-22 | 2019-04-29 | 2019-05-06 | 2019-05-13 | 2019-05-20 | 2019-05-27 | 2019-06-03 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
RDVar_t | 0.038194 | -0.025979 | 0.018120 | 0.092332 | -0.086154 | -0.070014 | 0.035563 | -0.124732 | -0.005735 | -0.101076 | 0.013804 | 0.021803 | -0.118980 | 0.137579 | -0.140638 | -0.061266 | 0.025366 | -0.036838 | -0.048403 | -0.048565 | 0.003107 | -0.061394 | 0.235365 | -0.135721 | 0.142473 | -0.302721 | 0.177011 | 0.076519 | -0.144163 | -0.057652 | -0.071038 | -0.015043 | 0.024016 | -0.051126 | 0.129114 | -0.117831 | 0.007582 | -0.103493 | 0.016878 | 0.011982 | ... | 0.189815 | -0.021519 | 0.132715 | -0.185520 | -0.006907 | -0.064762 | 0.071021 | -0.078150 | -0.055500 | -0.007255 | 0.061719 | 0.058094 | -0.018139 | -0.291083 | -0.080281 | -0.137383 | -0.144804 | 0.020516 | 0.069444 | -0.040349 | -0.110204 | -0.145854 | -0.071161 | 0.173247 | 0.219759 | -0.194248 | 0.031728 | -0.508386 | 0.300687 | -0.112774 | 0.082872 | -0.227677 | 0.164444 | -0.351309 | -0.021921 | 0.058687 | 0.017991 | -0.014623 | 0.032304 | -0.118480 |
RDSkew_t | 0.074308 | -0.088846 | -0.062924 | 0.015891 | -0.056505 | -0.049093 | -0.001650 | -0.142539 | -0.019599 | -0.026979 | 0.016111 | 0.035269 | -0.089233 | 0.124029 | -0.083152 | -0.150077 | 0.033232 | -0.129321 | -0.138057 | -0.073007 | 0.010261 | -0.080030 | -0.121914 | -0.056283 | -0.031546 | -0.284792 | -0.080725 | 0.078094 | -0.048892 | -0.055798 | 0.135796 | -0.072798 | 0.032413 | -0.103351 | -0.042420 | -0.086268 | -0.107827 | -0.021699 | -0.066322 | 0.031624 | ... | -0.076025 | 0.104615 | -0.001408 | -0.026777 | -0.084140 | 0.065261 | -0.018353 | -0.037632 | -0.113414 | -0.118594 | -0.026315 | 0.040645 | 0.068428 | -0.089492 | 0.010810 | 0.072112 | -0.090740 | 0.054377 | -0.004424 | 0.021840 | -0.009371 | -0.002239 | 0.081101 | 0.106954 | 0.135969 | -0.144962 | -0.154093 | -0.240850 | -0.107318 | 0.054512 | 0.097576 | -0.142727 | 0.092461 | -0.151280 | -0.149418 | -0.026014 | 0.015964 | -0.053164 | -0.023100 | 0.046425 |
RDKurt_t | 0.019042 | -0.150198 | -0.068174 | 0.044430 | -0.010220 | -0.003378 | 0.050677 | -0.124984 | -0.076547 | -0.053707 | -0.000251 | 0.057162 | -0.146581 | 0.136237 | -0.058522 | -0.110785 | -0.001777 | -0.071922 | -0.060491 | -0.125148 | -0.007836 | -0.043413 | -0.015060 | -0.108399 | -0.103387 | 0.148137 | 0.076932 | -0.021790 | -0.107019 | -0.035496 | 0.079307 | -0.017693 | -0.027954 | -0.060024 | 0.018183 | -0.011297 | -0.020281 | -0.047398 | -0.087813 | 0.010494 | ... | 0.134398 | 0.030493 | -0.114520 | -0.140751 | 0.000190 | 0.013462 | 0.084633 | -0.126761 | -0.018939 | -0.052915 | -0.070873 | 0.023092 | 0.091397 | -0.080474 | -0.020071 | -0.051727 | -0.038315 | 0.000405 | 0.044772 | -0.061853 | 0.023923 | -0.036028 | 0.084546 | 0.091795 | -0.005322 | -0.042177 | -0.027623 | -0.069185 | -0.000360 | 0.068299 | 0.024286 | -0.066726 | 0.032815 | -0.116497 | 0.090821 | -0.028236 | 0.011992 | 0.088094 | 0.034061 | 0.007795 |
RVol_t | 0.017180 | 0.025518 | 0.057269 | 0.069521 | -0.205821 | -0.105482 | 0.011114 | -0.097371 | -0.065591 | -0.154038 | 0.095846 | 0.049874 | -0.071701 | -0.037261 | -0.072209 | -0.027854 | 0.006496 | 0.019094 | -0.051543 | -0.008547 | 0.021236 | -0.045481 | 0.300799 | -0.154056 | 0.240570 | -0.233227 | 0.173362 | 0.075169 | -0.044263 | -0.080199 | -0.174509 | 0.021431 | -0.124917 | -0.025439 | 0.257253 | -0.067411 | 0.049796 | -0.171026 | 0.071976 | -0.034766 | ... | 0.109986 | -0.104748 | 0.092839 | -0.212309 | 0.197713 | -0.074022 | 0.034962 | -0.135110 | -0.097805 | 0.126328 | 0.168431 | 0.153124 | -0.031965 | -0.325237 | -0.075770 | -0.333297 | -0.103877 | -0.011386 | 0.110977 | -0.074309 | -0.123646 | -0.238143 | -0.086692 | 0.356504 | 0.299565 | -0.151244 | 0.216812 | -0.507671 | 0.321167 | -0.083077 | 0.007266 | -0.211733 | 0.136844 | -0.346245 | -0.076239 | 0.073825 | -0.068473 | -0.052729 | 0.070117 | -0.159387 |
RSkew_t | 0.080741 | -0.116367 | -0.041659 | 0.009085 | -0.007805 | 0.017148 | -0.029610 | -0.023709 | 0.029735 | -0.089872 | 0.017740 | 0.078732 | -0.031504 | 0.051386 | -0.126644 | -0.162785 | -0.056085 | -0.063711 | -0.108376 | 0.006254 | 0.063048 | -0.138717 | 0.137253 | -0.141088 | 0.116124 | -0.120620 | -0.060254 | 0.116410 | -0.152793 | -0.197459 | 0.095152 | -0.108826 | -0.036336 | 0.020731 | 0.141608 | -0.078131 | 0.012717 | -0.029678 | -0.032859 | -0.008859 | ... | -0.016285 | 0.117793 | 0.053832 | 0.055073 | -0.186191 | 0.010367 | 0.086037 | -0.219214 | 0.068379 | -0.100098 | 0.098990 | 0.158652 | 0.014562 | -0.194236 | 0.027639 | -0.038332 | -0.010261 | 0.035876 | -0.137289 | -0.047269 | -0.126052 | -0.022618 | -0.062637 | -0.045705 | 0.186405 | -0.061010 | -0.103987 | -0.348282 | -0.082450 | -0.084903 | 0.080999 | -0.042550 | 0.072939 | -0.233831 | -0.036800 | -0.053108 | 0.010796 | 0.016662 | -0.114640 | -0.007397 |
RKur_t | 0.036997 | -0.111976 | -0.063695 | -0.014111 | 0.000856 | -0.081081 | 0.072922 | -0.023234 | -0.015899 | -0.084947 | -0.084523 | 0.004393 | -0.157556 | 0.046009 | -0.068107 | -0.111781 | -0.056609 | -0.043977 | -0.122293 | -0.061151 | -0.027361 | -0.000037 | 0.027415 | -0.171981 | 0.013613 | 0.052662 | -0.074580 | -0.024718 | -0.114608 | 0.034072 | 0.052574 | -0.063814 | 0.009877 | -0.015158 | 0.057690 | -0.084937 | 0.017822 | -0.028942 | -0.064104 | -0.014783 | ... | 0.013148 | -0.038130 | -0.132837 | -0.106398 | 0.051115 | -0.032393 | 0.060215 | -0.079917 | -0.119455 | 0.077973 | -0.015506 | 0.094683 | 0.013205 | -0.156751 | -0.025128 | -0.077810 | -0.071657 | -0.022326 | -0.013968 | -0.111918 | -0.038880 | -0.019485 | -0.030328 | 0.159294 | 0.042010 | 0.024367 | -0.019868 | -0.146995 | 0.146419 | -0.048474 | 0.124590 | -0.054223 | -0.013286 | -0.129662 | 0.057892 | 0.115450 | 0.004978 | 0.043802 | 0.095066 | -0.092593 |
# Factor to summarise (immediately superseded by the loop variable below).
factor = 'RSkew_t'
factor_list = ["RDVar_t","RDSkew_t","RDKurt_t","RVol_t","RSkew_t","RKur_t"]
for factor in factor_list:
    print('========================因子:{} IC统计信息如下======================'.format(factor))
    ic_ = ic_df.T
    tab_ic = pd.DataFrame()
    for year in range(2014,2020):
        # Rows strictly between Jan 1 of this year and Jan 1 of the next.
        lower = str(year)+'-01-01'
        upper = str(year+1)+'-01-01'
        ic_temp = ic_[(ic_.index>lower) & (ic_.index<upper)]
        yearly = ic_temp[factor]
        negative_share = round(sum(yearly<0)/len(ic_temp),4)
        tab_ic[str(year)] = [yearly.mean(),yearly.std(),yearly.min(),yearly.max(),negative_share]
    overall = ic_[factor]
    overall_negative_share = round(sum(overall<0)/len(ic_),4)
    tab_ic['所有年份'] = [overall.mean(),overall.std(),overall.min(),overall.max(),overall_negative_share]
    tab_ic.index=['IC均值','IC标准差',"IC最小值","IC最大值","负IC占比"]
    print(tab_ic.T)
    # Plot the IC series together with its 12-period rolling mean.
    ic_df_temp = ic_df.T[factor]
    ic_df_temp.plot(figsize=(9,5))
    smoothed = ic_df_temp.rolling(12).mean()
    smoothed.plot(figsize=(9,5))
    plt.show()
========================因子:RDVar_t IC统计信息如下====================== IC均值 IC标准差 IC最小值 IC最大值 负IC占比 2014 -0.017074 0.112348 -0.302721 0.235365 0.5517 2015 -0.056823 0.132613 -0.356136 0.340711 0.6731 2016 -0.060648 0.106490 -0.268554 0.166320 0.7000 2017 -0.008483 0.149552 -0.273765 0.449772 0.5490 2018 -0.025208 0.102997 -0.291083 0.193453 0.6275 2019 -0.034810 0.185885 -0.508386 0.300687 0.5455 所有年份 -0.035162 0.129931 -0.508386 0.449772 0.6196
========================因子:RDSkew_t IC统计信息如下====================== IC均值 IC标准差 IC最小值 IC最大值 负IC占比 2014 -0.049240 0.082916 -0.284792 0.124029 0.7241 2015 -0.043524 0.097426 -0.337580 0.282393 0.7308 2016 -0.051209 0.090116 -0.287087 0.095353 0.7200 2017 -0.019691 0.070827 -0.163522 0.132372 0.6471 2018 -0.023368 0.076614 -0.206290 0.114366 0.6078 2019 -0.025280 0.104742 -0.240850 0.135969 0.5909 所有年份 -0.035309 0.086327 -0.337580 0.282393 0.6745
========================因子:RDKurt_t IC统计信息如下====================== IC均值 IC标准差 IC最小值 IC最大值 负IC占比 2014 -0.032309 0.078883 -0.150198 0.148137 0.7586 2015 -0.023331 0.075416 -0.155360 0.317179 0.6538 2016 -0.025085 0.060639 -0.188267 0.173043 0.7400 2017 -0.015176 0.053466 -0.111233 0.099588 0.6275 2018 -0.017110 0.060966 -0.140751 0.134398 0.6275 2019 0.006781 0.058614 -0.116497 0.091795 0.4545 所有年份 -0.019223 0.064871 -0.188267 0.317179 0.6549
========================因子:RVol_t IC统计信息如下====================== IC均值 IC标准差 IC最小值 IC最大值 负IC占比 2014 -0.007290 0.118418 -0.233227 0.300799 0.5172 2015 -0.047304 0.169205 -0.472068 0.441960 0.6346 2016 -0.045586 0.145155 -0.361199 0.343433 0.5800 2017 -0.003829 0.175589 -0.408599 0.474545 0.5686 2018 -0.014534 0.144931 -0.342061 0.278125 0.5294 2019 -0.026659 0.214783 -0.507671 0.356504 0.5909 所有年份 -0.025387 0.160344 -0.507671 0.474545 0.5725
========================因子:RSkew_t IC统计信息如下====================== IC均值 IC标准差 IC最小值 IC最大值 负IC占比 2014 -0.025791 0.087737 -0.162785 0.137253 0.5862 2015 -0.049716 0.099289 -0.272852 0.240882 0.7500 2016 -0.046889 0.107378 -0.402717 0.157526 0.6800 2017 -0.009326 0.082944 -0.189355 0.141024 0.5294 2018 -0.016421 0.086620 -0.219214 0.158652 0.5294 2019 -0.056488 0.107796 -0.348282 0.186405 0.7727 所有年份 -0.032288 0.095710 -0.402717 0.240882 0.6314
========================因子:RKur_t IC统计信息如下====================== IC均值 IC标准差 IC最小值 IC最大值 负IC占比 2014 -0.043426 0.063186 -0.171981 0.072922 0.7241 2015 -0.028192 0.079407 -0.181464 0.113990 0.5577 2016 -0.027996 0.067640 -0.184648 0.133098 0.7200 2017 -0.019243 0.060733 -0.187544 0.131407 0.5882 2018 -0.024576 0.080089 -0.173190 0.144176 0.6078 2019 0.004281 0.087379 -0.146995 0.159294 0.5455 所有年份 -0.024571 0.073022 -0.187544 0.159294 0.6235
# Show the factor with the best result on its own.
factor = 'RDSkew_t'
# IC series and its 12-period rolling mean.
ic_df_temp = ic_df.T[factor]
ic_df_temp.plot(figsize=(12,6))
smoothed = ic_df_temp.rolling(12).mean()
smoothed.plot(figsize=(12,6))
plt.show()
ic_ = ic_df.T
tab_ic = pd.DataFrame()
for year in range(2014,2020):
    # Rows strictly between Jan 1 of this year and Jan 1 of the next.
    lower = str(year)+'-01-01'
    upper = str(year+1)+'-01-01'
    ic_temp = ic_[(ic_.index>lower) & (ic_.index<upper)]
    yearly = ic_temp[factor]
    negative_share = round(sum(yearly<0)/len(ic_temp),4)
    tab_ic[str(year)] = [yearly.mean(),yearly.std(),yearly.min(),yearly.max(),negative_share]
overall = ic_[factor]
overall_negative_share = round(sum(overall<0)/len(ic_),4)
tab_ic['所有年份'] = [overall.mean(),overall.std(),overall.min(),overall.max(),overall_negative_share]
tab_ic.index=['IC均值','IC标准差',"IC最小值","IC最大值","负IC占比"]
tab_ic.T
IC均值 | IC标准差 | IC最小值 | IC最大值 | 负IC占比 | |
---|---|---|---|---|---|
2014 | -0.049240 | 0.082916 | -0.284792 | 0.124029 | 0.7241 |
2015 | -0.043524 | 0.097426 | -0.337580 | 0.282393 | 0.7308 |
2016 | -0.051209 | 0.090116 | -0.287087 | 0.095353 | 0.7200 |
2017 | -0.019691 | 0.070827 | -0.163522 | 0.132372 | 0.6471 |
2018 | -0.023368 | 0.076614 | -0.206290 | 0.114366 | 0.6078 |
2019 | -0.025280 | 0.104742 | -0.240850 | 0.135969 | 0.5909 |
所有年份 | -0.035309 | 0.086327 | -0.337580 | 0.282393 | 0.6745 |
# Inspect the distribution of each factor's values.
factor_list = ["RDVar_t","RDSkew_t","RDKurt_t","RVol_t","RSkew_t","RKur_t"]
for factor in factor_list:
    # Build the date-by-stock table in one step. Assigning one column per date
    # to an initially empty frame aligns every later date to the first date's
    # stock index, silently discarding stocks that enter the universe later;
    # the dict constructor keeps the union of all stocks instead.
    factor_df = pd.DataFrame(
        {str(d)[:10]: factor1_dict[str(d)[:10]].loc[:, factor] for d in trade_list[5:]}
    ).T
    print('================{} 因子 分布一览=================='.format(factor))
    # Pool every (date, stock) value; missing combinations are dropped.
    factor_se = pd.Series(factor_df.values.flatten())
    (factor_se.dropna()).hist(bins=50,figsize=(6,4))
    plt.show()
================RDVar_t 因子 分布一览==================
================RDSkew_t 因子 分布一览==================
================RDKurt_t 因子 分布一览==================
================RVol_t 因子 分布一览==================
================RSkew_t 因子 分布一览==================
================RKur_t 因子 分布一览==================
# Grouped backtest: return statistics per factor group.
# Number of groups.
group = 10 # how many groups the ranked universe is split into
factor_list = list(ic_df.index)# every factor that has IC statistics
def get_risk_index(se):
    """Summarise a series of per-period gross returns.

    Args:
        se: pandas Series of gross returns in time order (1.01 means +1%).

    Returns:
        Tuple (total_returns, total_an_returns, sharpe, max_drawdown,
        returns_mean). The first four are formatted strings ('%'-suffixed
        except the Sharpe ratio); returns_mean is the mean period return in
        percent as a float. Annualization uses 250 periods per year and the
        Sharpe ratio subtracts 0.025 from the annualized return.
    """
    return_se = se.cumprod()-1
    # Positional access: [-1] is a label lookup on integer-indexed Series.
    total_returns = return_se.iloc[-1]
    total_an_returns = ((1+total_returns)**(250/len(return_se))-1)
    sharpe = (total_an_returns-0.025)/(np.std(se)*np.sqrt(250))
    returns_mean = round(se.mean()-1,6)*100
    # Net asset value path (starts from the first period's gross return).
    nav = return_se.dropna()+1
    # Largest relative fall from a running peak. Equivalent to comparing the
    # highest value before each point with the lowest value from that point on
    # (for a positive NAV), but linear-time and defined for one observation.
    running_peak = nav.cummax()
    max_drawdown = ((running_peak-nav)/running_peak).max()
    total_returns = str(round(total_returns*100,2))+'%'
    total_an_returns = str(round(total_an_returns*100,2))+'%'
    sharpe = str(round(sharpe,2))
    max_drawdown = str(round(max_drawdown*100,2))+'%'
    return total_returns,total_an_returns,sharpe,max_drawdown,returns_mean
# Grouped backtest: for each factor, rank the stocks on every rebalancing
# date, split them into `group` buckets and report each bucket's performance.
for factor in factor_list:
    # Date-by-stock factor table. Built in one step so that every date keeps
    # its own stocks: assigning one column per date to an initially empty
    # frame aligns later dates to the first date's stock index and silently
    # drops stocks that enter the universe afterwards.
    factor_df = pd.DataFrame(
        {str(d)[:10]: factor1_dict[str(d)[:10]].loc[:, factor] for d in trade_list}
    ).T
    # Ranked pool per date: largest factor value first, missing values removed.
    pool_dict = {}
    for day, row in factor_df.iterrows():
        temp_se = row.sort_values(ascending=False)
        temp_se = temp_se.dropna()
        pool_dict[day] = temp_se.index
    backtest_list = factor_df.index
    group_pct = get_all_pct(pool_dict,backtest_list,groups=group)
    group_pct.columns = ['group'+str(i) for i in range(len(group_pct.columns))]
    # Risk statistics per group. The function is called column by column
    # because the shape DataFrame.apply returns for tuple results has differed
    # between pandas versions.
    risk_tab = pd.DataFrame(index=["总收益","年化收益","夏普率","最大回撤","每日收益%"])
    for i in range(group):
        risk_tab['group'+str(i)] = list(get_risk_index(group_pct['group'+str(i)]))
    print('=========================因子: {} 分组收益如下=========================='.format(factor))
    print(risk_tab.T)
    group_pct.cumprod().plot(figsize=(9,5))
    plt.show()
=========================因子: RDVar_t 分组收益如下========================== 总收益 年化收益 夏普率 最大回撤 每日收益% group0 -72.22% -19.57% -0.63 87.92% -0.0622 group1 8.68% 1.43% -0.03 66.36% 0.0283 group2 16.01% 2.56% 0.0 68.91% 0.0318 group3 89.19% 11.45% 0.28 59.79% 0.0638 group4 45.37% 6.57% 0.13 66.14% 0.0452 group5 99.68% 12.48% 0.33 56.86% 0.0659 group6 84.19% 10.95% 0.28 61.21% 0.0599 group7 68.75% 9.31% 0.23 62.09% 0.054 group8 69.16% 9.35% 0.24 62.76% 0.0523 group9 52.83% 7.48% 0.18 63.84% 0.0445
=========================因子: RDSkew_t 分组收益如下========================== 总收益 年化收益 夏普率 最大回撤 每日收益% group0 -58.98% -14.06% -0.53 83.52% -0.041 group1 -29.86% -5.85% -0.27 79.54% -0.0045 group2 9.12% 1.49% -0.03 68.77% 0.0254 group3 37.27% 5.54% 0.1 63.26% 0.0408 group4 95.01% 12.03% 0.31 58.84% 0.0648 group5 56.35% 7.9% 0.17 62.1% 0.0498 group6 138.71% 15.95% 0.44 57.48% 0.0782 group7 74.45% 9.93% 0.24 61.27% 0.0573 group8 99.84% 12.5% 0.33 55.64% 0.066 group9 60.95% 8.43% 0.19 58.58% 0.0521
=========================因子: RDKurt_t 分组收益如下========================== 总收益 年化收益 夏普率 最大回撤 每日收益% group0 -34.58% -6.96% -0.3 78.22% -0.009 group1 -4.07% -0.7% -0.1 71.95% 0.0166 group2 42.43% 6.2% 0.12 65.29% 0.0429 group3 1.72% 0.29% -0.07 68.33% 0.0205 group4 44.07% 6.41% 0.13 65.49% 0.0437 group5 55.2% 7.76% 0.17 63.85% 0.0491 group6 85.23% 11.05% 0.28 50.91% 0.0607 group7 63.55% 8.73% 0.2 67.11% 0.0532 group8 44.23% 6.43% 0.13 64.54% 0.0446 group9 96.68% 12.19% 0.32 56.95% 0.065
=========================因子: RVol_t 分组收益如下========================== 总收益 年化收益 夏普率 最大回撤 每日收益% group0 -54.92% -12.67% -0.43 85.33% -0.0284 group1 -2.12% -0.36% -0.08 70.37% 0.0217 group2 22.86% 3.56% 0.03 64.89% 0.0354 group3 44.7% 6.49% 0.12 66.17% 0.0461 group4 45.45% 6.58% 0.13 62.83% 0.0457 group5 83.25% 10.85% 0.27 58.09% 0.06 group6 93.43% 11.87% 0.31 57.35% 0.063 group7 69.27% 9.36% 0.23 64.75% 0.0535 group8 69.73% 9.41% 0.24 60.77% 0.0529 group9 56.16% 7.88% 0.2 59.21% 0.0456
=========================因子: RSkew_t 分组收益如下========================== 总收益 年化收益 夏普率 最大回撤 每日收益% group0 -67.62% -17.45% -0.63 86.37% -0.0566 group1 -15.64% -2.85% -0.17 74.87% 0.0082 group2 8.97% 1.47% -0.03 69.2% 0.0256 group3 13.7% 2.21% -0.01 71.24% 0.0283 group4 59.09% 8.22% 0.18 59.47% 0.0511 group5 104.45% 12.93% 0.34 54.83% 0.0676 group6 71.3% 9.59% 0.23 61.42% 0.0557 group7 97.9% 12.31% 0.32 56.53% 0.0655 group8 96.18% 12.14% 0.32 56.17% 0.0648 group9 115.52% 13.95% 0.37 54.14% 0.0714
=========================因子: RKur_t 分组收益如下========================== 总收益 年化收益 夏普率 最大回撤 每日收益% group0 -44.34% -9.48% -0.39 82.18% -0.0203 group1 29.81% 4.54% 0.07 65.22% 0.0363 group2 6.78% 1.12% -0.04 70.49% 0.0236 group3 22.69% 3.54% 0.03 66.79% 0.0334 group4 42.66% 6.23% 0.12 61.39% 0.0433 group5 57.99% 8.09% 0.18 65.59% 0.0506 group6 34.71% 5.2% 0.09 68.44% 0.0397 group7 74.45% 9.93% 0.24 59.14% 0.0577 group8 80.34% 10.55% 0.26 57.88% 0.0592 group9 87.12% 11.25% 0.28 58.38% 0.062
def get_risk_index(se):
    """Summarise a series of per-period gross returns.

    Args:
        se: pandas Series of gross returns in time order (1.01 means +1%).

    Returns:
        Tuple (total_returns, total_an_returns, sharpe, max_drawdown,
        returns_mean). The first four are formatted strings ('%'-suffixed
        except the Sharpe ratio); returns_mean is the mean period return in
        percent as a float. Annualization uses 250 periods per year and the
        Sharpe ratio subtracts 0.025 from the annualized return.
    """
    return_se = se.cumprod()-1
    # Positional access: [-1] is a label lookup on integer-indexed Series.
    total_returns = return_se.iloc[-1]
    total_an_returns = ((1+total_returns)**(250/len(return_se))-1)
    sharpe = (total_an_returns-0.025)/(np.std(se)*np.sqrt(250))
    returns_mean = round(se.mean()-1,6)*100
    # Net asset value path (starts from the first period's gross return).
    nav = return_se.dropna()+1
    # Largest relative fall from a running peak. Equivalent to comparing the
    # highest value before each point with the lowest value from that point on
    # (for a positive NAV), but linear-time and defined for one observation.
    running_peak = nav.cummax()
    max_drawdown = ((running_peak-nav)/running_peak).max()
    total_returns = str(round(total_returns*100,2))+'%'
    total_an_returns = str(round(total_an_returns*100,2))+'%'
    sharpe = str(round(sharpe,2))
    max_drawdown = str(round(max_drawdown*100,2))+'%'
    return total_returns,total_an_returns,sharpe,max_drawdown,returns_mean
# Long-only backtest on a single factor, without transaction costs.
# The factor name must be bound to `factor`; the original assigned it to
# `factor_df`, so the table below was built from a stale loop variable.
factor = "RSkew_t"
# Date-by-stock factor table, built in one step so every date keeps its own
# stocks (per-column assignment would align all dates to the first date's
# stock index and drop stocks that enter the universe later).
factor_df = pd.DataFrame(
    {str(d)[:10]: factor1_dict[str(d)[:10]].loc[:, factor] for d in trade_list[5:]}
).T
# Backtest settings.
fe = 0.0 # transaction cost rate, charged on the turned-over share of the portfolio
rev = 0 # 0 holds the smallest factor values; anything else holds the largest
hold_num = 50 # number of stocks held
# Daily holding returns between the first and the last rebalancing date.
backtest_list = get_tradeday_list(start=factor_df.index[0],end=factor_df.index[-1],count=None)
return_alpha_df = pd.DataFrame()
return_index_df = pd.DataFrame()
return_df = pd.DataFrame()
trade_record_df = pd.DataFrame()
pool_temp_bf = []
tur_list = []
for d1,d2 in zip(backtest_list[:-1],backtest_list[1:]):
    d1_ = ShiftTradingDay(d1,1) # positions are priced at the next day's open
    d2_ = ShiftTradingDay(d2,1)
    d1 = str(d1)[:10]
    rebalanced = d1 in factor_df.index
    if rebalanced:
        # Pick the head of the ranking (ascending order).
        df_temp = factor_df.loc[d1,:].sort_values(ascending=True)
        df_temp = df_temp.dropna()
        if rev == 0:
            pool_temp = df_temp.index[:hold_num]
        else:
            pool_temp = df_temp.index[-hold_num:]
        trade_record_df[str(d1_)[:10]] = pool_temp
        # Turnover: share of the new portfolio that was not held before.
        held_before = set(pool_temp_bf)
        tur_temp = len([stock for stock in pool_temp if stock not in held_before])/len(pool_temp)
        tur_list.append(tur_temp)
        pool_temp_bf = pool_temp
    # Equal-weight open-to-open return of the current holdings.
    df1 = get_price(list(pool_temp),end_date=d1_,count=1,fields=['open'])['open'] # index: date, columns: stocks
    df1 = df1.dropna(axis=1) # drop stocks without an open price
    df2 = get_price(list(df1.columns),end_date=d2_,count=1,fields=['open'])['open']
    # NOTE(review): the division is positional; it assumes both calls return
    # the stocks in the same column order.
    ret = (df2.values/df1.values).mean()
    if rebalanced:
        ret = ret*(1-tur_temp*fe)
    # Benchmark return over the same period.
    df_index1 = get_price('000905.XSHG',end_date=d1_,count=1,fields=['open'])['open']
    df_index2 = get_price('000905.XSHG',end_date=d2_,count=1,fields=['open'])['open']
    index_ret = df_index2.values[-1]/df_index1.values[-1]
    return_alpha_df[d1] = [ret-index_ret] # excess return
    return_df[d1] = [ret] # portfolio gross return
    return_index_df[d1] = [index_ret] # benchmark gross return
return_df = return_df.T
return_alpha_df = return_alpha_df.T
return_index_df = return_index_df.T
# The excess return is shifted by +1 so all three columns are gross returns.
return_all_df = pd.concat([return_df,return_alpha_df+1,return_index_df],axis=1)
return_all_df.columns = ['ret','alpha','index']
# Save the holdings chosen at each rebalancing.
trade_record_df.to_csv('trade_record.csv')
summary = pd.DataFrame(index=['总收益','年化收益','夏普率','最大回撤','每日收益%'])
summary['ret'] = get_risk_index(return_all_df['ret'])
summary['alpha']=get_risk_index(return_all_df['alpha'])
summary['index']=get_risk_index(return_all_df['index'])
summary = summary.T
print('策略每次调仓平均交易换手率为:{}'.format(round(np.mean(tur_list),3)))
print('=====策略运行时间:{} 至 {}====='.format(str(return_all_df.index[0])[:10],str(return_all_df.index[-1])[:10]))
print(summary)
(return_all_df).cumprod().plot(figsize=(15,6))
plt.show()
策略每次调仓平均交易换手率为:0.707 =====策略运行时间:2014-07-21 至 2019-06-06===== 总收益 年化收益 夏普率 最大回撤 每日收益% ret 99.0% 15.55% 0.36 54.66% 0.0845 alpha 78.82% 12.99% 1.21 9.15% 0.0503 index 17.55% 3.46% 0.03 65.85% 0.0341
# Long-only backtest on a single factor, with a 0.2% transaction cost.
# The factor name must be bound to `factor`; the original assigned it to
# `factor_df`, so the table below was built from a stale loop variable.
factor = "RSkew_t"
# Date-by-stock factor table, built in one step so every date keeps its own
# stocks (per-column assignment would align all dates to the first date's
# stock index and drop stocks that enter the universe later).
factor_df = pd.DataFrame(
    {str(d)[:10]: factor1_dict[str(d)[:10]].loc[:, factor] for d in trade_list[5:]}
).T
# Backtest settings.
fe = 0.002 # transaction cost rate, charged on the turned-over share of the portfolio
rev = 0 # 0 holds the smallest factor values; anything else holds the largest
hold_num = 50 # number of stocks held
# Daily holding returns between the first and the last rebalancing date.
backtest_list = get_tradeday_list(start=factor_df.index[0],end=factor_df.index[-1],count=None)
return_alpha_df = pd.DataFrame()
return_index_df = pd.DataFrame()
return_df = pd.DataFrame()
trade_record_df = pd.DataFrame()
pool_temp_bf = []
tur_list = []
for d1,d2 in zip(backtest_list[:-1],backtest_list[1:]):
    d1_ = ShiftTradingDay(d1,1) # positions are priced at the next day's open
    d2_ = ShiftTradingDay(d2,1)
    d1 = str(d1)[:10]
    rebalanced = d1 in factor_df.index
    if rebalanced:
        # Pick the head of the ranking (ascending order).
        df_temp = factor_df.loc[d1,:].sort_values(ascending=True)
        df_temp = df_temp.dropna()
        if rev == 0:
            pool_temp = df_temp.index[:hold_num]
        else:
            pool_temp = df_temp.index[-hold_num:]
        trade_record_df[str(d1_)[:10]] = pool_temp
        # Turnover: share of the new portfolio that was not held before.
        held_before = set(pool_temp_bf)
        tur_temp = len([stock for stock in pool_temp if stock not in held_before])/len(pool_temp)
        tur_list.append(tur_temp)
        pool_temp_bf = pool_temp
    # Equal-weight open-to-open return of the current holdings.
    df1 = get_price(list(pool_temp),end_date=d1_,count=1,fields=['open'])['open'] # index: date, columns: stocks
    df1 = df1.dropna(axis=1) # drop stocks without an open price
    df2 = get_price(list(df1.columns),end_date=d2_,count=1,fields=['open'])['open']
    # NOTE(review): the division is positional; it assumes both calls return
    # the stocks in the same column order.
    ret = (df2.values/df1.values).mean()
    if rebalanced:
        # Charge the cost only on rebalancing days, scaled by turnover.
        ret = ret*(1-tur_temp*fe)
    # Benchmark return over the same period.
    df_index1 = get_price('000905.XSHG',end_date=d1_,count=1,fields=['open'])['open']
    df_index2 = get_price('000905.XSHG',end_date=d2_,count=1,fields=['open'])['open']
    index_ret = df_index2.values[-1]/df_index1.values[-1]
    return_alpha_df[d1] = [ret-index_ret] # excess return
    return_df[d1] = [ret] # portfolio gross return
    return_index_df[d1] = [index_ret] # benchmark gross return
return_df = return_df.T
return_alpha_df = return_alpha_df.T
return_index_df = return_index_df.T
# The excess return is shifted by +1 so all three columns are gross returns.
return_all_df = pd.concat([return_df,return_alpha_df+1,return_index_df],axis=1)
return_all_df.columns = ['ret','alpha','index']
# Save the holdings chosen at each rebalancing.
trade_record_df.to_csv('trade_record.csv')
summary = pd.DataFrame(index=['总收益','年化收益','夏普率','最大回撤','每日收益%'])
summary['ret'] = get_risk_index(return_all_df['ret'])
summary['alpha']=get_risk_index(return_all_df['alpha'])
summary['index']=get_risk_index(return_all_df['index'])
summary = summary.T
print('策略每次调仓平均交易换手率为:{}'.format(round(np.mean(tur_list),3)))
print('=====策略运行时间:{} 至 {}====='.format(str(return_all_df.index[0])[:10],str(return_all_df.index[-1])[:10]))
print(summary)
(return_all_df).cumprod().plot(figsize=(15,6))
plt.show()
策略每次调仓平均交易换手率为:0.707 =====策略运行时间:2014-07-21 至 2019-06-06===== 总收益 年化收益 夏普率 最大回撤 每日收益% ret 39.72% 7.28% 0.13 64.29% 0.0547 alpha 25.48% 4.88% 0.27 13.38% 0.0206 index 17.55% 3.46% 0.03 65.85% 0.0341
(一)因子IC统计:研报中涉及到的三个基础因子和构造因子,我们分年统计IC结果,发现RDVar_t因子、 RDSkew_t 和 RSkew_t IC均值绝对值超过了0.03,初步判断因子有一定的预测效果
(二)分组回测:将以上三个因子分别进行分组回测,各组区分度比较明显,且尾部偏离更显著,初步估计空头组合效果比多头更为显著
(三)多头组合:为了更加贴合A股的实际操作,我们取多头部分构建组合,这里取了IC值高的 RSkew_t 因子,在无交易成本的情况下,多头部分能够取到12.9%的年化超额收益,在加入千二交易成本之后,超额收益到4.88%,平均每次交易换仓在0.7左右
综合来看,该研报所构造因子是有效的,因子贡献的超额收益在周度调仓下能够抵抗交易成本,但是所剩下的超额收益已经不多。
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程