# 从IC、IC_IR、分组收益、超额收益几个角度统计因子有效性。
# 多因子回测框架(上)--生成因子
# 盘古开天地-load数据
import datetime
import jqdata
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from jqfactor import Factor,calc_factors
import pandas as pd
import statsmodels.api as sm
import scipy.stats as st
import pickle
# Load the six-element data package stored in 'Z1Package.pkl'.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load this package if it comes from a trusted source.
with open('Z1Package.pkl', 'rb') as pkl_file:  # closed automatically
    load_Package = pickle.load(pkl_file)
(univ_dict, return_df, all_return_df,
 all_factor_dict, MC_df, all_industry_df) = load_Package
# 1.回测基础数据计算
def all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum=10):
    """Compute per-date returns of factor-sorted groups between rebalances.

    For every pair of consecutive keys ``(start, end)`` of ``univ_dict`` the
    stocks of ``univ_dict[start]`` that have a non-NaN factor value on
    ``start`` are sorted by ascending factor value and cut into ``GroupNum``
    contiguous slices.  For each slice the mean cumulative product of
    ``1 + return`` over ``start..end`` is turned back into period-over-period
    changes and stored in the result.

    Args:
        factor: DataFrame of factor values; rows are addressed by the keys of
            ``univ_dict`` and columns by stock code.
        univ_dict: mapping ``date -> iterable of stock codes``; the key order
            defines the rebalance schedule.
        all_return_df: DataFrame of stock returns (columns are stock codes)
            whose index is sliced with ``.loc[start:end]``.
        GroupNum: number of groups.

    Returns:
        Float DataFrame with one column per group (``0`` holds the lowest
        factor values), restricted to the rows from the first key of
        ``univ_dict`` on, shifted down by one row and with missing values
        replaced by 0.
    """
    date_list = list(univ_dict.keys())
    # Create the frame as float up front: an empty frame defaults to object
    # dtype, and the final fillna() would then depend on pandas' deprecated
    # object-downcasting behaviour to become numeric again.
    all_Group_Ret_df = pd.DataFrame(index=list(all_return_df.index),
                                    columns=list(range(GroupNum)),
                                    dtype=float)
    for start, end in zip(date_list, date_list[1:]):
        factor_se = factor.loc[start].dropna()
        # Filter with isin() rather than indexing .loc with a set: recent
        # pandas versions reject set indexers, and keeping the factor's own
        # column order makes the result reproducible.
        factor_se = factor_se[factor_se.index.isin(list(univ_dict[start]))]
        factor_se_stock = list(factor_se.sort_values().index)
        N = len(factor_se_stock)
        for i in range(GroupNum):
            group_stock = factor_se_stock[int(N/GroupNum*i):int(N/GroupNum*(i+1))]
            # Mean cumulative value of the group over the holding period.
            cumret = (all_return_df.loc[start:end, group_stock]+1).cumprod().mean(axis=1)
            # shift(1).fillna(1) prepends the starting value 1, pct_change()
            # recovers the per-row change and shift(-1) moves it back onto
            # the row it belongs to; the last row of the period becomes NaN
            # and is overwritten by the next period.
            all_Group_Ret_df.loc[start:end, i] = cumret.shift(1).fillna(1).pct_change().shift(-1)
    all_Group_Ret_df = all_Group_Ret_df.loc[date_list[0]:].shift(1).fillna(0)
    return all_Group_Ret_df
def Group_Return_calculator(factor,univ_dict,return_df,GroupNum=10):
    """Compute the mean return of factor-sorted groups on every rebalance date.

    On each key ``date`` of ``univ_dict`` the stocks that are in
    ``univ_dict[date]`` and have both a non-NaN factor value and a non-NaN
    return on that date are sorted by ascending factor value and cut into
    ``GroupNum`` contiguous slices; the mean of ``return_df.loc[date]`` over
    each slice is stored.

    Args:
        factor: DataFrame of factor values (rows: dates, columns: stocks).
        univ_dict: mapping ``date -> iterable of stock codes``.
        return_df: DataFrame of returns with the same layout as ``factor``.
        GroupNum: number of groups.

    Returns:
        Float DataFrame indexed by the keys of ``univ_dict`` with one column
        per group (``0`` holds the lowest factor values), shifted down by one
        row and with missing values replaced by 0.
    """
    date_list = list(univ_dict.keys())
    # dtype=float avoids an object-dtype frame whose fillna() result would
    # depend on pandas' deprecated object-downcasting behaviour.
    GroupRet_df = pd.DataFrame(index=date_list,
                               columns=list(range(GroupNum)),
                               dtype=float)
    for date in date_list:
        factor_se = factor.loc[date].dropna()
        with_return = return_df.loc[date].dropna().index
        # Boolean masks keep the factor's own column order, so ties in the
        # factor value are ordered reproducibly (a set intersection is not).
        keep = (factor_se.index.isin(list(univ_dict[date]))
                & factor_se.index.isin(with_return))
        factor_se_stock = list(factor_se[keep].sort_values().index)
        N = len(factor_se_stock)
        for i in range(GroupNum):
            group_stock = factor_se_stock[int(N/GroupNum*i):int(N/GroupNum*(i+1))]
            GroupRet_df.loc[date, i] = return_df.loc[date, group_stock].mean()
    return GroupRet_df.shift(1).fillna(0)
def ic_calculator(factor,return_df,univ_dict):
    """Return the Spearman rank correlation of factor and return per date.

    For every key ``date`` of ``univ_dict`` the stocks that are in
    ``univ_dict[date]`` and have both a non-NaN factor value and a non-NaN
    return on that date are used to correlate ``factor.loc[date]`` with
    ``return_df.loc[date]``.

    Args:
        factor: DataFrame of factor values (rows: dates, columns: stocks).
        return_df: DataFrame of returns with the same layout.
        univ_dict: mapping ``date -> iterable of stock codes``.

    Returns:
        List of correlation coefficients in the key order of ``univ_dict``.
        Dates with fewer than 10 usable stocks are skipped, so the list can
        be shorter than ``univ_dict`` and carries no date labels; callers
        must account for that when attaching an index.
    """
    ic_list = []
    for date, univ in univ_dict.items():
        factor_se = factor.loc[date].dropna()
        wanted = set(univ) & set(return_df.loc[date].dropna().index)
        # Walk the factor's own index so the stock order is reproducible.
        stocks = [stock for stock in factor_se.index if stock in wanted]
        if len(stocks) < 10:
            continue
        # Only the coefficient is needed; the p-value is discarded.
        ic, _ = st.spearmanr(factor_se[stocks], return_df.loc[date, stocks])
        ic_list.append(ic)
    return ic_list
def get_index_return(univ_dict,index,count=250):
    """Return the benchmark's returns, per row and per rebalance period.

    Close prices of ``index`` are requested through ``get_price`` (not
    defined in this file; presumably the JoinQuant data API) for ``count``
    rows ending at the latest key of ``univ_dict``.

    Args:
        univ_dict: mapping whose keys are the rebalance dates.
        index: benchmark identifier passed to ``get_price``.
        count: number of price rows requested.

    Returns:
        Tuple ``(price_return, price_return_by_tradeday)``: the percentage
        change of the close from the first rebalance date on, and the
        percentage change between consecutive rebalance dates.  The first
        value of each series is 0.
    """
    rebalance_dates = list(univ_dict.keys())
    close = get_price(index,
                      end_date=max(rebalance_dates),
                      count=count,
                      fields=['close'])['close']
    # NOTE(review): every rebalance date must be among the fetched rows for
    # the label lookup below — confirm `count` covers the whole schedule.
    daily_return = close.loc[rebalance_dates[0]:].pct_change().fillna(0)
    period_return = close.loc[rebalance_dates].pct_change().fillna(0)
    return daily_return, period_return
def effect_test(univ_dict,key,group_return,group_excess_return,index_return=None):
    """Run the three effectiveness checks on one factor's grouped returns.

    Test one: the annualised growth of the groups correlates with the group
    order (absolute correlation above 0.5).  The sign of the correlation
    decides which end group is the "winner" and which the "loser".
    Test two: the winner beats the benchmark's annualised growth by more than
    0.05 and the loser trails it by more than 0.05.
    Test three: the winner's excess return is positive, and the loser's
    negative, in more than half of the rows of ``group_excess_return``.

    Args:
        univ_dict: mapping whose first and last keys delimit the test period
            (their difference must expose ``.days``).
        key: factor name; unused, kept for interface compatibility.
        group_return: DataFrame of group returns, one column per group,
            ordered from the lowest to the highest factor group.
        group_excess_return: DataFrame of group returns in excess of the
            benchmark, with the same columns.
        index_return: Series of benchmark returns.  When omitted the
            module-level ``index_return`` is used, which is what the original
            implementation always did.

    Returns:
        ``[test_one_pass, test_two_win_pass, test_two_lose_pass,
        test_three_win_pass, test_three_lose_pass]``.
    """
    if index_return is None:
        # Backward compatibility with callers that rely on the global.
        index_return = globals()['index_return']
    date_list = list(univ_dict.keys())
    daylength = (date_list[-1]-date_list[0]).days
    # Float literal so the exponent is not truncated by integer division.
    exponent = 365.0/daylength
    # Both values are annualised growth factors (1 + return); only their
    # differences are used below, so the missing "-1" cancels out.
    annual_return = (group_return.astype(float)+1).cumprod().iloc[-1, :]**exponent
    index_annual_return = (index_return+1).cumprod().iloc[-1]**exponent

    # Test one: group order versus group performance.
    groups = list(annual_return.index)
    sequence = pd.Series(range(len(groups)), index=groups, dtype=float)
    test_one_corr = annual_return.corr(sequence)
    test_one_passgrade = 0.5
    test_one_pass = abs(test_one_corr) > test_one_passgrade
    # Use the actual first/last group instead of the hard-coded labels 0/9
    # so the test also works when GroupNum is not 10.
    if test_one_corr < 0:
        wingroup, losegroup = groups[0], groups[-1]
    else:
        wingroup, losegroup = groups[-1], groups[0]

    # Test two: distance of winner/loser from the benchmark.
    test_two_passgrade = 0.05
    test_two_win_pass = annual_return[wingroup]-index_annual_return > test_two_passgrade
    test_two_lose_pass = index_annual_return-annual_return[losegroup] > test_two_passgrade

    # Test three: share of rows in which winner/loser beat/trail the benchmark.
    test_three_grade = 0.5
    win_excess = group_excess_return[wingroup]
    lose_excess = group_excess_return[losegroup]
    test_three_win_pass = (win_excess > 0).sum()/float(len(win_excess)) > test_three_grade
    test_three_lose_pass = (lose_excess < 0).sum()/float(len(lose_excess)) > test_three_grade

    return [test_one_pass, test_two_win_pass, test_two_lose_pass,
            test_three_win_pass, test_three_lose_pass]
#2. 计算绩效
def plot_nav(all_return_df,index_return,key):
    """Plot the net value of every column, raw and hedged by the benchmark.

    The upper chart shows ``(1 + return).cumprod()`` for each column of
    ``all_return_df``; the lower chart shows the same after subtracting
    ``index_return``.  Relies on ``plt`` being available in the module
    namespace (it is not imported in this file).

    Args:
        all_return_df: DataFrame of returns, one column per line to draw.
        index_return: benchmark returns subtracted for the hedged chart.
        key: factor name; currently unused.
    """
    # Figure with two stacked charts.
    figure = plt.figure(figsize=(12,12))
    figure.set_facecolor('white')
    figure.set_tight_layout(True)
    nav_axis = figure.add_subplot(211)
    hedged_axis = figure.add_subplot(212)
    decorations = (
        (nav_axis, u"净值", u"因子选股 - 净值走势"),
        (hedged_axis, u"对冲净值", u"因子选股 - 对冲指数后净值走势"),
    )
    for axis, ylabel, title in decorations:
        axis.grid()
        axis.set_ylabel(ylabel, fontsize=16)
        axis.set_title(title, fontsize=16)

    # One line per column on each chart.
    dates = list(all_return_df.index)
    for group in all_return_df.columns:
        group_return = all_return_df[group]
        nav_axis.plot(dates, (1+group_return).cumprod(), label=str(group))
        hedged_axis.plot(dates, (1+group_return-index_return).cumprod(), label=str(group))

    for axis in (nav_axis, hedged_axis):
        axis.legend(loc=0, fontsize=12)
def polish(x):
    """Format a ratio as a percentage string with two decimals."""
    return '%.2f%%' % (x*100)


def _max_drawdown(nav):
    """Return the largest relative drop of ``nav`` below its running peak.

    The running peak never falls below 1, i.e. the starting value counts as
    a peak even when the first observation is already lower.
    """
    running_peak = nav.cummax().clip(lower=1)
    return (1-nav/running_peak).max()


def result_stats(key,all_return_df,index_return):
    """Build a table of risk and performance figures for every column.

    Args:
        key: label used as the name of the result's index.
        all_return_df: DataFrame of returns, one column per portfolio; the
            difference between its last and first index value must expose
            ``.days``.
        index_return: Series of benchmark returns on the same index.

    Returns:
        DataFrame indexed by the columns of ``all_return_df`` with two-level
        columns: risk figures (Alpha, Beta, information ratio, Sharpe ratio),
        long-only figures and benchmark-hedged figures (annualised return,
        maximum drawdown, volatility).  Alpha, returns, drawdowns and
        volatilities are formatted by ``polish``; Beta and the two ratios are
        rounded to two decimals.
    """
    sequences = all_return_df.columns
    cols = [(u'风险指标', u'Alpha'), (u'风险指标', u'Beta'), (u'风险指标', u'信息比率'), (u'风险指标', u'夏普比率'),
            (u'纯多头', u'年化收益'), (u'纯多头', u'最大回撤'), (u'纯多头', u'收益波动率'),
            (u'对冲后', u'年化收益'), (u'对冲后', u'最大回撤'), (u'对冲后', u'收益波动率')]
    columns = pd.MultiIndex.from_tuples(cols)
    # Build a separate, named Index: naming the frame's index afterwards
    # could rename the caller's columns when the Index object is shared.
    result_df = pd.DataFrame(index=pd.Index(sequences, name="%s" % (key)),
                             columns=columns)

    rf = 0.04                      # risk-free rate used for Alpha and Sharpe
    annualizer = 252**0.5          # scales a per-row std to a yearly one
    nav_index = (1+index_return).cumprod()

    for sq in sequences:
        # Net values
        return_data = all_return_df[sq].astype(float)
        return_data_excess = return_data-index_return
        nav = (1+return_data).cumprod()
        nav_excess = (1+return_data_excess).cumprod()

        # Beta = corr * std_p / std_m (equivalently cov / var_m)
        beta = return_data.corr(index_return)*return_data.std()/index_return.std()

        # Annualised returns
        daylength = (return_data.index[-1]-return_data.index[0]).days
        exponent = 365.0/daylength
        yearly_return = nav.iloc[-1]**exponent-1
        yearly_return_excess = nav_excess.iloc[-1]**exponent-1
        yearly_index_return = nav_index.iloc[-1]**exponent-1

        # Maximum drawdown
        max_drawdown = _max_drawdown(nav)
        max_drawdown_excess = _max_drawdown(nav_excess)

        # Volatility
        vol = return_data.std()*annualizer
        vol_excess = return_data_excess.std()*annualizer

        # Alpha = Rp - (rf + beta * (Rm - rf)).  The market premium is the
        # benchmark's return over rf, not the portfolio's return over the
        # benchmark as previously computed.
        alpha = yearly_return-(rf+beta*(yearly_index_return-rf))

        # Information ratio and Sharpe ratio
        ir = (yearly_return-yearly_index_return)/vol_excess
        sharpe = (yearly_return-rf)/vol

        # Beta is rounded from beta itself (it used to be overwritten with
        # the rounded information ratio).
        result_df.loc[sq] = [polish(alpha), round(beta, 2), round(ir, 2), round(sharpe, 2),
                             polish(yearly_return), polish(max_drawdown), polish(vol),
                             polish(yearly_return_excess), polish(max_drawdown_excess), polish(vol_excess)]
    return result_df
def draw_excess_return(excess_return):
    """Draw a bar chart of the mean excess return of every group.

    Positive means are drawn in red, negative ones in green; groups are
    numbered from 1 on the x axis.  Relies on ``plt`` being available in the
    module namespace (it is not imported in this file).

    Args:
        excess_return: DataFrame of excess returns, one column per group,
            with column labels convertible to ``int``.
    """
    # The first row is left out of the mean — presumably the zero-filled row
    # produced by the upstream shift(1).fillna(0); confirm against the caller.
    excess_return_mean = excess_return[1:].mean()
    excess_return_mean.index = [int(x)+1 for x in excess_return_mean.index]
    excess_plus = excess_return_mean[excess_return_mean > 0]
    excess_minus = excess_return_mean[excess_return_mean < 0]

    fig = plt.figure(figsize=(12, 6))
    fig.set_facecolor('white')
    ax1 = fig.add_subplot(111)
    ax1.bar(excess_plus.index, excess_plus.values, align='center', color='r', width=0.35)
    ax1.bar(excess_minus.index, excess_minus.values, align='center', color='g', width=0.35)
    ax1.set_xlim(left=0.5, right=len(excess_return_mean)+0.5)
    ax1.set_ylabel(u'超额收益', fontsize=16)
    ax1.set_xlabel(u'十分位分组', fontsize=16)
    ax1.set_xticks(excess_return_mean.index)
    ax1.set_xticklabels([int(x) for x in ax1.get_xticks()], fontsize=14)
    # Pin the y tick positions before labelling them so the fixed labels
    # cannot drift away from the ticks, and format the percentages
    # explicitly: str(x*100)+'0%' leaked float noise such as
    # '0.70000000000000010%' and broke on scientific notation.
    y_ticks = ax1.get_yticks()
    ax1.set_yticks(y_ticks)
    ax1.set_yticklabels([polish(x) for x in y_ticks], fontsize=14)
    ax1.set_title(u"因子选股分组超额收益", fontsize=16)
    ax1.grid()
print('计算IC_IR......')
# Rank IC series of every factor, keyed by factor name.
ic_list_dict = {}
for key, factor in all_factor_dict.items():
    ic_list = ic_calculator(factor, return_df, univ_dict)
    ic_list_dict[key] = ic_list

# Collect the results.  The index drops the last rebalance date, so every
# IC list is expected to hold exactly one value fewer than univ_dict has keys.
ic_df = pd.DataFrame(ic_list_dict, index=list(univ_dict.keys())[:-1])
# IC_IR: mean IC divided by its standard deviation, per factor.
ic_ir_se = ic_df.mean() / ic_df.std()
print('计算分组收益......')
GroupNum = 10
all_Factor_Group_Return_dict = {}
Factor_Group_Return_dict = {}
for key, factor in all_factor_dict.items():
    # Group returns on every row of all_return_df.
    all_GroupRet_df = all_Group_Return_calculator(factor, univ_dict, all_return_df, GroupNum)
    all_Factor_Group_Return_dict[key] = all_GroupRet_df
    # Group returns per rebalance period.
    GroupRet_df = Group_Return_calculator(factor, univ_dict, return_df, GroupNum)
    Factor_Group_Return_dict[key] = GroupRet_df
print('计算指数收益......')
index = '000300.XSHG'
index_return, index_return_by_tradeday = get_index_return(univ_dict, index)

# Per-period group returns minus the benchmark's per-period return.
Factor_Group_Excess_Return_dict = {}
for key, group_return in Factor_Group_Return_dict.items():
    Factor_Group_Excess_Return_dict[key] = group_return.subtract(index_return_by_tradeday, axis=0)
print('因子有效性测试......')
effect_test_dict = {}
for key, group_return in Factor_Group_Return_dict.items():
    group_excess_return = Factor_Group_Excess_Return_dict[key]
    effect_test_dict[key] = effect_test(univ_dict, key, group_return, group_excess_return)

# ---------- names of the factors that pass every test ----------
effect_factor_list = []
for key, effect in effect_test_dict.items():
    if not all(effect):
        continue
    effect_factor_list.append(key)
effect_factor_list

# ---------- factor data of the passing factors ----------
effect_factor_dict = {key: value for key, value in all_factor_dict.items() if key in effect_factor_list}
print('完成')

# Summary table: IC_IR next to the five test outcomes of every factor.
EffectTestresult = pd.concat(
    [ic_ir_se.to_frame('a'), pd.DataFrame(effect_test_dict).T],
    axis=1,
)
columns = [
    ['ICIR', '测试一', '测试二', '测试二', '测试三', '测试三'],
    [' ', ' ', '胜者组', '败者组', '胜者组', '败者组'],
]
EffectTestresult.columns = columns
EffectTestresult
# Each step below reports a single, fixed factor.  To cover every effective
# factor, iterate over effect_factor_dict.items() instead.

# Net-value curves of the groups.
key = 'AQI'
plot_nav(all_Factor_Group_Return_dict[key], index_return, key)

# Risk/performance table of the groups.
key = 'AQI'
result_df = result_stats(key, all_Factor_Group_Return_dict[key], index_return)
result_df

# Mean excess return per group.
key = 'AQI'
draw_excess_return(Factor_Group_Excess_Return_dict[key])