请 [注册] 或 [登录]  | 返回主站

量化交易吧 /  量化平台 帖子:3366781 新帖:20

多因子回测框架(下)--检验因子

Tango发表于:8 月 6 日 11:34回复(1)

从IC,IC_IR,分组收益,超额收益几个角度统计因子有效性。

多因子回测框架(上)--生成因子

# 盘古开天地-load数据
import datetime
import jqdata
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from jqfactor import Factor,calc_factors
import pandas as pd
import statsmodels.api as sm
import scipy.stats as st
import pickle
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load a Z1Package.pkl that you produced yourself.
# The context manager guarantees the file handle is closed even if loading fails.
with open('Z1Package.pkl', 'rb') as pkl_file:
    load_Package = pickle.load(pkl_file)
# The package is a 6-item sequence; unpack it in the order it was stored.
univ_dict,return_df,all_return_df,all_factor_dict,MC_df,all_industry_df=load_Package
/opt/conda/envs/python3new/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools
# 1.回测基础数据计算
def all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum=10):
    """Compute row-by-row returns of factor-sorted groups over every holding period.

    At each rebalance date (a key of ``univ_dict``) the stocks of that date's
    universe that have a non-NaN factor value are sorted by factor value
    (ascending) and cut into ``GroupNum`` consecutive slices.  Between one
    rebalance date and the next, each group's net value is the mean of its
    members' cumulative products of ``1 + return``; the group's return series
    is derived from that net value.

    Args:
        factor: DataFrame whose rows are selected by rebalance date and whose
            columns are stock labels.
        univ_dict: mapping ``rebalance date -> iterable of stock labels``.
            Keys are used in their iteration order (assumed chronological --
            TODO confirm with the producer of the dict).
        all_return_df: DataFrame of returns, rows sliced by date label,
            columns are stock labels.
        GroupNum: number of groups to cut the sorted stocks into.

    Returns:
        DataFrame with the rows of ``all_return_df`` from the first rebalance
        date onwards and columns ``0 .. GroupNum-1``.  Values are shifted down
        by one row and missing values are replaced by 0.  An empty
        ``univ_dict`` yields an empty frame.
    """
    rebalance_dates = list(univ_dict.keys())
    group_labels = list(range(GroupNum))
    # dtype=float keeps the result numeric instead of an object-dtype frame.
    all_Group_Ret_df = pd.DataFrame(index=list(all_return_df.index),
                                    columns=group_labels, dtype=float)
    if not rebalance_dates:
        return all_Group_Ret_df.iloc[0:0]

    for start, end in zip(rebalance_dates[:-1], rebalance_dates[1:]):
        factor_row = factor.loc[start].dropna()
        # Filter with a boolean mask instead of ``.loc[start, set(...)]``:
        # recent pandas rejects set indexers, and a set would make the order
        # of tied factor values depend on hash order.
        factor_row = factor_row[factor_row.index.isin(list(univ_dict[start]))]
        ranked_stocks = list(factor_row.sort_values().index)
        N = len(ranked_stocks)
        for i in group_labels:
            group_stock = ranked_stocks[int(N/GroupNum*i):int(N/GroupNum*(i+1))]
            # Mean of cumulative products of the members' (1 + return).
            cumret = (all_return_df.loc[start:end, group_stock] + 1).cumprod().mean(axis=1)
            # Prepend a base value of 1, take row-to-row changes and move them
            # back up one row; the last row of the window becomes NaN and is
            # overwritten by the next holding period.
            all_Group_Ret_df.loc[start:end, i] = cumret.shift(1).fillna(1).pct_change().shift(-1)

    # Drop rows before the first rebalance date, shift down by one row and
    # fill the gaps with 0.
    all_Group_Ret_df = all_Group_Ret_df.loc[rebalance_dates[0]:].shift(1).fillna(0)
    return all_Group_Ret_df

def Group_Return_calculator(factor,univ_dict,return_df,GroupNum=10):
    """Mean return of each factor-sorted group at every rebalance date.

    For every key of ``univ_dict`` the stocks that are in that date's
    universe and have both a non-NaN factor value and a non-NaN return are
    sorted by factor value (ascending) and cut into ``GroupNum`` consecutive
    slices; each slice's mean return on that date is recorded.

    Returns a DataFrame indexed by the rebalance dates with columns
    ``0 .. GroupNum-1``, shifted down by one row with gaps filled by 0.
    """
    rebalance_dates = list(univ_dict.keys())
    GroupRet_df = pd.DataFrame(index=rebalance_dates, columns=list(range(GroupNum)))
    for date in rebalance_dates:
        with_factor = set(factor.loc[date].dropna().index)
        with_return = set(return_df.loc[date].dropna().index)
        candidates = list(set(univ_dict[date]) & with_factor & with_return)
        ranked = factor.loc[date, candidates].sort_values().index.tolist()
        # Fractional slice width; boundaries are truncated to ints below.
        width = len(ranked) / GroupNum
        for i in range(GroupNum):
            members = ranked[int(width * i):int(width * (i + 1))]
            GroupRet_df.loc[date, i] = return_df.loc[date, members].mean()
    shifted = GroupRet_df.shift(1)
    return shifted.fillna(0)

def ic_calculator(factor,return_df,univ_dict):
    """Spearman rank correlation between factor values and returns, per date.

    For each key of ``univ_dict`` the stocks that are in that date's universe
    and have both a non-NaN factor value and a non-NaN return are used.  Dates
    with fewer than 10 such stocks are skipped, so the returned list can be
    shorter than ``univ_dict``.

    Returns the list of correlation coefficients in the iteration order of
    ``univ_dict`` (p-values are not returned).
    """
    rank_ics = []
    for date, members in univ_dict.items():
        has_factor = set(factor.loc[date].dropna().index)
        has_return = set(return_df.loc[date].dropna().index)
        usable = list(set(members) & has_factor & has_return)
        if len(usable) < 10:
            # Too few observations for this date; skip it.
            continue
        rank_ic, _p_value = st.spearmanr(factor.loc[date, usable],
                                         return_df.loc[date, usable])
        rank_ics.append(rank_ic)
    return rank_ics

def get_index_return(univ_dict,index,count=250):
    """Return benchmark returns per price row and per rebalance date.

    Fetches ``count`` closing prices of ``index`` ending at the latest key of
    ``univ_dict`` through ``get_price`` (not defined in this file; presumably
    supplied by the research platform -- verify).

    Returns:
        A pair ``(price_return, price_return_by_tradeday)``: percentage
        changes of the close from the first rebalance date onwards, and
        percentage changes between consecutive rebalance dates.  The first
        value of each series is filled with 0.
    """
    rebalance_dates = list(univ_dict.keys())
    last_date = max(rebalance_dates)
    close = get_price(index, end_date=last_date, count=count, fields=['close'])['close']
    from_first_rebalance = close.loc[rebalance_dates[0]:]
    on_rebalance_dates = close.loc[rebalance_dates]
    return (from_first_rebalance.pct_change().fillna(0),
            on_rebalance_dates.pct_change().fillna(0))

def effect_test(univ_dict,key,group_return,group_excess_return,index_return=None):
    """Run three pass/fail checks on a factor's grouped returns.

    Args:
        univ_dict: mapping keyed by rebalance date; only the first and last
            keys are used, to measure the elapsed calendar days.
        key: factor name.  Unused; kept so existing callers keep working.
        group_return: DataFrame of group returns, one column per group.
            Columns are assumed to be ordered from the lowest to the highest
            factor group -- confirm against the producer.
        group_excess_return: DataFrame with the same columns holding the
            groups' returns in excess of the benchmark.
        index_return: Series of benchmark returns.  When omitted, the
            module-level variable ``index_return`` is used, which is what
            this function relied on before the parameter existed.

    Returns:
        A list of five booleans:
        ``[test_one_pass, test_two_win_pass, test_two_lose_pass,
        test_three_win_pass, test_three_lose_pass]``.
    """
    if index_return is None:
        # Backward compatibility with callers that rely on the global.
        index_return = globals()['index_return']

    rebalance_dates = list(univ_dict.keys())
    daylength = (rebalance_dates[-1] - rebalance_dates[0]).days
    exponent = 365 / daylength
    # Annualised growth factors (final net value ** (365 / days)); no "-1" is
    # applied, which is harmless because only differences are compared below.
    annual_return = np.power((group_return.astype(float) + 1).cumprod().iloc[-1, :], exponent)
    index_annual_return = np.power((index_return + 1).cumprod().iloc[-1], exponent)

    # Test One: |correlation| between group order and annualised return > 0.5.
    group_labels = list(annual_return.index)
    sequence = pd.Series(np.arange(len(group_labels), dtype=float), index=annual_return.index)
    test_one_corr = annual_return.corr(sequence)
    test_one_passgrade = 0.5
    test_one_pass = abs(test_one_corr) > test_one_passgrade

    # A negative correlation means the first group is the winner.
    if test_one_corr < 0:
        wingroup, losegroup = group_labels[0], group_labels[-1]
    else:
        wingroup, losegroup = group_labels[-1], group_labels[0]

    # Test Two: the winner beats and the loser trails the benchmark by more
    # than 5% (annualised).
    test_two_passgrade = 0.05
    test_two_win_pass = annual_return[wingroup] - index_annual_return > test_two_passgrade
    test_two_lose_pass = index_annual_return - annual_return[losegroup] > test_two_passgrade

    # Test Three: the winner has positive excess return, and the loser negative
    # excess return, in more than half of the periods.
    test_three_grade = 0.5
    win_excess = group_excess_return[wingroup]
    lose_excess = group_excess_return[losegroup]
    test_three_win_pass = (win_excess > 0).sum() / len(win_excess) > test_three_grade
    test_three_lose_pass = (lose_excess < 0).sum() / len(lose_excess) > test_three_grade

    return [test_one_pass, test_two_win_pass, test_two_lose_pass,
            test_three_win_pass, test_three_lose_pass]

#2. 计算绩效
def plot_nav(all_return_df,index_return,key):
    """Plot the net value of every group, raw and hedged against the index.

    The upper panel shows the cumulative product of ``1 + return`` for each
    column of ``all_return_df``; the lower panel shows the same after
    subtracting ``index_return``.  ``key`` is accepted but not used.
    """
    # Figure with two stacked panels.
    fig = plt.figure(figsize=(12,12))
    fig.set_facecolor('white')
    fig.set_tight_layout(True)
    ax_nav = fig.add_subplot(211)
    ax_hedged = fig.add_subplot(212)
    panel_text = (
        (ax_nav, u"净值", u"因子选股 - 净值走势"),
        (ax_hedged, u"对冲净值", u"因子选股 - 对冲指数后净值走势"),
    )
    for axis, ylabel, title in panel_text:
        axis.grid()
        axis.set_ylabel(ylabel, fontsize=16)
        axis.set_title(title, fontsize=16)

    # One line per group in each panel.
    dates = list(all_return_df.index)
    for group in all_return_df.columns:
        group_return = all_return_df[group]
        ax_nav.plot(dates, (1 + group_return).cumprod(), label=str(group))
        ax_hedged.plot(dates, (1 + group_return - index_return).cumprod(), label=str(group))

    for axis in (ax_nav, ax_hedged):
        axis.legend(loc=0, fontsize=12)
    
def polish(x):
    """Format a ratio as a percentage with two decimals, e.g. 0.1234 -> '12.34%'."""
    percent = x * 100
    return '{:.2f}%'.format(percent)

def _max_drawdown(nav):
    """Largest relative drop of ``nav`` below its running peak (peak is at least 1)."""
    running_peak = nav.cummax().clip(lower=1)
    return (1 - nav / running_peak).max()


def result_stats(key,all_return_df,index_return):
    """Build a performance table with one row per column of ``all_return_df``.

    Args:
        key: label written as the name of the result's index.
        all_return_df: DataFrame of returns, one column per group.  Its index
            must support subtraction yielding an object with ``.days``
            (e.g. a DatetimeIndex).
        index_return: Series of benchmark returns; it is subtracted from each
            column to obtain the hedged return.

    Returns:
        DataFrame indexed by the columns of ``all_return_df`` with two-level
        columns: risk metrics (Alpha, Beta, information ratio, Sharpe ratio),
        long-only metrics and hedged metrics (annualised return, maximum
        drawdown, volatility).  Percent-style metrics are formatted strings;
        Beta, information ratio and Sharpe ratio are rounded to two decimals.
    """
    rf = 0.04                 # annual risk-free rate
    periods_per_year = 252    # annualisation factor for volatility

    sequences = all_return_df.columns

    cols = [(u'风险指标', u'Alpha'), (u'风险指标', u'Beta'), (u'风险指标', u'信息比率'), (u'风险指标', u'夏普比率'),
            (u'纯多头', u'年化收益'), (u'纯多头', u'最大回撤'), (u'纯多头', u'收益波动率'),
            (u'对冲后', u'年化收益'), (u'对冲后', u'最大回撤'), (u'对冲后', u'收益波动率')]
    columns = pd.MultiIndex.from_tuples(cols)
    # rename_axis returns a new index, so the name of the caller's
    # ``all_return_df.columns`` is not changed as a side effect.
    result_df = pd.DataFrame(index=sequences, columns=columns).rename_axis("%s" % (key))

    # The benchmark net value does not depend on the group.
    nav_index = (1 + index_return).cumprod()

    for sq in sequences:
        # Net values
        return_data = all_return_df[sq].astype(float)
        return_data_excess = return_data - index_return
        nav = (1 + return_data).cumprod()
        nav_excess = (1 + return_data_excess).cumprod()

        # Beta = corr(r, index) * std(r) / std(index)
        beta = return_data.corr(index_return) * return_data.std() / index_return.std()

        # Annualised returns from the final net value over elapsed calendar days
        daylength = (return_data.index[-1] - return_data.index[0]).days
        annualise = 365.0 / daylength
        yearly_return = np.power(nav.iloc[-1], annualise) - 1
        yearly_return_excess = np.power(nav_excess.iloc[-1], annualise) - 1
        yearly_index_return = np.power(nav_index.iloc[-1], annualise) - 1

        # Maximum drawdown relative to the running peak
        max_drawdown = _max_drawdown(nav)
        max_drawdown_excess = _max_drawdown(nav_excess)

        # Annualised volatility
        tracking_error = return_data_excess.std() * np.sqrt(periods_per_year)
        vol = return_data.std() * np.sqrt(periods_per_year)
        vol_excess = tracking_error

        # Jensen's alpha: Rp - (rf + beta * (Rm - rf)).  The previous code
        # multiplied beta by (Rp - Rm) instead of the market premium.
        alpha = yearly_return - (rf + beta * (yearly_index_return - rf))

        # Information ratio: active return over tracking error
        ir = (yearly_return - yearly_index_return) / tracking_error

        # Sharpe ratio
        sharpe = (yearly_return - rf) / vol

        # Pretty-print
        alpha, yearly_return, max_drawdown, vol, yearly_return_excess, max_drawdown_excess, vol_excess = \
            map(polish, [alpha, yearly_return, max_drawdown, vol,
                         yearly_return_excess, max_drawdown_excess, vol_excess])
        sharpe = round(sharpe, 2)
        ir = round(ir, 2)
        # Round beta itself; it used to be overwritten with the rounded ir.
        beta = round(beta, 2)

        result_df.loc[sq] = [alpha, beta, ir, sharpe, yearly_return, max_drawdown, vol,
                             yearly_return_excess, max_drawdown_excess, vol_excess]
    return result_df

def draw_excess_return(excess_return):
    """Draw a bar chart of each group's mean excess return.

    The first row of ``excess_return`` is skipped, the remaining rows are
    averaged per column, and the columns are relabelled ``label + 1`` for the
    x axis.  Positive bars are red, negative bars are green.
    """
    excess_return_mean = excess_return.iloc[1:].astype(float).mean()
    # Use a concrete list: assigning a lazy ``map`` object as an index is not
    # reliable on Python 3.
    excess_return_mean.index = [int(label) + 1 for label in excess_return_mean.index]
    excess_plus = excess_return_mean[excess_return_mean > 0]
    excess_minus = excess_return_mean[excess_return_mean < 0]

    fig = plt.figure(figsize=(12, 6))
    fig.set_facecolor('white')
    ax1 = fig.add_subplot(111)
    ax1.bar(excess_plus.index, excess_plus.values, align='center', color='r', width=0.35)
    ax1.bar(excess_minus.index, excess_minus.values, align='center', color='g', width=0.35)
    ax1.set_xlim(left=0.5, right=len(excess_return_mean)+0.5)
    ax1.set_ylabel(u'超额收益', fontsize=16)
    ax1.set_xlabel(u'十分位分组', fontsize=16)
    ax1.set_xticks(excess_return_mean.index)
    ax1.set_xticklabels([int(x) for x in ax1.get_xticks()], fontsize=14)
    # Format ticks as two-decimal percentages.  The previous
    # ``str(x*100)+'0%'`` exposed float artefacts such as
    # '0.70000000000000010%' and produced a varying number of decimals.
    ax1.set_yticklabels(['%.2f%%' % (tick * 100) for tick in ax1.get_yticks()], fontsize=14)
    ax1.set_title(u"因子选股分组超额收益", fontsize=16)
    ax1.grid()
# Step 1: rank-IC series of every factor, then IC_IR = mean(IC) / std(IC).
print('计算IC_IR......')
ic_list_dict = dict()
for key, factor in all_factor_dict.items():
    ic_list_dict[key] = ic_list = ic_calculator(factor, return_df, univ_dict)
# Collect the results.  The index drops the last rebalance date; presumably
# ic_calculator skips that date because it has no return yet -- verify.
ic_df = pd.DataFrame(ic_list_dict, index=list(univ_dict.keys())[:-1])
ic_ir_se = ic_df.mean().div(ic_df.std())

# Step 2: grouped returns of every factor, at two frequencies.
print('计算分组收益......')
GroupNum = 10
all_Factor_Group_Return_dict, Factor_Group_Return_dict = {}, {}
for key, factor in all_factor_dict.items():
    # Returns on every row of all_return_df
    all_GroupRet_df = all_Group_Return_calculator(factor, univ_dict, all_return_df, GroupNum)
    # Returns per rebalance period
    GroupRet_df = Group_Return_calculator(factor, univ_dict, return_df, GroupNum)
    all_Factor_Group_Return_dict[key] = all_GroupRet_df
    Factor_Group_Return_dict[key] = GroupRet_df
    
# Step 3: benchmark returns, and group returns in excess of the benchmark.
print('计算指数收益......')
index = '000300.XSHG'
index_return, index_return_by_tradeday = get_index_return(univ_dict, index)
Factor_Group_Excess_Return_dict = dict()
for key, group_return in Factor_Group_Return_dict.items():
    # Row-wise subtraction: every group column minus the benchmark series.
    Factor_Group_Excess_Return_dict[key] = group_return.sub(index_return_by_tradeday, axis=0)

# Step 4: run the pass/fail checks for every factor.
print('因子有效性测试......')
effect_test_dict = dict()
for key, group_return in Factor_Group_Return_dict.items():
    group_excess_return = Factor_Group_Excess_Return_dict[key]
    effect_test_dict[key] = effect_test(
        univ_dict,
        key,
        group_return,
        group_excess_return,
    )
    
# ---------- names of the factors that pass every check ----------
effect_factor_list = []
for key, effect in effect_test_dict.items():
    if not all(effect):
        continue
    effect_factor_list.append(key)
effect_factor_list
# ---------- factor data of the passing factors ----------
effect_factor_dict = {
    name: all_factor_dict[name]
    for name in all_factor_dict
    if name in effect_factor_list
}

print('完成')
计算IC_IR......
计算分组收益......
计算指数收益......
因子有效性测试......
完成
# Summary table: ICIR of each factor next to its five pass/fail flags.
EffectTestresult = pd.concat(
    [ic_ir_se.to_frame('a'), pd.DataFrame(effect_test_dict).transpose()],
    axis=1,
)
# Two header levels: test name on top, winner/loser group below.
columns = [
    ['ICIR', '测试一', '测试二', '测试二', '测试三', '测试三'],
    [' ', ' ', '胜者组', '败者组', '胜者组', '败者组'],
]
EffectTestresult.columns = columns
EffectTestresult
因子 | ICIR | 测试一 | 测试二(胜者组) | 测试二(败者组) | 测试三(胜者组) | 测试三(败者组)
AQI -1.372226 True True True True True
DSRI -0.165384 False False True False True
GMI -0.089589 False False True True True
LVGI 0.358615 False True False False True
SGAI 0.425046 False False False False False
SGI 0.153619 True False True False True
TATA -0.213534 False False False False False
# Plots a single factor; iterate over effect_factor_dict.items() to plot all.
key = 'AQI'
plot_nav(
    all_return_df=all_Factor_Group_Return_dict[key],
    index_return=index_return,
    key=key,
)
/opt/conda/envs/python3new/lib/python3.6/site-packages/matplotlib/figure.py:1743: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "
# Statistics for a single factor; iterate over effect_factor_dict.items() for all.
key = 'AQI'
result_df = result_stats(
    key=key,
    all_return_df=all_Factor_Group_Return_dict[key],
    index_return=index_return,
)
result_df
AQI分组 | 风险指标:Alpha | 风险指标:Beta | 风险指标:信息比率 | 风险指标:夏普比率 | 纯多头:年化收益 | 纯多头:最大回撤 | 纯多头:收益波动率 | 对冲后:年化收益 | 对冲后:最大回撤 | 对冲后:收益波动率
0 -9.06% 1.46 1.46 0.36 10.88% 14.90% 19.08% 17.21% 11.70% 11.22%
1 -9.85% -0.43 -0.43 -0.8 -9.06% 24.32% 16.31% -4.07% 9.79% 8.45%
2 -9.29% 0.26 0.26 -0.45 -3.42% 20.17% 16.54% 1.96% 7.43% 7.95%
3 -9.84% -0.4 -0.4 -0.81 -8.39% 21.50% 15.39% -3.42% 11.02% 7.28%
4 -10.01% -0.64 -0.64 -0.9 -10.19% 19.36% 15.68% -5.28% 12.88% 7.39%
5 -10.46% -1.17 -1.17 -1.14 -13.24% 23.08% 15.18% -8.52% 12.20% 6.68%
6 -10.62% -0.86 -0.86 -1.07 -13.35% 25.46% 16.15% -8.68% 16.11% 9.19%
7 -11.25% -1.25 -1.25 -1.31 -17.53% 25.81% 16.39% -13.08% 20.16% 9.64%
8 -13.13% -2.56 -2.56 -2.05 -28.40% 35.27% 15.84% -24.55% 27.16% 8.98%
9 -9.63% -1.01 -1.01 -1.05 -15.32% 30.95% 18.34% -10.46% 16.39% 9.75%
# Chart for a single factor; iterate over effect_factor_dict.items() for all.
key = 'AQI'
draw_excess_return(excess_return=Factor_Group_Excess_Return_dict[key])
 

全部回复

0/140

达人推荐

量化课程

    移动端课程