# 盘古开天地-load数据
import datetime
import jqdata
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from jqfactor import Factor,calc_factors
import pandas as pd
import statsmodels.api as sm
import scipy.stats as st
import pickle
# Load the pre-computed data package (presumably written by the first part of
# this backtest framework -- TODO confirm where 'Z1Package.pkl' comes from).
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load this file if it comes from a trusted source.
# The `with` block closes the file handle, which the bare open() never did.
with open('Z1Package.pkl', 'rb') as pkl_file:
    load_Package = pickle.load(pkl_file)
# The package is unpacked into the six module-level names used below.
univ_dict,return_df,all_return_df,all_factor_dict,MC_df,all_industry_df=load_Package

/opt/conda/envs/python3new/lib/python3.6/site-packages/statsmodels/compat/pandas.py:56: FutureWarning: The pandas.core.datetools module is deprecated and will be removed in a future version. Please use the pandas.tseries module instead.
  from pandas.core import datetools

# 1.回测基础数据计算
def all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum=10):
    """Compute per-row returns of factor-sorted groups held between rebalances.

    For every pair of consecutive keys (start, end) of ``univ_dict`` the
    stocks of the ``start`` universe that have a non-NaN factor value are
    sorted ascending by factor and cut into ``GroupNum`` slices.  Each slice
    is treated as an equal-weight buy-and-hold basket over the rows
    ``start:end`` of ``all_return_df``.

    Args:
        factor: DataFrame indexed by date with one column per stock.
        univ_dict: mapping of rebalance date -> iterable of stock labels;
            its key order defines the rebalance sequence.
        all_return_df: DataFrame of per-row stock returns (date x stock).
        GroupNum: number of groups; group 0 holds the lowest factor values.

    Returns:
        Float DataFrame indexed by the rows of ``all_return_df`` from the
        first rebalance date on, with columns ``0..GroupNum-1``.  Values are
        shifted down one row, so the first row is 0 and each later row holds
        the basket return computed from the previous row's stock returns.
    """
    date_list = list(univ_dict.keys())
    group_labels = list(range(GroupNum))
    # A float frame avoids the object-dtype fillna/downcast path later on.
    all_Group_Ret_df = pd.DataFrame(index=list(all_return_df.index),
                                    columns=group_labels, dtype=float)
    for start, end in zip(date_list[:-1], date_list[1:]):
        factor_row = factor.loc[start].dropna()
        has_factor = set(factor_row.index)
        # Keep universe order (deduplicated) instead of indexing with a set:
        # set indexers are rejected by recent pandas and their iteration
        # order makes tie-breaking differ from run to run.
        members = [s for s in dict.fromkeys(univ_dict[start]) if s in has_factor]
        # Stable sort: stocks with equal factor values keep universe order.
        ranked = list(factor_row.loc[members].sort_values(kind='mergesort').index)
        n_stocks = len(ranked)
        for i in group_labels:
            group_stock = ranked[int(n_stocks/GroupNum*i):int(n_stocks/GroupNum*(i+1))]
            # Mean cumulative value of the basket members since `start`.
            cumret = (all_return_df.loc[start:end,group_stock]+1).cumprod().mean(axis=1)
            # Row-over-row change of the basket value, stored on the row whose
            # returns produced it; the `end` row is left NaN here and is
            # overwritten by the next period, which begins on that row.
            all_Group_Ret_df.loc[start:end,i] = cumret.shift(1).fillna(1).pct_change().shift(-1)
    # Drop rows before the first rebalance and lag everything by one row.
    return all_Group_Ret_df.loc[date_list[0]:].shift(1).fillna(0)

def Group_Return_calculator(factor,univ_dict,return_df,GroupNum=10):
    """Compute the mean return of each factor-sorted group per rebalance date.

    On every key of ``univ_dict`` the universe is restricted to stocks with
    both a non-NaN factor value and a non-NaN return on that date, sorted
    ascending by factor and cut into ``GroupNum`` slices; the group value is
    the plain mean of ``return_df`` over the slice on that date.

    Args:
        factor: DataFrame indexed by date with one column per stock.
        univ_dict: mapping of rebalance date -> iterable of stock labels.
        return_df: DataFrame of stock returns indexed by rebalance date.
        GroupNum: number of groups; group 0 holds the lowest factor values.

    Returns:
        Float DataFrame indexed by the rebalance dates with columns
        ``0..GroupNum-1``, shifted down one row (first row filled with 0).
    """
    date_list = list(univ_dict.keys())
    group_labels = list(range(GroupNum))
    # A float frame avoids the object-dtype fillna/downcast path later on.
    GroupRet_df = pd.DataFrame(index=date_list, columns=group_labels, dtype=float)
    for date in date_list:
        factor_row = factor.loc[date].dropna()
        return_row = return_df.loc[date].dropna()
        usable = set(factor_row.index) & set(return_row.index)
        # Keep universe order (deduplicated): iterating a set of strings is
        # hash-ordered, which made the grouping of tied factor values differ
        # between runs.
        members = [s for s in dict.fromkeys(univ_dict[date]) if s in usable]
        # Stable sort: stocks with equal factor values keep universe order.
        ranked = list(factor_row.loc[members].sort_values(kind='mergesort').index)
        n_stocks = len(ranked)
        for i in group_labels:
            group_stock = ranked[int(n_stocks/GroupNum*i):int(n_stocks/GroupNum*(i+1))]
            # An empty slice yields NaN, which the final fillna turns into 0.
            GroupRet_df.loc[date,i] = return_row.loc[group_stock].mean()
    return GroupRet_df.shift(1).fillna(0)

def ic_calculator(factor,return_df,univ_dict):
    """Return the Spearman rank correlations between factor and return.

    One correlation is computed per key of ``univ_dict``, in key order,
    over the stocks of that date's universe that have both a non-NaN
    factor value and a non-NaN return.  Dates with fewer than 10 such
    stocks are skipped, so the result can be shorter than ``univ_dict``.
    """
    rank_ics = []
    for rebalance_date, members in univ_dict.items():
        factor_row = factor.loc[rebalance_date].dropna()
        return_row = return_df.loc[rebalance_date].dropna()
        eligible = list(set(members) & set(factor_row.index) & set(return_row.index))
        if len(eligible) >= 10:
            # Only the correlation is kept; the p-value is discarded.
            rho, _ = st.spearmanr(factor_row.loc[eligible], return_row.loc[eligible])
            rank_ics.append(rho)
    return rank_ics

def get_index_return(univ_dict,index,count=250):
    """Fetch index closes and derive two return series from them.

    ``get_price`` is called for ``index`` with the latest key of
    ``univ_dict`` as end date and ``count`` rows of the 'close' field.

    Returns:
        A pair ``(per_row_return, per_rebalance_return)``: the percentage
        change of the close from the first key of ``univ_dict`` onward, and
        the percentage change between the closes picked at the keys of
        ``univ_dict``.  The leading NaN of each series is replaced by 0.
    """
    rebalance_dates = list(univ_dict.keys())
    close = get_price(index, end_date=max(rebalance_dates), count=count,
                      fields=['close'])['close']
    first_date = rebalance_dates[0]
    per_row_return = close.loc[first_date:].pct_change().fillna(0)
    per_rebalance_return = close.loc[rebalance_dates].pct_change().fillna(0)
    return per_row_return, per_rebalance_return

def effect_test(univ_dict,key,group_return,group_excess_return,index_return=None):
    """Run three pass/fail checks on the grouped returns of one factor.

    Args:
        univ_dict: mapping whose first and last keys (dates) delimit the
            test period used for annualisation.
        key: factor name; not used in the computation.
        group_return: DataFrame of per-rebalance group returns, one column
            per group, ordered from lowest to highest factor values.
        group_excess_return: same layout, group return minus index return.
        index_return: Series of index returns over the period.  When
            omitted, the module-level ``index_return`` is used, which is
            what the original implementation always read.

    Returns:
        ``[test_one, test_two_win, test_two_lose, test_three_win,
        test_three_lose]`` as plain booleans.
    """
    if index_return is None:
        # Backward compatibility with callers that rely on the global.
        index_return = globals()['index_return']

    dates = list(univ_dict.keys())
    daylength = (dates[-1] - dates[0]).days
    exponent = 365 / daylength
    # Both figures are annualised growth factors (1 is not subtracted); they
    # are only ever compared with each other, so the offset cancels.
    annual_return = ((group_return + 1).cumprod().iloc[-1, :]).astype(float) ** exponent
    index_annual_return = (index_return + 1).cumprod().iloc[-1] ** exponent

    # Test one: |correlation| between group rank and annualised return > 0.5.
    labels = list(group_return.columns)
    sequence = pd.Series(list(range(len(labels))), index=labels)
    test_one_corr = annual_return.corr(sequence)
    test_one_passgrade = 0.5
    test_one_pass = abs(test_one_corr) > test_one_passgrade

    # A negative correlation means the first group is the winner; the group
    # labels are taken from the frame instead of assuming exactly ten groups.
    if test_one_corr < 0:
        wingroup, losegroup = labels[0], labels[-1]
    else:
        wingroup, losegroup = labels[-1], labels[0]

    # Test two: winner beats the index and loser trails it by more than 5%.
    test_two_passgrade = 0.05
    test_two_win_pass = annual_return.loc[wingroup] - index_annual_return > test_two_passgrade
    test_two_lose_pass = index_annual_return - annual_return.loc[losegroup] > test_two_passgrade

    # Test three: the winner has positive excess return, and the loser
    # negative excess return, in more than half of the periods.
    test_three_grade = 0.5
    test_three_win_pass = (group_excess_return[wingroup] > 0).mean() > test_three_grade
    test_three_lose_pass = (group_excess_return[losegroup] < 0).mean() > test_three_grade

    return [bool(test_one_pass), bool(test_two_win_pass), bool(test_two_lose_pass),
            bool(test_three_win_pass), bool(test_three_lose_pass)]

#2. 计算绩效
def plot_nav(all_return_df,index_return,key):
    """Plot cumulative net value per group, raw and after subtracting the index.

    The upper panel shows the cumulative product of ``1 + return`` for every
    column of ``all_return_df``; the lower panel shows the same after
    ``index_return`` has been subtracted from each column.  ``key`` is
    accepted but not used.
    """
    # Figure with two stacked panels.
    figure = plt.figure(figsize=(12,12))
    figure.set_facecolor('white')
    figure.set_tight_layout(True)
    nav_ax = figure.add_subplot(211)
    hedged_ax = figure.add_subplot(212)
    panels = (
        (nav_ax, u"净值", u"因子选股 - 净值走势"),
        (hedged_ax, u"对冲净值", u"因子选股 - 对冲指数后净值走势"),
    )
    for axis, y_label, title in panels:
        axis.grid()
        axis.set_ylabel(y_label, fontsize=16)
        axis.set_title(title, fontsize=16)

    # One line per group on each panel.
    x_dates = list(all_return_df.index)
    for group in all_return_df.columns:
        group_return = all_return_df[group]
        nav_ax.plot(x_dates, (1+group_return).cumprod(), label=str(group))
        hedged_ax.plot(x_dates, (1+group_return-index_return).cumprod(), label=str(group))

    for axis in (nav_ax, hedged_ax):
        axis.legend(loc=0,fontsize=12)
    
def polish(x):
    """Format a ratio as a percentage string with two decimals (0.1234 -> '12.34%')."""
    return '%.2f%%' % (x*100)

def result_stats(key,all_return_df,index_return,rf=0.04):
    """Build a table of risk and performance figures for every group.

    Args:
        key: factor name, used as the name of the result index.
        all_return_df: DataFrame of per-row group returns with a
            datetime-like index (one column per group).
        index_return: Series of index returns on the same rows.
        rf: annual risk-free rate used for Alpha and Sharpe (default 0.04,
            the value previously hard-coded).

    Returns:
        DataFrame indexed by the columns of ``all_return_df`` with a
        two-level column header: risk figures (Alpha, Beta, information
        ratio, Sharpe), long-only figures and index-hedged figures.
        Percentages are formatted strings, ratios are rounded floats.
    """
    sequences = all_return_df.columns

    cols = [(u'风险指标', u'Alpha'), (u'风险指标', u'Beta'), (u'风险指标', u'信息比率'), (u'风险指标', u'夏普比率'),
            (u'纯多头', u'年化收益'), (u'纯多头', u'最大回撤'), (u'纯多头', u'收益波动率'),
            (u'对冲后', u'年化收益'), (u'对冲后', u'最大回撤'), (u'对冲后', u'收益波动率')]
    result_df = pd.DataFrame(index=sequences, columns=pd.MultiIndex.from_tuples(cols))
    result_df.index.name = "%s" % (key)

    # Scales a per-row standard deviation to a year of 252 rows.
    annualizer = 252 ** 0.5
    # The index net value does not depend on the group, so compute it once.
    nav_index = (1 + index_return).cumprod()

    def max_drawdown_of(nav):
        # Largest relative drop below the running peak; the peak is floored
        # at 1 so that a start below the initial value counts as a drawdown.
        running_peak = nav.cummax().clip(lower=1)
        return (1 - nav / running_peak).max()

    for sq in sequences:
        # Net values, long-only and after subtracting the index return.
        return_data = all_return_df[sq].astype(float)
        return_data_excess = return_data - index_return
        nav = (1 + return_data).cumprod()
        nav_excess = (1 + return_data_excess).cumprod()

        # Beta = corr(portfolio, index) * std(portfolio) / std(index).
        beta = return_data.corr(index_return) * return_data.std() / index_return.std()

        # Annualised returns over the calendar span of the data.
        daylength = (return_data.index[-1] - return_data.index[0]).days
        exponent = 365.0 / daylength
        yearly_return = nav.iloc[-1] ** exponent - 1
        yearly_return_excess = nav_excess.iloc[-1] ** exponent - 1
        yearly_index_return = nav_index.iloc[-1] ** exponent - 1

        # Maximum drawdown.
        max_drawdown = max_drawdown_of(nav)
        max_drawdown_excess = max_drawdown_of(nav_excess)

        # Annualised volatility.
        vol = return_data.std() * annualizer
        vol_excess = return_data_excess.std() * annualizer

        # Alpha = Rp - (rf + beta * (Rm - rf)).  The market premium was
        # previously written as (Rp - Rm), which is not an alpha.
        alpha = yearly_return - (rf + beta * (yearly_index_return - rf))

        # Information ratio: annual excess over the index per unit of
        # annualised tracking volatility.
        ir = (yearly_return - yearly_index_return) / vol_excess

        # Sharpe ratio.
        sharpe = (yearly_return - rf) / vol

        # Beta is rounded from beta itself; it used to be overwritten with
        # the information ratio, so both columns showed the same number.
        result_df.loc[sq] = [
            polish(alpha), round(beta, 2), round(ir, 2), round(sharpe, 2),
            polish(yearly_return), polish(max_drawdown), polish(vol),
            polish(yearly_return_excess), polish(max_drawdown_excess), polish(vol_excess),
        ]
    return result_df

def draw_excess_return(excess_return):
    """Draw a bar chart of the mean excess return of every group.

    The first row of ``excess_return`` is skipped before averaging (in this
    file the frames passed in start with a zero-filled row produced by a
    one-row shift).  Groups are relabelled 1..N on the x axis; positive
    means are drawn in red, negative ones in green.
    """
    excess_return_mean = excess_return.iloc[1:].mean()
    # Build the 1-based labels as a list; assigning a lazy `map` object as
    # an index relies on pandas consuming the iterator.
    excess_return_mean.index = [int(label) + 1 for label in excess_return_mean.index]
    excess_plus = excess_return_mean[excess_return_mean > 0]
    excess_minus = excess_return_mean[excess_return_mean < 0]

    fig = plt.figure(figsize=(12, 6))
    fig.set_facecolor('white')
    ax1 = fig.add_subplot(111)
    ax1.bar(excess_plus.index, excess_plus.values, align='center', color='r', width=0.35)
    ax1.bar(excess_minus.index, excess_minus.values, align='center', color='g', width=0.35)
    ax1.set_xlim(left=0.5, right=len(excess_return_mean)+0.5)
    ax1.set_ylabel(u'超额收益', fontsize=16)
    ax1.set_xlabel(u'十分位分组', fontsize=16)
    ax1.set_xticks(excess_return_mean.index)
    ax1.set_xticklabels([int(x) for x in ax1.get_xticks()], fontsize=14)
    # Two-decimal percentages.  The previous str(x*100)+'0%' only looked
    # right for ticks with exactly one decimal and leaked float noise
    # (e.g. '0.30000000000000004' + '0%') otherwise.
    ax1.set_yticklabels(['%.2f%%' % (tick * 100) for tick in ax1.get_yticks()], fontsize=14)
    ax1.set_title(u"因子选股分组超额收益", fontsize=16)
    ax1.grid()

print('计算IC_IR......')
# Rank-IC series of every factor, keyed by factor name.
ic_list_dict={name:ic_calculator(values,return_df,univ_dict) for name,values in all_factor_dict.items()}
# The IC lists are labelled with every rebalance date except the last one.
ic_df=pd.DataFrame(ic_list_dict,index=list(univ_dict.keys())[:-1])
# IC_IR = mean of the IC series divided by its standard deviation.
ic_ir_se=ic_df.mean()/ic_df.std()

print('计算分组收益......')
GroupNum=10
all_Factor_Group_Return_dict={}
Factor_Group_Return_dict={}
for key,factor in all_factor_dict.items():
    # Group returns on every row of all_return_df.
    all_Factor_Group_Return_dict[key]=all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum)
    # Group returns on rebalance dates only.
    Factor_Group_Return_dict[key]=Group_Return_calculator(factor,univ_dict,return_df,GroupNum)

print('计算指数收益......')
index='000300.XSHG'
index_return,index_return_by_tradeday=get_index_return(univ_dict,index)
# Group return minus the index return on each rebalance date.
Factor_Group_Excess_Return_dict={
    key:group_return.subtract(index_return_by_tradeday,axis=0)
    for key,group_return in Factor_Group_Return_dict.items()
}

print('因子有效性测试......')
effect_test_dict={
    key:effect_test(univ_dict,key,group_return,Factor_Group_Excess_Return_dict[key])
    for key,group_return in Factor_Group_Return_dict.items()
}

# Factors for which every one of the five checks passed.
effect_factor_list=[key for key,effect in effect_test_dict.items() if all(effect)]
effect_factor_list
# Factor data restricted to those factors.
effect_factor_dict={key:all_factor_dict[key] for key in all_factor_dict if key in effect_factor_list}

print('完成')

计算IC_IR......
计算分组收益......
计算指数收益......
因子有效性测试......
完成

# Summary table: the IC_IR of each factor next to its five pass/fail flags.
icir_column=ic_ir_se.to_frame('a')
test_flags=pd.DataFrame(effect_test_dict).T
EffectTestresult=pd.concat([icir_column,test_flags],axis=1)
# Two header rows: the test name on top, winner/loser group underneath.
columns=[
    ['ICIR','测试一', '测试二', '测试二', '测试三', '测试三', ],
    [' ', ' ', '胜者组','败者组','胜者组','败者组'],
]
EffectTestresult.columns=columns
EffectTestresult

# A loop over effect_factor_dict was left commented out here; only the
# 'AQI' factor is plotted: net value of its groups, raw and index-hedged.
key='AQI'
plot_nav(all_Factor_Group_Return_dict[key],index_return,key)

/opt/conda/envs/python3new/lib/python3.6/site-packages/matplotlib/figure.py:1743: UserWarning: This figure includes Axes that are not compatible with tight_layout, so its results might be incorrect.
  warnings.warn("This figure includes Axes that are not "

# A loop over effect_factor_dict was left commented out here; only the
# 'AQI' factor is evaluated: per-group risk and performance table.
key='AQI'
result_df=result_stats(key,all_Factor_Group_Return_dict[key],index_return)
# Bare expression: displays the table when run as a notebook cell.
result_df

# A loop over effect_factor_dict was left commented out here; only the
# 'AQI' factor is drawn: mean excess return of each group as a bar chart.
key='AQI'
draw_excess_return(Factor_Group_Excess_Return_dict[key])

	ICIR	测试一	测试二		测试三
			胜者组	败者组	胜者组	败者组
AQI	-1.372226	True	True	True	True	True
DSRI	-0.165384	False	False	True	False	True
GMI	-0.089589	False	False	True	True	True
LVGI	0.358615	False	True	False	False	True
SGAI	0.425046	False	False	False	False	False
SGI	0.153619	True	False	True	False	True
TATA	-0.213534	False	False	False	False	False

	风险指标				纯多头			对冲后
	Alpha	Beta	信息比率	夏普比率	年化收益	最大回撤	收益波动率	年化收益	最大回撤	收益波动率
AQI
0	-9.06%	1.46	1.46	0.36	10.88%	14.90%	19.08%	17.21%	11.70%	11.22%
1	-9.85%	-0.43	-0.43	-0.8	-9.06%	24.32%	16.31%	-4.07%	9.79%	8.45%
2	-9.29%	0.26	0.26	-0.45	-3.42%	20.17%	16.54%	1.96%	7.43%	7.95%
3	-9.84%	-0.4	-0.4	-0.81	-8.39%	21.50%	15.39%	-3.42%	11.02%	7.28%
4	-10.01%	-0.64	-0.64	-0.9	-10.19%	19.36%	15.68%	-5.28%	12.88%	7.39%
5	-10.46%	-1.17	-1.17	-1.14	-13.24%	23.08%	15.18%	-8.52%	12.20%	6.68%
6	-10.62%	-0.86	-0.86	-1.07	-13.35%	25.46%	16.15%	-8.68%	16.11%	9.19%
7	-11.25%	-1.25	-1.25	-1.31	-17.53%	25.81%	16.39%	-13.08%	20.16%	9.64%
8	-13.13%	-2.56	-2.56	-2.05	-28.40%	35.27%	15.84%	-24.55%	27.16%	8.98%
9	-9.63%	-1.01	-1.01	-1.05	-15.32%	30.95%	18.34%	-10.46%	16.39%	9.75%

量化交易吧 / 量化平台 帖子：3369513 新帖：3

多因子回测框架（下）--检验因子

Tango发表于：8 月 6 日 11：34回复(1)

全部回复

0/140

粉丝:914

帖子数:0

粉丝:734

帖子数:0

粉丝:555

帖子数:3

量化课程

热门标签

删除回复

确认要删除这篇文章么？

举报用户

信息提示

该文章已删除

设置置顶

完成设置【置顶】！

设置置顶

已取消设置【置顶】！

设置精华

完成设置【精华】！

设置精华

已取消设置【精华】！

审核信息

该文章已审核通过

审核信息

您已设置该文章审核不通过

举报成功

您已举报成功

用户登录

移动帖子

创建私信

屏蔽提示

确认要屏蔽该用户么？

屏蔽回复

您已对该用户实现屏蔽

信息回复

已发送成功

量化交易吧 / 量化平台帖子：3369513 新帖：3