看了一下文章多因子模型(三)-交易回测(策略收益90%,回撤12%)
于是自己回测了一下,发现原文的收益主要来源于未来函数:筛选因子时使用的是当期IC,而当期IC是用下一期的收益计算得到的。
改用上一期的IC之后,结果与使用当期IC时有着巨大的区别。
@颖da
#第一步-因子生成
import time
import jqdata
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from jqfactor import Factor,calc_factors
import pandas as pd
from pandas import Panel,DataFrame,Series
import statsmodels.api as sm
import scipy.stats as st
from jqfactor import get_factor_values
from jqfactor import winsorize,winsorize_med,neutralize,standardlize
import pickle
import xlrd # 手工输入156个因子太麻烦,所以我就在EXCEL里上传了,也可手工输入。
# Read the factor names from the first sheet of FactorTable.xlsx.
ExcelFile=xlrd.open_workbook('FactorTable.xlsx')
name=ExcelFile.sheet_names()
sheet=ExcelFile.sheet_by_name(name[0])
# Each sheet column holds one factor category. Column 1 is read in full;
# the other columns are cut to a fixed number of rows.
# NOTE(review): presumably the slice lengths equal the number of factor names
# in each column -- confirm against the workbook.
factor_quality=list(sheet.col_values(1))
factor_fundamental=list(sheet.col_values(2))[:28]
factor_mood=list(sheet.col_values(3))[:35]
factor_growth=list(sheet.col_values(4))[:8]
factor_risk=list(sheet.col_values(5))[:12]
factor_stock=list(sheet.col_values(6))[:15]
# Start of the timing for the factor generation step.
starttime=time.clock()
# `global` statements at module level have no effect; they only list the
# names that the rest of the script treats as shared configuration.
global g_index
global g_count
global g_factor_list
global g_univ_dict
global g_neu_factor
global g_factor_dict
# Index whose constituents form the stock universe.
g_index='000300.XSHG'
# Number of trade days to look back.
g_count=500
# Every factor that is computed.
g_factor_list=factor_quality+factor_fundamental+factor_mood+factor_growth+factor_risk+factor_stock
# Factors that get neutralized in get_final_factors.
g_neu_factor=factor_quality+factor_fundamental+factor_growth+factor_stock
g_factor_dict = {}
import cPickle as pickle
from six import StringIO
# Write to file
# Serialize the full factor list with pickle and store it
content = pickle.dumps(g_factor_list) # dumps returns the serialized string
write_file('JQFactorAuto/g_factor_list.pkl', content, append=False)
import cPickle as pickle
from six import StringIO
# Write to file
# Serialize the list of neutralized factors with pickle and store it
content = pickle.dumps(g_neu_factor) # dumps returns the serialized string
write_file('JQFactorAuto/g_neu_factor.pkl', content, append=False)
# Trade days of a trailing window, sampled every `interval` trade days
# (20 by default).
def get_trade_dates(end,count=250,interval=20):
    """Return every `interval`-th trade day of the window ending at `end`.

    The window is whatever jqdata.get_trade_days(end_date=end, count=count)
    returns. Sampling starts at the last day of the window and walks
    backwards, so the last day is always kept. The result is a list in the
    same order as the window itself.
    """
    date_list = list(jqdata.get_trade_days(end_date=end, count=count))
    # Reverse, take every `interval`-th element, reverse back. This selects
    # the same elements as testing `reversed_list.index(x) % interval == 0`
    # for each element, without the quadratic index lookups.
    return date_list[::-1][::interval][::-1]
# Build the stock pool for one date.
def get_stock_pool(date,index='all'):
    """Return the codes of the stocks eligible on `date`.

    A stock is eligible when its `start_date` is earlier than the first of
    the 60 most recent trade days ending at `date`. With index='all' every
    eligible stock is returned; otherwise only those that are also returned
    by get_index_stocks(index, date=date). In the latter case the list is
    built from a set, so its order is not defined.
    """
    securities = get_all_securities(types=['stock'], date=date)
    # First day of the 60-trade-day window that ends at `date`.
    listing_cutoff = jqdata.get_trade_days(end_date=date, count=60)[0]
    seasoned = securities[securities['start_date'] < listing_cutoff]
    universe_pool = list(seasoned.index)
    if index != 'all':
        constituents = get_index_stocks(index, date=date)
        return list(set(constituents) & set(universe_pool))
    return universe_pool
def get_stock_universe(trade_date_list,index='all'):
    """Collect the stock pool of every date in `trade_date_list`.

    Returns a pair (univ_list, univ_dict): the pools in the order of
    `trade_date_list`, and the same pools keyed by date.
    """
    univ_list = [get_stock_pool(trade_date, index) for trade_date in trade_date_list]
    univ_dict = dict(zip(trade_date_list, univ_list))
    return univ_list, univ_dict
# Shenwan level-1 industry of each stock.
def get_Industry_by_day(date,stock_list):
    """Return a Series (named `date`, indexed by `stock_list`) of industry codes.

    For every industry code in the fixed list below, the stocks returned by
    get_industry_stocks(code, date=date) that are also in `stock_list` are
    labelled with that code. Stocks matched by no industry keep the initial
    missing value.
    """
    sw_l1_codes = ['801010', '801020', '801030', '801040', '801050', '801080', '801110', '801120', '801130',
                   '801140', '801150', '801160', '801170', '801180', '801200', '801210', '801230', '801710',
                   '801720', '801730', '801740', '801750', '801760', '801770', '801780', '801790', '801880','801890']
    industry_se = Series(name=date, index=stock_list)
    wanted = set(stock_list)
    for code in sw_l1_codes:
        members = list(set(get_industry_stocks(code, date=date)) & wanted)
        industry_se.loc[members] = code
    return industry_se
"""def get_Industry_by_day(date):
industry_set = ['801010', '801020', '801030', '801040', '801050', '801080', '801110', '801120', '801130',
'801140', '801150', '801160', '801170', '801180', '801200', '801210', '801230', '801710',
'801720', '801730', '801740', '801750', '801760', '801770', '801780', '801790', '801880','801890']
industry_df = pd.DataFrame(index=[date],columns=g_univ_dict[date])
for industry in industry_set:
industry_stocks = get_industry_stocks(industry,date = date)
industry_stocks = list(set(industry_stocks)&set(g_univ_dict[date]))
industry_df.loc[date,industry_stocks] = industry
return industry_df
# 得到对应日期的行业数据
def get_industry_df(trade_date_list):
all_industry_df = pd.DataFrame()
for date in trade_date_list:
data = get_Industry_by_day(date)
all_industry_df = pd.concat([all_industry_df, data])
return all_industry_df
all_industry_df = get_industry_df(trade_date_list)
# 行业日期类型
all_industry_df.index = list(map(lambda x:x.strftime("%Y-%m-%d"),all_industry_df.index))
"""
# Fetch the values of all factors for one date.
def get_jq_factor_by_day(date,stock_list):
    """Return get_factor_values() for `stock_list`, all of g_factor_list, on `date` only."""
    return get_factor_values(securities=stock_list,
                             factors=g_factor_list,
                             start_date=date,
                             end_date=date)
# Replace missing factor values with the median of the stock's industry.
def replace_nan_indu(factor_se,indu_se):
    """Fill missing factor values with the industry median.

    factor_se: Series of factor values indexed by stock.
    indu_se:   Series of industry labels indexed by stock; stocks with a
               missing label are dropped from the result.

    Returns a Series named 'values_x' holding the stocks present in both
    inputs. A missing value is replaced by the median of the non-missing
    values of the same industry; it stays missing when the whole industry
    has no value. The inputs are not modified.
    """
    # Keep only stocks that have both a factor entry and an industry label.
    merged = factor_se.to_frame('values').merge(
        indu_se.dropna().to_frame('industryName1'),
        left_index=True, right_index=True, how='inner')
    # Median of each industry, broadcast back to one value per stock.
    industry_median = merged.groupby('industryName1')['values'].transform('median')
    # fillna instead of chained assignment (`df[col][mask] = ...`), which
    # writes to a temporary copy and is silently lost under copy-on-write.
    filled = merged['values'].fillna(industry_median)
    filled.name = 'values_x'
    return filled
# Build the final factor DataFrame of one date.
def get_final_factors(date):
    """Return a DataFrame with one processed column per factor of g_factor_list.

    Per factor: industry-median fill of missing values, winsorize_med,
    neutralize (only for factors listed in g_neu_factor), standardlize.
    The universe is get_stock_pool(date, index=g_index).
    """
    stock_list = get_stock_pool(date, index=g_index)
    raw_factors = get_jq_factor_by_day(date, stock_list)
    indu_se = get_Industry_by_day(date, stock_list)
    factor_df = DataFrame()
    for fac in list(g_factor_list):
        # The factor frame has one row (the single requested date).
        values = replace_nan_indu(raw_factors[fac].iloc[0], indu_se)
        # Winsorize
        values = winsorize_med(values, scale=3, inclusive=True, inf2nan=True, axis=1)
        # Neutralize only the factors that are configured for it
        if fac in g_neu_factor:
            values = neutralize(values, how=['jq_l1', 'market_cap'], date=date, axis=1)
        # Standardize
        # NOTE(review): axis=0 here versus axis=1 in the two calls above --
        # confirm against the jqfactor documentation.
        values = standardlize(values, inf2nan=True, axis=0)
        factor_df = pd.concat([factor_df, values.to_frame(fac)], axis=1)
    return factor_df
def get_all_final_factors(trade_date_list):
    """Run get_final_factors for every date on a thread pool, one thread per date.

    The computed frames are discarded and nothing is returned. A later
    definition with the same name replaces this function.
    """
    workers = ThreadPool(processes=len(trade_date_list))
    workers.map(get_final_factors, trade_date_list)
    workers.close()
    workers.join()
def get_all_final_factors(trade_date_list):
    """Compute and cache the factor frame of every date in `trade_date_list`.

    Dates are 'YYYY-MM-DD' strings. Results are kept in one pickle file per
    calendar year ('g_factor_dict_<year>.pkl' in the working directory),
    each holding a {date: factor frame} dict. A date that is already in its
    year file is skipped; otherwise get_final_factors(date) is computed and
    the year file is rewritten. Returns None.
    """
    for date in trade_date_list:
        # One cache file per year
        cache_path = 'g_factor_dict_%s.pkl' % date[:4]
        # Start from an empty cache when the file is missing or unreadable
        try:
            with open(cache_path, 'rb') as pkl_file:
                g_factor_dict_tmp = pickle.load(pkl_file)
        except (IOError, EOFError, pickle.UnpicklingError):
            g_factor_dict_tmp = {}
        # Skip dates that were computed by an earlier run
        if date in g_factor_dict_tmp:
            print("跳过:%s"%date)
            continue
        # Compute
        g_factor_dict_tmp[date] = get_final_factors(date)
        print("已完成:%s"%date)
        # Save after every date so an interrupted run keeps its progress
        with open(cache_path, 'wb') as pkl_file:
            pickle.dump(g_factor_dict_tmp, pkl_file, 0)
# Reference date of the run
#today=datetime.date.today()
# Only the last of the three assignments below takes effect.
today = '2018-11-13'
today = '2015-07-31' # start date
today = '2016-03-02'
g_count=500
# First of the two most recent trade days ending at `today`.
yesterday=jqdata.get_trade_days(end_date=today,count=2)[0]
print("today:%s"%today)
print("yesterday:%s"%yesterday)
today:2016-03-02 yesterday:2016-03-01
# Trade days of the past two years, one every 20 trade days
g_count=250*2
g_index='000300.XSHG'
trade_date_list=get_trade_dates(yesterday,g_count,20)
# Dates are handled as 'YYYY-MM-DD' strings from here on.
trade_date_list = list(map(lambda x:x.strftime("%Y-%m-%d"),trade_date_list))
# Reverse the list in place (the displayed result starts with the most recent date).
trade_date_list.reverse()
trade_date_list
['2016-03-01', '2016-01-26', '2015-12-28', '2015-11-30', '2015-11-02', '2015-09-28', '2015-08-27', '2015-07-30', '2015-07-02', '2015-06-03', '2015-05-06', '2015-04-07', '2015-03-09', '2015-02-02', '2015-01-05', '2014-12-04', '2014-11-06', '2014-10-09', '2014-09-03', '2014-08-06', '2014-07-09', '2014-06-11', '2014-05-13', '2014-04-11', '2014-03-13']
# Generate (or load from cache) the factors of every rebalance date and time the run.
starttime=time.clock()
get_all_final_factors(trade_date_list)
endtime=time.clock()
runtime=endtime-starttime
print('因子生成运行完成,用时 %.2f 秒' % runtime)
跳过:2016-03-01 跳过:2016-01-26 跳过:2015-12-28 跳过:2015-11-30 跳过:2015-11-02 跳过:2015-09-28 跳过:2015-08-27 跳过:2015-07-30 跳过:2015-07-02 跳过:2015-06-03 跳过:2015-05-06 跳过:2015-04-07 跳过:2015-03-09 跳过:2015-02-02 跳过:2015-01-05 跳过:2014-12-04 跳过:2014-11-06 跳过:2014-10-09 跳过:2014-09-03 跳过:2014-08-06 跳过:2014-07-09 跳过:2014-06-11 跳过:2014-05-13 跳过:2014-04-11 跳过:2014-03-13 因子生成运行完成,用时 17.83 秒
# Reference date of the run
print("today:%s"%today)
print("yesterday:%s"%yesterday)
# Trade days of the past two years, one every 20 trade days
g_count=250*2
g_index='000300.XSHG'
trade_date_list=get_trade_dates(yesterday,g_count,20)
# Dates as 'YYYY-MM-DD' strings; unlike the earlier cell, the list is not reversed here.
trade_date_list = list(map(lambda x:x.strftime("%Y-%m-%d"),trade_date_list))
# Calendar years whose cache file has to be read
years = list(set(map(lambda x:x[:4],trade_date_list)))
# Merge the per-year cache files into one {date: factor frame} dict
g_factor_dict = {}
for year in years:
    # `with` closes the cache file as soon as it is loaded
    with open('g_factor_dict_%s.pkl'%year, 'rb') as pkl_file:
        g_factor_dict_tmp = pickle.load(pkl_file)
    g_factor_dict.update(g_factor_dict_tmp)
# Drop cached dates that are not rebalance dates. Iterate over a copy of the
# keys: deleting while iterating the live key view raises RuntimeError.
for k in list(g_factor_dict.keys()):
    if k not in trade_date_list:
        del g_factor_dict[k]
## Reshape: from one frame per date to one frame per factor
p = Panel(g_factor_dict)
all_factor_dict = {}
for fac in p.minor_axis:
    # Transposed slice; later code indexes these frames as [date, stock].
    all_factor_dict[fac] = p[:,:,fac].T
today:2016-03-02 yesterday:2016-03-01
def get_return(trade_date_list,count=250):
    """Return (return_df, all_return_df), both holding forward returns.

    Close prices (fq='pre') of the `count` bars ending at the latest date of
    `trade_date_list` are fetched for the full stock pool of that date.
    return_df:     change between consecutive dates of `trade_date_list`,
                   shifted so each row holds the return up to the next date.
    all_return_df: the same on every bar of the price frame.
    Small risk noted by the author: a stock that used to be an index
    constituent may have been delisted since and is then missing from the pool.
    """
    last_date = max(trade_date_list)
    universe = get_stock_pool(last_date, index='all')
    price = get_price(universe, end_date=last_date, count=count, fields=['close'], fq='pre')['close']
    price.index = [day.strftime("%Y-%m-%d") for day in price.index]
    all_return_df = price.pct_change().shift(-1)
    return_df = price.loc[trade_date_list].pct_change().shift(-1)
    return return_df, all_return_df
return_df,all_return_df=get_return(trade_date_list,count=g_count) # historical returns of all stocks
def get_stock_universe(trade_date_list,index='all'):
    """Return (univ_list, univ_dict) with the stock pool of every given date.

    univ_list follows the order of `trade_date_list`; univ_dict maps each
    date to the same pool object.
    """
    pools = [get_stock_pool(day, index) for day in trade_date_list]
    return pools, dict(zip(trade_date_list, pools))
print('获取股票池')
univ_list,univ_dict=get_stock_universe(trade_date_list,index=g_index) # build the stock pool of every rebalance date
获取股票池
# Step II: 因子筛选用到的函数
def ic_calculator(factor,return_df,univ_dict):
    """Return the rank IC of one factor for every date of `univ_dict`.

    factor:    DataFrame indexed [date, stock] with the factor values.
    return_df: DataFrame indexed [date, stock] with the returns to correlate.
    univ_dict: {date: list of stocks}.

    The result is a list with one Spearman correlation per date, in sorted
    date order. Only stocks of the date's universe that have both a factor
    value and a return are used. The entry is NaN when fewer than two such
    stocks exist, so the list always stays aligned with the sorted dates.
    """
    ic_list = []
    for date in sorted(univ_dict):
        factor_row = factor.loc[date].dropna()
        return_row = return_df.loc[date].dropna()
        univ = sorted(set(univ_dict[date]) & set(factor_row.index) & set(return_row.index))
        if len(univ) < 2:
            # A correlation needs at least two observations.
            ic_list.append(float('nan'))
            continue
        # Cast to float: the frames may carry object dtype.
        ic, _ = st.spearmanr(factor_row.loc[univ].astype(float),
                             return_row.loc[univ].astype(float))
        ic_list.append(ic)
    return ic_list
# 1. Base data for the backtest: per-bar returns of every factor group.
def all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum=10):
    """Return the per-bar return of each factor group between rebalance dates.

    factor:        DataFrame indexed [date, stock] with the factor values.
    univ_dict:     {rebalance date: list of stocks}.
    all_return_df: DataFrame indexed [bar date, stock] with forward returns.
    GroupNum:      number of groups; group 0 holds the largest factor values.

    At each rebalance date the universe stocks with a factor value are
    ranked in descending order and cut into GroupNum consecutive groups.
    Within a period a group's return comes from the mean of its members'
    cumulative growth. The result starts at the first rebalance date, is
    shifted by one bar and has missing values replaced by 0.
    """
    all_date_list = list(all_return_df.index)
    date_list = sorted(univ_dict)
    all_Group_Ret_df = pd.DataFrame(index=all_date_list, columns=list(range(GroupNum)), dtype=float)
    for start, end in zip(date_list[:-1], date_list[1:]):
        factor_row = factor.loc[start].dropna()
        scores = factor_row.to_dict()
        # Universe stocks that have a factor value on the start date
        candidates = set(univ_dict[start]) & set(factor_row.index)
        # Descending by factor value; the inner sort makes ties deterministic
        ranked = sorted(sorted(candidates), key=scores.get, reverse=True)
        N = len(ranked)
        for i in range(GroupNum):
            # Integer arithmetic: every stock lands in exactly one group
            group_stock = ranked[N * i // GroupNum:N * (i + 1) // GroupNum]
            # The two key lines: mean cumulative growth of the group, then
            # converted back to a forward return per bar
            cumret = (all_return_df.loc[start:end, group_stock] + 1).cumprod().mean(axis=1)
            all_Group_Ret_df.loc[start:end, i] = cumret.shift(1).fillna(1).pct_change().shift(-1)
    all_Group_Ret_df = all_Group_Ret_df.loc[date_list[0]:].shift(1).fillna(0)
    return all_Group_Ret_df
#list(factor.loc[date,univ].dropna().to_frame('a').sort('a',ascending=False).index)
# Group returns on the rebalance dates.
def Group_Return_calculator(factor,univ_dict,return_df,GroupNum=10):
    """Return the mean return of each factor group per rebalance date.

    factor:    DataFrame indexed [date, stock] with the factor values.
    univ_dict: {date: list of stocks}.
    return_df: DataFrame indexed [date, stock] with forward returns.
    GroupNum:  number of groups; group 0 holds the largest factor values.

    On every date the universe stocks that have both a factor value and a
    return are ranked by factor value in descending order and cut into
    GroupNum consecutive groups. The result (dates x groups, float) is
    shifted down by one date, so each row holds the return realized since
    the previous date; missing values are replaced by 0.
    """
    date_list = sorted(univ_dict)
    GroupRet_df = pd.DataFrame(index=date_list, columns=list(range(GroupNum)), dtype=float)
    for date in date_list:
        factor_row = factor.loc[date].dropna()
        return_row = return_df.loc[date].dropna()
        scores = factor_row.to_dict()
        candidates = set(univ_dict[date]) & set(factor_row.index) & set(return_row.index)
        # Descending by factor value; the inner sort makes ties deterministic
        ranked = sorted(sorted(candidates), key=scores.get, reverse=True)
        N = len(ranked)
        for i in range(GroupNum):
            # Integer arithmetic avoids float rounding at the group boundaries
            group_stock = ranked[N * i // GroupNum:N * (i + 1) // GroupNum]
            GroupRet_df.loc[date, i] = return_row.loc[group_stock].mean()
    return GroupRet_df.shift(1).fillna(0)
# Index returns.
def get_index_return(univ_dict,index,count=250):
    """Return (price_return, price_return_by_tradeday) of `index`.

    Close prices of the `count` bars ending at the latest date of
    `univ_dict` are fetched.
    price_return:             per-bar change from the first rebalance date on.
    price_return_by_tradeday: change between consecutive rebalance dates.
    Missing changes are replaced by 0 in both.
    NOTE(review): only `count` bars are fetched, so rebalance dates older
    than that window are not part of the price series -- callers whose dates
    span more than `count` bars presumably need to pass a larger count.
    """
    trade_date_list = sorted(univ_dict.keys())
    last_date = max(trade_date_list)
    close = get_price(index, end_date=last_date, count=count, fields=['close'])['close']
    close.index = [day.strftime("%Y-%m-%d") for day in close.index]
    price_return_by_tradeday = close.loc[trade_date_list].pct_change().fillna(0)
    price_return = close.loc[trade_date_list[0]:].pct_change().fillna(0)
    return price_return, price_return_by_tradeday
# Factor effectiveness tests.
def effect_test(univ_dict,key,group_return,index_return,group_excess_return):
    """Run three effectiveness checks on the group returns of one factor.

    univ_dict:           {date 'YYYY-MM-DD': ...}; only the first and last
                         date are used, to annualize.
    key:                 factor name (not used by the checks).
    group_return:        DataFrame [date, group] of group returns.
    index_return:        Series of index returns.
    group_excess_return: DataFrame [date, group] of returns above the index.

    Returns (test_result, test_score), two lists of five entries:
      1. correlation between group order and annualized group growth;
         passes when its absolute value exceeds 0.4
      2. winner group beats the annualized index growth by more than 0.05
      3. loser group trails the annualized index growth by more than 0.05
      4. share of periods with positive winner excess return; passes above 0.5
      5. share of periods with negative loser excess return; passes above 0.5
    The winner is the last group when the correlation is not negative and
    the first group otherwise; the loser is the opposite end.
    Raises ZeroDivisionError when the first and last date are the same day.
    """
    date_list = sorted(univ_dict)
    start = datetime.datetime.strptime(date_list[0], '%Y-%m-%d')
    end = datetime.datetime.strptime(date_list[-1], '%Y-%m-%d')
    daylength = (end - start).days
    # The group frames may carry object dtype; make them numeric first.
    group_return = group_return.astype(float)
    group_excess_return = group_excess_return.astype(float)
    # Annualized growth factors (1 + return)
    annual_return = (group_return + 1).cumprod().iloc[-1, :] ** (365.0 / daylength)
    index_annual_return = (index_return + 1).cumprod().iloc[-1] ** (365.0 / daylength)
    # Test one: correlation between group order and group growth
    groups = list(annual_return.index)
    sequence = pd.Series([float(n) for n in range(len(groups))], index=groups)
    test_one_corr = annual_return.corr(sequence)
    test_one_passgrade = 0.4
    test_one_pass = abs(test_one_corr) > test_one_passgrade
    if test_one_corr < 0:
        wingroup, losegroup = groups[0], groups[-1]
    else:
        wingroup, losegroup = groups[-1], groups[0]
    # Test two: winner clearly beats the index, loser clearly trails it
    test_two_passgrade = 0.05
    test_two_win_excess = annual_return[wingroup] - index_annual_return
    test_two_win_pass = test_two_win_excess > test_two_passgrade
    test_two_lose_excess = index_annual_return - annual_return[losegroup]
    test_two_lose_pass = test_two_lose_excess > test_two_passgrade
    # Test three: how often the winner beats and the loser trails the index
    test_three_grade = 0.5
    win_excess = group_excess_return[wingroup]
    lose_excess = group_excess_return[losegroup]
    test_three_win_prob = (win_excess > 0).sum() * 1.0 / len(win_excess)
    test_three_win_pass = test_three_win_prob > test_three_grade
    test_three_lose_prob = (lose_excess < 0).sum() * 1.0 / len(lose_excess)
    test_three_lose_pass = test_three_lose_prob > test_three_grade
    test_result = [test_one_pass, test_two_win_pass, test_two_lose_pass, test_three_win_pass, test_three_lose_pass]
    test_score = [test_one_corr, test_two_win_excess, test_two_lose_excess, test_three_win_prob, test_three_lose_prob]
    return test_result, test_score
# Score every factor: one IC series per factor.
starttime = time.clock()
print('\n计算IC_IR:')
ic_list_dict = {}
count = 1
for key, factor in all_factor_dict.items():
    ic_list = ic_calculator(factor, return_df, univ_dict)
    ic_list_dict[key] = ic_list
    # Progress counter
    print(count)
    count += 1
# Collect the results: rows are the sorted rebalance dates, columns the factors
ic_df = pd.DataFrame(ic_list_dict, index=sorted(univ_dict.keys()))
# Drop the last date
ic_df = ic_df.iloc[:-1]
计算IC_IR: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
ic_df
ACCA | AR | ARBR | ATR14 | ATR6 | BR | DAVOL10 | DAVOL20 | DAVOL5 | DEGM | ... | total_asset_turnover_rate | total_operating_cost_ttm | total_operating_revenue_per_share | total_operating_revenue_per_share_ttm | total_operating_revenue_ttm | total_profit_growth_rate | total_profit_to_cost_ratio | total_profit_ttm | turnover_volatility | value_change_profit_ttm | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
2014-03-13 | 0.047200 | -0.162499 | 0.073416 | -0.309763 | -0.281965 | -0.231009 | -0.147443 | -0.175845 | -0.081170 | 0.030419 | ... | -0.020423 | 0.177860 | 0.172653 | 0.164485 | 0.190432 | -0.066611 | -0.110937 | 0.168022 | -0.322969 | 0.133184 |
2014-04-11 | 0.036714 | 0.194380 | 0.108152 | -0.139012 | -0.118406 | 0.133141 | 0.116173 | 0.091247 | 0.111316 | -0.047518 | ... | -0.044774 | -0.136920 | -0.081272 | -0.088791 | -0.139804 | -0.087635 | 0.007073 | -0.114859 | -0.103820 | 0.032107 |
2014-05-13 | -0.000212 | -0.226541 | -0.009925 | 0.132893 | 0.085662 | -0.199578 | -0.172168 | -0.165305 | -0.206175 | 0.126938 | ... | -0.050430 | 0.087289 | 0.051213 | 0.094782 | 0.090727 | 0.117277 | -0.056055 | 0.086737 | -0.173069 | 0.003919 |
2014-06-11 | -0.040416 | -0.030739 | -0.021044 | -0.072618 | -0.083106 | -0.026398 | -0.068965 | -0.033872 | -0.103080 | -0.126065 | ... | -0.030833 | -0.027832 | -0.043807 | -0.022776 | -0.040315 | -0.014275 | -0.125476 | -0.075107 | -0.048262 | -0.043109 |
2014-07-09 | 0.072393 | -0.073307 | 0.058638 | -0.225825 | -0.213875 | -0.079134 | -0.035755 | -0.035135 | -0.006113 | -0.056671 | ... | 0.079728 | 0.190624 | 0.201080 | 0.175578 | 0.200589 | -0.160514 | -0.061726 | 0.152767 | -0.209181 | 0.086706 |
2014-08-06 | -0.072160 | -0.327378 | 0.050347 | 0.041588 | 0.028097 | -0.402522 | -0.339449 | -0.290965 | -0.307473 | -0.122475 | ... | -0.106146 | -0.248803 | -0.170863 | -0.177226 | -0.253778 | 0.023571 | 0.004129 | -0.305918 | -0.024571 | -0.106277 |
2014-09-03 | 0.183476 | 0.166382 | 0.117706 | -0.202130 | -0.205865 | 0.081151 | 0.171634 | 0.220472 | 0.105560 | -0.021080 | ... | 0.047510 | -0.036580 | -0.072564 | -0.060323 | -0.066790 | -0.199037 | -0.171860 | -0.243713 | 0.069956 | -0.045877 |
2014-10-09 | 0.079243 | 0.216416 | 0.040139 | -0.334901 | -0.317678 | 0.184767 | 0.144671 | 0.099249 | 0.140093 | 0.070633 | ... | 0.047885 | 0.181602 | 0.042069 | 0.051591 | 0.170156 | -0.070369 | -0.094806 | 0.124304 | -0.075422 | 0.187840 |
2014-11-06 | 0.107623 | 0.254242 | 0.071531 | -0.244694 | -0.216206 | 0.197369 | 0.246151 | 0.148685 | 0.344854 | -0.046953 | ... | 0.046547 | 0.189635 | 0.087891 | 0.099721 | 0.189745 | -0.119383 | -0.036733 | 0.155362 | -0.086452 | 0.159541 |
2014-12-04 | 0.070791 | 0.494906 | -0.228191 | -0.217855 | -0.175923 | 0.519703 | 0.427885 | 0.443558 | 0.421980 | 0.013248 | ... | 0.046111 | 0.291692 | 0.187180 | 0.195063 | 0.309195 | -0.019766 | -0.053584 | 0.329402 | -0.034908 | 0.167332 |
2015-01-05 | -0.176229 | -0.593839 | 0.059320 | 0.198294 | 0.194665 | -0.560011 | -0.604952 | -0.575681 | -0.570806 | -0.000854 | ... | -0.009335 | -0.246806 | -0.114114 | -0.133284 | -0.258065 | 0.117908 | 0.104454 | -0.157458 | -0.183785 | -0.210673 |
2015-02-02 | -0.005689 | -0.026048 | -0.014286 | -0.017713 | 0.024080 | -0.029373 | 0.080398 | -0.044620 | 0.078978 | 0.036360 | ... | 0.041179 | -0.163963 | -0.052264 | -0.044623 | -0.163946 | 0.171416 | 0.019793 | -0.220149 | 0.165527 | -0.048715 |
2015-03-09 | 0.026273 | 0.131523 | 0.146415 | 0.024367 | 0.027291 | -0.007881 | -0.056378 | -0.037774 | -0.086783 | 0.009016 | ... | 0.040375 | -0.004725 | -0.042434 | -0.047514 | -0.016829 | -0.000389 | -0.095542 | -0.017712 | 0.136583 | 0.084405 |
2015-04-07 | -0.029815 | -0.101676 | 0.027478 | -0.216674 | -0.219848 | -0.092041 | -0.125115 | -0.153025 | -0.148122 | 0.037000 | ... | -0.016275 | 0.183953 | 0.032455 | 0.041570 | 0.196238 | -0.000499 | -0.064900 | 0.101965 | -0.097953 | 0.111724 |
2015-05-06 | -0.046848 | -0.135332 | 0.069100 | 0.273689 | 0.270338 | -0.165272 | -0.122701 | -0.127371 | -0.084481 | -0.019426 | ... | -0.049704 | -0.221286 | -0.181689 | -0.154368 | -0.239224 | -0.108290 | -0.028808 | -0.265511 | 0.061536 | -0.216903 |
2015-06-03 | 0.066756 | -0.274657 | 0.316860 | -0.257891 | -0.291975 | -0.386341 | -0.150172 | -0.260568 | -0.152281 | 0.086080 | ... | 0.109094 | 0.274057 | 0.213587 | 0.188194 | 0.304496 | 0.038556 | 0.045125 | 0.301082 | -0.306986 | 0.110210 |
2015-07-02 | -0.169283 | -0.123858 | 0.051503 | 0.081614 | 0.063758 | -0.243420 | -0.157666 | -0.190835 | -0.120788 | 0.058753 | ... | -0.127043 | -0.151049 | -0.178163 | -0.129954 | -0.163691 | 0.059118 | 0.053213 | -0.123827 | -0.209467 | -0.172912 |
2015-07-30 | 0.034665 | -0.213317 | -0.060590 | -0.120632 | -0.186212 | -0.170176 | -0.342672 | -0.117837 | -0.370694 | -0.050241 | ... | -0.028573 | 0.121466 | 0.056898 | 0.046520 | 0.130536 | -0.191123 | 0.008495 | 0.087274 | -0.249106 | 0.048845 |
2015-08-27 | -0.035067 | -0.124292 | -0.059187 | 0.081110 | 0.077700 | -0.109202 | 0.060253 | 0.044496 | 0.070619 | -0.008101 | ... | 0.001664 | 0.012053 | 0.018589 | -0.008497 | 0.011183 | -0.007299 | 0.047434 | 0.071043 | -0.105111 | -0.003825 |
2015-09-28 | 0.048052 | -0.174652 | -0.110641 | 0.350810 | 0.364360 | -0.082053 | 0.347133 | 0.336875 | 0.378152 | -0.035018 | ... | 0.052106 | -0.278746 | -0.014263 | -0.036665 | -0.265917 | 0.092505 | 0.119509 | -0.129747 | 0.156764 | -0.152218 |
2015-11-02 | -0.169341 | 0.173895 | 0.052161 | 0.027404 | 0.086556 | 0.125994 | 0.042939 | 0.069355 | 0.085641 | -0.047459 | ... | -0.147803 | -0.172801 | -0.145262 | -0.131982 | -0.152701 | 0.036559 | 0.217130 | 0.013376 | 0.028607 | 0.037462 |
2015-11-30 | 0.105001 | 0.010290 | 0.012644 | 0.174967 | 0.152275 | 0.059461 | 0.061660 | 0.077557 | 0.054064 | 0.026028 | ... | 0.132630 | 0.148941 | 0.265021 | 0.247416 | 0.154991 | 0.055242 | -0.028890 | 0.190370 | 0.010043 | 0.076753 |
2015-12-28 | 0.108269 | 0.061193 | 0.052929 | -0.222986 | -0.254030 | 0.054503 | -0.285090 | -0.242189 | -0.308712 | 0.108666 | ... | 0.133524 | 0.325140 | 0.251788 | 0.235202 | 0.334696 | -0.040853 | -0.099587 | 0.256852 | -0.375087 | 0.079161 |
2016-01-26 | 0.067380 | 0.003680 | -0.044531 | 0.000308 | 0.020704 | 0.077478 | -0.035031 | 0.033782 | -0.041382 | 0.012873 | ... | 0.116464 | 0.140025 | 0.136689 | 0.142024 | 0.133866 | 0.014959 | -0.057859 | 0.101760 | -0.120482 | -0.034535 |
24 rows × 156 columns
# Share of factors whose IC sign is lopsided: positive on fewer than 40% or
# more than 60% of the dates.
count = 0
for col in ic_df.columns:
    tmpdf = ic_df[[col]]
    pct = 1.0*len(tmpdf[tmpdf[col]>0])/len(tmpdf) # share of dates with IC > 0
    if pct < 0.4 or pct > 0.6 :
        count += 1
# 100.0 forces float division and scales the ratio to a percentage; plain
# `count / len(...)` floors to 0 under Python 2 integer division.
print("IC存在偏向和延续的概率:%.2f%%"%(100.0 * count / len(ic_df.columns)))
IC存在偏向和延续的概率:0.00%
# Group returns per factor.
print('\n计算分组收益:')
GroupNum = 10
count = 1
# Meant for the per-bar returns used for NAV; cheaper to fill once the
# factors have been selected, so it stays empty here.
all_Factor_Group_Return_dict = {}
Factor_Group_Return_dict = {}
for key, factor in all_factor_dict.items():
    # Per-bar returns (disabled)
    #all_GroupRet_df=all_Group_Return_calculator(factor,univ_dict,all_return_df,GroupNum)
    #all_Factor_Group_Return_dict[key]=all_GroupRet_df.sort_index()
    # Returns on the rebalance dates
    GroupRet_df = Group_Return_calculator(factor, univ_dict, return_df, GroupNum)
    Factor_Group_Return_dict[key] = GroupRet_df.sort_index()
    # Progress counter
    print(count)
    count += 1
计算分组收益: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
# Index returns and group returns in excess of the index.
print('\n计算指数收益:')
count=1
index='000300.XSHG'
# Fetch g_count bars, the same window the rebalance dates and the stock
# returns were built from. The default of 250 bars does not reach back to
# the earliest rebalance dates.
index_return,index_return_by_tradeday=get_index_return(univ_dict,index,count=g_count)
Factor_Group_Excess_Return_dict={}
for key,group_return in Factor_Group_Return_dict.items():
    # Subtract the index return of the same rebalance date from every group
    Factor_Group_Excess_Return_dict[key]=group_return.subtract(index_return_by_tradeday,axis=0)
    print(count)
    count=count+1
计算指数收益: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
# Effectiveness tests for every factor.
print('\n因子有效性测试:')
effect_test_result_dict = {}
effect_test_score_dict = {}
count = 1
for key, group_return in Factor_Group_Return_dict.items():
    group_excess_return = Factor_Group_Excess_Return_dict[key]
    outcome = effect_test(univ_dict, key, group_return, index_return, group_excess_return)
    # Pass/fail flags and raw scores are kept in separate dicts
    effect_test_result_dict[key], effect_test_score_dict[key] = outcome
    # Progress counter
    print(count)
    count += 1
因子有效性测试: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
# ICIR per factor: mean of the IC series divided by its standard deviation
ic_ir_se=ic_df.mean()/ic_df.std()
# Absolute value of the mean IC per factor
ic_avg_se=ic_df.mean().abs()
# Pass/fail table: IC, ICIR and the five flags returned by effect_test
EffectTestresult=pd.concat([ic_avg_se.to_frame('a'),ic_ir_se.to_frame('b'),pd.DataFrame(effect_test_result_dict).T],axis=1)
columns=['IC','ICIR','测试一', '测试二-胜者组', '测试二-败者组', '测试三-胜者组', '测试三-败者组']
EffectTestresult.columns=columns
# Score table: same layout, with the raw scores of the five tests instead of the flags
EffectTestresult2=pd.concat([ic_avg_se.to_frame('a'),ic_ir_se.to_frame('b'),pd.DataFrame(effect_test_score_dict).T],axis=1)
columns=['IC','ICIR','测试一', '测试二-胜者组', '测试二-败者组', '测试三-胜者组', '测试三-败者组']
EffectTestresult2.columns=columns
EffectTestresult
IC | ICIR | 测试一 | 测试二-胜者组 | 测试二-败者组 | 测试三-胜者组 | 测试三-败者组 | |
---|---|---|---|---|---|---|---|
ACCA | 0.012866 | 0.139050 | False | True | False | True | False |
AR | 0.036718 | -0.160995 | True | True | False | True | False |
ARBR | 0.031664 | 0.314587 | True | True | False | True | False |
ATR14 | 0.049819 | -0.258804 | True | True | False | True | False |
ATR6 | 0.048733 | -0.255409 | True | True | False | True | False |
BR | 0.056285 | -0.247606 | True | True | False | True | False |
DAVOL10 | 0.039361 | -0.170452 | True | True | False | True | True |
DAVOL20 | 0.036906 | -0.170552 | True | True | False | True | False |
DAVOL5 | 0.033200 | -0.141448 | False | True | False | True | False |
DEGM | 0.001423 | 0.022272 | False | True | False | True | False |
EBIT | 0.024991 | 0.126466 | True | True | False | True | False |
EBITDA | 0.023415 | 0.114423 | True | True | False | True | False |
Kurtosis120 | 0.016839 | 0.094716 | False | True | False | True | False |
Kurtosis20 | 0.023001 | -0.186492 | True | True | False | True | False |
Kurtosis60 | 0.002823 | -0.018834 | False | True | False | True | False |
MAWVAD | 0.012968 | 0.053739 | True | True | False | True | False |
MLEV | 0.029706 | 0.231814 | True | True | False | True | False |
OperateNetIncome | 0.010827 | 0.069878 | False | True | False | True | False |
OperatingCycle | 0.007722 | -0.076538 | False | True | False | True | False |
ROAEBITTTM | 0.020850 | -0.172085 | False | True | False | True | False |
Skewness120 | 0.017286 | -0.115842 | False | True | False | True | False |
Skewness20 | 0.022001 | -0.167139 | False | True | False | True | False |
Skewness60 | 0.018554 | -0.146239 | False | True | False | True | False |
TVMA20 | 0.096683 | -0.475439 | True | True | False | True | True |
TVMA6 | 0.102510 | -0.448285 | True | True | False | True | True |
TVSTD20 | 0.108780 | -0.545369 | True | True | False | True | False |
TVSTD6 | 0.097882 | -0.454276 | True | True | False | True | True |
VDEA | 0.006973 | 0.043340 | False | True | False | True | False |
VDIFF | 0.015364 | 0.114978 | False | True | False | True | False |
VEMA10 | 0.005741 | -0.025909 | False | True | False | True | False |
... | ... | ... | ... | ... | ... | ... | ... |
operating_profit_ttm | 0.018949 | 0.107587 | False | True | False | True | False |
operating_revenue_growth_rate | 0.003884 | 0.043997 | False | True | False | True | False |
operating_revenue_per_share | 0.026234 | 0.186531 | False | True | False | True | False |
operating_revenue_per_share_ttm | 0.027035 | 0.204013 | False | True | False | True | False |
operating_revenue_ttm | 0.027597 | 0.140095 | True | True | False | True | False |
operating_tax_to_operating_revenue_ratio_ttm | 0.007541 | 0.087353 | False | True | False | True | False |
quick_ratio | 0.021461 | -0.243853 | True | True | False | True | False |
retained_earnings | 0.021211 | 0.103753 | True | True | False | True | False |
retained_earnings_per_share | 0.014394 | 0.098722 | False | True | False | True | False |
retained_profit_per_share | 0.015068 | 0.106958 | False | True | False | True | False |
roa_ttm | 0.019886 | -0.179419 | False | True | False | True | False |
roe_ttm | 0.008612 | -0.074070 | False | True | False | True | False |
sale_expense_to_operating_revenue | 0.027127 | -0.344554 | True | True | False | True | False |
sale_expense_ttm | 0.007708 | 0.046442 | True | True | False | True | False |
sharpe_ratio_120 | 0.114723 | 0.554884 | True | True | False | True | False |
sharpe_ratio_20 | 0.096581 | 0.517602 | True | True | False | True | False |
sharpe_ratio_60 | 0.119046 | 0.556540 | True | True | False | True | False |
super_quick_ratio | 0.017008 | -0.209308 | False | True | False | True | False |
surplus_reserve_fund_per_share | 0.008414 | 0.061381 | False | True | False | True | False |
total_asset_growth_rate | 0.018643 | -0.175744 | True | True | False | True | False |
total_asset_turnover_rate | 0.010978 | 0.141707 | True | True | False | True | False |
total_operating_cost_ttm | 0.026451 | 0.138899 | True | True | False | True | False |
total_operating_revenue_per_share | 0.025851 | 0.184704 | False | True | False | True | False |
total_operating_revenue_per_share_ttm | 0.026923 | 0.203856 | False | True | False | True | False |
total_operating_revenue_ttm | 0.027325 | 0.138784 | True | True | False | True | False |
total_profit_growth_rate | 0.014955 | -0.153739 | False | True | False | False | False |
total_profit_to_cost_ratio | 0.019184 | -0.218573 | True | True | False | True | False |
total_profit_ttm | 0.020263 | 0.110612 | True | True | False | True | False |
turnover_volatility | 0.087401 | -0.585333 | True | True | False | True | True |
value_change_profit_ttm | 0.011839 | 0.099951 | False | True | False | True | False |
156 rows × 7 columns
EffectTestresult2
IC | ICIR | 测试一 | 测试二-胜者组 | 测试二-败者组 | 测试三-胜者组 | 测试三-败者组 | |
---|---|---|---|---|---|---|---|
ACCA | 0.012866 | 0.139050 | -0.347761 | 0.428992 | -0.240983 | 0.60 | 0.36 |
AR | 0.036718 | -0.160995 | 0.633776 | 0.311248 | -0.298716 | 0.56 | 0.40 |
ARBR | 0.031664 | 0.314587 | -0.622252 | 0.485234 | -0.067798 | 0.68 | 0.44 |
ATR14 | 0.049819 | -0.258804 | 0.682412 | 0.614150 | -0.179128 | 0.64 | 0.36 |
ATR6 | 0.048733 | -0.255409 | 0.765657 | 0.597185 | -0.144671 | 0.64 | 0.40 |
BR | 0.056285 | -0.247606 | 0.842111 | 0.358631 | -0.091797 | 0.64 | 0.48 |
DAVOL10 | 0.039361 | -0.170452 | 0.492066 | 0.327177 | -0.075492 | 0.64 | 0.52 |
DAVOL20 | 0.036906 | -0.170552 | 0.626611 | 0.319072 | -0.187351 | 0.64 | 0.44 |
DAVOL5 | 0.033200 | -0.141448 | 0.384998 | 0.324972 | -0.081628 | 0.64 | 0.44 |
DEGM | 0.001423 | 0.022272 | -0.245694 | 0.370361 | -0.250382 | 0.72 | 0.48 |
EBIT | 0.024991 | 0.126466 | -0.492138 | 0.302279 | -0.310369 | 0.60 | 0.36 |
EBITDA | 0.023415 | 0.114423 | -0.427733 | 0.433205 | -0.304974 | 0.60 | 0.36 |
Kurtosis120 | 0.016839 | 0.094716 | -0.367350 | 0.461381 | -0.211368 | 0.64 | 0.32 |
Kurtosis20 | 0.023001 | -0.186492 | 0.675492 | 0.340002 | -0.230069 | 0.60 | 0.40 |
Kurtosis60 | 0.002823 | -0.018834 | 0.067572 | 0.298060 | -0.383575 | 0.52 | 0.36 |
MAWVAD | 0.012968 | 0.053739 | -0.420102 | 0.214962 | -0.120384 | 0.56 | 0.40 |
MLEV | 0.029706 | 0.231814 | -0.484990 | 0.589323 | -0.196004 | 0.80 | 0.44 |
OperateNetIncome | 0.010827 | 0.069878 | -0.223968 | 0.282673 | -0.308166 | 0.56 | 0.44 |
OperatingCycle | 0.007722 | -0.076538 | 0.108109 | 0.363148 | -0.329961 | 0.60 | 0.28 |
ROAEBITTTM | 0.020850 | -0.172085 | 0.398922 | 0.270327 | -0.192646 | 0.60 | 0.32 |
Skewness120 | 0.017286 | -0.115842 | 0.264691 | 0.437910 | -0.430163 | 0.72 | 0.44 |
Skewness20 | 0.022001 | -0.167139 | 0.266950 | 0.181838 | -0.282793 | 0.52 | 0.24 |
Skewness60 | 0.018554 | -0.146239 | 0.163989 | 0.312804 | -0.403326 | 0.56 | 0.32 |
TVMA20 | 0.096683 | -0.475439 | 0.938999 | 0.479491 | -0.164642 | 0.64 | 0.56 |
TVMA6 | 0.102510 | -0.448285 | 0.888644 | 0.492120 | -0.086890 | 0.68 | 0.52 |
TVSTD20 | 0.108780 | -0.545369 | 0.897024 | 0.479845 | -0.124894 | 0.68 | 0.48 |
TVSTD6 | 0.097882 | -0.454276 | 0.910318 | 0.470792 | -0.126774 | 0.60 | 0.56 |
VDEA | 0.006973 | 0.043340 | 0.280343 | 0.314561 | -0.312307 | 0.52 | 0.36 |
VDIFF | 0.015364 | 0.114978 | -0.338846 | 0.351872 | -0.246974 | 0.56 | 0.32 |
VEMA10 | 0.005741 | -0.025909 | 0.126447 | 0.447497 | -0.324396 | 0.60 | 0.48 |
... | ... | ... | ... | ... | ... | ... | ... |
operating_profit_ttm | 0.018949 | 0.107587 | -0.361620 | 0.326432 | -0.345141 | 0.60 | 0.32 |
operating_revenue_growth_rate | 0.003884 | 0.043997 | -0.133464 | 0.399895 | -0.226262 | 0.60 | 0.48 |
operating_revenue_per_share | 0.026234 | 0.186531 | -0.381663 | 0.523079 | -0.286017 | 0.60 | 0.48 |
operating_revenue_per_share_ttm | 0.027035 | 0.204013 | -0.322013 | 0.561686 | -0.260936 | 0.72 | 0.44 |
operating_revenue_ttm | 0.027597 | 0.140095 | -0.469427 | 0.452515 | -0.246426 | 0.72 | 0.36 |
operating_tax_to_operating_revenue_ratio_ttm | 0.007541 | 0.087353 | -0.258939 | 0.266964 | -0.399442 | 0.60 | 0.36 |
quick_ratio | 0.021461 | -0.243853 | 0.429697 | 0.304762 | -0.210931 | 0.68 | 0.36 |
retained_earnings | 0.021211 | 0.103753 | -0.432607 | 0.417680 | -0.371353 | 0.68 | 0.32 |
retained_earnings_per_share | 0.014394 | 0.098722 | 0.227294 | 0.419124 | -0.303787 | 0.64 | 0.36 |
retained_profit_per_share | 0.015068 | 0.106958 | 0.186659 | 0.376731 | -0.304230 | 0.60 | 0.28 |
roa_ttm | 0.019886 | -0.179419 | 0.358638 | 0.304997 | -0.238565 | 0.68 | 0.32 |
roe_ttm | 0.008612 | -0.074070 | 0.273802 | 0.208943 | -0.238217 | 0.60 | 0.36 |
sale_expense_to_operating_revenue | 0.027127 | -0.344554 | 0.570582 | 0.436612 | -0.168843 | 0.68 | 0.40 |
sale_expense_ttm | 0.007708 | 0.046442 | -0.455231 | 0.400599 | -0.336663 | 0.64 | 0.36 |
sharpe_ratio_120 | 0.114723 | 0.554884 | -0.876687 | 0.628048 | -0.063511 | 0.72 | 0.36 |
sharpe_ratio_20 | 0.096581 | 0.517602 | -0.754790 | 0.485422 | -0.139325 | 0.64 | 0.32 |
sharpe_ratio_60 | 0.119046 | 0.556540 | -0.928760 | 0.551859 | -0.141734 | 0.64 | 0.40 |
super_quick_ratio | 0.017008 | -0.209308 | 0.354314 | 0.263987 | -0.214044 | 0.52 | 0.40 |
surplus_reserve_fund_per_share | 0.008414 | 0.061381 | 0.316521 | 0.268158 | -0.338592 | 0.56 | 0.32 |
total_asset_growth_rate | 0.018643 | -0.175744 | 0.529237 | 0.385668 | -0.248980 | 0.56 | 0.40 |
total_asset_turnover_rate | 0.010978 | 0.141707 | -0.408405 | 0.359223 | -0.282246 | 0.68 | 0.36 |
total_operating_cost_ttm | 0.026451 | 0.138899 | -0.502188 | 0.422254 | -0.228278 | 0.72 | 0.40 |
total_operating_revenue_per_share | 0.025851 | 0.184704 | -0.391091 | 0.521386 | -0.284032 | 0.60 | 0.48 |
total_operating_revenue_per_share_ttm | 0.026923 | 0.203856 | -0.341704 | 0.555859 | -0.260519 | 0.72 | 0.44 |
total_operating_revenue_ttm | 0.027325 | 0.138784 | -0.524532 | 0.454753 | -0.248052 | 0.72 | 0.36 |
total_profit_growth_rate | 0.014955 | -0.153739 | 0.151542 | 0.266453 | -0.366292 | 0.48 | 0.32 |
total_profit_to_cost_ratio | 0.019184 | -0.218573 | 0.540883 | 0.347947 | -0.255280 | 0.60 | 0.44 |
total_profit_ttm | 0.020263 | 0.110612 | -0.407374 | 0.294813 | -0.338419 | 0.56 | 0.36 |
turnover_volatility | 0.087401 | -0.585333 | 0.818967 | 0.437876 | -0.091742 | 0.64 | 0.52 |
value_change_profit_ttm | 0.011839 | 0.099951 | -0.268988 | 0.234034 | -0.366520 | 0.52 | 0.40 |
156 rows × 7 columns
# Distribution of the absolute mean IC across factors
EffectTestresult['IC'].hist()
# IC threshold: the 80th percentile across factors
IC_ratio = EffectTestresult['IC'].quantile(0.80)
IC_ratio
0.029812437613462698
# Distribution of the absolute ICIR across factors
EffectTestresult['ICIR'].abs().hist()
# ICIR threshold: the 75th percentile of the absolute ICIR across factors
ICIR_ratio = EffectTestresult['ICIR'].abs().quantile(0.75)
ICIR_ratio
0.21943197512864868
# Select the effective factors.
# A factor is kept only when ALL of the following hold:
#   * its IC is above IC_ratio and its |ICIR| is above ICIR_ratio (both
#     thresholds are quantiles of EffectTestresult, not fixed constants);
#   * the test columns at positions 2, 3 and 5 are all True;
#   * at least 3 of the four test columns at positions 3..6 are True.
# NOTE(review): IC is compared without abs() while ICIR uses abs() --
# presumably the IC column is already non-negative; confirm upstream.
index_ic = EffectTestresult['IC'] > IC_ratio
index_icir = EffectTestresult['ICIR'].abs() > ICIR_ratio
# Use the DataFrame reductions instead of bare all()/sum(): the builtins do
# not accept an ``axis`` keyword, so the bare calls only work when NumPy's
# versions have been star-imported over them.
test_index = EffectTestresult.iloc[:, [2, 3, 5]].all(axis=1)
test2_index = EffectTestresult.iloc[:, 3:7].sum(axis=1) >= 3
filter_index = index_ic & index_icir & test_index & test2_index
EffectFactorresult = EffectTestresult.loc[filter_index, :]
# Build the dict holding only the effective factors' data.
EffectFactor = list(EffectFactorresult.index)
Effect_factor_dict = {key: value for key, value in all_factor_dict.items() if key in EffectFactor}
# Bare expression so the notebook echoes the filtered table.
EffectFactorresult
| | IC | ICIR | 测试一 | 测试二-胜者组 | 测试二-败者组 | 测试三-胜者组 | 测试三-败者组 |
---|---|---|---|---|---|---|---|
TVMA20 | 0.096683 | -0.475439 | True | True | False | True | True |
TVMA6 | 0.102510 | -0.448285 | True | True | False | True | True |
TVSTD6 | 0.097882 | -0.454276 | True | True | False | True | True |
financial_expense_ttm | 0.032306 | 0.267826 | True | True | False | True | True |
money_flow_20 | 0.096490 | -0.462037 | True | True | False | True | True |
turnover_volatility | 0.087401 | -0.585333 | True | True | False | True | True |
# Order the effective factors by IC, largest first, and keep their names.
# NOTE(review): DataFrame.sort is the legacy pandas spelling; newer pandas
# releases only provide sort_values -- confirm the target pandas version.
effect_fac_list = EffectFactorresult.sort('IC',ascending=False).index.tolist()
def Group_Score_calculator(factor, univ_dict, signal, GroupNum=20):
    """Assign each stock a group score per date from its factor ranking.

    For every date in ``univ_dict`` the stocks that are both in that date's
    universe and have a non-NaN factor value are ranked by factor value,
    largest first, and cut into ``GroupNum`` consecutive groups.

    Args:
        factor: DataFrame indexed by date with one column per stock.
        univ_dict: mapping of date -> iterable of stock codes.
        signal: ``'ascending'`` scores group ``i`` as ``i``; any other value
            scores it as ``GroupNum - i``.
        GroupNum: number of groups to cut the ranking into.

    Returns:
        A float DataFrame shaped like ``factor``; cells that never receive a
        score stay NaN.

    NOTE(review): the two signal branches produce ranges that differ by one
    (0..GroupNum-1 versus GroupNum..1); kept as-is for compatibility.
    """
    # Float dtype up front so downstream numeric code (e.g. ``corr``) does not
    # have to cope with an object-dtype frame.
    Score_df = pd.DataFrame(index=list(factor.index),
                            columns=list(factor.columns), dtype=float)
    for date in sorted(univ_dict):
        values = factor.loc[date].dropna()
        universe = set(univ_dict[date])
        lookup = values.to_dict()
        # Walk the stocks in column order and use a stable sort so that ties
        # are broken deterministically.
        ranked = sorted((stock for stock in values.index if stock in universe),
                        key=lookup.__getitem__, reverse=True)
        N = len(ranked)
        for i in range(GroupNum):
            # Pure integer boundaries: ``N / GroupNum`` truncates under
            # Python 2 and would leave the tail of the ranking unscored.
            group_stock = ranked[N * i // GroupNum:N * (i + 1) // GroupNum]
            if not group_stock:
                # Fewer stocks than groups: nothing to assign for this group.
                continue
            score = i if signal == 'ascending' else GroupNum - i
            Score_df.loc[date, group_stock] = score
    return Score_df
# Compute the factor correlation matrix
def factor_corr_calculator(Group_Score_dict, univ_dict, fill_value=4.5):
    """Average the per-date correlation matrices of the factor group scores.

    For every date in ``univ_dict`` a stocks x factors table of group scores
    is assembled, missing scores are replaced by ``fill_value``, and the
    pairwise correlation between factor columns is computed. The per-date
    matrices are summed and divided by the number of dates.

    Args:
        Group_Score_dict: mapping of factor name -> score DataFrame indexed
            by date with one column per stock.
        univ_dict: mapping of date -> iterable of stock codes.
        fill_value: score substituted for stocks without a score on a date.
            NOTE(review): the default 4.5 looks like the midpoint of a
            10-group 0..9 scale; pass a matching value for other group counts.

    Returns:
        The averaged correlation matrix rounded to 2 decimals, or an empty
        DataFrame when there are no dates or no factors. A NaN correlation
        on any single date propagates into the average.
    """
    dates = sorted(univ_dict)
    names = list(Group_Score_dict)
    if not dates or not names:
        return pd.DataFrame()
    corr_sum = None
    for Date in dates:
        univ = list(univ_dict[Date])
        # reindex yields NaN for stocks that have no score column, instead of
        # depending on label-lookup behaviour for missing keys.
        per_factor = [Group_Score_dict[name].loc[Date].reindex(univ)
                      for name in names]
        # A single concat replaces growing a frame with repeated append().
        daily_scores = pd.concat(per_factor, axis=1, keys=names)
        # Cast explicitly: corr() ignores non-numeric columns, so object
        # columns must not be left to an implicit downcast.
        daily_scores = daily_scores.fillna(fill_value).astype(float)
        daily_corr = daily_scores.corr()
        corr_sum = daily_corr if corr_sum is None else corr_sum + daily_corr
    return np.round(corr_sum / len(dates), 2)
# Score every effective factor: the sign of its entry in ic_ir_se picks the
# scoring direction passed to Group_Score_calculator.
Group_Score_dict = {}
for key, factor in Effect_factor_dict.items():
    if ic_ir_se[key] > 0:
        signal = 'ascending'
    else:
        signal = 'descending'
    Group_Score_dict[key] = Group_Score_calculator(factor, univ_dict, signal, GroupNum=20)
# Averaged correlation matrix between the factors' group scores.
factor_corrmatrix = factor_corr_calculator(Group_Score_dict, univ_dict)
# Bare expression so the notebook echoes the matrix.
factor_corrmatrix
# Alias used by the de-correlation step.
fac_corr = factor_corrmatrix
# Greedy de-correlation: walk the factors in effect_fac_list order and keep a
# factor only if none of the factors it correlates with above MinCorr has
# already been kept.
MinCorr = 0.9
# Seed the result with the first factor of effect_fac_list (if there is one).
result_fac_list = effect_fac_list[:1]
for fac in effect_fac_list:
    if fac not in result_fac_list:
        fac_corr_se = fac_corr[fac]
        # Factors whose correlation with `fac` exceeds the threshold.
        fac_corr_list = fac_corr_se[fac_corr_se > MinCorr].index.tolist()
        # Keep `fac` only when none of those factors was kept before.
        if not (set(fac_corr_list) & set(result_fac_list)):
            result_fac_list.append(fac)
result_fac_list = sorted(result_fac_list)
# Bare expression so the notebook echoes the surviving factors.
result_fac_list
[]
# IC values of the factors that survived the correlation filter.
result_fac_ic_se = EffectTestresult['IC'].loc[result_fac_list]
# Bare expression so the notebook echoes the series.
result_fac_ic_se
Series([], Name: IC, dtype: float64)
# Date used for the final stock selection (`today` is set elsewhere).
date = today
# NOTE(review): bare expression in the middle of the cell; it has no effect here.
g_factor_list
# Stock pool for the index g_index on that date.
stock_list = get_stock_pool(date,index=g_index)
# Echo the pool size.
len(stock_list)
299
# Fetch the selected factors for the stock pool on the single day `date`.
factor_dict = get_factor_values(
    securities=stock_list,
    factors=result_fac_ic_se.index.tolist(),
    start_date=date,
    end_date=date
)
# Industry data for the same date and pool.
indu_se = get_Industry_by_day(date, stock_list)
# Accumulates one processed column per factor.
factor_df = DataFrame()
for fac in list(factor_dict):
    # First row of the factor frame, with NaNs replaced via the industry data.
    factor_se = replace_nan_indu(factor_dict[fac].iloc[0], indu_se)
    # Winsorize.
    factor_se = winsorize_med(factor_se, scale=3, inclusive=True, inf2nan=True, axis=1)
    # Neutralize only the factors listed in g_neu_factor.
    if fac in g_neu_factor:
        factor_se = neutralize(factor_se, how=['jq_l1', 'market_cap'], date=date, axis=1)
    # Standardize.
    factor_se = standardlize(factor_se, inf2nan=True, axis=0)
    tmp_df = factor_se.to_frame(fac)
    factor_df = pd.concat([factor_df, tmp_df], axis=1)
# Transpose so that each row is a factor.
fianl_factor_df = factor_df.T
# Weights: the last row of ic_df for the selected factors.
ic_se = ic_df[result_fac_list].iloc[-1]
stocks_to_buy = 30
# Weight each factor row by its IC, sum per stock, and keep the
# `stocks_to_buy` stocks with the largest totals.
final_stock_list = (
    fianl_factor_df
    .multiply(ic_se, axis=0)
    .sum()
    .to_frame('a')
    .sort('a', ascending=False)
    .index[0:stocks_to_buy]
)
# Bare expression so the notebook echoes the picks.
final_stock_list
Index([], dtype='object')
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程