#导入需要的函数
from jqfactor import get_factor_values
import matplotlib.pyplot as plt
import time
import jqdata as jq
import numpy as np
import pandas as pd
import math
import statsmodels.api as sm
from sklearn.model_selection import train_test_split #这里是引用了交叉验证
from sklearn.linear_model import LinearRegression #线性回归
import matplotlib.pyplot as plt
import datetime
from jqfactor import *
import warnings
import pickle
warnings.filterwarnings('ignore')
plt.style.use('seaborn-bright')
2.1 起止时间,因子极值处理时的分位数,以及分组回测交易情况下(剔除一些开盘涨跌幅超过ct_pct的个股)--不追高不抄底
#首先取交易时间;
#设置统计起止日期、极端值的边界分位数、以及要求第二天开盘涨跌幅不能超过ct_pct
Begin_date='2018-01-01'
End_date='2019-07-11'
quantile=0.01
ct_pct=0.05
#获取统计期内交易日列表、用于计算因子数据
date_list = jq.get_trade_days(start_date=Begin_date,end_date=End_date)#获取回测日期间的所有交易日
2.2 取成分股--本文以全部A股为例,做初步筛选
#剔除当前时间点上市不足三个月的新股、st股以及停牌股
date='2019-07-05'
stock_pool=get_all_securities(types=[], date=date)
stock_pool=filter_stock(stock_pool,date,skip_paused=1)
2.3 为这些成分股构建行业哑变量(提前构建,为之后分组回测做准备)
hi_here_time=time.time()
stock_industry=get_industry(security=list(stock_pool.index), date=None)
#column增加了一列sw行业
stock_pool['industry']=[x[1]['sw_l1']['industry_name'] for x in list(stock_industry.items()) if x[1].setdefault('sw_l1',{'industry_name':np.nan})]
stock_pool=stock_pool.dropna()
#28个行业名称
sw_lv1_28=stock_pool.drop_duplicates(subset=['industry'],keep='first')['industry'].values
#构建行业哑变量矩阵
hy_dummy_matrix=pd.DataFrame(np.zeros([len(stock_pool),28]),index=stock_pool.index,columns=sw_lv1_28)
for i in sw_lv1_28:
hy_dummy_matrix[i][stock_pool['industry']==i]=1
#合并形成新的stocklist
stocklist=pd.concat([stock_pool[['display_name','start_date']],hy_dummy_matrix],axis=1)
hello_there_time=time.time()
time_last=hello_there_time-hi_here_time
print("持续时间:",time_last,"s")
持续时间: 0.8006150722503662 s
这里构建一个日涨幅偏离因子:
构建方法:Day_bias= (t+1开盘价/t-1日收盘价)/(1+相应指数t日涨跌幅)
其中:指数涨跌幅,600开头的股票都用上证指数,其他都用创业板
因为A股是T+1的,因此用第二天的开盘价(而非当日收盘价)更能反映当日的涨跌幅,加入分母,用于表示一个相对指数的偏离,逻辑上来讲,相对跌的多了第二天应该涨,而相对涨的多了第二天应该跌。
3.1 提取所需数据
#需要的三个数据
dependencies = ['clp_pred','open_nextd','index_chg']
#提取起止日期内所有数据
security=list(stocklist.index)
data=get_price(security, Begin_date,End_date, fields=["close","open","pre_close","avg"])
temp_indexdata=get_price(['000001.XSHG','399006.XSHE'],Begin_date,End_date, fields=["pre_close","close"])
index_data=temp_indexdata["close"]/temp_indexdata["pre_close"]
3.2 构建因子矩阵:index为股票代码,columns为日期
#因子值矩阵,index为股票代码,columns为日期
Factor_values_matrix=pd.DataFrame(index=stocklist.index,columns=date_list[:-1])
stock_600=pd.DataFrame([True if i[0]=="6" else False for i in security],index=stocklist.index)
stock_else=pd.DataFrame([False if i[0]=="6" else True for i in security],index=stocklist.index)
index_chg_pd=pd.DataFrame(index=stocklist.index,columns=[0])
for i in date_list[:-1]:
date=i
next_date=ShiftTradingDay(date,1,all_trade_day)
clp_pred=data.major_xs(date)["pre_close"]
open_nextd=data.major_xs(next_date)["open"]
#index_chg=pd.Series([index_data.loc[date][0] if i[0]=="6" else index_data.loc[date][1] for i in security],index=clp_pred.index)
index_chg_pd[stock_600]=index_data.loc[date][0]
index_chg_pd[stock_else]=index_data.loc[date][1]
index_chg=index_chg_pd[0]
#计算因子值:
factor=Day_bias(clp_pred,open_nextd,index_chg,date)
Factor_values_matrix[i]=factor.calc()
把因子值保存下来
#import pickle
#output = open('17-19_Day_bias.pkl', 'wb')
#pickle.dump(Factor_values_matrix, output)
#output.close()
#pkl_file = open('17-19_Day_bias.pkl', 'rb')
#Factor_values_matrix = pickle.load(pkl_file)
#pkl_file.close()
3.3 描述特征
#描述性统计
#画出Day_bias分布直方图
stock_num=len(stocklist.index)
date_num=len(date_list)
Allperiod_FV=sort([i[0] for i in Factor_values_matrix.values.reshape((date_num-1)*stock_num,1)])
plt.hist(Allperiod_FV[200:-200], bins=100,range=(0.9,1.2),density=True,color='r',rwidth=0.5,label='prob')
plt.show()
calc_feature(Allperiod_FV)
均值 | 中位数 | 标准差 | 偏度 | 峰度 | |
---|---|---|---|---|---|
0 | 0.999 | 0.998 | 0.03 | 3.951 | 0.027 |
4.1 提取流通市值
#提取流通市值
start=time.time()
Cir_mv_pd=pd.DataFrame(index=stocklist.index,columns=date_list)
for i in date_list:
df = get_fundamentals(query(valuation.code, valuation.circulating_market_cap).filter(valuation.code.in_(stocklist.index)),i)
Cir_mv_pd[i].loc[df.code]=list(df.circulating_market_cap)
Cir_mv_ifvalues=~Cir_mv_pd.isnull()
end=time.time()
print("持续时间:",end-start,"s")
持续时间: 113.22435355186462 s
4.2 回归部分正文
生成IC_IR表,每组每日选出的股票和权重
IC_IR=pd.DataFrame(index=["IC","IR","IC_mvnetural","IR_mvnetural"],columns=date_list)
tvalue=[]
#Stockandweight=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp1=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp2=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp3=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp4=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp5=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
#factor_netural=pd.DataFrame(index=stocklist.index,columns=date_list)
for i in date_list[0:-1]:
ct_pct=0.05
stocklist_canbuy=Canbuy_stock(stocklist,ct_pct)
stocklist_ex_new_notrade=ex_new_notrade_stock(stocklist,date_list)
#提取次日收益率(y值)
next_date=ShiftTradingDay(i,1,all_trade_day)
day_ret_t1=data['close'].loc[next_date]/data['close'].loc[i]-1
#提取log市值
logmv=pd.DataFrame(log(list(Cir_mv_pd[i])),index=stocklist.index,columns=['mv'])
#首先要提取因子并处理
factor=normalize(winsor_quantile(pd.DataFrame(Factor_values_matrix[i]),quantile=0.01))
factor_ifvalue=~factor.isnull()
#合成df
Set=pd.concat([day_ret_t1,stocklist[sw_lv1_28[:-1]],factor,logmv],axis=1).dropna()
#IC值
IC_IR.loc["IC"][i]=Set[0].corr(Set[i],method='spearman')
if i>date_list[19]:
loc=np.where(date_list==i)[0][0]
mean_IC=IC_IR.loc["IC"][date_list[loc-19:loc+1]].mean()
std_IC=IC_IR.loc["IC"][date_list[loc-19:loc+1]].std()
IC_IR.loc["IR"][i]=mean_IC/std_IC
#首先市值中性
y=Set[i]
mvi=Set["mv"]
slr=LinearRegression()
mv_netural=slr.fit(np.array(mvi).reshape(-1,1),np.array(y))
factor_mv_netural=np.array(y)-mv_netural.predict(np.array(mvi).reshape(-1,1))
Set[i]=factor_mv_netural
factor_netural=pd.DataFrame(index=stocklist.index,columns=[i])
factor_netural[i].loc[Set.index]=factor_mv_netural
factor_netural_ifvalue=~factor_netural.isnull()
#市值中性IC值,IR值(取20天作为周期)
IC_IR.loc["IC_mvnetural"][i]=Set[0].corr(Set[i],method='spearman')
if i>date_list[19]:
loc=np.where(date_list==i)[0][0]
mean_ICn=IC_IR.loc["IC_mvnetural"][date_list[loc-19:loc+1]].mean()
std_ICn=IC_IR.loc["IC_mvnetural"][date_list[loc-19:loc+1]].std()
IC_IR.loc["IR_mvnetural"][i]=mean_ICn/std_ICn
#其次行业中性 --并采用流通市值加权回归
logmv_exna=logmv.loc[y.index]
y=Set.iloc[:,0]*logmv_exna["mv"]
x_temp=sm.add_constant(Set.iloc[:,1:-1])
x=pd.DataFrame(x_temp.values*logmv_exna.values,columns=x_temp.columns,index=x_temp.index)
#x[i]=x[i]*logmv_exna["mv"]
result = sm.OLS(y,x.astype(float)).fit()
#print(result.summary())
tvalue.append(result.tvalues[-1])
print("T统计量:",round(tvalue[-1],3),"日期:",i)
#共分5组,输出股票名单和权重
hy_weight=pd.DataFrame(1/28*np.ones([1,28]),columns=sw_lv1_28)
thatday=i
totalgroupnum=5
St_weight_gp1[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
hy_weight,factor_netural,logmv,1,totalgroupnum=5,st_weight_method=None,
mv_netural="NO",hy_netural="YES")
St_weight_gp2[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
hy_weight,factor_netural,logmv,2,totalgroupnum=5,st_weight_method=None,
mv_netural="NO",hy_netural="YES")
St_weight_gp3[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
hy_weight,factor_netural,logmv,3,totalgroupnum=5,st_weight_method=None,
mv_netural="NO",hy_netural="YES")
St_weight_gp4[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
hy_weight,factor_netural,logmv,4,totalgroupnum=5,st_weight_method=None,
mv_netural="NO",hy_netural="YES")
St_weight_gp5[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
hy_weight,factor_netural,logmv,5,totalgroupnum=5,st_weight_method=None,
mv_netural="NO",hy_netural="YES")
T统计量: 4.395 日期: 2018-01-02 T统计量: 2.352 日期: 2018-01-03 T统计量: 0.85 日期: 2018-01-04 T统计量: 10.077 日期: 2018-01-05 T统计量: -0.272 日期: 2018-01-08 T统计量: 6.352 日期: 2018-01-09 T统计量: 1.486 日期: 2018-01-10 T统计量: 1.405 日期: 2018-01-11 T统计量: 14.432 日期: 2018-01-12 T统计量: 1.035 日期: 2018-01-15
#保存一下这些信息
save_st_weight={"gp1":St_weight_gp1,"gp2":St_weight_gp2,"gp3":St_weight_gp3,"gp4":St_weight_gp4,
"gp5":St_weight_gp5,"fac_exmv":factor_netural,"Tvalue":tvalue,"ICIR":IC_IR}
output = open('save_st_weight.pkl', 'wb')
pickle.dump(save_st_weight, output)
output.close()
pkl_file = open('save_st_weight.pkl', 'rb')
save_st_weight = pickle.load(pkl_file)
pkl_file.close()
St_weight_gp1=save_st_weight["gp1"]
St_weight_gp2=save_st_weight["gp2"]
St_weight_gp3=save_st_weight["gp3"]
St_weight_gp4=save_st_weight["gp4"]
St_weight_gp5=save_st_weight["gp5"]
factor_netural=save_st_weight["fac_exmv"]
4.3 画图展示IC、IR情况
fig = plt.figure(figsize=(8,4), dpi=500)
fig, ax= plt.subplots(figsize=(8,4))
ic_df = IC_IR.loc["IC"].T
ax.plot(date_list, ic_df, 'r')
ax.plot(date_list, ic_df.rolling(20).mean(), 'black')
ax.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
#my_x_ticks=[date_list[int(i)] for i in np.linspace(0,len(date_list)-1,5)]
#plt.xticks(my_x_ticks)
#ax.set_xticks([date_list[i] for i in np.arange(0,len(date_list)-1,5)])
#ax.set_xticklabels([date_list[i] for i in np.arange(0,len(date_list)-1,5)], fontsize=8)
ax.set_xlabel('日期')
ax.set_ylabel('IC')
IC_IR.mean(axis=1)
IC 0.044466 IR 0.344876 IC_mvnetural 0.042246 IR_mvnetural 0.351147 dtype: float64
<Figure size 4000x2000 with 0 Axes>
4.4 各组净值
#手续费万1.5,税千1,这里的净值属于近似净值(默认无手续费)--详细净值写在其他函数中
margin=0.00015
tax=0.001
#需要注意:Stockandweight的values是T日选出的股票的权重,这些股票是!!T+1!!日买入并持有
groupindex=["nv_gp"+str(i) for i in range(1,totalgroupnum+1,1)]
Netv=pd.DataFrame(index=groupindex,columns=date_list)
for m,n in zip(groupindex,[St_weight_gp1,St_weight_gp2,St_weight_gp3,St_weight_gp4,St_weight_gp5]):
Netv.loc[m]=group_netv(data,n,date_list,fee="NO",cal_method="fast").values
Netv=Netv.cumprod(axis=1)
#color_list =pd.DataFrame([['#5698c6', '#ff9e4a', '#60b760', '#e05c5d', '#ae8ccd']],columns=groupindex)
#基准指数:000001.XSHG
Base=get_price("000001.XSHG",date_list[0],date_list[-2],fields="close")
fig = plt.figure(figsize=(14,8))
ax= fig.add_subplot(1,1,1)
Netv.T.plot(ax = ax)
ax.plot(Base/Base.iloc[0,0],'black',label='000001.SH')
ax.legend(loc=2)
#x_label=[date_list[i] for i in np.arange(0,len(date_list)-1,8)]
#plt.xticks(x_label, rotation='vertical')
plt.ylabel('净值',{"size":18})
Text(0, 0.5, '净值')
4.5 多空组合净值
y=Netv.loc["nv_gp5"]-Netv.loc["nv_gp1"]
y.plot(figsize=(12,8))
plt.ylabel('多空组合',{'size': 15})
Text(0, 0.5, '多空组合')
计算传统表现
#每组的总收益,年化收益,最大回撤,SR,
risk_matrix=getrisk(groupindex,Netv,date_list)
print('===============各组收益如下================')
risk_matrix
===============各组收益如下================
total_return | Yearly_ret | MaxDraw | SharpR | |
---|---|---|---|---|
nv_gp1 | -57.33% | -43.93% | 57.79% | -2.85612 |
nv_gp2 | -18.86% | -13.24% | 39.50% | -0.655763 |
nv_gp3 | 4.57% | 3.09% | 32.06% | 0.133261 |
nv_gp4 | 26.08% | 17.05% | 26.96% | 0.645323 |
nv_gp5 | 42.14% | 26.99% | 24.27% | 0.91201 |
计算换手率
#计算换手率
factor_turnover_rate=pd.DataFrame(index=["gp1","gp5"],columns=date_list)
for group in ["gp1","gp5"]:
#最大分位换手率
if group=="gp1":
for i in range(1,len(date_list)):
factor_turnover_rate.loc[group][date_list[i]]=sum(abs(St_weight_gp1[date_list[i]]-St_weight_gp1[date_list[i-1]]))/2
#最小分位换手率
else:
for i in range(1,len(date_list)):
factor_turnover_rate.loc[group][date_list[i]]=sum(abs(St_weight_gp5[date_list[i]]-St_weight_gp5[date_list[i-1]]))/2
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1)
# ax.xaxis.set_major_formatter(mdate.DateFormatter('%Y%m%d')) # 设置时间标签显示格式
# ax.xaxis.set_major_locator(mdate.DayLocator())
ax.set_title("因子换手率")
#x = np.arange(0, len(factor_dict))
x=date_list
ax.scatter(x, factor_turnover_rate.loc["gp1"], label='5分位换手率')
ax.scatter(x, factor_turnover_rate.loc["gp5"], label='1分位换手率')
plt.grid(axis='y')
1.剔除股票
def filter_stock(stock_pool,date,N=30*3,skip_paused=1):
#剔除上市不足三个月的股票
start_date=stock_pool['start_date']
stock_pool['IPO more than N days']=start_date+datetime.timedelta(days=N)<datetime.date(*map(int, date.split('-')))
stock_pool=stock_pool[stock_pool['IPO more than N days']==True]
#剔除st股票(含*st)
hi_st=[x[:] for x in stock_pool['display_name'] if not ((x[0:2]=='ST') |(x[0:2]=='S*')|(x[0:2]=='*S')|(x[0:1]=='S'))]
stock_pool=stock_pool[stock_pool['display_name'].isin(hi_st)]
#保留交易的股票
if skip_paused==1:
trade_status=get_price(list(stock_pool.index), end_date=date,frequency='daily', fields='paused', skip_paused=False,count=1)
#ts_transpose=trade_status['paused'].T 两种方法均可
#ts_transpose[ts_transpose.values==0]
hi_trade=trade_status['paused'][trade_status['paused']==0]
hi_trade=hi_trade.dropna(1)
stock_pool=stock_pool.loc[list(hi_trade.columns)]
elif skip_paused==0:
stock_pool=stock_pool
else:
print('skip_paused的值为0或1,请正确输入')
return stock_pool
def ex_new_notrade_stock(stocklist,date_list,N=30*3,skip_paused=1):
stocklist_ex_new=pd.DataFrame(index=stocklist.index,columns=date_list)
#剔除上市不足三个月的股票
start_date=stocklist['start_date']
for i in date_list:
stocklist_ex_new[i]=start_date+datetime.timedelta(days=N)<i
#保留交易的股票
stocklist_trade=pd.DataFrame(index=stocklist.index,columns=date_list)
if skip_paused==1:
trade_status=get_price(list(stock_pool.index),date_list[0],date_list[-1],frequency='daily', fields='paused')
trade_status=trade_status["paused"].fillna(1).T
stocklist_trade=trade_status==0
elif skip_paused==0:
stocklist_trade=stocklist_trade.fillna(True)
else:
print('skip_paused的值为0或1,请正确输入')
return stocklist_trade&stocklist_ex_new
2.日期偏移
all_trade_day=get_price("000001.XSHG","2000-01-01","2019-07-13", fields="close").index
def ShiftTradingDay(date,shift,all_trade_day):
# 获取所有的交易日,返回一个包含所有交易日的 list,元素值为 datetime.date 类型.
try:
# 得到date之后shift天那一天在列表中的行标号 返回一个数
if type(date)!=datetime.date:
date = datetime.date(*map(int,date.split('-')))
shiftday_index = [time.mktime(i.timetuple()) for i in all_trade_day].index(time.mktime(date.timetuple()))+shift
# 根据行号返回该日日期 为datetime.date类型
temp=all_trade_day[shiftday_index]
format_shiftdate=datetime.date(temp.year,temp.month,temp.day)
return format_shiftdate
except:
return print('错误:请输入交易日,格式为2019-07-06')
3.因子构成
class Day_bias:
factor_name="Day_bias"
# 返回一个pd.Series数据
# 设置依赖的数据
dependencies = ['pct','open_nextd','index_chg']
def __init__(self,clp_pred,open_nextd,index_chg,date):
self.clp_pred = clp_pred
self.open_nextd = open_nextd
self.index_pct = index_chg
self.date = date
def calc(self):
factor_value=(open_nextd/clp_pred)/(index_chg)
return factor_value
4.计算特征
def feature(data):
data=data[~np.isnan(data)]
n = len(data)
niu = 0.0
niu2 = 0.0
niu3 = 0.0
for a in data:
niu += a
niu2 += a**2
niu3 += a**3
niu/= n #这是求E(X)
niu2 /= n #这是E(X^2)
niu3 /= n #这是E(X^3)
sigma = math.sqrt(niu2 - niu*niu) #这是D(X)的开方,标准差
return [round(niu,3),round(sigma,3),niu3] #返回[E(X),标准差,E(X^3)]
def calc_feature(data):
data=data[~np.isnan(data)]
median = data[len(data)//2] if len(data)%2==1 else round(0.5*(data[len(data)//2-1]+data[len(data)//2]),3)
[niu,sigma,niu3] = feature(data)
n = len(data)
niu4 = 0.0
for a in data:
a -= niu
niu4 += a ** 4
niu4 /= n
skew = round((niu3 - 3*niu*sigma**2 - niu**3)/(sigma**3),3)
kurt = round(niu4/(sigma**2),3)
return pd.DataFrame([[niu,median,sigma,skew,kurt]],columns=['均值','中位数','标准差','偏度','峰度']) #返回了均值,标准差,偏度,峰度,中位数
def winsor_quantile(factor,quantile=0.01):
factor_winsored=factor[(factor[factor.columns]<=factor[i].quantile(1-quantile)) & (factor[factor.columns]>=factor[i].quantile(quantile))]
#暂时不dropna()
return factor_winsored
#标准化:Z_SCORE,减均值除标准差;
def normalize(factor):
avg_df=factor.mean()
std_df=factor.std()
factor_normed=(factor-avg_df)/std_df
return factor_normed
def Canbuy_stock(stocklist,ct_pct):
trade_price=get_price(list(stocklist.index), Begin_date,End_date,fields=['open','high_limit','low_limit'])
open_price_df=trade_price['open'].T
high_limit_df=trade_price['high_limit'].T/1.1*(1+ct_pct)
low_limit_df=trade_price['low_limit'].T/0.9*(1-ct_pct)
price_ifhigher_df=(open_price_df<high_limit_df)
price_iflower_df=(open_price_df>low_limit_df)
stocklist_canbuy=price_iflower_df & price_ifhigher_df
return stocklist_canbuy
7.生成各组成分和权重
#要实现功能,生成一组Stockandweight-即为每天选出的股票(第二日要持仓的),type:df,index为股票,columns为日期,values为权重;返回一个pd.Series
def grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
hy_weight,factor_netural,logmv,groupid,totalgroupnum=5,st_weight_method=None,
mv_netural="NO",hy_netural="YES"):
"st_weight_method可以选择logmv"
Stockandweight=pd.DataFrame(np.zeros([stocklist_canbuy.shape[0],len(date_list)]),
columns=date_list,index=stocklist_canbuy.index)
#市值中性后的因子
nd=ShiftTradingDay(thatday,1,all_trade_day)
if mv_netural=="YES":
"""
type factor,factor_netural:df,index=股票代码,columns=当日
type stocklist:df,index=股票代码,columns=股票名称,上市日期,sw28个行业
"""
#剔除不能交易、没有因子值以及开盘涨跌幅太大的个股(不买入)
con=stocklist_canbuy[nd] &stocklist_ex_new_notrade[thatday] & (~factor_netural.isnull()[thatday])
#仅保留有效候选
st=stocklist[con]
fac=factor_netural[con]
#用原因子
else:
con=stocklist_canbuy[nd] &stocklist_ex_new_notrade[thatday] & (~factor.isnull()[thatday])
st=stocklist[con]
fac=factor_netural[con]
#确定是否有分组,根据groupid确定分位数
if groupid!=None:
#各组分位数
quant=np.linspace(0,1,totalgroupnum+1)
upperlim=quant[-groupid]
lowerlim=quant[-groupid-1]
else:
upperlim=1
lowerlim=0
#行业中性
if hy_netural=="YES":
#求该组因子上下界,返回fac_edge(type:df),返回每个行业取出的股票名称及权重
fac_byhy=fac.values*st[sw_lv1_28]
fac_byhy[fac_byhy==0]=NaN
fac_edge=pd.DataFrame(columns=sw_lv1_28,index=[upperlim,lowerlim])
for i in sw_lv1_28:
fac_edge[i].loc[upperlim]=up=fac_byhy[i].quantile(upperlim)
fac_edge[i].loc[lowerlim]=low=fac_byhy[i].quantile(lowerlim)
hyi_st=fac_byhy[i][(fac_byhy[i]<=up) & (fac_byhy[i]>low)].index
#是否要按log流通市值分配股票权重:目的是对冲时候敞口更小一点(这里也可以用总市值,IR等方法,看个人喜好)
if st_weight_method=='logmv':
temp=(logmv.loc[hyi_st])/(logmv.loc[hyi_st].sum())
temp.columns=["stweight"]
temp["stweight"]=temp["stweight"]*hy_weight[i][0]
hyi_stweight=temp
else:
temp=np.array([hy_weight[i]*1/len(hyi_st) for x in range(len(hyi_st))])
hyi_stweight=pd.DataFrame(temp,index=hyi_st,columns=["stweight"])
Stockandweight[thatday].loc[hyi_st]=hyi_stweight["stweight"]
#不考虑行业中性
else:
up=fac[thatday].quantile(upperlim)
low=fac[thatday].quantile(lowerlim)
hyi_st=fac[thatday][(fac[thatday]<=up)& (fac[thatday]>low)].index
#是否要按log流通市值分配股票权重:目的是对冲时候敞口更小一点(这里也可以用总市值,IR等方法,看个人喜好)
if st_weight_method=='logmv':
temp=(logmv.loc[hyi_st])/(logmv.loc[hyi_st].sum())
temp.columns=["stweight"]
hyi_stweight=temp
else:
temp=np.array([1/len(hyi_st) for x in range(len(hyi_st))])
hyi_stweight=pd.DataFrame(temp,index=hyi_st,columns=["stweight"])
Stockandweight[thatday].loc[hyi_st]=hyi_stweight["stweight"]
return Stockandweight[thatday]
8.分组净值
def group_netv(data,St_weight_gp5,date_list,fee="NO",cal_method="fast"):
"""simple:以T+1涨跌幅作为收益--仅在特定因子的情况下接近实际
avg:以T+1日均价作为买入成本,T+2日均价作为平仓价格,不太贴近实际---因为每一天既要开仓又要平仓,很难把握价格
fast:以T+1日开盘价作为买入成本,T+2日开盘价作为平仓价格,更接近实际,但缺点就是容量小
我们这里选择fast作为默认选项
"""
net_v=pd.DataFrame(np.ones([1,len(date_list)]),index=["net_value"],columns=date_list)
if fee=="NO":
fee=0
else:
fee=tax+2*margin
if cal_method=="simple":
oprice_anchor=data["pre_close"].T
cprice_anchor=data["close"].T
for i in date_list[0:-3]:
next1_i=date_list[list(date_list).index(i)+1]
ret_array=(1-fee)*cprice_anchor[next1_i]/oprice_anchor[next1_i]-1
net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()
elif cal_method=="avg":
oprice_anchor=cprice_anchor=data["avg"].T
for i in date_list[0:-3]:
next1_i=date_list[list(date_list).index(i)+1]
next2_i=date_list[list(date_list).index(i)+2]
ret_array=(1-fee)*cprice_anchor[next2_i]/oprice_anchor[next1_i]-1
net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()
elif cal_method=="fast":
oprice_anchor=cprice_anchor=data["open"].T
for i in date_list[0:-3]:
next1_i=date_list[list(date_list).index(i)+1]
next2_i=date_list[list(date_list).index(i)+2]
ret_array=(1-fee)*cprice_anchor[next2_i]/oprice_anchor[next1_i]-1
net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()
return net_v
def getrisk(groupindex,Netv,date_list):
risk_matrix=pd.DataFrame(columns=["total_return","Yearly_ret","MaxDraw","SharpR"],index=groupindex)
total_return=Netv[date_list[-2]]/Netv[date_list[0]]-1
Yearly_ret=(total_return+1)**(250/(len(date_list)-2))-1
MaxDraw=pd.DataFrame(index=groupindex,columns=["MD"])
Tocal=Netv.T
for i in groupindex:
gp=Tocal[i]
MaxDraw_value=0
for j in range(len(gp)):
high=max(gp[:j+1]) #此前高点
high_loc=list(gp).index(high) #高点的位置
low=min(gp[high_loc:j+1]) #高点之后的低点
Draw=(high-low)/high
MaxDraw_value=max(MaxDraw_value,Draw)
MaxDraw.loc[i]=MaxDraw_value
SharpR=(Yearly_ret)/((Netv.diff(axis=1).std(axis=1))*(250**0.5))
risk_matrix["total_return"]=["{:.2%}".format(total_return[i]) for i in range(len(groupindex))]
risk_matrix["Yearly_ret"]=["{:.2%}".format(Yearly_ret[i]) for i in range(len(groupindex))]
risk_matrix["MaxDraw"]=["{:.2%}".format(MaxDraw["MD"][i]) for i in range(len(groupindex))]
risk_matrix["SharpR"]=SharpR
return risk_matrix
本社区仅针对特定人员开放
查看需注册登录并通过风险意识测评
5秒后跳转登录页面...
移动端课程