繁簡切換您正在訪問的是FX168財經網,本網站所提供的內容及信息均遵守中華人民共和國香港特別行政區當地法律法規。

FX168财经网>人物频道>帖子

一个简单的单因子测试,附上一个修正的反转因子

作者/dsdkasd 2019-07-28 21:41 0 来源: FX168财经网人物频道

1、导入所需的包¶

#导入需要的函数
from jqfactor import get_factor_values
import matplotlib.pyplot as plt
import time
import jqdata as jq
import numpy as np
import pandas as pd
import math
import statsmodels.api as sm
from sklearn.model_selection import train_test_split #这里是引用了交叉验证
from sklearn.linear_model import LinearRegression  #线性回归
import matplotlib.pyplot as plt
import datetime
from jqfactor import *
import warnings  
import pickle
warnings.filterwarnings('ignore') 
plt.style.use('seaborn-bright')

2、确认基本参数¶

2.1 起止时间,因子极值处理时的分位数,以及分组回测交易情况下(剔除一些开盘涨跌幅超过ct_pct的个股)--不追高不抄底

#首先取交易时间;
#设置统计起止日期、极端值的边界分位数、以及要求第二天开盘涨跌幅不能超过ct_pct
Begin_date='2018-01-01'
End_date='2019-07-11'
quantile=0.01
ct_pct=0.05
#获取统计期内交易日列表、用于计算因子数据
date_list = jq.get_trade_days(start_date=Begin_date,end_date=End_date)#获取回测日期间的所有交易日

2.2 取成分股--本文以全部A股为例,做初步筛选

#剔除当前时间点上市不足三个月的新股、st股以及停牌股
date='2019-07-05'
stock_pool=get_all_securities(types=[], date=date)
stock_pool=filter_stock(stock_pool,date,skip_paused=1)

2.3 为这些成分股构建行业哑变量(提前构建,为之后分组回测做准备)

hi_here_time=time.time()
stock_industry=get_industry(security=list(stock_pool.index), date=None)
#column增加了一列sw行业
stock_pool['industry']=[x[1]['sw_l1']['industry_name'] for x in list(stock_industry.items()) if x[1].setdefault('sw_l1',{'industry_name':np.nan})]
stock_pool=stock_pool.dropna() 
#28个行业名称
sw_lv1_28=stock_pool.drop_duplicates(subset=['industry'],keep='first')['industry'].values  
#构建行业哑变量矩阵
hy_dummy_matrix=pd.DataFrame(np.zeros([len(stock_pool),28]),index=stock_pool.index,columns=sw_lv1_28)
for i in sw_lv1_28:
    hy_dummy_matrix[i][stock_pool['industry']==i]=1

#合并形成新的stocklist
stocklist=pd.concat([stock_pool[['display_name','start_date']],hy_dummy_matrix],axis=1)

hello_there_time=time.time()
time_last=hello_there_time-hi_here_time
print("持续时间:",time_last,"s")
持续时间: 0.8006150722503662 s

3、因子构建和描述¶

这里构建一个日涨幅偏离因子:

构建方法:Day_bias= (t+1开盘价/t-1日收盘价)/(1+相应指数t日涨跌幅)

其中:指数涨跌幅,600开头的股票都用上证指数,其他都用创业板

因为A股是T+1的,因此用第二天的开盘价(而非当日收盘价)更能反映当日的涨跌幅,加入分母,用于表示一个相对指数的偏离,逻辑上来讲,相对跌的多了第二天应该涨,而相对涨的多了第二天应该跌。

3.1 提取所需数据

#需要的三个数据
dependencies = ['clp_pred','open_nextd','index_chg']

#提取起止日期内所有数据
security=list(stocklist.index)
data=get_price(security, Begin_date,End_date, fields=["close","open","pre_close","avg"])
temp_indexdata=get_price(['000001.XSHG','399006.XSHE'],Begin_date,End_date, fields=["pre_close","close"])
index_data=temp_indexdata["close"]/temp_indexdata["pre_close"]

3.2 构建因子矩阵:index为股票代码,columns为日期

#因子值矩阵,index为股票代码,columns为日期
Factor_values_matrix=pd.DataFrame(index=stocklist.index,columns=date_list[:-1])

stock_600=pd.DataFrame([True if i[0]=="6" else False for i in security],index=stocklist.index)
stock_else=pd.DataFrame([False if i[0]=="6" else True for i in security],index=stocklist.index)
index_chg_pd=pd.DataFrame(index=stocklist.index,columns=[0])
for i in date_list[:-1]:
    date=i
    next_date=ShiftTradingDay(date,1,all_trade_day)

    clp_pred=data.major_xs(date)["pre_close"]
    open_nextd=data.major_xs(next_date)["open"]
    #index_chg=pd.Series([index_data.loc[date][0] if i[0]=="6" else index_data.loc[date][1] for i in security],index=clp_pred.index)
    index_chg_pd[stock_600]=index_data.loc[date][0]
    index_chg_pd[stock_else]=index_data.loc[date][1]
    index_chg=index_chg_pd[0]
    #计算因子值:
    factor=Day_bias(clp_pred,open_nextd,index_chg,date)
    Factor_values_matrix[i]=factor.calc()

    

把因子值保存下来

#import pickle
#output = open('17-19_Day_bias.pkl', 'wb')
#pickle.dump(Factor_values_matrix, output)
#output.close()

#pkl_file = open('17-19_Day_bias.pkl', 'rb')
#Factor_values_matrix = pickle.load(pkl_file)
#pkl_file.close()

3.3 描述特征

#描述性统计
#画出Day_bias分布直方图
stock_num=len(stocklist.index)
date_num=len(date_list)
Allperiod_FV=sort([i[0] for i in Factor_values_matrix.values.reshape((date_num-1)*stock_num,1)])

plt.hist(Allperiod_FV[200:-200], bins=100,range=(0.9,1.2),density=True,color='r',rwidth=0.5,label='prob')
plt.show()
calc_feature(Allperiod_FV)
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
均值 中位数 标准差 偏度 峰度
0 0.999 0.998 0.03 3.951 0.027

4. 因子解释(linear regression)和分组回测¶

4.1 提取流通市值

#提取流通市值
start=time.time()
Cir_mv_pd=pd.DataFrame(index=stocklist.index,columns=date_list)
for i in date_list:
    df = get_fundamentals(query(valuation.code, valuation.circulating_market_cap).filter(valuation.code.in_(stocklist.index)),i)
    Cir_mv_pd[i].loc[df.code]=list(df.circulating_market_cap)
Cir_mv_ifvalues=~Cir_mv_pd.isnull()
end=time.time()
print("持续时间:",end-start,"s")
持续时间: 113.22435355186462 s

4.2 回归部分正文

生成IC_IR表,每组每日选出的股票和权重

IC_IR=pd.DataFrame(index=["IC","IR","IC_mvnetural","IR_mvnetural"],columns=date_list)
tvalue=[]
#Stockandweight=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp1=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp2=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp3=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp4=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
St_weight_gp5=pd.DataFrame(np.zeros([stocklist.shape[0],len(date_list)]),columns=date_list,index=stocklist.index)
#factor_netural=pd.DataFrame(index=stocklist.index,columns=date_list)
 
for i in date_list[0:-1]:
    
    ct_pct=0.05
    stocklist_canbuy=Canbuy_stock(stocklist,ct_pct)
    stocklist_ex_new_notrade=ex_new_notrade_stock(stocklist,date_list)

    #提取次日收益率(y值)
    next_date=ShiftTradingDay(i,1,all_trade_day)
    day_ret_t1=data['close'].loc[next_date]/data['close'].loc[i]-1

    #提取log市值
    logmv=pd.DataFrame(log(list(Cir_mv_pd[i])),index=stocklist.index,columns=['mv'])


    #首先要提取因子并处理
    factor=normalize(winsor_quantile(pd.DataFrame(Factor_values_matrix[i]),quantile=0.01))
    factor_ifvalue=~factor.isnull()

    #合成df
    Set=pd.concat([day_ret_t1,stocklist[sw_lv1_28[:-1]],factor,logmv],axis=1).dropna()
    
    #IC值
    IC_IR.loc["IC"][i]=Set[0].corr(Set[i],method='spearman')
    if i>date_list[19]:
        loc=np.where(date_list==i)[0][0]
        mean_IC=IC_IR.loc["IC"][date_list[loc-19:loc+1]].mean()
        std_IC=IC_IR.loc["IC"][date_list[loc-19:loc+1]].std()
        IC_IR.loc["IR"][i]=mean_IC/std_IC
        
    #首先市值中性
    y=Set[i]
    mvi=Set["mv"]
    slr=LinearRegression()
    mv_netural=slr.fit(np.array(mvi).reshape(-1,1),np.array(y))
    factor_mv_netural=np.array(y)-mv_netural.predict(np.array(mvi).reshape(-1,1))

    Set[i]=factor_mv_netural
    
    factor_netural=pd.DataFrame(index=stocklist.index,columns=[i])
    factor_netural[i].loc[Set.index]=factor_mv_netural
    factor_netural_ifvalue=~factor_netural.isnull()

    
    #市值中性IC值,IR值(取20天作为周期)
    IC_IR.loc["IC_mvnetural"][i]=Set[0].corr(Set[i],method='spearman')    
    if i>date_list[19]:
        loc=np.where(date_list==i)[0][0]
        mean_ICn=IC_IR.loc["IC_mvnetural"][date_list[loc-19:loc+1]].mean()
        std_ICn=IC_IR.loc["IC_mvnetural"][date_list[loc-19:loc+1]].std()
        IC_IR.loc["IR_mvnetural"][i]=mean_ICn/std_ICn
 
    #其次行业中性 --并采用流通市值加权回归
    logmv_exna=logmv.loc[y.index]
    y=Set.iloc[:,0]*logmv_exna["mv"]
    x_temp=sm.add_constant(Set.iloc[:,1:-1])
    x=pd.DataFrame(x_temp.values*logmv_exna.values,columns=x_temp.columns,index=x_temp.index)
    #x[i]=x[i]*logmv_exna["mv"]

    result = sm.OLS(y,x.astype(float)).fit()
    #print(result.summary())
    tvalue.append(result.tvalues[-1])    
    print("T统计量:",round(tvalue[-1],3),"日期:",i)
    
    #共分5组,输出股票名单和权重    
    hy_weight=pd.DataFrame(1/28*np.ones([1,28]),columns=sw_lv1_28)

    thatday=i

    totalgroupnum=5
    

    St_weight_gp1[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
                               hy_weight,factor_netural,logmv,1,totalgroupnum=5,st_weight_method=None,
                               mv_netural="NO",hy_netural="YES")
    St_weight_gp2[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
                               hy_weight,factor_netural,logmv,2,totalgroupnum=5,st_weight_method=None,
                               mv_netural="NO",hy_netural="YES")
    St_weight_gp3[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
                               hy_weight,factor_netural,logmv,3,totalgroupnum=5,st_weight_method=None,
                               mv_netural="NO",hy_netural="YES")
    St_weight_gp4[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
                               hy_weight,factor_netural,logmv,4,totalgroupnum=5,st_weight_method=None,
                               mv_netural="NO",hy_netural="YES")
    St_weight_gp5[i]=grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
                               hy_weight,factor_netural,logmv,5,totalgroupnum=5,st_weight_method=None,
                               mv_netural="NO",hy_netural="YES")
 
T统计量: 4.395 日期: 2018-01-02
T统计量: 2.352 日期: 2018-01-03
T统计量: 0.85 日期: 2018-01-04
T统计量: 10.077 日期: 2018-01-05
T统计量: -0.272 日期: 2018-01-08
T统计量: 6.352 日期: 2018-01-09
T统计量: 1.486 日期: 2018-01-10
T统计量: 1.405 日期: 2018-01-11
T统计量: 14.432 日期: 2018-01-12
T统计量: 1.035 日期: 2018-01-15
#保存一下这些信息
save_st_weight={"gp1":St_weight_gp1,"gp2":St_weight_gp2,"gp3":St_weight_gp3,"gp4":St_weight_gp4,
                "gp5":St_weight_gp5,"fac_exmv":factor_netural,"Tvalue":tvalue,"ICIR":IC_IR}
output = open('save_st_weight.pkl', 'wb')
pickle.dump(save_st_weight, output)
output.close()
pkl_file = open('save_st_weight.pkl', 'rb')
save_st_weight = pickle.load(pkl_file)
pkl_file.close()

St_weight_gp1=save_st_weight["gp1"]
St_weight_gp2=save_st_weight["gp2"]
St_weight_gp3=save_st_weight["gp3"]
St_weight_gp4=save_st_weight["gp4"]
St_weight_gp5=save_st_weight["gp5"]
factor_netural=save_st_weight["fac_exmv"]

4.3 画图展示IC、IR情况

fig = plt.figure(figsize=(8,4), dpi=500)
fig, ax= plt.subplots(figsize=(8,4))

ic_df = IC_IR.loc["IC"].T
ax.plot(date_list, ic_df, 'r')
ax.plot(date_list, ic_df.rolling(20).mean(), 'black')
ax.grid(color='b', alpha=0.5, linestyle='dashed', linewidth=0.5)
#my_x_ticks=[date_list[int(i)] for i in np.linspace(0,len(date_list)-1,5)]
#plt.xticks(my_x_ticks)

#ax.set_xticks([date_list[i] for i in np.arange(0,len(date_list)-1,5)])
#ax.set_xticklabels([date_list[i] for i in np.arange(0,len(date_list)-1,5)], fontsize=8)
ax.set_xlabel('日期')
ax.set_ylabel('IC')

IC_IR.mean(axis=1)
IC              0.044466
IR              0.344876
IC_mvnetural    0.042246
IR_mvnetural    0.351147
dtype: float64
<Figure size 4000x2000 with 0 Axes>

4.4 各组净值

#手续费万1.5,税千1,这里的净值属于近似净值(默认无手续费)--详细净值写在其他函数中
margin=0.00015
tax=0.001
#需要注意:Stockandweight的values是T日选出的股票的权重,这些股票是!!T+1!!日买入并持有

groupindex=["nv_gp"+str(i) for i in range(1,totalgroupnum+1,1)]
Netv=pd.DataFrame(index=groupindex,columns=date_list)

for m,n in zip(groupindex,[St_weight_gp1,St_weight_gp2,St_weight_gp3,St_weight_gp4,St_weight_gp5]):
    Netv.loc[m]=group_netv(data,n,date_list,fee="NO",cal_method="fast").values
Netv=Netv.cumprod(axis=1)
    #color_list =pd.DataFrame([['#5698c6', '#ff9e4a', '#60b760', '#e05c5d', '#ae8ccd']],columns=groupindex)
#基准指数:000001.XSHG
Base=get_price("000001.XSHG",date_list[0],date_list[-2],fields="close")
    
fig = plt.figure(figsize=(14,8)) 
ax= fig.add_subplot(1,1,1)
Netv.T.plot(ax = ax)
ax.plot(Base/Base.iloc[0,0],'black',label='000001.SH')
ax.legend(loc=2) 
#x_label=[date_list[i] for i in np.arange(0,len(date_list)-1,8)]  
#plt.xticks(x_label, rotation='vertical')
plt.ylabel('净值',{"size":18})
Text(0, 0.5, '净值')

4.5 多空组合净值

y=Netv.loc["nv_gp5"]-Netv.loc["nv_gp1"]
y.plot(figsize=(12,8))
plt.ylabel('多空组合',{'size': 15})
Text(0, 0.5, '多空组合')

5. 评价表现¶

计算传统表现

#每组的总收益,年化收益,最大回撤,SR,
risk_matrix=getrisk(groupindex,Netv,date_list)
print('===============各组收益如下================')
risk_matrix
===============各组收益如下================
.dataframe tbody tr th:only-of-type { vertical-align: middle; } .dataframe tbody tr th { vertical-align: top; } .dataframe thead th { text-align: right; }
total_return Yearly_ret MaxDraw SharpR
nv_gp1 -57.33% -43.93% 57.79% -2.85612
nv_gp2 -18.86% -13.24% 39.50% -0.655763
nv_gp3 4.57% 3.09% 32.06% 0.133261
nv_gp4 26.08% 17.05% 26.96% 0.645323
nv_gp5 42.14% 26.99% 24.27% 0.91201

计算换手率

#计算换手率
factor_turnover_rate=pd.DataFrame(index=["gp1","gp5"],columns=date_list)
for group in ["gp1","gp5"]:
    #最大分位换手率
    if group=="gp1":
        for i in range(1,len(date_list)):            
            factor_turnover_rate.loc[group][date_list[i]]=sum(abs(St_weight_gp1[date_list[i]]-St_weight_gp1[date_list[i-1]]))/2
    #最小分位换手率
    else:
        for i in range(1,len(date_list)):
            factor_turnover_rate.loc[group][date_list[i]]=sum(abs(St_weight_gp5[date_list[i]]-St_weight_gp5[date_list[i-1]]))/2
fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1)
#     ax.xaxis.set_major_formatter(mdate.DateFormatter('%Y%m%d'))  # 设置时间标签显示格式
#     ax.xaxis.set_major_locator(mdate.DayLocator())
ax.set_title("因子换手率")
#x = np.arange(0, len(factor_dict))
x=date_list
ax.scatter(x, factor_turnover_rate.loc["gp1"], label='5分位换手率')
ax.scatter(x, factor_turnover_rate.loc["gp5"], label='1分位换手率')
plt.grid(axis='y')

工具函数¶

1.剔除股票

def filter_stock(stock_pool,date,N=30*3,skip_paused=1):
    
    #剔除上市不足三个月的股票
    start_date=stock_pool['start_date']
    stock_pool['IPO more than N days']=start_date+datetime.timedelta(days=N)<datetime.date(*map(int, date.split('-')))
    stock_pool=stock_pool[stock_pool['IPO more than N days']==True]

    #剔除st股票(含*st)
    hi_st=[x[:] for x in stock_pool['display_name'] if not ((x[0:2]=='ST') |(x[0:2]=='S*')|(x[0:2]=='*S')|(x[0:1]=='S'))]
    stock_pool=stock_pool[stock_pool['display_name'].isin(hi_st)]

    #保留交易的股票
    if skip_paused==1:
        trade_status=get_price(list(stock_pool.index), end_date=date,frequency='daily', fields='paused', skip_paused=False,count=1)    
        #ts_transpose=trade_status['paused'].T  两种方法均可
        #ts_transpose[ts_transpose.values==0]
        hi_trade=trade_status['paused'][trade_status['paused']==0]
        hi_trade=hi_trade.dropna(1)
        stock_pool=stock_pool.loc[list(hi_trade.columns)]
    elif skip_paused==0:
        stock_pool=stock_pool
    else:
        print('skip_paused的值为0或1,请正确输入')
    
    return stock_pool
def ex_new_notrade_stock(stocklist,date_list,N=30*3,skip_paused=1):
    
    stocklist_ex_new=pd.DataFrame(index=stocklist.index,columns=date_list)
    #剔除上市不足三个月的股票
    start_date=stocklist['start_date']
    for i in date_list:
        stocklist_ex_new[i]=start_date+datetime.timedelta(days=N)<i

    #保留交易的股票
    stocklist_trade=pd.DataFrame(index=stocklist.index,columns=date_list)
    
    if skip_paused==1:
        trade_status=get_price(list(stock_pool.index),date_list[0],date_list[-1],frequency='daily', fields='paused')    
        trade_status=trade_status["paused"].fillna(1).T
        stocklist_trade=trade_status==0
    elif skip_paused==0:
        stocklist_trade=stocklist_trade.fillna(True)
    else:
        print('skip_paused的值为0或1,请正确输入')
    
    
    return stocklist_trade&stocklist_ex_new

2.日期偏移

all_trade_day=get_price("000001.XSHG","2000-01-01","2019-07-13", fields="close").index
def ShiftTradingDay(date,shift,all_trade_day):
    # 获取所有的交易日,返回一个包含所有交易日的 list,元素值为 datetime.date 类型.
    try:
        # 得到date之后shift天那一天在列表中的行标号 返回一个数
        if type(date)!=datetime.date:
            date = datetime.date(*map(int,date.split('-')))
        shiftday_index = [time.mktime(i.timetuple()) for i in all_trade_day].index(time.mktime(date.timetuple()))+shift
        # 根据行号返回该日日期 为datetime.date类型
        temp=all_trade_day[shiftday_index]
        format_shiftdate=datetime.date(temp.year,temp.month,temp.day)
        return format_shiftdate 
    except:
        return print('错误:请输入交易日,格式为2019-07-06')

3.因子构成

class Day_bias:
    
    factor_name="Day_bias" 
    # 返回一个pd.Series数据
    # 设置依赖的数据
    dependencies = ['pct','open_nextd','index_chg']
    def __init__(self,clp_pred,open_nextd,index_chg,date):
        self.clp_pred = clp_pred
        self.open_nextd = open_nextd
        self.index_pct = index_chg
        self.date = date
    def calc(self):
        factor_value=(open_nextd/clp_pred)/(index_chg)
        return factor_value

4.计算特征

def feature(data):
    data=data[~np.isnan(data)]
    n = len(data)
    niu = 0.0
    niu2 = 0.0
    niu3 = 0.0
    for a in data:
        niu += a
        niu2 += a**2
        niu3 += a**3
    niu/= n   #这是求E(X)
    niu2 /= n #这是E(X^2)
    niu3 /= n #这是E(X^3)
    sigma = math.sqrt(niu2 - niu*niu) #这是D(X)的开方,标准差
    return [round(niu,3),round(sigma,3),niu3] #返回[E(X),标准差,E(X^3)]

def calc_feature(data):
    data=data[~np.isnan(data)]
    median = data[len(data)//2] if len(data)%2==1 else round(0.5*(data[len(data)//2-1]+data[len(data)//2]),3)   
    [niu,sigma,niu3] = feature(data)
    n = len(data)
    niu4 = 0.0
    for a in data:
        a -= niu
        niu4 += a ** 4
    niu4 /= n   
    skew = round((niu3 - 3*niu*sigma**2 - niu**3)/(sigma**3),3)
    kurt = round(niu4/(sigma**2),3)
    return pd.DataFrame([[niu,median,sigma,skew,kurt]],columns=['均值','中位数','标准差','偏度','峰度']) #返回了均值,标准差,偏度,峰度,中位数
  1. 去极值和标准化
def winsor_quantile(factor,quantile=0.01):
    factor_winsored=factor[(factor[factor.columns]<=factor[i].quantile(1-quantile)) & (factor[factor.columns]>=factor[i].quantile(quantile))]
    #暂时不dropna()
    return factor_winsored

#标准化:Z_SCORE,减均值除标准差;
def normalize(factor):
    avg_df=factor.mean()
    std_df=factor.std()
    factor_normed=(factor-avg_df)/std_df
    return factor_normed
  1. 第二天开盘涨跌幅不超过ct_pct的个股
def Canbuy_stock(stocklist,ct_pct):
    trade_price=get_price(list(stocklist.index), Begin_date,End_date,fields=['open','high_limit','low_limit'])
    open_price_df=trade_price['open'].T
    high_limit_df=trade_price['high_limit'].T/1.1*(1+ct_pct)
    low_limit_df=trade_price['low_limit'].T/0.9*(1-ct_pct)
    price_ifhigher_df=(open_price_df<high_limit_df)
    price_iflower_df=(open_price_df>low_limit_df)
    
    stocklist_canbuy=price_iflower_df & price_ifhigher_df
    
    return stocklist_canbuy

7.生成各组成分和权重

#要实现功能,生成一组Stockandweight-即为每天选出的股票(第二日要持仓的),type:df,index为股票,columns为日期,values为权重;返回一个pd.Series
def grouplist(date_list,stocklist_canbuy,stocklist_ex_new_notrade,factor,sw_lv1_28,thatday,
              hy_weight,factor_netural,logmv,groupid,totalgroupnum=5,st_weight_method=None,
              mv_netural="NO",hy_netural="YES"):
    "st_weight_method可以选择logmv"
    
    Stockandweight=pd.DataFrame(np.zeros([stocklist_canbuy.shape[0],len(date_list)]),
                            columns=date_list,index=stocklist_canbuy.index)
    #市值中性后的因子
    nd=ShiftTradingDay(thatday,1,all_trade_day)

    if mv_netural=="YES":
        """
        type factor,factor_netural:df,index=股票代码,columns=当日
        type stocklist:df,index=股票代码,columns=股票名称,上市日期,sw28个行业
        """
        #剔除不能交易、没有因子值以及开盘涨跌幅太大的个股(不买入)
        con=stocklist_canbuy[nd] &stocklist_ex_new_notrade[thatday] & (~factor_netural.isnull()[thatday])
        #仅保留有效候选
        st=stocklist[con]  
        fac=factor_netural[con]
    #用原因子        
    else:
        con=stocklist_canbuy[nd] &stocklist_ex_new_notrade[thatday] & (~factor.isnull()[thatday])
        st=stocklist[con]  
        fac=factor_netural[con]
        
    #确定是否有分组,根据groupid确定分位数
    if groupid!=None:
        #各组分位数
        quant=np.linspace(0,1,totalgroupnum+1)
        upperlim=quant[-groupid]
        lowerlim=quant[-groupid-1]
    else:
        upperlim=1
        lowerlim=0   

    #行业中性        
    if hy_netural=="YES":
        #求该组因子上下界,返回fac_edge(type:df),返回每个行业取出的股票名称及权重                        
        fac_byhy=fac.values*st[sw_lv1_28]
        fac_byhy[fac_byhy==0]=NaN
        fac_edge=pd.DataFrame(columns=sw_lv1_28,index=[upperlim,lowerlim])

        for i in sw_lv1_28:
            fac_edge[i].loc[upperlim]=up=fac_byhy[i].quantile(upperlim)
            fac_edge[i].loc[lowerlim]=low=fac_byhy[i].quantile(lowerlim)

            hyi_st=fac_byhy[i][(fac_byhy[i]<=up) & (fac_byhy[i]>low)].index
            #是否要按log流通市值分配股票权重:目的是对冲时候敞口更小一点(这里也可以用总市值,IR等方法,看个人喜好)
            if st_weight_method=='logmv':
                temp=(logmv.loc[hyi_st])/(logmv.loc[hyi_st].sum())
                temp.columns=["stweight"]
                temp["stweight"]=temp["stweight"]*hy_weight[i][0]
                hyi_stweight=temp
            else:
                temp=np.array([hy_weight[i]*1/len(hyi_st) for x in range(len(hyi_st))])
                hyi_stweight=pd.DataFrame(temp,index=hyi_st,columns=["stweight"])

            Stockandweight[thatday].loc[hyi_st]=hyi_stweight["stweight"]
    #不考虑行业中性
    else:
        up=fac[thatday].quantile(upperlim)
        low=fac[thatday].quantile(lowerlim)
        hyi_st=fac[thatday][(fac[thatday]<=up)& (fac[thatday]>low)].index
        #是否要按log流通市值分配股票权重:目的是对冲时候敞口更小一点(这里也可以用总市值,IR等方法,看个人喜好)
        if st_weight_method=='logmv':
            temp=(logmv.loc[hyi_st])/(logmv.loc[hyi_st].sum())
            temp.columns=["stweight"]
            hyi_stweight=temp
        else:
            temp=np.array([1/len(hyi_st) for x in range(len(hyi_st))])
            hyi_stweight=pd.DataFrame(temp,index=hyi_st,columns=["stweight"])

        Stockandweight[thatday].loc[hyi_st]=hyi_stweight["stweight"]

    return Stockandweight[thatday]
    

8.分组净值

def group_netv(data,St_weight_gp5,date_list,fee="NO",cal_method="fast"):
    """simple:以T+1涨跌幅作为收益--仅在特定因子的情况下接近实际
       avg:以T+1日均价作为买入成本,T+2日均价作为平仓价格,不太贴近实际---因为每一天既要开仓又要平仓,很难把握价格
       fast:以T+1日开盘价作为买入成本,T+2日开盘价作为平仓价格,更接近实际,但缺点就是容量小    
       我们这里选择fast作为默认选项
    """
    
    net_v=pd.DataFrame(np.ones([1,len(date_list)]),index=["net_value"],columns=date_list)
    if fee=="NO":
        fee=0
    else:
        fee=tax+2*margin
    
    
    if cal_method=="simple":
        oprice_anchor=data["pre_close"].T
        cprice_anchor=data["close"].T        
        for i in date_list[0:-3]:
            next1_i=date_list[list(date_list).index(i)+1]
            ret_array=(1-fee)*cprice_anchor[next1_i]/oprice_anchor[next1_i]-1                
            net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()

    elif cal_method=="avg":
        oprice_anchor=cprice_anchor=data["avg"].T
        for i in date_list[0:-3]:
            next1_i=date_list[list(date_list).index(i)+1]
            next2_i=date_list[list(date_list).index(i)+2]
            ret_array=(1-fee)*cprice_anchor[next2_i]/oprice_anchor[next1_i]-1                
            net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()

    elif cal_method=="fast":
        oprice_anchor=cprice_anchor=data["open"].T        
        for i in date_list[0:-3]:
            next1_i=date_list[list(date_list).index(i)+1]
            next2_i=date_list[list(date_list).index(i)+2]
            ret_array=(1-fee)*cprice_anchor[next2_i]/oprice_anchor[next1_i]-1                
            net_v[next1_i]=1+(St_weight_gp5[i]*ret_array).sum()
    return net_v
  1. 计算回撤等指标
def getrisk(groupindex,Netv,date_list):
    risk_matrix=pd.DataFrame(columns=["total_return","Yearly_ret","MaxDraw","SharpR"],index=groupindex)

    total_return=Netv[date_list[-2]]/Netv[date_list[0]]-1

    Yearly_ret=(total_return+1)**(250/(len(date_list)-2))-1

    MaxDraw=pd.DataFrame(index=groupindex,columns=["MD"])
    Tocal=Netv.T
    for i in groupindex:
        gp=Tocal[i]
        MaxDraw_value=0
        for j in range(len(gp)):                  
            high=max(gp[:j+1])  #此前高点        
            high_loc=list(gp).index(high)  #高点的位置
            low=min(gp[high_loc:j+1])   #高点之后的低点
            Draw=(high-low)/high 
            MaxDraw_value=max(MaxDraw_value,Draw)
        MaxDraw.loc[i]=MaxDraw_value

    SharpR=(Yearly_ret)/((Netv.diff(axis=1).std(axis=1))*(250**0.5))

    risk_matrix["total_return"]=["{:.2%}".format(total_return[i]) for i in range(len(groupindex))]
    risk_matrix["Yearly_ret"]=["{:.2%}".format(Yearly_ret[i]) for i in range(len(groupindex))]
    risk_matrix["MaxDraw"]=["{:.2%}".format(MaxDraw["MD"][i]) for i in range(len(groupindex))]
    risk_matrix["SharpR"]=SharpR
    return risk_matrix
 
分享到:
举报财经168客户端下载

全部回复

0/140

投稿 您想发表你的观点和看法?

更多人气分析师

  • 张亦巧

    人气2200文章4145粉丝45

    暂无个人简介信息

  • 王启蒙现货黄金

    人气304文章3275粉丝8

    本人做分析师以来,并专注于贵金属投资市场,尤其是在现货黄金...

  • 指导老师

    人气1864文章4423粉丝52

    暂无个人简介信息

  • 李冉晴

    人气2320文章3821粉丝34

    李冉晴,专业现贷实盘分析师。

  • 梁孟梵

    人气2176文章3177粉丝39

    qq:2294906466 了解群指导添加微信mfmacd

  • 张迎妤

    人气1896文章3305粉丝34

    个人专注于行情技术分析,消息面解读剖析,给予您第一时间方向...

  • 金泰铬J

    人气2328文章3925粉丝51

    投资问答解咨询金泰铬V/信tgtg67即可获取每日的实时资讯、行情...

  • 金算盘

    人气2696文章7761粉丝125

    高级分析师,混过名校,厮杀于股市和期货、证券市场多年,专注...

  • 金帝财神

    人气4760文章8329粉丝119

    本文由资深分析师金帝财神微信:934295330,指导黄金,白银,...

FX168财经

FX168财经学院

FX168财经

FX168北美